Using more stable softmax implementation by subtracting max.
commit b4b00e0c65 (parent 870c04f664)
@@ -154,7 +154,9 @@ class CrossEntropySoftmaxError(object):
         Returns:
             Scalar error function value.
         """
-        probs = np.exp(outputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        probs = np.exp(outputs - outputs.max(-1)[:, None])
         probs /= probs.sum(-1)[:, None]
         return -np.mean(np.sum(targets * np.log(probs), axis=1))
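Why the change matters: np.exp overflows in float64 once its argument exceeds roughly 709, so large logits turn the naive probabilities into inf and then nan after normalisation, while the max-subtracted form keeps every exponent at or below zero. A minimal sketch of the difference (the logit values below are illustrative only, not taken from the repository):

import numpy as np

outputs = np.array([[1000., 0., -1000.]])  # extreme logits chosen to force overflow

naive = np.exp(outputs)                    # exp(1000.) overflows to inf (RuntimeWarning)
naive /= naive.sum(-1)[:, None]            # inf / inf -> nan in the first entry

stable = np.exp(outputs - outputs.max(-1)[:, None])  # largest exponent is exp(0) = 1
stable /= stable.sum(-1)[:, None]

print(naive)   # [[nan  0.  0.]]
print(stable)  # [[1.  0.  0.]] (up to rounding)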
@@ -341,7 +341,9 @@ class SoftmaxLayer(Layer):
         Returns:
             outputs: Array of layer outputs of shape (batch_size, output_dim).
         """
-        exp_inputs = np.exp(inputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
         return exp_inputs / exp_inputs.sum(-1)[:, None]
 
     def bprop(self, inputs, outputs, grads_wrt_outputs):