Use a more stable softmax implementation by subtracting the max.
parent 870c04f664
commit b4b00e0c65
@@ -154,7 +154,9 @@ class CrossEntropySoftmaxError(object):
         Returns:
             Scalar error function value.
         """
-        probs = np.exp(outputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        probs = np.exp(outputs - outputs.max(-1)[:, None])
         probs /= probs.sum(-1)[:, None]
         return -np.mean(np.sum(targets * np.log(probs), axis=1))
 
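Why the subtraction is safe here: shifting by the row-wise max multiplies numerator and denominator of the softmax by the same factor exp(-max), so the probabilities, and therefore the cross-entropy value, are unchanged, while the exponentials stay in a representable range. Below is a minimal standalone sketch of the patched error computation; the function name cross_entropy_softmax_error and the toy arrays are illustrative, not part of the repository.

import numpy as np

def cross_entropy_softmax_error(outputs, targets):
    # softmax of the logits followed by the mean cross-entropy against
    # one-hot targets, written the same way as the patched method above
    probs = np.exp(outputs - outputs.max(-1)[:, None])  # row-wise max cancels after normalisation
    probs /= probs.sum(-1)[:, None]
    return -np.mean(np.sum(targets * np.log(probs), axis=1))

outputs = np.array([[1000., 1001., 1002.],   # large logits: naive np.exp(outputs) overflows to inf
                    [-5., 0., 5.]])
targets = np.array([[0., 0., 1.],
                    [0., 1., 0.]])

print(np.isinf(np.exp(outputs)).any())                # True (overflow RuntimeWarning)
print(cross_entropy_softmax_error(outputs, targets))  # finite, roughly 2.71 for this toy batch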
@@ -341,7 +341,9 @@ class SoftmaxLayer(Layer):
         Returns:
             outputs: Array of layer outputs of shape (batch_size, output_dim).
         """
-        exp_inputs = np.exp(inputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
         return exp_inputs / exp_inputs.sum(-1)[:, None]
 
     def bprop(self, inputs, outputs, grads_wrt_outputs):
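A quick check of the cancellation claim for the layer's forward pass, using assumed toy inputs (softmax_naive and softmax_stable are illustrative helpers, not repository code): for moderate logits the two versions agree to machine precision, and for large logits the stabilised one still returns a valid distribution where the naive one produces nan.

import numpy as np

def softmax_naive(inputs):
    exp_inputs = np.exp(inputs)
    return exp_inputs / exp_inputs.sum(-1)[:, None]

def softmax_stable(inputs):
    # same expression as the patched forward pass: shift by the row-wise max before exponentiating
    exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
    return exp_inputs / exp_inputs.sum(-1)[:, None]

small = np.random.randn(4, 10)               # moderate logits: both versions agree
print(np.allclose(softmax_naive(small), softmax_stable(small)))   # True

large = np.array([[800., 900., 1000.]])      # np.exp overflows, inf / inf gives nan
print(np.isnan(softmax_naive(large)).any())  # True (with RuntimeWarnings)
print(softmax_stable(large))                 # approximately [[0. 0. 1.]], still a valid distribution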