Use a more numerically stable softmax implementation by subtracting the max.

Matt Graham 2016-10-18 13:54:39 +01:00
parent 870c04f664
commit b4b00e0c65
2 changed files with 6 additions and 2 deletions


@@ -154,7 +154,9 @@ class CrossEntropySoftmaxError(object):
         Returns:
             Scalar error function value.
         """
-        probs = np.exp(outputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        probs = np.exp(outputs - outputs.max(-1)[:, None])
         probs /= probs.sum(-1)[:, None]
         return -np.mean(np.sum(targets * np.log(probs), axis=1))
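
The `[:, None]` indexing is what makes the subtraction broadcast per row. A minimal standalone shape check (illustrative only, not code from this commit; `row_max` is a made-up name):

import numpy as np

# `outputs.max(-1)` has shape (batch_size,); `[:, None]` reshapes it to
# (batch_size, 1) so the subtraction broadcasts row-wise against the
# (batch_size, output_dim) outputs array.
outputs = np.random.randn(4, 3)
row_max = outputs.max(-1)[:, None]
assert row_max.shape == (4, 1)
# equivalent to the keepdims form
assert np.allclose(row_max, outputs.max(-1, keepdims=True))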


@@ -341,7 +341,9 @@ class SoftmaxLayer(Layer):
         Returns:
             outputs: Array of layer outputs of shape (batch_size, output_dim).
         """
-        exp_inputs = np.exp(inputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
         return exp_inputs / exp_inputs.sum(-1)[:, None]

     def bprop(self, inputs, outputs, grads_wrt_outputs):
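
Both hunks apply the same trick: shifting each row by its maximum before exponentiating leaves the softmax unchanged, since the constant factor cancels when dividing by the row sum, while keeping np.exp within floating-point range. A small self-contained sketch of the effect (`softmax_naive` and `softmax_stable` are illustrative names, not code from this repository):

import numpy as np

def softmax_naive(inputs):
    # pre-commit behaviour: exponentiate the raw inputs directly
    exp_inputs = np.exp(inputs)
    return exp_inputs / exp_inputs.sum(-1)[:, None]

def softmax_stable(inputs):
    # post-commit behaviour: subtract the per-row max first; the exp(-max)
    # factor cancels in the division, so the result is mathematically identical
    exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
    return exp_inputs / exp_inputs.sum(-1)[:, None]

small = np.array([[1., 2., 3.]])
big = np.array([[1000., 1001., 1002.]])

# Both versions agree where the naive one does not overflow ...
assert np.allclose(softmax_naive(small), softmax_stable(small))

# ... but for large inputs np.exp overflows to inf and the naive version
# returns nan, while the stable version still gives finite probabilities.
print(softmax_naive(big))   # [[nan nan nan]] plus RuntimeWarnings
print(softmax_stable(big))  # approximately [[0.09 0.245 0.665]]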