Using more stable softmax implementation by subtracting max.
commit b4b00e0c65 (parent 870c04f664)
@@ -154,7 +154,9 @@ class CrossEntropySoftmaxError(object):
         Returns:
             Scalar error function value.
         """
-        probs = np.exp(outputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        probs = np.exp(outputs - outputs.max(-1)[:, None])
         probs /= probs.sum(-1)[:, None]
         return -np.mean(np.sum(targets * np.log(probs), axis=1))
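Why the change matters: np.exp overflows in float64 once its argument exceeds roughly 709, so large logits turn the naive probabilities into inf and then nan after normalisation, while the max-subtracted form keeps every exponent at or below zero. A minimal sketch of the difference (the logit values below are illustrative only, not taken from the repository):

import numpy as np

outputs = np.array([[1000., 0., -1000.]])  # extreme logits chosen to force overflow

naive = np.exp(outputs)                    # exp(1000.) overflows to inf (RuntimeWarning)
naive /= naive.sum(-1)[:, None]            # inf / inf -> nan in the first entry

stable = np.exp(outputs - outputs.max(-1)[:, None])  # largest exponent is exp(0) = 1
stable /= stable.sum(-1)[:, None]

print(naive)   # [[nan  0.  0.]]
print(stable)  # [[1.  0.  0.]] (up to rounding)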
@@ -341,7 +341,9 @@ class SoftmaxLayer(Layer):
         Returns:
             outputs: Array of layer outputs of shape (batch_size, output_dim).
         """
-        exp_inputs = np.exp(inputs)
+        # subtract max inside exponential to improve numerical stability -
+        # when we divide through by sum this term cancels
+        exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
         return exp_inputs / exp_inputs.sum(-1)[:, None]
 
     def bprop(self, inputs, outputs, grads_wrt_outputs):