diff --git a/mlp/layers.py b/mlp/layers.py
index 760a01c..60fabca 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -323,3 +323,46 @@ class TanhLayer(Layer):
 
     def __repr__(self):
         return 'TanhLayer'
+
+
+class SoftmaxLayer(Layer):
+    """Layer implementing a softmax transformation."""
+
+    def fprop(self, inputs):
+        """Forward propagates activations through the layer transformation.
+
+        For inputs `x` and outputs `y` this corresponds to
+
+            `y = exp(x) / sum(exp(x))`.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        exp_inputs = np.exp(inputs)
+        return exp_inputs / exp_inputs.sum(-1)[:, None]
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return (outputs * (grads_wrt_outputs -
+                (grads_wrt_outputs * outputs).sum(-1)[:, None]))
+
+    def __repr__(self):
+        return 'SoftmaxLayer'
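
As a quick sanity check on the added layer (my own sketch, not part of the diff), the standalone helpers below reproduce the `fprop`/`bprop` expressions and verify the `bprop` formula `y * (g - sum_j g_j * y_j)` against a finite-difference Jacobian-vector product. The `softmax_fprop`/`softmax_bprop` names are hypothetical and exist only for this check; the sketch also illustrates the usual max-subtraction trick for numerical stability, which the `fprop` in the diff omits and which may be worth considering for large activations.

import numpy as np

def softmax_fprop(x):
    # Subtract the per-row max before exponentiating; this leaves the softmax
    # values unchanged but avoids overflow for large activations.
    exp_x = np.exp(x - x.max(-1)[:, None])
    return exp_x / exp_x.sum(-1)[:, None]

def softmax_bprop(outputs, grads_wrt_outputs):
    # Same expression as in the diff: y * (g - sum_j g_j * y_j), i.e. the
    # softmax Jacobian applied to the upstream gradients, row by row.
    return outputs * (grads_wrt_outputs -
                      (grads_wrt_outputs * outputs).sum(-1)[:, None])

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    x = rng.normal(size=(5, 10))
    g = rng.normal(size=(5, 10))
    y = softmax_fprop(x)
    analytic = softmax_bprop(y, g)
    # Central-difference estimate of the Jacobian-vector product, entry by entry.
    eps = 1e-6
    numeric = np.empty_like(x)
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            x_plus, x_minus = x.copy(), x.copy()
            x_plus[i, j] += eps
            x_minus[i, j] -= eps
            numeric[i, j] = (
                (softmax_fprop(x_plus)[i] - softmax_fprop(x_minus)[i]).dot(g[i])
                / (2 * eps))
    assert np.allclose(analytic, numeric, atol=1e-6)
    print('bprop matches finite differences')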