260 lines
9.0 KiB
Python
260 lines
9.0 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""Layer definitions.
|
|
|
|
This module defines classes which encapsulate a single layer.
|
|
|
|
These layers map input activations to output activation with the `fprop`
|
|
method and map gradients with respect to outputs to gradients with respect to
|
|
their inputs with the `bprop` method.
|
|
|
|
Some layers will have learnable parameters and so will additionally define
|
|
methods for getting and setting parameters and calculating gradients with
|
|
respect to the layer parameters.
|
|
"""
|
|
|
|
import numpy as np
|
|
import mlp.initialisers as init
|
|
|
|
|
|
class Layer(object):
    """Abstract class defining the interface for a layer.

    Concrete layers are expected to override both `fprop` and `bprop`; the
    versions defined here only raise `NotImplementedError`.
    """

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            '{0} does not implement fprop'.format(type(self).__name__))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            '{0} does not implement bprop'.format(type(self).__name__))
|
|
|
|
|
|
class LayerWithParameters(Layer):
    """Abstract class defining the interface for a layer with parameters.

    In addition to the `fprop` / `bprop` interface inherited from `Layer`,
    subclasses are expected to override `grads_wrt_params` and the `params`
    property (getter and setter); the versions defined here only raise
    `NotImplementedError`.
    """

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.

        Args:
            inputs: Array of inputs to layer of shape (batch_size, input_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            List of arrays of gradients with respect to the layer parameters
            with parameter gradients appearing in the same order as the
            values returned by the `params` property.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            '{0} does not implement grads_wrt_params'.format(
                type(self).__name__))

    @property
    def params(self):
        """Returns a list of parameters of layer.

        Returns:
            List of current parameter values. This list should be in the
            corresponding order to the `values` list accepted when assigning
            to `params`.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            '{0} does not implement the params getter'.format(
                type(self).__name__))

    @params.setter
    def params(self, values):
        """Sets layer parameters from a list of values.

        Args:
            values: List of values to set parameters to. This list should be
                in the corresponding order to what is returned when reading
                `params`.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            '{0} does not implement the params setter'.format(
                type(self).__name__))
|
|
|
|
|
|
class AffineLayer(LayerWithParameters):
    """Layer implementing an affine transformation of its inputs.

    This layer is parameterised by a weight matrix and bias vector.
    """

    # NOTE: the default initialiser objects below are created once, when this
    # `def` statement is executed, and are shared by every layer constructed
    # without explicit initialisers.
    def __init__(self, input_dim, output_dim,
                 weights_initialiser=init.UniformInit(-0.1, 0.1),
                 biases_initialiser=init.ConstantInit(0.)):
        """Initialises a parameterised affine layer.

        Args:
            input_dim (int): Dimension of inputs to the layer.
            output_dim (int): Dimension of the layer outputs.
            weights_initialiser: Initialiser for the weight parameters. It is
                called with the shape tuple `(output_dim, input_dim)`.
            biases_initialiser: Initialiser for the bias parameters. It is
                called with `output_dim`.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weights = weights_initialiser((self.output_dim, self.input_dim))
        self.biases = biases_initialiser(self.output_dim)

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For a batch of inputs `x`, outputs `y`, weights `W` and biases `b`
        the layer computes `y = x.dot(W.T) + b`, i.e. `W.dot(x_i) + b` for
        each row `x_i` of the batch.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return inputs.dot(self.weights.T) + self.biases

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs. For this layer the result
        depends only on `grads_wrt_outputs` and the weights; `inputs` and
        `outputs` are accepted for interface compatibility and are unused.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs.dot(self.weights)

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.

        Args:
            inputs: Array of inputs to layer of shape (batch_size, input_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            List of arrays of gradients with respect to the layer parameters
            `[grads_wrt_weights, grads_wrt_biases]`.
        """
        # Both gradients are accumulated (summed) over the batch dimension.
        grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
        grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
        return [grads_wrt_weights, grads_wrt_biases]

    @property
    def params(self):
        """A list of layer parameter values: `[weights, biases]`."""
        return [self.weights, self.biases]

    @params.setter
    def params(self, values):
        """Sets the parameters from a sequence ordered `[weights, biases]`."""
        self.weights = values[0]
        self.biases = values[1]

    def __repr__(self):
        return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
            self.input_dim, self.output_dim)
|
|
|
|
|
|
class SigmoidLayer(Layer):
    """Layer implementing an element-wise logistic sigmoid transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to
        `y = 1 / (1 + exp(-x))`.

        The value is computed in an overflow-free form: `exp` is only ever
        evaluated at `-|x|`, and for negative `x` the algebraically
        equivalent expression `exp(x) / (1 + exp(x))` is used.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        # exp(-|x|) lies in [0, 1], so it can never overflow, unlike exp(-x)
        # for large negative x.
        exp_neg_abs = np.exp(-np.abs(inputs))
        return np.where(inputs >= 0,
                        1. / (1. + exp_neg_abs),
                        exp_neg_abs / (1. + exp_neg_abs))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs. The sigmoid derivative is
        expressed through the forward-pass outputs as `y * (1 - y)`, so
        `inputs` is not used.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs * outputs * (1. - outputs)

    def __repr__(self):
        return 'SigmoidLayer'
|
|
|
|
|
|
class SoftmaxLayer(Layer):
    """Layer implementing a softmax transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to

            `y = exp(x) / sum(exp(x))`.

        with the sum taken over the last axis. The maximum over the last
        axis is subtracted from the inputs before exponentiating; this
        leaves the result mathematically unchanged but prevents `exp` from
        overflowing for large inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        # Softmax is invariant to adding a constant along the normalised
        # axis, so shift each row to have maximum 0 before calling exp.
        shifted_inputs = inputs - inputs.max(-1, keepdims=True)
        exp_inputs = np.exp(shifted_inputs)
        return exp_inputs / exp_inputs.sum(-1, keepdims=True)

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs. The result is computed
        from `outputs` and `grads_wrt_outputs` only; `inputs` is not used.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        # Per-row inner product of the incoming gradients with the outputs,
        # kept as a trailing length-1 axis so it broadcasts across the row.
        weighted_grad_sums = (grads_wrt_outputs * outputs).sum(
            -1, keepdims=True)
        return outputs * (grads_wrt_outputs - weighted_grad_sums)

    def __repr__(self):
        return 'SoftmaxLayer'
|