mlpractical/mlp/layers.py

# -*- coding: utf-8 -*-
"""Layer definitions.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
This module defines classes which encapsulate a single layer.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
These layers map input activations to output activation with the `fprop`
method and map gradients with repsect to outputs to gradients with respect to
their inputs with the `bprop` method.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Some layers will have learnable parameters and so will additionally define
methods for getting and setting parameter and calculating gradients with
respect to the layer parameters.
"""
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
import numpy as np
import mlp.initialisers as init
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
class Layer(object):
    """Abstract class defining the interface for a layer."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        raise NotImplementedError()

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        raise NotImplementedError()

class LayerWithParameters(Layer):
    """Abstract class defining the interface for a layer with parameters."""

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.

        Args:
            inputs: Array of inputs to layer of shape (batch_size, input_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            List of arrays of gradients with respect to the layer parameters,
            with parameter gradients appearing in the same order as in the
            list returned by the `params` property.
        """
        raise NotImplementedError()

    def params_penalty(self):
        """Returns the parameter-dependent penalty term for this layer.

        If no parameter-dependent penalty terms are set this returns zero.
        """
        raise NotImplementedError()

    @property
    def params(self):
        """Returns a list of parameters of layer.

        Returns:
            List of current parameter values. This list should be in the
            corresponding order to the `values` argument of the `params`
            setter.
        """
        raise NotImplementedError()

    @params.setter
    def params(self, values):
        """Sets layer parameters from a list of values.

        Args:
            values: List of values to set parameters to. This list should be
                in the corresponding order to what is returned by the `params`
                property.
        """
        raise NotImplementedError()

class AffineLayer(LayerWithParameters):
    """Layer implementing an affine transformation of its inputs.

    This layer is parameterised by a weight matrix and bias vector.
    """

    def __init__(self, input_dim, output_dim,
                 weights_initialiser=init.UniformInit(-0.1, 0.1),
                 biases_initialiser=init.ConstantInit(0.),
                 weights_penalty=None, biases_penalty=None):
        """Initialises a parameterised affine layer.

        Args:
            input_dim (int): Dimension of inputs to the layer.
            output_dim (int): Dimension of the layer outputs.
            weights_initialiser: Initialiser for the weight parameters.
            biases_initialiser: Initialiser for the bias parameters.
            weights_penalty: Weights-dependent penalty term (regulariser) or
                None if no regularisation is to be applied to the weights.
            biases_penalty: Biases-dependent penalty term (regulariser) or
                None if no regularisation is to be applied to the biases.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weights = weights_initialiser((self.output_dim, self.input_dim))
        self.biases = biases_initialiser(self.output_dim)
        self.weights_penalty = weights_penalty
        self.biases_penalty = biases_penalty

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x`, outputs `y`, weights `W` and biases `b` the layer
        corresponds to `y = W.dot(x) + b`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
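        # Batched form of `y = W.dot(x) + b`: `W.dot(inputs.T).T` is equivalent
        # to `inputs.dot(W.T)`, with the biases broadcast along the batch axis.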
        return self.weights.dot(inputs.T).T + self.biases

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs.dot(self.weights)

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.

        Args:
            inputs: Array of inputs to layer of shape (batch_size, input_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            List of arrays of gradients with respect to the layer parameters
            `[grads_wrt_weights, grads_wrt_biases]`.
        """
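        # Summing the per-example outer products of output gradients and inputs
        # over the batch gives the weight gradients; the bias gradients are the
        # column sums of the output gradients. Penalty gradients are added when
        # a regulariser is set.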
        grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
        grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
        if self.weights_penalty is not None:
            grads_wrt_weights += self.weights_penalty.grad(self.weights)
        if self.biases_penalty is not None:
            grads_wrt_biases += self.biases_penalty.grad(self.biases)
        return [grads_wrt_weights, grads_wrt_biases]

    def params_penalty(self):
        """Returns the parameter-dependent penalty term for this layer.

        If no parameter-dependent penalty terms are set this returns zero.
        """
        params_penalty = 0
        if self.weights_penalty is not None:
            params_penalty += self.weights_penalty(self.weights)
        if self.biases_penalty is not None:
            params_penalty += self.biases_penalty(self.biases)
        return params_penalty

    @property
    def params(self):
        """A list of layer parameter values: `[weights, biases]`."""
        return [self.weights, self.biases]

    @params.setter
    def params(self, values):
        self.weights = values[0]
        self.biases = values[1]

    def __repr__(self):
        return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
            self.input_dim, self.output_dim)

class SigmoidLayer(Layer):
    """Layer implementing an element-wise logistic sigmoid transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to
        `y = 1 / (1 + exp(-x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return 1. / (1. + np.exp(-inputs))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs * outputs * (1. - outputs)

    def __repr__(self):
        return 'SigmoidLayer'

class ReluLayer(Layer):
    """Layer implementing an element-wise rectified linear transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to `y = max(0, x)`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return np.maximum(inputs, 0.)

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return (outputs > 0) * grads_wrt_outputs

    def __repr__(self):
        return 'ReluLayer'

class TanhLayer(Layer):
    """Layer implementing an element-wise hyperbolic tangent transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to `y = tanh(x)`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return np.tanh(inputs)

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return (1. - outputs**2) * grads_wrt_outputs

    def __repr__(self):
        return 'TanhLayer'

class SoftmaxLayer(Layer):
    """Layer implementing a softmax transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to
        `y = exp(x) / sum(exp(x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        # subtract max inside exponential to improve numerical stability -
        # when we divide through by sum this term cancels
        exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
        return exp_inputs / exp_inputs.sum(-1)[:, None]

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return (outputs * (grads_wrt_outputs -
                           (grads_wrt_outputs * outputs).sum(-1)[:, None]))

    def __repr__(self):
        return 'SoftmaxLayer'

class RadialBasisFunctionLayer(Layer):
    """Layer implementing projection to a grid of radial basis functions."""

    def __init__(self, grid_dim, intervals=[[0., 1.]]):
        """Creates a radial basis function layer object.

        Args:
            grid_dim: Integer specifying how many basis functions to use in
                the grid across the input space per dimension (so the total
                number of basis functions will be grid_dim**input_dim).
            intervals: List of intervals (two element lists or tuples)
                specifying extents of the axis-aligned region in input space
                to tile basis functions in a grid across. For example for a
                2D input space spanning [0, 1] x [0, 1] use
                intervals=[[0, 1], [0, 1]].
        """
        num_basis = grid_dim**len(intervals)
        self.grid_dim = grid_dim
        self.centres = np.array(np.meshgrid(*[
            np.linspace(low, high, grid_dim) for (low, high) in intervals])
        ).reshape((len(intervals), -1))
        self.scales = np.array([
            [(high - low) * 1. / grid_dim] for (low, high) in intervals])

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return np.exp(-(inputs[..., None] - self.centres[None, ...])**2 /
                      self.scales**2).reshape((inputs.shape[0], -1))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        num_basis = self.centres.shape[1]
        return -2 * (
            ((inputs[..., None] - self.centres[None, ...]) / self.scales**2) *
            grads_wrt_outputs.reshape((inputs.shape[0], -1, num_basis))
        ).sum(-1)

    def __repr__(self):
        return 'RadialBasisFunctionLayer(grid_dim={0})'.format(self.grid_dim)
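

if __name__ == '__main__':
    # Minimal usage sketch: run a finite-difference check of each layer's
    # analytic `bprop` gradients. The layer sizes and random inputs below are
    # arbitrary and purely illustrative.
    rng = np.random.RandomState(1234)
    inputs = rng.normal(size=(5, 3))
    eps = 1e-6
    for layer in [AffineLayer(3, 4), SigmoidLayer(), ReluLayer(),
                  TanhLayer(), SoftmaxLayer()]:
        outputs = layer.fprop(inputs)
        grads_wrt_outputs = rng.normal(size=outputs.shape)
        grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
        # Central finite-difference approximation to the same input gradients.
        fd_grads = np.empty_like(inputs)
        for idx in np.ndindex(*inputs.shape):
            perturbed = inputs.copy()
            perturbed[idx] += eps
            plus = np.sum(layer.fprop(perturbed) * grads_wrt_outputs)
            perturbed[idx] -= 2 * eps
            minus = np.sum(layer.fprop(perturbed) * grads_wrt_outputs)
            fd_grads[idx] = (plus - minus) / (2 * eps)
        print('{0}: max abs. gradient difference {1:.2e}'.format(
            layer, np.abs(grads_wrt_inputs - fd_grads).max()))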