# -*- coding: utf-8 -*-
"""Layer definitions.

This module defines classes which encapsulate a single layer.

These layers map input activations to output activations with the `fprop`
method and map gradients with respect to outputs to gradients with respect
to their inputs with the `bprop` method.

Some layers will have learnable parameters and so will additionally define
methods for getting and setting parameters and for calculating gradients
with respect to the layer parameters.
"""

import numpy as np
import mlp.initialisers as init
from mlp import DEFAULT_SEED


class Layer(object):
"""Abstract class defining the interface for a layer."""
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
2015-10-12 02:50:05 +02:00
"""
2016-09-19 08:31:31 +02:00
raise NotImplementedError()
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
2015-11-14 18:06:12 +01:00
"""
2016-09-19 08:31:31 +02:00
raise NotImplementedError()
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
class LayerWithParameters(Layer):
"""Abstract class defining the interface for a layer with parameters."""
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
def grads_wrt_params(self, inputs, grads_wrt_outputs):
"""Calculates gradients with respect to layer parameters.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of inputs to layer of shape (batch_size, input_dim).
grads_wrt_to_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Returns:
List of arrays of gradients with respect to the layer parameters
with parameter gradients appearing in same order in tuple as
returned from `get_params` method.
2015-10-12 02:50:05 +02:00
"""
2016-09-19 08:31:31 +02:00
raise NotImplementedError()
2015-10-12 02:50:05 +02:00
def params_penalty(self):
"""Returns the parameter dependent penalty term for this layer.
2015-10-12 02:50:05 +02:00
If no parameter-dependent penalty terms are set this returns zero.
2016-09-19 08:31:31 +02:00
"""
raise NotImplementedError()
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
@property
def params(self):
"""Returns a list of parameters of layer.
2015-12-13 20:53:10 +01:00
2016-09-19 08:31:31 +02:00
Returns:
List of current parameter values. This list should be in the
corresponding order to the `values` argument to `set_params`.
"""
raise NotImplementedError()
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
@params.setter
def params(self, values):
"""Sets layer parameters from a list of values.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
values: List of values to set parameters to. This list should be
in the corresponding order to what is returned by `get_params`.
"""
raise NotImplementedError()
2015-10-12 02:50:05 +02:00
class StochasticLayer(Layer):
"""Specialised layer which uses a stochastic forward propagation."""
def __init__(self, rng=None):
"""Constructs a new StochasticLayer object.
Args:
rng (RandomState): Seeded random number generator object.
"""
if rng is None:
rng = np.random.RandomState(DEFAULT_SEED)
self.rng = rng
def fprop(self, inputs, stochastic=True):
"""Forward propagates activations through the layer transformation.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
stochastic: Flag allowing different deterministic
forward-propagation mode in addition to default stochastic
forward-propagation e.g. for use at test time. If False
a deterministic forward-propagation transformation
corresponding to the expected output of the stochastic
forward-propagation is applied.
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
raise NotImplementedError()
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs. This should correspond to
default stochastic forward-propagation.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
raise NotImplementedError()
2016-09-19 08:31:31 +02:00
class AffineLayer(LayerWithParameters):
"""Layer implementing an affine tranformation of its inputs.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
This layer is parameterised by a weight matrix and bias vector.
2015-10-12 02:50:05 +02:00
"""
2016-09-19 08:31:31 +02:00
def __init__(self, input_dim, output_dim,
weights_initialiser=init.UniformInit(-0.1, 0.1),
biases_initialiser=init.ConstantInit(0.),
weights_penalty=None, biases_penalty=None):
2016-09-19 08:31:31 +02:00
"""Initialises a parameterised affine layer.
Args:
input_dim (int): Dimension of inputs to the layer.
output_dim (int): Dimension of the layer outputs.
weights_initialiser: Initialiser for the weight parameters.
biases_initialiser: Initialiser for the bias parameters.
weights_penalty: Weights-dependent penalty term (regulariser) or
None if no regularisation is to be applied to the weights.
biases_penalty: Biases-dependent penalty term (regulariser) or
None if no regularisation is to be applied to the biases.
2016-09-19 08:31:31 +02:00
"""
self.input_dim = input_dim
self.output_dim = output_dim
self.weights = weights_initialiser((self.output_dim, self.input_dim))
self.biases = biases_initialiser(self.output_dim)
self.weights_penalty = weights_penalty
self.biases_penalty = biases_penalty
2015-10-12 02:50:05 +02:00
def fprop(self, inputs):
2016-09-19 08:31:31 +02:00
"""Forward propagates activations through the layer transformation.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
For inputs `x`, outputs `y`, weights `W` and biases `b` the layer
corresponds to `y = W.dot(x) + b`.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
2015-10-12 02:50:05 +02:00
"""
2016-09-19 08:31:31 +02:00
return self.weights.dot(inputs.T).T + self.biases
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return grads_wrt_outputs.dot(self.weights)
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
def grads_wrt_params(self, inputs, grads_wrt_outputs):
"""Calculates gradients with respect to layer parameters.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
inputs: array of inputs to layer of shape (batch_size, input_dim)
grads_wrt_to_outputs: array of gradients with respect to the layer
outputs of shape (batch_size, output_dim)
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Returns:
list of arrays of gradients with respect to the layer parameters
`[grads_wrt_weights, grads_wrt_biases]`.
2015-10-12 02:50:05 +02:00
"""
2016-09-19 08:31:31 +02:00
grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
if self.weights_penalty is not None:
grads_wrt_weights += self.weights_penalty.grad(self.weights)
2015-10-12 02:50:05 +02:00
if self.biases_penalty is not None:
grads_wrt_biases += self.biases_penalty.grad(self.biases)
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
return [grads_wrt_weights, grads_wrt_biases]
2015-10-12 02:50:05 +02:00
def params_penalty(self):
"""Returns the parameter dependent penalty term for this layer.
2015-10-12 02:50:05 +02:00
If no parameter-dependent penalty terms are set this returns zero.
2015-10-12 02:50:05 +02:00
"""
params_penalty = 0
if self.weights_penalty is not None:
params_penalty += self.weights_penalty(self.weights)
if self.biases_penalty is not None:
params_penalty += self.biases_penalty(self.biases)
return params_penalty
2015-11-01 16:50:26 +01:00
2016-09-19 08:31:31 +02:00
@property
def params(self):
"""A list of layer parameter values: `[weights, biases]`."""
return [self.weights, self.biases]
2016-09-19 08:31:31 +02:00
@params.setter
def params(self, values):
self.weights = values[0]
self.biases = values[1]
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
def __repr__(self):
return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
self.input_dim, self.output_dim)
2015-11-14 18:06:12 +01:00
2015-10-12 02:50:05 +02:00
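# The following is an illustrative usage sketch added for exposition; it is
# not part of the original module. It shows one forward and backward pass
# through an `AffineLayer` on toy data. The helper name `_demo_affine_layer`
# and the toy dimensions are assumptions made purely for illustration.
def _demo_affine_layer():
    """Sketch of a forward/backward pass through an `AffineLayer`."""
    rng = np.random.RandomState(DEFAULT_SEED)
    layer = AffineLayer(input_dim=4, output_dim=3)
    inputs = rng.normal(size=(5, 4))
    outputs = layer.fprop(inputs)  # shape (5, 3)
    # Pretend the "loss" is simply the sum of the outputs, so the upstream
    # gradient is an array of ones.
    grads_wrt_outputs = np.ones_like(outputs)
    grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
    grads_wrt_weights, grads_wrt_biases = layer.grads_wrt_params(
        inputs, grads_wrt_outputs)
    # With this loss the bias gradient is just the batch size for each unit.
    assert np.allclose(grads_wrt_biases, inputs.shape[0])
    assert grads_wrt_inputs.shape == inputs.shape
    assert grads_wrt_weights.shape == layer.weights.shape
    return outputs

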
class ReshapeLayer(Layer):
    """Layer which reshapes dimensions of inputs."""

    def __init__(self, output_shape=None):
        """Create a new reshape layer object.

        Args:
            output_shape: Tuple specifying the shape each input in a batch
                should be reshaped to in the outputs. This **excludes** the
                batch size, so the shape of the final output array will be
                    (batch_size, ) + output_shape
                Similarly to numpy.reshape, one shape dimension can be -1.
                In this case, the value is inferred from the size of the
                input array and the remaining dimensions. The shape specified
                must be compatible with the input array shape - i.e. the
                total number of values in the array cannot be changed. If set
                to `None` the output shape will be set to
                    (batch_size, -1)
                which will flatten all the inputs to vectors.
        """
        self.output_shape = (-1,) if output_shape is None else output_shape

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return inputs.reshape((inputs.shape[0],) + self.output_shape)

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates
        the gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs.reshape(inputs.shape)

    def __repr__(self):
        return 'ReshapeLayer(output_shape={0})'.format(self.output_shape)


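# The following is an illustrative usage sketch added for exposition; it is
# not part of the original module. It shows a `ReshapeLayer` flattening
# image-shaped inputs to vectors. The name `_demo_reshape_layer` and the toy
# dimensions are assumptions made purely for illustration.
def _demo_reshape_layer():
    """Sketch of flattening (batch_size, 28, 28) inputs to (batch_size, 784)."""
    inputs = np.zeros((10, 28, 28))
    layer = ReshapeLayer()  # output_shape=None flattens to (batch_size, -1)
    outputs = layer.fprop(inputs)
    assert outputs.shape == (10, 784)
    # Gradients are simply reshaped back to the original input shape.
    grads_wrt_inputs = layer.bprop(inputs, outputs, np.ones_like(outputs))
    assert grads_wrt_inputs.shape == inputs.shape
    return outputs

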
class SigmoidLayer(Layer):
"""Layer implementing an element-wise logistic sigmoid transformation."""
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
For inputs `x` and outputs `y` this corresponds to
`y = 1 / (1 + exp(-x))`.
2015-10-12 02:50:05 +02:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
return 1. / (1. + np.exp(-inputs))
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return grads_wrt_outputs * outputs * (1. - outputs)
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
def __repr__(self):
return 'SigmoidLayer'
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
class ReluLayer(Layer):
"""Layer implementing an element-wise rectified linear transformation."""
2015-10-28 17:59:11 +01:00
2016-09-19 08:31:31 +02:00
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
For inputs `x` and outputs `y` this corresponds to `y = max(0, x)`.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
return np.maximum(inputs, 0.)
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return (outputs > 0) * grads_wrt_outputs
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
def __repr__(self):
return 'ReluLayer'
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
class TanhLayer(Layer):
"""Layer implementing an element-wise hyperbolic tangent transformation."""
2015-11-14 18:06:12 +01:00
def fprop(self, inputs):
2016-09-19 08:31:31 +02:00
"""Forward propagates activations through the layer transformation.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
For inputs `x` and outputs `y` this corresponds to `y = tanh(x)`.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
return np.tanh(inputs)
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
2015-11-15 17:00:58 +01:00
2016-09-19 08:31:31 +02:00
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return (1. - outputs**2) * grads_wrt_outputs
2015-11-14 18:06:12 +01:00
2016-09-19 08:31:31 +02:00
def __repr__(self):
return 'TanhLayer'
2016-10-07 07:22:12 +02:00
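# The following is an illustrative gradient-check sketch added for
# exposition; it is not part of the original module. It compares the `bprop`
# of the element-wise layers above against a central finite-difference
# estimate, for a "loss" equal to the sum of the layer outputs. The helper
# name `_check_elementwise_bprop` and the tolerances are assumptions made
# purely for illustration.
def _check_elementwise_bprop(layer, inputs, eps=1e-6, tol=1e-5):
    """Compare `layer.bprop` against finite differences of sum(fprop)."""
    outputs = layer.fprop(inputs)
    # For loss = sum(outputs), the gradient w.r.t. the outputs is all ones.
    grads_wrt_inputs = layer.bprop(inputs, outputs, np.ones_like(outputs))
    fd_grads = np.empty_like(inputs)
    it = np.nditer(inputs, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        perturbed = inputs.copy()
        perturbed[idx] += eps
        loss_plus = layer.fprop(perturbed).sum()
        perturbed[idx] -= 2 * eps
        loss_minus = layer.fprop(perturbed).sum()
        fd_grads[idx] = (loss_plus - loss_minus) / (2 * eps)
        it.iternext()
    return np.allclose(grads_wrt_inputs, fd_grads, atol=tol)

# Assumed usage example: the element-wise layers above should pass the check
# on random inputs, e.g.
#     rng = np.random.RandomState(DEFAULT_SEED)
#     x = rng.normal(size=(5, 4))
#     assert all(_check_elementwise_bprop(layer, x)
#                for layer in [SigmoidLayer(), ReluLayer(), TanhLayer()])

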
class SoftmaxLayer(Layer):
"""Layer implementing a softmax transformation."""
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
For inputs `x` and outputs `y` this corresponds to
`y = exp(x) / sum(exp(x))`.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
# subtract max inside exponential to improve numerical stability -
# when we divide through by sum this term cancels
exp_inputs = np.exp(inputs - inputs.max(-1)[:, None])
2016-10-07 07:22:12 +02:00
return exp_inputs / exp_inputs.sum(-1)[:, None]
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return (outputs * (grads_wrt_outputs -
(grads_wrt_outputs * outputs).sum(-1)[:, None]))
def __repr__(self):
return 'SoftmaxLayer'
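# The following is an illustrative usage sketch added for exposition; it is
# not part of the original module. It checks two basic properties of the
# softmax layer: each output row is a valid probability distribution, and
# `bprop` computes the product of the upstream gradient with the softmax
# Jacobian `diag(y) - y y^T` for each row. The helper name
# `_demo_softmax_layer` is an assumption made purely for illustration.
def _demo_softmax_layer():
    rng = np.random.RandomState(DEFAULT_SEED)
    layer = SoftmaxLayer()
    inputs = rng.normal(size=(4, 6))
    outputs = layer.fprop(inputs)
    assert np.allclose(outputs.sum(-1), 1.) and np.all(outputs >= 0.)
    grads_wrt_outputs = rng.normal(size=outputs.shape)
    grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
    # Explicit per-row Jacobian-vector products for comparison.
    expected = np.stack([
        (np.diag(y) - np.outer(y, y)).dot(g)
        for y, g in zip(outputs, grads_wrt_outputs)])
    assert np.allclose(grads_wrt_inputs, expected)
    return outputs

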
class RadialBasisFunctionLayer(Layer):
"""Layer implementing projection to a grid of radial basis functions."""
def __init__(self, grid_dim, intervals=[[0., 1.]]):
"""Creates a radial basis function layer object.
Args:
grid_dim: Integer specifying how many basis function to use in
grid across input space per dimension (so total number of
basis functions will be grid_dim**input_dim)
intervals: List of intervals (two element lists or tuples)
specifying extents of axis-aligned region in input-space to
tile basis functions in grid across. For example for a 2D input
space spanning [0, 1] x [0, 1] use intervals=[[0, 1], [0, 1]].
"""
num_basis = grid_dim**len(intervals)
self.centres = np.array(np.meshgrid(*[
np.linspace(low, high, grid_dim) for (low, high) in intervals])
).reshape((len(intervals), -1))
self.scales = np.array([
[(high - low) * 1. / grid_dim] for (low, high) in intervals])
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
return np.exp(-(inputs[..., None] - self.centres[None, ...])**2 /
self.scales**2).reshape((inputs.shape[0], -1))
    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates
        the gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        num_basis = self.centres.shape[1]
        # d exp(-(x - c)**2 / s**2) / dx = -2 (x - c) / s**2 * exp(...), so
        # the chain rule includes the forward-pass outputs as a factor.
        return -2 * (
            ((inputs[..., None] - self.centres[None, ...]) / self.scales**2) *
            outputs.reshape((inputs.shape[0], -1, num_basis)) *
            grads_wrt_outputs.reshape((inputs.shape[0], -1, num_basis))
        ).sum(-1)
    def __repr__(self):
        return 'RadialBasisFunctionLayer(grid_dim={0})'.format(self.grid_dim)


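# The following is an illustrative usage sketch added for exposition; it is
# not part of the original module. It shows the output dimensionality of a
# `RadialBasisFunctionLayer`: each input dimension is compared against all
# grid_dim**input_dim grid centres, so output_dim = input_dim *
# grid_dim**input_dim. The helper name `_demo_rbf_layer` is an assumption
# made purely for illustration.
def _demo_rbf_layer():
    layer = RadialBasisFunctionLayer(
        grid_dim=4, intervals=[[0., 1.], [0., 1.]])
    inputs = np.random.RandomState(DEFAULT_SEED).uniform(size=(3, 2))
    outputs = layer.fprop(inputs)
    # Centres are tiled on a 4 x 4 grid over [0, 1] x [0, 1], and the
    # responses for the two input dimensions are concatenated, giving
    # 2 * 4**2 = 32 output values per input.
    assert outputs.shape == (3, 2 * 4**2)
    return outputs

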
class DropoutLayer(StochasticLayer):
"""Layer which stochastically drops input dimensions in its output."""
def __init__(self, rng=None, incl_prob=0.5, share_across_batch=True):
"""Construct a new dropout layer.
Args:
rng (RandomState): Seeded random number generator.
incl_prob: Scalar value in (0, 1] specifying the probability of
each input dimension being included in the output.
share_across_batch: Whether to use same dropout mask across
all inputs in a batch or use per input masks.
"""
super(DropoutLayer, self).__init__(rng)
assert incl_prob > 0. and incl_prob <= 1.
self.incl_prob = incl_prob
self.share_across_batch = share_across_batch
def fprop(self, inputs, stochastic=True):
"""Forward propagates activations through the layer transformation.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
stochastic: Flag allowing different deterministic
forward-propagation mode in addition to default stochastic
forward-propagation e.g. for use at test time. If False
a deterministic forward-propagation transformation
corresponding to the expected output of the stochastic
forward-propagation is applied.
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
if stochastic:
mask_shape = ((1,) + inputs.shape[1:] if self.share_across_batch
else inputs.shape)
self._mask = (self.rng.uniform(size=mask_shape) < self.incl_prob)
return inputs * self._mask
else:
return inputs * self.incl_prob
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs. This should correspond to
default stochastic forward-propagation.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return grads_wrt_outputs * self._mask
def __repr__(self):
return 'DropoutLayer(incl_prob={0:.1f})'.format(self.incl_prob)
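# The following is an illustrative usage sketch added for exposition; it is
# not part of the original module. It contrasts stochastic and deterministic
# forward propagation through a `DropoutLayer`: in stochastic mode a random
# subset of values is zeroed, while with `stochastic=False` all inputs are
# scaled by `incl_prob` to match the expected stochastic output. The helper
# name `_demo_dropout_layer` is an assumption made purely for illustration.
def _demo_dropout_layer():
    rng = np.random.RandomState(DEFAULT_SEED)
    layer = DropoutLayer(rng=rng, incl_prob=0.5)
    inputs = np.ones((4, 10))
    stochastic_outputs = layer.fprop(inputs, stochastic=True)
    # Retained values pass through unscaled, dropped values become zero.
    assert np.all((stochastic_outputs == 0.) | (stochastic_outputs == 1.))
    deterministic_outputs = layer.fprop(inputs, stochastic=False)
    assert np.allclose(deterministic_outputs, 0.5)
    # Gradients only flow through the dimensions kept by the last mask.
    grads_wrt_inputs = layer.bprop(
        inputs, stochastic_outputs, np.ones_like(inputs))
    assert np.allclose(grads_wrt_inputs, layer._mask * 1.)
    return stochastic_outputs

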
class MaxPoolingLayer(Layer):
"""Layer outputting the maximum of non-overlapping 1D pools of inputs."""
def __init__(self, pool_size=2):
"""Construct a new max-pooling layer.
Args:
pool_size: Positive integer specifying size of pools over
which to take maximum value. The outputs of the layer
feeding in to this layer must have a dimension which
is a multiple of this pool size such that the outputs
can be split in to pools with no dimensions left over.
"""
self.pool_size = pool_size
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
This corresponds to taking the maximum over non-overlapping pools of
inputs of a fixed size `pool_size`.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
assert inputs.shape[-1] % self.pool_size == 0, (
'Last dimension of inputs must be multiple of pool size')
pooled_inputs = inputs.reshape(
inputs.shape[:-1] +
(inputs.shape[-1] // self.pool_size, self.pool_size))
pool_maxes = pooled_inputs.max(-1)
self._mask = pooled_inputs == pool_maxes[..., None]
return pool_maxes
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return (
self._mask * grads_wrt_outputs[..., None]).reshape(inputs.shape)
def __repr__(self):
return 'MaxPoolingLayer(pool_size={0})'.format(self.pool_size)
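# The following is an illustrative usage sketch added for exposition; it is
# not part of the original module. It shows max-pooling over non-overlapping
# pools of size 2 and how gradients are routed back only to the maximal
# element(s) of each pool. The helper name `_demo_max_pooling_layer` and the
# toy values are assumptions made purely for illustration.
def _demo_max_pooling_layer():
    layer = MaxPoolingLayer(pool_size=2)
    inputs = np.array([[1., 5., 2., 2., -1., 0.]])
    outputs = layer.fprop(inputs)
    assert np.allclose(outputs, [[5., 2., 0.]])
    grads_wrt_inputs = layer.bprop(inputs, outputs, np.ones_like(outputs))
    # Gradients go to the pool maxima; ties (the two 2s) both receive the
    # gradient with this mask-based implementation.
    assert np.allclose(grads_wrt_inputs, [[0., 1., 1., 1., 0., 1.]])
    return grads_wrt_inputs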