Adding stochastic layer functionality in preparation for dropout implementation.

This commit is contained in:
Matt Graham 2016-10-27 21:36:57 +01:00
parent 7e1250a82b
commit 95d4028bb7
3 changed files with 81 additions and 7 deletions

View File

@ -14,6 +14,7 @@ respect to the layer parameters.
import numpy as np
import mlp.initialisers as init
from mlp import DEFAULT_SEED
class Layer(object):
@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
raise NotImplementedError()
class StochasticLayer(Layer):
    """Base class for layers whose forward propagation is stochastic."""

    def __init__(self, rng=None):
        """Create a new StochasticLayer object.

        Args:
            rng (RandomState): Seeded random number generator object. When
                None, a `np.random.RandomState` seeded with `DEFAULT_SEED`
                is created and stored instead.
        """
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng

    def fprop(self, inputs, stochastic=True):
        """Forward propagate activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            stochastic: Selects between the default stochastic
                forward-propagation (True) and a deterministic mode (False),
                e.g. for use at test time. In the deterministic mode the
                transformation applied should correspond to the expected
                output of the stochastic forward-propagation.

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).

        Raises:
            NotImplementedError: Always, in this base class; subclasses are
                expected to override this method.
        """
        raise NotImplementedError()

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagate gradients through the layer.

        Computes the gradients with respect to the layer inputs from the
        gradients with respect to the layer outputs. The result should
        correspond to the default stochastic forward-propagation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).

        Raises:
            NotImplementedError: Always, in this base class; subclasses are
                expected to override this method.
        """
        raise NotImplementedError()
class AffineLayer(LayerWithParameters):
"""Layer implementing an affine transformation of its inputs.

View File

@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
functions of the outputs with respect to the model parameters.
"""
from mlp.layers import LayerWithParameters
from mlp.layers import LayerWithParameters, StochasticLayer
class SingleLayerModel(object):
@ -27,11 +27,13 @@ class SingleLayerModel(object):
"""A list of all of the parameters of the model."""
return self.layer.params
def fprop(self, inputs):
def fprop(self, inputs, stochastic=True):
"""Calculate the model outputs corresponding to a batch of inputs.
Args:
inputs: Batch of inputs to the model.
stochastic: Whether to use stochastic forward propagation
for stochastic layers (True) or deterministic (False).
Returns:
List which is a concatenation of the model inputs and model
@ -40,7 +42,10 @@ class SingleLayerModel(object):
activations through all immediate layers of the model and including
the inputs and outputs.
"""
activations = [inputs, self.layer.fprop(inputs)]
outputs = (self.layer.fprop(inputs, stochastic)
if isinstance(self.layer, StochasticLayer) else
self.layer.fprop(inputs))
activations = [inputs, outputs]
return activations
def grads_wrt_params(self, activations, grads_wrt_outputs):
@ -88,11 +93,13 @@ class MultipleLayerModel(object):
params += layer.params
return params
def fprop(self, inputs):
def fprop(self, inputs, stochastic=True):
"""Forward propagates a batch of inputs through the model.
Args:
inputs: Batch of inputs to the model.
stochastic: Whether to use stochastic forward propagation
for stochastic layers (True) or deterministic (False).
Returns:
List of the activations at the output of all layers of the model
@ -101,7 +108,11 @@ class MultipleLayerModel(object):
"""
activations = [inputs]
for i, layer in enumerate(self.layers):
activations.append(self.layers[i].fprop(activations[i]))
if isinstance(layer, StochasticLayer):
activations.append(self.layers[i].fprop(
activations[i], stochastic=stochastic))
else:
activations.append(self.layers[i].fprop(activations[i]))
return activations
def grads_wrt_params(self, activations, grads_wrt_outputs):

View File

@ -18,7 +18,8 @@ class Optimiser(object):
"""Basic model optimiser."""
def __init__(self, model, error, learning_rule, train_dataset,
valid_dataset=None, data_monitors=None, schedulers=[]):
valid_dataset=None, data_monitors=None, schedulers=[],
use_stochastic_eval=True):
"""Create a new optimiser instance.
Args:
@ -33,6 +34,10 @@ class Optimiser(object):
validation data sets) to monitor during training in addition
to the error. Keys should correspond to a string label for
the statistic being evaluated.
schedulers: List of learning rule scheduler objects for adjusting
learning rule hyperparameters over training. Can be empty.
use_stochastic_eval: Whether to use `stochastic=True` flag in
`model.fprop` for evaluating model performance during training.
"""
self.model = model
self.error = error
@ -44,6 +49,7 @@ class Optimiser(object):
if data_monitors is not None:
self.data_monitors.update(data_monitors)
self.schedulers = schedulers
self.use_stochastic_eval = use_stochastic_eval
def do_training_epoch(self):
"""Do a single training epoch.
@ -73,7 +79,8 @@ class Optimiser(object):
data_mon_vals = OrderedDict([(key + label, 0.) for key
in self.data_monitors.keys()])
for inputs_batch, targets_batch in dataset:
activations = self.model.fprop(inputs_batch)
activations = self.model.fprop(
inputs_batch, stochastic=self.use_stochastic_eval)
for key, data_monitor in self.data_monitors.items():
data_mon_vals[key + label] += data_monitor(
activations[-1], targets_batch)
@ -125,6 +132,10 @@ class Optimiser(object):
being the total time elapsed in seconds during the training run.
"""
stats = self.get_epoch_stats()
logger.info(
'Epoch 0:\n ' +
', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
)
run_stats = [stats.values()]
run_start_time = time.time()
for epoch in range(1, num_epochs + 1):