Adding stochastic layer functionality in preparation for dropout implementation.

Matt Graham 2016-10-27 21:36:57 +01:00
parent 7e1250a82b
commit 95d4028bb7
3 changed files with 81 additions and 7 deletions


@@ -14,6 +14,7 @@ respect to the layer parameters.
import numpy as np
import mlp.initialisers as init
from mlp import DEFAULT_SEED


class Layer(object):
@@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
        raise NotImplementedError()


class StochasticLayer(Layer):
    """Specialised layer which uses a stochastic forward propagation."""

    def __init__(self, rng=None):
        """Constructs a new StochasticLayer object.

        Args:
            rng (RandomState): Seeded random number generator object.
        """
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def fprop(self, inputs, stochastic=True):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            stochastic: Flag allowing different deterministic
                forward-propagation mode in addition to default stochastic
                forward-propagation e.g. for use at test time. If False
                a deterministic forward-propagation transformation
                corresponding to the expected output of the stochastic
                forward-propagation is applied.

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        raise NotImplementedError()

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates
        the gradients with respect to the layer inputs. This should
        correspond to default stochastic forward-propagation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        raise NotImplementedError()
class AffineLayer(LayerWithParameters):
    """Layer implementing an affine transformation of its inputs.


@@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
functions of the outputs with respect to the model parameters.
"""

from mlp.layers import LayerWithParameters, StochasticLayer


class SingleLayerModel(object):
@@ -27,11 +27,13 @@ class SingleLayerModel(object):
        """A list of all of the parameters of the model."""
        return self.layer.params

    def fprop(self, inputs, stochastic=True):
        """Calculate the model outputs corresponding to a batch of inputs.

        Args:
            inputs: Batch of inputs to the model.
            stochastic: Whether to use stochastic forward propagation
                for stochastic layers (True) or deterministic (False).

        Returns:
            List which is a concatenation of the model inputs and model
@@ -40,7 +42,10 @@ class SingleLayerModel(object):
            activations through all immediate layers of the model and including
            the inputs and outputs.
        """
        outputs = (self.layer.fprop(inputs, stochastic)
                   if isinstance(self.layer, StochasticLayer) else
                   self.layer.fprop(inputs))
        activations = [inputs, outputs]
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
@@ -88,11 +93,13 @@ class MultipleLayerModel(object):
            params += layer.params
        return params

    def fprop(self, inputs, stochastic=True):
        """Forward propagates a batch of inputs through the model.

        Args:
            inputs: Batch of inputs to the model.
            stochastic: Whether to use stochastic forward propagation
                for stochastic layers (True) or deterministic (False).

        Returns:
            List of the activations at the output of all layers of the model
@@ -101,7 +108,11 @@ class MultipleLayerModel(object):
        """
        activations = [inputs]
        for i, layer in enumerate(self.layers):
            if isinstance(layer, StochasticLayer):
                activations.append(self.layers[i].fprop(
                    activations[i], stochastic=stochastic))
            else:
                activations.append(self.layers[i].fprop(activations[i]))
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
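For context, a rough usage sketch of the new flag as it flows through a model's forward pass. The configuration below is illustrative only: DropoutLayer is the hypothetical subclass sketched earlier, the layer dimensions and batch are made up, and the model and layer classes are assumed to be imported from the package.

import numpy as np

rng = np.random.RandomState(123)
inputs_batch = rng.normal(size=(50, 784))  # dummy batch of 50 flattened inputs

# Hypothetical three-layer model mixing deterministic and stochastic layers.
model = MultipleLayerModel([
    AffineLayer(input_dim=784, output_dim=100),
    DropoutLayer(rng=rng, incl_prob=0.8),
    AffineLayer(input_dim=100, output_dim=10),
])

# Training-time forward pass: stochastic layers sample (stochastic=True is the default).
train_activations = model.fprop(inputs_batch)

# Evaluation-time forward pass: stochastic layers apply their deterministic,
# expected-value transformation instead.
eval_activations = model.fprop(inputs_batch, stochastic=False)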


@@ -18,7 +18,8 @@ class Optimiser(object):
    """Basic model optimiser."""

    def __init__(self, model, error, learning_rule, train_dataset,
                 valid_dataset=None, data_monitors=None, schedulers=[],
                 use_stochastic_eval=True):
        """Create a new optimiser instance.

        Args:
@@ -33,6 +34,10 @@ class Optimiser(object):
                validation data sets) to monitor during training in addition
                to the error. Keys should correspond to a string label for
                the statistic being evaluated.
            schedulers: List of learning rule scheduler objects for adjusting
                learning rule hyperparameters over training. Can be empty.
            use_stochastic_eval: Whether to use the `stochastic=True` flag in
                `model.fprop` when evaluating model performance during training.
        """
        self.model = model
        self.error = error
@@ -44,6 +49,7 @@ class Optimiser(object):
        if data_monitors is not None:
            self.data_monitors.update(data_monitors)
        self.schedulers = schedulers
        self.use_stochastic_eval = use_stochastic_eval

    def do_training_epoch(self):
        """Do a single training epoch.
@@ -73,7 +79,8 @@ class Optimiser(object):
        data_mon_vals = OrderedDict([(key + label, 0.) for key
                                     in self.data_monitors.keys()])
        for inputs_batch, targets_batch in dataset:
            activations = self.model.fprop(
                inputs_batch, stochastic=self.use_stochastic_eval)
            for key, data_monitor in self.data_monitors.items():
                data_mon_vals[key + label] += data_monitor(
                    activations[-1], targets_batch)
@@ -125,6 +132,10 @@ class Optimiser(object):
            being the total time elapsed in seconds during the training run.
        """
        stats = self.get_epoch_stats()
        logger.info(
            'Epoch 0:\n ' +
            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
        )
        run_stats = [stats.values()]
        run_start_time = time.time()
        for epoch in range(1, num_epochs + 1):
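Finally, a sketch of how the new use_stochastic_eval option might be passed when the monitored error and accuracy should reflect the deterministic (test-time) behaviour of stochastic layers rather than their noisy training-time behaviour. The model, error, learning rule, data provider and monitor objects below are placeholders assumed to exist elsewhere in the framework; only the use_stochastic_eval keyword itself comes from this commit, and the train() call arguments are an assumption.

# Placeholder objects (model, error, learning_rule, train_data, valid_data,
# accuracy_monitor) are assumed to have been constructed already.
optimiser = Optimiser(
    model, error, learning_rule, train_data,
    valid_dataset=valid_data,
    data_monitors={'acc': accuracy_monitor},
    use_stochastic_eval=False,  # monitor statistics with the deterministic forward pass
)
run_stats = optimiser.train(num_epochs=25)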