From 95d4028bb76beed5d2ba770f60240a64a18c323d Mon Sep 17 00:00:00 2001
From: Matt Graham
Date: Thu, 27 Oct 2016 21:36:57 +0100
Subject: [PATCH] Adding stochastic layer functionality in preparation for
 dropout implementation.

---
 mlp/layers.py     | 52 +++++++++++++++++++++++++++++++++++++++++++++++
 mlp/models.py     | 21 ++++++++++++++++-----
 mlp/optimisers.py | 15 +++++++++++++--
 3 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/mlp/layers.py b/mlp/layers.py
index 25bf183..8a892a8 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -14,6 +14,7 @@ respect to the layer parameters.
 
 import numpy as np
 import mlp.initialisers as init
+from mlp import DEFAULT_SEED
 
 
 class Layer(object):
@@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
         raise NotImplementedError()
 
 
+class StochasticLayer(Layer):
+    """Specialised layer which uses a stochastic forward propagation."""
+
+    def __init__(self, rng=None):
+        """Constructs a new StochasticLayer object.
+
+        Args:
+            rng (RandomState): Seeded random number generator object.
+        """
+        if rng is None:
+            rng = np.random.RandomState(DEFAULT_SEED)
+        self.rng = rng
+
+    def fprop(self, inputs, stochastic=True):
+        """Forward propagates activations through the layer transformation.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            stochastic: Flag allowing different deterministic
+                forward-propagation mode in addition to default stochastic
+                forward-propagation e.g. for use at test time. If False
+                a deterministic forward-propagation transformation
+                corresponding to the expected output of the stochastic
+                forward-propagation is applied.
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        raise NotImplementedError()
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates
+        the gradients with respect to the layer inputs. This should
+        correspond to default stochastic forward-propagation.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        raise NotImplementedError()
+
+
 class AffineLayer(LayerWithParameters):
     """Layer implementing an affine tranformation of its inputs.
 
diff --git a/mlp/models.py b/mlp/models.py
index 842c16d..c7ace3c 100644
--- a/mlp/models.py
+++ b/mlp/models.py
@@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
 functions of the outputs with respect to the model parameters.
 """
 
-from mlp.layers import LayerWithParameters
+from mlp.layers import LayerWithParameters, StochasticLayer
 
 
 class SingleLayerModel(object):
@@ -27,11 +27,13 @@ class SingleLayerModel(object):
         """A list of all of the parameters of the model."""
         return self.layer.params
 
-    def fprop(self, inputs):
+    def fprop(self, inputs, stochastic=True):
         """Calculate the model outputs corresponding to a batch of inputs.
 
         Args:
             inputs: Batch of inputs to the model.
+            stochastic: Whether to use stochastic forward propagation
+                for stochastic layers (True) or deterministic (False).
 
         Returns:
             List which is a concatenation of the model inputs and model
@@ -40,7 +42,10 @@ class SingleLayerModel(object):
             activations through all immediate layers of the model and
             including the inputs and outputs.
         """
-        activations = [inputs, self.layer.fprop(inputs)]
+        outputs = (self.layer.fprop(inputs, stochastic)
+                   if isinstance(self.layer, StochasticLayer) else
+                   self.layer.fprop(inputs))
+        activations = [inputs, outputs]
         return activations
 
     def grads_wrt_params(self, activations, grads_wrt_outputs):
@@ -88,11 +93,13 @@ class MultipleLayerModel(object):
             params += layer.params
         return params
 
-    def fprop(self, inputs):
+    def fprop(self, inputs, stochastic=True):
         """Forward propagates a batch of inputs through the model.
 
         Args:
             inputs: Batch of inputs to the model.
+            stochastic: Whether to use stochastic forward propagation
+                for stochastic layers (True) or deterministic (False).
 
         Returns:
             List of the activations at the output of all layers of the model
@@ -101,7 +108,11 @@
         """
         activations = [inputs]
         for i, layer in enumerate(self.layers):
-            activations.append(self.layers[i].fprop(activations[i]))
+            if isinstance(layer, StochasticLayer):
+                activations.append(self.layers[i].fprop(
+                    activations[i], stochastic=stochastic))
+            else:
+                activations.append(self.layers[i].fprop(activations[i]))
         return activations
 
     def grads_wrt_params(self, activations, grads_wrt_outputs):
diff --git a/mlp/optimisers.py b/mlp/optimisers.py
index 4ea4cd0..bebc86a 100644
--- a/mlp/optimisers.py
+++ b/mlp/optimisers.py
@@ -18,7 +18,8 @@ class Optimiser(object):
     """Basic model optimiser."""
 
     def __init__(self, model, error, learning_rule, train_dataset,
-                 valid_dataset=None, data_monitors=None, schedulers=[]):
+                 valid_dataset=None, data_monitors=None, schedulers=[],
+                 use_stochastic_eval=True):
         """Create a new optimiser instance.
 
         Args:
@@ -33,6 +34,10 @@
                 validation data sets) to monitor during training in addition
                 to the error. Keys should correspond to a string label for
                 the statistic being evaluated.
+            schedulers: List of learning rule scheduler objects for adjusting
+                learning rule hyperparameters over training. Can be empty.
+            use_stochastic_eval: Whether to use `stochastic=True` flag in
+                `model.fprop` for evaluating model performance during training.
         """
         self.model = model
         self.error = error
@@ -44,6 +49,7 @@
         if data_monitors is not None:
             self.data_monitors.update(data_monitors)
         self.schedulers = schedulers
+        self.use_stochastic_eval = use_stochastic_eval
 
     def do_training_epoch(self):
         """Do a single training epoch.
@@ -73,7 +79,8 @@
         data_mon_vals = OrderedDict([(key + label, 0.) for key
                                      in self.data_monitors.keys()])
         for inputs_batch, targets_batch in dataset:
-            activations = self.model.fprop(inputs_batch)
+            activations = self.model.fprop(
+                inputs_batch, stochastic=self.use_stochastic_eval)
             for key, data_monitor in self.data_monitors.items():
                 data_mon_vals[key + label] += data_monitor(
                     activations[-1], targets_batch)
@@ -125,6 +132,10 @@
             being the total time elapsed in seconds during the training run.
         """
         stats = self.get_epoch_stats()
+        logger.info(
+            'Epoch 0:\n    ' +
+            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
+        )
         run_stats = [stats.values()]
         run_start_time = time.time()
        for epoch in range(1, num_epochs + 1):
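
As a rough sketch of how the interface added by this patch might be used once
dropout is implemented, a concrete `StochasticLayer` subclass could look
something like the code below. The `DropoutLayer` name, the `incl_prob`
argument and the cached `self._mask` attribute are illustrative assumptions
and are not part of this patch; only the `fprop(inputs, stochastic)` /
`bprop(inputs, outputs, grads_wrt_outputs)` interface and the seeded
`self.rng` come from the code above.

    import numpy as np

    from mlp.layers import StochasticLayer


    class DropoutLayer(StochasticLayer):
        """Illustrative sketch: randomly zeroes inputs in stochastic fprop."""

        def __init__(self, rng=None, incl_prob=0.5):
            # incl_prob is the probability of *keeping* each input dimension.
            super(DropoutLayer, self).__init__(rng)
            assert 0. < incl_prob <= 1.
            self.incl_prob = incl_prob

        def fprop(self, inputs, stochastic=True):
            if stochastic:
                # Sample a binary keep-mask with the layer's seeded rng and
                # cache it so bprop can route gradients through kept units.
                self._mask = self.rng.uniform(size=inputs.shape) < self.incl_prob
                return inputs * self._mask
            else:
                # Deterministic mode: scale by the inclusion probability so the
                # output matches the expectation of the stochastic transform.
                return inputs * self.incl_prob

        def bprop(self, inputs, outputs, grads_wrt_outputs):
            # Gradients with respect to the inputs of the default (stochastic)
            # forward propagation: zero where units were dropped.
            return grads_wrt_outputs * self._mask

With such a layer inserted in a `MultipleLayerModel`, training forward passes
would use the default `stochastic=True`, while passing
`use_stochastic_eval=False` to the `Optimiser` would make the monitored
statistics use the deterministic, expectation-matching transformation instead.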