Adding stochastic layer functionality in preparation for dropout implementation.

Matt Graham 2016-10-27 21:36:57 +01:00
parent 7e1250a82b
commit 95d4028bb7
3 changed files with 81 additions and 7 deletions


@@ -14,6 +14,7 @@ respect to the layer parameters.
import numpy as np
import mlp.initialisers as init
from mlp import DEFAULT_SEED


class Layer(object):
@@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
        raise NotImplementedError()


class StochasticLayer(Layer):
    """Specialised layer which uses a stochastic forward propagation."""

    def __init__(self, rng=None):
        """Constructs a new StochasticLayer object.

        Args:
            rng (RandomState): Seeded random number generator object.
        """
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def fprop(self, inputs, stochastic=True):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            stochastic: Flag allowing different deterministic
                forward-propagation mode in addition to default stochastic
                forward-propagation e.g. for use at test time. If False
                a deterministic forward-propagation transformation
                corresponding to the expected output of the stochastic
                forward-propagation is applied.

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        raise NotImplementedError()

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates
        the gradients with respect to the layer inputs. This should
        correspond to default stochastic forward-propagation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        raise NotImplementedError()
class AffineLayer(LayerWithParameters):
    """Layer implementing an affine transformation of its inputs.


@@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
functions of the outputs with respect to the model parameters.
"""

from mlp.layers import LayerWithParameters, StochasticLayer


class SingleLayerModel(object):
@@ -27,11 +27,13 @@ class SingleLayerModel(object):
        """A list of all of the parameters of the model."""
        return self.layer.params

    def fprop(self, inputs, stochastic=True):
        """Calculate the model outputs corresponding to a batch of inputs.

        Args:
            inputs: Batch of inputs to the model.
            stochastic: Whether to use stochastic forward propagation
                for stochastic layers (True) or deterministic (False).

        Returns:
            List which is a concatenation of the model inputs and model
@@ -40,7 +42,10 @@ class SingleLayerModel(object):
            activations through all immediate layers of the model and including
            the inputs and outputs.
        """
        outputs = (self.layer.fprop(inputs, stochastic)
                   if isinstance(self.layer, StochasticLayer) else
                   self.layer.fprop(inputs))
        activations = [inputs, outputs]
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
@@ -88,11 +93,13 @@ class MultipleLayerModel(object):
            params += layer.params
        return params

    def fprop(self, inputs, stochastic=True):
        """Forward propagates a batch of inputs through the model.

        Args:
            inputs: Batch of inputs to the model.
            stochastic: Whether to use stochastic forward propagation
                for stochastic layers (True) or deterministic (False).

        Returns:
            List of the activations at the output of all layers of the model
@@ -101,7 +108,11 @@ class MultipleLayerModel(object):
        """
        activations = [inputs]
        for i, layer in enumerate(self.layers):
            if isinstance(layer, StochasticLayer):
                activations.append(self.layers[i].fprop(
                    activations[i], stochastic=stochastic))
            else:
                activations.append(self.layers[i].fprop(activations[i]))
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
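For context, a rough usage sketch of the new flag as it flows through a model's forward pass. The configuration below is illustrative only: DropoutLayer is the hypothetical subclass sketched earlier, the layer dimensions and batch are made up, and the model and layer classes are assumed to be imported from the package.

import numpy as np

rng = np.random.RandomState(123)
inputs_batch = rng.normal(size=(50, 784))  # dummy batch of 50 flattened inputs

# Hypothetical three-layer model mixing deterministic and stochastic layers.
model = MultipleLayerModel([
    AffineLayer(input_dim=784, output_dim=100),
    DropoutLayer(rng=rng, incl_prob=0.8),
    AffineLayer(input_dim=100, output_dim=10),
])

# Training-time forward pass: stochastic layers sample (stochastic=True is the default).
train_activations = model.fprop(inputs_batch)

# Evaluation-time forward pass: stochastic layers apply their deterministic,
# expected-value transformation instead.
eval_activations = model.fprop(inputs_batch, stochastic=False)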


@@ -18,7 +18,8 @@ class Optimiser(object):
    """Basic model optimiser."""

    def __init__(self, model, error, learning_rule, train_dataset,
                 valid_dataset=None, data_monitors=None, schedulers=[],
                 use_stochastic_eval=True):
        """Create a new optimiser instance.

        Args:
@@ -33,6 +34,10 @@ class Optimiser(object):
                validation data sets) to monitor during training in addition
                to the error. Keys should correspond to a string label for
                the statistic being evaluated.
            schedulers: List of learning rule scheduler objects for adjusting
                learning rule hyperparameters over training. Can be empty.
            use_stochastic_eval: Whether to use the `stochastic=True` flag in
                `model.fprop` when evaluating model performance during training.
        """
        self.model = model
        self.error = error
@@ -44,6 +49,7 @@ class Optimiser(object):
        if data_monitors is not None:
            self.data_monitors.update(data_monitors)
        self.schedulers = schedulers
        self.use_stochastic_eval = use_stochastic_eval

    def do_training_epoch(self):
        """Do a single training epoch.
@@ -73,7 +79,8 @@ class Optimiser(object):
        data_mon_vals = OrderedDict([(key + label, 0.) for key
                                     in self.data_monitors.keys()])
        for inputs_batch, targets_batch in dataset:
            activations = self.model.fprop(
                inputs_batch, stochastic=self.use_stochastic_eval)
            for key, data_monitor in self.data_monitors.items():
                data_mon_vals[key + label] += data_monitor(
                    activations[-1], targets_batch)
@@ -125,6 +132,10 @@ class Optimiser(object):
            being the total time elapsed in seconds during the training run.
        """
        stats = self.get_epoch_stats()
        logger.info(
            'Epoch 0:\n ' +
            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
        )
        run_stats = [stats.values()]
        run_start_time = time.time()
        for epoch in range(1, num_epochs + 1):
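Finally, a sketch of how the new use_stochastic_eval option might be passed when the monitored error and accuracy should reflect the deterministic (test-time) behaviour of stochastic layers rather than their noisy training-time behaviour. The model, error, learning rule, data provider and monitor objects below are placeholders assumed to exist elsewhere in the framework; only the use_stochastic_eval keyword itself comes from this commit, and the train() call arguments are an assumption.

# Placeholder objects (model, error, learning_rule, train_data, valid_data,
# accuracy_monitor) are assumed to have been constructed already.
optimiser = Optimiser(
    model, error, learning_rule, train_data,
    valid_dataset=valid_data,
    data_monitors={'acc': accuracy_monitor},
    use_stochastic_eval=False,  # monitor statistics with the deterministic forward pass
)
run_stats = optimiser.train(num_epochs=25)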