Adding stochastic layer functionality in preparation for dropout implementation.

Matt Graham 2016-10-27 21:36:57 +01:00
parent 7e1250a82b
commit 95d4028bb7
3 changed files with 81 additions and 7 deletions

mlp/layers.py

@ -14,6 +14,7 @@ respect to the layer parameters.
import numpy as np
import mlp.initialisers as init
from mlp import DEFAULT_SEED
class Layer(object):
@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
        raise NotImplementedError()


class StochasticLayer(Layer):
    """Specialised layer which uses stochastic forward propagation."""

    def __init__(self, rng=None):
        """Constructs a new StochasticLayer object.

        Args:
            rng (RandomState): Seeded random number generator object.
        """
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def fprop(self, inputs, stochastic=True):
        """Forward propagates activations through the layer transformation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            stochastic: Flag allowing a deterministic forward-propagation
                mode to be used instead of the default stochastic
                forward propagation, e.g. at test time. If False, a
                deterministic transformation corresponding to the expected
                output of the stochastic forward propagation is applied.

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        raise NotImplementedError()

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer, calculates
        the gradients with respect to the layer inputs. This should
        correspond to the default stochastic forward propagation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        raise NotImplementedError()
class AffineLayer(LayerWithParameters):
    """Layer implementing an affine transformation of its inputs.

mlp/models.py

@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
functions of the outputs with respect to the model parameters.
"""
from mlp.layers import LayerWithParameters
from mlp.layers import LayerWithParameters, StochasticLayer
class SingleLayerModel(object):
@ -27,11 +27,13 @@ class SingleLayerModel(object):
"""A list of all of the parameters of the model."""
return self.layer.params
def fprop(self, inputs):
def fprop(self, inputs, stochastic=True):
"""Calculate the model outputs corresponding to a batch of inputs.
Args:
inputs: Batch of inputs to the model.
stochastic: Whether to use stochastic forward propagation
for stochastic layers (True) or deterministic (False).
Returns:
List which is a concatenation of the model inputs and model
@ -40,7 +42,10 @@ class SingleLayerModel(object):
            activations through all intermediate layers of the model and including
            the inputs and outputs.
        """
        activations = [inputs, self.layer.fprop(inputs)]
        outputs = (self.layer.fprop(inputs, stochastic)
                   if isinstance(self.layer, StochasticLayer) else
                   self.layer.fprop(inputs))
        activations = [inputs, outputs]
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
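
A minimal usage sketch of the single-layer case, assuming the hypothetical DropoutLayer from the layers.py note above, that SingleLayerModel's constructor takes the wrapped layer, and an inputs_batch array of shape (batch_size, input_dim):

model = SingleLayerModel(DropoutLayer(incl_prob=0.5))
train_acts = model.fprop(inputs_batch)                    # default: stochastic masking
eval_acts = model.fprop(inputs_batch, stochastic=False)   # deterministic expected output
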
@ -88,11 +93,13 @@ class MultipleLayerModel(object):
            params += layer.params
        return params

    def fprop(self, inputs):
    def fprop(self, inputs, stochastic=True):
        """Forward propagates a batch of inputs through the model.

        Args:
            inputs: Batch of inputs to the model.
            stochastic: Whether to use stochastic forward propagation
                for stochastic layers (True) or deterministic (False).

        Returns:
            List of the activations at the output of all layers of the model
@ -101,6 +108,10 @@ class MultipleLayerModel(object):
"""
activations = [inputs]
for i, layer in enumerate(self.layers):
if isinstance(layer, StochasticLayer):
activations.append(self.layers[i].fprop(
activations[i], stochastic=stochastic))
else:
activations.append(self.layers[i].fprop(activations[i]))
return activations
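
Correspondingly for a stack of layers, only StochasticLayer instances receive the flag while deterministic layers such as AffineLayer are called exactly as before. A sketch; the constructor arguments and dimension variables are illustrative assumptions:

model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim),   # deterministic: fprop(inputs) as before
    DropoutLayer(incl_prob=0.8),          # stochastic: receives the stochastic flag
    AffineLayer(hidden_dim, output_dim),
])
train_acts = model.fprop(inputs_batch)                    # training: random masks
eval_acts = model.fprop(inputs_batch, stochastic=False)   # evaluation: expected outputs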

mlp/optimisers.py

@ -18,7 +18,8 @@ class Optimiser(object):
"""Basic model optimiser."""
def __init__(self, model, error, learning_rule, train_dataset,
valid_dataset=None, data_monitors=None, schedulers=[]):
valid_dataset=None, data_monitors=None, schedulers=[],
use_stochastic_eval=True):
"""Create a new optimiser instance.
Args:
@ -33,6 +34,10 @@ class Optimiser(object):
                validation data sets) to monitor during training in addition
                to the error. Keys should correspond to a string label for
                the statistic being evaluated.
            schedulers: List of learning rule scheduler objects for adjusting
                learning rule hyperparameters over training. Can be empty.
            use_stochastic_eval: Whether to use the `stochastic=True` flag in
                `model.fprop` when evaluating model performance during training.
        """
        self.model = model
        self.error = error
@ -44,6 +49,7 @@ class Optimiser(object):
        if data_monitors is not None:
            self.data_monitors.update(data_monitors)
        self.schedulers = schedulers
        self.use_stochastic_eval = use_stochastic_eval

    def do_training_epoch(self):
        """Do a single training epoch.
@ -73,7 +79,8 @@ class Optimiser(object):
        data_mon_vals = OrderedDict([(key + label, 0.) for key
                                     in self.data_monitors.keys()])
        for inputs_batch, targets_batch in dataset:
            activations = self.model.fprop(inputs_batch)
            activations = self.model.fprop(
                inputs_batch, stochastic=self.use_stochastic_eval)
            for key, data_monitor in self.data_monitors.items():
                data_mon_vals[key + label] += data_monitor(
                    activations[-1], targets_batch)
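
Since stochastic regularisers such as dropout are usually switched off when measuring performance, one might construct the optimiser with the new flag disabled. A sketch, assuming the model, error, learning rule, data providers and an accuracy monitor are built elsewhere:

optimiser = Optimiser(
    model, error, learning_rule, train_data, valid_data,
    data_monitors={'acc': accuracy},   # hypothetical monitor function
    use_stochastic_eval=False)         # evaluate monitors with deterministic fprop
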
@ -125,6 +132,10 @@ class Optimiser(object):
            being the total time elapsed in seconds during the training run.
        """
        stats = self.get_epoch_stats()
        logger.info(
            'Epoch 0:\n ' +
            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
        )
        run_stats = [stats.values()]
        run_start_time = time.time()
        for epoch in range(1, num_epochs + 1):
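
With the added logging call, the initial (epoch 0) statistics are now reported once before the training loop starts. A hypothetical invocation; the train method name and num_epochs argument are inferred from the loop shown above rather than confirmed by this diff:

optimiser.train(num_epochs=20)   # logs an 'Epoch 0: ...' line, then runs 20 epochs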