Adding stochastic layer functionality in preparation for dropout implementation.
This commit is contained in:
parent
7e1250a82b
commit
95d4028bb7
@ -14,6 +14,7 @@ respect to the layer parameters.
|
||||
|
||||
import numpy as np
|
||||
import mlp.initialisers as init
|
||||
from mlp import DEFAULT_SEED
|
||||
|
||||
|
||||
class Layer(object):
|
||||
@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class StochasticLayer(Layer):
    """Base class for layers whose forward propagation is stochastic."""

    def __init__(self, rng=None):
        """Creates a StochasticLayer and stores its random number generator.

        Args:
            rng (RandomState): Seeded random number generator object. When
                left as None, a `np.random.RandomState` seeded with
                `DEFAULT_SEED` is created and used instead.
        """
        # Fall back to a generator with the package-wide default seed when
        # the caller does not supply one.
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng

    def fprop(self, inputs, stochastic=True):
        """Propagates a batch of inputs forward through the layer.

        This base implementation is abstract and always raises.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            stochastic: Selects the forward-propagation mode. True (the
                default) requests the stochastic transformation. False
                requests a deterministic transformation corresponding to
                the expected output of the stochastic one, e.g. for use at
                test time.

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Propagates gradients backward through the layer.

        Calculates the gradients with respect to the layer inputs from the
        gradients with respect to the layer outputs. The result should
        correspond to the default stochastic forward-propagation.

        This base implementation is abstract and always raises.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class AffineLayer(LayerWithParameters):
|
||||
"""Layer implementing an affine tranformation of its inputs.
|
||||
|
||||
|
@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
|
||||
functions of the outputs with respect to the model parameters.
|
||||
"""
|
||||
|
||||
from mlp.layers import LayerWithParameters
|
||||
from mlp.layers import LayerWithParameters, StochasticLayer
|
||||
|
||||
|
||||
class SingleLayerModel(object):
|
||||
@ -27,11 +27,13 @@ class SingleLayerModel(object):
|
||||
"""A list of all of the parameters of the model."""
|
||||
return self.layer.params
|
||||
|
||||
def fprop(self, inputs):
|
||||
def fprop(self, inputs, stochastic=True):
|
||||
"""Calculate the model outputs corresponding to a batch of inputs.
|
||||
|
||||
Args:
|
||||
inputs: Batch of inputs to the model.
|
||||
stochastic: Whether to use stochastic forward propagation
|
||||
for stochastic layers (True) or deterministic (False).
|
||||
|
||||
Returns:
|
||||
List which is a concatenation of the model inputs and model
|
||||
@ -40,7 +42,10 @@ class SingleLayerModel(object):
|
||||
activations through all immediate layers of the model and including
|
||||
the inputs and outputs.
|
||||
"""
|
||||
activations = [inputs, self.layer.fprop(inputs)]
|
||||
outputs = (self.layer.fprop(inputs, stochastic)
|
||||
if isinstance(self.layer, StochasticLayer) else
|
||||
self.layer.fprop(inputs))
|
||||
activations = [inputs, outputs]
|
||||
return activations
|
||||
|
||||
def grads_wrt_params(self, activations, grads_wrt_outputs):
|
||||
@ -88,11 +93,13 @@ class MultipleLayerModel(object):
|
||||
params += layer.params
|
||||
return params
|
||||
|
||||
def fprop(self, inputs):
|
||||
def fprop(self, inputs, stochastic=True):
|
||||
"""Forward propagates a batch of inputs through the model.
|
||||
|
||||
Args:
|
||||
inputs: Batch of inputs to the model.
|
||||
stochastic: Whether to use stochastic forward propagation
|
||||
for stochastic layers (True) or deterministic (False).
|
||||
|
||||
Returns:
|
||||
List of the activations at the output of all layers of the model
|
||||
@ -101,7 +108,11 @@ class MultipleLayerModel(object):
|
||||
"""
|
||||
activations = [inputs]
|
||||
for i, layer in enumerate(self.layers):
|
||||
activations.append(self.layers[i].fprop(activations[i]))
|
||||
if isinstance(layer, StochasticLayer):
|
||||
activations.append(self.layers[i].fprop(
|
||||
activations[i], stochastic=stochastic))
|
||||
else:
|
||||
activations.append(self.layers[i].fprop(activations[i]))
|
||||
return activations
|
||||
|
||||
def grads_wrt_params(self, activations, grads_wrt_outputs):
|
||||
|
@ -18,7 +18,8 @@ class Optimiser(object):
|
||||
"""Basic model optimiser."""
|
||||
|
||||
def __init__(self, model, error, learning_rule, train_dataset,
|
||||
valid_dataset=None, data_monitors=None, schedulers=[]):
|
||||
valid_dataset=None, data_monitors=None, schedulers=[],
|
||||
use_stochastic_eval=True):
|
||||
"""Create a new optimiser instance.
|
||||
|
||||
Args:
|
||||
@ -33,6 +34,10 @@ class Optimiser(object):
|
||||
validation data sets) to monitor during training in addition
|
||||
to the error. Keys should correspond to a string label for
|
||||
the statistic being evaluated.
|
||||
schedulers: List of learning rule scheduler objects for adjusting
|
||||
learning rule hyperparameters over training. Can be empty.
|
||||
use_stochastic_eval: Whether to use `stochastic=True` flag in
|
||||
`model.fprop` for evaluating model performance during training.
|
||||
"""
|
||||
self.model = model
|
||||
self.error = error
|
||||
@ -44,6 +49,7 @@ class Optimiser(object):
|
||||
if data_monitors is not None:
|
||||
self.data_monitors.update(data_monitors)
|
||||
self.schedulers = schedulers
|
||||
self.use_stochastic_eval = use_stochastic_eval
|
||||
|
||||
def do_training_epoch(self):
|
||||
"""Do a single training epoch.
|
||||
@ -73,7 +79,8 @@ class Optimiser(object):
|
||||
data_mon_vals = OrderedDict([(key + label, 0.) for key
|
||||
in self.data_monitors.keys()])
|
||||
for inputs_batch, targets_batch in dataset:
|
||||
activations = self.model.fprop(inputs_batch)
|
||||
activations = self.model.fprop(
|
||||
inputs_batch, stochastic=self.use_stochastic_eval)
|
||||
for key, data_monitor in self.data_monitors.items():
|
||||
data_mon_vals[key + label] += data_monitor(
|
||||
activations[-1], targets_batch)
|
||||
@ -125,6 +132,10 @@ class Optimiser(object):
|
||||
being the total time elapsed in seconds during the training run.
|
||||
"""
|
||||
stats = self.get_epoch_stats()
|
||||
logger.info(
|
||||
'Epoch 0:\n ' +
|
||||
', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
|
||||
)
|
||||
run_stats = [stats.values()]
|
||||
run_start_time = time.time()
|
||||
for epoch in range(1, num_epochs + 1):
|
||||
|
Loading…
Reference in New Issue
Block a user