Adding stochastic layer functionality in preparation for dropout implementation.
commit 95d4028bb7
parent 7e1250a82b
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -14,6 +14,7 @@ respect to the layer parameters.
 
 import numpy as np
 import mlp.initialisers as init
+from mlp import DEFAULT_SEED
 
 
 class Layer(object):
@@ -96,6 +97,57 @@ class LayerWithParameters(Layer):
         raise NotImplementedError()
 
 
+class StochasticLayer(Layer):
+    """Specialised layer which uses stochastic forward propagation."""
+
+    def __init__(self, rng=None):
+        """Constructs a new StochasticLayer object.
+
+        Args:
+            rng (RandomState): Seeded random number generator object.
+        """
+        if rng is None:
+            rng = np.random.RandomState(DEFAULT_SEED)
+        self.rng = rng
+
+    def fprop(self, inputs, stochastic=True):
+        """Forward propagates activations through the layer transformation.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            stochastic: Flag allowing use of a deterministic
+                forward-propagation mode in addition to the default
+                stochastic forward-propagation, e.g. for use at test time.
+                If False, a deterministic transformation corresponding to
+                the expected output of the stochastic forward-propagation
+                is applied.
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        raise NotImplementedError()
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer, calculates
+        the gradients with respect to the layer inputs. This should
+        correspond to the default stochastic forward-propagation.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        raise NotImplementedError()
+
+
 class AffineLayer(LayerWithParameters):
     """Layer implementing an affine transformation of its inputs.
 
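To make the intended use concrete: the commit message names dropout as the motivation, and a dropout layer could subclass StochasticLayer roughly as sketched below. The `DropoutLayer` name and `incl_prob` parameter are illustrative assumptions, not part of this commit; the deterministic branch returns the expected value of the stochastic output, since E[mask * inputs] = incl_prob * inputs for a Bernoulli(incl_prob) mask.

import numpy as np

from mlp import DEFAULT_SEED
from mlp.layers import StochasticLayer


class DropoutLayer(StochasticLayer):
    """Illustrative sketch only: randomly zeroes unit activations."""

    def __init__(self, rng=None, incl_prob=0.5):
        super(DropoutLayer, self).__init__(rng)
        self.incl_prob = incl_prob  # probability of keeping each unit

    def fprop(self, inputs, stochastic=True):
        if stochastic:
            # Sample a binary inclusion mask and keep it for bprop.
            self._mask = self.rng.uniform(size=inputs.shape) < self.incl_prob
            return inputs * self._mask
        else:
            # Deterministic mode: expected value of the stochastic output.
            return inputs * self.incl_prob

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        # Matches the default stochastic fprop, as the base class docstring
        # requires; assumes fprop(stochastic=True) was called beforehand.
        return grads_wrt_outputs * self._mask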
--- a/mlp/models.py
+++ b/mlp/models.py
@@ -8,7 +8,7 @@ outputs (and intermediate states) and for calculating gradients of scalar
 functions of the outputs with respect to the model parameters.
 """
 
-from mlp.layers import LayerWithParameters
+from mlp.layers import LayerWithParameters, StochasticLayer
 
 
 class SingleLayerModel(object):
@@ -27,11 +27,13 @@ class SingleLayerModel(object):
         """A list of all of the parameters of the model."""
         return self.layer.params
 
-    def fprop(self, inputs):
+    def fprop(self, inputs, stochastic=True):
         """Calculate the model outputs corresponding to a batch of inputs.
 
         Args:
             inputs: Batch of inputs to the model.
+            stochastic: Whether to use stochastic forward propagation
+                for stochastic layers (True) or deterministic (False).
 
         Returns:
             List which is a concatenation of the model inputs and model
@@ -40,7 +42,10 @@ class SingleLayerModel(object):
         activations through all intermediate layers of the model and including
         the inputs and outputs.
         """
-        activations = [inputs, self.layer.fprop(inputs)]
+        outputs = (self.layer.fprop(inputs, stochastic)
+                   if isinstance(self.layer, StochasticLayer) else
+                   self.layer.fprop(inputs))
+        activations = [inputs, outputs]
         return activations
 
     def grads_wrt_params(self, activations, grads_wrt_outputs):
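A brief usage sketch of the updated `SingleLayerModel.fprop`, reusing the illustrative `DropoutLayer` from above and assuming (as the `self.layer` attribute suggests) that the constructor takes the single layer:

import numpy as np

from mlp.models import SingleLayerModel

model = SingleLayerModel(DropoutLayer(incl_prob=0.5))
inputs = np.ones((4, 3))
train_acts = model.fprop(inputs)                   # stochastic pass (default)
test_acts = model.fprop(inputs, stochastic=False)  # deterministic pass
assert np.allclose(test_acts[-1], 0.5 * inputs)    # expected output: p * x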
@@ -88,11 +93,13 @@ class MultipleLayerModel(object):
             params += layer.params
         return params
 
-    def fprop(self, inputs):
+    def fprop(self, inputs, stochastic=True):
         """Forward propagates a batch of inputs through the model.
 
         Args:
             inputs: Batch of inputs to the model.
+            stochastic: Whether to use stochastic forward propagation
+                for stochastic layers (True) or deterministic (False).
 
         Returns:
             List of the activations at the output of all layers of the model
@@ -101,7 +108,11 @@ class MultipleLayerModel(object):
         """
         activations = [inputs]
         for i, layer in enumerate(self.layers):
-            activations.append(self.layers[i].fprop(activations[i]))
+            if isinstance(layer, StochasticLayer):
+                activations.append(self.layers[i].fprop(
+                    activations[i], stochastic=stochastic))
+            else:
+                activations.append(self.layers[i].fprop(activations[i]))
         return activations
 
     def grads_wrt_params(self, activations, grads_wrt_outputs):
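The isinstance dispatch means ordinary layers keep their one-argument fprop signature and never see the new keyword; only StochasticLayer instances receive the flag. A hypothetical sketch, again with the illustrative DropoutLayer and assuming the constructor takes the list of layers stored in `self.layers`:

import numpy as np

from mlp.models import MultipleLayerModel

# In deterministic mode the expected outputs compose multiplicatively,
# so an all-ones input propagates to 0.8 * 0.8 = 0.64.
model = MultipleLayerModel([DropoutLayer(incl_prob=0.8),
                            DropoutLayer(incl_prob=0.8)])
activations = model.fprop(np.ones((2, 5)), stochastic=False)
assert np.allclose(activations[-1], 0.64)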
--- a/mlp/optimisers.py
+++ b/mlp/optimisers.py
@@ -18,7 +18,8 @@ class Optimiser(object):
     """Basic model optimiser."""
 
     def __init__(self, model, error, learning_rule, train_dataset,
-                 valid_dataset=None, data_monitors=None, schedulers=[]):
+                 valid_dataset=None, data_monitors=None, schedulers=[],
+                 use_stochastic_eval=True):
         """Create a new optimiser instance.
 
         Args:
@@ -33,6 +34,10 @@ class Optimiser(object):
                 validation data sets) to monitor during training in addition
                 to the error. Keys should correspond to a string label for
                 the statistic being evaluated.
+            schedulers: List of learning rule scheduler objects for adjusting
+                learning rule hyperparameters over training. Can be empty.
+            use_stochastic_eval: Whether to use the `stochastic=True` flag in
+                `model.fprop` when evaluating model performance during training.
         """
         self.model = model
         self.error = error
@@ -44,6 +49,7 @@ class Optimiser(object):
         if data_monitors is not None:
             self.data_monitors.update(data_monitors)
         self.schedulers = schedulers
+        self.use_stochastic_eval = use_stochastic_eval
 
     def do_training_epoch(self):
         """Do a single training epoch.
@@ -73,7 +79,8 @@ class Optimiser(object):
         data_mon_vals = OrderedDict([(key + label, 0.) for key
                                      in self.data_monitors.keys()])
         for inputs_batch, targets_batch in dataset:
-            activations = self.model.fprop(inputs_batch)
+            activations = self.model.fprop(
+                inputs_batch, stochastic=self.use_stochastic_eval)
             for key, data_monitor in self.data_monitors.items():
                 data_mon_vals[key + label] += data_monitor(
                     activations[-1], targets_batch)
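Because no hunk touches do_training_epoch, parameter updates presumably still use the model's default stochastic forward pass; the new flag only changes these monitoring passes. A hypothetical construction, where model, error, learning_rule and the dataset objects are placeholders:

optimiser = Optimiser(
    model, error, learning_rule, train_dataset,
    valid_dataset=valid_dataset,
    use_stochastic_eval=False)  # monitor stats with deterministic fprop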
@@ -125,6 +132,10 @@ class Optimiser(object):
             being the total time elapsed in seconds during the training run.
         """
         stats = self.get_epoch_stats()
+        logger.info(
+            'Epoch 0:\n ' +
+            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
+        )
         run_stats = [stats.values()]
         run_start_time = time.time()
         for epoch in range(1, num_epochs + 1):