Switching from 'cost' to 'penalty' naming for parameter-dependent terms.

This commit is contained in:
Matt Graham 2016-10-21 01:24:04 +01:00
parent a353526790
commit ab060d556c
3 changed files with 33 additions and 31 deletions

View File

@@ -68,10 +68,10 @@ class LayerWithParameters(Layer):
""" """
raise NotImplementedError() raise NotImplementedError()
def params_cost(self): def params_penalty(self):
"""Returns the parameter dependent cost term for this layer. """Returns the parameter dependent penalty term for this layer.
If no parameter-dependent cost terms are set this returns zero. If no parameter-dependent penalty terms are set this returns zero.
""" """
raise NotImplementedError() raise NotImplementedError()
@@ -105,7 +105,7 @@ class AffineLayer(LayerWithParameters):
def __init__(self, input_dim, output_dim, def __init__(self, input_dim, output_dim,
weights_initialiser=init.UniformInit(-0.1, 0.1), weights_initialiser=init.UniformInit(-0.1, 0.1),
biases_initialiser=init.ConstantInit(0.), biases_initialiser=init.ConstantInit(0.),
weights_cost=None, biases_cost=None): weights_penalty=None, biases_penalty=None):
"""Initialises a parameterised affine layer. """Initialises a parameterised affine layer.
Args: Args:
@@ -113,15 +113,17 @@ class AffineLayer(LayerWithParameters):
output_dim (int): Dimension of the layer outputs. output_dim (int): Dimension of the layer outputs.
weights_initialiser: Initialiser for the weight parameters. weights_initialiser: Initialiser for the weight parameters.
biases_initialiser: Initialiser for the bias parameters. biases_initialiser: Initialiser for the bias parameters.
weights_cost: Weights-dependent cost term. weights_penalty: Weights-dependent penalty term (regulariser) or
biases_cost: Biases-dependent cost term. None if no regularisation is to be applied to the weights.
biases_penalty: Biases-dependent penalty term (regulariser) or
None if no regularisation is to be applied to the biases.
""" """
self.input_dim = input_dim self.input_dim = input_dim
self.output_dim = output_dim self.output_dim = output_dim
self.weights = weights_initialiser((self.output_dim, self.input_dim)) self.weights = weights_initialiser((self.output_dim, self.input_dim))
self.biases = biases_initialiser(self.output_dim) self.biases = biases_initialiser(self.output_dim)
self.weights_cost = weights_cost self.weights_penalty = weights_penalty
self.biases_cost = biases_cost self.biases_penalty = biases_penalty
def fprop(self, inputs): def fprop(self, inputs):
"""Forward propagates activations through the layer transformation. """Forward propagates activations through the layer transformation.
@@ -172,25 +174,25 @@ class AffineLayer(LayerWithParameters):
grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs) grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0) grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
if self.weights_cost is not None: if self.weights_penalty is not None:
grads_wrt_weights += self.weights_cost.grad(self.weights) grads_wrt_weights += self.weights_penalty.grad(self.weights)
if self.biases_cost is not None: if self.biases_penalty is not None:
grads_wrt_biases += self.biases_cost.grads(self.biases) grads_wrt_biases += self.biases_penalty.grad(self.biases)
return [grads_wrt_weights, grads_wrt_biases] return [grads_wrt_weights, grads_wrt_biases]
def params_cost(self): def params_penalty(self):
"""Returns the parameter dependent cost term for this layer. """Returns the parameter dependent penalty term for this layer.
If no parameter-dependent cost terms are set this returns zero. If no parameter-dependent penalty terms are set this returns zero.
""" """
params_cost = 0 params_penalty = 0
if self.weights_cost is not None: if self.weights_penalty is not None:
params_cost += self.weights_cost(self.weights) params_penalty += self.weights_penalty(self.weights)
if self.biases_cost is not None: if self.biases_penalty is not None:
params_cost += self.biases_cost(self.biases) params_penalty += self.biases_penalty(self.biases)
return params_cost return params_penalty
@property @property
def params(self): def params(self):

View File

@@ -59,9 +59,9 @@ class SingleLayerModel(object):
""" """
return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs) return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)
def params_cost(self): def params_penalty(self):
"""Calculates the parameter dependent cost term of the model.""" """Calculates the parameter dependent penalty term of the model."""
return self.layer.params_cost() return self.layer.params_penalty()
def __repr__(self): def __repr__(self):
return 'SingleLayerModel(' + str(layer) + ')' return 'SingleLayerModel(' + str(layer) + ')'
@@ -134,13 +134,13 @@ class MultipleLayerModel(object):
inputs, outputs, grads_wrt_outputs) inputs, outputs, grads_wrt_outputs)
return grads_wrt_params[::-1] return grads_wrt_params[::-1]
def params_cost(self): def params_penalty(self):
"""Calculates the parameter dependent cost term of the model.""" """Calculates the parameter dependent penalty term of the model."""
params_cost = 0. params_penalty = 0.
for layer in self.layers: for layer in self.layers:
if isinstance(layer, LayerWithParameters): if isinstance(layer, LayerWithParameters):
params_cost += layer.params_cost() params_penalty += layer.params_penalty()
return params_cost return params_penalty
def __repr__(self): def __repr__(self):
return ( return (

View File

@@ -93,7 +93,7 @@ class Optimiser(object):
if self.valid_dataset is not None: if self.valid_dataset is not None:
epoch_stats.update(self.eval_monitors( epoch_stats.update(self.eval_monitors(
self.valid_dataset, '(valid)')) self.valid_dataset, '(valid)'))
epoch_stats['cost(param)'] = self.model.params_cost() epoch_stats['params_penalty'] = self.model.params_penalty()
return epoch_stats return epoch_stats
def log_stats(self, epoch, epoch_time, stats): def log_stats(self, epoch, epoch_time, stats):
@@ -104,7 +104,7 @@ class Optimiser(object):
epoch_time: Time taken in seconds for the epoch to complete. epoch_time: Time taken in seconds for the epoch to complete.
stats: Monitored stats for the epoch. stats: Monitored stats for the epoch.
""" """
logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format( logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format(
epoch, epoch_time, epoch, epoch_time,
', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()]) ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
)) ))