Switching to parameter penalty rather than cost naming.

parent a353526790
commit ab060d556c
@@ -68,10 +68,10 @@ class LayerWithParameters(Layer):
         """
         raise NotImplementedError()

-    def params_cost(self):
-        """Returns the parameter dependent cost term for this layer.
+    def params_penalty(self):
+        """Returns the parameter dependent penalty term for this layer.

-        If no parameter-dependent cost terms are set this returns zero.
+        If no parameter-dependent penalty terms are set this returns zero.
         """
         raise NotImplementedError()

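The renamed interface assumes the penalty terms attached to a layer are objects that can be called on a parameter array to return a scalar penalty value and that expose a grad method returning the gradient of that penalty with respect to the parameters. A minimal sketch of such a regulariser under that assumed call/grad interface (the L2Penalty name and coefficient here are purely illustrative and not part of this commit):

import numpy as np


class L2Penalty(object):
    """Illustrative L2 (quadratic) parameter penalty / regulariser."""

    def __init__(self, coefficient):
        assert coefficient > 0., 'Penalty coefficient must be positive.'
        self.coefficient = coefficient

    def __call__(self, parameter):
        # Scalar penalty value added to the model's overall error.
        return 0.5 * self.coefficient * (parameter ** 2).sum()

    def grad(self, parameter):
        # Gradient of the penalty with respect to the parameter array;
        # this is what the layer adds to its parameter gradients below.
        return self.coefficient * parameter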
@@ -105,7 +105,7 @@ class AffineLayer(LayerWithParameters):
     def __init__(self, input_dim, output_dim,
                  weights_initialiser=init.UniformInit(-0.1, 0.1),
                  biases_initialiser=init.ConstantInit(0.),
-                 weights_cost=None, biases_cost=None):
+                 weights_penalty=None, biases_penalty=None):
         """Initialises a parameterised affine layer.

         Args:
@@ -113,15 +113,17 @@
             output_dim (int): Dimension of the layer outputs.
             weights_initialiser: Initialiser for the weight parameters.
             biases_initialiser: Initialiser for the bias parameters.
-            weights_cost: Weights-dependent cost term.
-            biases_cost: Biases-dependent cost term.
+            weights_penalty: Weights-dependent penalty term (regulariser) or
+                None if no regularisation is to be applied to the weights.
+            biases_penalty: Biases-dependent penalty term (regulariser) or
+                None if no regularisation is to be applied to the biases.
         """
         self.input_dim = input_dim
         self.output_dim = output_dim
         self.weights = weights_initialiser((self.output_dim, self.input_dim))
         self.biases = biases_initialiser(self.output_dim)
-        self.weights_cost = weights_cost
-        self.biases_cost = biases_cost
+        self.weights_penalty = weights_penalty
+        self.biases_penalty = biases_penalty

     def fprop(self, inputs):
         """Forward propagates activations through the layer transformation.
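As an illustration of the renamed keyword arguments (not part of the diff; the module paths are assumptions and L2Penalty is the sketch above), a regularised affine layer might be constructed as:

from mlp.layers import AffineLayer          # module path assumed
from mlp import initialisers as init        # module path assumed

hidden_layer = AffineLayer(
    input_dim=784, output_dim=100,
    weights_initialiser=init.UniformInit(-0.1, 0.1),
    biases_initialiser=init.ConstantInit(0.),
    weights_penalty=L2Penalty(1e-4),  # L2Penalty as sketched earlier
    biases_penalty=None,              # no regularisation on the biases
)
print(hidden_layer.params_penalty())  # scalar penalty contribution of this layer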
@@ -172,25 +174,25 @@ class AffineLayer(LayerWithParameters):
         grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
         grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)

-        if self.weights_cost is not None:
-            grads_wrt_weights += self.weights_cost.grad(self.weights)
+        if self.weights_penalty is not None:
+            grads_wrt_weights += self.weights_penalty.grad(self.weights)

-        if self.biases_cost is not None:
-            grads_wrt_biases += self.biases_cost.grads(self.biases)
+        if self.biases_penalty is not None:
+            grads_wrt_biases += self.biases_penalty.grad(self.biases)

         return [grads_wrt_weights, grads_wrt_biases]

-    def params_cost(self):
-        """Returns the parameter dependent cost term for this layer.
+    def params_penalty(self):
+        """Returns the parameter dependent penalty term for this layer.

-        If no parameter-dependent cost terms are set this returns zero.
+        If no parameter-dependent penalty terms are set this returns zero.
         """
-        params_cost = 0
-        if self.weights_cost is not None:
-            params_cost += self.weights_cost(self.weights)
-        if self.biases_cost is not None:
-            params_cost += self.biases_cost(self.biases)
-        return params_cost
+        params_penalty = 0
+        if self.weights_penalty is not None:
+            params_penalty += self.weights_penalty(self.weights)
+        if self.biases_penalty is not None:
+            params_penalty += self.biases_penalty(self.biases)
+        return params_penalty

     @property
     def params(self):
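With a penalty set, grads_wrt_params in this hunk returns gradients of the regularised error: the data-dependent term plus the penalty gradient. A rough check of that convention, reusing the illustrative AffineLayer construction and L2Penalty sketch above (again hypothetical, not from the commit):

import numpy as np

layer = AffineLayer(input_dim=3, output_dim=2,
                    weights_penalty=L2Penalty(0.1))
inputs = np.random.randn(5, 3)
grads_wrt_outputs = np.random.randn(5, 2)

grads_wrt_weights, grads_wrt_biases = layer.grads_wrt_params(
    inputs, grads_wrt_outputs)

# Data-dependent term plus the L2 penalty gradient (0.1 * weights).
data_term = np.dot(grads_wrt_outputs.T, inputs)
assert np.allclose(grads_wrt_weights, data_term + 0.1 * layer.weights)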
@@ -59,9 +59,9 @@ class SingleLayerModel(object):
         """
         return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)

-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        return self.layer.params_cost()
+    def params_penalty(self):
+        """Calculates the parameter dependent penalty term of the model."""
+        return self.layer.params_penalty()

     def __repr__(self):
         return 'SingleLayerModel(' + str(layer) + ')'
@@ -134,13 +134,13 @@ class MultipleLayerModel(object):
                 inputs, outputs, grads_wrt_outputs)
         return grads_wrt_params[::-1]

-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        params_cost = 0.
+    def params_penalty(self):
+        """Calculates the parameter dependent penalty term of the model."""
+        params_penalty = 0.
         for layer in self.layers:
             if isinstance(layer, LayerWithParameters):
-                params_cost += layer.params_cost()
-        return params_cost
+                params_penalty += layer.params_penalty()
+        return params_penalty

     def __repr__(self):
         return (
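The model-level params_penalty sums the per-layer contributions, so the quantity actually being minimised can be reported as data error plus total penalty. A hedged sketch of that combination (the error interface and fprop returning a list of activations are assumptions, not part of the diff):

def regularised_error(model, error, inputs, targets):
    # fprop is assumed to return the list of layer activations,
    # with the final entry being the model outputs.
    outputs = model.fprop(inputs)[-1]
    return error(outputs, targets) + model.params_penalty()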
@@ -93,7 +93,7 @@ class Optimiser(object):
         if self.valid_dataset is not None:
             epoch_stats.update(self.eval_monitors(
                 self.valid_dataset, '(valid)'))
-        epoch_stats['cost(param)'] = self.model.params_cost()
+        epoch_stats['params_penalty'] = self.model.params_penalty()
         return epoch_stats

     def log_stats(self, epoch, epoch_time, stats):
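Since the monitored key changes from 'cost(param)' to 'params_penalty', any code that indexes the collected per-epoch statistics needs updating to the new key, for example (variable names hypothetical):

penalty_curve = [epoch_stats['params_penalty'] for epoch_stats in run_stats]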
@@ -104,7 +104,7 @@ class Optimiser(object):
             epoch_time: Time taken in seconds for the epoch to complete.
             stats: Monitored stats for the epoch.
         """
         logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format(
             epoch, epoch_time,
             ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
         ))
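With the renamed key, the epoch summary written by log_stats would read roughly as below; the epoch number, timing, the other stat names, and all values are invented purely to illustrate the format:

Epoch 5: 1.84s to complete
 error(train)=3.21e-01, acc(train)=9.05e-01, params_penalty=4.56e-02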