Switching to parameter penalty rather than cost naming.
This commit is contained in:
parent: a353526790
commit: ab060d556c
@@ -68,10 +68,10 @@ class LayerWithParameters(Layer):
         """
         raise NotImplementedError()
 
-    def params_cost(self):
-        """Returns the parameter dependent cost term for this layer.
+    def params_penalty(self):
+        """Returns the parameter dependent penalty term for this layer.
 
-        If no parameter-dependent cost terms are set this returns zero.
+        If no parameter-dependent penalty terms are set this returns zero.
         """
         raise NotImplementedError()
 
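The penalty terms referred to here (and attached to AffineLayer below) are assumed to be callable regulariser objects that also expose a grad method, matching how they are used in grads_wrt_params and params_penalty further down. A minimal sketch of such an object, with the class name and coefficient handling illustrative rather than part of this commit:

import numpy as np

class L2Penalty(object):
    """Hypothetical quadratic (weight decay) penalty: 0.5 * coefficient * sum(params ** 2)."""

    def __init__(self, coefficient):
        assert coefficient > 0., 'Penalty coefficient must be positive.'
        self.coefficient = coefficient

    def __call__(self, parameter):
        # Scalar penalty value added to the training objective.
        return 0.5 * self.coefficient * (parameter ** 2).sum()

    def grad(self, parameter):
        # Gradient of the penalty with respect to the parameter array.
        return self.coefficient * parameter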
@@ -105,7 +105,7 @@ class AffineLayer(LayerWithParameters):
     def __init__(self, input_dim, output_dim,
                  weights_initialiser=init.UniformInit(-0.1, 0.1),
                  biases_initialiser=init.ConstantInit(0.),
-                 weights_cost=None, biases_cost=None):
+                 weights_penalty=None, biases_penalty=None):
         """Initialises a parameterised affine layer.
 
         Args:
@@ -113,15 +113,17 @@ class AffineLayer(LayerWithParameters):
             output_dim (int): Dimension of the layer outputs.
             weights_initialiser: Initialiser for the weight parameters.
             biases_initialiser: Initialiser for the bias parameters.
-            weights_cost: Weights-dependent cost term.
-            biases_cost: Biases-dependent cost term.
+            weights_penalty: Weights-dependent penalty term (regulariser) or
+                None if no regularisation is to be applied to the weights.
+            biases_penalty: Biases-dependent penalty term (regulariser) or
+                None if no regularisation is to be applied to the biases.
         """
         self.input_dim = input_dim
         self.output_dim = output_dim
         self.weights = weights_initialiser((self.output_dim, self.input_dim))
         self.biases = biases_initialiser(self.output_dim)
-        self.weights_cost = weights_cost
-        self.biases_cost = biases_cost
+        self.weights_penalty = weights_penalty
+        self.biases_penalty = biases_penalty
 
     def fprop(self, inputs):
         """Forward propagates activations through the layer transformation.
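With the renamed keyword arguments, attaching a regulariser at construction time might look like the following. This is a hypothetical usage sketch: it assumes an L2Penalty object like the one outlined above, and the dimensions and coefficient are illustrative.

# Hypothetical usage of the renamed constructor arguments.
layer = AffineLayer(
    input_dim=784, output_dim=100,
    weights_penalty=L2Penalty(1e-4),  # regularise the weights
    biases_penalty=None,              # leave the biases unregularised
)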
@@ -172,25 +174,25 @@ class AffineLayer(LayerWithParameters):
         grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
         grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
 
-        if self.weights_cost is not None:
-            grads_wrt_weights += self.weights_cost.grad(self.weights)
+        if self.weights_penalty is not None:
+            grads_wrt_weights += self.weights_penalty.grad(self.weights)
 
-        if self.biases_cost is not None:
-            grads_wrt_biases += self.biases_cost.grads(self.biases)
+        if self.biases_penalty is not None:
+            grads_wrt_biases += self.biases_penalty.grad(self.biases)
 
         return [grads_wrt_weights, grads_wrt_biases]
 
-    def params_cost(self):
-        """Returns the parameter dependent cost term for this layer.
+    def params_penalty(self):
+        """Returns the parameter dependent penalty term for this layer.
 
-        If no parameter-dependent cost terms are set this returns zero.
+        If no parameter-dependent penalty terms are set this returns zero.
         """
-        params_cost = 0
-        if self.weights_cost is not None:
-            params_cost += self.weights_cost(self.weights)
-        if self.biases_cost is not None:
-            params_cost += self.biases_cost(self.biases)
-        return params_cost
+        params_penalty = 0
+        if self.weights_penalty is not None:
+            params_penalty += self.weights_penalty(self.weights)
+        if self.biases_penalty is not None:
+            params_penalty += self.biases_penalty(self.biases)
+        return params_penalty
 
     @property
     def params(self):
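The penalty's grad output is simply added to the data-dependent gradients above, so for an L2 penalty of 0.5 * coefficient * sum(W ** 2) the extra term is coefficient * W. A quick finite-difference sanity check of that relationship, using the hypothetical L2Penalty sketched earlier:

import numpy as np

# Assumes the illustrative L2Penalty class from the sketch above is in scope.
penalty = L2Penalty(1e-2)
W = np.random.RandomState(0).normal(size=(3, 4))

eps = 1e-6
# Numerically estimate d penalty / d W[0, 0] and compare to penalty.grad(W)[0, 0].
W_plus = W.copy()
W_plus[0, 0] += eps
W_minus = W.copy()
W_minus[0, 0] -= eps
numeric = (penalty(W_plus) - penalty(W_minus)) / (2 * eps)
assert np.allclose(numeric, penalty.grad(W)[0, 0])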
@@ -59,9 +59,9 @@ class SingleLayerModel(object):
         """
         return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)
 
-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        return self.layer.params_cost()
+    def params_penalty(self):
+        """Calculates the parameter dependent penalty term of the model."""
+        return self.layer.params_penalty()
 
     def __repr__(self):
         return 'SingleLayerModel(' + str(layer) + ')'
@@ -134,13 +134,13 @@ class MultipleLayerModel(object):
                 inputs, outputs, grads_wrt_outputs)
         return grads_wrt_params[::-1]
 
-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        params_cost = 0.
+    def params_penalty(self):
+        """Calculates the parameter dependent penalty term of the model."""
+        params_penalty = 0.
         for layer in self.layers:
             if isinstance(layer, LayerWithParameters):
-                params_cost += layer.params_cost()
-        return params_cost
+                params_penalty += layer.params_penalty()
+        return params_penalty
 
     def __repr__(self):
         return (
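Downstream, the per-layer penalties summed here are intended to be added to the data-dependent error when reporting or optimising the full objective. A rough sketch of that combination, assuming (as elsewhere in this framework) that model.fprop returns the list of layer activations with the outputs last and that a callable error function is available; names are illustrative:

# Hypothetical: total objective = data error + parameter penalty.
activations = model.fprop(inputs)
outputs = activations[-1]
total_error = error(outputs, targets) + model.params_penalty()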
@@ -93,7 +93,7 @@ class Optimiser(object):
         if self.valid_dataset is not None:
             epoch_stats.update(self.eval_monitors(
                 self.valid_dataset, '(valid)'))
-        epoch_stats['cost(param)'] = self.model.params_cost()
+        epoch_stats['params_penalty'] = self.model.params_penalty()
         return epoch_stats
 
     def log_stats(self, epoch, epoch_time, stats):
@@ -104,7 +104,7 @@ class Optimiser(object):
             epoch_time: Time taken in seconds for the epoch to complete.
            stats: Monitored stats for the epoch.
         """
-        logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format(
+        logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format(
            epoch, epoch_time,
            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
        ))
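With the renamed stats key, a logged epoch line produced by the format string above would look roughly like this; the other stat names and all values are illustrative, not taken from this commit:

Epoch 10: 4.32s to complete
 error(train)=3.21e-01, acc(train)=9.12e-01, params_penalty=2.45e-02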