diff --git a/mlp/layers.py b/mlp/layers.py
index 9b7996f..25bf183 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -68,10 +68,10 @@ class LayerWithParameters(Layer):
         """
         raise NotImplementedError()
 
-    def params_cost(self):
-        """Returns the parameter dependent cost term for this layer.
+    def params_penalty(self):
+        """Returns the parameter dependent penalty term for this layer.
 
-        If no parameter-dependent cost terms are set this returns zero.
+        If no parameter-dependent penalty terms are set this returns zero.
         """
         raise NotImplementedError()
 
@@ -105,7 +105,7 @@ class AffineLayer(LayerWithParameters):
     def __init__(self, input_dim, output_dim,
                  weights_initialiser=init.UniformInit(-0.1, 0.1),
                  biases_initialiser=init.ConstantInit(0.),
-                 weights_cost=None, biases_cost=None):
+                 weights_penalty=None, biases_penalty=None):
         """Initialises a parameterised affine layer.
 
         Args:
@@ -113,15 +113,17 @@ class AffineLayer(LayerWithParameters):
             output_dim (int): Dimension of the layer outputs.
             weights_initialiser: Initialiser for the weight parameters.
             biases_initialiser: Initialiser for the bias parameters.
-            weights_cost: Weights-dependent cost term.
-            biases_cost: Biases-dependent cost term.
+            weights_penalty: Weights-dependent penalty term (regulariser) or
+                None if no regularisation is to be applied to the weights.
+            biases_penalty: Biases-dependent penalty term (regulariser) or
+                None if no regularisation is to be applied to the biases.
         """
         self.input_dim = input_dim
         self.output_dim = output_dim
         self.weights = weights_initialiser((self.output_dim, self.input_dim))
         self.biases = biases_initialiser(self.output_dim)
-        self.weights_cost = weights_cost
-        self.biases_cost = biases_cost
+        self.weights_penalty = weights_penalty
+        self.biases_penalty = biases_penalty
 
     def fprop(self, inputs):
         """Forward propagates activations through the layer transformation.
@@ -172,25 +174,25 @@ class AffineLayer(LayerWithParameters):
         grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
         grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
 
-        if self.weights_cost is not None:
-            grads_wrt_weights += self.weights_cost.grad(self.weights)
+        if self.weights_penalty is not None:
+            grads_wrt_weights += self.weights_penalty.grad(self.weights)
 
-        if self.biases_cost is not None:
-            grads_wrt_biases += self.biases_cost.grads(self.biases)
+        if self.biases_penalty is not None:
+            grads_wrt_biases += self.biases_penalty.grad(self.biases)
 
         return [grads_wrt_weights, grads_wrt_biases]
 
-    def params_cost(self):
-        """Returns the parameter dependent cost term for this layer.
+    def params_penalty(self):
+        """Returns the parameter dependent penalty term for this layer.
 
-        If no parameter-dependent cost terms are set this returns zero.
+        If no parameter-dependent penalty terms are set this returns zero.
         """
-        params_cost = 0
-        if self.weights_cost is not None:
-            params_cost += self.weights_cost(self.weights)
-        if self.biases_cost is not None:
-            params_cost += self.biases_cost(self.biases)
-        return params_cost
+        params_penalty = 0
+        if self.weights_penalty is not None:
+            params_penalty += self.weights_penalty(self.weights)
+        if self.biases_penalty is not None:
+            params_penalty += self.biases_penalty(self.biases)
+        return params_penalty
 
     @property
     def params(self):
diff --git a/mlp/models.py b/mlp/models.py
index ba1eb5b..842c16d 100644
--- a/mlp/models.py
+++ b/mlp/models.py
@@ -59,9 +59,9 @@ class SingleLayerModel(object):
         """
         return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)
 
-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        return self.layer.params_cost()
+    def params_penalty(self):
+        """Calculates the parameter dependent penalty term of the model."""
+        return self.layer.params_penalty()
 
     def __repr__(self):
         return 'SingleLayerModel(' + str(layer) + ')'
@@ -134,13 +134,13 @@ class MultipleLayerModel(object):
                 inputs, outputs, grads_wrt_outputs)
         return grads_wrt_params[::-1]
 
-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        params_cost = 0.
+    def params_penalty(self):
+        """Calculates the parameter dependent penalty term of the model."""
+        params_penalty = 0.
         for layer in self.layers:
             if isinstance(layer, LayerWithParameters):
-                params_cost += layer.params_cost()
-        return params_cost
+                params_penalty += layer.params_penalty()
+        return params_penalty
 
     def __repr__(self):
         return (
diff --git a/mlp/optimisers.py b/mlp/optimisers.py
index 4d00c25..4ea4cd0 100644
--- a/mlp/optimisers.py
+++ b/mlp/optimisers.py
@@ -93,7 +93,7 @@ class Optimiser(object):
         if self.valid_dataset is not None:
             epoch_stats.update(self.eval_monitors(
                 self.valid_dataset, '(valid)'))
-        epoch_stats['cost(param)'] = self.model.params_cost()
+        epoch_stats['params_penalty'] = self.model.params_penalty()
         return epoch_stats
 
     def log_stats(self, epoch, epoch_time, stats):
@@ -104,7 +104,7 @@ class Optimiser(object):
             epoch_time: Time taken in seconds for the epoch to complete.
             stats: Monitored stats for the epoch.
         """
-        logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format(
+        logger.info('Epoch {0}: {1:.2f}s to complete\n {2}'.format(
            epoch, epoch_time,
            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
         ))