Switching from 'cost' to 'error' for consistency with slides.

Matt Graham 2016-09-30 02:53:13 +01:00
parent f1ed11a325
commit 4ef1428447
3 changed files with 61 additions and 57 deletions

View File

@@ -1,20 +1,22 @@
 # -*- coding: utf-8 -*-
-"""Model costs.
+"""Error functions.
 
-This module defines cost functions, with the aim of model training being to
-minimise the cost function given a set of inputs and target outputs. The cost
-functions typically measure some concept of distance between the model outputs
-and target outputs.
+This module defines error functions, with the aim of model training being to
+minimise the error function given a set of inputs and target outputs.
+
+The error functions will typically measure some concept of distance between the
+model outputs and target outputs, averaged over all data points in the data set
+or batch.
 """
 
 import numpy as np
 
 
-class MeanSquaredErrorCost(object):
-    """Mean squared error cost."""
+class SumOfSquaredDiffsError(object):
+    """Sum of squared differences (squared Euclidean distance) error."""
 
     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
@@ -26,148 +28,149 @@ class MeanSquaredErrorCost(object):
         return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))
 
     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.
 
         Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
-        return outputs - targets
+        return (outputs - targets) / outputs.shape[0]
 
     def __repr__(self):
-        return 'MeanSquaredErrorCost'
+        return 'SumOfSquaredDiffsError'
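
The new normalisation is easy to verify on a concrete batch. A minimal sketch, assuming the SumOfSquaredDiffsError class above is in scope:

import numpy as np

outputs = np.array([[1., 2.], [3., 4.]])
targets = np.zeros((2, 2))
error = SumOfSquaredDiffsError()
# Per-point squared Euclidean distances: 1 + 4 = 5 and 9 + 16 = 25,
# so the error is 0.5 * mean([5, 25]) = 7.5.
print(error(outputs, targets))       # 7.5
# Gradient is (outputs - targets) / batch_size, with batch_size = 2 here.
print(error.grad(outputs, targets))  # [[0.5 1. ] [1.5 2. ]]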
 
-class BinaryCrossEntropyCost(object):
-    """Binary cross entropy cost."""
+class BinaryCrossEntropyError(object):
+    """Binary cross entropy error."""
 
     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         return -np.mean(
             targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))
 
     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
-        return (1. - targets) / (1. - outputs) - (targets / outputs)
+        return ((1. - targets) / (1. - outputs) -
+                (targets / outputs)) / outputs.shape[0]
 
     def __repr__(self):
-        return 'BinaryCrossEntropyCost'
+        return 'BinaryCrossEntropyError'
 
-class BinaryCrossEntropySigmoidCost(object):
-    """Binary cross entropy cost with logistic sigmoid applied to outputs."""
+class BinaryCrossEntropySigmoidError(object):
+    """Binary cross entropy error with logistic sigmoid applied to outputs."""
 
     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         probs = 1. / (1. + np.exp(-outputs))
         return -np.mean(
             targets * np.log(probs) + (1. - targets) * np.log(1. - probs))
 
     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
         probs = 1. / (1. + np.exp(-outputs))
-        return probs - targets
+        return (probs - targets) / outputs.shape[0]
 
     def __repr__(self):
-        return 'BinaryCrossEntropySigmoidCost'
+        return 'BinaryCrossEntropySigmoidError'
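
The point of the fused sigmoid variant is visible in the two grad methods: chaining BinaryCrossEntropyError's gradient through the logistic sigmoid derivative probs * (1 - probs) collapses to the simple (probs - targets) / batch_size form above. A quick numerical check of that equivalence (a sketch, assuming both classes defined in this file are in scope):

import numpy as np

logits = np.random.randn(4, 1)
targets = np.random.randint(0, 2, (4, 1)).astype(float)
probs = 1. / (1. + np.exp(-logits))

# Chain rule: dE/dlogits = dE/dprobs * dprobs/dlogits, with
# dprobs/dlogits = probs * (1 - probs) for the logistic sigmoid.
chained = BinaryCrossEntropyError().grad(probs, targets) * probs * (1. - probs)
fused = BinaryCrossEntropySigmoidError().grad(logits, targets)
print(np.allclose(chained, fused))  # True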
 
-class CrossEntropyCost(object):
-    """Multi-class cross entropy cost."""
+class CrossEntropyError(object):
+    """Multi-class cross entropy error."""
 
     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         return -np.mean(np.sum(targets * np.log(outputs), axis=1))
 
     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
-        return -targets / outputs
+        return -(targets / outputs) / outputs.shape[0]
 
     def __repr__(self):
-        return 'CrossEntropyCost'
+        return 'CrossEntropyError'
 
-class CrossEntropySoftmaxCost(object):
-    """Multi-class cross entropy cost with Softmax applied to outputs."""
+class CrossEntropySoftmaxError(object):
+    """Multi-class cross entropy error with Softmax applied to outputs."""
 
     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         probs = np.exp(outputs)
         probs /= probs.sum(-1)[:, None]
         return -np.mean(np.sum(targets * np.log(probs), axis=1))
 
     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.
 
         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
         probs = np.exp(outputs)
         probs /= probs.sum(-1)[:, None]
-        return probs - targets
+        return (probs - targets) / outputs.shape[0]
 
     def __repr__(self):
-        return 'CrossEntropySoftmaxCost'
+        return 'CrossEntropySoftmaxError'
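
Each grad method now divides by outputs.shape[0] to match the batch averaging in the corresponding __call__ method: the gradient of a mean over the batch carries the same 1/batch_size factor. A central finite-difference comparison makes a convenient regression test for this; check_grad below is a hypothetical helper, not part of the commit, shown for the two classes whose __call__ averages per-point errors over the batch:

import numpy as np

def check_grad(error, outputs, targets, eps=1e-6):
    """Compare error.grad against a central finite-difference estimate."""
    analytic = error.grad(outputs, targets)
    numeric = np.empty_like(outputs)
    for idx in np.ndindex(*outputs.shape):
        shift = np.zeros_like(outputs)
        shift[idx] = eps
        numeric[idx] = (error(outputs + shift, targets) -
                        error(outputs - shift, targets)) / (2 * eps)
    return np.allclose(analytic, numeric, atol=1e-5)

outputs = np.random.uniform(0.2, 0.8, (5, 3))  # stay away from log(0)
targets = np.random.uniform(0.2, 0.8, (5, 3))
print(check_grad(SumOfSquaredDiffsError(), outputs, targets))  # True
print(check_grad(CrossEntropyError(), outputs, targets))       # True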

View File

@@ -10,16 +10,17 @@ import numpy as np
 
 class GradientDescentLearningRule(object):
     """Simple (stochastic) gradient descent learning rule.
 
-    For a scalar loss function `L(p[0], p_[1] ... )` of some set of potentially
-    multidimensional parameters this attempts to find a local minimum of the
-    loss function by applying updates to each parameter of the form
+    For a scalar error function `E(p[0], p[1] ... )` of some set of
+    potentially multidimensional parameters this attempts to find a local
+    minimum of the error function by applying updates to each parameter of
+    the form
 
-        p[i] := p[i] - learning_rate * dL/dp[i]
+        p[i] := p[i] - learning_rate * dE/dp[i]
 
     With `learning_rate` a positive scaling parameter.
 
-    The loss function used in successive applications of these updates may be a
-    stochastic estimator of the true loss function (e.g. when the loss with
+    The error function used in successive applications of these updates may be
+    a stochastic estimator of the true error function (e.g. when the error with
     respect to only a subset of data-points is calculated) in which case this
     will correspond to a stochastic gradient descent learning rule.
     """

View File

@@ -17,30 +17,30 @@ logger = logging.getLogger(__name__)
 
 class Optimiser(object):
     """Basic model optimiser."""
 
-    def __init__(self, model, cost, learning_rule, train_dataset,
+    def __init__(self, model, error, learning_rule, train_dataset,
                  valid_dataset=None, data_monitors=None):
         """Create a new optimiser instance.
 
         Args:
             model: The model to optimise.
-            cost: The scalar cost function to minimise.
+            error: The scalar error function to minimise.
             learning_rule: Gradient based learning rule to use to minimise
-                cost.
+                error.
             train_dataset: Data provider for training set data batches.
             valid_dataset: Data provider for validation set data batches.
             data_monitors: Dictionary of functions evaluated on targets and
                 model outputs (averaged across both full training and
                 validation data sets) to monitor during training in addition
-                to the cost. Keys should correspond to a string label for
+                to the error. Keys should correspond to a string label for
                 the statistic being evaluated.
         """
         self.model = model
-        self.cost = cost
+        self.error = error
         self.learning_rule = learning_rule
         self.learning_rule.initialise(self.model.params)
         self.train_dataset = train_dataset
         self.valid_dataset = valid_dataset
-        self.data_monitors = OrderedDict([('cost', cost)])
+        self.data_monitors = OrderedDict([('error', error)])
 
         if data_monitors is not None:
             self.data_monitors.update(data_monitors)
@@ -48,13 +48,13 @@ class Optimiser(object):
         """Do a single training epoch.
 
         This iterates through all batches in training dataset, for each
-        calculating the gradient of the estimated loss given the batch with
+        calculating the gradient of the estimated error given the batch with
         respect to all the model parameters and then updates the model
         parameters according to the learning rule.
         """
         for inputs_batch, targets_batch in self.train_dataset:
             activations = self.model.fprop(inputs_batch)
-            grads_wrt_outputs = self.cost.grad(activations[-1], targets_batch)
+            grads_wrt_outputs = self.error.grad(activations[-1], targets_batch)
             grads_wrt_params = self.model.grads_wrt_params(
                 activations, grads_wrt_outputs)
             self.learning_rule.update_params(grads_wrt_params)
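
For orientation, wiring the renamed pieces together might look like the sketch below. This is a hedged sketch, not code from the commit: model, train_data, and valid_data stand in for a model and data providers defined elsewhere in the repository, and the GradientDescentLearningRule constructor argument is assumed, since neither appears in this diff. Running the epoch method shown above then performs the per-batch gradient updates.

error = CrossEntropySoftmaxError()
learning_rule = GradientDescentLearningRule(learning_rate=0.1)  # signature assumed
optimiser = Optimiser(model, error, learning_rule, train_data,
                      valid_dataset=valid_data)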