Switching from 'cost' to 'error' for consistency with slides.

Matt Graham 2016-09-30 02:53:13 +01:00
parent f1ed11a325
commit 4ef1428447
3 changed files with 61 additions and 57 deletions


@@ -1,20 +1,22 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Model costs. """Error functions.
This module defines cost functions, with the aim of model training being to This module defines error functions, with the aim of model training being to
minimise the cost function given a set of inputs and target outputs. The cost minimise the error function given a set of inputs and target outputs.
functions typically measure some concept of distance between the model outputs
and target outputs. The error functions will typically measure some concept of distance between the
model outputs and target outputs, averaged over all data points in the data set
or batch.
""" """
import numpy as np import numpy as np
class MeanSquaredErrorCost(object): class SumOfSquaredDiffsError(object):
"""Mean squared error cost.""" """Sum of squared differences (squared Euclidean distance) error."""
def __call__(self, outputs, targets): def __call__(self, outputs, targets):
"""Calculates cost function given a batch of outputs and targets. """Calculates error function given a batch of outputs and targets.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
@@ -26,148 +28,149 @@ class MeanSquaredErrorCost(object):
return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1)) return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))
def grad(self, outputs, targets): def grad(self, outputs, targets):
"""Calculates gradient of cost function with respect to outputs. """Calculates gradient of error function with respect to outputs.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Gradient of cost function with respect to outputs. Gradient of error function with respect to outputs.
""" """
return outputs - targets return (outputs - targets) / outputs.shape[0]
def __repr__(self): def __repr__(self):
return 'MeanSquaredErrorCost' return 'SumOfSquaredDiffsError'
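
A quick sanity check on the new `/ outputs.shape[0]` factor: because the error is a mean over the batch, E = (1 / 2N) * sum_n sum_d (outputs[n, d] - targets[n, d])**2 for batch size N, so the gradient with respect to each output inherits a 1/N. The following standalone finite-difference check (illustrative code, not part of this commit) confirms the two agree:

import numpy as np

def error(outputs, targets):
    # Batch mean of halved squared Euclidean distances.
    return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))

def error_grad(outputs, targets):
    # The 1 / batch_size factor mirrors the mean in the error.
    return (outputs - targets) / outputs.shape[0]

rng = np.random.RandomState(27)
outputs, targets = rng.randn(5, 3), rng.randn(5, 3)
eps = 1e-6
perturb = np.zeros_like(outputs)
perturb[2, 1] = eps  # central difference on a single output entry
fd_grad = (error(outputs + perturb, targets) -
           error(outputs - perturb, targets)) / (2 * eps)
assert np.allclose(fd_grad, error_grad(outputs, targets)[2, 1])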
class BinaryCrossEntropyCost(object): class BinaryCrossEntropyError(object):
"""Binary cross entropy cost.""" """Binary cross entropy error."""
def __call__(self, outputs, targets): def __call__(self, outputs, targets):
"""Calculates cost function given a batch of outputs and targets. """Calculates error function given a batch of outputs and targets.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Scalar cost function value. Scalar error function value.
""" """
return -np.mean( return -np.mean(
targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs)) targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))
def grad(self, outputs, targets): def grad(self, outputs, targets):
"""Calculates gradient of cost function with respect to outputs. """Calculates gradient of error function with respect to outputs.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Gradient of cost function with respect to outputs. Gradient of error function with respect to outputs.
""" """
return (1. - targets) / (1. - outputs) - (targets / outputs) return ((1. - targets) / (1. - outputs) -
(targets / outputs)) / outputs.shape[0]
def __repr__(self): def __repr__(self):
return 'BinaryCrossEntropyCost' return 'BinaryCrossEntropyError'
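
One practical caveat with this direct form: it evaluates np.log(outputs) and np.log(1. - outputs), so a model output saturated at exactly 0 or 1 yields an infinite error (and grad divides by outputs and 1. - outputs, which blows up the same way). That is the usual motivation for the fused sigmoid variant below. A minimal standalone illustration (not from this commit):

import numpy as np

targets = np.array([[0.]])
outputs = np.array([[1.]])  # fully saturated, confidently wrong prediction
with np.errstate(divide='ignore'):  # silence the log(0) warning
    error = -np.mean(targets * np.log(outputs) +
                     (1. - targets) * np.log(1. - outputs))
print(error)  # inf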
class BinaryCrossEntropySigmoidCost(object): class BinaryCrossEntropySigmoidError(object):
"""Binary cross entropy cost with logistic sigmoid applied to outputs.""" """Binary cross entropy error with logistic sigmoid applied to outputs."""
def __call__(self, outputs, targets): def __call__(self, outputs, targets):
"""Calculates cost function given a batch of outputs and targets. """Calculates error function given a batch of outputs and targets.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Scalar cost function value. Scalar error function value.
""" """
probs = 1. / (1. + np.exp(-outputs)) probs = 1. / (1. + np.exp(-outputs))
return -np.mean( return -np.mean(
targets * np.log(probs) + (1. - targets) * np.log(1. - probs)) targets * np.log(probs) + (1. - targets) * np.log(1. - probs))
def grad(self, outputs, targets): def grad(self, outputs, targets):
"""Calculates gradient of cost function with respect to outputs. """Calculates gradient of error function with respect to outputs.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Gradient of cost function with respect to outputs. Gradient of error function with respect to outputs.
""" """
probs = 1. / (1. + np.exp(-outputs)) probs = 1. / (1. + np.exp(-outputs))
return probs - targets return (probs - targets) / outputs.shape[0]
def __repr__(self): def __repr__(self):
return 'BinaryCrossEntropySigmoidCost' return 'BinaryCrossEntropySigmoidError'
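
The reason the fused grad is simply (probs - targets) / outputs.shape[0]: writing p = sigmoid(a), the chain rule gives dE/da = dE/dp * dp/da = ((p - t) / (p * (1 - p))) * p * (1 - p) = p - t, so the awkward divisions cancel exactly and the result stays finite even for saturated p. A standalone check of that algebra (illustrative, not part of the commit):

import numpy as np

def sigmoid(a):
    return 1. / (1. + np.exp(-a))

rng = np.random.RandomState(27)
a = rng.randn(4, 1)
t = (rng.rand(4, 1) > 0.5).astype(float)
p = sigmoid(a)
# dE/dp (as in BinaryCrossEntropyError.grad, without the 1/N) times dp/da:
chain = ((1. - t) / (1. - p) - t / p) * p * (1. - p)
assert np.allclose(chain, p - t)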
class CrossEntropyCost(object): class CrossEntropyError(object):
"""Multi-class cross entropy cost.""" """Multi-class cross entropy error."""
def __call__(self, outputs, targets): def __call__(self, outputs, targets):
"""Calculates cost function given a batch of outputs and targets. """Calculates error function given a batch of outputs and targets.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Scalar cost function value. Scalar error function value.
""" """
return -np.mean(np.sum(targets * np.log(outputs), axis=1)) return -np.mean(np.sum(targets * np.log(outputs), axis=1))
def grad(self, outputs, targets): def grad(self, outputs, targets):
"""Calculates gradient of cost function with respect to outputs. """Calculates gradient of error function with respect to outputs.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Gradient of cost function with respect to outputs. Gradient of error function with respect to outputs.
""" """
return -targets / outputs return -(targets / outputs) / outputs.shape[0]
def __repr__(self): def __repr__(self):
return 'CrossEntropyCost' return 'CrossEntropyError'
class CrossEntropySoftmaxCost(object): class CrossEntropySoftmaxError(object):
"""Multi-class cross entropy cost with Softmax applied to outputs.""" """Multi-class cross entropy error with Softmax applied to outputs."""
def __call__(self, outputs, targets): def __call__(self, outputs, targets):
"""Calculates cost function given a batch of outputs and targets. """Calculates error function given a batch of outputs and targets.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Scalar cost function value. Scalar error function value.
""" """
probs = np.exp(outputs) probs = np.exp(outputs)
probs /= probs.sum(-1)[:, None] probs /= probs.sum(-1)[:, None]
return -np.mean(np.sum(targets * np.log(probs), axis=1)) return -np.mean(np.sum(targets * np.log(probs), axis=1))
def grad(self, outputs, targets): def grad(self, outputs, targets):
"""Calculates gradient of cost function with respect to outputs. """Calculates gradient of error function with respect to outputs.
Args: Args:
outputs: Array of model outputs of shape (batch_size, output_dim). outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim). targets: Array of target outputs of shape (batch_size, output_dim).
Returns: Returns:
Gradient of cost function with respect to outputs. Gradient of error function with respect to outputs.
""" """
probs = np.exp(outputs) probs = np.exp(outputs)
probs /= probs.sum(-1)[:, None] probs /= probs.sum(-1)[:, None]
return probs - targets return (probs - targets) / outputs.shape[0]
def __repr__(self): def __repr__(self):
return 'CrossEntropySoftmaxCost' return 'CrossEntropySoftmaxError'
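
The same collapse happens here: with p = softmax(a) and one-hot targets (rows of t sum to one), dE/da_k = sum_j (-t_j / p_j) * p_j * (delta_jk - p_k) = p_k - t_k, which is why grad returns (probs - targets) / outputs.shape[0] rather than backpropagating through the softmax explicitly. A standalone finite-difference check (illustrative, not part of the commit):

import numpy as np

def softmax_cross_entropy(a, t):
    p = np.exp(a)
    p /= p.sum(-1)[:, None]
    return -np.mean(np.sum(t * np.log(p), axis=1))

rng = np.random.RandomState(27)
a = rng.randn(4, 3)
t = np.eye(3)[rng.randint(0, 3, size=4)]  # one-hot targets
p = np.exp(a)
p /= p.sum(-1)[:, None]
eps = 1e-6
perturb = np.zeros_like(a)
perturb[1, 2] = eps  # central difference on a single logit
fd_grad = (softmax_cross_entropy(a + perturb, t) -
           softmax_cross_entropy(a - perturb, t)) / (2 * eps)
assert np.allclose(fd_grad, ((p - t) / a.shape[0])[1, 2])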


@@ -10,16 +10,17 @@ import numpy as np
class GradientDescentLearningRule(object): class GradientDescentLearningRule(object):
"""Simple (stochastic) gradient descent learning rule. """Simple (stochastic) gradient descent learning rule.
For a scalar loss function `L(p[0], p[1] ... )` of some set of potentially For a scalar error function `E(p[0], p[1] ... )` of some set of
multidimensional parameters this attempts to find a local minimum of the potentially multidimensional parameters this attempts to find a local
loss function by applying updates to each parameter of the form minimum of the error function by applying updates to each parameter of the
form
p[i] := p[i] - learning_rate * dL/dp[i] p[i] := p[i] - learning_rate * dE/dp[i]
With `learning_rate` a positive scaling parameter. With `learning_rate` a positive scaling parameter.
The loss function used in successive applications of these updates may be a The error function used in successive applications of these updates may be
stochastic estimator of the true loss function (e.g. when the loss with a stochastic estimator of the true error function (e.g. when the error with
respect to only a subset of data-points is calculated) in which case this respect to only a subset of data-points is calculated) in which case this
will correspond to a stochastic gradient descent learning rule. will correspond to a stochastic gradient descent learning rule.
""" """


@@ -17,30 +17,30 @@ logger = logging.getLogger(__name__)
class Optimiser(object): class Optimiser(object):
"""Basic model optimiser.""" """Basic model optimiser."""
def __init__(self, model, cost, learning_rule, train_dataset, def __init__(self, model, error, learning_rule, train_dataset,
valid_dataset=None, data_monitors=None): valid_dataset=None, data_monitors=None):
"""Create a new optimiser instance. """Create a new optimiser instance.
Args: Args:
model: The model to optimise. model: The model to optimise.
cost: The scalar cost function to minimise. error: The scalar error function to minimise.
learning_rule: Gradient based learning rule to use to minimise learning_rule: Gradient based learning rule to use to minimise
cost. error.
train_dataset: Data provider for training set data batches. train_dataset: Data provider for training set data batches.
valid_dataset: Data provider for validation set data batches. valid_dataset: Data provider for validation set data batches.
data_monitors: Dictionary of functions evaluated on targets and data_monitors: Dictionary of functions evaluated on targets and
model outputs (averaged across both full training and model outputs (averaged across both full training and
validation data sets) to monitor during training in addition validation data sets) to monitor during training in addition
to the cost. Keys should correspond to a string label for to the error. Keys should correspond to a string label for
the statistic being evaluated. the statistic being evaluated.
""" """
self.model = model self.model = model
self.cost = cost self.error = error
self.learning_rule = learning_rule self.learning_rule = learning_rule
self.learning_rule.initialise(self.model.params) self.learning_rule.initialise(self.model.params)
self.train_dataset = train_dataset self.train_dataset = train_dataset
self.valid_dataset = valid_dataset self.valid_dataset = valid_dataset
self.data_monitors = OrderedDict([('cost', cost)]) self.data_monitors = OrderedDict([('error', error)])
if data_monitors is not None: if data_monitors is not None:
self.data_monitors.update(data_monitors) self.data_monitors.update(data_monitors)
@@ -48,13 +48,13 @@ class Optimiser(object):
"""Do a single training epoch. """Do a single training epoch.
This iterates through all batches in the training dataset, for each This iterates through all batches in the training dataset, for each
calculating the gradient of the estimated loss given the batch with calculating the gradient of the estimated error given the batch with
respect to all the model parameters and then updates the model respect to all the model parameters and then updates the model
parameters according to the learning rule. parameters according to the learning rule.
""" """
for inputs_batch, targets_batch in self.train_dataset: for inputs_batch, targets_batch in self.train_dataset:
activations = self.model.fprop(inputs_batch) activations = self.model.fprop(inputs_batch)
grads_wrt_outputs = self.cost.grad(activations[-1], targets_batch) grads_wrt_outputs = self.error.grad(activations[-1], targets_batch)
grads_wrt_params = self.model.grads_wrt_params( grads_wrt_params = self.model.grads_wrt_params(
activations, grads_wrt_outputs) activations, grads_wrt_outputs)
self.learning_rule.update_params(grads_wrt_params) self.learning_rule.update_params(grads_wrt_params)
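
To see the renamed pieces working together in miniature, here is a self-contained toy run of the same fprop -> error.grad -> parameter-update pattern on a single linear layer (all names and data are illustrative; none of this comes from the repository):

import numpy as np

rng = np.random.RandomState(27)
true_W = np.array([[2.], [-1.]])
inputs = rng.randn(100, 2)
targets = inputs.dot(true_W)  # toy regression data from a known linear map

W = np.zeros((2, 1))  # single parameter matrix of the linear model
learning_rate = 0.1
for epoch in range(200):
    outputs = inputs.dot(W)  # fprop
    # SumOfSquaredDiffsError.grad, with the 1 / batch_size factor:
    grads_wrt_outputs = (outputs - targets) / inputs.shape[0]
    grads_wrt_W = inputs.T.dot(grads_wrt_outputs)  # grads_wrt_params
    W -= learning_rate * grads_wrt_W  # gradient descent update

assert np.allclose(W, true_W, atol=1e-2)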