Switching from 'cost' to 'error' for consistency with slides.

parent f1ed11a325 · commit 4ef1428447
@@ -1,20 +1,22 @@
 # -*- coding: utf-8 -*-
-"""Model costs.
+"""Error functions.

-This module defines cost functions, with the aim of model training being to
-minimise the cost function given a set of inputs and target outputs. The cost
-functions typically measure some concept of distance between the model outputs
-and target outputs.
+This module defines error functions, with the aim of model training being to
+minimise the error function given a set of inputs and target outputs.
+
+The error functions will typically measure some concept of distance between the
+model outputs and target outputs, averaged over all data points in the data set
+or batch.
 """

 import numpy as np


-class MeanSquaredErrorCost(object):
-    """Mean squared error cost."""
+class SumOfSquaredDiffsError(object):
+    """Sum of squared differences (squared Euclidean distance) error."""

     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
@@ -26,148 +28,149 @@ class MeanSquaredErrorCost(object):
         return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))

     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
-        return outputs - targets
+        return (outputs - targets) / outputs.shape[0]

     def __repr__(self):
         return 'MeanSquaredErrorCost'
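The practical effect of the extra `/ outputs.shape[0]` is that `grad` now agrees with the mean taken in `__call__`. A minimal check, using toy arrays that are illustrative rather than from the repository:

    import numpy as np

    error = SumOfSquaredDiffsError()
    outputs = np.array([[0.2, 0.9], [0.5, 0.1]])
    targets = np.array([[0., 1.], [1., 0.]])

    # Central finite-difference estimate of dE/d outputs[0, 0].
    eps = 1e-6
    shift = np.zeros_like(outputs)
    shift[0, 0] = eps
    fd = (error(outputs + shift, targets) -
          error(outputs - shift, targets)) / (2 * eps)

    # The 1 / batch_size factor added to grad matches __call__'s np.mean.
    assert np.allclose(fd, error.grad(outputs, targets)[0, 0])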
-class BinaryCrossEntropyCost(object):
-    """Binary cross entropy cost."""
+class BinaryCrossEntropyError(object):
+    """Binary cross entropy error."""

     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         return -np.mean(
             targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))

     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
-        return (1. - targets) / (1. - outputs) - (targets / outputs)
+        return ((1. - targets) / (1. - outputs) -
+                (targets / outputs)) / outputs.shape[0]

     def __repr__(self):
-        return 'BinaryCrossEntropyCost'
+        return 'BinaryCrossEntropyError'
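As above, `grad` picks up a `1 / batch_size` factor. One thing the class itself does not guard against is outputs of exactly 0 or 1, where `np.log` diverges; a hedged usage sketch in which the clipping is a caller-side precaution assumed for this example, not part of the commit:

    import numpy as np

    error = BinaryCrossEntropyError()
    # Outputs must be probabilities strictly inside (0, 1).
    outputs = np.clip(np.array([[0.8], [0.3]]), 1e-12, 1. - 1e-12)
    targets = np.array([[1.], [0.]])

    print(error(outputs, targets))       # scalar error for the batch
    print(error.grad(outputs, targets))  # per-example gradients / batch_size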
-class BinaryCrossEntropySigmoidCost(object):
-    """Binary cross entropy cost with logistic sigmoid applied to outputs."""
+class BinaryCrossEntropySigmoidError(object):
+    """Binary cross entropy error with logistic sigmoid applied to outputs."""

     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         probs = 1. / (1. + np.exp(-outputs))
         return -np.mean(
             targets * np.log(probs) + (1. - targets) * np.log(1. - probs))

     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
         probs = 1. / (1. + np.exp(-outputs))
-        return probs - targets
+        return (probs - targets) / outputs.shape[0]

     def __repr__(self):
-        return 'BinaryCrossEntropySigmoidCost'
+        return 'BinaryCrossEntropySigmoidError'
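Fusing the sigmoid into the error gives the same scalar value as applying `BinaryCrossEntropyError` to pre-computed probabilities, while the gradient collapses to the familiar `(probs - targets)` form, now rescaled by batch size. A quick consistency sketch with toy values:

    import numpy as np

    logits = np.array([[2.0], [-1.5]])
    targets = np.array([[1.], [0.]])

    sig_error = BinaryCrossEntropySigmoidError()
    plain_error = BinaryCrossEntropyError()

    probs = 1. / (1. + np.exp(-logits))
    # Same scalar error whether the sigmoid is applied inside or by the caller.
    assert np.allclose(sig_error(logits, targets), plain_error(probs, targets))
    # The fused grad collapses to (probs - targets) / batch_size.
    assert np.allclose(sig_error.grad(logits, targets), (probs - targets) / 2)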
-class CrossEntropyCost(object):
-    """Multi-class cross entropy cost."""
+class CrossEntropyError(object):
+    """Multi-class cross entropy error."""

     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         return -np.mean(np.sum(targets * np.log(outputs), axis=1))

     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
-        return -targets / outputs
+        return -(targets / outputs) / outputs.shape[0]

     def __repr__(self):
-        return 'CrossEntropyCost'
+        return 'CrossEntropyError'
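With one-hot targets, only the log-probability assigned to the correct class contributes to each row's sum. An illustrative call, where the probability rows are assumed to come from some upstream softmax:

    import numpy as np

    error = CrossEntropyError()
    # Each row of outputs is assumed to be a valid probability distribution.
    outputs = np.array([[0.7, 0.2, 0.1], [0.1, 0.3, 0.6]])
    targets = np.array([[1., 0., 0.], [0., 0., 1.]])

    print(error(outputs, targets))  # equals -(log 0.7 + log 0.6) / 2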
-class CrossEntropySoftmaxCost(object):
-    """Multi-class cross entropy cost with Softmax applied to outputs."""
+class CrossEntropySoftmaxError(object):
+    """Multi-class cross entropy error with Softmax applied to outputs."""

     def __call__(self, outputs, targets):
-        """Calculates cost function given a batch of outputs and targets.
+        """Calculates error function given a batch of outputs and targets.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Scalar cost function value.
+            Scalar error function value.
         """
         probs = np.exp(outputs)
         probs /= probs.sum(-1)[:, None]
         return -np.mean(np.sum(targets * np.log(probs), axis=1))

     def grad(self, outputs, targets):
-        """Calculates gradient of cost function with respect to outputs.
+        """Calculates gradient of error function with respect to outputs.

         Args:
             outputs: Array of model outputs of shape (batch_size, output_dim).
             targets: Array of target outputs of shape (batch_size, output_dim).

         Returns:
-            Gradient of cost function with respect to outputs.
+            Gradient of error function with respect to outputs.
         """
         probs = np.exp(outputs)
         probs /= probs.sum(-1)[:, None]
-        return probs - targets
+        return (probs - targets) / outputs.shape[0]

     def __repr__(self):
-        return 'CrossEntropySoftmaxCost'
+        return 'CrossEntropySoftmaxError'
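The fused softmax variant should agree with `CrossEntropyError` applied to explicitly normalised probabilities, and its `grad` reduces to `(probs - targets) / batch_size`. Note that `np.exp(outputs)` is applied without max-subtraction and can overflow for large logits, so this sketch keeps the values small:

    import numpy as np

    logits = np.array([[1.0, -0.5, 0.2], [0.3, 0.8, -1.0]])
    targets = np.array([[1., 0., 0.], [0., 1., 0.]])

    fused = CrossEntropySoftmaxError()
    plain = CrossEntropyError()

    probs = np.exp(logits)
    probs /= probs.sum(-1)[:, None]
    assert np.allclose(fused(logits, targets), plain(probs, targets))
    # Fused grad is the usual softmax shortcut, rescaled by batch size.
    assert np.allclose(fused.grad(logits, targets), (probs - targets) / 2)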
@@ -10,16 +10,17 @@ import numpy as np

 class GradientDescentLearningRule(object):
     """Simple (stochastic) gradient descent learning rule.

-    For a scalar loss function `L(p[0], p[1] ... )` of some set of potentially
-    multidimensional parameters this attempts to find a local minimum of the
-    loss function by applying updates to each parameter of the form
+    For a scalar error function `E(p[0], p[1] ... )` of some set of
+    potentially multidimensional parameters this attempts to find a local
+    minimum of the error function by applying updates to each parameter of
+    the form

-        p[i] := p[i] - learning_rate * dL/dp[i]
+        p[i] := p[i] - learning_rate * dE/dp[i]

     With `learning_rate` a positive scaling parameter.

-    The loss function used in successive applications of these updates may be a
-    stochastic estimator of the true loss function (e.g. when the loss with
+    The error function used in successive applications of these updates may be
+    a stochastic estimator of the true error function (e.g. when the error with
     respect to only a subset of data-points is calculated) in which case this
     will correspond to a stochastic gradient descent learning rule.
     """
@@ -17,30 +17,30 @@ logger = logging.getLogger(__name__)

 class Optimiser(object):
     """Basic model optimiser."""

-    def __init__(self, model, cost, learning_rule, train_dataset,
+    def __init__(self, model, error, learning_rule, train_dataset,
                  valid_dataset=None, data_monitors=None):
         """Create a new optimiser instance.

         Args:
             model: The model to optimise.
-            cost: The scalar cost function to minimise.
+            error: The scalar error function to minimise.
             learning_rule: Gradient based learning rule to use to minimise
-                cost.
+                error.
             train_dataset: Data provider for training set data batches.
             valid_dataset: Data provider for validation set data batches.
             data_monitors: Dictionary of functions evaluated on targets and
                 model outputs (averaged across both full training and
                 validation data sets) to monitor during training in addition
-                to the cost. Keys should correspond to a string label for
+                to the error. Keys should correspond to a string label for
                 the statistic being evaluated.
         """
         self.model = model
-        self.cost = cost
+        self.error = error
         self.learning_rule = learning_rule
         self.learning_rule.initialise(self.model.params)
         self.train_dataset = train_dataset
         self.valid_dataset = valid_dataset
-        self.data_monitors = OrderedDict([('cost', cost)])
+        self.data_monitors = OrderedDict([('error', error)])
         if data_monitors is not None:
             self.data_monitors.update(data_monitors)
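Wiring the renamed keyword through might look as follows; `model`, `train_data` and `valid_data` are hypothetical stand-ins for the project's model and data provider objects, the no-argument learning rule constructor is assumed, and the accuracy monitor is just an example `data_monitors` entry:

    import numpy as np

    optimiser = Optimiser(
        model=model,
        error=CrossEntropySoftmaxError(),  # was cost=...
        learning_rule=GradientDescentLearningRule(),
        train_dataset=train_data,
        valid_dataset=valid_data,
        data_monitors={'acc': lambda outputs, targets: np.mean(
            outputs.argmax(-1) == targets.argmax(-1))},
    )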
@@ -48,13 +48,13 @@ class Optimiser(object):
         """Do a single training epoch.

         This iterates through all batches in training dataset, for each
-        calculating the gradient of the estimated loss given the batch with
+        calculating the gradient of the estimated error given the batch with
         respect to all the model parameters and then updates the model
         parameters according to the learning rule.
         """
         for inputs_batch, targets_batch in self.train_dataset:
             activations = self.model.fprop(inputs_batch)
-            grads_wrt_outputs = self.cost.grad(activations[-1], targets_batch)
+            grads_wrt_outputs = self.error.grad(activations[-1], targets_batch)
             grads_wrt_params = self.model.grads_wrt_params(
                 activations, grads_wrt_outputs)
             self.learning_rule.update_params(grads_wrt_params)
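Since `error.grad` already folds in the `1 / batch_size` averaging, the learning rule needs no extra per-batch rescaling. A hypothetical driver loop on top of this method, with the epoch count an assumed setting for illustration:

    num_epochs = 5
    for epoch in range(num_epochs):
        # One full pass over train_dataset, updating params in place.
        optimiser.train_epoch()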