mlpractical/mlp/errors.py

177 lines
5.8 KiB
Python
Raw Normal View History

2024-09-20 20:09:17 +02:00
# -*- coding: utf-8 -*-
"""Error functions.
This module defines error functions, with the aim of model training being to
minimise the error function given a set of inputs and target outputs.
The error functions will typically measure some concept of distance between the
model outputs and target outputs, averaged over all data points in the data set
or batch.
"""
import numpy as np
class SumOfSquaredDiffsError(object):
    """Sum of squared differences (squared Euclidean distance) error.

    The error is half the squared Euclidean distance between each output row
    and its target row, averaged over the rows of the batch.
    """

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar cost function value.
        """
        # Sum squared differences over the output dimension, then average
        # those per-example sums over the batch.
        return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs, with the same
            shape as `outputs`.
        """
        # The 0.5 factor cancels the 2 from differentiating the square; the
        # division by the batch size comes from the mean over examples.
        return (outputs - targets) / outputs.shape[0]

    def __repr__(self):
        # Report the actual class name (previously a stale
        # 'MeanSquaredErrorCost' label that matched no class in this module).
        return 'SumOfSquaredDiffsError'
class BinaryCrossEntropyError(object):
    """Binary cross entropy error.

    The error is the binary cross entropy averaged over every element of the
    batch. `np.log` is applied directly to `outputs` and `1 - outputs`, so
    entries equal to exactly 0 or 1 produce infinite or NaN values.
    """

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        # np.mean with no axis averages over all batch_size * output_dim
        # elements.
        return -np.mean(
            targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs, with the same
            shape as `outputs`.
        """
        # __call__ averages over every element, so the matching gradient is
        # normalised by the total element count. Dividing by the batch size
        # alone is only correct when output_dim == 1 (where both agree).
        return ((1. - targets) / (1. - outputs) -
                (targets / outputs)) / outputs.size

    def __repr__(self):
        return 'BinaryCrossEntropyError'
class BinaryCrossEntropySigmoidError(object):
    """Binary cross entropy error with logistic sigmoid applied to outputs.

    `outputs` are treated as logits: the logistic sigmoid is folded into the
    error so that both the error and its gradient stay finite for logits of
    large magnitude. The error is averaged over every element of the batch.
    """

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        # With s(x) = 1 / (1 + exp(-x)):
        #   -[t * log s(x) + (1 - t) * log(1 - s(x))] = log(1 + exp(x)) - t * x
        # np.logaddexp(0, x) evaluates log(1 + exp(x)) without overflow, and
        # this form never computes 0 * log(0), which previously gave NaN once
        # the sigmoid saturated to exactly 0 or 1.
        return np.mean(np.logaddexp(0., outputs) - targets * outputs)

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs, with the same
            shape as `outputs`.
        """
        # Sigmoid computed as exp(-log(1 + exp(-x))) so that exp never
        # receives a large positive argument.
        probs = np.exp(-np.logaddexp(0., -outputs))
        # __call__ averages over every element, so normalise by the total
        # element count (equal to the batch size when output_dim == 1).
        return (probs - targets) / outputs.size

    def __repr__(self):
        return 'BinaryCrossEntropySigmoidError'
class CrossEntropyError(object):
    """Multi-class cross entropy error.

    `np.log` is applied directly to `outputs`, so zero entries in `outputs`
    produce infinite or NaN values.
    """

    def __call__(self, outputs, targets):
        """Evaluates the error for a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        # Per-example sum of target-weighted log outputs, then the negated
        # average of those sums over the batch.
        weighted_logs = targets * np.log(outputs)
        per_example = np.sum(weighted_logs, axis=1)
        return -np.mean(per_example)

    def grad(self, outputs, targets):
        """Evaluates the gradient of the error with respect to the outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        batch_size = outputs.shape[0]
        ratio = targets / outputs
        return -ratio / batch_size

    def __repr__(self):
        return 'CrossEntropyError'
class CrossEntropySoftmaxError(object):
    """Multi-class cross entropy error with Softmax applied to outputs.

    `outputs` are treated as logits. The softmax is evaluated in log space
    with the row maximum subtracted first, so large logits do not overflow
    `np.exp` and the error stays finite when a class probability underflows.
    """

    @staticmethod
    def _log_softmax(outputs):
        """Returns the log of the softmax of `outputs` along the last axis."""
        # Softmax is invariant to subtracting a per-row constant; shifting by
        # the row maximum makes every exponent <= 0, so exp cannot overflow.
        shifted = outputs - np.max(outputs, axis=-1, keepdims=True)
        log_norm = np.log(np.sum(np.exp(shifted), axis=-1, keepdims=True))
        return shifted - log_norm

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        # Using log-probabilities directly avoids log(0) = -inf (and the
        # resulting 0 * -inf = NaN) when a probability underflows to zero.
        log_probs = self._log_softmax(outputs)
        return -np.mean(np.sum(targets * log_probs, axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs, with the same
            shape as `outputs`.
        """
        probs = np.exp(self._log_softmax(outputs))
        # Division by the batch size matches the mean over examples taken in
        # __call__.
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'CrossEntropySoftmaxError'