diff --git a/mlp/costs.py b/mlp/costs.py index 351980b..bd103b3 100644 --- a/mlp/costs.py +++ b/mlp/costs.py @@ -1,17 +1,40 @@ # -*- coding: utf-8 -*- -"""Model costs.""" +"""Model costs. + +This module defines cost functions, with the aim of model training being to +minimise the cost function given a set of inputs and target outputs. The cost +functions typically measure some concept of distance between the model outputs +and target outputs. +""" import numpy as np class MeanSquaredErrorCost(object): - """ - """ + """Mean squared error cost.""" def __call__(self, outputs, targets): + """Calculates cost function given a batch of outputs and targets. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Scalar cost function value. + """ return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1)) def grad(self, outputs, targets): + """Calculates gradient of cost function with respect to outputs. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Gradient of cost function with respect to outputs. + """ return outputs - targets def __repr__(self): @@ -19,14 +42,31 @@ class MeanSquaredErrorCost(object): class BinaryCrossEntropyCost(object): - """ - """ + """Binary cross entropy cost.""" def __call__(self, outputs, targets): + """Calculates cost function given a batch of outputs and targets. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Scalar cost function value. + """ return -np.mean( targets * np.log(outputs) + (1. - targets) * np.log(1. - ouputs)) def grad(self, outputs, targets): + """Calculates gradient of cost function with respect to outputs. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Gradient of cost function with respect to outputs. + """ return (1. - targets) / (1. - outputs) - (targets / outputs) def __repr__(self): @@ -34,15 +74,32 @@ class BinaryCrossEntropyCost(object): class BinaryCrossEntropySigmoidCost(object): - """ - """ + """Binary cross entropy cost with logistic sigmoid applied to outputs.""" def __call__(self, outputs, targets): + """Calculates cost function given a batch of outputs and targets. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Scalar cost function value. + """ probs = 1. / (1. + np.exp(-outputs)) return -np.mean( targets * np.log(probs) + (1. - targets) * np.log(1. - probs)) def grad(self, outputs, targets): + """Calculates gradient of cost function with respect to outputs. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Gradient of cost function with respect to outputs. + """ probs = 1. / (1. + np.exp(-outputs)) return probs - targets @@ -50,25 +107,31 @@ class BinaryCrossEntropySigmoidCost(object): return 'BinaryCrossEntropySigmoidCost' -class BinaryAccuracySigmoidCost(object): - """ - """ - - def __call__(self, outputs, targets): - return ((outputs > 0) == targets).mean() - - def ___repr__(self): - return 'BinaryAccuracySigmoidCost' - - class CrossEntropyCost(object): - """ - """ + """Multi-class cross entropy cost.""" def __call__(self, outputs, targets): + """Calculates cost function given a batch of outputs and targets. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Scalar cost function value. + """ return -np.mean(np.sum(targets * np.log(outputs), axis=1)) def grad(self, outputs, targets): + """Calculates gradient of cost function with respect to outputs. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Gradient of cost function with respect to outputs. + """ return -targets / outputs def __repr__(self): @@ -76,30 +139,35 @@ class CrossEntropyCost(object): class CrossEntropySoftmaxCost(object): - """ - """ + """Multi-class cross entropy cost with Softmax applied to outputs.""" def __call__(self, outputs, targets): + """Calculates cost function given a batch of outputs and targets. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Scalar cost function value. + """ probs = np.exp(outputs) probs /= probs.sum(-1)[:, None] return -np.mean(np.sum(targets * np.log(probs), axis=1)) def grad(self, outputs, targets): + """Calculates gradient of cost function with respect to outputs. + + Args: + outputs: Array of model outputs of shape (batch_size, output_dim). + targets: Array of target outputs of shape (batch_size, output_dim). + + Returns: + Gradient of cost function with respect to outputs. + """ probs = np.exp(outputs) probs /= probs.sum(-1)[:, None] return probs - targets def __repr__(self): return 'CrossEntropySoftmaxCost' - - -class MulticlassAccuracySoftmaxCost(object): - """ - """ - - def __call__(self, outputs, targets): - probs = np.exp(outputs) - return np.mean(np.argmax(probs, -1) == np.argmax(targets, -1)) - - def __repr__(self): - return 'MulticlassAccuracySoftmaxCost'