Update lab 3

tpmmthomas 2024-10-03 21:53:33 +08:00
parent 3692ab7532
commit 9f1f3ccd04
7 changed files with 1695 additions and 58 deletions

.gitignore vendored
View File

@@ -25,6 +25,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
etc/
# PyInstaller
# Usually these files are written by a python script from a template
@@ -61,3 +62,7 @@ target/
# Notebook stuff
notebooks/.ipynb_checkpoints/
# Data folder
data/
solutions/

View File

@@ -23,10 +23,9 @@ class SumOfSquaredDiffsError(object):
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Scalar error function value.
Scalar cost function value.
"""
#TODO write your code here
raise NotImplementedError()
return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))
def grad(self, outputs, targets):
"""Calculates gradient of error function with respect to outputs.
@@ -36,11 +35,142 @@ class SumOfSquaredDiffsError(object):
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Gradient of error function with respect to outputs. This should be
an array of shape (batch_size, output_dim).
Gradient of error function with respect to outputs.
"""
#TODO write your code here
raise NotImplementedError()
return (outputs - targets) / outputs.shape[0]
def __repr__(self):
return 'SumOfSquaredDiffsError'
return 'MeanSquaredErrorCost'
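A quick, illustrative sanity check for the newly filled-in error and gradient (not part of the commit; the formulas are restated inline rather than imported): compare the analytic gradient against central finite differences.

import numpy as np

def error(outputs, targets):
    # Same expression as SumOfSquaredDiffsError.__call__ above
    return 0.5 * np.mean(np.sum((outputs - targets) ** 2, axis=1))

rng = np.random.RandomState(0)
outputs = rng.normal(size=(4, 3))
targets = rng.normal(size=(4, 3))
analytic = (outputs - targets) / outputs.shape[0]  # as in grad above

eps = 1e-6
numeric = np.zeros_like(outputs)
for idx in np.ndindex(*outputs.shape):
    shift = np.zeros_like(outputs)
    shift[idx] = eps
    numeric[idx] = (error(outputs + shift, targets) -
                    error(outputs - shift, targets)) / (2 * eps)
assert np.allclose(analytic, numeric, atol=1e-6)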
class BinaryCrossEntropyError(object):
"""Binary cross entropy error."""
def __call__(self, outputs, targets):
"""Calculates error function given a batch of outputs and targets.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Scalar error function value.
"""
return -np.mean(
targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))
def grad(self, outputs, targets):
"""Calculates gradient of error function with respect to outputs.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Gradient of error function with respect to outputs.
"""
return ((1. - targets) / (1. - outputs) -
(targets / outputs)) / outputs.shape[0]
def __repr__(self):
return 'BinaryCrossEntropyError'
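An editorial caveat on this class (not part of the commit): the outputs must already be probabilities strictly inside (0, 1), otherwise the logs return inf or nan, even for predictions that match the targets. This motivates the fused sigmoid variant below.

import numpy as np

targets = np.array([[1.], [0.]])
probs = np.array([[0.9], [0.2]])
# Finite for probabilities strictly inside (0, 1)
print(-np.mean(targets * np.log(probs) + (1. - targets) * np.log(1. - probs)))

saturated = np.array([[1.], [0.]])
with np.errstate(divide='ignore', invalid='ignore'):
    # 0 * log(0) evaluates to nan even though the predictions are correct
    print(-np.mean(targets * np.log(saturated) +
                   (1. - targets) * np.log(1. - saturated)))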
class BinaryCrossEntropySigmoidError(object):
"""Binary cross entropy error with logistic sigmoid applied to outputs."""
def __call__(self, outputs, targets):
"""Calculates error function given a batch of outputs and targets.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Scalar error function value.
"""
probs = 1. / (1. + np.exp(-outputs))
return -np.mean(
targets * np.log(probs) + (1. - targets) * np.log(1. - probs))
def grad(self, outputs, targets):
"""Calculates gradient of error function with respect to outputs.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Gradient of error function with respect to outputs.
"""
probs = 1. / (1. + np.exp(-outputs))
return (probs - targets) / outputs.shape[0]
def __repr__(self):
return 'BinaryCrossEntropySigmoidError'
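The (probs - targets) / batch_size expression above is the standard simplification of chaining the plain binary cross-entropy gradient through the sigmoid derivative; a short numerical confirmation (illustrative only, formulas restated inline):

import numpy as np

rng = np.random.RandomState(1)
logits = rng.normal(size=(5, 1))
targets = rng.randint(0, 2, size=(5, 1)).astype(float)

probs = 1. / (1. + np.exp(-logits))
# Gradient w.r.t. probabilities, as in BinaryCrossEntropyError.grad
grad_wrt_probs = ((1. - targets) / (1. - probs) - targets / probs) / logits.shape[0]
# Chain through the sigmoid derivative dprobs/dlogits = probs * (1 - probs)
chained = grad_wrt_probs * probs * (1. - probs)
fused = (probs - targets) / logits.shape[0]
assert np.allclose(chained, fused)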
class CrossEntropyError(object):
"""Multi-class cross entropy error."""
def __call__(self, outputs, targets):
"""Calculates error function given a batch of outputs and targets.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Scalar error function value.
"""
return -np.mean(np.sum(targets * np.log(outputs), axis=1))
def grad(self, outputs, targets):
"""Calculates gradient of error function with respect to outputs.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Gradient of error function with respect to outputs.
"""
return -(targets / outputs) / outputs.shape[0]
def __repr__(self):
return 'CrossEntropyError'
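For one-hot targets the expression above reduces to the mean negative log-probability assigned to the correct class; a small worked example (illustrative, with made-up numbers):

import numpy as np

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
targets = np.array([[1., 0., 0.],
                    [0., 1., 0.]])
error = -np.mean(np.sum(targets * np.log(probs), axis=1))
assert np.isclose(error, -np.mean(np.log([0.7, 0.8])))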
class CrossEntropySoftmaxError(object):
"""Multi-class cross entropy error with Softmax applied to outputs."""
def __call__(self, outputs, targets):
"""Calculates error function given a batch of outputs and targets.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Scalar error function value.
"""
probs = np.exp(outputs)
probs /= probs.sum(-1)[:, None]
return -np.mean(np.sum(targets * np.log(probs), axis=1))
def grad(self, outputs, targets):
"""Calculates gradient of error function with respect to outputs.
Args:
outputs: Array of model outputs of shape (batch_size, output_dim).
targets: Array of target outputs of shape (batch_size, output_dim).
Returns:
Gradient of error function with respect to outputs.
"""
probs = np.exp(outputs)
probs /= probs.sum(-1)[:, None]
return (probs - targets) / outputs.shape[0]
def __repr__(self):
return 'CrossEntropySoftmaxError'
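Note that this implementation exponentiates the raw outputs, which overflows for large logits. A common equivalent reformulation (an alternative sketch, not what the commit uses) subtracts the per-row maximum before exponentiating:

import numpy as np

def softmax_cross_entropy(outputs, targets):
    shifted = outputs - outputs.max(-1, keepdims=True)  # same probs, no overflow
    log_probs = shifted - np.log(np.exp(shifted).sum(-1, keepdims=True))
    return -np.mean(np.sum(targets * log_probs, axis=1))

outputs = np.array([[1000., 0.], [0., 1000.]])
targets = np.array([[1., 0.], [0., 1.]])
print(softmax_cross_entropy(outputs, targets))  # 0.0; np.exp(1000.) alone overflows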

View File

@@ -73,7 +73,18 @@ class LayerWithParameters(Layer):
"""Returns a list of parameters of layer.
Returns:
List of current parameter values.
List of current parameter values. This list should be in the
same order as the `values` argument expected by the `params` setter.
"""
raise NotImplementedError()
@params.setter
def params(self, values):
"""Sets layer parameters from a list of values.
Args:
values: List of values to set parameters to. This list should be
in the same order as the list returned by the `params` property.
"""
raise NotImplementedError()
@@ -86,8 +97,7 @@ class AffineLayer(LayerWithParameters):
def __init__(self, input_dim, output_dim,
weights_initialiser=init.UniformInit(-0.1, 0.1),
biases_initialiser=init.ConstantInit(0.),
weights_cost=None, biases_cost=None):
biases_initialiser=init.ConstantInit(0.)):
"""Initialises a parameterised affine layer.
Args:
@@ -113,8 +123,26 @@ class AffineLayer(LayerWithParameters):
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
#TODO write your code here
raise NotImplementedError()
return inputs.dot(self.weights.T) + self.biases
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return grads_wrt_outputs.dot(self.weights)
def grads_wrt_params(self, inputs, grads_wrt_outputs):
"""Calculates gradients with respect to layer parameters.
@@ -128,14 +156,104 @@ class AffineLayer(LayerWithParameters):
list of arrays of gradients with respect to the layer parameters
`[grads_wrt_weights, grads_wrt_biases]`.
"""
#TODO write your code here
raise NotImplementedError()
grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
return [grads_wrt_weights, grads_wrt_biases]
@property
def params(self):
"""A list of layer parameter values: `[weights, biases]`."""
return [self.weights, self.biases]
@params.setter
def params(self, values):
self.weights = values[0]
self.biases = values[1]
def __repr__(self):
return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
self.input_dim, self.output_dim)
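A small shape-consistency check for the three AffineLayer methods (illustrative only; the maths is restated inline with hand-picked dimensions rather than importing the class):

import numpy as np

rng = np.random.RandomState(2)
weights = rng.uniform(-0.1, 0.1, size=(2, 3))  # (output_dim, input_dim)
biases = np.zeros(2)
inputs = rng.normal(size=(4, 3))               # (batch_size, input_dim)

outputs = inputs.dot(weights.T) + biases       # fprop: (4, 2)
grads_wrt_outputs = np.ones_like(outputs)
grads_wrt_inputs = grads_wrt_outputs.dot(weights)    # bprop: (4, 3)
grads_wrt_weights = grads_wrt_outputs.T.dot(inputs)  # (2, 3), matches weights
grads_wrt_biases = grads_wrt_outputs.sum(axis=0)     # (2,), matches biases
assert grads_wrt_inputs.shape == inputs.shape
assert grads_wrt_weights.shape == weights.shape
assert grads_wrt_biases.shape == biases.shape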
class SigmoidLayer(Layer):
"""Layer implementing an element-wise logistic sigmoid transformation."""
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
For inputs `x` and outputs `y` this corresponds to
`y = 1 / (1 + exp(-x))`.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
return 1. / (1. + np.exp(-inputs))
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return grads_wrt_outputs * outputs * (1. - outputs)
def __repr__(self):
return 'SigmoidLayer'
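The bprop here reuses the cached forward outputs, relying on the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); a quick numerical check of that identity (illustrative only):

import numpy as np

sig = lambda v: 1. / (1. + np.exp(-v))
x = np.linspace(-4., 4., 9)
eps = 1e-6
numeric = (sig(x + eps) - sig(x - eps)) / (2 * eps)
assert np.allclose(numeric, sig(x) * (1. - sig(x)), atol=1e-8)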
class SoftmaxLayer(Layer):
"""Layer implementing a softmax transformation."""
def fprop(self, inputs):
"""Forward propagates activations through the layer transformation.
For inputs `x` and outputs `y` this corresponds to
`y = exp(x) / sum(exp(x))`.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
Returns:
outputs: Array of layer outputs of shape (batch_size, output_dim).
"""
exp_inputs = np.exp(inputs)
return exp_inputs / exp_inputs.sum(-1)[:, None]
def bprop(self, inputs, outputs, grads_wrt_outputs):
"""Back propagates gradients through a layer.
Given gradients with respect to the outputs of the layer calculates the
gradients with respect to the layer inputs.
Args:
inputs: Array of layer inputs of shape (batch_size, input_dim).
outputs: Array of layer outputs calculated in forward pass of
shape (batch_size, output_dim).
grads_wrt_outputs: Array of gradients with respect to the layer
outputs of shape (batch_size, output_dim).
Returns:
Array of gradients with respect to the layer inputs of shape
(batch_size, input_dim).
"""
return (outputs * (grads_wrt_outputs -
(grads_wrt_outputs * outputs).sum(-1)[:, None]))
def __repr__(self):
return 'SoftmaxLayer'
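The vectorised bprop above is a row-wise product with the softmax Jacobian J = diag(p) - p p^T; a check against the explicit Jacobian (illustrative, formulas restated inline):

import numpy as np

rng = np.random.RandomState(3)
inputs = rng.normal(size=(4, 5))
grads_wrt_outputs = rng.normal(size=(4, 5))

exp_inputs = np.exp(inputs)
outputs = exp_inputs / exp_inputs.sum(-1)[:, None]
compact = outputs * (grads_wrt_outputs -
                     (grads_wrt_outputs * outputs).sum(-1)[:, None])

explicit = np.empty_like(compact)
for b, (p, g) in enumerate(zip(outputs, grads_wrt_outputs)):
    jacobian = np.diag(p) - np.outer(p, p)  # (output_dim, output_dim)
    explicit[b] = jacobian.dot(g)
assert np.allclose(compact, explicit)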

View File

@@ -59,9 +59,75 @@ class SingleLayerModel(object):
"""
return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)
def params_cost(self):
"""Calculates the parameter dependent cost term of the model."""
return self.layer.params_cost()
def __repr__(self):
return 'SingleLayerModel(' + str(self.layer) + ')'
class MultipleLayerModel(object):
"""A model consisting of multiple layers applied sequentially."""
def __init__(self, layers):
"""Create a new multiple layer model instance.
Args:
layers: List of the layer objects defining the model in the
order they should be applied from inputs to outputs.
"""
self.layers = layers
@property
def params(self):
"""A list of all of the parameters of the model."""
params = []
for layer in self.layers:
if isinstance(layer, LayerWithParameters):
params += layer.params
return params
def fprop(self, inputs):
"""Forward propagates a batch of inputs through the model.
Args:
inputs: Batch of inputs to the model.
Returns:
List of the activations at the output of all layers of the model
plus the inputs (to the first layer) as the first element. The
last element of the list corresponds to the model outputs.
"""
activations = [inputs]
for i, layer in enumerate(self.layers):
activations.append(self.layers[i].fprop(activations[i]))
return activations
def grads_wrt_params(self, activations, grads_wrt_outputs):
"""Calculates gradients with respect to the model parameters.
Args:
activations: List of all activations from forward pass through
model using `fprop`.
grads_wrt_outputs: Gradient, with respect to the model outputs,
of the scalar function whose parameter gradients are being
calculated.
Returns:
List of gradients of the scalar function with respect to all model
parameters.
"""
grads_wrt_params = []
for i, layer in enumerate(self.layers[::-1]):
inputs = activations[-i - 2]
outputs = activations[-i - 1]
grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
if isinstance(layer, LayerWithParameters):
grads_wrt_params += layer.grads_wrt_params(
inputs, grads_wrt_outputs)[::-1]
grads_wrt_outputs = grads_wrt_inputs
return grads_wrt_params[::-1]
def __repr__(self):
return 'SingleLayerModel(' + str(layer) + ')'
return (
'MultipleLayerModel(\n    ' +
'\n '.join([str(layer) for layer in self.layers]) +
'\n)'
)
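A usage sketch for the new MultipleLayerModel. The module paths mlp.layers and mlp.models are assumptions (the diff page does not show the file names); adjust the imports to wherever the classes above live.

import numpy as np
from mlp.layers import AffineLayer, SigmoidLayer  # assumed module path
from mlp.models import MultipleLayerModel         # assumed module path

model = MultipleLayerModel([
    AffineLayer(input_dim=4, output_dim=3),
    SigmoidLayer(),
    AffineLayer(input_dim=3, output_dim=2),
])
inputs = np.ones((8, 4))
activations = model.fprop(inputs)
# len(activations) == number of layers + 1; the last entry is the model output
assert activations[-1].shape == (8, 2)
grads = model.grads_wrt_params(activations, np.ones((8, 2)))
# One gradient array per parameter, in the same order as model.params
assert len(grads) == len(model.params)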

View File

@@ -121,6 +121,7 @@ class Optimiser(object):
and the second being a dict mapping the labels for the statistics
recorded to their column index in the array.
"""
start_train_time = time.process_time()
run_stats = [list(self.get_epoch_stats().values())]
for epoch in range(1, num_epochs + 1):
start_time = time.process_time()
@@ -130,5 +131,7 @@
stats = self.get_epoch_stats()
self.log_stats(epoch, epoch_time, stats)
run_stats.append(list(stats.values()))
return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}
finish_train_time = time.process_time()
total_train_time = finish_train_time - start_train_time
return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}, total_train_time
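With this change train now returns a third value, the total process time spent in the training loop. A hedged usage sketch (the optimiser construction and the 'error(train)' statistics key are assumptions not shown in this diff):

# `optimiser` is assumed to be an already-constructed Optimiser instance.
stats, keys, run_time = optimiser.train(num_epochs=10)
print('final error(train) = {0:.4f}'.format(stats[-1, keys['error(train)']]))
print('total training time = {0:.1f}s'.format(run_time))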

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large