Update lab 3
parent 3692ab7532
commit 9f1f3ccd04
.gitignore (vendored): 5 lines changed
@@ -25,6 +25,7 @@ var/
 *.egg-info/
 .installed.cfg
 *.egg
+etc/
 
 # PyInstaller
 # Usually these files are written by a python script from a template
@@ -61,3 +62,7 @@ target/
 
 # Notebook stuff
 notebooks/.ipynb_checkpoints/
+
+# Data folder
+data/
+solutions/
mlp/errors.py: 146 lines changed
@@ -23,10 +23,9 @@ class SumOfSquaredDiffsError(object):
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Scalar error function value.
+            Scalar cost function value.
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))
 
     def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.
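The expression filled in above is half the mean over the batch of the per-example squared distance between outputs and targets, i.e. E = (1 / 2N) * sum_n ||y_n - t_n||^2 for a batch of N examples. A minimal standalone check of the vectorised form against an explicit loop (plain numpy only; the example values are arbitrary):

    import numpy as np

    outputs = np.array([[1.0, 2.0], [0.5, -1.0]])
    targets = np.array([[0.0, 2.0], [1.0, -1.0]])

    # Vectorised form used in the diff above.
    error = 0.5 * np.mean(np.sum((outputs - targets) ** 2, axis=1))

    # Same quantity written as an explicit loop over the batch.
    batch_size = outputs.shape[0]
    error_loop = sum(
        0.5 * np.sum((outputs[n] - targets[n]) ** 2)
        for n in range(batch_size)) / batch_size

    assert np.allclose(error, error_loop)  # both give 0.3125 here

The gradient implemented in the next hunk, (outputs - targets) / batch_size, is the derivative of this quantity with respect to each output.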
@@ -36,11 +35,142 @@ class SumOfSquaredDiffsError(object):
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of error function with respect to outputs. This should be
-            an array of shape (batch_size, output_dim).
+            Gradient of error function with respect to outputs.
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        return (outputs - targets) / outputs.shape[0]
 
     def __repr__(self):
-        return 'SumOfSquaredDiffsError'
+        return 'MeanSquaredErrorCost'
+
+
+class BinaryCrossEntropyError(object):
+    """Binary cross entropy error."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        return -np.mean(
+            targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        return ((1. - targets) / (1. - outputs) -
+                (targets / outputs)) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'BinaryCrossEntropyError'
+
+
+class BinaryCrossEntropySigmoidError(object):
+    """Binary cross entropy error with logistic sigmoid applied to outputs."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        probs = 1. / (1. + np.exp(-outputs))
+        return -np.mean(
+            targets * np.log(probs) + (1. - targets) * np.log(1. - probs))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        probs = 1. / (1. + np.exp(-outputs))
+        return (probs - targets) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'BinaryCrossEntropySigmoidError'
+
+
+class CrossEntropyError(object):
+    """Multi-class cross entropy error."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        return -np.mean(np.sum(targets * np.log(outputs), axis=1))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        return -(targets / outputs) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'CrossEntropyError'
+
+
+class CrossEntropySoftmaxError(object):
+    """Multi-class cross entropy error with Softmax applied to outputs."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        probs = np.exp(outputs)
+        probs /= probs.sum(-1)[:, None]
+        return -np.mean(np.sum(targets * np.log(probs), axis=1))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        probs = np.exp(outputs)
+        probs /= probs.sum(-1)[:, None]
+        return (probs - targets) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'CrossEntropySoftmaxError'
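Each of the added error classes pairs an analytic grad with the scalar __call__, so they can be sanity-checked with a central finite-difference comparison. A sketch of such a check, assuming the classes are importable from mlp.errors as added above; the check_grad helper is illustrative and not part of the package:

    import numpy as np
    from mlp.errors import CrossEntropySoftmaxError

    def check_grad(error, outputs, targets, eps=1e-6):
        """Compares the analytic gradient with central finite differences."""
        analytic = error.grad(outputs, targets)
        numeric = np.zeros_like(outputs)
        for idx in np.ndindex(*outputs.shape):
            shift = np.zeros_like(outputs)
            shift[idx] = eps
            numeric[idx] = (error(outputs + shift, targets) -
                            error(outputs - shift, targets)) / (2. * eps)
        return np.max(np.abs(analytic - numeric))

    rng = np.random.RandomState(27)
    outputs = rng.normal(size=(5, 3))
    targets = np.eye(3)[rng.randint(3, size=5)]  # one-hot targets
    # Should print a value close to zero (finite-difference error only).
    print(check_grad(CrossEntropySoftmaxError(), outputs, targets))

The same check applies to the other classes, provided the outputs are kept strictly inside the domain where the logarithms are defined (for example in (0, 1) for BinaryCrossEntropyError).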
mlp/layers.py: 132 lines changed
@@ -73,7 +73,18 @@ class LayerWithParameters(Layer):
         """Returns a list of parameters of layer.
 
         Returns:
-            List of current parameter values.
+            List of current parameter values. This list should be in the
+            corresponding order to the `values` argument to `set_params`.
+        """
+        raise NotImplementedError()
+
+    @params.setter
+    def params(self, values):
+        """Sets layer parameters from a list of values.
+
+        Args:
+            values: List of values to set parameters to. This list should be
+                in the corresponding order to what is returned by `get_params`.
         """
         raise NotImplementedError()
 
@@ -86,8 +97,7 @@ class AffineLayer(LayerWithParameters):
 
     def __init__(self, input_dim, output_dim,
                  weights_initialiser=init.UniformInit(-0.1, 0.1),
-                 biases_initialiser=init.ConstantInit(0.),
-                 weights_cost=None, biases_cost=None):
+                 biases_initialiser=init.ConstantInit(0.)):
         """Initialises a parameterised affine layer.
 
         Args:
@@ -113,8 +123,26 @@ class AffineLayer(LayerWithParameters):
         Returns:
             outputs: Array of layer outputs of shape (batch_size, output_dim).
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        return inputs.dot(self.weights.T) + self.biases
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return grads_wrt_outputs.dot(self.weights)
 
     def grads_wrt_params(self, inputs, grads_wrt_outputs):
         """Calculates gradients with respect to layer parameters.
@@ -128,14 +156,104 @@ class AffineLayer(LayerWithParameters):
             list of arrays of gradients with respect to the layer parameters
             `[grads_wrt_weights, grads_wrt_biases]`.
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
+        grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
+        return [grads_wrt_weights, grads_wrt_biases]
 
     @property
     def params(self):
         """A list of layer parameter values: `[weights, biases]`."""
         return [self.weights, self.biases]
+
+    @params.setter
+    def params(self, values):
+        self.weights = values[0]
+        self.biases = values[1]
 
     def __repr__(self):
         return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
             self.input_dim, self.output_dim)
+
+
+class SigmoidLayer(Layer):
+    """Layer implementing an element-wise logistic sigmoid transformation."""
+
+    def fprop(self, inputs):
+        """Forward propagates activations through the layer transformation.
+
+        For inputs `x` and outputs `y` this corresponds to
+        `y = 1 / (1 + exp(-x))`.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        return 1. / (1. + np.exp(-inputs))
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return grads_wrt_outputs * outputs * (1. - outputs)
+
+    def __repr__(self):
+        return 'SigmoidLayer'
+
+
+class SoftmaxLayer(Layer):
+    """Layer implementing a softmax transformation."""
+
+    def fprop(self, inputs):
+        """Forward propagates activations through the layer transformation.
+
+        For inputs `x` and outputs `y` this corresponds to
+
+        `y = exp(x) / sum(exp(x))`.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        exp_inputs = np.exp(inputs)
+        return exp_inputs / exp_inputs.sum(-1)[:, None]
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return (outputs * (grads_wrt_outputs -
+                (grads_wrt_outputs * outputs).sum(-1)[:, None]))
+
+    def __repr__(self):
+        return 'SoftmaxLayer'
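The new AffineLayer.bprop and grads_wrt_params methods and the added SigmoidLayer can be exercised together on a small random batch. A sketch assuming the classes are importable from mlp.layers as defined in this diff and can be constructed with the default initialisers shown in the __init__ signature above; it only checks that the returned shapes match the docstrings:

    import numpy as np
    from mlp.layers import AffineLayer, SigmoidLayer

    rng = np.random.RandomState(27)
    inputs = rng.normal(size=(10, 4))        # batch_size=10, input_dim=4

    affine = AffineLayer(input_dim=4, output_dim=3)
    sigmoid = SigmoidLayer()

    # Forward pass through both layers.
    affine_out = affine.fprop(inputs)        # shape (10, 3)
    sigmoid_out = sigmoid.fprop(affine_out)  # shape (10, 3)

    # Backward pass with arbitrary upstream gradients.
    grads_wrt_sigmoid_out = rng.normal(size=sigmoid_out.shape)
    grads_wrt_affine_out = sigmoid.bprop(
        affine_out, sigmoid_out, grads_wrt_sigmoid_out)
    grads_wrt_inputs = affine.bprop(inputs, affine_out, grads_wrt_affine_out)

    grads_wrt_weights, grads_wrt_biases = affine.grads_wrt_params(
        inputs, grads_wrt_affine_out)

    assert grads_wrt_inputs.shape == inputs.shape           # (10, 4)
    assert grads_wrt_weights.shape == affine.weights.shape  # (3, 4)
    assert grads_wrt_biases.shape == affine.biases.shape    # (3,)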
@@ -59,9 +59,75 @@ class SingleLayerModel(object):
         """
         return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)
 
-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        return self.layer.params_cost()
+    def __repr__(self):
+        return 'SingleLayerModel(' + str(self.layer) + ')'
+
+
+class MultipleLayerModel(object):
+    """A model consisting of multiple layers applied sequentially."""
+
+    def __init__(self, layers):
+        """Create a new multiple layer model instance.
+
+        Args:
+            layers: List of the the layer objecst defining the model in the
+                order they should be applied from inputs to outputs.
+        """
+        self.layers = layers
+
+    @property
+    def params(self):
+        """A list of all of the parameters of the model."""
+        params = []
+        for layer in self.layers:
+            if isinstance(layer, LayerWithParameters):
+                params += layer.params
+        return params
+
+    def fprop(self, inputs):
+        """Forward propagates a batch of inputs through the model.
+
+        Args:
+            inputs: Batch of inputs to the model.
+
+        Returns:
+            List of the activations at the output of all layers of the model
+            plus the inputs (to the first layer) as the first element. The
+            last element of the list corresponds to the model outputs.
+        """
+        activations = [inputs]
+        for i, layer in enumerate(self.layers):
+            activations.append(self.layers[i].fprop(activations[i]))
+        return activations
+
+    def grads_wrt_params(self, activations, grads_wrt_outputs):
+        """Calculates gradients with respect to the model parameters.
+
+        Args:
+            activations: List of all activations from forward pass through
+                model using `fprop`.
+            grads_wrt_outputs: Gradient with respect to the model outputs of
+                the scalar function parameter gradients are being calculated
+                for.
+
+        Returns:
+            List of gradients of the scalar function with respect to all model
+            parameters.
+        """
+        grads_wrt_params = []
+        for i, layer in enumerate(self.layers[::-1]):
+            inputs = activations[-i - 2]
+            outputs = activations[-i - 1]
+            grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
+            if isinstance(layer, LayerWithParameters):
+                grads_wrt_params += layer.grads_wrt_params(
+                    inputs, grads_wrt_outputs)[::-1]
+            grads_wrt_outputs = grads_wrt_inputs
+        return grads_wrt_params[::-1]
 
     def __repr__(self):
-        return 'SingleLayerModel(' + str(layer) + ')'
+        return (
+            'MultiLayerModel(\n ' +
+            '\n '.join([str(layer) for layer in self.layers]) +
+            '\n)'
+        )
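The added MultipleLayerModel composes the layer fprop, bprop and grads_wrt_params methods above into a full forward and backward pass. A usage sketch follows; the import path mlp.models is an assumption, since the file header for this hunk is not visible in the capture, while the other names are taken from the diffs above:

    import numpy as np
    from mlp.models import MultipleLayerModel  # assumed module path
    from mlp.layers import AffineLayer, SigmoidLayer, SoftmaxLayer
    from mlp.errors import CrossEntropyError

    rng = np.random.RandomState(27)
    inputs = rng.normal(size=(20, 4))
    targets = np.eye(3)[rng.randint(3, size=20)]  # one-hot targets

    model = MultipleLayerModel([
        AffineLayer(4, 8),
        SigmoidLayer(),
        AffineLayer(8, 3),
        SoftmaxLayer(),
    ])
    error = CrossEntropyError()

    activations = model.fprop(inputs)  # inputs plus every layer's outputs
    outputs = activations[-1]          # model outputs, shape (20, 3)
    print(error(outputs, targets))

    grads_wrt_outputs = error.grad(outputs, targets)
    grads = model.grads_wrt_params(activations, grads_wrt_outputs)
    print(len(grads))  # 4: weights and biases for each of the two affine layers

Ending the stack with SoftmaxLayer and using CrossEntropyError keeps the outputs as valid probabilities; the CrossEntropySoftmaxError class added earlier fuses the two and would instead be paired with a model whose last layer is affine.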
@@ -121,6 +121,7 @@ class Optimiser(object):
             and the second being a dict mapping the labels for the statistics
             recorded to their column index in the array.
         """
+        start_train_time = time.process_time()
         run_stats = [list(self.get_epoch_stats().values())]
         for epoch in range(1, num_epochs + 1):
             start_time = time.process_time()
@@ -130,5 +131,7 @@ class Optimiser(object):
             stats = self.get_epoch_stats()
             self.log_stats(epoch, epoch_time, stats)
             run_stats.append(list(stats.values()))
-        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}
+        finish_train_time = time.process_time()
+        total_train_time = finish_train_time - start_train_time
+        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}, total_train_time
File diff suppressed because one or more lines are too long

notebooks/03_Multiple_layer_models.ipynb (new file): 1117 lines
File diff suppressed because it is too large