Update lab 3

This commit is contained in:
parent 3692ab7532
commit 9f1f3ccd04
.gitignore (vendored): 5 changed lines
@@ -25,6 +25,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
etc/

# PyInstaller
# Usually these files are written by a python script from a template
@@ -61,3 +62,7 @@ target/

# Notebook stuff
notebooks/.ipynb_checkpoints/

# Data folder
data/
solutions/
mlp/errors.py: 146 changed lines
@@ -23,10 +23,9 @@ class SumOfSquaredDiffsError(object):
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
            Scalar cost function value.
        """
        #TODO write your code here
        raise NotImplementedError()
        return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.
@@ -36,11 +35,142 @@ class SumOfSquaredDiffsError(object):
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs. This should be
            an array of shape (batch_size, output_dim).
            Gradient of error function with respect to outputs.
        """
        #TODO write your code here
        raise NotImplementedError()
        return (outputs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'SumOfSquaredDiffsError'
        return 'MeanSquaredErrorCost'


class BinaryCrossEntropyError(object):
    """Binary cross entropy error."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        return -np.mean(
            targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        return ((1. - targets) / (1. - outputs) -
                (targets / outputs)) / outputs.shape[0]

    def __repr__(self):
        return 'BinaryCrossEntropyError'


class BinaryCrossEntropySigmoidError(object):
    """Binary cross entropy error with logistic sigmoid applied to outputs."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        probs = 1. / (1. + np.exp(-outputs))
        return -np.mean(
            targets * np.log(probs) + (1. - targets) * np.log(1. - probs))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        probs = 1. / (1. + np.exp(-outputs))
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'BinaryCrossEntropySigmoidError'


class CrossEntropyError(object):
    """Multi-class cross entropy error."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        return -np.mean(np.sum(targets * np.log(outputs), axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        return -(targets / outputs) / outputs.shape[0]

    def __repr__(self):
        return 'CrossEntropyError'


class CrossEntropySoftmaxError(object):
    """Multi-class cross entropy error with Softmax applied to outputs."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        probs = np.exp(outputs)
        probs /= probs.sum(-1)[:, None]
        return -np.mean(np.sum(targets * np.log(probs), axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        probs = np.exp(outputs)
        probs /= probs.sum(-1)[:, None]
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'CrossEntropySoftmaxError'
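A quick way to sanity-check the `grad` implementations above is a finite-difference comparison against the corresponding `__call__`. The following is a minimal standalone sketch (not part of the commit; it assumes only NumPy and inlines the CrossEntropySoftmaxError maths as plain functions):

import numpy as np

def error(outputs, targets):
    # softmax then mean cross entropy, as in CrossEntropySoftmaxError.__call__
    probs = np.exp(outputs)
    probs /= probs.sum(-1)[:, None]
    return -np.mean(np.sum(targets * np.log(probs), axis=1))

def grad(outputs, targets):
    # analytic gradient, as in CrossEntropySoftmaxError.grad
    probs = np.exp(outputs)
    probs /= probs.sum(-1)[:, None]
    return (probs - targets) / outputs.shape[0]

rng = np.random.RandomState(0)
outputs = rng.randn(5, 3)
targets = np.eye(3)[rng.randint(0, 3, 5)]  # one-hot targets

eps = 1e-6
num_grad = np.zeros_like(outputs)
for idx in np.ndindex(*outputs.shape):
    shifted = outputs.copy()
    shifted[idx] += eps
    num_grad[idx] = (error(shifted, targets) - error(outputs, targets)) / eps

print(np.allclose(num_grad, grad(outputs, targets), atol=1e-4))  # expect True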
mlp/layers.py: 132 changed lines
@@ -73,7 +73,18 @@ class LayerWithParameters(Layer):
        """Returns a list of parameters of layer.

        Returns:
            List of current parameter values.
            List of current parameter values. This list should be in the
            corresponding order to the `values` argument to `set_params`.
        """
        raise NotImplementedError()

    @params.setter
    def params(self, values):
        """Sets layer parameters from a list of values.

        Args:
            values: List of values to set parameters to. This list should be
                in the corresponding order to what is returned by `get_params`.
        """
        raise NotImplementedError()

@@ -86,8 +97,7 @@ class AffineLayer(LayerWithParameters):

    def __init__(self, input_dim, output_dim,
                 weights_initialiser=init.UniformInit(-0.1, 0.1),
                 biases_initialiser=init.ConstantInit(0.),
                 weights_cost=None, biases_cost=None):
                 biases_initialiser=init.ConstantInit(0.)):
        """Initialises a parameterised affine layer.

        Args:
@@ -113,8 +123,26 @@ class AffineLayer(LayerWithParameters):
        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        #TODO write your code here
        raise NotImplementedError()
        return inputs.dot(self.weights.T) + self.biases

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs.dot(self.weights)

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.
@@ -128,14 +156,104 @@ class AffineLayer(LayerWithParameters):
            list of arrays of gradients with respect to the layer parameters
            `[grads_wrt_weights, grads_wrt_biases]`.
        """
        #TODO write your code here
        raise NotImplementedError()

        grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
        grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
        return [grads_wrt_weights, grads_wrt_biases]

    @property
    def params(self):
        """A list of layer parameter values: `[weights, biases]`."""
        return [self.weights, self.biases]

    @params.setter
    def params(self, values):
        self.weights = values[0]
        self.biases = values[1]

    def __repr__(self):
        return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
            self.input_dim, self.output_dim)


class SigmoidLayer(Layer):
    """Layer implementing an element-wise logistic sigmoid transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to
        `y = 1 / (1 + exp(-x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return 1. / (1. + np.exp(-inputs))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs * outputs * (1. - outputs)

    def __repr__(self):
        return 'SigmoidLayer'


class SoftmaxLayer(Layer):
    """Layer implementing a softmax transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to

        `y = exp(x) / sum(exp(x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        exp_inputs = np.exp(inputs)
        return exp_inputs / exp_inputs.sum(-1)[:, None]

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return (outputs * (grads_wrt_outputs -
                           (grads_wrt_outputs * outputs).sum(-1)[:, None]))

    def __repr__(self):
        return 'SoftmaxLayer'
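The vectorised expression in `SoftmaxLayer.bprop` above is equivalent to multiplying the incoming gradient by the softmax Jacobian. A small illustrative check for a single input vector (assuming only NumPy, not part of the commit):

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4)                      # single input vector
y = np.exp(x) / np.exp(x).sum()       # softmax outputs
g = rng.randn(4)                      # gradients with respect to outputs

# Explicit Jacobian: J[i, j] = y[i] * (delta_ij - y[j])
jacobian = np.diag(y) - np.outer(y, y)
explicit = jacobian.dot(g)

# Vectorised form used in the layer code above (batch axis dropped)
vectorised = y * (g - (g * y).sum())

print(np.allclose(explicit, vectorised))  # expect True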
@@ -59,9 +59,75 @@ class SingleLayerModel(object):
        """
        return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)

    def params_cost(self):
        """Calculates the parameter dependent cost term of the model."""
        return self.layer.params_cost()
    def __repr__(self):
        return 'SingleLayerModel(' + str(self.layer) + ')'


class MultipleLayerModel(object):
    """A model consisting of multiple layers applied sequentially."""

    def __init__(self, layers):
        """Create a new multiple layer model instance.

        Args:
            layers: List of the layer objects defining the model in the
                order they should be applied from inputs to outputs.
        """
        self.layers = layers

    @property
    def params(self):
        """A list of all of the parameters of the model."""
        params = []
        for layer in self.layers:
            if isinstance(layer, LayerWithParameters):
                params += layer.params
        return params

    def fprop(self, inputs):
        """Forward propagates a batch of inputs through the model.

        Args:
            inputs: Batch of inputs to the model.

        Returns:
            List of the activations at the output of all layers of the model
            plus the inputs (to the first layer) as the first element. The
            last element of the list corresponds to the model outputs.
        """
        activations = [inputs]
        for i, layer in enumerate(self.layers):
            activations.append(self.layers[i].fprop(activations[i]))
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
        """Calculates gradients with respect to the model parameters.

        Args:
            activations: List of all activations from forward pass through
                model using `fprop`.
            grads_wrt_outputs: Gradient with respect to the model outputs of
                the scalar function parameter gradients are being calculated
                for.

        Returns:
            List of gradients of the scalar function with respect to all model
            parameters.
        """
        grads_wrt_params = []
        for i, layer in enumerate(self.layers[::-1]):
            inputs = activations[-i - 2]
            outputs = activations[-i - 1]
            grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
            if isinstance(layer, LayerWithParameters):
                grads_wrt_params += layer.grads_wrt_params(
                    inputs, grads_wrt_outputs)[::-1]
            grads_wrt_outputs = grads_wrt_inputs
        return grads_wrt_params[::-1]

    def __repr__(self):
        return 'SingleLayerModel(' + str(layer) + ')'
        return (
            'MultiLayerModel(\n ' +
            '\n '.join([str(layer) for layer in self.layers]) +
            '\n)'
        )
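To see how the `activations` list built by `fprop` lines up with the reversed-layer loop in `grads_wrt_params` (the `activations[-i - 2]` / `activations[-i - 1]` indexing), here is a toy sketch with two stand-in layers; it is illustrative only and not taken from the repository:

import numpy as np

# Two toy "layers": scale by 2, then scale by 3.
layers = [lambda x: 2 * x, lambda x: 3 * x]

inputs = np.ones((1, 2))
activations = [inputs]
for i, layer in enumerate(layers):
    activations.append(layer(activations[i]))

# activations[0] is the model input, activations[-1] the model output:
# inputs, then 2 * inputs, then 6 * inputs.
print([a.tolist() for a in activations])

# Iterating layers in reverse, layer i sees inputs = activations[-i - 2]
# and outputs = activations[-i - 1], exactly as in grads_wrt_params above.
for i, layer in enumerate(layers[::-1]):
    print(i, activations[-i - 2], activations[-i - 1])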
@@ -121,6 +121,7 @@ class Optimiser(object):
            and the second being a dict mapping the labels for the statistics
            recorded to their column index in the array.
        """
        start_train_time = time.process_time()
        run_stats = [list(self.get_epoch_stats().values())]
        for epoch in range(1, num_epochs + 1):
            start_time = time.process_time()
@@ -130,5 +131,7 @@ class Optimiser(object):
            stats = self.get_epoch_stats()
            self.log_stats(epoch, epoch_time, stats)
            run_stats.append(list(stats.values()))
        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}
        finish_train_time = time.process_time()
        total_train_time = finish_train_time - start_train_time
        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}, total_train_time
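The timing change above brackets the epoch loop with `time.process_time()` calls and returns the elapsed time as an extra value. A minimal standalone sketch of the same pattern, with a placeholder workload standing in for the real per-epoch training (illustrative only):

import time

def train_one_epoch():
    # Placeholder workload; in the optimiser this is the per-epoch training pass.
    sum(i * i for i in range(10000))

start_train_time = time.process_time()
for epoch in range(1, 4):
    train_one_epoch()
total_train_time = time.process_time() - start_train_time

# Note: process_time() measures CPU time of the process, not wall-clock time.
print('total training time: {0:.3f}s'.format(total_train_time))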
File diff suppressed because one or more lines are too long

notebooks/03_Multiple_layer_models.ipynb (new file): 1117 lines
File diff suppressed because it is too large