Update lab 3

This commit is contained in:
parent 3692ab7532
commit 9f1f3ccd04
.gitignore (vendored): 5 changed lines
@@ -25,6 +25,7 @@ var/
*.egg-info/
.installed.cfg
*.egg
etc/

# PyInstaller
# Usually these files are written by a python script from a template
@@ -61,3 +62,7 @@ target/

# Notebook stuff
notebooks/.ipynb_checkpoints/

# Data folder
data/
solutions/
mlp/errors.py: 146 changed lines
@@ -23,10 +23,9 @@ class SumOfSquaredDiffsError(object):
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
            Scalar cost function value.
        """
        #TODO write your code here
        raise NotImplementedError()
        return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.
@@ -36,11 +35,142 @@ class SumOfSquaredDiffsError(object):
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs. This should be
            an array of shape (batch_size, output_dim).
            Gradient of error function with respect to outputs.
        """
        #TODO write your code here
        raise NotImplementedError()
        return (outputs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'SumOfSquaredDiffsError'
        return 'MeanSquaredErrorCost'


class BinaryCrossEntropyError(object):
    """Binary cross entropy error."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        return -np.mean(
            targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        return ((1. - targets) / (1. - outputs) -
                (targets / outputs)) / outputs.shape[0]

    def __repr__(self):
        return 'BinaryCrossEntropyError'


class BinaryCrossEntropySigmoidError(object):
    """Binary cross entropy error with logistic sigmoid applied to outputs."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        probs = 1. / (1. + np.exp(-outputs))
        return -np.mean(
            targets * np.log(probs) + (1. - targets) * np.log(1. - probs))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        probs = 1. / (1. + np.exp(-outputs))
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'BinaryCrossEntropySigmoidError'


class CrossEntropyError(object):
    """Multi-class cross entropy error."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        return -np.mean(np.sum(targets * np.log(outputs), axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        return -(targets / outputs) / outputs.shape[0]

    def __repr__(self):
        return 'CrossEntropyError'


class CrossEntropySoftmaxError(object):
    """Multi-class cross entropy error with Softmax applied to outputs."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        probs = np.exp(outputs)
        probs /= probs.sum(-1)[:, None]
        return -np.mean(np.sum(targets * np.log(probs), axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        probs = np.exp(outputs)
        probs /= probs.sum(-1)[:, None]
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'CrossEntropySoftmaxError'
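A quick way to sanity-check the `grad` implementations above is a finite-difference comparison against the corresponding `__call__`. The following is a minimal standalone sketch (not part of the commit; it assumes only NumPy and inlines the CrossEntropySoftmaxError maths as plain functions):

import numpy as np

def error(outputs, targets):
    # softmax then mean cross entropy, as in CrossEntropySoftmaxError.__call__
    probs = np.exp(outputs)
    probs /= probs.sum(-1)[:, None]
    return -np.mean(np.sum(targets * np.log(probs), axis=1))

def grad(outputs, targets):
    # analytic gradient, as in CrossEntropySoftmaxError.grad
    probs = np.exp(outputs)
    probs /= probs.sum(-1)[:, None]
    return (probs - targets) / outputs.shape[0]

rng = np.random.RandomState(0)
outputs = rng.randn(5, 3)
targets = np.eye(3)[rng.randint(0, 3, 5)]  # one-hot targets

eps = 1e-6
num_grad = np.zeros_like(outputs)
for idx in np.ndindex(*outputs.shape):
    shifted = outputs.copy()
    shifted[idx] += eps
    num_grad[idx] = (error(shifted, targets) - error(outputs, targets)) / eps

print(np.allclose(num_grad, grad(outputs, targets), atol=1e-4))  # expect True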
mlp/layers.py: 132 changed lines
@@ -73,7 +73,18 @@ class LayerWithParameters(Layer):
        """Returns a list of parameters of layer.

        Returns:
            List of current parameter values.
            List of current parameter values. This list should be in the
            corresponding order to the `values` argument to `set_params`.
        """
        raise NotImplementedError()

    @params.setter
    def params(self, values):
        """Sets layer parameters from a list of values.

        Args:
            values: List of values to set parameters to. This list should be
                in the corresponding order to what is returned by `get_params`.
        """
        raise NotImplementedError()

@@ -86,8 +97,7 @@ class AffineLayer(LayerWithParameters):

    def __init__(self, input_dim, output_dim,
                 weights_initialiser=init.UniformInit(-0.1, 0.1),
                 biases_initialiser=init.ConstantInit(0.),
                 weights_cost=None, biases_cost=None):
                 biases_initialiser=init.ConstantInit(0.)):
        """Initialises a parameterised affine layer.

        Args:
@@ -113,8 +123,26 @@ class AffineLayer(LayerWithParameters):
        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        #TODO write your code here
        raise NotImplementedError()
        return inputs.dot(self.weights.T) + self.biases

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs.dot(self.weights)

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.
@@ -128,14 +156,104 @@ class AffineLayer(LayerWithParameters):
            list of arrays of gradients with respect to the layer parameters
            `[grads_wrt_weights, grads_wrt_biases]`.
        """
        #TODO write your code here
        raise NotImplementedError()

        grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
        grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
        return [grads_wrt_weights, grads_wrt_biases]

    @property
    def params(self):
        """A list of layer parameter values: `[weights, biases]`."""
        return [self.weights, self.biases]

    @params.setter
    def params(self, values):
        self.weights = values[0]
        self.biases = values[1]

    def __repr__(self):
        return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
            self.input_dim, self.output_dim)


class SigmoidLayer(Layer):
    """Layer implementing an element-wise logistic sigmoid transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to
        `y = 1 / (1 + exp(-x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        return 1. / (1. + np.exp(-inputs))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return grads_wrt_outputs * outputs * (1. - outputs)

    def __repr__(self):
        return 'SigmoidLayer'


class SoftmaxLayer(Layer):
    """Layer implementing a softmax transformation."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to

        `y = exp(x) / sum(exp(x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        exp_inputs = np.exp(inputs)
        return exp_inputs / exp_inputs.sum(-1)[:, None]

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        return (outputs * (grads_wrt_outputs -
                           (grads_wrt_outputs * outputs).sum(-1)[:, None]))

    def __repr__(self):
        return 'SoftmaxLayer'
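The vectorised expression in `SoftmaxLayer.bprop` above is equivalent to multiplying the incoming gradient by the softmax Jacobian. A small illustrative check for a single input vector (assuming only NumPy, not part of the commit):

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4)                      # single input vector
y = np.exp(x) / np.exp(x).sum()       # softmax outputs
g = rng.randn(4)                      # gradients with respect to outputs

# Explicit Jacobian: J[i, j] = y[i] * (delta_ij - y[j])
jacobian = np.diag(y) - np.outer(y, y)
explicit = jacobian.dot(g)

# Vectorised form used in the layer code above (batch axis dropped)
vectorised = y * (g - (g * y).sum())

print(np.allclose(explicit, vectorised))  # expect True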
@@ -59,9 +59,75 @@ class SingleLayerModel(object):
        """
        return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)

    def params_cost(self):
        """Calculates the parameter dependent cost term of the model."""
        return self.layer.params_cost()
    def __repr__(self):
        return 'SingleLayerModel(' + str(self.layer) + ')'


class MultipleLayerModel(object):
    """A model consisting of multiple layers applied sequentially."""

    def __init__(self, layers):
        """Create a new multiple layer model instance.

        Args:
            layers: List of the layer objects defining the model in the
                order they should be applied from inputs to outputs.
        """
        self.layers = layers

    @property
    def params(self):
        """A list of all of the parameters of the model."""
        params = []
        for layer in self.layers:
            if isinstance(layer, LayerWithParameters):
                params += layer.params
        return params

    def fprop(self, inputs):
        """Forward propagates a batch of inputs through the model.

        Args:
            inputs: Batch of inputs to the model.

        Returns:
            List of the activations at the output of all layers of the model
            plus the inputs (to the first layer) as the first element. The
            last element of the list corresponds to the model outputs.
        """
        activations = [inputs]
        for i, layer in enumerate(self.layers):
            activations.append(self.layers[i].fprop(activations[i]))
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
        """Calculates gradients with respect to the model parameters.

        Args:
            activations: List of all activations from forward pass through
                model using `fprop`.
            grads_wrt_outputs: Gradient with respect to the model outputs of
                the scalar function parameter gradients are being calculated
                for.

        Returns:
            List of gradients of the scalar function with respect to all model
            parameters.
        """
        grads_wrt_params = []
        for i, layer in enumerate(self.layers[::-1]):
            inputs = activations[-i - 2]
            outputs = activations[-i - 1]
            grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
            if isinstance(layer, LayerWithParameters):
                grads_wrt_params += layer.grads_wrt_params(
                    inputs, grads_wrt_outputs)[::-1]
            grads_wrt_outputs = grads_wrt_inputs
        return grads_wrt_params[::-1]

    def __repr__(self):
        return 'SingleLayerModel(' + str(layer) + ')'
        return (
            'MultiLayerModel(\n ' +
            '\n '.join([str(layer) for layer in self.layers]) +
            '\n)'
        )
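To see how the `activations` list built by `fprop` lines up with the reversed-layer loop in `grads_wrt_params` (the `activations[-i - 2]` / `activations[-i - 1]` indexing), here is a toy sketch with two stand-in layers; it is illustrative only and not taken from the repository:

import numpy as np

# Two toy "layers": scale by 2, then scale by 3.
layers = [lambda x: 2 * x, lambda x: 3 * x]

inputs = np.ones((1, 2))
activations = [inputs]
for i, layer in enumerate(layers):
    activations.append(layer(activations[i]))

# activations[0] is the model input, activations[-1] the model output:
# inputs, then 2 * inputs, then 6 * inputs.
print([a.tolist() for a in activations])

# Iterating layers in reverse, layer i sees inputs = activations[-i - 2]
# and outputs = activations[-i - 1], exactly as in grads_wrt_params above.
for i, layer in enumerate(layers[::-1]):
    print(i, activations[-i - 2], activations[-i - 1])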
@@ -121,6 +121,7 @@ class Optimiser(object):
            and the second being a dict mapping the labels for the statistics
            recorded to their column index in the array.
        """
        start_train_time = time.process_time()
        run_stats = [list(self.get_epoch_stats().values())]
        for epoch in range(1, num_epochs + 1):
            start_time = time.process_time()
@@ -130,5 +131,7 @@ class Optimiser(object):
            stats = self.get_epoch_stats()
            self.log_stats(epoch, epoch_time, stats)
            run_stats.append(list(stats.values()))
        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}
        finish_train_time = time.process_time()
        total_train_time = finish_train_time - start_train_time
        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}, total_train_time
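The timing change above brackets the epoch loop with `time.process_time()` calls and returns the elapsed time as an extra value. A minimal standalone sketch of the same pattern, with a placeholder workload standing in for the real per-epoch training (illustrative only):

import time

def train_one_epoch():
    # Placeholder workload; in the optimiser this is the per-epoch training pass.
    sum(i * i for i in range(10000))

start_train_time = time.process_time()
for epoch in range(1, 4):
    train_one_epoch()
total_train_time = time.process_time() - start_train_time

# Note: process_time() measures CPU time of the process, not wall-clock time.
print('total training time: {0:.3f}s'.format(total_train_time))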
File diff suppressed because one or more lines are too long

notebooks/03_Multiple_layer_models.ipynb (new file): 1117 lines
File diff suppressed because it is too large