Update lab 3
parent 3692ab7532
commit 9f1f3ccd04
.gitignore (vendored): 5 lines changed
@@ -25,6 +25,7 @@ var/
 *.egg-info/
 .installed.cfg
 *.egg
+etc/
 
 # PyInstaller
 # Usually these files are written by a python script from a template
@@ -61,3 +62,7 @@ target/
 
 # Notebook stuff
 notebooks/.ipynb_checkpoints/
+
+# Data folder
+data/
+solutions/
mlp/errors.py: 146 lines changed
@@ -23,10 +23,9 @@ class SumOfSquaredDiffsError(object):
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Scalar error function value.
+            Scalar cost function value.
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))
 
     def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.
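The expression filled in above is half the mean over the batch of the per-example squared distance between outputs and targets, i.e. E = (1 / 2N) * sum_n ||y_n - t_n||^2 for a batch of N examples. A minimal standalone check of the vectorised form against an explicit loop (plain numpy only; the example values are arbitrary):

    import numpy as np

    outputs = np.array([[1.0, 2.0], [0.5, -1.0]])
    targets = np.array([[0.0, 2.0], [1.0, -1.0]])

    # Vectorised form used in the diff above.
    error = 0.5 * np.mean(np.sum((outputs - targets) ** 2, axis=1))

    # Same quantity written as an explicit loop over the batch.
    batch_size = outputs.shape[0]
    error_loop = sum(
        0.5 * np.sum((outputs[n] - targets[n]) ** 2)
        for n in range(batch_size)) / batch_size

    assert np.allclose(error, error_loop)  # both give 0.3125 here

The gradient implemented in the next hunk, (outputs - targets) / batch_size, is the derivative of this quantity with respect to each output.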
@@ -36,11 +35,142 @@ class SumOfSquaredDiffsError(object):
             targets: Array of target outputs of shape (batch_size, output_dim).
 
         Returns:
-            Gradient of error function with respect to outputs. This should be
-            an array of shape (batch_size, output_dim).
+            Gradient of error function with respect to outputs.
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        return (outputs - targets) / outputs.shape[0]
 
     def __repr__(self):
-        return 'SumOfSquaredDiffsError'
+        return 'MeanSquaredErrorCost'
+
+
+class BinaryCrossEntropyError(object):
+    """Binary cross entropy error."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        return -np.mean(
+            targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        return ((1. - targets) / (1. - outputs) -
+                (targets / outputs)) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'BinaryCrossEntropyError'
+
+
+class BinaryCrossEntropySigmoidError(object):
+    """Binary cross entropy error with logistic sigmoid applied to outputs."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        probs = 1. / (1. + np.exp(-outputs))
+        return -np.mean(
+            targets * np.log(probs) + (1. - targets) * np.log(1. - probs))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        probs = 1. / (1. + np.exp(-outputs))
+        return (probs - targets) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'BinaryCrossEntropySigmoidError'
+
+
+class CrossEntropyError(object):
+    """Multi-class cross entropy error."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        return -np.mean(np.sum(targets * np.log(outputs), axis=1))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        return -(targets / outputs) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'CrossEntropyError'
+
+
+class CrossEntropySoftmaxError(object):
+    """Multi-class cross entropy error with Softmax applied to outputs."""
+
+    def __call__(self, outputs, targets):
+        """Calculates error function given a batch of outputs and targets.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Scalar error function value.
+        """
+        probs = np.exp(outputs)
+        probs /= probs.sum(-1)[:, None]
+        return -np.mean(np.sum(targets * np.log(probs), axis=1))
+
+    def grad(self, outputs, targets):
+        """Calculates gradient of error function with respect to outputs.
+
+        Args:
+            outputs: Array of model outputs of shape (batch_size, output_dim).
+            targets: Array of target outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Gradient of error function with respect to outputs.
+        """
+        probs = np.exp(outputs)
+        probs /= probs.sum(-1)[:, None]
+        return (probs - targets) / outputs.shape[0]
+
+    def __repr__(self):
+        return 'CrossEntropySoftmaxError'
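Each of the added error classes pairs an analytic grad with the scalar __call__, so they can be sanity-checked with a central finite-difference comparison. A sketch of such a check, assuming the classes are importable from mlp.errors as added above; the check_grad helper is illustrative and not part of the package:

    import numpy as np
    from mlp.errors import CrossEntropySoftmaxError

    def check_grad(error, outputs, targets, eps=1e-6):
        """Compares the analytic gradient with central finite differences."""
        analytic = error.grad(outputs, targets)
        numeric = np.zeros_like(outputs)
        for idx in np.ndindex(*outputs.shape):
            shift = np.zeros_like(outputs)
            shift[idx] = eps
            numeric[idx] = (error(outputs + shift, targets) -
                            error(outputs - shift, targets)) / (2. * eps)
        return np.max(np.abs(analytic - numeric))

    rng = np.random.RandomState(27)
    outputs = rng.normal(size=(5, 3))
    targets = np.eye(3)[rng.randint(3, size=5)]  # one-hot targets
    # Should print a value close to zero (finite-difference error only).
    print(check_grad(CrossEntropySoftmaxError(), outputs, targets))

The same check applies to the other classes, provided the outputs are kept strictly inside the domain where the logarithms are defined (for example in (0, 1) for BinaryCrossEntropyError).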
mlp/layers.py: 132 lines changed
@@ -73,7 +73,18 @@ class LayerWithParameters(Layer):
         """Returns a list of parameters of layer.
 
         Returns:
-            List of current parameter values.
+            List of current parameter values. This list should be in the
+            corresponding order to the `values` argument to `set_params`.
+        """
+        raise NotImplementedError()
+
+    @params.setter
+    def params(self, values):
+        """Sets layer parameters from a list of values.
+
+        Args:
+            values: List of values to set parameters to. This list should be
+                in the corresponding order to what is returned by `get_params`.
         """
         raise NotImplementedError()
 
@@ -86,8 +97,7 @@ class AffineLayer(LayerWithParameters):
 
     def __init__(self, input_dim, output_dim,
                  weights_initialiser=init.UniformInit(-0.1, 0.1),
-                 biases_initialiser=init.ConstantInit(0.),
-                 weights_cost=None, biases_cost=None):
+                 biases_initialiser=init.ConstantInit(0.)):
         """Initialises a parameterised affine layer.
 
         Args:
@@ -113,8 +123,26 @@ class AffineLayer(LayerWithParameters):
         Returns:
             outputs: Array of layer outputs of shape (batch_size, output_dim).
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        return inputs.dot(self.weights.T) + self.biases
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return grads_wrt_outputs.dot(self.weights)
 
     def grads_wrt_params(self, inputs, grads_wrt_outputs):
         """Calculates gradients with respect to layer parameters.
@@ -128,14 +156,104 @@ class AffineLayer(LayerWithParameters):
             list of arrays of gradients with respect to the layer parameters
             `[grads_wrt_weights, grads_wrt_biases]`.
         """
-        #TODO write your code here
-        raise NotImplementedError()
+        grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)
+        grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)
+        return [grads_wrt_weights, grads_wrt_biases]
 
     @property
     def params(self):
         """A list of layer parameter values: `[weights, biases]`."""
         return [self.weights, self.biases]
+
+    @params.setter
+    def params(self, values):
+        self.weights = values[0]
+        self.biases = values[1]
 
     def __repr__(self):
         return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
             self.input_dim, self.output_dim)
+
+
+class SigmoidLayer(Layer):
+    """Layer implementing an element-wise logistic sigmoid transformation."""
+
+    def fprop(self, inputs):
+        """Forward propagates activations through the layer transformation.
+
+        For inputs `x` and outputs `y` this corresponds to
+        `y = 1 / (1 + exp(-x))`.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        return 1. / (1. + np.exp(-inputs))
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return grads_wrt_outputs * outputs * (1. - outputs)
+
+    def __repr__(self):
+        return 'SigmoidLayer'
+
+
+class SoftmaxLayer(Layer):
+    """Layer implementing a softmax transformation."""
+
+    def fprop(self, inputs):
+        """Forward propagates activations through the layer transformation.
+
+        For inputs `x` and outputs `y` this corresponds to
+
+        `y = exp(x) / sum(exp(x))`.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+
+        Returns:
+            outputs: Array of layer outputs of shape (batch_size, output_dim).
+        """
+        exp_inputs = np.exp(inputs)
+        return exp_inputs / exp_inputs.sum(-1)[:, None]
+
+    def bprop(self, inputs, outputs, grads_wrt_outputs):
+        """Back propagates gradients through a layer.
+
+        Given gradients with respect to the outputs of the layer calculates the
+        gradients with respect to the layer inputs.
+
+        Args:
+            inputs: Array of layer inputs of shape (batch_size, input_dim).
+            outputs: Array of layer outputs calculated in forward pass of
+                shape (batch_size, output_dim).
+            grads_wrt_outputs: Array of gradients with respect to the layer
+                outputs of shape (batch_size, output_dim).
+
+        Returns:
+            Array of gradients with respect to the layer inputs of shape
+            (batch_size, input_dim).
+        """
+        return (outputs * (grads_wrt_outputs -
+                (grads_wrt_outputs * outputs).sum(-1)[:, None]))
+
+    def __repr__(self):
+        return 'SoftmaxLayer'
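The new AffineLayer.bprop and grads_wrt_params methods and the added SigmoidLayer can be exercised together on a small random batch. A sketch assuming the classes are importable from mlp.layers as defined in this diff and can be constructed with the default initialisers shown in the __init__ signature above; it only checks that the returned shapes match the docstrings:

    import numpy as np
    from mlp.layers import AffineLayer, SigmoidLayer

    rng = np.random.RandomState(27)
    inputs = rng.normal(size=(10, 4))        # batch_size=10, input_dim=4

    affine = AffineLayer(input_dim=4, output_dim=3)
    sigmoid = SigmoidLayer()

    # Forward pass through both layers.
    affine_out = affine.fprop(inputs)        # shape (10, 3)
    sigmoid_out = sigmoid.fprop(affine_out)  # shape (10, 3)

    # Backward pass with arbitrary upstream gradients.
    grads_wrt_sigmoid_out = rng.normal(size=sigmoid_out.shape)
    grads_wrt_affine_out = sigmoid.bprop(
        affine_out, sigmoid_out, grads_wrt_sigmoid_out)
    grads_wrt_inputs = affine.bprop(inputs, affine_out, grads_wrt_affine_out)

    grads_wrt_weights, grads_wrt_biases = affine.grads_wrt_params(
        inputs, grads_wrt_affine_out)

    assert grads_wrt_inputs.shape == inputs.shape           # (10, 4)
    assert grads_wrt_weights.shape == affine.weights.shape  # (3, 4)
    assert grads_wrt_biases.shape == affine.biases.shape    # (3,)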
@@ -59,9 +59,75 @@ class SingleLayerModel(object):
         """
         return self.layer.grads_wrt_params(activations[0], grads_wrt_outputs)
 
-    def params_cost(self):
-        """Calculates the parameter dependent cost term of the model."""
-        return self.layer.params_cost()
+    def __repr__(self):
+        return 'SingleLayerModel(' + str(self.layer) + ')'
+
+
+class MultipleLayerModel(object):
+    """A model consisting of multiple layers applied sequentially."""
+
+    def __init__(self, layers):
+        """Create a new multiple layer model instance.
+
+        Args:
+            layers: List of the the layer objecst defining the model in the
+                order they should be applied from inputs to outputs.
+        """
+        self.layers = layers
+
+    @property
+    def params(self):
+        """A list of all of the parameters of the model."""
+        params = []
+        for layer in self.layers:
+            if isinstance(layer, LayerWithParameters):
+                params += layer.params
+        return params
+
+    def fprop(self, inputs):
+        """Forward propagates a batch of inputs through the model.
+
+        Args:
+            inputs: Batch of inputs to the model.
+
+        Returns:
+            List of the activations at the output of all layers of the model
+            plus the inputs (to the first layer) as the first element. The
+            last element of the list corresponds to the model outputs.
+        """
+        activations = [inputs]
+        for i, layer in enumerate(self.layers):
+            activations.append(self.layers[i].fprop(activations[i]))
+        return activations
+
+    def grads_wrt_params(self, activations, grads_wrt_outputs):
+        """Calculates gradients with respect to the model parameters.
+
+        Args:
+            activations: List of all activations from forward pass through
+                model using `fprop`.
+            grads_wrt_outputs: Gradient with respect to the model outputs of
+                the scalar function parameter gradients are being calculated
+                for.
+
+        Returns:
+            List of gradients of the scalar function with respect to all model
+            parameters.
+        """
+        grads_wrt_params = []
+        for i, layer in enumerate(self.layers[::-1]):
+            inputs = activations[-i - 2]
+            outputs = activations[-i - 1]
+            grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
+            if isinstance(layer, LayerWithParameters):
+                grads_wrt_params += layer.grads_wrt_params(
+                    inputs, grads_wrt_outputs)[::-1]
+            grads_wrt_outputs = grads_wrt_inputs
+        return grads_wrt_params[::-1]
 
     def __repr__(self):
-        return 'SingleLayerModel(' + str(layer) + ')'
+        return (
+            'MultiLayerModel(\n ' +
+            '\n '.join([str(layer) for layer in self.layers]) +
+            '\n)'
+        )
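The added MultipleLayerModel composes the layer fprop, bprop and grads_wrt_params methods above into a full forward and backward pass. A usage sketch follows; the import path mlp.models is an assumption, since the file header for this hunk is not visible in the capture, while the other names are taken from the diffs above:

    import numpy as np
    from mlp.models import MultipleLayerModel  # assumed module path
    from mlp.layers import AffineLayer, SigmoidLayer, SoftmaxLayer
    from mlp.errors import CrossEntropyError

    rng = np.random.RandomState(27)
    inputs = rng.normal(size=(20, 4))
    targets = np.eye(3)[rng.randint(3, size=20)]  # one-hot targets

    model = MultipleLayerModel([
        AffineLayer(4, 8),
        SigmoidLayer(),
        AffineLayer(8, 3),
        SoftmaxLayer(),
    ])
    error = CrossEntropyError()

    activations = model.fprop(inputs)  # inputs plus every layer's outputs
    outputs = activations[-1]          # model outputs, shape (20, 3)
    print(error(outputs, targets))

    grads_wrt_outputs = error.grad(outputs, targets)
    grads = model.grads_wrt_params(activations, grads_wrt_outputs)
    print(len(grads))  # 4: weights and biases for each of the two affine layers

Ending the stack with SoftmaxLayer and using CrossEntropyError keeps the outputs as valid probabilities; the CrossEntropySoftmaxError class added earlier fuses the two and would instead be paired with a model whose last layer is affine.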
@@ -121,6 +121,7 @@ class Optimiser(object):
             and the second being a dict mapping the labels for the statistics
             recorded to their column index in the array.
         """
+        start_train_time = time.process_time()
         run_stats = [list(self.get_epoch_stats().values())]
         for epoch in range(1, num_epochs + 1):
             start_time = time.process_time()
@@ -130,5 +131,7 @@ class Optimiser(object):
             stats = self.get_epoch_stats()
             self.log_stats(epoch, epoch_time, stats)
             run_stats.append(list(stats.values()))
-        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}
+        finish_train_time = time.process_time()
+        total_train_time = finish_train_time - start_train_time
+        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}, total_train_time
File diff suppressed because one or more lines are too long

notebooks/03_Multiple_layer_models.ipynb (new file): 1117 lines
File diff suppressed because it is too large