mlpractical/mlp/layers.py

# -*- coding: utf-8 -*-
"""Layer definitions.

This module defines classes which encapsulate a single layer.

These layers map input activations to output activations with the `fprop`
method and map gradients with respect to outputs to gradients with respect to
their inputs with the `bprop` method.

Some layers will have learnable parameters and so will additionally define
methods for getting and setting parameters and calculating gradients with
respect to the layer parameters.
"""

import numpy as np
import mlp.initialisers as init


class Layer(object):
    """Base interface implemented by every layer.

    Subclasses are expected to override both `fprop` and `bprop`; the
    versions defined here only raise `NotImplementedError`.
    """

    def fprop(self, inputs):
        """Maps a batch of layer inputs to the corresponding layer outputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).

        Raises:
            NotImplementedError: Always, as this method is abstract.
        """
        raise NotImplementedError

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Maps gradients at the layer outputs back to the layer inputs.

        Takes the gradients with respect to the outputs of the layer and
        produces the gradients with respect to the inputs of the layer.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).

        Raises:
            NotImplementedError: Always, as this method is abstract.
        """
        raise NotImplementedError


class LayerWithParameters(Layer):
    """Base interface for layers that own learnable parameters.

    In addition to the `Layer` interface, subclasses are expected to override
    `grads_wrt_params` and the `params` property (getter and setter); the
    versions defined here only raise `NotImplementedError`.
    """

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Computes the gradients with respect to the layer parameters.

        Args:
            inputs: Array of inputs to layer of shape (batch_size, input_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            List of arrays of gradients with respect to the layer parameters,
            ordered in the same way as the list returned by the `params`
            property.

        Raises:
            NotImplementedError: Always, as this method is abstract.
        """
        raise NotImplementedError

    @property
    def params(self):
        """The list of current parameter values of the layer.

        Returns:
            List of current parameter values. The ordering should match the
            ordering of the `values` list accepted when assigning to
            `params`.

        Raises:
            NotImplementedError: Always, as this property is abstract.
        """
        raise NotImplementedError

    @params.setter
    def params(self, values):
        """Replaces the layer parameters with the given list of values.

        Args:
            values: List of values to set parameters to. The ordering should
                match the ordering of the list returned when reading
                `params`.

        Raises:
            NotImplementedError: Always, as this property is abstract.
        """
        raise NotImplementedError


class AffineLayer(LayerWithParameters):
    """Layer applying an affine transformation to its inputs.

    The transformation is defined by two parameters: a weight matrix of
    shape (output_dim, input_dim) and a bias vector.
    """

    def __init__(self, input_dim, output_dim,
                 weights_initialiser=init.UniformInit(-0.1, 0.1),
                 biases_initialiser=init.ConstantInit(0.)):
        """Creates an affine layer and initialises its parameters.

        Args:
            input_dim (int): Dimension of inputs to the layer.
            output_dim (int): Dimension of the layer outputs.
            weights_initialiser: Initialiser for the weight parameters. It is
                called with the shape tuple `(output_dim, input_dim)`.
            biases_initialiser: Initialiser for the bias parameters. It is
                called with `output_dim`.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        weights_shape = (self.output_dim, self.input_dim)
        self.weights = weights_initialiser(weights_shape)
        self.biases = biases_initialiser(self.output_dim)

    def fprop(self, inputs):
        """Computes the affine transformation of a batch of inputs.

        For a single input `x`, output `y`, weights `W` and biases `b` the
        layer corresponds to `y = W.dot(x) + b`. For a batch this is evaluated
        as `inputs.dot(W.T) + b`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        weighted_inputs = inputs.dot(self.weights.T)
        return weighted_inputs + self.biases

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Maps gradients at the layer outputs back to the layer inputs.

        Only `grads_wrt_outputs` and the current weights are used; `inputs`
        and `outputs` are accepted to satisfy the common layer interface.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        grads_wrt_inputs = grads_wrt_outputs.dot(self.weights)
        return grads_wrt_inputs

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Computes the gradients with respect to the weights and biases.

        Args:
            inputs: Array of inputs to layer of shape (batch_size, input_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            List of arrays of gradients with respect to the layer parameters
            `[grads_wrt_weights, grads_wrt_biases]`.
        """
        # Both gradients accumulate the contributions of every example in the
        # batch (the batch axis is contracted / summed over).
        return [
            grads_wrt_outputs.T.dot(inputs),
            grads_wrt_outputs.sum(axis=0),
        ]

    @property
    def params(self):
        """A list of layer parameter values: `[weights, biases]`."""
        current_values = [self.weights, self.biases]
        return current_values

    @params.setter
    def params(self, values):
        # Values are expected in the same `[weights, biases]` order that the
        # getter returns.
        self.weights = values[0]
        self.biases = values[1]

    def __repr__(self):
        template = 'AffineLayer(input_dim={0}, output_dim={1})'
        return template.format(self.input_dim, self.output_dim)


class SigmoidLayer(Layer):
    """Layer applying the logistic sigmoid to each input element."""

    def fprop(self, inputs):
        """Applies the logistic sigmoid element-wise to the inputs.

        For inputs `x` and outputs `y` this corresponds to
        `y = 1 / (1 + exp(-x))`.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        denominator = 1. + np.exp(-inputs)
        return 1. / denominator

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Maps gradients at the layer outputs back to the layer inputs.

        The derivative of the sigmoid is expressed through the outputs from
        the forward pass as `y * (1 - y)`, so `inputs` is not used.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        one_minus_outputs = 1. - outputs
        return grads_wrt_outputs * outputs * one_minus_outputs

    def __repr__(self):
        return 'SigmoidLayer'


class SoftmaxLayer(Layer):
    """Layer implementing a softmax transformation over the last axis."""

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to

            `y = exp(x) / sum(exp(x))`

        with the sum taken over the last axis. The maximum along the last
        axis is subtracted from the inputs before exponentiating. This leaves
        the result mathematically unchanged (the common factor cancels in the
        ratio) but prevents `exp` overflowing to `inf` for large inputs,
        which would otherwise produce `nan` outputs.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).

        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        # After the shift the largest exponent in each row is exactly zero,
        # so every exponential lies in (0, 1] and each row sum is at least 1.
        shifted_inputs = inputs - np.max(inputs, axis=-1, keepdims=True)
        exp_inputs = np.exp(shifted_inputs)
        # keepdims retains the reduced axis so the division broadcasts
        # row-wise.
        return exp_inputs / exp_inputs.sum(-1, keepdims=True)

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs. Only `outputs` and
        `grads_wrt_outputs` are used in the calculation.

        Args:
            inputs: Array of layer inputs of shape (batch_size, input_dim).
            outputs: Array of layer outputs calculated in forward pass of
                shape (batch_size, output_dim).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim).

        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, input_dim).
        """
        # Per-example sum over the last axis of gradient times output, kept
        # as a trailing singleton axis so it broadcasts against the rows.
        weighted_grad_sums = (grads_wrt_outputs * outputs).sum(
            -1, keepdims=True)
        return outputs * (grads_wrt_outputs - weighted_grad_sums)

    def __repr__(self):
        return 'SoftmaxLayer'
update lab 2 2024-09-20 20:09:17 +02:00			`# -- coding: utf-8 --`
			`"""Layer definitions.`

			`This module defines classes which encapsulate a single layer.`

			These layers map input activations to output activation with the `fprop`
			`method and map gradients with repsect to outputs to gradients with respect to`
			their inputs with the `bprop` method.

			`Some layers will have learnable parameters and so will additionally define`
			`methods for getting and setting parameter and calculating gradients with`
			`respect to the layer parameters.`
			`"""`

			`import numpy as np`
			`import mlp.initialisers as init`


			`class Layer(object):`
			`"""Abstract class defining the interface for a layer."""`

			`def fprop(self, inputs):`
			`"""Forward propagates activations through the layer transformation.`

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`

			`Returns:`
			`outputs: Array of layer outputs of shape (batch_size, output_dim).`
			`"""`
			`raise NotImplementedError()`

			`def bprop(self, inputs, outputs, grads_wrt_outputs):`
			`"""Back propagates gradients through a layer.`

			`Given gradients with respect to the outputs of the layer calculates the`
			`gradients with respect to the layer inputs.`

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`
			`outputs: Array of layer outputs calculated in forward pass of`
			`shape (batch_size, output_dim).`
			`grads_wrt_outputs: Array of gradients with respect to the layer`
			`outputs of shape (batch_size, output_dim).`

			`Returns:`
			`Array of gradients with respect to the layer inputs of shape`
			`(batch_size, input_dim).`
			`"""`
			`raise NotImplementedError()`


			`class LayerWithParameters(Layer):`
			`"""Abstract class defining the interface for a layer with parameters."""`

			`def grads_wrt_params(self, inputs, grads_wrt_outputs):`
			`"""Calculates gradients with respect to layer parameters.`

			`Args:`
			`inputs: Array of inputs to layer of shape (batch_size, input_dim).`
			`grads_wrt_to_outputs: Array of gradients with respect to the layer`
			`outputs of shape (batch_size, output_dim).`

			`Returns:`
			`List of arrays of gradients with respect to the layer parameters`
			`with parameter gradients appearing in same order in tuple as`
			returned from `get_params` method.
			`"""`
			`raise NotImplementedError()`

			`@property`
			`def params(self):`
			`"""Returns a list of parameters of layer.`

			`Returns:`
Update lab 3 2024-10-03 15:53:33 +02:00			`List of current parameter values. This list should be in the`
			corresponding order to the `values` argument to `set_params`.
			`"""`
			`raise NotImplementedError()`

			`@params.setter`
			`def params(self, values):`
			`"""Sets layer parameters from a list of values.`

			`Args:`
			`values: List of values to set parameters to. This list should be`
			in the corresponding order to what is returned by `get_params`.
update lab 2 2024-09-20 20:09:17 +02:00			`"""`
			`raise NotImplementedError()`


			`class AffineLayer(LayerWithParameters):`
			`"""Layer implementing an affine tranformation of its inputs.`

			`This layer is parameterised by a weight matrix and bias vector.`
			`"""`

			`def __init__(self, input_dim, output_dim,`
			`weights_initialiser=init.UniformInit(-0.1, 0.1),`
Update lab 3 2024-10-03 15:53:33 +02:00			`biases_initialiser=init.ConstantInit(0.)):`
update lab 2 2024-09-20 20:09:17 +02:00			`"""Initialises a parameterised affine layer.`

			`Args:`
			`input_dim (int): Dimension of inputs to the layer.`
			`output_dim (int): Dimension of the layer outputs.`
			`weights_initialiser: Initialiser for the weight parameters.`
			`biases_initialiser: Initialiser for the bias parameters.`
			`"""`
			`self.input_dim = input_dim`
			`self.output_dim = output_dim`
			`self.weights = weights_initialiser((self.output_dim, self.input_dim))`
			`self.biases = biases_initialiser(self.output_dim)`

			`def fprop(self, inputs):`
			`"""Forward propagates activations through the layer transformation.`

			For inputs `x`, outputs `y`, weights `W` and biases `b` the layer
			corresponds to `y = W.dot(x) + b`.

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`

			`Returns:`
			`outputs: Array of layer outputs of shape (batch_size, output_dim).`
			`"""`
Update lab 3 2024-10-03 15:53:33 +02:00			`return inputs.dot(self.weights.T) + self.biases`

			`def bprop(self, inputs, outputs, grads_wrt_outputs):`
			`"""Back propagates gradients through a layer.`

			`Given gradients with respect to the outputs of the layer calculates the`
			`gradients with respect to the layer inputs.`

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`
			`outputs: Array of layer outputs calculated in forward pass of`
			`shape (batch_size, output_dim).`
			`grads_wrt_outputs: Array of gradients with respect to the layer`
			`outputs of shape (batch_size, output_dim).`

			`Returns:`
			`Array of gradients with respect to the layer inputs of shape`
			`(batch_size, input_dim).`
			`"""`
			`return grads_wrt_outputs.dot(self.weights)`
update lab 2 2024-09-20 20:09:17 +02:00
			`def grads_wrt_params(self, inputs, grads_wrt_outputs):`
			`"""Calculates gradients with respect to layer parameters.`

			`Args:`
			`inputs: array of inputs to layer of shape (batch_size, input_dim)`
			`grads_wrt_to_outputs: array of gradients with respect to the layer`
			`outputs of shape (batch_size, output_dim)`

			`Returns:`
			`list of arrays of gradients with respect to the layer parameters`
			`[grads_wrt_weights, grads_wrt_biases]`.
			`"""`
Update lab 3 2024-10-03 15:53:33 +02:00
			`grads_wrt_weights = np.dot(grads_wrt_outputs.T, inputs)`
			`grads_wrt_biases = np.sum(grads_wrt_outputs, axis=0)`
			`return [grads_wrt_weights, grads_wrt_biases]`
update lab 2 2024-09-20 20:09:17 +02:00
			`@property`
			`def params(self):`
			"""A list of layer parameter values: `[weights, biases]`."""
			`return [self.weights, self.biases]`

Update lab 3 2024-10-03 15:53:33 +02:00			`@params.setter`
			`def params(self, values):`
			`self.weights = values[0]`
			`self.biases = values[1]`

update lab 2 2024-09-20 20:09:17 +02:00			`def __repr__(self):`
			`return 'AffineLayer(input_dim={0}, output_dim={1})'.format(`
			`self.input_dim, self.output_dim)`
Update lab 3 2024-10-03 15:53:33 +02:00

			`class SigmoidLayer(Layer):`
			`"""Layer implementing an element-wise logistic sigmoid transformation."""`

			`def fprop(self, inputs):`
			`"""Forward propagates activations through the layer transformation.`

			For inputs `x` and outputs `y` this corresponds to
			`y = 1 / (1 + exp(-x))`.

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`

			`Returns:`
			`outputs: Array of layer outputs of shape (batch_size, output_dim).`
			`"""`
			`return 1. / (1. + np.exp(-inputs))`

			`def bprop(self, inputs, outputs, grads_wrt_outputs):`
			`"""Back propagates gradients through a layer.`

			`Given gradients with respect to the outputs of the layer calculates the`
			`gradients with respect to the layer inputs.`

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`
			`outputs: Array of layer outputs calculated in forward pass of`
			`shape (batch_size, output_dim).`
			`grads_wrt_outputs: Array of gradients with respect to the layer`
			`outputs of shape (batch_size, output_dim).`

			`Returns:`
			`Array of gradients with respect to the layer inputs of shape`
			`(batch_size, input_dim).`
			`"""`
			`return grads_wrt_outputs * outputs * (1. - outputs)`

			`def __repr__(self):`
			`return 'SigmoidLayer'`


			`class SoftmaxLayer(Layer):`
			`"""Layer implementing a softmax transformation."""`

			`def fprop(self, inputs):`
			`"""Forward propagates activations through the layer transformation.`

			For inputs `x` and outputs `y` this corresponds to

			`y = exp(x) / sum(exp(x))`.

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`

			`Returns:`
			`outputs: Array of layer outputs of shape (batch_size, output_dim).`
			`"""`
			`exp_inputs = np.exp(inputs)`
			`return exp_inputs / exp_inputs.sum(-1)[:, None]`

			`def bprop(self, inputs, outputs, grads_wrt_outputs):`
			`"""Back propagates gradients through a layer.`

			`Given gradients with respect to the outputs of the layer calculates the`
			`gradients with respect to the layer inputs.`

			`Args:`
			`inputs: Array of layer inputs of shape (batch_size, input_dim).`
			`outputs: Array of layer outputs calculated in forward pass of`
			`shape (batch_size, output_dim).`
			`grads_wrt_outputs: Array of gradients with respect to the layer`
			`outputs of shape (batch_size, output_dim).`

			`Returns:`
			`Array of gradients with respect to the layer inputs of shape`
			`(batch_size, input_dim).`
			`"""`
			`return (outputs * (grads_wrt_outputs -`
			`(grads_wrt_outputs * outputs).sum(-1)[:, None]))`

			`def __repr__(self):`
			`return 'SoftmaxLayer'`