From 302d44371ae6e00920433cb3064859407d5630af Mon Sep 17 00:00:00 2001
From: Matt Graham
Date: Mon, 10 Oct 2016 09:23:53 +0100
Subject: [PATCH] Adding new schedulers module.

---
 mlp/schedulers.py | 192 ++++--------------------
 mlp/utils.py      | 361 ----------------------------------------------
 2 files changed, 27 insertions(+), 526 deletions(-)
 delete mode 100644 mlp/utils.py

diff --git a/mlp/schedulers.py b/mlp/schedulers.py
index 6ae9597..4f53e7e 100644
--- a/mlp/schedulers.py
+++ b/mlp/schedulers.py
@@ -1,172 +1,34 @@
-# Machine Learning Practical (INFR11119),
-# Pawel Swietojanski, University of Edinburgh
+# -*- coding: utf-8 -*-
+"""Training schedulers.
 
-import logging
+This module contains classes implementing schedulers which control the
+evolution of learning rule hyperparameters (such as learning rate) over a
+training run.
+"""
+
+import numpy as np
 
 
-class LearningRateScheduler(object):
-    """
-    Define an interface for determining learning rates
-    """
-    def __init__(self, max_epochs=100):
-        self.epoch = 0
-        self.max_epochs = max_epochs
+class ConstantLearningRateScheduler(object):
+    """Example of scheduler interface which sets a constant learning rate."""
 
-    def get_rate(self):
-        raise NotImplementedError()
+    def __init__(self, learning_rate):
+        """Construct a new constant learning rate scheduler object.
 
-    def get_next_rate(self, current_accuracy=None):
-        self.epoch += 1
-
-
-class LearningRateList(LearningRateScheduler):
-    def __init__(self, learning_rates_list, max_epochs):
-
-        super(LearningRateList, self).__init__(max_epochs)
-
-        assert isinstance(learning_rates_list, list), (
-            "The learning_rates_list argument expected"
-            " to be of type list, got %s" % type(learning_rates_list)
-        )
-        self.lr_list = learning_rates_list
-
-    def get_rate(self):
-        if self.epoch < len(self.lr_list):
-            return self.lr_list[self.epoch]
-        return 0.0
-
-    def get_next_rate(self, current_accuracy=None):
-        super(LearningRateList, self).get_next_rate(current_accuracy=None)
-        return self.get_rate()
-
-
-class LearningRateFixed(LearningRateList):
-
-    def __init__(self, learning_rate, max_epochs):
-        assert learning_rate > 0, (
-            "learning rate expected to be > 0, got %f" % learning_rate
-        )
-        super(LearningRateFixed, self).__init__([learning_rate], max_epochs)
-
-    def get_rate(self):
-        if self.epoch < self.max_epochs:
-            return self.lr_list[0]
-        return 0.0
-
-    def get_next_rate(self, current_accuracy=None):
-        super(LearningRateFixed, self).get_next_rate(current_accuracy=None)
-        return self.get_rate()
-
-
-class LearningRateNewBob(LearningRateScheduler):
-    """
-    newbob learning rate schedule.
-
-    Fixed learning rate until validation set stops improving then exponential
-    decay.
-    """
-
-    def __init__(self, start_rate, scale_by=.5, max_epochs=99,
-                 min_derror_ramp_start=.5, min_derror_stop=.5, init_error=100.0,
-                 patience=0, zero_rate=None, ramping=False):
+        Args:
+            learning_rate: Learning rate to use in learning rule.
         """
-        :type start_rate: float
-        :param start_rate:
-
-        :type scale_by: float
-        :param scale_by:
-
-        :type max_epochs: int
-        :param max_epochs:
-
-        :type min_error_start: float
-        :param min_error_start:
-
-        :type min_error_stop: float
-        :param min_error_stop:
-
-        :type init_error: float
-        :param init_error:
+        self.learning_rate = learning_rate
+
+    def update_learning_rule(self, learning_rule, epoch_number):
+        """Update the hyperparameters of the learning rule.
+
+        Run at the beginning of each epoch.
+
+        Args:
+            learning_rule: Learning rule object being used in training run,
+                any scheduled hyperparameters to be altered should be
+                attributes of this object.
+            epoch_number: Integer index of training epoch about to be run.
         """
-        self.start_rate = start_rate
-        self.init_error = init_error
-        self.init_patience = patience
-
-        self.rate = start_rate
-        self.scale_by = scale_by
-        self.max_epochs = max_epochs
-        self.min_derror_ramp_start = min_derror_ramp_start
-        self.min_derror_stop = min_derror_stop
-        self.lowest_error = init_error
-
-        self.epoch = 1
-        self.ramping = ramping
-        self.patience = patience
-        self.zero_rate = zero_rate
-
-    def reset(self):
-        self.rate = self.start_rate
-        self.lowest_error = self.init_error
-        self.epoch = 1
-        self.ramping = False
-        self.patience = self.init_patience
-
-    def get_rate(self):
-        if (self.epoch==1 and self.zero_rate!=None):
-            return self.zero_rate
-        return self.rate
-
-    def get_next_rate(self, current_accuracy):
-        """
-        :type current_accuracy: float
-        :param current_accuracy: current proportion correctly classified
-
-        """
-
-        current_error = 1. - current_accuracy
-        diff_error = 0.0
-
-        if ( (self.max_epochs > 10000) or (self.epoch >= self.max_epochs) ):
-            #logging.debug('Setting rate to 0.0. max_epochs or epoch>=max_epochs')
-            self.rate = 0.0
-        else:
-            diff_error = self.lowest_error - current_error
-
-            if (current_error < self.lowest_error):
-                self.lowest_error = current_error
-
-            if (self.ramping):
-                if (diff_error < self.min_derror_stop):
-                    if (self.patience > 0):
-                        #logging.debug('Patience decreased to %f' % self.patience)
-                        self.patience -= 1
-                        self.rate *= self.scale_by
-                    else:
-                        #logging.debug('diff_error (%f) < min_derror_stop (%f)' % (diff_error, self.min_derror_stop))
-                        self.rate = 0.0
-                else:
-                    self.rate *= self.scale_by
-            else:
-                if (diff_error < self.min_derror_ramp_start):
-                    #logging.debug('Start ramping.')
-                    self.ramping = True
-                    self.rate *= self.scale_by
-
-        self.epoch += 1
-
-        return self.rate
-
-
-class DropoutFixed(LearningRateList):
-
-    def __init__(self, p_inp_keep, p_hid_keep):
-        assert 0 < p_inp_keep <= 1 and 0 < p_hid_keep <= 1, (
-            "Dropout 'keep' probabilites are suppose to be in (0, 1] range"
-        )
-        super(DropoutFixed, self).__init__([(p_inp_keep, p_hid_keep)], max_epochs=999)
-
-    def get_rate(self):
-        return self.lr_list[0]
-
-    def get_next_rate(self, current_accuracy=None):
-        return self.get_rate()
+        learning_rule.learning_rate = self.learning_rate
diff --git a/mlp/utils.py b/mlp/utils.py
deleted file mode 100644
index 34d62e5..0000000
--- a/mlp/utils.py
+++ /dev/null
@@ -1,361 +0,0 @@
-# Machine Learning Practical (INFR11119),
-# Pawel Swietojanski, University of Edinburgh
-
-import numpy
-from mlp.layers import Layer
-
-
-def numerical_gradient(f, x, eps=1e-4, **kwargs):
-    """
-    Implements the following numerical gradient rule
-    df(x)/dx = (f(x+eps)-f(x-eps))/(2eps)
-    """
-
-    xc = x.copy()
-    g = numpy.zeros_like(xc)
-    xf = xc.ravel()
-    gf = g.ravel()
-
-    for i in xrange(xf.shape[0]):
-        xx = xf[i]
-        xf[i] = xx + eps
-        fp_eps, ___ = f(xc, **kwargs)
-        xf[i] = xx - eps
-        fm_eps, ___ = f(xc, **kwargs)
-        xf[i] = xx
-        gf[i] = (fp_eps - fm_eps)/(2*eps)
-
-    return g
-
-
-def verify_gradient(f, x, eps=1e-4, tol=1e-6, **kwargs):
-    """
-    Compares the numerical and analytical gradients.
-    """
-    fval, fgrad = f(x=x, **kwargs)
-    ngrad = numerical_gradient(f=f, x=x, eps=eps, tol=tol, **kwargs)
-
-    fgradnorm = numpy.sqrt(numpy.sum(fgrad**2))
-    ngradnorm = numpy.sqrt(numpy.sum(ngrad**2))
-    diffnorm = numpy.sqrt(numpy.sum((fgrad-ngrad)**2))
-
-    if fgradnorm > 0 or ngradnorm > 0:
-        norm = numpy.maximum(fgradnorm, ngradnorm)
-        if not (diffnorm < tol or diffnorm/norm < tol):
-            raise Exception("Numerical and analytical gradients "
-                            "are different: %s != %s!" % (ngrad, fgrad))
-    else:
-        if not (diffnorm < tol):
-            raise Exception("Numerical and analytical gradients "
-                            "are different: %s != %s!" % (ngrad, fgrad))
-    return True
-
-
-def verify_layer_gradient(layer, x, eps=1e-4, tol=1e-6):
-
-    assert isinstance(layer, Layer), (
-        "Expected to get the instance of Layer class, got"
-        " %s " % type(layer)
-    )
-
-    def grad_layer_wrapper(x, **kwargs):
-        h = layer.fprop(x)
-        deltas, ograds = layer.bprop(h=h, igrads=numpy.ones_like(h))
-        return numpy.sum(h), ograds
-
-    return verify_gradient(f=grad_layer_wrapper, x=x, eps=eps, tol=tol, layer=layer)
-
-
-def test_conv_linear_fprop(layer, kernel_order='ioxy', kernels_first=True,
-                           dtype=numpy.float):
-    """
-    Tests forward propagation method of a convolutional layer.
-
-    Checks the outputs of `fprop` method for a fixed input against known
-    reference values for the outputs and raises an AssertionError if
-    the outputted values are not consistent with the reference values. If
-    tests are all passed returns True.
-
-    Parameters
-    ----------
-    layer : instance of Layer subclass
-        Convolutional (linear only) layer implementation. It must implement
-        the methods `get_params`, `set_params` and `fprop`.
-    kernel_order : string
-        Specifes dimension ordering assumed for convolutional kernels
-        passed to `layer`. Default is `ioxy` which corresponds to:
-            input channels, output channels, image x, image y
-        The other option is 'oixy' which corresponds to
-            output channels, input channels, image x, image y
-        Any other value will raise a ValueError exception.
-    kernels_first : boolean
-        Specifies order in which parameters are passed to and returned from
-        `get_params` and `set_params`. Default is True which corresponds
-        to signatures of `get_params` and `set_params` being:
-            kernels, biases = layer.get_params()
-            layer.set_params([kernels, biases])
-        If False this corresponds to signatures of `get_params` and
-        `set_params` being:
-            biases, kernels = layer.get_params()
-            layer.set_params([biases, kernels])
-    dtype : numpy data type
-        Data type to use in numpy arrays passed to layer methods. Default
-        is `numpy.float`.
-
-    Raises
-    ------
-    AssertionError
-        Raised if output of `layer.fprop` is inconsistent with reference
-        values either in shape or values.
-    ValueError
-        Raised if `kernel_order` is not a valid order string.
-    """
-    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
-    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
-    if kernel_order == 'oixy':
-        kernels = kernels.swapaxes(0, 1)
-    elif kernel_order != 'ioxy':
-        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
-    biases = numpy.arange(2).astype(dtype)
-    true_output = numpy.array(
-        [[[[  496.,   466.,   436.],
-           [  376.,   346.,   316.],
-           [  256.,   226.,   196.]],
-          [[ 1385.,  1403.,  1421.],
-           [ 1457.,  1475.,  1493.],
-           [ 1529.,  1547.,  1565.]]],
-         [[[ -944.,  -974., -1004.],
-           [-1064., -1094., -1124.],
-           [-1184., -1214., -1244.]],
-          [[ 2249.,  2267.,  2285.],
-           [ 2321.,  2339.,  2357.],
-           [ 2393.,  2411.,  2429.]]]], dtype=dtype)
-    try:
-        orig_params = layer.get_params()
-        if kernels_first:
-            layer.set_params([kernels, biases])
-        else:
-            layer.set_params([biases, kernels])
-        layer_output = layer.fprop(inputs)
-        assert layer_output.shape == true_output.shape, (
-            'Layer fprop gives incorrect shaped output. '
-            'Correct shape is {0} but returned shape is {1}.'
-            .format(true_output.shape, layer_output.shape)
-        )
-        assert numpy.allclose(layer_output, true_output), (
-            'Layer fprop does not give correct output. '
-            'Correct output is {0}\n but returned output is {1}.'
-            .format(true_output, layer_output)
-        )
-    finally:
-        layer.set_params(orig_params)
-    return True
-
-
-def test_conv_linear_bprop(layer, kernel_order='ioxy', kernels_first=True,
-                           dtype=numpy.float):
-    """
-    Tests input gradients backpropagation method of a convolutional layer.
-
-    Checks the outputs of `bprop` method for a fixed input against known
-    reference values for the outputs and raises an AssertionError if
-    the outputted values are not consistent with the reference values. If
-    tests are all passed returns True.
-
-    Parameters
-    ----------
-    layer : instance of Layer subclass
-        Convolutional (linear only) layer implementation. It must implement
-        the methods `get_params`, `set_params` and `bprop`.
-    kernel_order : string
-        Specifes dimension ordering assumed for convolutional kernels
-        passed to `layer`. Default is `ioxy` which corresponds to:
-            input channels, output channels, image x, image y
-        The other option is 'oixy' which corresponds to
-            output channels, input channels, image x, image y
-        Any other value will raise a ValueError exception.
-    kernels_first : boolean
-        Specifies order in which parameters are passed to and returned from
-        `get_params` and `set_params`. Default is True which corresponds
-        to signatures of `get_params` and `set_params` being:
-            kernels, biases = layer.get_params()
-            layer.set_params([kernels, biases])
-        If False this corresponds to signatures of `get_params` and
-        `set_params` being:
-            biases, kernels = layer.get_params()
-            layer.set_params([biases, kernels])
-    dtype : numpy data type
-        Data type to use in numpy arrays passed to layer methods. Default
-        is `numpy.float`.
-
-    Raises
-    ------
-    AssertionError
-        Raised if output of `layer.bprop` is inconsistent with reference
-        values either in shape or values.
-    ValueError
-        Raised if `kernel_order` is not a valid order string.
-    """
-    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
-    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
-    if kernel_order == 'oixy':
-        kernels = kernels.swapaxes(0, 1)
-    elif kernel_order != 'ioxy':
-        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
-    biases = numpy.arange(2).astype(dtype)
-    igrads = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
-    true_ograds = numpy.array(
-        [[[[ 328.,  605.,  567.,  261.],
-           [ 534.,  976.,  908.,  414.],
-           [ 426.,  772.,  704.,  318.],
-           [ 170.,  305.,  275.,  123.]],
-          [[  80.,  125.,  119.,   45.],
-           [  86.,  112.,  108.,   30.],
-           [  74.,  100.,   96.,   30.],
-           [  18.,   17.,   19.,    3.]],
-          [[-168., -355., -329., -171.],
-           [-362., -752., -692., -354.],
-           [-278., -572., -512., -258.],
-           [-134., -271., -237., -117.]]],
-         [[[ -32.,  -79., -117.,  -63.],
-           [-114., -248., -316., -162.],
-           [-222., -452., -520., -258.],
-           [-118., -235., -265., -129.]],
-          [[   8.,   17.,   11.,    9.],
-           [  14.,   40.,   36.,   30.],
-           [   2.,   28.,   24.,   30.],
-           [  18.,   53.,   55.,   39.]],
-          [[  48.,  113.,  139.,   81.],
-           [ 142.,  328.,  388.,  222.],
-           [ 226.,  508.,  568.,  318.],
-           [ 154.,  341.,  375.,  207.]]]], dtype=dtype)
-    try:
-        orig_params = layer.get_params()
-        if kernels_first:
-            layer.set_params([kernels, biases])
-        else:
-            layer.set_params([biases, kernels])
-        layer_deltas, layer_ograds = layer.bprop(None, igrads)
-        assert layer_deltas.shape == igrads.shape, (
-            'Layer bprop give incorrectly shaped deltas output.'
-            'Correct shape is {0} but returned shape is {1}.'
-            .format(igrads.shape, layer_deltas.shape)
-        )
-        assert numpy.allclose(layer_deltas, igrads), (
-            'Layer bprop does not give correct deltas output. '
-            'Correct output is {0}\n but returned output is {1}.'
-            .format(igrads, layer_deltas)
-        )
-        assert layer_ograds.shape == true_ograds.shape, (
-            'Layer bprop gives incorrect shaped ograds output. '
-            'Correct shape is {0} but returned shape is {1}.'
-            .format(true_ograds.shape, layer_ograds.shape)
-        )
-        assert numpy.allclose(layer_ograds, true_ograds), (
-            'Layer bprop does not give correct ograds output. '
-            'Correct output is {0}\n but returned output is {1}.'
-            .format(true_ograds, layer_ograds)
-        )
-    finally:
-        layer.set_params(orig_params)
-    return True
-
-
-def test_conv_linear_pgrads(layer, kernel_order='ioxy', kernels_first=True,
-                            dtype=numpy.float):
-    """
-    Tests parameter gradients backpropagation method of a convolutional layer.
-
-    Checks the outputs of `pgrads` method for a fixed input against known
-    reference values for the outputs and raises an AssertionError if
-    the outputted values are not consistent with the reference values. If
-    tests are all passed returns True.
-
-    Parameters
-    ----------
-    layer : instance of Layer subclass
-        Convolutional (linear only) layer implementation. It must implement
-        the methods `get_params`, `set_params` and `pgrads`.
-    kernel_order : string
-        Specifes dimension ordering assumed for convolutional kernels
-        passed to `layer`. Default is `ioxy` which corresponds to:
-            input channels, output channels, image x, image y
-        The other option is 'oixy' which corresponds to
-            output channels, input channels, image x, image y
-        Any other value will raise a ValueError exception.
-    kernels_first : boolean
-        Specifies order in which parameters are passed to and returned from
-        `get_params` and `set_params`. Default is True which corresponds
-        to signatures of `get_params` and `set_params` being:
-            kernels, biases = layer.get_params()
-            layer.set_params([kernels, biases])
-        If False this corresponds to signatures of `get_params` and
-        `set_params` being:
-            biases, kernels = layer.get_params()
-            layer.set_params([biases, kernels])
-    dtype : numpy data type
-        Data type to use in numpy arrays passed to layer methods. Default
-        is `numpy.float`.
-
-    Raises
-    ------
-    AssertionError
-        Raised if output of `layer.pgrads` is inconsistent with reference
-        values either in shape or values.
-    ValueError
-        Raised if `kernel_order` is not a valid order string.
-    """
-    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
-    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
-    biases = numpy.arange(2).astype(dtype)
-    deltas = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
-    true_kernel_grads = numpy.array(
-        [[[[  390.,   264.],
-           [ -114.,  -240.]],
-          [[ 5088.,  5124.],
-           [ 5232.,  5268.]]],
-         [[[-1626., -1752.],
-           [-2130., -2256.]],
-          [[ 5664.,  5700.],
-           [ 5808.,  5844.]]],
-         [[[-3642., -3768.],
-           [-4146., -4272.]],
-          [[ 6240.,  6276.],
-           [ 6384.,  6420.]]]], dtype=dtype)
-    if kernel_order == 'oixy':
-        kernels = kernels.swapaxes(0, 1)
-        true_kernel_grads = true_kernel_grads.swapaxes(0, 1)
-    elif kernel_order != 'ioxy':
-        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
-    true_bias_grads = numpy.array([-126., 36.], dtype=dtype)
-    try:
-        orig_params = layer.get_params()
-        if kernels_first:
-            layer.set_params([kernels, biases])
-        else:
-            layer.set_params([biases, kernels])
-        layer_kernel_grads, layer_bias_grads = layer.pgrads(inputs, deltas)
-        assert layer_kernel_grads.shape == true_kernel_grads.shape, (
-            'Layer pgrads gives incorrect shaped kernel gradients output. '
-            'Correct shape is {0} but returned shape is {1}.'
-            .format(true_kernel_grads.shape, layer_kernel_grads.shape)
-        )
-        assert numpy.allclose(layer_kernel_grads, true_kernel_grads), (
-            'Layer pgrads does not give correct kernel gradients output. '
-            'Correct output is {0}\n but returned output is {1}.'
-            .format(true_kernel_grads, layer_kernel_grads)
-        )
-        assert layer_bias_grads.shape == true_bias_grads.shape, (
-            'Layer pgrads gives incorrect shaped bias gradients output. '
-            'Correct shape is {0} but returned shape is {1}.'
-            .format(true_bias_grads.shape, layer_bias_grads.shape)
-        )
-        assert numpy.allclose(layer_bias_grads, true_bias_grads), (
-            'Layer pgrads does not give correct bias gradients output. '
-            'Correct output is {0}\n but returned output is {1}.'
-            .format(true_bias_grads, layer_bias_grads)
-        )
-    finally:
-        layer.set_params(orig_params)
-    return True
-
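Editor's note: the patch above replaces the old get_rate/get_next_rate scheduler hierarchy with a single-method interface. A scheduler now only needs to implement update_learning_rule(learning_rule, epoch_number), which is run at the beginning of each epoch and mutates attributes of the learning rule in place. A minimal sketch of how a training loop might drive this interface follows; the SimpleLearningRule class and the loop body are hypothetical stand-ins, not part of this patch:

    from mlp.schedulers import ConstantLearningRateScheduler

    class SimpleLearningRule(object):
        # Hypothetical learning rule exposing a mutable learning_rate attribute.
        def __init__(self, learning_rate=0.1):
            self.learning_rate = learning_rate

    learning_rule = SimpleLearningRule()
    schedulers = [ConstantLearningRateScheduler(learning_rate=0.05)]

    for epoch_number in range(5):
        # Schedulers run at the beginning of each epoch, per the docstring above.
        for scheduler in schedulers:
            scheduler.update_learning_rule(learning_rule, epoch_number)
        # ... run one epoch of training using learning_rule.learning_rate ...

    assert learning_rule.learning_rate == 0.05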
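The same shape accommodates schedules that vary across epochs. As an illustrative sketch only (this class is not part of the patch), a reciprocal-decay scheduler that sets the rate to eta_0 / (1 + t / r) at epoch t:

    class TimeDependentLearningRateScheduler(object):
        # Hypothetical scheduler: decays the rate as eta_0 / (1 + t / r).

        def __init__(self, init_learning_rate, decay_rate):
            assert decay_rate > 0., 'decay_rate must be positive.'
            self.init_learning_rate = init_learning_rate
            self.decay_rate = decay_rate

        def update_learning_rule(self, learning_rule, epoch_number):
            # Same interface as ConstantLearningRateScheduler in the patch.
            learning_rule.learning_rate = self.init_learning_rate / (
                1. + epoch_number / float(self.decay_rate))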
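For reference, the deleted mlp/utils.py helpers were built around the central-difference rule quoted in the removed docstring, df(x)/dx = (f(x+eps) - f(x-eps)) / (2*eps). A self-contained Python 3 sketch of that rule for a scalar-valued function, for readers who still need a gradient check (illustrative only; the removed version additionally threaded keyword arguments through to layer wrappers and expected f to return a (value, extras) pair):

    import numpy as np

    def numerical_gradient(f, x, eps=1e-4):
        # Central-difference estimate of df/dx for scalar-valued f.
        x = x.astype(float)  # copy, so the caller's array is untouched
        grad = np.zeros_like(x)
        flat_x, flat_grad = x.ravel(), grad.ravel()  # views onto x and grad
        for i in range(flat_x.shape[0]):
            orig = flat_x[i]
            flat_x[i] = orig + eps
            f_plus = f(x)
            flat_x[i] = orig - eps
            f_minus = f(x)
            flat_x[i] = orig
            flat_grad[i] = (f_plus - f_minus) / (2. * eps)
        return grad

    # Example: the gradient of sum(x**2) is 2 * x.
    x = np.array([1., 2., 3.])
    assert np.allclose(numerical_gradient(lambda v: np.sum(v ** 2), x), 2. * x)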