Adding new schedulers module.

Matt Graham 2016-10-10 09:23:53 +01:00
parent fa2f6b195c
commit 302d44371a
2 changed files with 27 additions and 526 deletions

View File

@@ -1,172 +1,34 @@
-# Machine Learning Practical (INFR11119),
-# Pawel Swietojanski, University of Edinburgh
-
-import logging
-
-
-class LearningRateScheduler(object):
-    """
-    Define an interface for determining learning rates
-    """
-    def __init__(self, max_epochs=100):
-        self.epoch = 0
-        self.max_epochs = max_epochs
-
-    def get_rate(self):
-        raise NotImplementedError()
-
-    def get_next_rate(self, current_accuracy=None):
-        self.epoch += 1
-
-
-class LearningRateList(LearningRateScheduler):
-    def __init__(self, learning_rates_list, max_epochs):
-        super(LearningRateList, self).__init__(max_epochs)
-        assert isinstance(learning_rates_list, list), (
-            "The learning_rates_list argument expected"
-            " to be of type list, got %s" % type(learning_rates_list)
-        )
-        self.lr_list = learning_rates_list
-
-    def get_rate(self):
-        if self.epoch < len(self.lr_list):
-            return self.lr_list[self.epoch]
-        return 0.0
-
-    def get_next_rate(self, current_accuracy=None):
-        super(LearningRateList, self).get_next_rate(current_accuracy=None)
-        return self.get_rate()
-
-
-class LearningRateFixed(LearningRateList):
-    def __init__(self, learning_rate, max_epochs):
-        assert learning_rate > 0, (
-            "learning rate expected to be > 0, got %f" % learning_rate
-        )
-        super(LearningRateFixed, self).__init__([learning_rate], max_epochs)
-
-    def get_rate(self):
-        if self.epoch < self.max_epochs:
-            return self.lr_list[0]
-        return 0.0
-
-    def get_next_rate(self, current_accuracy=None):
-        super(LearningRateFixed, self).get_next_rate(current_accuracy=None)
-        return self.get_rate()
-
-
-class LearningRateNewBob(LearningRateScheduler):
-    """
-    NewBob learning rate schedule.
-
-    Fixed learning rate until the validation set stops improving, then
-    exponential decay.
-    """
-    def __init__(self, start_rate, scale_by=.5, max_epochs=99,
-                 min_derror_ramp_start=.5, min_derror_stop=.5, init_error=100.0,
-                 patience=0, zero_rate=None, ramping=False):
-        """
-        :type start_rate: float
-        :param start_rate:
-
-        :type scale_by: float
-        :param scale_by:
-
-        :type max_epochs: int
-        :param max_epochs:
-
-        :type min_derror_ramp_start: float
-        :param min_derror_ramp_start:
-
-        :type min_derror_stop: float
-        :param min_derror_stop:
-
-        :type init_error: float
-        :param init_error:
-        """
-        self.start_rate = start_rate
-        self.init_error = init_error
-        self.init_patience = patience
-
-        self.rate = start_rate
-        self.scale_by = scale_by
-        self.max_epochs = max_epochs
-        self.min_derror_ramp_start = min_derror_ramp_start
-        self.min_derror_stop = min_derror_stop
-        self.lowest_error = init_error
-
-        self.epoch = 1
-        self.ramping = ramping
-        self.patience = patience
-        self.zero_rate = zero_rate
-
-    def reset(self):
-        self.rate = self.start_rate
-        self.lowest_error = self.init_error
-        self.epoch = 1
-        self.ramping = False
-        self.patience = self.init_patience
-
-    def get_rate(self):
-        if self.epoch == 1 and self.zero_rate is not None:
-            return self.zero_rate
-        return self.rate
-
-    def get_next_rate(self, current_accuracy):
-        """
-        :type current_accuracy: float
-        :param current_accuracy: current proportion correctly classified
-        """
-        current_error = 1. - current_accuracy
-        diff_error = 0.0
-
-        if (self.max_epochs > 10000) or (self.epoch >= self.max_epochs):
-            # logging.debug('Setting rate to 0.0. max_epochs or epoch >= max_epochs')
-            self.rate = 0.0
-        else:
-            diff_error = self.lowest_error - current_error
-
-            if current_error < self.lowest_error:
-                self.lowest_error = current_error
-
-            if self.ramping:
-                if diff_error < self.min_derror_stop:
-                    if self.patience > 0:
-                        # logging.debug('Patience decreased to %f' % self.patience)
-                        self.patience -= 1
-                        self.rate *= self.scale_by
-                    else:
-                        # logging.debug('diff_error (%f) < min_derror_stop (%f)' % (diff_error, self.min_derror_stop))
-                        self.rate = 0.0
-                else:
-                    self.rate *= self.scale_by
-            else:
-                if diff_error < self.min_derror_ramp_start:
-                    # logging.debug('Start ramping.')
-                    self.ramping = True
-                    self.rate *= self.scale_by
-
-        self.epoch += 1
-        return self.rate
-
-
-class DropoutFixed(LearningRateList):
-    def __init__(self, p_inp_keep, p_hid_keep):
-        assert 0 < p_inp_keep <= 1 and 0 < p_hid_keep <= 1, (
-            "Dropout 'keep' probabilities are supposed to be in (0, 1] range"
-        )
-        super(DropoutFixed, self).__init__([(p_inp_keep, p_hid_keep)], max_epochs=999)
-
-    def get_rate(self):
-        return self.lr_list[0]
-
-    def get_next_rate(self, current_accuracy=None):
-        return self.get_rate()
+# -*- coding: utf-8 -*-
+"""Training schedulers.
+
+This module contains classes implementing schedulers which control the
+evolution of learning rule hyperparameters (such as learning rate) over a
+training run.
+"""
+
+import numpy as np
+
+
+class ConstantLearningRateScheduler(object):
+    """Example of scheduler interface which sets a constant learning rate."""
+
+    def __init__(self, learning_rate):
+        """Construct a new constant learning rate scheduler object.
+
+        Args:
+            learning_rate: Learning rate to use in learning rule.
+        """
+        self.learning_rate = learning_rate
+
+    def update_learning_rule(self, learning_rule, epoch_number):
+        """Update the hyperparameters of the learning rule.
+
+        Run at the beginning of each epoch.
+
+        Args:
+            learning_rule: Learning rule object being used in training run,
+                any scheduled hyperparameters to be altered should be
+                attributes of this object.
+            epoch_number: Integer index of training epoch about to be run.
+        """
+        learning_rule.learning_rate = self.learning_rate
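The new module replaces the old get_rate/get_next_rate interface with a single update_learning_rule(learning_rule, epoch_number) hook, run at the start of each epoch, which mutates hyperparameter attributes on the learning rule in place. A minimal sketch of how a training loop might drive it follows; the SimpleLearningRule class and the loop are illustrative assumptions, not part of this commit:

class SimpleLearningRule(object):
    """Hypothetical learning rule exposing a learning_rate attribute."""
    def __init__(self, learning_rate=0.1):
        self.learning_rate = learning_rate


learning_rule = SimpleLearningRule()
schedulers = [ConstantLearningRateScheduler(learning_rate=0.01)]

for epoch_number in range(10):
    # Schedulers run at the beginning of each epoch and update the
    # learning rule's hyperparameter attributes in place.
    for scheduler in schedulers:
        scheduler.update_learning_rule(learning_rule, epoch_number)
    # ... run one epoch of training with learning_rule here ...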

View File

@@ -1,361 +0,0 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import numpy

from mlp.layers import Layer


def numerical_gradient(f, x, eps=1e-4, **kwargs):
    """
    Implements the central finite-difference approximation to the gradient:
    df(x)/dx = (f(x + eps) - f(x - eps)) / (2 * eps)
    """
    xc = x.copy()
    g = numpy.zeros_like(xc)
    xf = xc.ravel()
    gf = g.ravel()
    # Perturb one element at a time through raveled views of the copies.
    for i in xrange(xf.shape[0]):
        xx = xf[i]
        xf[i] = xx + eps
        fp_eps, ___ = f(xc, **kwargs)
        xf[i] = xx - eps
        fm_eps, ___ = f(xc, **kwargs)
        xf[i] = xx
        gf[i] = (fp_eps - fm_eps) / (2 * eps)
    return g
def verify_gradient(f, x, eps=1e-4, tol=1e-6, **kwargs):
    """
    Compares the numerical and analytical gradients.
    """
    fval, fgrad = f(x=x, **kwargs)
    ngrad = numerical_gradient(f=f, x=x, eps=eps, **kwargs)

    fgradnorm = numpy.sqrt(numpy.sum(fgrad**2))
    ngradnorm = numpy.sqrt(numpy.sum(ngrad**2))
    diffnorm = numpy.sqrt(numpy.sum((fgrad - ngrad)**2))

    if fgradnorm > 0 or ngradnorm > 0:
        norm = numpy.maximum(fgradnorm, ngradnorm)
        if not (diffnorm < tol or diffnorm / norm < tol):
            raise Exception("Numerical and analytical gradients "
                            "are different: %s != %s!" % (ngrad, fgrad))
    else:
        if not (diffnorm < tol):
            raise Exception("Numerical and analytical gradients "
                            "are different: %s != %s!" % (ngrad, fgrad))
    return True
def verify_layer_gradient(layer, x, eps=1e-4, tol=1e-6):
    assert isinstance(layer, Layer), (
        "Expected an instance of the Layer class, got"
        " %s " % type(layer)
    )

    def grad_layer_wrapper(x, **kwargs):
        h = layer.fprop(x)
        deltas, ograds = layer.bprop(h=h, igrads=numpy.ones_like(h))
        return numpy.sum(h), ograds

    return verify_gradient(f=grad_layer_wrapper, x=x, eps=eps, tol=tol, layer=layer)
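For reference, a usage sketch of the removed checker on a function with a known gradient: the quadratic f(x) = sum(x**2), whose analytic gradient is 2*x. The quadratic function below is illustrative, not part of the original module; it relies on the numpy import and verify_gradient definition above.

def quadratic(x):
    # Returns a (value, gradient) pair in the form verify_gradient expects.
    return numpy.sum(x**2), 2 * x

x = numpy.random.uniform(-1, 1, size=(3, 4))
verify_gradient(f=quadratic, x=x, eps=1e-4, tol=1e-6)  # returns True or raises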
def test_conv_linear_fprop(layer, kernel_order='ioxy', kernels_first=True,
                           dtype=numpy.float):
    """
    Tests the forward propagation method of a convolutional layer.

    Checks the outputs of the `fprop` method for a fixed input against known
    reference values and raises an AssertionError if the output values are
    not consistent with the reference values. Returns True if all tests pass.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `fprop`.
    kernel_order : string
        Specifies the dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is 'ioxy' which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to:
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies the order in which parameters are passed to and returned
        from `get_params` and `set_params`. Default is True which corresponds
        to the signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to the signatures being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is `numpy.float`.

    Raises
    ------
    AssertionError
        Raised if the output of `layer.fprop` is inconsistent with the
        reference values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    biases = numpy.arange(2).astype(dtype)
    true_output = numpy.array(
        [[[[  496.,   466.,   436.],
           [  376.,   346.,   316.],
           [  256.,   226.,   196.]],
          [[ 1385.,  1403.,  1421.],
           [ 1457.,  1475.,  1493.],
           [ 1529.,  1547.,  1565.]]],
         [[[ -944.,  -974., -1004.],
           [-1064., -1094., -1124.],
           [-1184., -1214., -1244.]],
          [[ 2249.,  2267.,  2285.],
           [ 2321.,  2339.,  2357.],
           [ 2393.,  2411.,  2429.]]]], dtype=dtype)
    try:
        orig_params = layer.get_params()
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_output = layer.fprop(inputs)
        assert layer_output.shape == true_output.shape, (
            'Layer fprop gives incorrectly shaped output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_output.shape, layer_output.shape)
        )
        assert numpy.allclose(layer_output, true_output), (
            'Layer fprop does not give correct output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_output, layer_output)
        )
    finally:
        layer.set_params(orig_params)
    return True
def test_conv_linear_bprop(layer, kernel_order='ioxy', kernels_first=True,
                           dtype=numpy.float):
    """
    Tests the input gradients backpropagation method of a convolutional layer.

    Checks the outputs of the `bprop` method for a fixed input against known
    reference values and raises an AssertionError if the output values are
    not consistent with the reference values. Returns True if all tests pass.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `bprop`.
    kernel_order : string
        Specifies the dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is 'ioxy' which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to:
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies the order in which parameters are passed to and returned
        from `get_params` and `set_params`. Default is True which corresponds
        to the signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to the signatures being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is `numpy.float`.

    Raises
    ------
    AssertionError
        Raised if the output of `layer.bprop` is inconsistent with the
        reference values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    biases = numpy.arange(2).astype(dtype)
    igrads = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
    true_ograds = numpy.array(
        [[[[ 328.,  605.,  567.,  261.],
           [ 534.,  976.,  908.,  414.],
           [ 426.,  772.,  704.,  318.],
           [ 170.,  305.,  275.,  123.]],
          [[  80.,  125.,  119.,   45.],
           [  86.,  112.,  108.,   30.],
           [  74.,  100.,   96.,   30.],
           [  18.,   17.,   19.,    3.]],
          [[-168., -355., -329., -171.],
           [-362., -752., -692., -354.],
           [-278., -572., -512., -258.],
           [-134., -271., -237., -117.]]],
         [[[ -32.,  -79., -117.,  -63.],
           [-114., -248., -316., -162.],
           [-222., -452., -520., -258.],
           [-118., -235., -265., -129.]],
          [[   8.,   17.,   11.,    9.],
           [  14.,   40.,   36.,   30.],
           [   2.,   28.,   24.,   30.],
           [  18.,   53.,   55.,   39.]],
          [[  48.,  113.,  139.,   81.],
           [ 142.,  328.,  388.,  222.],
           [ 226.,  508.,  568.,  318.],
           [ 154.,  341.,  375.,  207.]]]], dtype=dtype)
    try:
        orig_params = layer.get_params()
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_deltas, layer_ograds = layer.bprop(None, igrads)
        assert layer_deltas.shape == igrads.shape, (
            'Layer bprop gives incorrectly shaped deltas output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(igrads.shape, layer_deltas.shape)
        )
        assert numpy.allclose(layer_deltas, igrads), (
            'Layer bprop does not give correct deltas output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(igrads, layer_deltas)
        )
        assert layer_ograds.shape == true_ograds.shape, (
            'Layer bprop gives incorrectly shaped ograds output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_ograds.shape, layer_ograds.shape)
        )
        assert numpy.allclose(layer_ograds, true_ograds), (
            'Layer bprop does not give correct ograds output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_ograds, layer_ograds)
        )
    finally:
        layer.set_params(orig_params)
    return True
def test_conv_linear_pgrads(layer, kernel_order='ioxy', kernels_first=True,
                            dtype=numpy.float):
    """
    Tests the parameter gradients backpropagation method of a convolutional
    layer.

    Checks the outputs of the `pgrads` method for a fixed input against known
    reference values and raises an AssertionError if the output values are
    not consistent with the reference values. Returns True if all tests pass.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `pgrads`.
    kernel_order : string
        Specifies the dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is 'ioxy' which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to:
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies the order in which parameters are passed to and returned
        from `get_params` and `set_params`. Default is True which corresponds
        to the signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to the signatures being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is `numpy.float`.

    Raises
    ------
    AssertionError
        Raised if the output of `layer.pgrads` is inconsistent with the
        reference values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    biases = numpy.arange(2).astype(dtype)
    deltas = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
    true_kernel_grads = numpy.array(
        [[[[  390.,   264.],
           [ -114.,  -240.]],
          [[ 5088.,  5124.],
           [ 5232.,  5268.]]],
         [[[-1626., -1752.],
           [-2130., -2256.]],
          [[ 5664.,  5700.],
           [ 5808.,  5844.]]],
         [[[-3642., -3768.],
           [-4146., -4272.]],
          [[ 6240.,  6276.],
           [ 6384.,  6420.]]]], dtype=dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
        true_kernel_grads = true_kernel_grads.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    true_bias_grads = numpy.array([-126., 36.], dtype=dtype)
    try:
        orig_params = layer.get_params()
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_kernel_grads, layer_bias_grads = layer.pgrads(inputs, deltas)
        assert layer_kernel_grads.shape == true_kernel_grads.shape, (
            'Layer pgrads gives incorrectly shaped kernel gradients output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_kernel_grads.shape, layer_kernel_grads.shape)
        )
        assert numpy.allclose(layer_kernel_grads, true_kernel_grads), (
            'Layer pgrads does not give correct kernel gradients output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_kernel_grads, layer_kernel_grads)
        )
        assert layer_bias_grads.shape == true_bias_grads.shape, (
            'Layer pgrads gives incorrectly shaped bias gradients output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_bias_grads.shape, layer_bias_grads.shape)
        )
        assert numpy.allclose(layer_bias_grads, true_bias_grads), (
            'Layer pgrads does not give correct bias gradients output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_bias_grads, layer_bias_grads)
        )
    finally:
        layer.set_params(orig_params)
    return True
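Taken together, the three helpers exercised a student-implemented convolutional layer against the fixed fixtures above (2 images, 3 input channels, 2 output channels, 4x4 inputs, 2x2 kernels). A sketch of how they were meant to be driven; the ConvLinear class and its constructor arguments are assumptions for illustration, not fixed by this file:

from mlp.layers import ConvLinear  # hypothetical layer implementation

layer = ConvLinear(num_inp_feat_maps=3, num_out_feat_maps=2,
                   image_shape=(4, 4), kernel_shape=(2, 2))
# Each helper raises an AssertionError on mismatch and returns True otherwise.
if (test_conv_linear_fprop(layer) and
        test_conv_linear_bprop(layer) and
        test_conv_linear_pgrads(layer)):
    print('All convolutional layer checks passed.')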