mlpractical/mlp/utils.py
2015-12-28 16:15:04 +00:00

362 lines
14 KiB
Python

# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh
import numpy
from mlp.layers import Layer
def numerical_gradient(f, x, eps=1e-4, **kwargs):
    """
    Estimates the gradient of `f` at `x` using central finite differences.

    Implements the following numerical gradient rule
        df(x)/dx = (f(x+eps)-f(x-eps))/(2eps)
    applied to one element of `x` at a time.

    Parameters
    ----------
    f : callable
        Called as `f(x, **kwargs)`. Must return a 2-tuple whose first
        element is the scalar function value; the second element is
        ignored here.
    x : numpy.ndarray
        Point at which the gradient is estimated. It is not modified; a
        copy is perturbed instead.
    eps : float
        Size of the perturbation applied to each element.
    **kwargs
        Forwarded unchanged to every call of `f`.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `x` holding the estimated partial
        derivatives.
    """
    xc = x.copy()
    # Integer/boolean arrays would silently truncate `xx + eps` back to `xx`
    # and produce an all-zero gradient, so perturb a floating point copy.
    if xc.dtype.kind in 'biu':
        xc = xc.astype(float)
    g = numpy.zeros_like(xc)
    # Flat views: writing through them updates `xc` and `g` in place while
    # `f` still receives `xc` in its original shape.
    xf = xc.ravel()
    gf = g.ravel()
    # `range` works on both Python 2 and 3 (`xrange` is Python 2 only).
    for i in range(xf.shape[0]):
        xx = xf[i]
        xf[i] = xx + eps
        fp_eps, _ = f(xc, **kwargs)
        xf[i] = xx - eps
        fm_eps, _ = f(xc, **kwargs)
        # Restore the element before moving on to the next one.
        xf[i] = xx
        gf[i] = (fp_eps - fm_eps) / (2 * eps)
    return g
def verify_gradient(f, x, eps=1e-4, tol=1e-6, **kwargs):
    """
    Compares the numerical and analytical gradients.

    Parameters
    ----------
    f : callable
        Called as `f(x=x, **kwargs)`. Must return a 2-tuple
        `(value, gradient)` where `gradient` is the analytical gradient of
        `value` with respect to `x`.
    x : numpy.ndarray
        Point at which the gradients are compared.
    eps : float
        Perturbation size passed to `numerical_gradient`.
    tol : float
        Threshold on the L2 norm of the gradient difference. The check
        passes if either the absolute difference, or the difference
        relative to the larger of the two gradient norms, is below `tol`.
    **kwargs
        Forwarded unchanged to every call of `f`.

    Returns
    -------
    bool
        True if the gradients agree.

    Raises
    ------
    Exception
        If the numerical and analytical gradients differ by more than the
        tolerance.
    """
    _, fgrad = f(x=x, **kwargs)
    # `tol` only controls the comparison below. It is deliberately not
    # forwarded: `numerical_gradient` would pass it on to `f` as an extra
    # keyword argument that the analytical call above never receives.
    ngrad = numerical_gradient(f=f, x=x, eps=eps, **kwargs)

    fgradnorm = numpy.sqrt(numpy.sum(fgrad**2))
    ngradnorm = numpy.sqrt(numpy.sum(ngrad**2))
    diffnorm = numpy.sqrt(numpy.sum((fgrad-ngrad)**2))

    # The relative test is only meaningful (and free of a division by zero)
    # when at least one of the gradients is non-zero.
    norm = numpy.maximum(fgradnorm, ngradnorm)
    gradients_agree = diffnorm < tol or (norm > 0 and diffnorm/norm < tol)
    if not gradients_agree:
        raise Exception("Numerical and analytical gradients "
                        "are different: %s != %s!" % (ngrad, fgrad))
    return True
def verify_layer_gradient(layer, x, eps=1e-4, tol=1e-6):
    """
    Checks the input gradients returned by `layer.bprop` numerically.

    The layer is wrapped into a scalar function (the sum of all outputs of
    `layer.fprop`) whose analytical gradient with respect to `x` is taken
    from `layer.bprop` fed with all-ones gradients; that pair is then
    handed to `verify_gradient`.

    Parameters
    ----------
    layer : Layer
        Layer instance to check.
    x : numpy.ndarray
        Input at which the gradients are compared.
    eps : float
        Perturbation size passed on to `verify_gradient`.
    tol : float
        Tolerance passed on to `verify_gradient`.

    Returns
    -------
    Whatever `verify_gradient` returns.
    """
    assert isinstance(layer, Layer), (
        "Expected to get the instance of Layer class, got %s " % type(layer)
    )

    def summed_output_and_grads(x, **kwargs):
        # Extra keyword arguments coming from verify_gradient are accepted
        # and ignored; the layer is taken from the enclosing scope.
        outputs = layer.fprop(x)
        unit_grads = numpy.ones_like(outputs)
        _, input_grads = layer.bprop(h=outputs, igrads=unit_grads)
        return numpy.sum(outputs), input_grads

    return verify_gradient(
        f=summed_output_and_grads, x=x, eps=eps, tol=tol, layer=layer
    )
def test_conv_linear_fprop(layer, kernel_order='ioxy', kernels_first=True,
                           dtype=float):
    """
    Tests forward propagation method of a convolutional layer.

    Checks the outputs of `fprop` method for a fixed input against known
    reference values for the outputs and raises an AssertionError if
    the outputted values are not consistent with the reference values. If
    tests are all passed returns True.

    The parameters the layer had on entry are put back before returning,
    whether or not the checks pass.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `fprop`.
    kernel_order : string
        Specifies dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is `ioxy` which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies order in which parameters are passed to and returned from
        `get_params` and `set_params`. Default is True which corresponds
        to signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to signatures of `get_params` and
        `set_params` being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is the builtin `float`, which is what the `numpy.float` alias
        (removed in NumPy 1.24) used to refer to.

    Returns
    -------
    bool
        True if all checks pass.

    Raises
    ------
    AssertionError
        Raised if output of `layer.fprop` is inconsistent with reference
        values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    biases = numpy.arange(2).astype(dtype)
    true_output = numpy.array(
        [[[[  496.,   466.,   436.],
           [  376.,   346.,   316.],
           [  256.,   226.,   196.]],
          [[ 1385.,  1403.,  1421.],
           [ 1457.,  1475.,  1493.],
           [ 1529.,  1547.,  1565.]]],
         [[[ -944.,  -974., -1004.],
           [-1064., -1094., -1124.],
           [-1184., -1214., -1244.]],
          [[ 2249.,  2267.,  2285.],
           [ 2321.,  2339.,  2357.],
           [ 2393.,  2411.,  2429.]]]], dtype=dtype)
    # Fetch the original parameters before entering the try block: if this
    # call fails there is nothing to restore, and the `finally` clause must
    # not mask the real error with an unbound `orig_params`.
    orig_params = layer.get_params()
    try:
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_output = layer.fprop(inputs)
        assert layer_output.shape == true_output.shape, (
            'Layer fprop gives incorrect shaped output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_output.shape, layer_output.shape)
        )
        assert numpy.allclose(layer_output, true_output), (
            'Layer fprop does not give correct output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_output, layer_output)
        )
    finally:
        # Always leave the layer with the parameters it came in with.
        layer.set_params(orig_params)
    return True


# The name matches the `test_*` pattern but the function needs a `layer`
# argument, so ask test collectors that honour `__test__` not to treat it
# as a test case when it is imported into a test module.
test_conv_linear_fprop.__test__ = False
def test_conv_linear_bprop(layer, kernel_order='ioxy', kernels_first=True,
                           dtype=float):
    """
    Tests input gradients backpropagation method of a convolutional layer.

    Checks the outputs of `bprop` method for a fixed input against known
    reference values for the outputs and raises an AssertionError if
    the outputted values are not consistent with the reference values. If
    tests are all passed returns True.

    `layer.bprop` is called as `layer.bprop(None, igrads)` and must return
    a `(deltas, ograds)` pair; `deltas` is expected to equal `igrads`.
    The parameters the layer had on entry are put back before returning,
    whether or not the checks pass.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `bprop`.
    kernel_order : string
        Specifies dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is `ioxy` which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies order in which parameters are passed to and returned from
        `get_params` and `set_params`. Default is True which corresponds
        to signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to signatures of `get_params` and
        `set_params` being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is the builtin `float`, which is what the `numpy.float` alias
        (removed in NumPy 1.24) used to refer to.

    Returns
    -------
    bool
        True if all checks pass.

    Raises
    ------
    AssertionError
        Raised if output of `layer.bprop` is inconsistent with reference
        values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    biases = numpy.arange(2).astype(dtype)
    igrads = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
    true_ograds = numpy.array(
        [[[[ 328.,  605.,  567.,  261.],
           [ 534.,  976.,  908.,  414.],
           [ 426.,  772.,  704.,  318.],
           [ 170.,  305.,  275.,  123.]],
          [[  80.,  125.,  119.,   45.],
           [  86.,  112.,  108.,   30.],
           [  74.,  100.,   96.,   30.],
           [  18.,   17.,   19.,    3.]],
          [[-168., -355., -329., -171.],
           [-362., -752., -692., -354.],
           [-278., -572., -512., -258.],
           [-134., -271., -237., -117.]]],
         [[[ -32.,  -79., -117.,  -63.],
           [-114., -248., -316., -162.],
           [-222., -452., -520., -258.],
           [-118., -235., -265., -129.]],
          [[   8.,   17.,   11.,    9.],
           [  14.,   40.,   36.,   30.],
           [   2.,   28.,   24.,   30.],
           [  18.,   53.,   55.,   39.]],
          [[  48.,  113.,  139.,   81.],
           [ 142.,  328.,  388.,  222.],
           [ 226.,  508.,  568.,  318.],
           [ 154.,  341.,  375.,  207.]]]], dtype=dtype)
    # Fetch the original parameters before entering the try block: if this
    # call fails there is nothing to restore, and the `finally` clause must
    # not mask the real error with an unbound `orig_params`.
    orig_params = layer.get_params()
    try:
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_deltas, layer_ograds = layer.bprop(None, igrads)
        assert layer_deltas.shape == igrads.shape, (
            'Layer bprop give incorrectly shaped deltas output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(igrads.shape, layer_deltas.shape)
        )
        assert numpy.allclose(layer_deltas, igrads), (
            'Layer bprop does not give correct deltas output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(igrads, layer_deltas)
        )
        assert layer_ograds.shape == true_ograds.shape, (
            'Layer bprop gives incorrect shaped ograds output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_ograds.shape, layer_ograds.shape)
        )
        assert numpy.allclose(layer_ograds, true_ograds), (
            'Layer bprop does not give correct ograds output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_ograds, layer_ograds)
        )
    finally:
        # Always leave the layer with the parameters it came in with.
        layer.set_params(orig_params)
    return True


# The name matches the `test_*` pattern but the function needs a `layer`
# argument, so ask test collectors that honour `__test__` not to treat it
# as a test case when it is imported into a test module.
test_conv_linear_bprop.__test__ = False
def test_conv_linear_pgrads(layer, kernel_order='ioxy', kernels_first=True,
                            dtype=float):
    """
    Tests parameter gradients backpropagation method of a convolutional layer.

    Checks the outputs of `pgrads` method for a fixed input against known
    reference values for the outputs and raises an AssertionError if
    the outputted values are not consistent with the reference values. If
    tests are all passed returns True.

    `layer.pgrads` is called as `layer.pgrads(inputs, deltas)` and its
    result is unpacked as `(kernel_grads, bias_grads)`. The parameters the
    layer had on entry are put back before returning, whether or not the
    checks pass.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `pgrads`.
    kernel_order : string
        Specifies dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is `ioxy` which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies order in which parameters are passed to and returned from
        `get_params` and `set_params`. Default is True which corresponds
        to signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to signatures of `get_params` and
        `set_params` being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is the builtin `float`, which is what the `numpy.float` alias
        (removed in NumPy 1.24) used to refer to.

    Returns
    -------
    bool
        True if all checks pass.

    Raises
    ------
    AssertionError
        Raised if output of `layer.pgrads` is inconsistent with reference
        values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    biases = numpy.arange(2).astype(dtype)
    deltas = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
    true_kernel_grads = numpy.array(
        [[[[  390.,   264.],
           [ -114.,  -240.]],
          [[ 5088.,  5124.],
           [ 5232.,  5268.]]],
         [[[-1626., -1752.],
           [-2130., -2256.]],
          [[ 5664.,  5700.],
           [ 5808.,  5844.]]],
         [[[-3642., -3768.],
           [-4146., -4272.]],
          [[ 6240.,  6276.],
           [ 6384.,  6420.]]]], dtype=dtype)
    if kernel_order == 'oixy':
        # The reference gradients are stored in 'ioxy' order, so they are
        # re-ordered together with the kernels.
        kernels = kernels.swapaxes(0, 1)
        true_kernel_grads = true_kernel_grads.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    true_bias_grads = numpy.array([-126., 36.], dtype=dtype)
    # Fetch the original parameters before entering the try block: if this
    # call fails there is nothing to restore, and the `finally` clause must
    # not mask the real error with an unbound `orig_params`.
    orig_params = layer.get_params()
    try:
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        # NOTE(review): the result is always unpacked kernels-first, even
        # when `kernels_first` is False - confirm that layers using the
        # biases-first parameter order still return pgrads in this order.
        layer_kernel_grads, layer_bias_grads = layer.pgrads(inputs, deltas)
        assert layer_kernel_grads.shape == true_kernel_grads.shape, (
            'Layer pgrads gives incorrect shaped kernel gradients output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_kernel_grads.shape, layer_kernel_grads.shape)
        )
        assert numpy.allclose(layer_kernel_grads, true_kernel_grads), (
            'Layer pgrads does not give correct kernel gradients output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_kernel_grads, layer_kernel_grads)
        )
        assert layer_bias_grads.shape == true_bias_grads.shape, (
            'Layer pgrads gives incorrect shaped bias gradients output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_bias_grads.shape, layer_bias_grads.shape)
        )
        assert numpy.allclose(layer_bias_grads, true_bias_grads), (
            'Layer pgrads does not give correct bias gradients output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_bias_grads, layer_bias_grads)
        )
    finally:
        # Always leave the layer with the parameters it came in with.
        layer.set_params(orig_params)
    return True


# The name matches the `test_*` pattern but the function needs a `layer`
# argument, so ask test collectors that honour `__test__` not to treat it
# as a test case when it is imported into a test module.
test_conv_linear_pgrads.__test__ = False