# Machine Learning Practical (INFR11119), # Pawel Swietojanski, University of Edinburgh import numpy from mlp.layers import Layer def numerical_gradient(f, x, eps=1e-4, **kwargs): """ Implements the following numerical gradient rule df(x)/dx = (f(x+eps)-f(x-eps))/(2eps) """ xc = x.copy() g = numpy.zeros_like(xc) xf = xc.ravel() gf = g.ravel() for i in xrange(xf.shape[0]): xx = xf[i] xf[i] = xx + eps fp_eps, ___ = f(xc, **kwargs) xf[i] = xx - eps fm_eps, ___ = f(xc, **kwargs) xf[i] = xx gf[i] = (fp_eps - fm_eps)/(2*eps) return g def verify_gradient(f, x, eps=1e-4, tol=1e-6, **kwargs): """ Compares the numerical and analytical gradients. """ fval, fgrad = f(x=x, **kwargs) ngrad = numerical_gradient(f=f, x=x, eps=eps, tol=tol, **kwargs) fgradnorm = numpy.sqrt(numpy.sum(fgrad**2)) ngradnorm = numpy.sqrt(numpy.sum(ngrad**2)) diffnorm = numpy.sqrt(numpy.sum((fgrad-ngrad)**2)) if fgradnorm > 0 or ngradnorm > 0: norm = numpy.maximum(fgradnorm, ngradnorm) if not (diffnorm < tol or diffnorm/norm < tol): raise Exception("Numerical and analytical gradients " "are different: %s != %s!" % (ngrad, fgrad)) else: if not (diffnorm < tol): raise Exception("Numerical and analytical gradients " "are different: %s != %s!" % (ngrad, fgrad)) return True def verify_layer_gradient(layer, x, eps=1e-4, tol=1e-6): assert isinstance(layer, Layer), ( "Expected to get the instance of Layer class, got" " %s " % type(layer) ) def grad_layer_wrapper(x, **kwargs): h = layer.fprop(x) deltas, ograds = layer.bprop(h=h, igrads=numpy.ones_like(h)) return numpy.sum(h), ograds return verify_gradient(f=grad_layer_wrapper, x=x, eps=eps, tol=tol, layer=layer) def test_conv_linear_fprop(layer, kernel_order='ioxy', kernels_first=True, dtype=numpy.float): """ Tests forward propagation method of a convolutional layer. Checks the outputs of `fprop` method for a fixed input against known reference values for the outputs and raises an AssertionError if the outputted values are not consistent with the reference values. If tests are all passed returns True. Parameters ---------- layer : instance of Layer subclass Convolutional (linear only) layer implementation. It must implement the methods `get_params`, `set_params` and `fprop`. kernel_order : string Specifes dimension ordering assumed for convolutional kernels passed to `layer`. Default is `ioxy` which corresponds to: input channels, output channels, image x, image y The other option is 'oixy' which corresponds to output channels, input channels, image x, image y Any other value will raise a ValueError exception. kernels_first : boolean Specifies order in which parameters are passed to and returned from `get_params` and `set_params`. Default is True which corresponds to signatures of `get_params` and `set_params` being: kernels, biases = layer.get_params() layer.set_params([kernels, biases]) If False this corresponds to signatures of `get_params` and `set_params` being: biases, kernels = layer.get_params() layer.set_params([biases, kernels]) dtype : numpy data type Data type to use in numpy arrays passed to layer methods. Default is `numpy.float`. Raises ------ AssertionError Raised if output of `layer.fprop` is inconsistent with reference values either in shape or values. ValueError Raised if `kernel_order` is not a valid order string. """ inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype) kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype) if kernel_order == 'oixy': kernels = kernels.swapaxes(0, 1) elif kernel_order != 'ioxy': raise ValueError('kernel_order must be one of "ioxy" and "oixy"') biases = numpy.arange(2).astype(dtype) true_output = numpy.array( [[[[ 496., 466., 436.], [ 376., 346., 316.], [ 256., 226., 196.]], [[ 1385., 1403., 1421.], [ 1457., 1475., 1493.], [ 1529., 1547., 1565.]]], [[[ -944., -974., -1004.], [-1064., -1094., -1124.], [-1184., -1214., -1244.]], [[ 2249., 2267., 2285.], [ 2321., 2339., 2357.], [ 2393., 2411., 2429.]]]], dtype=dtype) try: orig_params = layer.get_params() if kernels_first: layer.set_params([kernels, biases]) else: layer.set_params([biases, kernels]) layer_output = layer.fprop(inputs) assert layer_output.shape == true_output.shape, ( 'Layer fprop gives incorrect shaped output. ' 'Correct shape is {0} but returned shape is {1}.' .format(true_output.shape, layer_output.shape) ) assert numpy.allclose(layer_output, true_output), ( 'Layer fprop does not give correct output. ' 'Correct output is {0}\n but returned output is {1}.' .format(true_output, layer_output) ) finally: layer.set_params(orig_params) return True def test_conv_linear_bprop(layer, kernel_order='ioxy', kernels_first=True, dtype=numpy.float): """ Tests input gradients backpropagation method of a convolutional layer. Checks the outputs of `bprop` method for a fixed input against known reference values for the outputs and raises an AssertionError if the outputted values are not consistent with the reference values. If tests are all passed returns True. Parameters ---------- layer : instance of Layer subclass Convolutional (linear only) layer implementation. It must implement the methods `get_params`, `set_params` and `bprop`. kernel_order : string Specifes dimension ordering assumed for convolutional kernels passed to `layer`. Default is `ioxy` which corresponds to: input channels, output channels, image x, image y The other option is 'oixy' which corresponds to output channels, input channels, image x, image y Any other value will raise a ValueError exception. kernels_first : boolean Specifies order in which parameters are passed to and returned from `get_params` and `set_params`. Default is True which corresponds to signatures of `get_params` and `set_params` being: kernels, biases = layer.get_params() layer.set_params([kernels, biases]) If False this corresponds to signatures of `get_params` and `set_params` being: biases, kernels = layer.get_params() layer.set_params([biases, kernels]) dtype : numpy data type Data type to use in numpy arrays passed to layer methods. Default is `numpy.float`. Raises ------ AssertionError Raised if output of `layer.bprop` is inconsistent with reference values either in shape or values. ValueError Raised if `kernel_order` is not a valid order string. """ inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype) kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype) if kernel_order == 'oixy': kernels = kernels.swapaxes(0, 1) elif kernel_order != 'ioxy': raise ValueError('kernel_order must be one of "ioxy" and "oixy"') biases = numpy.arange(2).astype(dtype) igrads = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype) true_ograds = numpy.array( [[[[ 328., 605., 567., 261.], [ 534., 976., 908., 414.], [ 426., 772., 704., 318.], [ 170., 305., 275., 123.]], [[ 80., 125., 119., 45.], [ 86., 112., 108., 30.], [ 74., 100., 96., 30.], [ 18., 17., 19., 3.]], [[-168., -355., -329., -171.], [-362., -752., -692., -354.], [-278., -572., -512., -258.], [-134., -271., -237., -117.]]], [[[ -32., -79., -117., -63.], [-114., -248., -316., -162.], [-222., -452., -520., -258.], [-118., -235., -265., -129.]], [[ 8., 17., 11., 9.], [ 14., 40., 36., 30.], [ 2., 28., 24., 30.], [ 18., 53., 55., 39.]], [[ 48., 113., 139., 81.], [ 142., 328., 388., 222.], [ 226., 508., 568., 318.], [ 154., 341., 375., 207.]]]], dtype=dtype) try: orig_params = layer.get_params() if kernels_first: layer.set_params([kernels, biases]) else: layer.set_params([biases, kernels]) layer_deltas, layer_ograds = layer.bprop(None, igrads) assert layer_deltas.shape == igrads.shape, ( 'Layer bprop give incorrectly shaped deltas output.' 'Correct shape is {0} but returned shape is {1}.' .format(igrads.shape, layer_deltas.shape) ) assert numpy.allclose(layer_deltas, igrads), ( 'Layer bprop does not give correct deltas output. ' 'Correct output is {0}\n but returned output is {1}.' .format(igrads, layer_deltas) ) assert layer_ograds.shape == true_ograds.shape, ( 'Layer bprop gives incorrect shaped ograds output. ' 'Correct shape is {0} but returned shape is {1}.' .format(true_ograds.shape, layer_ograds.shape) ) assert numpy.allclose(layer_ograds, true_ograds), ( 'Layer bprop does not give correct ograds output. ' 'Correct output is {0}\n but returned output is {1}.' .format(true_ograds, layer_ograds) ) finally: layer.set_params(orig_params) return True def test_conv_linear_pgrads(layer, kernel_order='ioxy', kernels_first=True, dtype=numpy.float): """ Tests parameter gradients backpropagation method of a convolutional layer. Checks the outputs of `pgrads` method for a fixed input against known reference values for the outputs and raises an AssertionError if the outputted values are not consistent with the reference values. If tests are all passed returns True. Parameters ---------- layer : instance of Layer subclass Convolutional (linear only) layer implementation. It must implement the methods `get_params`, `set_params` and `pgrads`. kernel_order : string Specifes dimension ordering assumed for convolutional kernels passed to `layer`. Default is `ioxy` which corresponds to: input channels, output channels, image x, image y The other option is 'oixy' which corresponds to output channels, input channels, image x, image y Any other value will raise a ValueError exception. kernels_first : boolean Specifies order in which parameters are passed to and returned from `get_params` and `set_params`. Default is True which corresponds to signatures of `get_params` and `set_params` being: kernels, biases = layer.get_params() layer.set_params([kernels, biases]) If False this corresponds to signatures of `get_params` and `set_params` being: biases, kernels = layer.get_params() layer.set_params([biases, kernels]) dtype : numpy data type Data type to use in numpy arrays passed to layer methods. Default is `numpy.float`. Raises ------ AssertionError Raised if output of `layer.pgrads` is inconsistent with reference values either in shape or values. ValueError Raised if `kernel_order` is not a valid order string. """ inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype) kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype) biases = numpy.arange(2).astype(dtype) deltas = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype) true_kernel_grads = numpy.array( [[[[ 390., 264.], [ -114., -240.]], [[ 5088., 5124.], [ 5232., 5268.]]], [[[-1626., -1752.], [-2130., -2256.]], [[ 5664., 5700.], [ 5808., 5844.]]], [[[-3642., -3768.], [-4146., -4272.]], [[ 6240., 6276.], [ 6384., 6420.]]]], dtype=dtype) if kernel_order == 'oixy': kernels = kernels.swapaxes(0, 1) true_kernel_grads = true_kernel_grads.swapaxes(0, 1) elif kernel_order != 'ioxy': raise ValueError('kernel_order must be one of "ioxy" and "oixy"') true_bias_grads = numpy.array([-126., 36.], dtype=dtype) try: orig_params = layer.get_params() if kernels_first: layer.set_params([kernels, biases]) else: layer.set_params([biases, kernels]) layer_kernel_grads, layer_bias_grads = layer.pgrads(inputs, deltas) assert layer_kernel_grads.shape == true_kernel_grads.shape, ( 'Layer pgrads gives incorrect shaped kernel gradients output. ' 'Correct shape is {0} but returned shape is {1}.' .format(true_kernel_grads.shape, layer_kernel_grads.shape) ) assert numpy.allclose(layer_kernel_grads, true_kernel_grads), ( 'Layer pgrads does not give correct kernel gradients output. ' 'Correct output is {0}\n but returned output is {1}.' .format(true_kernel_grads, layer_kernel_grads) ) assert layer_bias_grads.shape == true_bias_grads.shape, ( 'Layer pgrads gives incorrect shaped bias gradients output. ' 'Correct shape is {0} but returned shape is {1}.' .format(true_bias_grads.shape, layer_bias_grads.shape) ) assert numpy.allclose(layer_bias_grads, true_bias_grads), ( 'Layer pgrads does not give correct bias gradients output. ' 'Correct output is {0}\n but returned output is {1}.' .format(true_bias_grads, layer_bias_grads) ) finally: layer.set_params(orig_params) return True