Adding new schedulers module.
This commit is contained in:
parent fa2f6b195c
commit 302d44371a
@@ -1,172 +1,34 @@

# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import logging


class LearningRateScheduler(object):
    """
    Define an interface for determining learning rates
    """
    def __init__(self, max_epochs=100):
        self.epoch = 0
        self.max_epochs = max_epochs

    def get_rate(self):
        raise NotImplementedError()

    def get_next_rate(self, current_accuracy=None):
        self.epoch += 1
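The base class above only fixes the interface: get_rate() reports the rate for the current epoch and get_next_rate() advances the epoch counter. As a minimal illustration of how a concrete schedule plugs in (this class is a sketch, not part of the commit), an exponential decay could look like:

# Illustrative sketch only -- an exponential-decay schedule built on the
# LearningRateScheduler interface above; the class name and decay formula
# are assumptions, not part of this commit.
class LearningRateExponential(LearningRateScheduler):
    def __init__(self, start_rate, decay=0.1, max_epochs=100):
        super(LearningRateExponential, self).__init__(max_epochs)
        self.start_rate = start_rate
        self.decay = decay

    def get_rate(self):
        if self.epoch >= self.max_epochs:
            return 0.0
        # rate shrinks geometrically with the (0-indexed) epoch counter
        return self.start_rate * (1.0 - self.decay) ** self.epoch

    def get_next_rate(self, current_accuracy=None):
        super(LearningRateExponential, self).get_next_rate(current_accuracy)
        return self.get_rate()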


class LearningRateList(LearningRateScheduler):
    def __init__(self, learning_rates_list, max_epochs):

        super(LearningRateList, self).__init__(max_epochs)

        assert isinstance(learning_rates_list, list), (
            "The learning_rates_list argument expected"
            " to be of type list, got %s" % type(learning_rates_list)
        )
        self.lr_list = learning_rates_list

    def get_rate(self):
        if self.epoch < len(self.lr_list):
            return self.lr_list[self.epoch]
        return 0.0

    def get_next_rate(self, current_accuracy=None):
        super(LearningRateList, self).get_next_rate(current_accuracy=None)
        return self.get_rate()


class LearningRateFixed(LearningRateList):

    def __init__(self, learning_rate, max_epochs):
        assert learning_rate > 0, (
            "learning rate expected to be > 0, got %f" % learning_rate
        )
        super(LearningRateFixed, self).__init__([learning_rate], max_epochs)

    def get_rate(self):
        if self.epoch < self.max_epochs:
            return self.lr_list[0]
        return 0.0

    def get_next_rate(self, current_accuracy=None):
        super(LearningRateFixed, self).get_next_rate(current_accuracy=None)
        return self.get_rate()
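Schedulers in this family are driven once per epoch until they return a zero rate. A minimal sketch of that loop, with train_one_epoch and validate as hypothetical stand-ins for the rest of the training code:

# Minimal sketch of how a training loop might drive the schedulers above.
# Only the scheduler calls are real; train_one_epoch and validate are
# hypothetical stand-ins for the optimiser and evaluation code.
scheduler = LearningRateFixed(learning_rate=0.5, max_epochs=30)

rate = scheduler.get_rate()
while rate > 0.0:
    train_one_epoch(learning_rate=rate)   # hypothetical
    valid_accuracy = validate()           # hypothetical
    # advances scheduler.epoch and returns the rate for the next epoch;
    # 0.0 signals that training should stop
    rate = scheduler.get_next_rate(valid_accuracy)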


class LearningRateNewBob(LearningRateScheduler):
    """
    newbob learning rate schedule.

    Fixed learning rate until validation set stops improving, then exponential
    decay.
    """

    def __init__(self, start_rate, scale_by=.5, max_epochs=99,
                 min_derror_ramp_start=.5, min_derror_stop=.5, init_error=100.0,
                 patience=0, zero_rate=None, ramping=False):
        """
        :type start_rate: float
        :param start_rate: initial learning rate

        :type scale_by: float
        :param scale_by: factor by which the rate is scaled once ramping starts

        :type max_epochs: int
        :param max_epochs: maximum number of epochs, after which the rate is 0

        :type min_derror_ramp_start: float
        :param min_derror_ramp_start: improvement in validation error below
            which ramping (rate scaling) starts

        :type min_derror_stop: float
        :param min_derror_stop: improvement in validation error below which,
            once ramping and with patience exhausted, the rate is set to 0

        :type init_error: float
        :param init_error: initial value of the lowest validation error
        """
        self.start_rate = start_rate
        self.init_error = init_error
        self.init_patience = patience

        self.rate = start_rate
        self.scale_by = scale_by
        self.max_epochs = max_epochs
        self.min_derror_ramp_start = min_derror_ramp_start
        self.min_derror_stop = min_derror_stop
        self.lowest_error = init_error

        self.epoch = 1
        self.ramping = ramping
        self.patience = patience
        self.zero_rate = zero_rate

    def reset(self):
        self.rate = self.start_rate
        self.lowest_error = self.init_error
        self.epoch = 1
        self.ramping = False
        self.patience = self.init_patience

    def get_rate(self):
        if self.epoch == 1 and self.zero_rate is not None:
            return self.zero_rate
        return self.rate

    def get_next_rate(self, current_accuracy):
        """
        :type current_accuracy: float
        :param current_accuracy: current proportion correctly classified
        """

        current_error = 1. - current_accuracy
        diff_error = 0.0

        if (self.max_epochs > 10000) or (self.epoch >= self.max_epochs):
            # logging.debug('Setting rate to 0.0. max_epochs or epoch >= max_epochs')
            self.rate = 0.0
        else:
            diff_error = self.lowest_error - current_error

            if current_error < self.lowest_error:
                self.lowest_error = current_error

            if self.ramping:
                if diff_error < self.min_derror_stop:
                    if self.patience > 0:
                        # logging.debug('Patience decreased to %f' % self.patience)
                        self.patience -= 1
                        self.rate *= self.scale_by
                    else:
                        # logging.debug('diff_error (%f) < min_derror_stop (%f)' % (diff_error, self.min_derror_stop))
                        self.rate = 0.0
                else:
                    self.rate *= self.scale_by
            else:
                if diff_error < self.min_derror_ramp_start:
                    # logging.debug('Start ramping.')
                    self.ramping = True
                    self.rate *= self.scale_by

        self.epoch += 1

        return self.rate
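The newbob behaviour is easiest to see on a synthetic accuracy trace: the rate is held at start_rate while validation error keeps improving by at least min_derror_ramp_start, is multiplied by scale_by once improvement falls below that threshold, and (with patience exhausted) drops to 0.0 when improvement also falls below min_derror_stop. A small sketch with made-up numbers:

# Sketch: step LearningRateNewBob on a made-up validation accuracy trace.
# With the values below the printed rates are 0.5, 0.5, 0.5, 0.25, 0.0:
# constant while accuracy improves in large steps, halved when improvement
# drops below min_derror_ramp_start, then zeroed below min_derror_stop.
scheduler = LearningRateNewBob(start_rate=0.5, scale_by=0.5, max_epochs=20,
                               min_derror_ramp_start=0.05,
                               min_derror_stop=0.05)

for accuracy in [0.50, 0.60, 0.70, 0.705, 0.706]:
    next_rate = scheduler.get_next_rate(accuracy)
    print("epoch %d: accuracy %.3f -> next rate %.4f"
          % (scheduler.epoch - 1, accuracy, next_rate))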


class DropoutFixed(LearningRateList):

    def __init__(self, p_inp_keep, p_hid_keep):
        assert 0 < p_inp_keep <= 1 and 0 < p_hid_keep <= 1, (
            "Dropout 'keep' probabilities are expected to be in the (0, 1] range"
        )
        super(DropoutFixed, self).__init__([(p_inp_keep, p_hid_keep)], max_epochs=999)

    def get_rate(self):
        return self.lr_list[0]

    def get_next_rate(self, current_accuracy=None):
        return self.get_rate()
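Despite inheriting from LearningRateList, the "rate" returned by DropoutFixed is the pair of keep probabilities, unchanged from epoch to epoch:

# Sketch: DropoutFixed returns the same (input, hidden) keep-probability
# pair every epoch rather than a scalar learning rate.
dropout_scheduler = DropoutFixed(p_inp_keep=0.8, p_hid_keep=0.5)
p_inp_keep, p_hid_keep = dropout_scheduler.get_rate()    # (0.8, 0.5)
assert dropout_scheduler.get_next_rate() == (0.8, 0.5)   # never changes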


# -*- coding: utf-8 -*-
"""Training schedulers.

This module contains classes implementing schedulers which control the
evolution of learning rule hyperparameters (such as learning rate) over a
training run.
"""

import numpy as np


class ConstantLearningRateScheduler(object):
    """Example of scheduler interface which sets a constant learning rate."""

    def __init__(self, learning_rate):
        """Construct a new constant learning rate scheduler object.

        Args:
            learning_rate: Learning rate to use in learning rule.
        """
        self.learning_rate = learning_rate

    def update_learning_rule(self, learning_rule, epoch_number):
        """Update the hyperparameters of the learning rule.

        Run at the beginning of each epoch.

        Args:
            learning_rule: Learning rule object being used in training run,
                any scheduled hyperparameters to be altered should be
                attributes of this object.
            epoch_number: Integer index of training epoch about to be run.
        """
        learning_rule.learning_rate = self.learning_rate
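The new-style module above exposes a different interface: rather than being polled with get_next_rate, the scheduler mutates the learning rule's hyperparameters via update_learning_rule at the start of each epoch. A minimal sketch (DummyLearningRule is a hypothetical stand-in, not part of this commit):

# Sketch of the replacement interface: the scheduler is handed the learning
# rule and overwrites its learning_rate attribute before each epoch.
class DummyLearningRule(object):
    def __init__(self, learning_rate=0.1):
        self.learning_rate = learning_rate

rule = DummyLearningRule()
scheduler = ConstantLearningRateScheduler(learning_rate=0.05)
for epoch_number in range(5):
    scheduler.update_learning_rule(rule, epoch_number)
    assert rule.learning_rate == 0.05  # constant across epochs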
mlp/utils.py (361 deletions)
@@ -1,361 +0,0 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import numpy
from mlp.layers import Layer


def numerical_gradient(f, x, eps=1e-4, **kwargs):
    """
    Implements the following numerical gradient rule
    df(x)/dx = (f(x+eps)-f(x-eps))/(2eps)
    """

    xc = x.copy()
    g = numpy.zeros_like(xc)
    xf = xc.ravel()
    gf = g.ravel()

    for i in xrange(xf.shape[0]):
        xx = xf[i]
        xf[i] = xx + eps
        fp_eps, ___ = f(xc, **kwargs)
        xf[i] = xx - eps
        fm_eps, ___ = f(xc, **kwargs)
        xf[i] = xx
        gf[i] = (fp_eps - fm_eps)/(2*eps)

    return g
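numerical_gradient expects f to return a (value, gradient) pair and perturbs one element of x at a time with the central difference above. A quick self-contained check on a quadratic, whose analytic gradient is 2*x (illustrative only):

# Sketch: check numerical_gradient on f(x) = sum(x**2). The function must
# return a (value, gradient) pair, matching the `fval, fgrad = f(...)`
# convention used throughout this module.
import numpy

def quadratic(x):
    return numpy.sum(x**2), 2 * x

x = numpy.array([[1.0, -2.0], [0.5, 3.0]])
num_grad = numerical_gradient(quadratic, x, eps=1e-4)
assert numpy.allclose(num_grad, 2 * x, atol=1e-5)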


def verify_gradient(f, x, eps=1e-4, tol=1e-6, **kwargs):
    """
    Compares the numerical and analytical gradients.
    """
    fval, fgrad = f(x=x, **kwargs)
    ngrad = numerical_gradient(f=f, x=x, eps=eps, tol=tol, **kwargs)

    fgradnorm = numpy.sqrt(numpy.sum(fgrad**2))
    ngradnorm = numpy.sqrt(numpy.sum(ngrad**2))
    diffnorm = numpy.sqrt(numpy.sum((fgrad-ngrad)**2))

    if fgradnorm > 0 or ngradnorm > 0:
        norm = numpy.maximum(fgradnorm, ngradnorm)
        if not (diffnorm < tol or diffnorm/norm < tol):
            raise Exception("Numerical and analytical gradients "
                            "are different: %s != %s!" % (ngrad, fgrad))
    else:
        if not (diffnorm < tol):
            raise Exception("Numerical and analytical gradients "
                            "are different: %s != %s!" % (ngrad, fgrad))
    return True


def verify_layer_gradient(layer, x, eps=1e-4, tol=1e-6):

    assert isinstance(layer, Layer), (
        "Expected to get the instance of Layer class, got"
        " %s " % type(layer)
    )

    def grad_layer_wrapper(x, **kwargs):
        h = layer.fprop(x)
        deltas, ograds = layer.bprop(h=h, igrads=numpy.ones_like(h))
        return numpy.sum(h), ograds

    return verify_gradient(f=grad_layer_wrapper, x=x, eps=eps, tol=tol, layer=layer)
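verify_layer_gradient wraps a layer's fprop/bprop pair so that verify_gradient can compare the backpropagated gradient of sum(fprop(x)) with finite differences. A sketch of a typical call; Sigmoid and its constructor arguments are assumptions about what mlp.layers provides, not guaranteed by this file:

# Sketch: gradient-check a layer implementation. `Sigmoid` and its
# constructor arguments are assumed; any Layer subclass with compatible
# fprop/bprop methods would do.
import numpy
from mlp.layers import Sigmoid   # assumed import

layer = Sigmoid(idim=10, odim=10)                # assumed signature
x = numpy.random.uniform(-1.0, 1.0, (5, 10))     # a small batch of inputs
verify_layer_gradient(layer, x)                  # raises if gradients disagree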


def test_conv_linear_fprop(layer, kernel_order='ioxy', kernels_first=True,
                           dtype=numpy.float):
    """
    Tests forward propagation method of a convolutional layer.

    Checks the outputs of `fprop` method for a fixed input against known
    reference values for the outputs and raises an AssertionError if
    the outputted values are not consistent with the reference values. If
    all tests pass, returns True.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `fprop`.
    kernel_order : string
        Specifies dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is `ioxy` which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies order in which parameters are passed to and returned from
        `get_params` and `set_params`. Default is True which corresponds
        to signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to signatures of `get_params` and
        `set_params` being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is `numpy.float`.

    Raises
    ------
    AssertionError
        Raised if output of `layer.fprop` is inconsistent with reference
        values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    biases = numpy.arange(2).astype(dtype)
    true_output = numpy.array(
        [[[[  496.,   466.,   436.],
           [  376.,   346.,   316.],
           [  256.,   226.,   196.]],
          [[ 1385.,  1403.,  1421.],
           [ 1457.,  1475.,  1493.],
           [ 1529.,  1547.,  1565.]]],
         [[[ -944.,  -974., -1004.],
           [-1064., -1094., -1124.],
           [-1184., -1214., -1244.]],
          [[ 2249.,  2267.,  2285.],
           [ 2321.,  2339.,  2357.],
           [ 2393.,  2411.,  2429.]]]], dtype=dtype)
    try:
        orig_params = layer.get_params()
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_output = layer.fprop(inputs)
        assert layer_output.shape == true_output.shape, (
            'Layer fprop gives incorrect shaped output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_output.shape, layer_output.shape)
        )
        assert numpy.allclose(layer_output, true_output), (
            'Layer fprop does not give correct output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_output, layer_output)
        )
    finally:
        layer.set_params(orig_params)
    return True


def test_conv_linear_bprop(layer, kernel_order='ioxy', kernels_first=True,
                           dtype=numpy.float):
    """
    Tests input gradients backpropagation method of a convolutional layer.

    Checks the outputs of `bprop` method for a fixed input against known
    reference values for the outputs and raises an AssertionError if
    the outputted values are not consistent with the reference values. If
    all tests pass, returns True.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `bprop`.
    kernel_order : string
        Specifies dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is `ioxy` which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies order in which parameters are passed to and returned from
        `get_params` and `set_params`. Default is True which corresponds
        to signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to signatures of `get_params` and
        `set_params` being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is `numpy.float`.

    Raises
    ------
    AssertionError
        Raised if output of `layer.bprop` is inconsistent with reference
        values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    biases = numpy.arange(2).astype(dtype)
    igrads = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
    true_ograds = numpy.array(
        [[[[ 328.,  605.,  567.,  261.],
           [ 534.,  976.,  908.,  414.],
           [ 426.,  772.,  704.,  318.],
           [ 170.,  305.,  275.,  123.]],
          [[  80.,  125.,  119.,   45.],
           [  86.,  112.,  108.,   30.],
           [  74.,  100.,   96.,   30.],
           [  18.,   17.,   19.,    3.]],
          [[-168., -355., -329., -171.],
           [-362., -752., -692., -354.],
           [-278., -572., -512., -258.],
           [-134., -271., -237., -117.]]],
         [[[ -32.,  -79., -117.,  -63.],
           [-114., -248., -316., -162.],
           [-222., -452., -520., -258.],
           [-118., -235., -265., -129.]],
          [[   8.,   17.,   11.,    9.],
           [  14.,   40.,   36.,   30.],
           [   2.,   28.,   24.,   30.],
           [  18.,   53.,   55.,   39.]],
          [[  48.,  113.,  139.,   81.],
           [ 142.,  328.,  388.,  222.],
           [ 226.,  508.,  568.,  318.],
           [ 154.,  341.,  375.,  207.]]]], dtype=dtype)
    try:
        orig_params = layer.get_params()
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_deltas, layer_ograds = layer.bprop(None, igrads)
        assert layer_deltas.shape == igrads.shape, (
            'Layer bprop gives incorrectly shaped deltas output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(igrads.shape, layer_deltas.shape)
        )
        assert numpy.allclose(layer_deltas, igrads), (
            'Layer bprop does not give correct deltas output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(igrads, layer_deltas)
        )
        assert layer_ograds.shape == true_ograds.shape, (
            'Layer bprop gives incorrect shaped ograds output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_ograds.shape, layer_ograds.shape)
        )
        assert numpy.allclose(layer_ograds, true_ograds), (
            'Layer bprop does not give correct ograds output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_ograds, layer_ograds)
        )
    finally:
        layer.set_params(orig_params)
    return True


def test_conv_linear_pgrads(layer, kernel_order='ioxy', kernels_first=True,
                            dtype=numpy.float):
    """
    Tests parameter gradients backpropagation method of a convolutional layer.

    Checks the outputs of `pgrads` method for a fixed input against known
    reference values for the outputs and raises an AssertionError if
    the outputted values are not consistent with the reference values. If
    all tests pass, returns True.

    Parameters
    ----------
    layer : instance of Layer subclass
        Convolutional (linear only) layer implementation. It must implement
        the methods `get_params`, `set_params` and `pgrads`.
    kernel_order : string
        Specifies dimension ordering assumed for convolutional kernels
        passed to `layer`. Default is `ioxy` which corresponds to:
            input channels, output channels, image x, image y
        The other option is 'oixy' which corresponds to
            output channels, input channels, image x, image y
        Any other value will raise a ValueError exception.
    kernels_first : boolean
        Specifies order in which parameters are passed to and returned from
        `get_params` and `set_params`. Default is True which corresponds
        to signatures of `get_params` and `set_params` being:
            kernels, biases = layer.get_params()
            layer.set_params([kernels, biases])
        If False this corresponds to signatures of `get_params` and
        `set_params` being:
            biases, kernels = layer.get_params()
            layer.set_params([biases, kernels])
    dtype : numpy data type
        Data type to use in numpy arrays passed to layer methods. Default
        is `numpy.float`.

    Raises
    ------
    AssertionError
        Raised if output of `layer.pgrads` is inconsistent with reference
        values either in shape or values.
    ValueError
        Raised if `kernel_order` is not a valid order string.
    """
    inputs = numpy.arange(96).reshape((2, 3, 4, 4)).astype(dtype)
    kernels = numpy.arange(-12, 12).reshape((3, 2, 2, 2)).astype(dtype)
    biases = numpy.arange(2).astype(dtype)
    deltas = numpy.arange(-20, 16).reshape((2, 2, 3, 3)).astype(dtype)
    true_kernel_grads = numpy.array(
        [[[[  390.,   264.],
           [ -114.,  -240.]],
          [[ 5088.,  5124.],
           [ 5232.,  5268.]]],
         [[[-1626., -1752.],
           [-2130., -2256.]],
          [[ 5664.,  5700.],
           [ 5808.,  5844.]]],
         [[[-3642., -3768.],
           [-4146., -4272.]],
          [[ 6240.,  6276.],
           [ 6384.,  6420.]]]], dtype=dtype)
    if kernel_order == 'oixy':
        kernels = kernels.swapaxes(0, 1)
        true_kernel_grads = true_kernel_grads.swapaxes(0, 1)
    elif kernel_order != 'ioxy':
        raise ValueError('kernel_order must be one of "ioxy" and "oixy"')
    true_bias_grads = numpy.array([-126.,   36.], dtype=dtype)
    try:
        orig_params = layer.get_params()
        if kernels_first:
            layer.set_params([kernels, biases])
        else:
            layer.set_params([biases, kernels])
        layer_kernel_grads, layer_bias_grads = layer.pgrads(inputs, deltas)
        assert layer_kernel_grads.shape == true_kernel_grads.shape, (
            'Layer pgrads gives incorrect shaped kernel gradients output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_kernel_grads.shape, layer_kernel_grads.shape)
        )
        assert numpy.allclose(layer_kernel_grads, true_kernel_grads), (
            'Layer pgrads does not give correct kernel gradients output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_kernel_grads, layer_kernel_grads)
        )
        assert layer_bias_grads.shape == true_bias_grads.shape, (
            'Layer pgrads gives incorrect shaped bias gradients output. '
            'Correct shape is {0} but returned shape is {1}.'
            .format(true_bias_grads.shape, layer_bias_grads.shape)
        )
        assert numpy.allclose(layer_bias_grads, true_bias_grads), (
            'Layer pgrads does not give correct bias gradients output. '
            'Correct output is {0}\n but returned output is {1}.'
            .format(true_bias_grads, layer_bias_grads)
        )
    finally:
        layer.set_params(orig_params)
    return True
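All three checks share the same fixture (a 2x3x4x4 input batch and 3-to-2 channel 2x2 kernels) and are intended to be run against a convolutional layer implementation. A sketch of how they might be invoked; ConvLinear, its import path and constructor arguments are assumptions, not part of this file:

# Sketch: run the reference checks against a hypothetical ConvLinear layer.
# Any layer exposing get_params/set_params, fprop, bprop and pgrads with the
# expected shapes would work; the names below are assumed for illustration.
from mlp.conv import ConvLinear   # assumed import

layer = ConvLinear(num_inp_feat_maps=3, num_out_feat_maps=2,
                   image_shape=(4, 4), kernel_shape=(2, 2))   # assumed signature

assert test_conv_linear_fprop(layer)
assert test_conv_linear_bprop(layer)
assert test_conv_linear_pgrads(layer)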