added code-base
This commit is contained in:
parent
4404649ba1
commit
2b1700180e
64
mlp/costs.py
Normal file
@@ -0,0 +1,64 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh


import numpy


class Cost(object):
    """
    Defines an interface for the cost object
    """
    def cost(self, y, t, **kwargs):
        """
        Implements a cost for monitoring purposes
        :param y: matrix -- an output of the model
        :param t: matrix -- the expected output the model should produce
        :param kwargs: -- some optional parameters required by the cost
        :return: the scalar value representing the cost given y and t
        """
        raise NotImplementedError()

    def grad(self, y, t, **kwargs):
        """
        Implements a gradient of the cost w.r.t y
        :param y: matrix -- an output of the model
        :param t: matrix -- the expected output the model should produce
        :param kwargs: -- some optional parameters required by the cost
        :return: matrix -- the gradient of the cost w.r.t y
        """
        raise NotImplementedError()

    def get_name(self):
        return 'cost'


class MSECost(Cost):
    def cost(self, y, t, **kwargs):
        se = 0.5*numpy.sum((y - t)**2, axis=1)
        return numpy.mean(se)

    def grad(self, y, t, **kwargs):
        return y - t

    def get_name(self):
        return 'mse'


class CECost(Cost):
    """
    Cross Entropy (Negative log-likelihood) cost for multiple classes
    """
    def cost(self, y, t, **kwargs):
        # assumes t is 1-of-K coded and y is a softmax
        # transformed estimate at the output layer
        nll = t * numpy.log(y)
        return -numpy.mean(numpy.sum(nll, axis=1))

    def grad(self, y, t, **kwargs):
        # assumes t is 1-of-K coded and y is a softmax
        # transformed estimate at the output layer
        return y - t

    def get_name(self):
        return 'ce'
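A minimal usage sketch for the cost classes above, assuming y holds (mini-batch x classes) softmax-like outputs and t is 1-of-K coded; the array values are made up for illustration:

import numpy
from mlp.costs import MSECost, CECost

y = numpy.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]])  # model outputs, rows sum to 1
t = numpy.array([[1., 0., 0.],
                 [0., 1., 0.]])     # 1-of-K targets

mse, ce = MSECost(), CECost()
print(mse.cost(y, t))  # scalar mean squared error, for monitoring
print(ce.cost(y, t))   # scalar mean negative log-likelihood
print(ce.grad(y, t))   # gradient w.r.t y, here simply y - t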
281
mlp/layers.py
Normal file
@@ -0,0 +1,281 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import numpy
import logging
from mlp.costs import Cost


logger = logging.getLogger(__name__)


class MLP(object):
    """
    This is a container for an arbitrary sequence of other transforms.
    On top of this, the class also keeps the state of the model, i.e.
    the result of forward (activations) and backward (deltas) passes
    through the model (for a mini-batch), which is required to compute
    the gradients for the parameters.
    """
    def __init__(self, cost):

        assert isinstance(cost, Cost), (
            "Cost needs to be of type mlp.costs.Cost, got %s" % type(cost)
        )

        self.layers = []       # the actual list of network layers
        self.activations = []  # keeps forward-pass activations (h from equations)
                               # for a given minibatch (or features at 0th index)
        self.deltas = []       # keeps back-propagated error signals (deltas from equations)
                               # for a given minibatch and each layer
        self.cost = cost

    def fprop(self, x):
        """
        :param x: mini-batch of data-points x
        :return: y (top layer activation) which is an estimate of y given x
        """

        if len(self.activations) != len(self.layers) + 1:
            self.activations = [None]*(len(self.layers) + 1)

        self.activations[0] = x
        for i in xrange(0, len(self.layers)):
            self.activations[i+1] = self.layers[i].fprop(self.activations[i])
        return self.activations[-1]

    def bprop(self, cost_grad):
        """
        :param cost_grad: matrix -- grad of the cost w.r.t y
        :return: None, the deltas are kept in the model
        """

        # allocate the list of deltas for each layer
        # note, we do not use all of those fields but
        # want to keep it aligned 1:1 with activations,
        # which will simplify indexing later on when
        # computing grads w.r.t parameters
        if len(self.deltas) != len(self.activations):
            self.deltas = [None]*len(self.activations)

        # treat the top layer in a special way, as it deals with the
        # cost, which may lead to some simplifications
        top_layer_idx = len(self.layers)
        self.deltas[top_layer_idx], ograds = self.layers[top_layer_idx - 1].\
            bprop_cost(self.activations[top_layer_idx], cost_grad, self.cost)

        # then back-prop through the remaining layers
        for i in xrange(top_layer_idx - 1, 0, -1):
            self.deltas[i], ograds = self.layers[i - 1].\
                bprop(self.activations[i], ograds)

    def add_layer(self, layer):
        self.layers.append(layer)

    def set_layers(self, layers):
        self.layers = layers

    def get_name(self):
        return 'mlp'
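A usage sketch for the MLP container above, relying on the Linear layer and MSECost defined elsewhere in this code-base; the batch size and layer dimensions are arbitrary:

import numpy
from mlp.costs import MSECost
from mlp.layers import MLP, Linear

model = MLP(cost=MSECost())
model.add_layer(Linear(idim=10, odim=5))
model.add_layer(Linear(idim=5, odim=2))

x = numpy.random.uniform(-1, 1, (4, 10))  # a mini-batch of 4 data-points
t = numpy.random.uniform(-1, 1, (4, 2))   # matching targets

y = model.fprop(x)                  # per-layer activations cached in model.activations
model.bprop(model.cost.grad(y, t))  # per-layer deltas cached in model.deltas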
class Layer(object):
    """
    Abstract class defining an interface for
    other transforms.
    """
    def __init__(self, rng=None):

        if rng is None:
            seed = [2015, 10, 1]
            self.rng = numpy.random.RandomState(seed)
        else:
            self.rng = rng

    def fprop(self, inputs):
        """
        Implements a forward propagation through the i-th layer, that is
        some form of:
            a^i = xW^i + b^i
            h^i = f^i(a^i)
        with f^i, W^i, b^i denoting a non-linearity, weight matrix and
        biases at the i-th layer, respectively, and x denoting inputs.

        :param inputs: matrix of features (x) or the output of the previous layer h^{i-1}
        :return: h^i, matrix of features transformed by the layer
        """
        raise NotImplementedError()

    def bprop(self, h, igrads):
        """
        Implements a backward propagation through the layer, that is, given
        h^i denotes the output of the layer and x^i its input, we compute
        dh^i/dx^i, which by the chain rule is dh^i/da^i da^i/dx^i.
        x^i could be either features (x) or the output of the lower layer h^{i-1}
        :param h: the activation produced in the forward pass
        :param igrads: error signal (or gradient) flowing into the layer; note,
            in the general case this does not correspond to the 'deltas' used to update
            the layer's parameters; to get deltas one needs to multiply it by
            the dh^i/da^i derivative
        :return: a tuple (deltas, ograds) where:
            deltas = igrads * dh^i/da^i
            ograds = deltas \times da^i/dx^i
        """
        raise NotImplementedError()

    def bprop_cost(self, h, igrads, cost=None):
        """
        Implements a backward propagation in case the layer directly
        deals with the optimised cost (i.e. the top layer).
        By default, the method should implement back-prop for the default cost, that is
        the one that is natural to the layer's output, i.e.:
        linear -> mse, softmax -> cross-entropy, sigmoid -> binary cross-entropy
        :param h: the activation produced in the forward pass
        :param igrads: error signal (or gradient) flowing into the layer; note,
            in the general case this does not correspond to the 'deltas' used to update
            the layer's parameters; to get deltas one needs to multiply it by
            the dh^i/da^i derivative
        :return: a tuple (deltas, ograds) where:
            deltas = igrads * dh^i/da^i
            ograds = deltas \times da^i/dx^i
        """

        raise NotImplementedError()

    def pgrads(self, inputs, deltas):
        """
        Return gradients w.r.t parameters
        """
        raise NotImplementedError()

    def get_params(self):
        raise NotImplementedError()

    def set_params(self):
        raise NotImplementedError()

    def get_name(self):
        return 'abstract_layer'


class Linear(Layer):

    def __init__(self, idim, odim,
                 rng=None,
                 irange=0.1):

        super(Linear, self).__init__(rng=rng)

        self.idim = idim
        self.odim = odim

        self.W = self.rng.uniform(
            -irange, irange,
            (self.idim, self.odim))

        self.b = numpy.zeros((self.odim,), dtype=numpy.float32)

    def fprop(self, inputs):
        """
        Implements a forward propagation through the i-th layer, that is
        some form of:
            a^i = xW^i + b^i
            h^i = f^i(a^i)
        with f^i, W^i, b^i denoting a non-linearity, weight matrix and
        biases of this (i-th) layer, respectively, and x denoting inputs.

        :param inputs: matrix of features (x) or the output of the previous layer h^{i-1}
        :return: h^i, matrix of features transformed by the layer
        """
        a = numpy.dot(inputs, self.W) + self.b
        # here f() is an identity function, so just return a linear transformation
        return a

    def bprop(self, h, igrads):
        """
        Implements a backward propagation through the layer, that is, given
        h^i denotes the output of the layer and x^i its input, we compute
        dh^i/dx^i, which by the chain rule is dh^i/da^i da^i/dx^i.
        x^i could be either features (x) or the output of the lower layer h^{i-1}
        :param h: the activation produced in the forward pass
        :param igrads: error signal (or gradient) flowing into the layer; note,
            in the general case this does not correspond to the 'deltas' used to update
            the layer's parameters; to get deltas one needs to multiply it by
            the dh^i/da^i derivative
        :return: a tuple (deltas, ograds) where:
            deltas = igrads * dh^i/da^i
            ograds = deltas \times da^i/dx^i
        """

        # since df^i/da^i = 1 (f is assumed to be an identity function),
        # deltas are in fact the same as igrads
        ograds = numpy.dot(igrads, self.W.T)
        return igrads, ograds

    def bprop_cost(self, h, igrads, cost):
        """
        Implements a backward propagation in case the layer directly
        deals with the optimised cost (i.e. the top layer).
        By default, the method should implement back-prop for the default cost, that is
        the one that is natural to the layer's output; here we implement
        the linear -> mse scenario.
        :param h: the activation produced in the forward pass
        :param igrads: error signal (or gradient) flowing into the layer; note,
            in the general case this does not correspond to the 'deltas' used to update
            the layer's parameters; to get deltas one needs to multiply it by
            the dh^i/da^i derivative
        :param cost: mlp.costs.Cost instance defining the cost in use
        :return: a tuple (deltas, ograds) where:
            deltas = igrads * dh^i/da^i
            ograds = deltas \times da^i/dx^i
        """

        if cost is None or cost.get_name() == 'mse':
            # for a linear layer and mean square error cost,
            # cost back-prop is the same as standard back-prop
            return self.bprop(h, igrads)
        else:
            raise NotImplementedError('Linear.bprop_cost method not implemented '
                                      'for the %s cost' % cost.get_name())

    def pgrads(self, inputs, deltas):
        """
        Return gradients w.r.t parameters

        :param inputs: input to the i-th layer
        :param deltas: deltas computed in the bprop stage up to the i-th layer
        :return: list of grads w.r.t parameters dE/dW and dE/db in *exactly*
            the same order as the params are returned by get_params()

        Note: deltas here contain the whole chain rule leading
        from the cost up to the i-th layer, i.e.
            dE/dy^L dy^L/da^L da^L/dh^{L-1} dh^{L-1}/da^{L-1} ... dh^{i}/da^{i}
        and here we are just asking about
            1) da^i/dW^i and 2) da^i/db^i
        since W and b are the layer's only parameters
        """

        grad_W = numpy.dot(inputs.T, deltas)
        grad_b = numpy.sum(deltas, axis=0)

        return [grad_W, grad_b]

    def get_params(self):
        return [self.W, self.b]

    def set_params(self, params):
        # we do not make checks here, but the order in the list
        # is assumed to be exactly the same as get_params() returns
        self.W = params[0]
        self.b = params[1]

    def get_name(self):
        return 'linear'
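A short numerical sketch of the fprop/bprop/pgrads contract for the Linear layer above; only the shapes matter here, and the mini-batch of 3 is arbitrary:

import numpy
from mlp.layers import Linear

layer = Linear(idim=4, odim=2, irange=0.1)

x = numpy.random.uniform(-1, 1, (3, 4))   # mini-batch of 3 inputs
h = layer.fprop(x)                        # (3, 2); a = xW + b with f the identity

igrads = numpy.ones((3, 2))               # pretend error signal flowing in from above
deltas, ograds = layer.bprop(h, igrads)   # deltas == igrads here, ograds has shape (3, 4)

grad_W, grad_b = layer.pgrads(x, deltas)  # shapes (4, 2) and (2,), same order as get_params()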
170
mlp/optimisers.py
Normal file
@@ -0,0 +1,170 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import numpy
import time
import logging

from mlp.layers import MLP
from mlp.dataset import DataProvider
from mlp.schedulers import LearningRateScheduler


logger = logging.getLogger(__name__)


class Optimiser(object):
    def train_epoch(self, model, train_iter):
        raise NotImplementedError()

    def train(self, model, train_iter, valid_iter=None):
        raise NotImplementedError()

    def validate(self, model, valid_iterator):
        assert isinstance(model, MLP), (
            "Expected model to be a subclass of 'mlp.layers.MLP'"
            " class but got %s " % type(model)
        )

        assert isinstance(valid_iterator, DataProvider), (
            "Expected iterator to be a subclass of 'mlp.dataset.DataProvider'"
            " class but got %s " % type(valid_iterator)
        )

        acc_list, nll_list = [], []
        for x, t in valid_iterator:
            y = model.fprop(x)
            nll_list.append(model.cost.cost(y, t))
            acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

        acc = numpy.mean(acc_list)
        nll = numpy.mean(nll_list)

        return nll, acc

    @staticmethod
    def classification_accuracy(y, t):
        """
        Returns classification accuracy given the estimate y and targets t
        :param y: matrix -- estimate produced by the model in fprop
        :param t: matrix -- target 1-of-K coded
        :return: vector of y.shape[0] size with binary values set to 0
            if the example was misclassified or 1 otherwise
        """
        y_idx = numpy.argmax(y, axis=1)
        t_idx = numpy.argmax(t, axis=1)
        rval = numpy.equal(y_idx, t_idx)
        return rval


class SGDOptimiser(Optimiser):
    def __init__(self, lr_scheduler):
        super(SGDOptimiser, self).__init__()

        assert isinstance(lr_scheduler, LearningRateScheduler), (
            "Expected lr_scheduler to be a subclass of 'mlp.schedulers.LearningRateScheduler'"
            " class but got %s " % type(lr_scheduler)
        )

        self.lr_scheduler = lr_scheduler

    def train_epoch(self, model, train_iterator, learning_rate):

        assert isinstance(model, MLP), (
            "Expected model to be a subclass of 'mlp.layers.MLP'"
            " class but got %s " % type(model)
        )
        assert isinstance(train_iterator, DataProvider), (
            "Expected iterator to be a subclass of 'mlp.dataset.DataProvider'"
            " class but got %s " % type(train_iterator)
        )

        acc_list, nll_list = [], []
        for x, t in train_iterator:
            # get the prediction
            y = model.fprop(x)

            # compute the cost and grad of the cost w.r.t y
            cost = model.cost.cost(y, t)
            cost_grad = model.cost.grad(y, t)

            # do backward pass through the model
            model.bprop(cost_grad)

            # update the model; here we iterate over layers
            # and then over each parameter in the layer
            effective_learning_rate = learning_rate / x.shape[0]

            for i in xrange(0, len(model.layers)):
                params = model.layers[i].get_params()
                grads = model.layers[i].pgrads(model.activations[i], model.deltas[i + 1])
                uparams = []
                for param, grad in zip(params, grads):
                    param = param - effective_learning_rate * grad
                    uparams.append(param)
                model.layers[i].set_params(uparams)

            nll_list.append(cost)
            acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

        return numpy.mean(nll_list), numpy.mean(acc_list)

    def train(self, model, train_iterator, valid_iterator=None):

        converged = False
        cost_name = model.cost.get_name()
        tr_stats, valid_stats = [], []

        # do the initial validation
        tr_nll, tr_acc = self.validate(model, train_iterator)
        logger.info('Epoch %i: Training cost (%s) for random model is %.3f. Accuracy is %.2f%%'
                    % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
        tr_stats.append((tr_nll, tr_acc))

        if valid_iterator is not None:
            valid_iterator.reset()
            valid_nll, valid_acc = self.validate(model, valid_iterator)
            logger.info('Epoch %i: Validation cost (%s) for random model is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
            valid_stats.append((valid_nll, valid_acc))

        while not converged:
            train_iterator.reset()

            tstart = time.clock()
            tr_nll, tr_acc = self.train_epoch(model=model,
                                              train_iterator=train_iterator,
                                              learning_rate=self.lr_scheduler.get_rate())
            tstop = time.clock()
            tr_stats.append((tr_nll, tr_acc))

            logger.info('Epoch %i: Training cost (%s) is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch + 1, cost_name, tr_nll, tr_acc * 100.))

            vstart = time.clock()
            if valid_iterator is not None:
                valid_iterator.reset()
                valid_nll, valid_acc = self.validate(model, valid_iterator)
                logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
                            % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
                self.lr_scheduler.get_next_rate(valid_acc)
                valid_stats.append((valid_nll, valid_acc))
            else:
                self.lr_scheduler.get_next_rate(None)
            vstop = time.clock()

            train_speed = train_iterator.num_examples() / (tstop - tstart)
            valid_speed = valid_iterator.num_examples() / (vstop - vstart)
            tot_time = vstop - tstart
            # pps = presentations per second
            logger.info("Epoch %i: Took %.0f seconds. Training speed %.0f pps. "
                        "Validation speed %.0f pps."
                        % (self.lr_scheduler.epoch, tot_time, train_speed, valid_speed))

            # we stop training when the learning rate, as returned by the lr scheduler, is 0
            # this is implementation dependent and, depending on the lr schedule, could happen,
            # for example, when max_epochs has been reached or if the progress between
            # two consecutive epochs is too small, etc.
            converged = (self.lr_scheduler.get_rate() == 0)

        return tr_stats, valid_stats
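The inner loop of SGDOptimiser.train_epoch above boils down to the following single-minibatch update; this stand-alone sketch uses synthetic arrays instead of a DataProvider and assumes an MSE-cost linear model:

import numpy
from mlp.costs import MSECost
from mlp.layers import MLP, Linear
from mlp.optimisers import Optimiser

model = MLP(cost=MSECost())
model.add_layer(Linear(idim=8, odim=3))

x = numpy.random.uniform(-1, 1, (5, 8))          # one mini-batch of 5 examples
t = numpy.eye(3)[numpy.random.randint(0, 3, 5)]  # 1-of-K targets

y = model.fprop(x)                  # forward pass
model.bprop(model.cost.grad(y, t))  # backward pass fills model.deltas

rate = 0.1 / x.shape[0]             # effective_learning_rate for learning_rate=0.1
for i in range(len(model.layers)):
    params = model.layers[i].get_params()
    grads = model.layers[i].pgrads(model.activations[i], model.deltas[i + 1])
    model.layers[i].set_params([p - rate * g for p, g in zip(params, grads)])

print(Optimiser.classification_accuracy(y, t).mean())  # fraction of correct argmax predictions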
155
mlp/schedulers.py
Normal file
@@ -0,0 +1,155 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import logging


class LearningRateScheduler(object):
    """
    Defines an interface for determining learning rates
    """
    def __init__(self, max_epochs=100):
        self.epoch = 0
        self.max_epochs = max_epochs

    def get_rate(self):
        raise NotImplementedError()

    def get_next_rate(self, current_error=None):
        self.epoch += 1


class LearningRateList(LearningRateScheduler):
    def __init__(self, learning_rates_list, max_epochs):

        super(LearningRateList, self).__init__(max_epochs)

        assert isinstance(learning_rates_list, list), (
            "The learning_rates_list argument expected"
            " to be of type list, got %s" % type(learning_rates_list)
        )
        self.lr_list = learning_rates_list

    def get_rate(self):
        if self.epoch < len(self.lr_list):
            return self.lr_list[self.epoch]
        return 0.0

    def get_next_rate(self, current_error=None):
        super(LearningRateList, self).get_next_rate(current_error=None)
        return self.get_rate()


class LearningRateFixed(LearningRateList):

    def __init__(self, learning_rate, max_epochs):
        assert learning_rate > 0, (
            "learning rate expected to be > 0, got %f" % learning_rate
        )
        super(LearningRateFixed, self).__init__([learning_rate], max_epochs)

    def get_rate(self):
        if self.epoch < self.max_epochs:
            return self.lr_list[0]
        return 0.0

    def get_next_rate(self, current_error=None):
        super(LearningRateFixed, self).get_next_rate(current_error=None)
        return self.get_rate()


class LearningRateNewBob(LearningRateScheduler):
    """
    Exponential (new-bob) learning rate schedule
    """

    def __init__(self, start_rate, scale_by=.5, max_epochs=99,
                 min_derror_ramp_start=.5, min_derror_stop=.5, init_error=100.0,
                 patience=0, zero_rate=None, ramping=False):
        """
        :type start_rate: float
        :param start_rate: initial learning rate

        :type scale_by: float
        :param scale_by: factor by which the rate is scaled once ramping starts

        :type max_epochs: int
        :param max_epochs: maximum number of training epochs

        :type min_derror_ramp_start: float
        :param min_derror_ramp_start: error improvement below which ramping starts

        :type min_derror_stop: float
        :param min_derror_stop: error improvement below which the rate is set to 0

        :type init_error: float
        :param init_error: initial (worst-case) error used as the starting lowest error
        """
        self.start_rate = start_rate
        self.init_error = init_error
        self.init_patience = patience

        self.rate = start_rate
        self.scale_by = scale_by
        self.max_epochs = max_epochs
        self.min_derror_ramp_start = min_derror_ramp_start
        self.min_derror_stop = min_derror_stop
        self.lowest_error = init_error

        self.epoch = 1
        self.ramping = ramping
        self.patience = patience
        self.zero_rate = zero_rate

    def reset(self):
        self.rate = self.start_rate
        self.lowest_error = self.init_error
        self.epoch = 1
        self.ramping = False
        self.patience = self.init_patience

    def get_rate(self):
        if (self.epoch == 1 and self.zero_rate is not None):
            return self.zero_rate
        return self.rate

    def get_next_rate(self, current_error):
        """
        :type current_error: float
        :param current_error: percentage error
        """

        diff_error = 0.0

        if ((self.max_epochs > 10000) or (self.epoch >= self.max_epochs)):
            # logging.debug('Setting rate to 0.0. max_epochs or epoch >= max_epochs')
            self.rate = 0.0
        else:
            diff_error = self.lowest_error - current_error

            if (current_error < self.lowest_error):
                self.lowest_error = current_error

            if (self.ramping):
                if (diff_error < self.min_derror_stop):
                    if (self.patience > 0):
                        # logging.debug('Patience decreased to %f' % self.patience)
                        self.patience -= 1
                        self.rate *= self.scale_by
                    else:
                        # logging.debug('diff_error (%f) < min_derror_stop (%f)' % (diff_error, self.min_derror_stop))
                        self.rate = 0.0
                else:
                    self.rate *= self.scale_by
            else:
                if (diff_error < self.min_derror_ramp_start):
                    # logging.debug('Start ramping.')
                    self.ramping = True
                    self.rate *= self.scale_by

        self.epoch += 1

        return self.rate
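A short sketch of how these schedulers drive the stopping condition in SGDOptimiser.train (training stops once get_rate() returns 0); the error values fed to the new-bob scheduler are invented for illustration:

from mlp.schedulers import LearningRateFixed, LearningRateNewBob

fixed = LearningRateFixed(learning_rate=0.5, max_epochs=3)
print(fixed.get_rate())        # 0.5
for _ in range(3):
    fixed.get_next_rate(None)  # advances the epoch counter
print(fixed.get_rate())        # 0.0 once max_epochs is reached

newbob = LearningRateNewBob(start_rate=0.5, scale_by=0.5,
                            min_derror_ramp_start=0.1, min_derror_stop=0.1,
                            patience=0)
for err in [40.0, 35.0, 34.95, 34.93]:  # validation errors (%), improvements shrinking
    rate = newbob.get_next_rate(err)
print(rate)                    # halved once ramping starts, then set to 0.0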
2030
res/code_scheme.svg
Normal file
File diff suppressed because it is too large (after: 200 KiB)