Removing old optimisers code.
parent
03f27cab75
commit
2988811b50
@@ -1,214 +0,0 @@
# Machine Learning Practical (INFR11119),
# Pawel Swietojanski, University of Edinburgh

import numpy
import time
import logging

from mlp.layers import MLP
from mlp.dataset import DataProvider
from mlp.schedulers import LearningRateScheduler


logger = logging.getLogger(__name__)


class Optimiser(object):
    def train_epoch(self, model, train_iter):
        raise NotImplementedError()

    def train(self, model, train_iter, valid_iter=None):
        raise NotImplementedError()

    def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
        assert isinstance(model, MLP), (
            "Expected model to be a subclass of 'mlp.layers.MLP'"
            " class but got %s " % type(model)
        )

        assert isinstance(valid_iterator, DataProvider), (
            "Expected iterator to be a subclass of 'mlp.dataset.DataProvider'"
            " class but got %s " % type(valid_iterator)
        )

        acc_list, nll_list = [], []
        for x, t in valid_iterator:
            y = model.fprop(x)
            nll_list.append(model.cost.cost(y, t))
            acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

        acc = numpy.mean(acc_list)
        nll = numpy.mean(nll_list)

        prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)

        return nll + sum(prior_costs), acc

    @staticmethod
    def classification_accuracy(y, t):
        """
        Returns classification accuracy given the estimate y and targets t
        :param y: matrix -- estimate produced by the model in fprop
        :param t: matrix -- target 1-of-K coded
        :return: vector of size y.shape[0] with binary values set to 0
            if the example was misclassified or 1 otherwise
        """
        y_idx = numpy.argmax(y, axis=1)
        t_idx = numpy.argmax(t, axis=1)
        rval = numpy.equal(y_idx, t_idx)
        return rval

    @staticmethod
    def compute_prior_costs(model, l1_weight, l2_weight):
        """
        Computes the cost contributions coming from the parameter-dependent
        regularisation penalties only
        """
        assert isinstance(model, MLP), (
            "Expected model to be a subclass of 'mlp.layers.MLP'"
            " class but got %s " % type(model)
        )

        l1_cost, l2_cost = 0, 0
        for i in xrange(0, len(model.layers)):
            params = model.layers[i].get_params()
            for param in params:
                if l2_weight > 0:
                    l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
                if l1_weight > 0:
                    l1_cost += l1_weight * numpy.sum(numpy.abs(param))

        return l1_cost, l2_cost
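        # Worked example (illustrative values, not from the course data): for a
        # single 2x2 weight matrix [[0.5, -1.0], [2.0, 0.0]] with l1_weight=0.01
        # and l2_weight=0.001, sum(|W|) = 3.5 and sum(W**2) = 5.25, so this
        # method would return l1_cost = 0.01 * 3.5 = 0.035 and
        # l2_cost = 0.5 * 0.001 * 5.25 = 0.002625.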


class SGDOptimiser(Optimiser):
    def __init__(self, lr_scheduler,
                 dp_scheduler=None,
                 l1_weight=0.0,
                 l2_weight=0.0):

        super(SGDOptimiser, self).__init__()

        assert isinstance(lr_scheduler, LearningRateScheduler), (
            "Expected lr_scheduler to be a subclass of 'mlp.schedulers.LearningRateScheduler'"
            " class but got %s " % type(lr_scheduler)
        )

        self.lr_scheduler = lr_scheduler
        self.dp_scheduler = dp_scheduler
        self.l1_weight = l1_weight
        self.l2_weight = l2_weight

    def train_epoch(self, model, train_iterator, learning_rate):

        assert isinstance(model, MLP), (
            "Expected model to be a subclass of 'mlp.layers.MLP'"
            " class but got %s " % type(model)
        )
        assert isinstance(train_iterator, DataProvider), (
            "Expected iterator to be a subclass of 'mlp.dataset.DataProvider'"
            " class but got %s " % type(train_iterator)
        )

        acc_list, nll_list = [], []
        for x, t in train_iterator:

            # get the prediction
            if self.dp_scheduler is not None:
                y = model.fprop_dropout(x, self.dp_scheduler)
            else:
                y = model.fprop(x)

            # compute the cost and grad of the cost w.r.t y
            cost = model.cost.cost(y, t)
            cost_grad = model.cost.grad(y, t)

            # do backward pass through the model
            model.bprop(cost_grad, self.dp_scheduler)

            # update the model: here we iterate over layers
            # and then over each parameter in the layer
            effective_learning_rate = learning_rate / x.shape[0]
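            # (note: dividing the learning rate by x.shape[0], the minibatch
            # size, makes the update an average over the examples in the batch,
            # assuming pgrads returns gradients summed over the minibatch)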

            for i in xrange(0, len(model.layers)):
                params = model.layers[i].get_params()
                grads = model.layers[i].pgrads(inputs=model.activations[i],
                                               deltas=model.deltas[i + 1],
                                               l1_weight=self.l1_weight,
                                               l2_weight=self.l2_weight)
                uparams = []
                for param, grad in zip(params, grads):
                    param = param - effective_learning_rate * grad
                    uparams.append(param)
                model.layers[i].set_params(uparams)

            nll_list.append(cost)
            acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

        # compute the prior penalties contribution (parameter dependent only)
        prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
        training_cost = numpy.mean(nll_list) + sum(prior_costs)

        return training_cost, numpy.mean(acc_list)

    def train(self, model, train_iterator, valid_iterator=None):

        converged = False
        cost_name = model.cost.get_name()
        tr_stats, valid_stats = [], []

        # do the initial validation
        train_iterator.reset()
        tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
        logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                    % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
        tr_stats.append((tr_nll, tr_acc))

        if valid_iterator is not None:
            valid_iterator.reset()
            valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
            logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
            valid_stats.append((valid_nll, valid_acc))

        while not converged:
            train_iterator.reset()

            tstart = time.clock()
            tr_nll, tr_acc = self.train_epoch(model=model,
                                              train_iterator=train_iterator,
                                              learning_rate=self.lr_scheduler.get_rate())
            tstop = time.clock()
            tr_stats.append((tr_nll, tr_acc))

            logger.info('Epoch %i: Training cost (%s) is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch + 1, cost_name, tr_nll, tr_acc * 100.))

            vstart = time.clock()
            if valid_iterator is not None:
                valid_iterator.reset()
                valid_nll, valid_acc = self.validate(model, valid_iterator,
                                                     self.l1_weight, self.l2_weight)
                logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
                            % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
                self.lr_scheduler.get_next_rate(valid_acc)
                valid_stats.append((valid_nll, valid_acc))
            else:
                self.lr_scheduler.get_next_rate(None)
            vstop = time.clock()

            train_speed = train_iterator.num_examples_presented() / (tstop - tstart)
            valid_speed = valid_iterator.num_examples_presented() / (vstop - vstart)
            tot_time = vstop - tstart
            # pps = presentations per second
            logger.info("Epoch %i: Took %.0f seconds. Training speed %.0f pps. "
                        "Validation speed %.0f pps."
                        % (self.lr_scheduler.epoch, tot_time, train_speed, valid_speed))

            # training stops when the learning rate returned by the scheduler is 0;
            # depending on the schedule this happens, for example, once max_epochs
            # has been reached or when the progress between two consecutive epochs
            # is too small
            converged = (self.lr_scheduler.get_rate() == 0)

        return tr_stats, valid_stats
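
For context, a minimal sketch of how the removed SGDOptimiser was typically driven. The model, data-provider and scheduler constructors below are hypothetical placeholders (the code above only requires them to be MLP, DataProvider and LearningRateScheduler subclasses from the mlp package), and the module path mlp.optimisers is assumed from the commit message rather than shown in the diff.

import logging

# assumed module path for the file removed in this commit
from mlp.optimisers import SGDOptimiser

logging.basicConfig(level=logging.INFO)

# placeholders: an mlp.layers.MLP model, mlp.dataset.DataProvider iterators
# and an mlp.schedulers.LearningRateScheduler built elsewhere
model = build_model()
train_dp = make_data_provider('train')
valid_dp = make_data_provider('valid')
lr_scheduler = make_lr_scheduler()

optimiser = SGDOptimiser(lr_scheduler=lr_scheduler,
                         dp_scheduler=None,   # no dropout schedule
                         l1_weight=0.0,
                         l2_weight=0.001)     # small L2 penalty

# train() runs epochs until the scheduler returns a zero learning rate and
# returns per-epoch (cost, accuracy) tuples for the training and validation sets
tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)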