diff --git a/mlp/optimisers.py b/mlp/optimisers.py
deleted file mode 100644
index 3200238..0000000
--- a/mlp/optimisers.py
+++ /dev/null
@@ -1,214 +0,0 @@
-# Machine Learning Practical (INFR11119),
-# Pawel Swietojanski, University of Edinburgh
-
-import numpy
-import time
-import logging
-
-from mlp.layers import MLP
-from mlp.dataset import DataProvider
-from mlp.schedulers import LearningRateScheduler
-
-
-logger = logging.getLogger(__name__)
-
-
-class Optimiser(object):
-    def train_epoch(self, model, train_iter):
-        raise NotImplementedError()
-
-    def train(self, model, train_iter, valid_iter=None):
-        raise NotImplementedError()
-
-    def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
-        assert isinstance(model, MLP), (
-            "Expected model to be a subclass of 'mlp.layers.MLP'"
-            " class but got %s " % type(model)
-        )
-
-        assert isinstance(valid_iterator, DataProvider), (
-            "Expected iterator to be a subclass of 'mlp.dataset.DataProvider'"
-            " class but got %s " % type(valid_iterator)
-        )
-
-        acc_list, nll_list = [], []
-        for x, t in valid_iterator:
-            y = model.fprop(x)
-            nll_list.append(model.cost.cost(y, t))
-            acc_list.append(numpy.mean(self.classification_accuracy(y, t)))
-
-        acc = numpy.mean(acc_list)
-        nll = numpy.mean(nll_list)
-
-        prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)
-
-        return nll + sum(prior_costs), acc
-
-    @staticmethod
-    def classification_accuracy(y, t):
-        """
-        Returns classification accuracy given the estimate y and targets t
-        :param y: matrix -- estimate produced by the model in fprop
-        :param t: matrix -- target 1-of-K coded
-        :return: vector of y.shape[0] size with binary values set to 0
-                 if example was miscalssified or 1 otherwise
-        """
-        y_idx = numpy.argmax(y, axis=1)
-        t_idx = numpy.argmax(t, axis=1)
-        rval = numpy.equal(y_idx, t_idx)
-        return rval
-
-    @staticmethod
-    def compute_prior_costs(model, l1_weight, l2_weight):
-        """
-        Computes the cost contributions coming from parameter-dependent only
-        regularisation penalties
-        """
-        assert isinstance(model, MLP), (
-            "Expected model to be a subclass of 'mlp.layers.MLP'"
-            " class but got %s " % type(model)
-        )
-
-        l1_cost, l2_cost = 0, 0
-        for i in xrange(0, len(model.layers)):
-            params = model.layers[i].get_params()
-            for param in params:
-                if l2_weight > 0:
-                    l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
-                if l1_weight > 0:
-                    l1_cost += l1_weight * numpy.sum(numpy.abs(param))
-
-        return l1_cost, l2_cost
-
-
-class SGDOptimiser(Optimiser):
-    def __init__(self, lr_scheduler,
-                 dp_scheduler=None,
-                 l1_weight=0.0,
-                 l2_weight=0.0):
-
-        super(SGDOptimiser, self).__init__()
-
-        assert isinstance(lr_scheduler, LearningRateScheduler), (
-            "Expected lr_scheduler to be a subclass of 'mlp.schedulers.LearningRateScheduler'"
-            " class but got %s " % type(lr_scheduler)
-        )
-
-        self.lr_scheduler = lr_scheduler
-        self.dp_scheduler = dp_scheduler
-        self.l1_weight = l1_weight
-        self.l2_weight = l2_weight
-
-    def train_epoch(self, model, train_iterator, learning_rate):
-
-        assert isinstance(model, MLP), (
-            "Expected model to be a subclass of 'mlp.layers.MLP'"
-            " class but got %s " % type(model)
-        )
-        assert isinstance(train_iterator, DataProvider), (
-            "Expected iterator to be a subclass of 'mlp.dataset.DataProvider'"
-            " class but got %s " % type(train_iterator)
-        )
-
-        acc_list, nll_list = [], []
-        for x, t in train_iterator:
-
-            # get the prediction
-            if self.dp_scheduler is not None:
-                y = model.fprop_dropout(x, self.dp_scheduler)
-            else:
-                y = model.fprop(x)
-
-            # compute the cost and grad of the cost w.r.t y
-            cost = model.cost.cost(y, t)
-            cost_grad = model.cost.grad(y, t)
-
-            # do backward pass through the model
-            model.bprop(cost_grad, self.dp_scheduler)
-
-            #update the model, here we iterate over layers
-            #and then over each parameter in the layer
-            effective_learning_rate = learning_rate / x.shape[0]
-
-            for i in xrange(0, len(model.layers)):
-                params = model.layers[i].get_params()
-                grads = model.layers[i].pgrads(inputs=model.activations[i],
-                                               deltas=model.deltas[i + 1],
-                                               l1_weight=self.l1_weight,
-                                               l2_weight=self.l2_weight)
-                uparams = []
-                for param, grad in zip(params, grads):
-                    param = param - effective_learning_rate * grad
-                    uparams.append(param)
-                model.layers[i].set_params(uparams)
-
-            nll_list.append(cost)
-            acc_list.append(numpy.mean(self.classification_accuracy(y, t)))
-
-        #compute the prior penalties contribution (parameter dependent only)
-        prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
-        training_cost = numpy.mean(nll_list) + sum(prior_costs)
-
-        return training_cost, numpy.mean(acc_list)
-
-    def train(self, model, train_iterator, valid_iterator=None):
-
-        converged = False
-        cost_name = model.cost.get_name()
-        tr_stats, valid_stats = [], []
-
-        # do the initial validation
-        train_iterator.reset()
-        tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
-        logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
-                    % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
-        tr_stats.append((tr_nll, tr_acc))
-
-        if valid_iterator is not None:
-            valid_iterator.reset()
-            valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
-            logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
-                        % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
-            valid_stats.append((valid_nll, valid_acc))
-
-        while not converged:
-            train_iterator.reset()
-
-            tstart = time.clock()
-            tr_nll, tr_acc = self.train_epoch(model=model,
-                                              train_iterator=train_iterator,
-                                              learning_rate=self.lr_scheduler.get_rate())
-            tstop = time.clock()
-            tr_stats.append((tr_nll, tr_acc))
-
-            logger.info('Epoch %i: Training cost (%s) is %.3f. Accuracy is %.2f%%'
-                        % (self.lr_scheduler.epoch + 1, cost_name, tr_nll, tr_acc * 100.))
-
-            vstart = time.clock()
-            if valid_iterator is not None:
-                valid_iterator.reset()
-                valid_nll, valid_acc = self.validate(model, valid_iterator,
-                                                     self.l1_weight, self.l2_weight)
-                logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
-                            % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
-                self.lr_scheduler.get_next_rate(valid_acc)
-                valid_stats.append((valid_nll, valid_acc))
-            else:
-                self.lr_scheduler.get_next_rate(None)
-            vstop = time.clock()
-
-            train_speed = train_iterator.num_examples_presented() / (tstop - tstart)
-            valid_speed = valid_iterator.num_examples_presented() / (vstop - vstart)
-            tot_time = vstop - tstart
-            #pps = presentations per second
-            logger.info("Epoch %i: Took %.0f seconds. Training speed %.0f pps. "
-                        "Validation speed %.0f pps."
-                        % (self.lr_scheduler.epoch, tot_time, train_speed, valid_speed))
-
-            # we stop training when learning rate, as returned by lr scheduler, is 0
-            # this is implementation dependent and depending on lr schedule could happen,
-            # for example, when max_epochs has been reached or if the progress between
-            # two consecutive epochs is too small, etc.
-            converged = (self.lr_scheduler.get_rate() == 0)
-
-        return tr_stats, valid_stats
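
For reference, below is a minimal sketch of how the SGDOptimiser removed above was driven. Only the optimiser interface itself (the constructor arguments and the train() call, which loops until the scheduler's get_rate() returns 0) is taken from the deleted file; the MNISTDataProvider and LearningRateFixed class names, their constructor arguments, and the build_model() helper are assumptions about the companion mlp.dataset / mlp.schedulers modules, used here only for illustration.

import logging

from mlp.dataset import MNISTDataProvider      # assumed DataProvider subclass
from mlp.schedulers import LearningRateFixed   # assumed LearningRateScheduler subclass
from mlp.optimisers import SGDOptimiser        # the class removed by this diff

logging.basicConfig(level=logging.INFO)        # make the optimiser's logger.info output visible

model = build_model()                          # hypothetical helper returning an mlp.layers.MLP with a cost attached

# training and validation iterators; argument names are assumptions
train_dp = MNISTDataProvider(dset='train', batch_size=100, randomize=True)
valid_dp = MNISTDataProvider(dset='valid', batch_size=100, randomize=False)

# train() keeps running epochs until lr_scheduler.get_rate() returns 0
# (for a fixed schedule, once max_epochs has been reached)
lr_scheduler = LearningRateFixed(learning_rate=0.5, max_epochs=30)
optimiser = SGDOptimiser(lr_scheduler, l2_weight=0.001)

tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)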