Merge pull request #24 from pswietojanski/master

some clarifications
This commit is contained in:
Pawel Swietojanski 2015-11-02 13:00:51 +00:00
commit 873817d72f
3 changed files with 43 additions and 12 deletions

View File

@ -86,9 +86,9 @@
"Hence, the gradient of the cost w.r.t parameter $w_i$ is given as follows:\n", "Hence, the gradient of the cost w.r.t parameter $w_i$ is given as follows:\n",
"\n", "\n",
"(5) $\n", "(5) $\n",
"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} \\frac{1}[2} E^n_{L_2}) }{\\partial w_i} \n", "\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} 0.5 E^n_{L_2}) }{\\partial w_i} \n",
" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} \\frac{\\partial\n", " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} 0.5 \\frac{\\partial\n",
" \\frac{1}{2}E^n_{L_2}}{\\partial w_i} \\right) \n", " E^n_{L_2}}{\\partial w_i} \\right) \n",
" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n", " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n",
"\\end{align*}\n", "\\end{align*}\n",
"$\n", "$\n",
@ -101,7 +101,9 @@
"\\end{align*}\n", "\\end{align*}\n",
"$\n", "$\n",
"\n", "\n",
"where $\\eta$ is learning rate.\n", "where $\\eta$ is learning rate. \n",
"\n",
"Exercise 1 gives some more implementation suggestions on how to incorporate this technique into the lab code. The cost-related prior contributions (equation (1)) are computed in mlp.optimisers.Optimiser.compute_prior_costs(); your job is to add the relevant optimisation-related code when computing the gradients w.r.t. the parameters. \n",
"\n", "\n",
"## $L_{p=1}$ (Sparsity)\n", "## $L_{p=1}$ (Sparsity)\n",
"\n", "\n",
@ -283,7 +285,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython2", "pygments_lexer": "ipython2",
"version": "2.7.10" "version": "2.7.9"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -290,7 +290,7 @@ class Sigmoid(Linear):
#'a' get very negative. We limit both tails, however only #'a' get very negative. We limit both tails, however only
#negative values may lead to numerical issues -- exp(-a) #negative values may lead to numerical issues -- exp(-a)
#clip() function does the following operation faster: #clip() function does the following operation faster:
# a[a < -30.] = 30, # a[a < -30.] = -30,
# a[a > 30.] = 30. # a[a > 30.] = 30.
numpy.clip(a, -30.0, 30.0, out=a) numpy.clip(a, -30.0, 30.0, out=a)
h = 1.0/(1 + numpy.exp(-a)) h = 1.0/(1 + numpy.exp(-a))

View File

@ -20,7 +20,7 @@ class Optimiser(object):
def train(self, model, train_iter, valid_iter=None): def train(self, model, train_iter, valid_iter=None):
raise NotImplementedError() raise NotImplementedError()
def validate(self, model, valid_iterator): def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
assert isinstance(model, MLP), ( assert isinstance(model, MLP), (
"Expected model to be a subclass of 'mlp.layers.MLP'" "Expected model to be a subclass of 'mlp.layers.MLP'"
" class but got %s " % type(model) " class but got %s " % type(model)
@ -40,7 +40,9 @@ class Optimiser(object):
acc = numpy.mean(acc_list) acc = numpy.mean(acc_list)
nll = numpy.mean(nll_list) nll = numpy.mean(nll_list)
return nll, acc prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)
return nll + sum(prior_costs), acc
@staticmethod @staticmethod
def classification_accuracy(y, t): def classification_accuracy(y, t):
@ -56,6 +58,28 @@ class Optimiser(object):
rval = numpy.equal(y_idx, t_idx) rval = numpy.equal(y_idx, t_idx)
return rval return rval
@staticmethod
def compute_prior_costs(model, l1_weight, l2_weight):
    """
    Computes the cost contributions coming from parameter-dependent only
    regularisation penalties.

    Every parameter array returned by get_params() of every layer in
    model.layers contributes to the penalties:

        L2: 0.5 * l2_weight * sum(param ** 2)
        L1: l1_weight * sum(|param|)

    :param model: the MLP instance whose layer parameters are penalised
    :param l1_weight: scale of the L1 penalty; skipped unless > 0
    :param l2_weight: scale of the L2 penalty; skipped unless > 0
    :return: tuple (l1_cost, l2_cost); a skipped penalty is reported as 0
    """
    assert isinstance(model, MLP), (
        "Expected model to be a subclass of 'mlp.layers.MLP'"
        " class but got %s " % type(model)
    )

    l1_cost, l2_cost = 0, 0
    for layer in model.layers:
        for param in layer.get_params():
            if l2_weight > 0:
                l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
            if l1_weight > 0:
                # The L1 cost is the sum of magnitudes. numpy.sign(param)
                # is the (sub)gradient of that cost, not the cost itself;
                # summing signs lets positive and negative weights cancel.
                l1_cost += l1_weight * numpy.sum(numpy.abs(param))

    return l1_cost, l2_cost
class SGDOptimiser(Optimiser): class SGDOptimiser(Optimiser):
def __init__(self, lr_scheduler, def __init__(self, lr_scheduler,
@ -117,7 +141,11 @@ class SGDOptimiser(Optimiser):
nll_list.append(cost) nll_list.append(cost)
acc_list.append(numpy.mean(self.classification_accuracy(y, t))) acc_list.append(numpy.mean(self.classification_accuracy(y, t)))
return numpy.mean(nll_list), numpy.mean(acc_list) #compute the prior penalties contribution (parameter dependent only)
prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
training_cost = numpy.mean(nll_list) + sum(prior_costs)
return training_cost, numpy.mean(acc_list)
def train(self, model, train_iterator, valid_iterator=None): def train(self, model, train_iterator, valid_iterator=None):
@ -127,14 +155,14 @@ class SGDOptimiser(Optimiser):
# do the initial validation # do the initial validation
train_iterator.reset() train_iterator.reset()
tr_nll, tr_acc = self.validate(model, train_iterator) tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%' logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.)) % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
tr_stats.append((tr_nll, tr_acc)) tr_stats.append((tr_nll, tr_acc))
if valid_iterator is not None: if valid_iterator is not None:
valid_iterator.reset() valid_iterator.reset()
valid_nll, valid_acc = self.validate(model, valid_iterator) valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%' logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.)) % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
valid_stats.append((valid_nll, valid_acc)) valid_stats.append((valid_nll, valid_acc))
@ -155,7 +183,8 @@ class SGDOptimiser(Optimiser):
vstart = time.clock() vstart = time.clock()
if valid_iterator is not None: if valid_iterator is not None:
valid_iterator.reset() valid_iterator.reset()
valid_nll, valid_acc = self.validate(model, valid_iterator) valid_nll, valid_acc = self.validate(model, valid_iterator,
self.l1_weight, self.l2_weight)
logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%' logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.)) % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
self.lr_scheduler.get_next_rate(valid_acc) self.lr_scheduler.get_next_rate(valid_acc)