commit 873817d72f
@@ -86,9 +86,9 @@
     "Hence, the gradient of the cost w.r.t parameter $w_i$ is given as follows:\n",
     "\n",
     "(5) $\n",
-    "\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} \\frac{1}{2} E^n_{L_2}) }{\\partial w_i} \n",
-    " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} \\frac{\\partial\n",
-    " \\frac{1}{2}E^n_{L_2}}{\\partial w_i} \\right) \n",
+    "\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} 0.5 E^n_{L_2}) }{\\partial w_i} \n",
+    " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} 0.5 \\frac{\\partial\n",
+    " E^n_{L_2}}{\\partial w_i} \\right) \n",
     " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n",
     "\\end{align*}\n",
     "$\n",
@@ -101,7 +101,9 @@
     "\\end{align*}\n",
     "$\n",
     "\n",
-    "where $\\eta$ is learning rate.\n",
+    "where $\\eta$ is the learning rate.\n",
+    "\n",
+    "Exercise 1 gives some more implementation suggestions on how to incorporate this technique into the lab code: the cost-related prior contributions (equation (1)) are computed in mlp.optimisers.Optimiser.compute_prior_costs(), and your job is to add the relevant optimisation-related code when computing the gradients w.r.t the parameters.\n",
     "\n",
     "## $L_{p=1}$ (Sparsity)\n",
     "\n",
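In code, equation (5) means the gradient obtained from backprop for each weight array simply gains an extra beta_L2 * W term before the usual update w_i <- w_i - eta * dE/dw_i. A minimal sketch of that step, using illustrative names (W, grad_W, eta, l1_weight, l2_weight) rather than the lab's actual variables:

import numpy

# dE_train/dW as produced by backprop (random placeholder values here)
W = numpy.random.randn(100, 10)
grad_W = numpy.random.randn(100, 10)
eta, l2_weight, l1_weight = 0.5, 1e-4, 0.0

grad_W = grad_W + l2_weight * W                   # d(0.5*beta_L2*sum(W**2))/dW = beta_L2*W
if l1_weight > 0:
    grad_W = grad_W + l1_weight * numpy.sign(W)   # L1 term, see the next section
W -= eta * grad_W                                 # gradient-descent step with learning rate eta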
@@ -283,7 +285,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.10"
+   "version": "2.7.9"
   }
  },
  "nbformat": 4,
@@ -290,7 +290,7 @@ class Sigmoid(Linear):
         #'a' get very negative. We limit both tails, however only
         #negative values may lead to numerical issues -- exp(-a)
         #clip() function does the following operation faster:
-        # a[a < -30.] = 30,
+        # a[a < -30.] = -30,
         # a[a > 30.] = 30.
         numpy.clip(a, -30.0, 30.0, out=a)
         h = 1.0/(1 + numpy.exp(-a))
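As the comments note, the clip guards exp(-a) against overflow when pre-activations are very negative. A quick standalone check of the same trick, using the same +/-30 bounds as the layer code:

import numpy

a = numpy.array([-1000., -30., 0., 30., 1000.])
numpy.clip(a, -30.0, 30.0, out=a)     # in-place, as in the layer code
h = 1.0 / (1 + numpy.exp(-a))
print(h)                              # approx [9.4e-14, 9.4e-14, 0.5, 1.0, 1.0]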
@@ -20,7 +20,7 @@ class Optimiser(object):
     def train(self, model, train_iter, valid_iter=None):
         raise NotImplementedError()

-    def validate(self, model, valid_iterator):
+    def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
         assert isinstance(model, MLP), (
             "Expected model to be a subclass of 'mlp.layers.MLP'"
             " class but got %s " % type(model)
@@ -40,7 +40,9 @@ class Optimiser(object):
         acc = numpy.mean(acc_list)
         nll = numpy.mean(nll_list)

-        return nll, acc
+        prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)
+
+        return nll + sum(prior_costs), acc

     @staticmethod
     def classification_accuracy(y, t):
@@ -56,6 +58,28 @@ class Optimiser(object):
         rval = numpy.equal(y_idx, t_idx)
         return rval

+    @staticmethod
+    def compute_prior_costs(model, l1_weight, l2_weight):
+        """
+        Computes the cost contributions coming from parameter-dependent only
+        regularisation penalties
+        """
+        assert isinstance(model, MLP), (
+            "Expected model to be a subclass of 'mlp.layers.MLP'"
+            " class but got %s " % type(model)
+        )
+
+        l1_cost, l2_cost = 0, 0
+        for i in xrange(0, len(model.layers)):
+            params = model.layers[i].get_params()
+            for param in params:
+                if l2_weight > 0:
+                    l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
+                if l1_weight > 0:
+                    l1_cost += l1_weight * numpy.sum(numpy.abs(param))
+
+        return l1_cost, l2_cost
+
+
 class SGDOptimiser(Optimiser):
     def __init__(self, lr_scheduler,
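A hypothetical usage of the new helper, showing how the penalty terms enter the overall cost in the same way validate() above and the training-cost computation below use them (model, data_cost and the penalty weights are assumed to be set up as in the lab code):

# parameter-dependent penalty terms for the whole model
l1_cost, l2_cost = Optimiser.compute_prior_costs(model, l1_weight=0.0, l2_weight=1e-4)

# the reported cost is the data cost plus the regularisation penalties
total_cost = data_cost + l1_cost + l2_cost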
@@ -117,7 +141,11 @@ class SGDOptimiser(Optimiser):
             nll_list.append(cost)
             acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

-        return numpy.mean(nll_list), numpy.mean(acc_list)
+        #compute the prior penalties contribution (parameter dependent only)
+        prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
+        training_cost = numpy.mean(nll_list) + sum(prior_costs)
+
+        return training_cost, numpy.mean(acc_list)

     def train(self, model, train_iterator, valid_iterator=None):

@@ -127,14 +155,14 @@ class SGDOptimiser(Optimiser):

        # do the initial validation
        train_iterator.reset()
-       tr_nll, tr_acc = self.validate(model, train_iterator)
+       tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
        logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                    % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
        tr_stats.append((tr_nll, tr_acc))

        if valid_iterator is not None:
            valid_iterator.reset()
-           valid_nll, valid_acc = self.validate(model, valid_iterator)
+           valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
            logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                        % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
            valid_stats.append((valid_nll, valid_acc))
@@ -155,7 +183,8 @@ class SGDOptimiser(Optimiser):
            vstart = time.clock()
            if valid_iterator is not None:
                valid_iterator.reset()
-               valid_nll, valid_acc = self.validate(model, valid_iterator)
+               valid_nll, valid_acc = self.validate(model, valid_iterator,
+                                                    self.l1_weight, self.l2_weight)
                logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
                            % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
                self.lr_scheduler.get_next_rate(valid_acc)
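Taken together, the changes mean regularisation is switched on simply by giving the optimiser non-zero penalty weights; both the training and validation costs then include the prior terms. A hypothetical call, assuming the truncated __init__ above accepts l1_weight and l2_weight keyword arguments and that lr_scheduler, model and the iterators are built as elsewhere in the lab:

optimiser = SGDOptimiser(lr_scheduler, l1_weight=0.0, l2_weight=1e-4)
optimiser.train(model, train_iterator, valid_iterator)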