commit 873817d72f
@@ -86,9 +86,9 @@
 "Hence, the gradient of the cost w.r.t parameter $w_i$ is given as follows:\n",
 "\n",
 "(5) $\n",
-"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} \\frac{1}[2} E^n_{L_2}) }{\\partial w_i} \n",
-" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} \\frac{\\partial\n",
-" \\frac{1}{2}E^n_{L_2}}{\\partial w_i} \\right) \n",
+"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} 0.5 E^n_{L_2}) }{\\partial w_i} \n",
+" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} 0.5 \\frac{\\partial\n",
+" E^n_{L_2}}{\\partial w_i} \\right) \n",
 " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n",
 "\\end{align*}\n",
 "$\n",
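To sanity-check equation (5), here is a minimal numpy sketch (illustrative only; `beta_l2`, `w`, and `l2_penalty` are made-up names, not lab code) confirming by finite differences that the gradient of the $0.5 \beta_{L_2} \sum_i w_i^2$ penalty is indeed $\beta_{L_2} w_i$:

```python
import numpy

beta_l2 = 0.01
w = numpy.array([0.5, -1.2, 3.0])

def l2_penalty(w):
    # the E_L2 contribution in equation (5): 0.5 * beta_L2 * sum_i w_i^2
    return 0.5 * beta_l2 * numpy.sum(w**2)

# central finite differences vs. the analytic gradient beta_L2 * w
eps = 1e-6
fd = numpy.array([(l2_penalty(w + eps * e) - l2_penalty(w - eps * e)) / (2 * eps)
                  for e in numpy.eye(len(w))])
assert numpy.allclose(fd, beta_l2 * w)
```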
@@ -103,6 +103,8 @@
 "\n",
 "where $\\eta$ is the learning rate. \n",
 "\n",
+"Exercise 1 gives some more implementation suggestions on how to incorporate this technique into the lab code: the cost-related prior contributions (equation (1)) are computed in mlp.optimisers.Optimiser.compute_prior_costs(), and your job is to add the relevant optimisation-related code when computing the gradients w.r.t parameters. \n",
+"\n",
 "## $L_{p=1}$ (Sparsity)\n",
 "\n",
 "Our cost with $L_{1}$ regulariser then becomes:\n",
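The optimisation-side change the exercise asks for would look roughly like the sketch below. This is an assumption-laden illustration, not the lab's actual update code (which this diff does not show): `sgd_step` and its arguments are hypothetical names. The L2 penalty contributes $\beta_{L_2} w$ and the L1 penalty contributes $\beta_{L_1} \mathrm{sign}(w)$ to each weight gradient before the step.

```python
import numpy

def sgd_step(W, grad_train, eta, l1_weight=0.0, l2_weight=0.0):
    # Hypothetical per-parameter update implementing equation (5) plus
    # the L1 subgradient; grad_train is dE_train/dW.
    grad = grad_train
    if l2_weight > 0:
        grad = grad + l2_weight * W              # d(0.5 * beta * ||W||^2)/dW
    if l1_weight > 0:
        grad = grad + l1_weight * numpy.sign(W)  # subgradient of beta * ||W||_1
    return W - eta * grad
```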
@@ -283,7 +285,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython2",
-"version": "2.7.10"
+"version": "2.7.9"
 }
 },
 "nbformat": 4,
@@ -290,7 +290,7 @@ class Sigmoid(Linear):
         #'a' get very negative. We limit both tails, however only
         #negative values may lead to numerical issues -- exp(-a)
         #clip() function does the following operation faster:
-        # a[a < -30.] = 30,
+        # a[a < -30.] = -30,
         # a[a > 30.] = 30.
         numpy.clip(a, -30.0, 30.0, out=a)
         h = 1.0/(1 + numpy.exp(-a))
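To see why the clipping matters, a standalone demo (not repository code): numpy.exp overflows float64 once its argument exceeds about 709, so an unclipped, very negative pre-activation makes exp(-a) infinite, while clipping to [-30, 30] keeps everything finite and perturbs the activations by at most about 1e-13 (sigmoid(30) differs from 1 by roughly 9.4e-14):

```python
import numpy

a = numpy.array([-800.0, -30.0, 0.0, 30.0, 800.0])

with numpy.errstate(over='ignore'):
    raw = numpy.exp(-a)               # raw[0] == inf: exp(800) overflows float64

numpy.clip(a, -30.0, 30.0, out=a)     # the same in-place operation as fprop
h = 1.0 / (1 + numpy.exp(-a))         # finite everywhere now
print(h)                              # [9.36e-14, 9.36e-14, 0.5, ~1.0, ~1.0]
```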
@@ -20,7 +20,7 @@ class Optimiser(object):
     def train(self, model, train_iter, valid_iter=None):
         raise NotImplementedError()

-    def validate(self, model, valid_iterator):
+    def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
         assert isinstance(model, MLP), (
             "Expected model to be a subclass of 'mlp.layers.MLP'"
             " class but got %s " % type(model)
@@ -40,7 +40,9 @@ class Optimiser(object):
         acc = numpy.mean(acc_list)
         nll = numpy.mean(nll_list)

-        return nll, acc
+        prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)
+
+        return nll + sum(prior_costs), acc

     @staticmethod
     def classification_accuracy(y, t):
@@ -56,6 +58,28 @@ class Optimiser(object):
         rval = numpy.equal(y_idx, t_idx)
         return rval

+    @staticmethod
+    def compute_prior_costs(model, l1_weight, l2_weight):
+        """
+        Computes the cost contributions coming from the parameter-dependent
+        regularisation penalties only
+        """
+        assert isinstance(model, MLP), (
+            "Expected model to be a subclass of 'mlp.layers.MLP'"
+            " class but got %s " % type(model)
+        )
+
+        l1_cost, l2_cost = 0, 0
+        for i in xrange(0, len(model.layers)):
+            params = model.layers[i].get_params()
+            for param in params:
+                if l2_weight > 0:
+                    l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
+                if l1_weight > 0:
+                    l1_cost += l1_weight * numpy.sum(numpy.abs(param))
+
+        return l1_cost, l2_cost
+
+
 class SGDOptimiser(Optimiser):
     def __init__(self, lr_scheduler,
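As a usage sketch, the helper returns the (l1_cost, l2_cost) pair and callers simply add both to the data cost. The toy values below are placeholders (`W` and `data_nll` are made up) that reproduce the same arithmetic without constructing a full MLP:

```python
import numpy

l1_weight, l2_weight = 0.0, 1e-4
W = numpy.array([[0.5, -2.0], [1.0, 0.0]])    # stand-in for one layer's weights

l2_cost = 0.5 * l2_weight * numpy.sum(W**2)   # what compute_prior_costs accumulates
l1_cost = l1_weight * numpy.sum(numpy.abs(W))

data_nll = 0.3                                # placeholder data-term cost
total_cost = data_nll + l1_cost + l2_cost     # i.e. nll + sum(prior_costs)
```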
@@ -117,7 +141,11 @@ class SGDOptimiser(Optimiser):
             nll_list.append(cost)
             acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

-        return numpy.mean(nll_list), numpy.mean(acc_list)
+        # compute the prior penalties contribution (parameter dependent only)
+        prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
+        training_cost = numpy.mean(nll_list) + sum(prior_costs)
+
+        return training_cost, numpy.mean(acc_list)

     def train(self, model, train_iterator, valid_iterator=None):
@@ -127,14 +155,14 @@ class SGDOptimiser(Optimiser):

         # do the initial validation
         train_iterator.reset()
-        tr_nll, tr_acc = self.validate(model, train_iterator)
+        tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
         logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                     % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
         tr_stats.append((tr_nll, tr_acc))

         if valid_iterator is not None:
             valid_iterator.reset()
-            valid_nll, valid_acc = self.validate(model, valid_iterator)
+            valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
             logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                         % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
             valid_stats.append((valid_nll, valid_acc))
@@ -155,7 +183,8 @@ class SGDOptimiser(Optimiser):
             vstart = time.clock()
             if valid_iterator is not None:
                 valid_iterator.reset()
-                valid_nll, valid_acc = self.validate(model, valid_iterator)
+                valid_nll, valid_acc = self.validate(model, valid_iterator,
+                                                     self.l1_weight, self.l2_weight)
                 logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
                             % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
                 self.lr_scheduler.get_next_rate(valid_acc)