Merge pull request #24 from pswietojanski/master

some clarifications
Pawel Swietojanski 2015-11-02 13:00:51 +00:00
commit 873817d72f
3 changed files with 43 additions and 12 deletions

View File

@ -86,9 +86,9 @@
"Hence, the gradient of the cost w.r.t parameter $w_i$ is given as follows:\n",
"\n",
"(5) $\n",
"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} \\frac{1}[2} E^n_{L_2}) }{\\partial w_i} \n",
" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} \\frac{\\partial\n",
" \\frac{1}{2}E^n_{L_2}}{\\partial w_i} \\right) \n",
"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} 0.5 E^n_{L_2}) }{\\partial w_i} \n",
" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} 0.5 \\frac{\\partial\n",
" E^n_{L_2}}{\\partial w_i} \\right) \n",
" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n",
"\\end{align*}\n",
"$\n",
@ -101,7 +101,9 @@
"\\end{align*}\n",
"$\n",
"\n",
"where $\\eta$ is learning rate.\n",
"where $\\eta$ is learning rate. \n",
"\n",
"Exercise 1 gives some more implementational suggestions on how to incorporate this technique into the lab code, the cost related prior contributions (equation (1)) are computed in mlp.optimisers.Optimiser.compute_prior_costs() and your job is to add the relevant optimisation related code when computing the gradients w.r.t parameters. \n",
"\n",
"## $L_{p=1}$ (Sparsity)\n",
"\n",
@ -283,7 +285,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
"version": "2.7.9"
}
},
"nbformat": 4,

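The Exercise 1 note added above points at compute_prior_costs() for the cost side but leaves the gradient side to the reader. Below is a minimal numpy sketch of that gradient-side change, assuming the data-term gradients are available as a list aligned with get_params(); the helper name apply_prior_grads and its arguments are illustrative only and not part of this commit.

import numpy

def apply_prior_grads(params, grads, l1_weight=0.0, l2_weight=0.0):
    """Add the parameter-dependent penalty gradients to the data-term gradients.

    For a parameter w the extra terms are d(0.5*l2*sum(w**2))/dw = l2*w
    and d(l1*sum(|w|))/dw = l1*sign(w).
    """
    reg_grads = []
    for param, grad in zip(params, grads):
        grad = grad.copy()
        if l2_weight > 0:
            grad += l2_weight * param
        if l1_weight > 0:
            grad += l1_weight * numpy.sign(param)
        reg_grads.append(grad)
    return reg_grads

In the lab code this logic would sit wherever the per-layer parameter gradients are computed, before the update step; that part of the code is not touched by this diff.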
View File

@ -290,7 +290,7 @@ class Sigmoid(Linear):
#'a' get very negative. We limit both tails, however only
#negative values may lead to numerical issues -- exp(-a)
#clip() function does the following operation faster:
# a[a < -30.] = 30,
# a[a < -30.] = -30,
# a[a > 30.] = 30.
numpy.clip(a, -30.0, 30.0, out=a)
h = 1.0/(1 + numpy.exp(-a))

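The clipping discussed in the comments above matters because exp(-a) overflows float64 once a drops below roughly -710, whereas at a = -30 the sigmoid is already about 9.4e-14, so clipping changes nothing visible in the output. A standalone check (not part of the repo):

import numpy

a = numpy.array([-1000.0, -30.0, 0.0, 30.0, 1000.0])
# same effect as: a[a < -30.] = -30.; a[a > 30.] = 30., but done in one call
numpy.clip(a, -30.0, 30.0, out=a)
h = 1.0 / (1.0 + numpy.exp(-a))  # exp(30.) ~ 1.07e13, comfortably inside float64 range
print(h)  # ~[9.36e-14, 9.36e-14, 0.5, 1.0, 1.0]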
View File

@ -20,7 +20,7 @@ class Optimiser(object):
def train(self, model, train_iter, valid_iter=None):
raise NotImplementedError()
def validate(self, model, valid_iterator):
def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
assert isinstance(model, MLP), (
"Expected model to be a subclass of 'mlp.layers.MLP'"
" class but got %s " % type(model)
@ -40,7 +40,9 @@ class Optimiser(object):
acc = numpy.mean(acc_list)
nll = numpy.mean(nll_list)
return nll, acc
prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)
return nll + sum(prior_costs), acc
@staticmethod
def classification_accuracy(y, t):
@ -56,6 +58,28 @@ class Optimiser(object):
rval = numpy.equal(y_idx, t_idx)
return rval
@staticmethod
def compute_prior_costs(model, l1_weight, l2_weight):
"""
Computes the cost contributions coming from parameter-dependent only
regularisation penalties
"""
assert isinstance(model, MLP), (
"Expected model to be a subclass of 'mlp.layers.MLP'"
" class but got %s " % type(model)
)
l1_cost, l2_cost = 0, 0
for i in xrange(0, len(model.layers)):
params = model.layers[i].get_params()
for param in params:
if l2_weight > 0:
l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
if l1_weight > 0:
l1_cost += l1_weight * numpy.sum(numpy.abs(param))
return l1_cost, l2_cost
class SGDOptimiser(Optimiser):
def __init__(self, lr_scheduler,
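As a quick sanity check on the formulas in compute_prior_costs(), the two penalty contributions for a single toy weight array can be computed directly with numpy (standalone, not using the MLP class; the weights and coefficients below are made up for illustration):

import numpy

w = numpy.array([0.5, -1.0, 2.0])
l1_weight, l2_weight = 0.001, 0.01

l2_cost = 0.5 * l2_weight * numpy.sum(w**2)    # 0.5 * 0.01 * 5.25 = 0.02625
l1_cost = l1_weight * numpy.sum(numpy.abs(w))  # 0.001 * 3.5 = 0.0035
print(l1_cost, l2_cost)

These are the parameter-only terms that validate() and the training loop below add to the mean negative log-likelihood before reporting the cost.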
@ -117,7 +141,11 @@ class SGDOptimiser(Optimiser):
nll_list.append(cost)
acc_list.append(numpy.mean(self.classification_accuracy(y, t)))
return numpy.mean(nll_list), numpy.mean(acc_list)
#compute the prior penalties contribution (parameter dependent only)
prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
training_cost = numpy.mean(nll_list) + sum(prior_costs)
return training_cost, numpy.mean(acc_list)
def train(self, model, train_iterator, valid_iterator=None):
@ -127,14 +155,14 @@ class SGDOptimiser(Optimiser):
# do the initial validation
train_iterator.reset()
tr_nll, tr_acc = self.validate(model, train_iterator)
tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
tr_stats.append((tr_nll, tr_acc))
if valid_iterator is not None:
valid_iterator.reset()
valid_nll, valid_acc = self.validate(model, valid_iterator)
valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
valid_stats.append((valid_nll, valid_acc))
@ -155,7 +183,8 @@ class SGDOptimiser(Optimiser):
vstart = time.clock()
if valid_iterator is not None:
valid_iterator.reset()
valid_nll, valid_acc = self.validate(model, valid_iterator)
valid_nll, valid_acc = self.validate(model, valid_iterator,
self.l1_weight, self.l2_weight)
logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
self.lr_scheduler.get_next_rate(valid_acc)