commit 873817d72f
@@ -86,9 +86,9 @@
 "Hence, the gradient of the cost w.r.t parameter $w_i$ is given as follows:\n",
 "\n",
 "(5) $\n",
-"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} \\frac{1}[2} E^n_{L_2}) }{\\partial w_i} \n",
-" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} \\frac{\\partial\n",
-" \\frac{1}{2}E^n_{L_2}}{\\partial w_i} \\right) \n",
+"\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} 0.5 E^n_{L_2}) }{\\partial w_i} \n",
+" = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} 0.5 \\frac{\\partial\n",
+" E^n_{L_2}}{\\partial w_i} \\right) \n",
 " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n",
 "\\end{align*}\n",
 "$\n",
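To sanity-check equation (5), here is a minimal numpy sketch (illustrative only; `beta_l2`, `w`, and `l2_penalty` are made-up names, not lab code) confirming by finite differences that the gradient of the $0.5 \beta_{L_2} \sum_i w_i^2$ penalty is indeed $\beta_{L_2} w_i$:

```python
import numpy

beta_l2 = 0.01
w = numpy.array([0.5, -1.2, 3.0])

def l2_penalty(w):
    # the E_L2 contribution in equation (5): 0.5 * beta_L2 * sum_i w_i^2
    return 0.5 * beta_l2 * numpy.sum(w**2)

# central finite differences vs. the analytic gradient beta_L2 * w
eps = 1e-6
fd = numpy.array([(l2_penalty(w + eps * e) - l2_penalty(w - eps * e)) / (2 * eps)
                  for e in numpy.eye(len(w))])
assert numpy.allclose(fd, beta_l2 * w)
```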
@@ -103,6 +103,8 @@
 "\n",
 "where $\\eta$ is the learning rate. \n",
 "\n",
+"Exercise 1 gives some more implementation suggestions on how to incorporate this technique into the lab code: the cost-related prior contributions (equation (1)) are computed in mlp.optimisers.Optimiser.compute_prior_costs(), and your job is to add the relevant optimisation-related code when computing the gradients w.r.t parameters. \n",
+"\n",
 "## $L_{p=1}$ (Sparsity)\n",
 "\n",
 "Our cost with $L_{1}$ regulariser then becomes:\n",
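The optimisation-side change the exercise asks for would look roughly like the sketch below. This is an assumption-laden illustration, not the lab's actual update code (which this diff does not show): `sgd_step` and its arguments are hypothetical names. The L2 penalty contributes $\beta_{L_2} w$ and the L1 penalty contributes $\beta_{L_1} \mathrm{sign}(w)$ to each weight gradient before the step.

```python
import numpy

def sgd_step(W, grad_train, eta, l1_weight=0.0, l2_weight=0.0):
    # Hypothetical per-parameter update implementing equation (5) plus
    # the L1 subgradient; grad_train is dE_train/dW.
    grad = grad_train
    if l2_weight > 0:
        grad = grad + l2_weight * W              # d(0.5 * beta * ||W||^2)/dW
    if l1_weight > 0:
        grad = grad + l1_weight * numpy.sign(W)  # subgradient of beta * ||W||_1
    return W - eta * grad
```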
@@ -283,7 +285,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython2",
-"version": "2.7.10"
+"version": "2.7.9"
 }
 },
 "nbformat": 4,
@@ -290,7 +290,7 @@ class Sigmoid(Linear):
         #'a' get very negative. We limit both tails, however only
         #negative values may lead to numerical issues -- exp(-a)
         #clip() function does the following operation faster:
-        # a[a < -30.] = 30,
+        # a[a < -30.] = -30,
         # a[a > 30.] = 30.
         numpy.clip(a, -30.0, 30.0, out=a)
         h = 1.0/(1 + numpy.exp(-a))
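To see why the clipping matters, a standalone demo (not repository code): numpy.exp overflows float64 once its argument exceeds about 709, so an unclipped, very negative pre-activation makes exp(-a) infinite, while clipping to [-30, 30] keeps everything finite and perturbs the activations by at most about 1e-13 (sigmoid(30) differs from 1 by roughly 9.4e-14):

```python
import numpy

a = numpy.array([-800.0, -30.0, 0.0, 30.0, 800.0])

with numpy.errstate(over='ignore'):
    raw = numpy.exp(-a)               # raw[0] == inf: exp(800) overflows float64

numpy.clip(a, -30.0, 30.0, out=a)     # the same in-place operation as fprop
h = 1.0 / (1 + numpy.exp(-a))         # finite everywhere now
print(h)                              # [9.36e-14, 9.36e-14, 0.5, ~1.0, ~1.0]
```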
@@ -20,7 +20,7 @@ class Optimiser(object):
     def train(self, model, train_iter, valid_iter=None):
         raise NotImplementedError()

-    def validate(self, model, valid_iterator):
+    def validate(self, model, valid_iterator, l1_weight=0, l2_weight=0):
         assert isinstance(model, MLP), (
             "Expected model to be a subclass of 'mlp.layers.MLP'"
             " class but got %s " % type(model)
@@ -40,7 +40,9 @@ class Optimiser(object):
         acc = numpy.mean(acc_list)
         nll = numpy.mean(nll_list)

-        return nll, acc
+        prior_costs = Optimiser.compute_prior_costs(model, l1_weight, l2_weight)
+
+        return nll + sum(prior_costs), acc

     @staticmethod
     def classification_accuracy(y, t):
@@ -56,6 +58,28 @@ class Optimiser(object):
         rval = numpy.equal(y_idx, t_idx)
         return rval

+    @staticmethod
+    def compute_prior_costs(model, l1_weight, l2_weight):
+        """
+        Computes the cost contributions coming from the parameter-dependent
+        regularisation penalties only
+        """
+        assert isinstance(model, MLP), (
+            "Expected model to be a subclass of 'mlp.layers.MLP'"
+            " class but got %s " % type(model)
+        )
+
+        l1_cost, l2_cost = 0, 0
+        for i in xrange(0, len(model.layers)):
+            params = model.layers[i].get_params()
+            for param in params:
+                if l2_weight > 0:
+                    l2_cost += 0.5 * l2_weight * numpy.sum(param**2)
+                if l1_weight > 0:
+                    l1_cost += l1_weight * numpy.sum(numpy.abs(param))
+
+        return l1_cost, l2_cost
+
+
 class SGDOptimiser(Optimiser):
     def __init__(self, lr_scheduler,
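As a usage sketch, the helper returns the (l1_cost, l2_cost) pair and callers simply add both to the data cost. The toy values below are placeholders (`W` and `data_nll` are made up) that reproduce the same arithmetic without constructing a full MLP:

```python
import numpy

l1_weight, l2_weight = 0.0, 1e-4
W = numpy.array([[0.5, -2.0], [1.0, 0.0]])    # stand-in for one layer's weights

l2_cost = 0.5 * l2_weight * numpy.sum(W**2)   # what compute_prior_costs accumulates
l1_cost = l1_weight * numpy.sum(numpy.abs(W))

data_nll = 0.3                                # placeholder data-term cost
total_cost = data_nll + l1_cost + l2_cost     # i.e. nll + sum(prior_costs)
```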
@@ -117,7 +141,11 @@ class SGDOptimiser(Optimiser):
             nll_list.append(cost)
             acc_list.append(numpy.mean(self.classification_accuracy(y, t)))

-        return numpy.mean(nll_list), numpy.mean(acc_list)
+        # compute the prior penalties contribution (parameter dependent only)
+        prior_costs = Optimiser.compute_prior_costs(model, self.l1_weight, self.l2_weight)
+        training_cost = numpy.mean(nll_list) + sum(prior_costs)
+
+        return training_cost, numpy.mean(acc_list)

     def train(self, model, train_iterator, valid_iterator=None):
@@ -127,14 +155,14 @@ class SGDOptimiser(Optimiser):

         # do the initial validation
         train_iterator.reset()
-        tr_nll, tr_acc = self.validate(model, train_iterator)
+        tr_nll, tr_acc = self.validate(model, train_iterator, self.l1_weight, self.l2_weight)
         logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                     % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
         tr_stats.append((tr_nll, tr_acc))

         if valid_iterator is not None:
             valid_iterator.reset()
-            valid_nll, valid_acc = self.validate(model, valid_iterator)
+            valid_nll, valid_acc = self.validate(model, valid_iterator, self.l1_weight, self.l2_weight)
             logger.info('Epoch %i: Validation cost (%s) for initial model is %.3f. Accuracy is %.2f%%'
                         % (self.lr_scheduler.epoch, cost_name, valid_nll, valid_acc * 100.))
             valid_stats.append((valid_nll, valid_acc))
@@ -155,7 +183,8 @@ class SGDOptimiser(Optimiser):
             vstart = time.clock()
             if valid_iterator is not None:
                 valid_iterator.reset()
-                valid_nll, valid_acc = self.validate(model, valid_iterator)
+                valid_nll, valid_acc = self.validate(model, valid_iterator,
+                                                     self.l1_weight, self.l2_weight)
                 logger.info('Epoch %i: Validation cost (%s) is %.3f. Accuracy is %.2f%%'
                             % (self.lr_scheduler.epoch + 1, cost_name, valid_nll, valid_acc * 100.))
                 self.lr_scheduler.get_next_rate(valid_acc)