From 2b516f2f97b64b1232751c1e732fae86bbe08b57 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Wed, 28 Oct 2015 16:59:11 +0000
Subject: [PATCH] lab4 work
---
03_MLP_Coursework1.ipynb | 67 +++++++++++++++--
04_Regularisation.ipynb | 156 +++++++++++++++++++++++++++++++++++++++
mlp/layers.py | 104 ++++++++++++++++++++++++--
mlp/optimisers.py | 1 +
4 files changed, 316 insertions(+), 12 deletions(-)
create mode 100644 04_Regularisation.ipynb
diff --git a/03_MLP_Coursework1.ipynb b/03_MLP_Coursework1.ipynb
index 0f786ae..021b761 100644
--- a/03_MLP_Coursework1.ipynb
+++ b/03_MLP_Coursework1.ipynb
@@ -78,7 +78,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"collapsed": false
},
@@ -142,11 +142,43 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "collapsed": true
+ "collapsed": false
},
"outputs": [],
"source": [
- "%load -s Sigmoid mlp/layers.py\n"
+ "# %load -s Sigmoid mlp/layers.py\n",
+ "class Sigmoid(Linear):\n",
+ " def __init__(self, idim, odim,\n",
+ " rng=None,\n",
+ " irange=0.1):\n",
+ "\n",
+ " super(Sigmoid, self).__init__(idim, odim, rng, irange)\n",
+ " \n",
+ " def fprop(self, inputs):\n",
+ " a = super(Sigmoid, self).fprop(inputs)\n",
+ " h = 1.0/(1 + numpy.exp(-a))\n",
+ " return h\n",
+ " \n",
+ " def bprop(self, h, igrads):\n",
+ " dsigm = h*(1.0 - h)\n",
+ " deltas = igrads*dsigm\n",
+ " ___, ograds = super(Sigmoid, self).bprop(h=None, igrads=deltas)\n",
+ " return deltas, ograds\n",
+ "\n",
+ " def cost_bprop(self, h, igrads, cost):\n",
+ " if cost is None or cost.get_name() == 'bce':\n",
+ " return super(Sigmoid, self).bprop(h=h, igrads=igrads)\n",
+ " else:\n",
+ " raise NotImplementedError('Sigmoid.bprop_cost method not implemented '\n",
+ " 'for the %s cost' % cost.get_name())\n",
+ "\n",
+ " def pgrads(self, inputs, deltas):\n",
+ " \"Return list of gradients w.r.t parameters\"\n",
+ " gparams = super(Sigmoid, self).pgrads(inputs, deltas)\n",
+ " return gparams\n",
+ "\n",
+ " def get_name(self):\n",
+ " return 'sigmoid'\n"
]
},
{
@@ -162,11 +194,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"collapsed": false
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.0\n",
+ "0.0\n",
+ "0.0744177068753\n",
+ "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n",
+ " 1.977e-04 8.671e-03]\n",
+ "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n",
+ " 1.977e-04 -9.913e-01]\n",
+ "[-0.089 0.03 0.079 0.011 0.017 0.027]\n"
+ ]
+ }
+ ],
"source": [
"from mlp.layers import Softmax\n",
"\n",
@@ -204,7 +251,15 @@
},
"outputs": [],
"source": [
- "%load -s Softmax mlp/layers.py"
+ "%load -s Softmax mlp/layers.py\n",
+ "1.0\n",
+ "-1.11022302463e-16\n",
+ "0.0744177068753\n",
+ "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n",
+ " 1.977e-04 8.671e-03]\n",
+ "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n",
+ " 1.977e-04 -9.913e-01]\n",
+ "[-0.089 0.03 0.079 0.011 0.017 0.027]"
]
},
{
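A quick property worth checking for the new `Softmax` layer is that each output row forms a valid probability distribution. A minimal sketch, assuming the `mlp` package from this repository is importable and using a made-up random input batch:

```python
import numpy
from mlp.layers import Softmax

rng = numpy.random.RandomState(1234)
softmax = Softmax(idim=10, odim=8, rng=rng)

# forward-propagate a random batch and check that every row of the
# output is non-negative and sums to one
x = rng.uniform(-1.0, 1.0, size=(5, 10))
y = softmax.fprop(x)
assert numpy.all(y >= 0.0)
assert numpy.allclose(y.sum(axis=1), 1.0)
```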
diff --git a/04_Regularisation.ipynb b/04_Regularisation.ipynb
new file mode 100644
index 0000000..bac175e
--- /dev/null
+++ b/04_Regularisation.ipynb
@@ -0,0 +1,156 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Introduction\n",
+ "\n",
+ "This tutorial focuses on implementation of three reqularisaion techniques, two of them are norm based approaches - L2 and L1 as well as technique called droput, that.\n",
+ "\n",
+ "\n",
+ "## Virtual environments\n",
+ "\n",
+ "Before you proceed onwards, remember to activate your virtual environment:\n",
+ " * If you were in last week's Tuesday or Wednesday group type `activate_mlp` or `source ~/mlpractical/venv/bin/activate`\n",
+ " * If you were in the Monday group:\n",
+ " + and if you have chosen the **comfy** way type: `workon mlpractical`\n",
+ " + and if you have chosen the **generic** way, `source` your virutal environment using `source` and specyfing the path to the activate script (you need to localise it yourself, there were not any general recommendations w.r.t dir structure and people have installed it in different places, usually somewhere in the home directories. If you cannot easily find it by yourself, use something like: `find . -iname activate` ):\n",
+ "\n",
+ "## Syncing the git repository\n",
+ "\n",
+ "Look here for more details. But in short, we recommend to create a separate branch for this lab, as follows:\n",
+ "\n",
+ "1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n",
+ "2. List the branches and check which is currently active by typing: `git branch`\n",
+ "3. If you have followed recommendations, you should be in the `coursework1` branch, please commit your local changed to the repo index by typing:\n",
+ "```\n",
+ "git commit -am \"stuff I did for the coursework\"\n",
+ "```\n",
+ "4. Now you can switch to `master` branch by typing: \n",
+ "```\n",
+ "git checkout master\n",
+ " ```\n",
+ "5. To update the repository (note, assuming master does not have any conflicts), if there are some, have a look here\n",
+ "```\n",
+ "git pull\n",
+ "```\n",
+ "6. And now, create the new branch & swith to it by typing:\n",
+ "```\n",
+ "git checkout -b lab4\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Regularisation\n",
+ "\n",
+ "Today, we shall build models which can have an arbitrary number of hidden layers. Please have a look at the diagram below, and the corresponding computations (which have an *exact* matrix form as expected by numpy, and row-wise orientation; note that $\\circ$ denotes an element-wise product). In the diagram, we briefly describe how each comptation relates to the code we have provided.\n",
+ "\n",
+ "(1) $E = \\log(\\mathbf{y}|\\mathbf{x}; \\theta) + \\alpha J_{L2}(\\theta) + \\beta J_{L1}(\\theta)$\n",
+ "\n",
+ "## L2 Weight Decay\n",
+ "\n",
+ "(1) $J_{L2}(\\theta) = \\frac{1}{2}||\\theta||^2$\n",
+ "\n",
+ "(1) $\\frac{\\partial J_{L2}}{\\partial\\theta} = \\frac{1}{2}||\\theta||^2$\n",
+ "\n",
+ "## L1 Sparsity \n",
+ "\n",
+ "## Dropout\n",
+ "\n",
+ "Dropout, for a given layer's output $\\mathbf{h}^i \\in \\mathbb{R}^{BxH^l}$ (where $B$ is batch size and $H^l$ is the $l$-th layer output dimensionality) implements the following transformation:\n",
+ "\n",
+ "(1) $\\mathbf{\\hat h}^l = \\mathbf{d}^l\\circ\\mathbf{h}^l$\n",
+ "\n",
+ "where $\\circ$ denotes an elementwise product and $\\mathbf{d}^l \\in \\{0,1\\}^{BxH^i}$ is a matrix in which $d^l_{ij}$ element is sampled from the Bernoulli distribution:\n",
+ "\n",
+ "(2) $d^l_{ij} \\sim \\mbox{Bernoulli}(p^l_d)$\n",
+ "\n",
+ "with $0 30.] = 30.
+ numpy.clip(a, -30.0, 30.0, out=a)
+ h = 1.0/(1 + numpy.exp(-a))
+ return h
+
+ def bprop(self, h, igrads):
+ dsigm = h * (1.0 - h)
+ deltas = igrads * dsigm
+ ___, ograds = super(Sigmoid, self).bprop(h=None, igrads=deltas)
+ return deltas, ograds
+
+    def bprop_cost(self, h, igrads, cost):
+ if cost is None or cost.get_name() == 'bce':
+ return super(Sigmoid, self).bprop(h=h, igrads=igrads)
+ else:
+ raise NotImplementedError('Sigmoid.bprop_cost method not implemented '
+ 'for the %s cost' % cost.get_name())
+
+ def get_name(self):
+ return 'sigmoid'
+
+
+class Softmax(Linear):
+
+    def __init__(self, idim, odim,
+ rng=None,
+ irange=0.1):
+
+ super(Softmax, self).__init__(idim,
+ odim,
+ rng=rng,
+ irange=irange)
+
+ def fprop(self, inputs):
+
+ # compute the linear outputs
+ a = super(Softmax, self).fprop(inputs)
+ # apply numerical stabilisation by subtracting max
+ # from each row (not required for the coursework)
+ # then compute exponent
+ assert a.ndim in [1, 2], (
+ "Expected the linear activation in Softmax layer to be either "
+ "vector or matrix, got %ith dimensional tensor" % a.ndim
+ )
+ axis = a.ndim - 1
+ exp_a = numpy.exp(a - numpy.max(a, axis=axis, keepdims=True))
+ # finally, normalise by the sum within each example
+ y = exp_a/numpy.sum(exp_a, axis=axis, keepdims=True)
+
+ return y
+
+ def bprop(self, h, igrads):
+ raise NotImplementedError()
+
+ def bprop_cost(self, h, igrads, cost):
+
+ if cost is None or cost.get_name() == 'ce':
+ return super(Softmax, self).bprop(h=h, igrads=igrads)
+ else:
+ raise NotImplementedError('Softmax.bprop_cost method not implemented '
+ 'for %s cost' % cost.get_name())
+
+ def get_name(self):
+ return 'softmax'
-
-
-
-
\ No newline at end of file
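A brief note on the numerical stabilisation used in `Softmax.fprop` above: subtracting the row-wise maximum before exponentiation does not change the result, because the common factor cancels in the normalisation, while it keeps the arguments of `exp` non-positive and so avoids overflow. A small standalone check (plain numpy, independent of the `mlp` package):

```python
import numpy


def softmax(a):
    exp_a = numpy.exp(a)
    return exp_a / exp_a.sum(axis=-1, keepdims=True)


def softmax_stable(a):
    # shifting each row by its maximum leaves the ratios unchanged
    # but keeps every exponent argument <= 0
    exp_a = numpy.exp(a - a.max(axis=-1, keepdims=True))
    return exp_a / exp_a.sum(axis=-1, keepdims=True)


a = numpy.random.RandomState(0).uniform(-5.0, 5.0, size=(3, 4))
assert numpy.allclose(softmax(a), softmax_stable(a))
```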
diff --git a/mlp/optimisers.py b/mlp/optimisers.py
index 9d4b947..f03c3cc 100644
--- a/mlp/optimisers.py
+++ b/mlp/optimisers.py
@@ -116,6 +116,7 @@ class SGDOptimiser(Optimiser):
tr_stats, valid_stats = [], []
# do the initial validation
+        # rewind the iterator so the initial validation sees the full training set
+        train_iterator.reset()
tr_nll, tr_acc = self.validate(model, train_iterator)
logger.info('Epoch %i: Training cost (%s) for random model is %.3f. Accuracy is %.2f%%'
% (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))
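For context on this one-line change: the added `reset()` makes the initial validation pass start from the beginning of the training data; an iterator that has already been partially consumed would otherwise yield fewer batches, or none at all. The toy class below only illustrates that reset-before-reuse pattern; it is not the actual `mlp` data provider:

```python
class ToyProvider(object):
    """Minimal epoch-style iterator that must be reset before reuse."""

    def __init__(self, batches):
        self.batches = batches
        self._idx = 0

    def reset(self):
        self._idx = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self._idx >= len(self.batches):
            raise StopIteration()
        batch = self.batches[self._idx]
        self._idx += 1
        return batch

    next = __next__  # alias for the Python 2 iterator protocol


provider = ToyProvider([1, 2, 3])
assert list(provider) == [1, 2, 3]
assert list(provider) == []         # already exhausted
provider.reset()
assert list(provider) == [1, 2, 3]  # reset() makes the data visible again
```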