From 2b516f2f97b64b1232751c1e732fae86bbe08b57 Mon Sep 17 00:00:00 2001 From: pswietojanski Date: Wed, 28 Oct 2015 16:59:11 +0000 Subject: [PATCH] lab4 work --- 03_MLP_Coursework1.ipynb | 67 +++++++++++++++-- 04_Regularisation.ipynb | 156 +++++++++++++++++++++++++++++++++++++++ mlp/layers.py | 104 ++++++++++++++++++++++++-- mlp/optimisers.py | 1 + 4 files changed, 316 insertions(+), 12 deletions(-) create mode 100644 04_Regularisation.ipynb diff --git a/03_MLP_Coursework1.ipynb b/03_MLP_Coursework1.ipynb index 0f786ae..021b761 100644 --- a/03_MLP_Coursework1.ipynb +++ b/03_MLP_Coursework1.ipynb @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "collapsed": false }, @@ -142,11 +142,43 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [ - "%load -s Sigmoid mlp/layers.py\n" + "# %load -s Sigmoid mlp/layers.py\n", + "class Sigmoid(Linear):\n", + " def __init__(self, idim, odim,\n", + " rng=None,\n", + " irange=0.1):\n", + "\n", + " super(Sigmoid, self).__init__(idim, odim, rng, irange)\n", + " \n", + " def fprop(self, inputs):\n", + " a = super(Sigmoid, self).fprop(inputs)\n", + " h = 1.0/(1 + numpy.exp(-a))\n", + " return h\n", + " \n", + " def bprop(self, h, igrads):\n", + " dsigm = h*(1.0 - h)\n", + " deltas = igrads*dsigm\n", + " ___, ograds = super(Sigmoid, self).bprop(h=None, igrads=deltas)\n", + " return deltas, ograds\n", + "\n", + " def cost_bprop(self, h, igrads, cost):\n", + " if cost is None or cost.get_name() == 'bce':\n", + " return super(Sigmoid, self).bprop(h=h, igrads=igrads)\n", + " else:\n", + " raise NotImplementedError('Sigmoid.bprop_cost method not implemented '\n", + " 'for the %s cost' % cost.get_name())\n", + "\n", + " def pgrads(self, inputs, deltas):\n", + " \"Return list of gradients w.r.t parameters\"\n", + " gparams = super(Sigmoid, self).pgrads(inputs, deltas)\n", + " return gparams\n", + "\n", + " 
def get_name(self):\n", + " return 'sigmoid'\n" ] }, { @@ -162,11 +194,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n", + "0.0\n", + "0.0744177068753\n", + "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n", + " 1.977e-04 8.671e-03]\n", + "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n", + " 1.977e-04 -9.913e-01]\n", + "[-0.089 0.03 0.079 0.011 0.017 0.027]\n" + ] + } + ], "source": [ "from mlp.layers import Softmax\n", "\n", @@ -204,7 +251,15 @@ }, "outputs": [], "source": [ - "%load -s Softmax mlp/layers.py" + "%load -s Softmax mlp/layers.py\n", + "1.0\n", + "-1.11022302463e-16\n", + "0.0744177068753\n", + "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n", + " 1.977e-04 8.671e-03]\n", + "[ 4.571e-05 1.697e-03 9.877e-01 6.631e-04 1.194e-04 8.880e-04\n", + " 1.977e-04 -9.913e-01]\n", + "[-0.089 0.03 0.079 0.011 0.017 0.027]" ] }, { diff --git a/04_Regularisation.ipynb b/04_Regularisation.ipynb new file mode 100644 index 0000000..bac175e --- /dev/null +++ b/04_Regularisation.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This tutorial focuses on the implementation of three regularisation techniques: two of them are norm-based approaches (L2 and L1), and the third is a technique called dropout.\n", + "\n", + "\n", + "## Virtual environments\n", + "\n", + "Before you proceed onwards, remember to activate your virtual environment:\n", + " * If you were in last week's Tuesday or Wednesday group type `activate_mlp` or `source ~/mlpractical/venv/bin/activate`\n", + " * If you were in the Monday group:\n", + " + and if you have chosen the **comfy** way type: `workon mlpractical`\n", + " + and if you have chosen the **generic** way, `source` your virtual environment using 
`source` and specifying the path to the activate script (you need to locate it yourself; there were no general recommendations w.r.t. the directory structure and people have installed it in different places, usually somewhere in their home directories. If you cannot easily find it by yourself, use something like: `find . -iname activate` ):\n", + "\n", + "## Syncing the git repository\n", + "\n", + "Look here for more details. But in short, we recommend creating a separate branch for this lab, as follows:\n", + "\n", + "1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n", + "2. List the branches and check which is currently active by typing: `git branch`\n", + "3. If you have followed the recommendations, you should be in the `coursework1` branch; please commit your local changes to the repo index by typing:\n", + "```\n", + "git commit -am \"stuff I did for the coursework\"\n", + "```\n", + "4. Now you can switch to the `master` branch by typing: \n", + "```\n", + "git checkout master\n", + " ```\n", + "5. To update the repository, type the following (note: this assumes master does not have any conflicts; if there are some, have a look here):\n", + "```\n", + "git pull\n", + "```\n", + "6. And now, create the new branch & switch to it by typing:\n", + "```\n", + "git checkout -b lab4\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regularisation\n", + "\n", + "Today, we shall build models which can have an arbitrary number of hidden layers. Please have a look at the diagram below, and the corresponding computations (which have an *exact* matrix form as expected by numpy, and row-wise orientation; note that $\\circ$ denotes an element-wise product). 
In the diagram, we briefly describe how each computation relates to the code we have provided.\n", + "\n", + "(1) $E = -\\log p(\\mathbf{y}|\\mathbf{x}; \\theta) + \\alpha J_{L2}(\\theta) + \\beta J_{L1}(\\theta)$\n", + "\n", + "## L2 Weight Decay\n", + "\n", + "(1) $J_{L2}(\\theta) = \\frac{1}{2}||\\theta||^2$\n", + "\n", + "(1) $\\frac{\\partial J_{L2}}{\\partial\\theta} = \\theta$\n", + "\n", + "## L1 Sparsity \n", + "\n", + "## Dropout\n", + "\n", + "Dropout, for a given layer's output $\\mathbf{h}^l \\in \\mathbb{R}^{B\\times H^l}$ (where $B$ is the batch size and $H^l$ is the $l$-th layer output dimensionality), implements the following transformation:\n", + "\n", + "(1) $\\mathbf{\\hat h}^l = \\mathbf{d}^l\\circ\\mathbf{h}^l$\n", + "\n", + "where $\\circ$ denotes an elementwise product and $\\mathbf{d}^l \\in \\{0,1\\}^{B\\times H^l}$ is a matrix in which each element $d^l_{ij}$ is sampled from the Bernoulli distribution:\n", + "\n", + "(2) $d^l_{ij} \\sim \\mbox{Bernoulli}(p^l_d)$\n", + "\n", + "with $0