From 078094d8fc14b8b90b33817d53c88844a9c67aa8 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sat, 14 Nov 2015 16:22:35 +0000
Subject: [PATCH 1/7] regularisers solution
---
04_Regularisation_solution.ipynb | 1064 ++++++++++++++++++++++++++++++
1 file changed, 1064 insertions(+)
create mode 100644 04_Regularisation_solution.ipynb
diff --git a/04_Regularisation_solution.ipynb b/04_Regularisation_solution.ipynb
new file mode 100644
index 0000000..cbae7c6
--- /dev/null
+++ b/04_Regularisation_solution.ipynb
@@ -0,0 +1,1064 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Introduction\n",
+ "\n",
+ "This tutorial focuses on implementation of three reqularisaion techniques, two of them are norm based approaches which are added to optimised objective and the third technique, called *droput*, is a form of noise injection by random corruption of information carried by hidden units during training.\n",
+ "\n",
+ "\n",
+ "## Virtual environments\n",
+ "\n",
+ "Before you proceed onwards, remember to activate your virtual environment:\n",
+ " * If you were in last week's Tuesday or Wednesday group type `activate_mlp` or `source ~/mlpractical/venv/bin/activate`\n",
+ " * If you were in the Monday group:\n",
+ " + and if you have chosen the **comfy** way type: `workon mlpractical`\n",
+ " + and if you have chosen the **generic** way, `source` your virutal environment using `source` and specyfing the path to the activate script (you need to localise it yourself, there were not any general recommendations w.r.t dir structure and people have installed it in different places, usually somewhere in the home directories. If you cannot easily find it by yourself, use something like: `find . -iname activate` ):\n",
+ "\n",
+ "## Syncing the git repository\n",
+ "\n",
+ "Look here for more details. But in short, we recommend to create a separate branch for this lab, as follows:\n",
+ "\n",
+ "1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n",
+ "2. List the branches and check which is currently active by typing: `git branch`\n",
+ "3. If you have followed our recommendations, you should be in the `coursework1` branch, please commit your local changed to the repo index by typing:\n",
+ "```\n",
+ "git commit -am \"finished coursework\"\n",
+ "```\n",
+ "4. Now you can switch to `master` branch by typing: \n",
+ "```\n",
+ "git checkout master\n",
+ " ```\n",
+ "5. To update the repository (note, assuming master does not have any conflicts), if there are some, have a look here\n",
+ "```\n",
+ "git pull\n",
+ "```\n",
+ "6. And now, create the new branch & swith to it by typing:\n",
+ "```\n",
+ "git checkout -b lab4\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Regularisation\n",
+ "\n",
+ "Regularisation add some *complexity term* to the cost function. It's purpose is to put some prior on the model's parameters. The most common prior is perhaps the one which assumes smoother solutions (the one which are not able to fit training data too well) are better as they are more likely to better generalise to unseen data. \n",
+ "\n",
+ "A way to incorporate such prior in the model is to add some term that penalise certain configurations of the parameters -- either from growing too large ($L_2$) or the one that prefers solution that could be modelled with less parameters ($L_1$), hence encouraging some parameters to become 0. One can, of course, combine many such priors when optimising the model, however, in the lab we shall use $L_1$ and/or $L_2$ priors.\n",
+ "\n",
+ "They can be easily incorporated into the training objective by adding some additive terms, as follows:\n",
+ "\n",
+ "(1) $\n",
+ " \\begin{align*}\n",
+ " E^n &= \\underbrace{E^n_{\\text{train}}}_{\\text{data term}} + \n",
+ " \\underbrace{\\beta_{L_1} E^n_{L_1}}_{\\text{prior term}} + \\underbrace{\\beta_{L_2} E^n_{L_2}}_{\\text{prior term}}\n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "where $ E^n_{\\text{train}} = - \\sum_{k=1}^K t^n_k \\ln y^n_k $, $\\beta_{L_1}$ and $\\beta_{L_2}$ some non-negative constants specified a priori (hyper-parameters) and $E^n_{L_1}$ and $E^n_{L_2}$ norm metric specifying certain properties of parameters:\n",
+ "\n",
+ "(2) $\n",
+ " \\begin{align*}\n",
+ " E^n_{L_p}(\\mathbf{W}) = \\left ( \\sum_{i,j \\in \\mathbf{W}} |w_{i,j}|^p \\right )^{\\frac{1}{p}}\n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "where $p$ denotes the norm-order (for regularisation either 1 or 2). (TODO: explain here why we usualy skip square root for p=2)\n",
+ "\n",
+ "## $L_{p=2}$ (Weight Decay)\n",
+ "\n",
+ "(3) $\n",
+ " \\begin{align*}\n",
+ " E^n &= \\underbrace{E^n_{\\text{train}}}_{\\text{data term}} + \n",
+ " \\underbrace{\\beta E^n_{L_2}}_{\\text{prior term}} = E^n_{\\text{train}} + \\beta_{L_2} \\frac{1}{2}|w_i|^2\n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "(4) $\n",
+ "\\begin{align*}\\frac{\\partial E^n}{\\partial w_i} &= \\frac{\\partial (E^n_{\\text{train}} + \\beta_{L_2} E_{L_2}) }{\\partial w_i} \n",
+ " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} \\frac{\\partial\n",
+ " E_{L_2}}{\\partial w_i} \\right) \n",
+ " = \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right)\n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "(5) $\n",
+ "\\begin{align*}\n",
+ " \\Delta w_i &= -\\eta \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_2} w_i \\right) \n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "where $\\eta$ is learning rate.\n",
+ "\n",
+ "## $L_{p=1}$ (Sparsity)\n",
+ "\n",
+ "(6) $\n",
+ " \\begin{align*}\n",
+ " E^n &= \\underbrace{E^n_{\\text{train}}}_{\\text{data term}} + \n",
+ " \\underbrace{\\beta E^n_{L_1}}_{\\text{prior term}} \n",
+ " = E^n_{\\text{train}} + \\beta_{L_1} |w_i|\n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "(7) $\\begin{align*}\n",
+ " \\frac{\\partial E^n}{\\partial w_i} = \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_1} \\frac{\\partial E_{L_1}}{\\partial w_i} = \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_1} \\mbox{sgn}(w_i)\n",
+ "\\end{align*}\n",
+ "$\n",
+ "\n",
+ "(8) $\\begin{align*}\n",
+ " \\Delta w_i &= -\\eta \\left( \\frac{\\partial E^n_{\\text{train}}}{\\partial w_i} + \\beta_{L_1} \\mbox{sgn}(w_i) \\right) \n",
+ "\\end{align*}$\n",
+ "\n",
+ "Where $\\mbox{sgn}(w_i)$ is the sign of $w_i$: $\\mbox{sgn}(w_i) = 1$ if $w_i>0$ and $\\mbox{sgn}(w_i) = -1$ if $w_i<0$\n",
+ "\n",
+ "One can also apply those penalty terms for biases, however, this is usually not necessary as biases have secondary impact on smoothnes of the given solution.\n",
+ "\n",
+ "## Dropout\n",
+ "\n",
+ "Dropout, for a given layer's output $\\mathbf{h}^i \\in \\mathbb{R}^{BxH^l}$ (where $B$ is batch size and $H^l$ is the $l$-th layer output dimensionality) implements the following transformation:\n",
+ "\n",
+ "(9) $\\mathbf{\\hat h}^l = \\mathbf{d}^l\\circ\\mathbf{h}^l$\n",
+ "\n",
+ "where $\\circ$ denotes an elementwise product and $\\mathbf{d}^l \\in \\{0,1\\}^{BxH^i}$ is a matrix in which $d^l_{ij}$ element is sampled from the Bernoulli distribution:\n",
+ "\n",
+ "(10) $d^l_{ij} \\sim \\mbox{Bernoulli}(p^l_d)$\n",
+ "\n",
+ "with $0"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWwAAAD7CAYAAABOi672AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsvVlsXVt63/lbZ54nTudwkkhRw9WV7q0BdW9VKh2X3V1A\nxX4M4CCNvMQxAiQx2kY/JHA/xHY/Bmi3O4Ef/GAHSMqdFNoBjAAu23GcOCk7VXWdKlXpStekJE46\nhzw88zwPux/EtbQPNVEcxHM21w/YIEWRR0v8n/Xfa3/rW98nDMNAo9FoNOOP7aIHoNFoNJrjoQ1b\no9FoJgRt2BqNRjMhaMPWaDSaCUEbtkaj0UwI2rA1Go1mQjixYQshviGE+FQI8ZkQ4p+e5aA0F4vW\n1ppoXScfcZI8bCGEG1gH/jqQAb4L/APDMO6d7fA07xqtrTXRulqDk66wPwYeGoaxZxhGH/gW8DNn\nNyzNBaK1tSZaVwvgOOHPLQJJ059TwNfM3yCE0EcoxwjDMMQxv1VrO0FoXa3Jq3Q96QpbC2tdtLbW\nROtqAU5q2ClgyfTnJUbv3prJRWtrTbSuFuCkhv2XwB0hxIIQwgn8LPCHZzcszQWitbUmWlcLcKIY\ntmEYbSHEPwT+mGem/28Mw/jhmY5McyFoba2J1tUanCit71gvrDcwxoq32Jx6I1rb8UHrak3OetNR\no9FoNO8YbdgajUYzIWjD1mg0mglBG7ZGo9FMCNqwNRqNZkLQhq3RaDQTgjZsjUajmRC0YWs0Gs2E\noA1bo9FoJoSTllcFQAixA1SBAdAzDOOjsxiU5mLRuloTrevkcyrD5lnJxq8ZhlE8i8FoxgatqzXR\nuk44ZxESObNaBpqxQutqTbSuE8xpDdsA/kQIcV8I8QtnMSDNWKB1tSZa1wnntCGRLxuGkRVCzAB/\nJIRYNwzjP53FwDQXitbVmmhdJ5xTrbANw8gefswBvwd86SwGpblYtK7WROs6+ZzYsIUQPiGE7/Bz\nP/AN4OFZDUxzMWhdrYnW1RqcJiQyB/z+YdFzH/DvDMP4D2czrPHDbrfjcDhwOBwjnzscDmw2Gzab\nDSGE+ijEs70dIQTmJhHdbpdOp0On06Hb7TIYDBgOhwwGA86rmcRbcql0PYrT6cTj8eDxeHC5XEpb\nqSug9DXr1e/3la6dTofBYKAureu7RwgxMkePzt/X6QoozbrdLs1mk2azSavVurD/j0R3nDkmXq8X\nv99PIBAgEAjg9/vV5fF4cDqdOJ3Ol05ywzDUVSwWyeVy5HI5SqUSrVaLdrtNq9ViOBye2/h1Z5Lj\nEYlEiMfjzM3NMT09PaLr0YkuNQWo1WpK11wuN6Jrv98/t/FqXV+O3W5X89Q8ZwOBAD6fT2nqdDqV\nrna7fcSw5XxNJpM8ffqUZPLd9Sx+la6n3XS8NDidTkKhEFNTUyNXLBYjGAzi9XrxeDx4vd6Ru7k0\n7OFwiGEY7O7usrW1xebmJkIIqtUqhmHQ6XTO1bA1x8Pv9zM/P8+NGzdYWVnB6/Wqy+l0qhWa3W5X\nk3o4HJLP59nc3OTJkycYhkG1WgWerdDO07A1L8dms+H1eonFYkxPT4/M2Wg0qjSVT1Jyzkpdh8Mh\nw+GQVCrFvXv3aDQa79SwX4U27GPicrkIBoPMzMywuLhIIpFgYWGB+fl5YrGYuosHAgG1KnM6ndhs\nNiX+cDjk4cOH+Hw+er0ejUZDmbW8s2suFmnYt2/f5sMPP1SaBoNB3G73iLZmXZPJJMFgkOFwOGLW\njUbjgv9HlxObzYbP5yMWi6m5KudrIpF4o64yVLm+vk6z2WR3d/ei/0vAJTRsuTqSd1S32z1yuVwu\n9VE+JtntdsLhMNPT08zMzLxwx5YrbK/Xi9vtHlmFybiYNO5gMMjs7CxXr16l1+vhcrnodruUSiV6\nvd5F/3omFiHEiLZSx5dd8nukJvl8nkKhQKFQwOFw4PV6CYVCRKNRfD6fWonJFbbct5D/rjSHqakp\nlpaWaDabeDweBoMB1WqVZrN5wb8d6+DxePD5fPh8PhwOh4ovN5tNgsEgU1NTTE9PEw6HgWf6FItF\nfD4fs7OzeDyekRW2nOvm2LZEzn9znPuiuZSGLWNXPp+PUCj00svv96vvczqd+P1+gsGguiubY2Jy\nMsvvN8fDzELbbDb8fj+zs7N0Oh0Mw6DX61EqlbDb7Rf4W5l85CaTy+VST0Mv0zUcDqsVlbxZbmxs\nsL6+TrVaxW63K8OOxWIjukqjNm9SSTweD1NTUywvLyOEUCvtdDp9Ub8SS+L1epUpezwe8vk8+Xye\ndrtNJBJhbW2NW7duMTc3x97eHnt7e6RSKbxeL91uF7fbTTQaVZqa9ybMG8lS26NJBBfNpTVsj8dD\nKBRiZmbmpVc0GlXZAjLOdXTyvm7X+WimiPzo9/uZmZlRq7xyuUwqldKGfUqkYbvdbrxeL5FIhNnZ\n2Rd0nZ2dVRvFHo+HVquFy+WiWq2ytbWFw+HA4/EQDAaJRCIjuppX1Uc/ejweYrEYAD6fj2q1yv7+\nPk6n82J+IRZF3hiXlpbw+/3YbDba7TbFYpFwOMza2hoff/wxV65c4ZNPPlFPTj6fb8SwXzZfJUez\nRsbFrMGihm1Or3M4HMpoXS4XPp9PZXfIST07O8vc3Jya4LOzs8RiMbWJ6PF4RnaQ5QpKbiQOBgP6\n/b6KfZkfzY+GRVwuF36/H8Mw1GOc2+0eeRTTvBzzBBJCjNxEj944zSGKxcXFEZ39fr/StdVqkc1m\nSafTJJNJ4vG4emSWIZN+v89gMHgh7GKe7A6HA5/Pp74vHA6rDWjN63mTruaU2YWFBa5evcrVq1fx\ner10Oh1KpZLSXaZlynkeDAbVHBsMBiqbR/qCOXz5Ml3l03UsFmNubm4kLXcwGLzz35XlDPto/qWM\na8lLmrUMbUQiEcLhsPooH5t9Pt/IGwae52YOBgN6vR7dblcJ2G63abfbdDodFWMzxz6dTudIxki/\n31dGYE4P07was7ZOp5NYLKZ0dTqdlEoldQGEQiEWFhZYXV0lHA4TDodVCEs+CrtcLhKJBHfu3EEI\nQSgUYnFxEbvdTqlUUrq2Wi1lylJb80aV1FW+N+QNXOv6Zl6naygUGnmynZqaUjdfmXaXSqWw2WyU\ny2WePHmCy+UilUpRrVaJxWJ89atfVXtSGxsbpNNpNddDoZBK9ZOXNHKn04nX6yWRSHDr1i3a7TaZ\nTIZsNksmk7mQDWXLGTaMhj1isRhXrlxRd2VzNod5pSU/mkMg5jsuMLKi7nQ6arOj0WhQrVapVqvU\n63UikQjRaJRoNMpwOMTj8ag3pXlF3uv11G605s3I36HH48HtdqvN25WVFVwuF0+ePKHb7ZJOp5X5\nSsOWP+PxeEZWVdKwhRBMT09js9kIBoM4HA6KxSLVapVarUa1WsXlcildI5EIHo8HYCQVzHwj1roe\nj5fpurKywtWrV4nH4yOplea86kajQ
SqVIhgMYrPZqFQqPHnyhGq1SjweJ5FIMD8/z507d2g0GqTT\naTY2NiiVSiQSCfU9MtUvGo1iGIbyALmfEY/HuXXrFi6Xi0ePHiGEoFKpjKdhCyF+B/gZIGsYxt3D\nr8WAb/Hs9FQa+NuGYZTPc6DH5WgsMxaLcfXqVe7evcvdu3fVyjoYDOLxeF4be5avB6jJJ1dR7XZb\nGXWpVFKxsmKxSDwep9vtvhCakT//son9rldik6YroB553W43Pp+Pubk5rl+/zt27d3G73fR6PdLp\nNO12GxhdYQMv1dZut5NIJJiZmeG9996j2WxSr9ep1+sUi0Wlaz6fx+v10m63MQxD6Wm323G73WOz\nwraCrvF4nLW1NT744ANWVlbUfA0EAiMni3O5HI8fPyYQCKj9ILkXEYvF+Kmf+inu3LnDX/trf429\nvT0ODg7Y2NjgRz/6Eaurq1y7do3V1VUWFxeVrnLPQd7M5Qrb5XIxMzODzWajWq3y9OnTC/ldHWeF\n/a+Afwn8a9PXfg34A8MwfkMI8UuHf/7FcxjfsZAC2u12nE6nikfPzc2xvLzM2toai4uLxGIx9Tgr\nH2nNE1geJZZmKo+Py5iVfDxut9tqUsvLvMIWQqgY6tENi263S61Wo1AokM1mKZfL6s3yjhl7XWE0\ntSoQCDA3N6f0lRMukUjgdDpZW1uj0+ngcrm4ceMGy8vLBIPBkaeawWAwoulRbRuNxgu6yhV2JBLB\n7XYTiURe2HgcDAY0Gg2KxSIHBweUSiWazeZFxDknQlczclWdSCTUitcwDJLJJJ1OR4WoYrEYjUZD\nhb2ePn3KxsYGuVyOfr+vdB4MBjSbTTKZDJubm0xNTZHNZnn69KkKc1UqFbLZrMrqkpuRRxkMBrRa\nLWq1GsVikVqtRrvdvrCnpzcatmEY3xFCXD3y5Z8GZHuhbwLf44INW8YSfT4fiUSCtbU1ZdTyqHEo\nFFKbj3Iz0Hxs3ByPbrVayoTlVavV1NVqtVRIxDzxu92uygSRG1DmlZ007Fwux8HBAeVymWaz+c7f\nAJOgK6Buwk6nk0gkwtLSEtevX2dtbY1EIqHM2+FwsLa2hs/nY35+nrm5Oa5evYrf71c3X7nHIA34\nqLYyZ1rWjWg2myPmLjck5UEn881YHoQqFArs7+9TLBap1+vv/JTjpOhqxu12Mzc3x61bt7h9+zbN\nZpNKpcLjx4+VaUtdy+Uyjx8/5tGjRzx+/Jjt7W3S6fQLZxj6/T7ZbJb19XX6/T6VSoXt7W0qlQrD\n4ZBms0mhUMAwDBXq6nQ6L+RcH9W1UCjQaDQu7PTqSWPYM4ZhFAAMw8gLIWbPcExvjfmRKhAIkEgk\nuHnzJl/4whdYWFgYqfthXrEdjn8kLt1oNFSoI5fLkc1myWaz5PN5isUi+XyeUqn0QqEfM9PT0ywt\nLdHv9194A3Q6HarVKtlsVq3EzruOyFswVrrC6H5EJBJheXmZ999/ny984QuEw2Glq91ux+/3s7Cw\nwPvvv4/b7cbv96tTpa1WS2krn26y2Sy5XE6FPORkNN98zQyHQ+bn5+l2uy8UDur3+9Tr9bGZ2EcY\nO13NeDwe4vE47733Hl/5ylfY2Nggn8/z6NEjut0uXq+XhYUFDMNQhv3f//t/58GDB2oBdfT33Ov1\nyGQydLtdMpkMnU6HYrFIpVJRGVryo9frZX5+fuRGfFTXfD7P3t4ehULhQm7EEktsOspfsFyNhcNh\n4vE4KysrzM/Pj5xslDFjWeNBZnrIla+8SqUSmUyGg4MDDg4OVFEfady9Xk9dMl1PXu12m8Fg8MLN\nYTgcqscxucKWj85jYthjh9wHsNvt6pTawsICa2tramNI1mzxer1Eo9GRLB65WjavouXv/uDggEwm\no3TN5XI0m02l62AwGNFV/h2gHqXlU5p8/5gn9hgZ9lgiTVE+Pc3Pz7O2tkalUsHv9zMcDqnX65TL\nZaVZMplka2uLx48f8/jxY7VPcHT+DAYDKpUKzWaTbDbLcDhUusoDazI9t9FoKF0dDge9Xo9arUa9\nXieXy7G/v6+ui9b1pIadE0JMH96tZ4DsWQ7qbTFv9nQ6nRc2fWR82DAMFe6Q2R2VSuWlj8bVapVy\nuUypVFKbGdVqVYlrfn1pJnIHOxKJqHQhr9erNqjk45W8GaTT6XFbYY+VroDaoJWhCWmk5lCWfEKS\nukqDPqqt1LdcLr+grYxNmlMtZU6w1DUajb5U136/T6vVolwuk8lkSCaTlEqll678Loix09W8kjU/\nqRiGQSwW48aNG6psQDgcJp1O86d/+qc8ffqUvb09BoMBgUBAmXC3231hH0j6gnzd4+jq8XjUk5d8\nCpZmnU6nLyzUJTmpYX8b+LvAbxx+/PaZjegEGIZBv99HCEG3233lpJaGXalUKJfLFAqFkbzKSqWi\nDLter4+UyJTxz3a7TbfbHckAsNlsI6fjwuEwwWBQnaiT/3av16PZbI4YtoyZjolhj5Wu8NywpbYv\ny12Xk7HRaCgjzuVyZDIZMpkM+Xz+hTh1q9VSukptO53OyI3YPLFlqqbM5fb5fCP/drPZpFwuk81m\nSaVS6uYxJoY9drqazfpo2FAadiwWGzHNe/fuUS6XqVQqDAYDgsEgrVZLzS2zYZvTLM2r8Dfp6vF4\nqFQqbGxs8ODBA/b29tT7plarqYXe2Bq2EOLfAj8BTAshksA/A34F+JYQ4ueAA+Bnz3WUb8B8Jz3a\nGMA8qWVlPLlDvL+/z+7uLk+fPmV3d3dkJS1XvWahzZd8TUCtsIPBILFYbGSF7fF41Cqg3+8rU5Fv\nRLmr/a4NexJ0heeGLcMOr7oZ9/t9ms0mxWKRTCajNN3d3WV/f3/kyeno05e5TKpEpu6ZJ7ZZVxkb\nl1er1VLvq1QqNfK+eZdMiq7ASEqtObwUi8WIRqPcuHGDdDrNn/3Zn3Hv3j3+63/9rwyHQ5XpFQgE\nlFnLVE4z5nMTkjfp6vF4KJfLPHr0iO985zs8ffp0ZM6bG1NcBMfJEvk7r/irr5/xWE6MebL1ej0V\ne0omky9sHMlVl7zS6TTpdJpsNjtyB+10Oq/9N+XqAFAbYolEgqWlJeLxOOFwGJfLpWJkMjYu49aN\nRkM9xl3EScdJ0BVGtZU320wmw87OzkidjlarNaKrXJUdHByQz+eVro1G47U3R7nSs9ls6qTs7Ows\ny8vLLC4uMjU1hc/nQwhBu91WK690Oq02pC6y6uKk6ArPF1Htdpt0Os36+ro6bSir6Mk0yUKhoMrW\nyp+T8ebXdfV52dfNNX2O6ir3o+QNQdYgkdfRMOu7xhKbjjD6CFSpVEin02xublIsFtVjdL/fV7Ep\nmR0gwyOVSkU9Gh9ntWt+pJObYfPz81y7do1EIqEMu9/vU6vVlJHs7++rXNCLMutJwmzYsshPKpVS\nJ85kznyz2RzRVsanK5UK9Xpdxb+P87s2H3YKhULqROXS0hJTU1N4vV4Mw6DRaJDP58lkMqRSKQ
qF\nwli0kZoEzLrK5gDy8Iu5o5OsRV0qlZRZyrljt9vfSleJzOl/ma7mMxSLi4v0ej215zEOc9YShm0O\nUfT7farVKgcHByqty5yDa659XK1WVQhFxkfN4ZXXYc5ekCcqZfbCzMwMkUgEp9NJr9ejWq2SyWTY\n3t5WObrm5Htt2K9HTm5Z6CeVSuHxeOj3+yO51eb0vFar9cJG5XF1NWcchUIhlXF01LCbzSb5fF61\nj9KG/XZIXaVhl8tlNjc3Vd2XSCSimkOUSqWRMKfc1zhJeEIIMWLYy8vLTE9Pv2DYS0tLKoWz3W6r\nGjUXiSUMG56bnnlFKyed3GCSG0PypNRJJ5d5QjudTgKBALFYjHg8ztLSkloh2O12tckpH+PT6TTl\nclnVw9a8HvPNuNvtUi6X2d/fRwhBp9NRutbrdaVruVw+cYzRfAjL3EZK1rmQKYSDwUDl56ZSKVKp\nFMViURv2MTmaudVut8nlcgAqrhyNRnE4HJRKJdVKT5Z2OE3YydyNZmFhgenpaZWOKxMD4HkfV3nQ\nbhzmq2UMW2IYBq1Wi1KppB5rzUfMG42GypM+KUI8q38si9DIhq0yPUgKLLMHpGEnk0my2SzVavWF\n2LrmzZgPp8jNJvPpVPO+wElxOBxK11AoxGAwUE0I8vk8KysrDIdDnE4n1WpV5V3LFM2XbX5p3g5p\nmvIJ9ix0PYosNhUIBBgOh+rJt1gsqn2tdDpNJpM51eLurLG0YbfbbWw220gdCWnep8nKkIYdDoeZ\nmppShi13m+X3yNzgUqlENpslmUyqPE5t2G+PTN2T4Qhz7RdzudvTTGz5xDQ1NUUkEmEwGChDzufz\nqs1bLBZTJQZSqRTpdFqVLNCcDmnYsrTDWehqRta5PmrYn332GRsbGyN5+7KmzLjoajnDlnUCpFmb\n64XIvz/tpoHZsGdmZl5YYcv0M/nGMx+oMB+80bwdcoUtV19HUzbPIpXOHOKSecDyKhaLBINBFhcX\nWV5eVitseahC3jg0p0POD/m0ctYpkvLJW2aCyDDbD3/4Q/7iL/5iJHXP3JB3HLCcYcPzEqZniTlf\nVGaFJBIJVldXuXLlCjMzMwQCAWw2mzraKjc/ZTaKLBR1HuO7DMic2rPMgTUf3rDZbCorZHl5mUQi\noWKpsqi+1+ul2Wyyv7+vslHq9brKINC6np6X5cW/LUd1BUaM15w0IPejZAkCuVEtz3OME5Y07PPA\nvBkVCASYmZlRleOuXLmiegXKx3VZYCiZTJLP52k0GhdW+1rzemR3EafTOXIjXllZUTUlarWaSvPr\ndrvs7OyQyWTUfsRF5+dqXsSsq9yoNIcipV4ul0vVH7py5YpK47vIAzKvQhv2MZGGLR+jpqenWVxc\n5Pr16ywsLKgke7Nhp1IpkskkuVyOer0+cofXE3s8kBk/shuNPAC1srLC7du3VUqgDMdUKhUqlYoq\nHlWpVEb2RLSu48FRXaXxvsyAzembV69eZX9/X2UBvekA3bvmjZ1fhRC/I4TICCE+NX3tV4UQKSHE\nvcPrG+c7zIvHvKscjUaZnZ1lYWGBK1eusLCwQDQaVY0+5YEKadj5fF4Z9riswrSuz5B1JXw+H5FI\nhOnpaZWeubKywq1bt7h79y5f/OIXuXHjBqFQiEaj8cIKW+s6XhzVVYa2ZGKA0+lUKcDmNL5YLDbS\njWrcOGnHGQP4dcMwfv1cRjWGuN1ulRGysLDA/Py8MmmZEdLr9RgOh6q4UzKZVCfgZP3dMULryrM6\nMMFgkHg8rh6J5X6E3W5Xj9LD4VCVZt3b22NnZ4dcLjdOFfkkWlee6RoKhZibmyMej6u+qnLDcXZ2\nlnK5zHe/+111NF5uHo+prsDJO84AiJd8zbLIhr5LS0tcu3ZNrarlndhcAtRs2MlkUmU2jJNha12f\nISd2PB7n2rVryrDlwSdzbW1Zx3xvb4/d3V0V3x6nia11fYb5Rnzt2jXVHlAehJGnnTc3N0eKvsna\nMOOmq+Q0Mex/LIT4eeAHwP9mGEbxjMY0lhw17JetsGUn9aMr7IuqyHdCLpWuskt6PB5/IeNHnm6T\n1fjMB2V2dnYuvHLbW3JpdZULrOnpaaanp3G73fzFX/wFm5ubfO9731NVM+VZjXHW9aRBmt8ErgG3\ngU3gX5zZiMYEmVzv8/kIhULEYjFmZmZIJBJKfNnFWcatC4UCe3t7ZLNZVcBe1rQYR/FfguV1hefl\ncGWzCXOoa3Z2lnA4jNvtBp4VnCqXy2qT8WiKpiz9OuZcSl2np6eVrtFoFJvNpipmFgoFarWaOkhn\nbmwyzrqeaIVtGEZefi6E+C3gv5zZiMYEm82G1+tV5R5lF3Z5ya4jsl5IuVwmnU6rU28ye2CSuAy6\nwrOsAFkYTE5qeU1NTREMBlVpXFmqV9YLKRQKE3f8/LLqOjs7qxpwG4ahmpXIWvj1ep1IJIJhGCp1\ncxzDIGZOZNhCiFnDMGSbob8FPDy7IY0HNpsNt9tNKBRSWSFyUs/OzqpEe7vdruKb6XSara0tle41\naafeLoOu8HxiRyKREV3j8TixWAyXy4XT6VQlVHO5HLu7uxNb4Oky6yo/5vN5stks9+7dY319XR2a\niUQiOBwObDYb3W6XRqNx0f+N13KSjjO/AvykEOIDwAXsAn//XEf5DpEF7B0OB36/n2g0SjweJ5FI\nMDc3x8zMDFNTUyPHoWXpxXQ6zfb29kQY9mXVVQihbsQzMzPMz8+riT09PU0wGASe16Sp1Wpks1me\nPn06ETWvL5uuZlwuF8FgUIUuZX0fv9+vmi1vbGzwwx/+cMTM7XY7zWZT9egcZ07aceZ3zmEsF47s\nwG3eYV5ZWWFlZYXV1VXi8bg6zSgzQrrdrrp7y9SgYrE49h2zL5OuMpVL6iuL06+urrK6usry8rIq\n5WkuEFatVlW/T3kUvVarjXWo6zLpehSfz8fc3Bxra2tcuXIFp9OpamnLbkDxeJzPfe5z6gYu5+q4\n6yoZ/1vKO8Rut6t6AubC9e+//z6Li4sq3QtQj0+yYpu5WahsrDvOhn2ZkIbtdrtVPv3S0hI3btzg\nxo0bzM7OKsOWHdBlGVepq8zPlZtSmvHDbNgrKyuqv2Y2m6Xf72O329XK2twZXTZmngRdtWGbkIbt\n8/lUbYFr165x584dZmZmVDF747CZryyaLzt0S8OesDQ+y2M2bHM3kZs3b3L79m3VqMDhcKibbblc\nVk9OZsPWuo4vPp+PeDzO9evXWV1dpVQqkUwm+d73voff7+fWrVvcuHGDRCLBgwcPKJVKqgb2pOiq\nDduEjG3KThSJRILZ2VlmZmYIh8OqXKtsBiuzQra2tlQoRB5z1YwPdrtd7UdMTU0xPz9PPB5ndnaW\nqakpVdFNpmfKwzGy9ZcMhUxadojVkWl88oa7uLio5mw0GlWxadkuUJYdCIfD6sYsSzFPCtqwTXi9\nXlWF7+rVq+pwjMvlUodj5CZjNptla2uL9fV1dnZ2S
KVSqquzZryQpxnn5+dZWlp64TSjbN4sywok\nk0k2NjZ4/Pix6tU4CfHNy4bcj5BZPu+99x4LCwuEQiHsdrtaYAE0m03S6TQOh0P14czn82OdGPAy\ntGGbkPmbV65cYW1tjfn5eSKRiDJsObGbzaYy7E8//ZRkMkmlUtGGPabIsqiJREJtSEnDttlsSlfZ\nITuVSrG+vs6nn36qqvNN2sS+DDidTqamplhZWeH69evqRGMwGFR1rmX9ELnSbrVapFIpdRx90m7E\nl96wzele0rCXl5dZXV1VoRCXy6WK57fbbdXkd3t7m4cPH5JOp3Wd6zFEaut0OgmHwyQSCa5du8bi\n4iLT09NqhS1bULVaLVUW99GjRzx48EA1m9Dajg9mXaPRKIuLi7z33nssLi4yOztLIBDAMIwXVtiy\n0a9cfE2irpfasGUMTGYPyFNRMrYZCASUWdfr9ZFTUltbW+TzedVBZlzKa2qe4XK5lLYzMzMqZj07\nO0skEsHr9WKz2eh0OmpzMZvNsr6+zt7eHrVabWRCa23HA7OuoVCIfr/PwcEB9+/fVwWbfD4fXq/3\nBUOWXWxk20D5tUni0hu21+slFAoRDoeVWcvDMW63W514q9VqpNNpNjc32dzcZGtri1wuR6fT0WY9\nhrhcLgK7UlrfAAAgAElEQVSBgMr2kWYtn5pklcVut0uhUGBnZ0dpu7+/rwwbJm9SWxmzrn6/n8Fg\nwMHBAcVikW63q1L73G73yELqqHGbP04Sl96wfT4f0WhUrcLMK2wZ/5Ir7P39fTY2Nnjw4AH5fF4b\n9hgjT71NTU2pWtfypKrf71fayjDIzs4O9+/fH8kKmeSJbVXMuvp8PqrVqqrdA6g87Gg0+tIVtplJ\n1PW11fqEEEtCiP8mhPhUCLEhhPgnh1+PCSH+RAhxXwjxx0KIyLsZ7tkij5/HYjGVDiQ7TpirtVUq\nFfL5PJlMhr29vZG2X+Na1et1WF1XQB1+kjfi6elptSozV1gsFosqzPX06VMODg4ol8uqqa7Wdrww\nr5hlAkC5XFaHYPL5PIVCgXK5TKPRUN2AXvbzk8ibyqt2gX9kGMZd4IvAzwshPgR+DfgDwzA+AP7w\n8M8Th8zPjcVizM/Pq1oSMm7dbDYpFoukUin29vbIZDIUi8WRsqmTkGz/EiytK7xo2NFoFJ/Pp7ra\nVyoVMpkMqVSKg4MD8vk8lUpF9fGbkHK4L8PS2vZ6PWq1mtp3qFQqtNtthsOhOh8hF1aFQkE1v7YK\nrw2JGIaRATKHn9eFEPeBBeCngY8Ov+2bwPeAXzzHcZ4LRw17ZmZmpLSmrHEtTzBms1kKhQLVapV+\nvz+Rq2uwvq7w/BDU7OwsiURCGbbdblfNCGSTiXQ6TT6fp1wuq6emSS0rYHVtu90u9XqdbreL3W6n\n0+mosKSsmnlwcIAQgnw+T6PRmNRF1Us5dgz7sO3Ql4CfA2YMwyjAs1q7QojZcxndOSNDIvL0m3mF\nPRwOR7qfm1fYVsq3tqKu8HyFfdSw5UajnNjJZFKtsOVjtFWworbdbveVOfFS10wmw2AwuHwrbIkQ\nIgD8HvCLhmFUZW7jJCJT+GQanzkrJBgMqm7KnU5HdZDZ2tpStZAn6Rjrm7CSrna7/aXaTk9PE41G\n8Xq9I2UFMpkMT58+ZXt7m0wmQ61Ws9TEtoq2R3UNBAIEg0GCwaBK3ZN1QDweD36/n0qlos5KWE3X\n49TDdgL/HvhdwzB+//DLOSHE9OGdegbIvvoVxgv5qCyr8cnMgenpaXw+H06nc2RDan9/n+3tbfb2\n9ixl2FbTVXYIktqaUzRlJT6zYcsa11tbW6pdlFUena2k7VFd4/E4CwsL6hSyDF/1+32q1SqlUkkV\ndbKarvAGwxbPbsu/DXxmGMb/bfqrbwN/F/iNw4/fPrcRnjHm4vXmFfb09DR2u1014pTx6/39fTY3\nN8nlcqrv26RjRV1flqJpXmFLXTudjopfyxV2q9Wi1WpZYiVmNW2P6rq2tsatW7e4efMmiURipC79\nkydPuH//Pjs7O5bTVfKmFfZXeSbufSHEvcOv/TLPulh8Swjxc8AB8LPnN8Szxe12Ew6HR9K9ZFcK\nWVNC1rqWd2yZJjSunZRPgOV0lRNbpmiGQiEA1TFGIoSgVqtRqVTUKsxi5XAtpe1RXWVhNlk3RJqy\nzKeXJXItqCvw5iyRP+fVqX9fP/vhnC9CCDwejzrVKNO95DFlWQCo3W6r2gPdbldVcpvEjJCXYTVd\n4cWMH4fDoU7Aeb1eotGoulqtFp1Oh16vpya01nY8kbpOTU2xsLDAzMwMoVBInZOQT03NZtPSukou\n3UlHj8dDJBJRp9/M2QPD4ZBer0er1aLRaIwYtiytarU3gFWQKzGZ8VOv1zk4OCCbzdLr9VQ7MJvN\nNjKxZWqm1nY8OWrYs7OzhEIhXC4XgHoiPmrYVtX1Uhq2eYVtLgRkNuyjK2yLhEIsy9EVtuxef+/e\nPZU7b7PZmJ6eVu2g5EpMM74c1fWoYb9uhW1FLG/Ysp29bMAaCoWIRCJEo1EVu5bim7ufp1IplXhv\nVfEnHbOusiCQ7Cojb7wOh4NCoUAikcBms5FOp9Xx80no4XcZMevqdDqp1+ukUincbjepVIpwOKzC\nIuYYdjqdtryul8KwZWNdj8ejKvPFYjG1upbt7VutljLsZDJJPp+nXq9batPCSphzdIPBoLoZx2Ix\nXC4Xbreb2dlZisWiSv3a399XxYKsPLEnGbOuLpeLer3O7u4u1WoVn8+n2oK53W7V3s1ut2vDtgI2\nmw2n04nX68Xv97+wwnY4HNjtdgzDUIa9v7+vDNtqR1uthLmxbjAYJBwOK22npqaYnZ3l+vXrlMtl\ntre32d7eVicbrT6xJxmzrk6nU2X1bG9vq9W3NHV5iCYUClEqlSiXyxPXReZtsLxhm2tey8kcDocJ\nBAJ4vV61KdHr9Wg0GpRKJdUlWzbV1YY9vgghsNlsOBwOXC4XXq+XQCCgutvLPo2yf5/ciKxWq7rt\n15jidDrx+XxEIhHcbjftdpt2u63i01JX2WS3Xq9TrVZVKq427AnG6XQSCASYmZlhbm5upF4IPN9l\nlhXczKekZOU2bdjjidxwEkLQaDRUlT3ZAkrqWq1WKZfLStdKpUKz2dSGPaa43W4ikQhzc3PqlKrT\n6cTpdNLr9ej1eurATKPRoNlsUqlU1OdW1tXyhu1wOAgGg6obuszjlIYtN6dardbIpC4Wi+oElZXS\ngqzEYDBQJW5fZdjNZnPEsGWRp06nM7EV+ayOPNwWj8eZn58fyaE3p/CVSiW2t7fZ2dlhf39fLbCs\nrOubjqYvAb8LRAEX8NuGYfxzIcSvAj8P5A6/9ZcNw/ij8xzoSXE6nQSDQaanp5Vhm1fY8qCMjJOV\ny2Vl2FbM4wRr6AqMHDc/jmHL0Ii5m4zVsIK28qzE3NwcKysrLC0tsby8zNLSEu12m2q1Sq1WY29v\nj36/rzaR
rayr5E0rbFkM/cFh9a8fCiH+GDCAXzcM49fPfYSnRMY4zZsVspuyEGIk9unxeHC5XDid\nTux2+8R2Vj4GE68rgNfrxefzqT5+g8GA3d1dvvvd76p0Tamt7M8pN5mlthZk4rU1p9f6fD7VbNfj\n8agOM8lkkp2dHdLpNPV6HSGE1XUFTt7AAGAy6zWakCl/spNyIBBQHZfdbvdIkwIrmbZVdPX7/ap4\nVywWo9/vs7m5SSaTYWFhgStXrrC8vEwgEFDaejwe2u220tZqOfZW0LbRaJDJZFRJiHA4zOLiIsPh\nkGq1yu7uLg8ePODRo0dkMhkqlQp2ux2Xy2VZXSVvahGmMBVD/87hl/6xEOKvhBDfFELEzmFs544Q\nQqX8BYPBkUktV2R2u51JrSV8HCZZV2nYq6urLC0tMRgM2Nzc5D//5//MD37wA1KpFP1+n2AwiN/v\nH3mKcjgc2GzHfvtPJJOqbaPRIJvNsr29zePHj0mn06pMaq1W4+nTp9y/f5//8T/+B9vb21QqFfWk\nbHVdj/U/O3y0+v94Vgy9BvwmcA24DWwC/+LcRnhKZArQYDBQd15ZF0QiwyPysUqGTWToxKpMsq7w\nrKtMIBBgamqKWCyG0+mk2WxycHCguo30+/2XamtlXWGyte31eirzQ3YBMrcBq9frFAoF8vm8SuOT\noU+r6/o2DQz+X1kM3TCMvOnvfwv4L+c2wlMiN59arRa1Wm2kjoQ5Q6TValEsFlV6kNxtPmruVmHS\ndQVUZ6BGo4HH42Fqagqv18vVq1fVBnO/31eZIebmyZPaj/M4TLq2Ho8Hn8+H3+8nkUgQiURUXr3X\n6yUej3Pz5k214pabkFbXFU7YwEAIMWsYhiwy/LeAh+c3xNNhzhYwG7YspSqT7avVqurXKL9Hrsyt\nhhV0hdHCP+FwmKmpKa5evUowGFQT3tyJpFarqYJeVt2csoK2Mq1vampKFWhzu90YhoHP5yMej3Pj\nxg1sNhvJZFIdjrKyrpKTNDD4P4D/VQjxAc/ShnaBv39+Qzwdr1phS8OWu875fF4dqpArbIkF79gT\nryswktLX7/eZmpri+vXrrK2t0W63KZfLVCoVcrkcpVKJer2uVthgSV3BAtqaD84cXWH7fD4SiYSq\nbW8YBqVSSR2AA8vqCpy8gcEfns9wzp5+v6/6M8r6A7Ijus/no1wuqxzdnZ0dcrkcrVbL0qJbQVeA\nTqdDrVYjl8upk3A2m43BYKAOVpTLZQ4ODtjb26NcLlv+kdkK2vb7fVqtFtVqlVwux+7urjrNKg+4\nlUolMpkMxWJR3bCtrKvE8icde72eytPM5/O0223y+Tw7Ozu43W6azaY60prNZslkMjSbzYsetuYY\ndDodyuUyw+GQXC6nmhY8efJEtXmTm1eZTIZSqWTJEJfVeJOucs5Wq1UODg4ula6XwrBrtZraaOx2\nu+zu7uL1erHb7ao7hblxgRUa7V4G2u02w+FQhTlarZZK3ZPNKMwdSXRt88ngVbp6vV6V7SXncrPZ\npNlsXhpdxXk9RgghrP98MkEYhnFm+U5a2/FB62pNXqWrdTPMNRqNxmJow9ZoNJoJ4dxCIhqNRqM5\nW/QKW6PRaCYEbdgajUYzIWjD1mg0mgnhXA1bCPENIcSnQojPhBD/9ISvsSOEuC+EuCeE+OQtfu53\nhBAZIcSnpq/FhBB/cvh6fyyEiJzwdX5VCJE6HNM9IcQ33vAaS0KI/3b4u9gQQvyTk4znNa/zVuM5\nLWeh6+HrvLW2WtfzQ+uqvn98dTW3wTrLC3AD2zwrnu4A/hL4/AleZxuIneDn/ifg88Cnpq/9S+CX\nDj//JeD/OeHr/Arwv7/FWOaAO4efB4BHwIdvO57XvM5bjWccdD2ptlpXretl1vU8V9gfAw8Nw9gz\nDKMPfAv4mRO+1lsfDjAM4ztA6ciXfxr4N4eff/M443nF67zVmAzDyBiG8eDw8zogu4C81Xhe8zpv\nNZ5Tcpa6wluOW+t6bmhdn7/G2Op6noa9CCRNf04dfu1tMQD5GPILpxzTjGEYBVD1gWdP8Von6t4h\nnncB+fPTjEdcXDeRs9IVzk5brevp0bq+hHHT9TwN+6wSvL9sGMYXgP8Z+HtCiP/ljF73NJyoe4d4\n1gXk93jWBaR60n9cXGw3kbNM3B83bbWuZ4PW9cXXORNdz9OwU8CS6c9LjN7Bj4VxWHTdMIwcz355\nXzrFmHJCiGkAIcQMkH3D979qTHnjEOC3jjMm8bwLyO8ah11ATjIe8YpuIm87nlNwJrrCmWqrdT09\nWlcT46rreRr2XwJ3hBALh4P+Wd6yJq8QwieE8B1+7ge+wek6ZXybZ8XdOfz47ZO8iBDC/Cj0xu4d\nQry8C8jbjudVr/O24zklp9YVzlxbrevp0bo+//7x1fUkO5XHvYC/CTwAPgN++QQ/vwL8GPgRz3ZY\n/8+3+Nl/C+wDXZ6tFP4eEAP+hGfB//8IRE7wOj/Hs42HHwN/BfwRsPCG1/jrwPDw/3Hv8PrG247n\nFa/zN992PBet62m01bpqXS+zrrqWiEaj0UwI+qSjRqPRTAjasDUajWZC0Iat0Wg0E4I2bI1Go5kQ\ntGFrNBrNhKANW6PRaCYEbdgajUYzIZzYsMUZ1c7VjB9aW2uidZ18TnRwRgjhBtZ5dpInA3wX+AeG\nYdw72+Fp3jVaW2uidbUGjhP+nKqdCyCEkLVzlfhCCH2EcowwDOO49Xe1thOE1tWavErXk4ZEzrJ2\nrma80NpaE62rBTipYes7sXXR2loTrasFOKlhn1ntXM3YobW1JlpXC3BSwz6T2rmasURra020rhbg\nRJuOhmG0hRD/EPhjnpn+vzEM44dnOjLNhaC1tSZaV2twbvWw9Y7zePEW2QRvRGs7PmhdrclZZ4lo\nNBqN5h2jDVuj0WgmBG3YGo1GMyFow9ZoNJoJQRu2RqPRTAjasDUajWZC0Iat0Wg0E4I2bI1Go5kQ\ntGFrNBrNhHDSetgACCF2gCowAHqGYXx0FoPSXCxaV2uidZ18TmXYPCvZ+DXDMIpnMRjN2KB1tSZa\n1wnnLEIiZ1bLQDNWaF2tidZ1gjmtYRvAnwgh7gshfuEsBqQZC7Su1kTrOuGcNiTyZcMwskKIGeCP\nhBDrhmH8p7MYmOZC0bpaE63rhHOqFbZhGNnDjzng94AvncWgNBeL1tWaaF0nnxOvsIUQPgDDMJpC\nCD/wDeD/OquBjRs2m+2V13A4pN/vq+vozzmdTlwuF06nk36/T6/Xo9frvfC948Bl0/WyoHW1BqcJ\nicwBv39Y9NwH/DvDMP7D2Qxr/HA6nfh8Pnw+H16vd+Rju92mUCioy4zX6yWRSBCPx0kkEhQKBQ4O\nDkin05RKpQv637yWS6XrJULragFObNiGYWwDH57hWMYal8tFMBgkFosRjUZHrlqtxubmJv1+/6WG\nvbi4yJ07d7hz5w7b29s8ePCARqMxloZ92XS9LGhdrcFpNx0vDS6Xi
1AoxMzMzMiKOR6PUygU6PV6\n5PP5F37O4/GwuLjIhx9+yNe+9jV+9KMf0Wg02N3dvYD/hUajmWQutWHb7Xbcbre6nE6nugDq9bq6\nPB4PsViM5eVlrly5gtvtxmazUalUaDab+Hw+rly5ghCCarVKrVajWq1it9vxer2Ew2FmZmaYn5/n\n6tWr3Lhxg263S71ep9FoUK/XGQwGF/wbuXw4HA5CoZC6ut0u1WqVarVKvV6/6OFdeoQQI/tFhmEw\nGAwYDoecVz/aceZSG7bT6SQcDhOJRIhGowSDQQKBAIFAAIBkMkkymaTT6eD1epmdnWVlZYXr169T\nKBTI5XLk83l6vR4ul4vr169z+/Zttra22NzcZHt7G3j2ppNvvGg0ytWrV5XJp1IpUqkUnU5HG/YF\n4HQ6SSQSrK6ucu3aNSqVitJPG/bFI4TA4XCohZR50/4yzpdLbdhydZVIJFhYWGB6epqpqSmmpqYw\nDENtKKbTabxeLzMzM6yurnLr1i1+/OMf8+TJEx4+fIjT6eS9997j+vXr3Lx5k08++YR+v082m0WI\nZwfLpGlHIhGuXr2K2+0mGo3i8/nodDocHBxc8G/jciIN++7du3z88cdks1nsdjvFYpFUKnXRw7v0\nSMN2u914PB663S4Ag8FAG7aVMBulfJySpim/7vf7iUQiKlQhY9LxeBzDMCiVSuzu7uJwOFRIZGlp\nidXVVTY3N2m1Wuzu7hIIBLhz5w7Ly8t8+ctfplqt8vTpU7xeL+12W6X9dTod3G43c3Nz6hG8Xq+z\nt7eHw2FZKS4Em82G3W5Xug+HQ/UoLd8TdrudQCBAPB7n+vXrfOELX+Dp06ekUimCweBF/xc0PJun\ndrsdp9OpwpByHtvt9hFdzypEIt83drsdeH5zGA6HZ/L6p8FyLiGFlJfP5yMcDqvL4XCoSwozGAzY\n2dmhVCqRTqeJRqMAbGxskMvl6PV6GIaBYRhKNBna+PznP4/dbicUCtFoNHjy5An7+/uUy2W63S6t\nVotkMsm9e/cACIVCeL1evF4v/X7/UsfjzpNoNMrc3Bxzc3N4PB6y2SyZTIZMJkMwGGR2dpa5uTni\n8TgzMzMUCgX+/M//nL29PR4/fjyWGTyXEcMw1GIHUE/EkUgEu90+omuv1zv1v2ez2dR7Y25ujsFg\noF4/m82e+vVPi+UMG57dIV0uF263W62KFxcXWVhYwOPxqMerdrtNKpVib2+PZDI5kmsNcHBwQDab\nVW8Es2HL0IaMp4XD4RHDrlQq9Ho9ZdiGYZDP59U4FhcXsdvtDAYDbdbnQDQa5fr167z//vtEIhEe\nPnyIzWajUCgQDodZW1vj/fffZ2lpiVqtRqFQYHt7m3Q6TTKZ1IY9JkjDhmcr3Xg8zsrKCjdv3iQY\nDCpdi8XimRn23Nwct2/f5v3336fX6/Hw4UMGg4E27PNArrDdbjder1dldty+fZtbt27h9/vVVSgU\n+O53v0symWRnZ4d+v69W5gDtdptOp0O/3x9ZYZtj0X6/n1qtRqvVol6vk8/nSafTL6yw8/k8Gxsb\nvPfee3zwwQf4/X7C4fDYPGpZDWnYX/nKV5ibm1OTemNjg0gkwtraGl/+8pdZXV3lk08+YXt7m+9/\n//vkcjmazSatVuui/wsanhv2YDCg2+3i9Xq5evUqH330EXNzcwghlK5ngVxh3759m7/xN/4G7Xab\nwWAwNntMbzRsIcTvAD8DZA3DuHv4tRjwLZ6dnkoDf9swjPJ5DvQNY1SxLbvdjsPhQAihDHYwGNDr\n9eh0OiMhkX6/j9PpJBgMMjMzo+LN8mc8Ho/a8HA4HDQaDfb393n06JFK92s2m+oQjLzkCrvb7dLv\n96nVatRqNeDZQRoZv47FYuRyuQtJ6ZsEXQ/HpPYgABU+Gg6H+Hw+AoEAfr8fm81Go9FQl3nfoNPp\n0Ov1Rn5W/l2z2aRcLpPJZEgmkxSLJysVLd93MqNBlh6Q/+67YlJ0PS5yn0EushKJBMFgECEE/X5f\nbd4vLi5SLBZptVpqoXVSBoOBen90u913ruHrOM4K+18B/xL416av/RrwB4Zh/IYQ4pcO//yL5zC+\nY2Gz2UaMWMal2+02xWKR3d1der0ehUJB5Vx7PB4Mw6DVahGPx/na175Gt9tVV7vdVvnR9Xodr9dL\noVDgxz/+Mfv7+7TbbXXJ1bU0i3w+T7lcfukjWq1WI5lMYrPZCAQCPH36lEKhcBF1RcZeV3huhDI3\nXtZrGQ6HTE1NceXKFZV1s7u7y87ODru7u5RKJR4/fozdbiccDrO+vk46nabf71Mul3ny5AkOh4PN\nzU3W19fZ29tTGQgnweFwjKSF1mo16vU6tVrtVK97AiZC1+MiD55JnQOBAP1+nwcPHiCEoFwu4/f7\n+fDDD8lms6rsw0kNezgcks1mefjwIcPhkG63y/r6+ksPxV0EbzRswzC+I4S4euTLPw3I9kLfBL7H\nBRu23EWWuZr9fp9ut0uxWKTf7yvjNm84hsNhlpaW1NXr9dTjsIxryksafqFQUCtw+e/0ej1l9L1e\nj0ajQbPZfKVhp1Ip6vU6LpeLcrlMpVJ554Y9CbrC6H4EQKfTUWGpqakpbt68yRe/+EUCgQA/+MEP\naLVapFIpZdilUgmPx0M+nyeXy9Hv96lUKjx58oRKpYLP5yOfz5PP509lrE6nk0AgwPT0NNPT0+Ry\nOQBardY7NexJ0fW4eDweFhYW+NznPscXv/hFNY8fPnxIpVJRabjLy8vs7e1ht9up1WonflKShj0c\nDsnlcgwGA/L5/AslJy6Kk8awZwzDKAAYhpEXQsye4ZjeGrnCdrvduFwuWq0WnU6HdrtNvV6nWCyq\ndD4zy8vLhMNhtcI2hy+KxSL7+/vs7e2xv7+vDrikUikymcwrx/KmDUR5ik7m+I7ZhuNY6QqolC6P\nx6PCXPLmFovFuHHjBl/96leJRCK0222SySR2u51SqUS5XObx48fqteTvulwuUy6X2dzcfOHvTorD\n4SAQCDAzM8PCwgLwbA+kXB6LyMPY6Xpc5Ar785//PF//+te5d+8eW1tbPHjwgJ2dHT7++GOWl5f5\n8MMPiUaj1Ov1U+XPD4fDl2aEjMs8tcSmozkmKYQgHA6zsLBAIBDA6XSqvE0hhIppdjodwuEwTqeT\nbDbLD3/4QzqdjjpSXqlUKBQK6u6az+epVqt0Op0XxHM4HLhcLnV1u10V/3I6ncRiMWKxGFNTU1Sr\nVUqlEsViceQk3bi8IcYNuf/QbrfVaVMZdrhy5QqdTof79+8D8Nlnn5HNZtV+wJt+p2/6eyGE0tTt\ndqtHZHmZ6ff7atNZZgTV6/WxLKE7bsjkALlaNudaB4NBIpEI6XSaP/uzP2NjY4OtrS2q1aqq3/Pk\nyRM1j/f29s7khOq4zseTGnZOCDF9eLeeAS4032U4HKrwg81mY35+ntXVVVZWVgiFQiMHZ6Qh12o1\nhsMhLpeLbDZLs9mk2Wyq
v5MxSPP1qnikzPc+Gr8cDofqkW5tbY21tTX29/dVZb9GowGM1ZtjrHSF\n54ZtGMZIeYCVlRWGwyHtdpv79+9TLpfZ2dkhk8mc2QauEAKPx6N0HQwGr3wf9Ho96vU6Qgja7bZ6\nD4yJYY+drmbkKvrGjRtcv34dp9M5Umu+0+mQTqfZ2dkhlUqxvb2tDDuXy+FwONTcTafTal5ZkZMa\n9reBvwv8xuHHb5/ZiE6AXGEPBgPsdjuRSITr16/z8ccfMzc3N3JySdb/yOfzFItFlSGwsbGhwhW1\nWo1Go6Hi00c/HsXhcOD1eolEIsRiMXUCq9Vq4fF4mJ+fV0ef19fXVU5nJpNRu89jYtpjpSugVrX9\nfh8hBLOzs9y5c4ePP/6Yx48f8+mnn/LgwQO2t7fVxu9ZGrbb7VYZPVL7TqfzginIcFq73aZUKo3U\nvBgDxk5XM16vl4WFBT744AO+8pWv4Ha7VXptqVTixz/+Mdvb2/zoRz+iVCqpBZRcYTcaDfb29tS5\nByunZB4nre/fAj8BTAshksA/A34F+JYQ4ueAA+Bnz3WUb0BW8ALUBJEm6vf7lfg2m02tpL1er0oF\nkyldskKbnHhvQsbFnU7nSOlVn8+n0pBCoRCLi4vMz88Tj8fJZrMqVHORTIKugLqhyXx1l8tFOBxm\ndnaW3d1d6vU6u7u7I7Hqs0SmFTocDobDoXpSO4o8vnyadLKzYFJ0NSOEUPsUfr8fn8/3QrGnbDbL\nxsYGzWZz5GeleZ8l5owz6S0v6yZ1ERwnS+TvvOKvvn7GYzkT5Op1fX0dIQTRaHSkPGOlUqFSqahY\nsoxRyzKpMgvhTZhzv71eL9FoVIViFhYWVMqfy+VStUmSyaTqNCNzvuWBnHfNpOkKz27G5XJZHR+X\n2SDnlYVhGAadTodqtQo8j1O/4zS9t2ISdW232+zt7XH//n16vR6BQECVb6jVaipN813kQgsh8Pl8\nBINBgsEgw+FwJER60Vhi09GM+QhpoVDA6/WObDrKpHqZPy1T8BqNhgp7HOeR2nygQxaGkrHqo/0e\n5aPx06dP2d/fV4Z9UWY9qZgNOxQKqYMu57WqlYYN0O12VTx1TMIclkGmYna7XdLptDpYFgwG6fV6\nqs7PuzhcJg17amqK2dlZ5SfyZn3RWM6wZVnTQqHAo0ePRir0Acok5Ym3o0Wdjmui5hrXcoUtDVtu\nUkOIU1sAABR1SURBVPn9ftrtNpubm2xubr5g2ONyempSkIadSqWw2Wyk0+lzN2x5g5fvIX2TPXuk\nYafTaex2u2q9J/eD8vn8O19hT01Nsbi4qEIh8qTyRWM5w4Z3VytXGn2r1SKfz7Ozs4PX61WlWuXq\nvtVqUSwWSSaTZDIZdWxd83aYU+dkISeZLXBemA3aXPkxGAyqQ0+VSuXCY9eTjMytlzFic1lTu91O\ntVpVT6TvYizdblcdnJOZQeMyXy1p2O8C86pcJuvLGtq3bt2i3+/j9/sxDGOkVkUulzt3k7Eq0rBt\nNptqr1ar1d7Z79Lv97O8vKz2Kba2ttja2lJ5/ZqzQZ44NgwDm82mTou+qyebRqOhzHo4HFIul8cm\n80Qb9gkxh1ZkE4Jyuczu7i6DwYBAIEAikcDpdFIul8lmsySTSSqVCu12e2zu2JOEebUjb3qyHMC7\nIBAIsLy8zOc+9zlu376N3+9XOcKVSuWdjOEyIPPuu92uKvL0rjI0DMOg2WwyGAxUGGScbsjasI/J\n0Q42R1N/ms2mqgkSi8WIx+MsLi6q4vnyarVaqhaJ5u2Qoa7jpFy+DeZNaXMI5OiKzlxf/fr16+zv\n76vTspqz4yJTJOVG87gY9FG0YR8TadKy1rYs8jMzM0O32+Xg4ICDgwMymQyFQoHHjx+rXNL19XXV\nCEGWb9WMB7JnoNTWHE89elPtdruUy2UODg7Y3d0lk8lQq9XGIj9XcznQhn1MzBUB/X6/WmWtra3R\nbDZ58OCBMu5isciTJ0+o1WrY7XZVTEaWBdVZBuODNGxZL0Su7MyHsSTdbpdKpUI6ncbv95PNZrVh\na94p2rCPiTRsj8dDKBRieXmZu3fv8qUvfUllfciuFIVCgVqtxs7ODkKIkVirNuvxQz41+Xw+1V3o\nZSYsDfvg4ACHw6ENW/POsb3pG4QQvyOEyAghPjV97VeFECkhxL3D6xvnO8yLx9ypZDAYqHKaU1NT\nxGIxgsEgLpcLQNXELpfLqvbBu9zlPg5a1+ccPX4sn4JsNhuhUIj5+Xlu3LjB0tISHo9Hnb4bR8PW\nulqbk3acMYBfNwzj189lVGOIuSJgs9lUJ99kUamjB3QmAK0rz3OA5Q1VFpsaDofY7Xamp6dVgwu7\n3a5qV+RyOYrFItVqdawMG62rpTlpxxmAiXKn0yIn8mAweKlhv6oo0LiidX2ODIPIkJXM4nG5XExP\nT3P9+nU++OADOp0Ojx49IpfLsbOzo+rFjFNOvdbV2rwxJPIa/rEQ4q+EEN88bPJ5aTCHR4426TSb\n9yQZuIlLpasMh8g+nuaGvTabTTXDuHXrFleuXMHv99Nqtdjb26NQKJxpOddz5lLpalVOati/CVwD\nbgObwL84sxGNKeFwmGvXrvHxxx/zkz/5kyQSCSqVCt///vf5/ve/r9oSzc/PMzU1RSAQwOGYuD3d\nS6fr65D9/B4/fswnn3zCvXv32N7eHpe2X2+D1tUinMhRDMNQLYSFEL8F/JczG9GYIg37/fffZ2Vl\nRbURk3W0a7UahmEwPz8/0lh3nB6X38Rl1PV1mA27Xq/T6XQ4ODiYuFONWlfrcCLDFkLMGoYh2wz9\nLeDh2Q1pPAmFQqyurvLRRx/x/vvv85d/+Zckk0k++eQTarUawWCQUChEIpHAbrePtACbFC6jrq9D\nGnatViOZTKqWZON6Cu5VaF2tw0k6zvwK8JNCiA8AF7AL/P1zHeU7Qsae5Ud5KlGmecmYdavVGqkP\n0mg0mJ2dVb0dB4PB2JfhvEy6vg0ulwuPx4PH48HlctFqtVSPxkmIVWtdrY04L1MRQoyvW70Ch8Oh\njpPbbDbVeKDX67G0tMTNmzd57733WFhYYH19nfX1df7qr/6Kdrutym6GQqGRrjZnXffipBiGcWY7\noJOo7XGJRCLMzc0xNzdHJBJR5QYymczYaGlG62pNXqXrxO2KnSd2u12tsOx2u6rB2+/3qVQqbG1t\nqfCHbOTbbrdV+EOW/5QdbcYsP1dzDHw+H4lEghs3bpBIJHj06BGDwUA1ndBoLhJt2CakYXu9XlWF\nT3brlqvmra0t9f3mp5PzaAaqeff4/X7m5+e5desW165dU2a9s7Nz0UPTaLRhm5H5uPIwTKfTUYcq\nJPLzl+VZj3PMWnM8ms0m6XSajY0NqtUqW1tb5PP5icr20VgXbdgmpGHLOhKyKe+rjNhs2q+qoayZ\nLKRhD4dDVTI3n8/rhhOasUBvOpp4XZbIq77XbNjjXDpVb04dDxkSO5ol0mq1xjJLR
OtqTV6lqzbs\nV2Cz2VR6l8fjUbVE5NFlycu6sI8jemI/x5y653Q6lSFPYid7ras10Vkib4nL5SKRSLCwsMDCwgLt\ndptsNksul6NQKKhTjLJg0LgateZFwuEwi4uLLCwsEAqFSKVS7O3tsbe3pzNBNGONNuxXIA37zp07\nfPjhh1QqFZ48ecLjx4/p9Xqqi7KMcWvDnhzC4TCrq6t88MEHxONx7t+/j2EY5HI5bdiasea1hi2E\nWAJ+F4jy7JTUbxuG8c8Pq319C5gD0sDfNgxj4irivA5p2Hfv3uUnfuInyOVyuFwums0mxWIRQIVG\nJu0x+jLrCs/rwnz00Uesrq4yHA7J5XJ89tlnFz20U3PZtbU6b6rW1wX+kWEYd4EvAj8vhPgQ+DXg\nDwzD+AD4w8M/WwpZH7nZbFKr1ajVatTrdRqNBs1mU6X8TZpZH2J5XX0+H7Ozs6yurnLz5k0WFhYI\nh8PY7XaazSYHBwc8efKEhw8f8vTpU0qlklUOOlle28vMa1fYhmFkgMzh53UhxH1gAfhp4KPDb/sm\n8D3gF89xnO+cwWBAo9GgUCiwt7dHPp8nm81SLBap1Wpq83ESQyGXQddAIKD2H7xeL/v7++zv79Nq\ntdQBKMMwiEajbG9vs7+/b4lc68ug7WXm2DHswy4WXwJ+DpgxDKMAz0o3CiFmz2V0F8hwOFSGnUql\nKBQKZLNZSqUS1WqVwWDAYDCY1BW2wqq6BoNBFhYWeO+99wiHwypFT5ZH3dzcJJfL4Xa7qVarVKtV\ny+VaW1Xby8yxDFsIEQB+D/hFwzCqE9pJ5QVsNhsul0sVfJIYhoHX6wWgWq2yt7dHsVgkl8tRLpdp\nNpsXNeQzxaq6AjidTtUkORqNsr+/j9frxWaz0Wg0aDQapNPpix7muWFlbS8zxymv6gT+PfC7hmH8\n/uGXc0KI6cM79QyQffUrjC9e7//f3rn8tnHccfwz4mtFUnyJlONISmA0huLGcZMCRQ3UDQK0BZz2\nWCCHope0uaUogh5apJcG/QPSF3rIob00bVG0ZzdpAhRIckqiWk4iGbbV6EHKEuUlxfeSFMXpgdw1\nrehhvmpxdz7Agqsld/CzvuLPszO/me84p0+ftg4hhNVzBizHGLOHnclkbFNFYGddobW3y8bGBl6v\nl4mJCVZXV8lms3YZpz4Su2vrZI6rEhHAH4AlKeWvOt66Anwf+HX79crQIhwi4+PjzM7Ocv78eZ58\n8kmEEFZ9dblcJp1Os7W1RTKZJJvNksvlbJGw7a4rtBL27du3qVar+Hw+stks2Wz2RK5WHCRO0NbJ\nHLnSUQhxCXgX+BgwP/gK8AF3S4S2gOf3lwiNwqqp2dlZnn32WeswN3yq1Wrous78/Dzz8/N89NFH\nZLPZA1c6jgqdK6f60bV9/4nX1u12W0fnvjBH7Q0ziuxfEWf376xT6Gmlo5TyfQ4v/ftWv0E9CPx+\nP4FAAL/fz8MPP4zf76darXL79m38fr9lYmCOd5q97Uqlcs9wyShjR133YyZnp+EEbZ2M41Y6hsNh\na8x6cnISl8vF2toauq4Tj8dJJBIkEgnLv69er9NoNKyKEDv1zhQKxWjhqIQthCAcDvPII48wNzdH\nMBgknU6zurpKOp3m0Ucf5ezZs0gprZ53Z8JWyVqhUDxIHJGwzZImM2FPT0/z+OOP4/P5yOVy6LrO\nwsICpVIJj8dDNBolFothGAb1et0W9dZ2x9wW1+VyMTY2hpTS0q3ZbH7OcELtX24PnKar7RP22NiY\nNfnk8XgIhUJEIhGi0SihUIi5uTncbjexWIxgMEg0GrWWoN+5c4dSqWSLcWu7o2kaU1NT1pHP59ne\n3iadTlMsFvF6vfh8PrxeL3t7e9RqNer1+khOICtamOsonKSrIxK2KaqmaYRCIcLhMNFolEQigcvl\nYnJykjNnzlAul6lWq5RKJXZ2dtB1nXK5rHrXI4DP52N6eponnniCc+fOkUwmWVpawjAMyuUymqYR\nDAYJBoPUajVKpRLNZtO2X2wnYO5Z7yRdHZGwzaqPQCDAxMQE4XCYSCTCQw89RCwW48yZM9TrdT77\n7DNu3brF9vY2yWRS9bBHCE3TmJ6e5sKFCzzzzDMsLi5iGAapVIp0Oo3P52NiYoJoNIphGNaksmJ0\nGRsbc5yutk/Ybrcbv99POBwmFosRi8UIhUIEAgF8Pp81odjZszYfpXO5nPVHoDjZmGPWu7u790wW\nm5U9piOQedh1jNNJOFFXRyTsYDBIIpHg9OnTJBIJQqEQXq+XarVKKpVifX2dtbU1kskkqVSKVCpF\nNpulVCpRq9Vs/0dgB6rVKhsbG1y7ds06X1lZoVAoIKWkVqtRLBYtq7dKpeLIOm074URdbZ+wPR4P\nExMTxONxZmZmiMfjBINBPB4PtVrN+pLPz8+TyWQoFArk83nK5TL1et1yUVecbEwtzddCoYCu69aX\nuVarWa97e3u2nphyCk7UtVfHmVeBF4E77Y++IqV8c5iB9oq5a1sikWBmZuaeHnY+nyeVSrGwsMA7\n77xj2X7ZPUHbQdf9mE9LGxsb1rVOHavVqq3HNk3sqO1hmOPVTtDV5Lgetule8Wl7u8b/CCHeorVH\nwWtSyteGHuEA6KzT1HUdwzBIJpPs7OywtLREOp223R4Tx2ALXQ/C1DASiRCPx5mcnETTNHRdJ5PJ\noOu6rR+ZsZm2mqYRj8etw3xy0nWdQqHwoMP7v9Or4wzASG6wq+s6yWQSwzDIZDKsrq6yvb3tqIlF\nO+q6n2g0ymOPPcbc3ByhUIibN29y48YN8vm8rRO23bQ1yzXPnTvH3NwcGxsbXL9+nXq9rhL2UXS4\nV7zQfn1JCPEiMA/8WEqZHUaAg0bXddbX10mlUmxvb1Mulx1dumcXXfcTiUQ4e/YsFy9eZGpqCo/H\nYznNOAU7aKtpGjMzMzz11FNcunSJpaUl6vW6rc0njuI4E17Acq/4Oy33iiLwe+ALwBeB/wK/HVqE\nfWKWezUaDer1Orlcjs3NTZaXl1leXmZzc5Niseik4RCLUdYVwOVy4fV68fv9+P1+fD4fbrcbIYT1\n3vj4OH6/H03TrPecwKhra2Kuo9A0jUAgwPj4OB6Ph7Gx+0pdtqMbx5m/mO4VUkq94/3XgX8PLcI+\nMct9DMOgWCxa+4M4aQjkIEZdV2g9LgcCAQKBAC6Xy7L+KpfL5HI5lpeX8Xq9RCIRFhcX2drasnUF\ngYkdtDUxJ5MXFhZoNBqsr69b5ZpOpCfHGSHElJTStBj6LrA4vBD7w1ymaibsSqXi+IRtB12h9bgc\nDoeZnJzE4/GQyWRoNptUKhV2dna4desWhUIBTdOsxVB2Hr8G+2hrYibsRqPB5uYmuVyOra0tlbAP\n4Wu07IQ+FkJcbV/7OfA9IcQFWmVDa8APhxdif3QW1JdKJQzDYHd319EJGxvoCq0edjgc5tSpU3i9\nXitZCyHI5XIUi0VWVlbu8ep0wFyFLbQ1
MRP25uYmLpeLZrPpFB0PpFfHmX8OJ5zBYzrGZLNZXC4X\n2WyWcrls+57WUdhBV2hpW6lUyOVyeL1eSqWStdDJqV9qu2hrIqV0rHvQQdh+pWOj0aBYLCKEQNd1\npJSUSiVHjGXanVqtRj6fR0pJtVplb28PwzAcOYGscAa2T9i7u7tWz6ter+PxeCxDVsVoU61WaTab\nGIaBYRi43W52d3dVwlbYliNd0/tqWDkwnygOc2HuBaXtyUHpak8O09WZxYwKhUIxgqiErVAoFCPC\n0IZEFAqFQjFYVA9boVAoRgSVsBUKhWJEUAlboVAoRoShJmwhxGUhxCdCiCUhxM96bGNVCPGxEOKq\nEOKDLu77oxAiLYT4pONaTAjxdru9t4QQkR7beVUIkWrHdFUIcfmYNmaFEO+2fxc3hBA/7SWeI9rp\nKp5+GYSu7Xa61lbpOjyUrtbnT66upvPwoA/AB6zQ2jzdDXwIPN1DOytArIf7vg48DXzSce13wMvt\n85eB3/TYzi+An3QRyyngfPs8CNwEvtRtPEe001U8J0HXXrVVuipdnazrMHvYXwUWpZQbUsoG8Dfg\nOz221fXiACnle8DOvsvfBv7UPn/jfuI5pJ2uYpJSpqWUn7bPS4DpAtJVPEe001U8fTJIXaHLuJWu\nQ0PpereNE6vrMBP2DJDs+DnVvtYtEjAfQ37UZ0wJKWUGrP2Bp/po6yUhxHUhxBtCiNj93iTuuoC8\n3088He281088PTAoXWFw2ipd+0fpegAnTddhJuxBFXhflFJ+GfgG8IIQ4psDarcfenLvEC0XkH/Q\ncgHpeUNf8WDdRAZZuH/StFW6Dgal6+fbGYiuw0zYKWC24+dZ7v0f/L6Q7U3XpZR3aP3yvtJHTHeE\nEHEAIUQC2D7m84fFpMs2wOv3E5O46wLyZ9l2AeklHnGIm0i38fTBQHSFgWqrdO0fpWsHJ1XXYSbs\nD4HzQojpdtDP0+WevEIIvxDC3z4PAJfpzynjCq3N3Wm/XumlESFE56PQse4dQhzsAtJtPIe10208\nfdK3rjBwbZWu/aN0vfv5k6trLzOV93sAzwGfAkvAKz3cfwa4BizQmmH9ZRf3/hW4DdRp9RReAGLA\n27QG//8FRHpo5we0Jh6uAdeBN4HpY9q4BDTb/46r7eNyt/Ec0s5z3cbzoHXtR1ulq9LVybqqvUQU\nCoViRFArHRUKhWJEUAlboVAoRgSVsBUKhWJEUAlboVAoRgSVsBUKhWJEUAlboVAoRgSVsBUKhWJE\n+B95ZPpm8QyaLgAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%pylab\n",
+ "%matplotlib inline\n",
+ "\n",
+ "train_dp.reset()\n",
+ "x, t = train_dp.next()\n",
+ "img = x[0].reshape(28,28)\n",
+ "pds = [0.9, 0.7, 0.5, 0.2, 0.1]\n",
+ "imgs = [None] * (len(pds)+1)\n",
+ "imgs[0] = img\n",
+ "\n",
+ "for i, pd in enumerate(pds):\n",
+ " d = rng.binomial(1, pd, img.shape)\n",
+ " imgs[i + 1] = d*img\n",
+ "\n",
+ "fig, ax = plt.subplots(2,3)\n",
+ "ax[0, 0].imshow(imgs[0], cmap=cm.Greys_r)\n",
+ "ax[0, 1].imshow(imgs[1], cmap=cm.Greys_r)\n",
+ "ax[0, 2].imshow(imgs[2], cmap=cm.Greys_r)\n",
+ "ax[1, 0].imshow(imgs[3], cmap=cm.Greys_r)\n",
+ "ax[1, 1].imshow(imgs[4], cmap=cm.Greys_r)\n",
+ "ax[1, 2].imshow(imgs[5], cmap=cm.Greys_r)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 4: Implement Dropout \n",
+ "\n",
+ "Implement dropout regularisation technique. Then for the same initial configuration as in Exercise 1. investigate effectivness of different dropout rates applied to input features and/or hidden layers. Start with $p_{inp}=0.5$ and $p_{hid}=0.5$ and do some search for better settings.\n",
+ "\n",
+ "Implementation tips:\n",
+ "* Add a function `fprop_dropout` to `mlp.layers.MLP` class which (on top of `inputs` argument) takes also dropout-related argument(s) and perform dropout forward propagation through the model.\n",
+ "* One also would have to introduce required modificastions to `mlp.optimisers.SGDOptimiser.train_epoch()` function.\n",
+ "* Design and implemnt dropout scheduler in a similar way to how learning rates are handled (that is, allowing for some implementation dependent schedule which is kept independent of implementation in `mlp.optimisers.SGDOptimiser.train()`). \n",
+ " + For this exercise implement only fixed dropout scheduler - `DropoutFixed`, but implementation should allow to easily add other schedules in the future. \n",
+ " + Dropout scheduler of any type should return a tuple of two numbers $(p_{inp},\\; p_{hid})$, the first one is dropout factor for input features (data-points), and the latter dropout factor for hidden layers (assumed the same for all hidden layers)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:Training started...\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.624. Accuracy is 8.60%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.554. Accuracy is 9.84%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 3.828. Accuracy is 50.90%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.716. Accuracy is 76.93%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 9 seconds. Training speed 295 pps. Validation speed 1692 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 1.132. Accuracy is 66.90%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.753. Accuracy is 78.82%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 10 seconds. Training speed 289 pps. Validation speed 1653 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 1.043. Accuracy is 71.90%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.501. Accuracy is 85.69%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 10 seconds. Training speed 280 pps. Validation speed 1681 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.810. Accuracy is 78.50%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.481. Accuracy is 85.92%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 9 seconds. Training speed 308 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.769. Accuracy is 79.40%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.588. Accuracy is 84.25%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 9 seconds. Training speed 320 pps. Validation speed 1733 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.792. Accuracy is 78.60%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.434. Accuracy is 88.49%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 9 seconds. Training speed 334 pps. Validation speed 1692 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.675. Accuracy is 82.00%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.514. Accuracy is 86.76%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 9 seconds. Training speed 284 pps. Validation speed 1704 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.808. Accuracy is 79.90%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.620. Accuracy is 84.22%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 9 seconds. Training speed 317 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.810. Accuracy is 79.90%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.645. Accuracy is 84.91%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 9 seconds. Training speed 304 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.631. Accuracy is 83.20%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.493. Accuracy is 88.80%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 10 seconds. Training speed 286 pps. Validation speed 1656 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.676. Accuracy is 83.90%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.598. Accuracy is 86.78%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 9 seconds. Training speed 296 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.659. Accuracy is 83.80%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.432. Accuracy is 90.21%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 9 seconds. Training speed 309 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.525. Accuracy is 86.80%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.490. Accuracy is 89.64%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 9 seconds. Training speed 320 pps. Validation speed 1681 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.488. Accuracy is 88.50%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.606. Accuracy is 86.87%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 9 seconds. Training speed 305 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.441. Accuracy is 88.30%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.570. Accuracy is 89.27%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 9 seconds. Training speed 331 pps. Validation speed 1736 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.478. Accuracy is 87.80%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.488. Accuracy is 90.52%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 9 seconds. Training speed 337 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.461. Accuracy is 89.60%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.736. Accuracy is 86.67%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 9 seconds. Training speed 294 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.440. Accuracy is 88.90%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.618. Accuracy is 88.99%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 279 pps. Validation speed 1659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.599. Accuracy is 87.40%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.487. Accuracy is 91.03%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 281 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.447. Accuracy is 90.10%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.574. Accuracy is 89.52%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 10 seconds. Training speed 282 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.579. Accuracy is 87.80%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.550. Accuracy is 90.48%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 9 seconds. Training speed 302 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.461. Accuracy is 89.70%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.597. Accuracy is 90.02%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 9 seconds. Training speed 303 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.519. Accuracy is 89.50%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.645. Accuracy is 90.35%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 10 seconds. Training speed 277 pps. Validation speed 1670 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.439. Accuracy is 90.80%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.634. Accuracy is 90.04%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 10 seconds. Training speed 268 pps. Validation speed 1687 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.365. Accuracy is 91.50%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.564. Accuracy is 91.55%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 9 seconds. Training speed 309 pps. Validation speed 1733 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.391. Accuracy is 91.60%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.660. Accuracy is 90.20%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 9 seconds. Training speed 329 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.412. Accuracy is 91.40%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.614. Accuracy is 90.82%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 9 seconds. Training speed 281 pps. Validation speed 1698 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.470. Accuracy is 90.40%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.593. Accuracy is 91.29%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 10 seconds. Training speed 278 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.443. Accuracy is 90.90%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.623. Accuracy is 90.79%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 9 seconds. Training speed 309 pps. Validation speed 1661 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.359. Accuracy is 92.20%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.614. Accuracy is 90.74%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 9 seconds. Training speed 312 pps. Validation speed 1659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 31: Training cost (ce) is 0.345. Accuracy is 92.30%\n",
+ "INFO:mlp.optimisers:Epoch 31: Validation cost (ce) is 0.698. Accuracy is 90.71%\n",
+ "INFO:mlp.optimisers:Epoch 31: Took 9 seconds. Training speed 292 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 32: Training cost (ce) is 0.443. Accuracy is 91.20%\n",
+ "INFO:mlp.optimisers:Epoch 32: Validation cost (ce) is 0.590. Accuracy is 91.87%\n",
+ "INFO:mlp.optimisers:Epoch 32: Took 9 seconds. Training speed 291 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 33: Training cost (ce) is 0.557. Accuracy is 91.00%\n",
+ "INFO:mlp.optimisers:Epoch 33: Validation cost (ce) is 0.624. Accuracy is 91.39%\n",
+ "INFO:mlp.optimisers:Epoch 33: Took 10 seconds. Training speed 277 pps. Validation speed 1687 pps.\n",
+ "INFO:mlp.optimisers:Epoch 34: Training cost (ce) is 0.451. Accuracy is 91.30%\n",
+ "INFO:mlp.optimisers:Epoch 34: Validation cost (ce) is 0.687. Accuracy is 91.12%\n",
+ "INFO:mlp.optimisers:Epoch 34: Took 9 seconds. Training speed 320 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 35: Training cost (ce) is 0.456. Accuracy is 91.40%\n",
+ "INFO:mlp.optimisers:Epoch 35: Validation cost (ce) is 0.723. Accuracy is 91.09%\n",
+ "INFO:mlp.optimisers:Epoch 35: Took 9 seconds. Training speed 336 pps. Validation speed 1721 pps.\n",
+ "INFO:mlp.optimisers:Epoch 36: Training cost (ce) is 0.379. Accuracy is 92.80%\n",
+ "INFO:mlp.optimisers:Epoch 36: Validation cost (ce) is 0.753. Accuracy is 90.54%\n",
+ "INFO:mlp.optimisers:Epoch 36: Took 9 seconds. Training speed 320 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 37: Training cost (ce) is 0.387. Accuracy is 93.10%\n",
+ "INFO:mlp.optimisers:Epoch 37: Validation cost (ce) is 0.721. Accuracy is 91.16%\n",
+ "INFO:mlp.optimisers:Epoch 37: Took 9 seconds. Training speed 306 pps. Validation speed 1692 pps.\n",
+ "INFO:mlp.optimisers:Epoch 38: Training cost (ce) is 0.489. Accuracy is 91.60%\n",
+ "INFO:mlp.optimisers:Epoch 38: Validation cost (ce) is 0.818. Accuracy is 89.82%\n",
+ "INFO:mlp.optimisers:Epoch 38: Took 9 seconds. Training speed 301 pps. Validation speed 1707 pps.\n",
+ "INFO:mlp.optimisers:Epoch 39: Training cost (ce) is 0.510. Accuracy is 91.70%\n",
+ "INFO:mlp.optimisers:Epoch 39: Validation cost (ce) is 0.690. Accuracy is 91.15%\n",
+ "INFO:mlp.optimisers:Epoch 39: Took 9 seconds. Training speed 296 pps. Validation speed 1712 pps.\n",
+ "INFO:mlp.optimisers:Epoch 40: Training cost (ce) is 0.560. Accuracy is 91.80%\n",
+ "INFO:mlp.optimisers:Epoch 40: Validation cost (ce) is 0.729. Accuracy is 91.11%\n",
+ "INFO:mlp.optimisers:Epoch 40: Took 9 seconds. Training speed 302 pps. Validation speed 1695 pps.\n",
+ "INFO:mlp.optimisers:Epoch 41: Training cost (ce) is 0.484. Accuracy is 91.50%\n",
+ "INFO:mlp.optimisers:Epoch 41: Validation cost (ce) is 0.629. Accuracy is 92.18%\n",
+ "INFO:mlp.optimisers:Epoch 41: Took 9 seconds. Training speed 325 pps. Validation speed 1689 pps.\n",
+ "INFO:mlp.optimisers:Epoch 42: Training cost (ce) is 0.327. Accuracy is 93.40%\n",
+ "INFO:mlp.optimisers:Epoch 42: Validation cost (ce) is 0.723. Accuracy is 91.48%\n",
+ "INFO:mlp.optimisers:Epoch 42: Took 9 seconds. Training speed 300 pps. Validation speed 1661 pps.\n",
+ "INFO:mlp.optimisers:Epoch 43: Training cost (ce) is 0.358. Accuracy is 93.50%\n",
+ "INFO:mlp.optimisers:Epoch 43: Validation cost (ce) is 0.665. Accuracy is 91.98%\n",
+ "INFO:mlp.optimisers:Epoch 43: Took 9 seconds. Training speed 291 pps. Validation speed 1707 pps.\n",
+ "INFO:mlp.optimisers:Epoch 44: Training cost (ce) is 0.441. Accuracy is 92.80%\n",
+ "INFO:mlp.optimisers:Epoch 44: Validation cost (ce) is 0.846. Accuracy is 90.96%\n",
+ "INFO:mlp.optimisers:Epoch 44: Took 9 seconds. Training speed 325 pps. Validation speed 1718 pps.\n",
+ "INFO:mlp.optimisers:Epoch 45: Training cost (ce) is 0.526. Accuracy is 91.10%\n",
+ "INFO:mlp.optimisers:Epoch 45: Validation cost (ce) is 0.674. Accuracy is 92.17%\n",
+ "INFO:mlp.optimisers:Epoch 45: Took 9 seconds. Training speed 317 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 46: Training cost (ce) is 0.407. Accuracy is 91.90%\n",
+ "INFO:mlp.optimisers:Epoch 46: Validation cost (ce) is 0.819. Accuracy is 90.26%\n",
+ "INFO:mlp.optimisers:Epoch 46: Took 9 seconds. Training speed 308 pps. Validation speed 1698 pps.\n",
+ "INFO:mlp.optimisers:Epoch 47: Training cost (ce) is 0.482. Accuracy is 92.60%\n",
+ "INFO:mlp.optimisers:Epoch 47: Validation cost (ce) is 0.752. Accuracy is 91.34%\n",
+ "INFO:mlp.optimisers:Epoch 47: Took 9 seconds. Training speed 286 pps. Validation speed 1687 pps.\n",
+ "INFO:mlp.optimisers:Epoch 48: Training cost (ce) is 0.405. Accuracy is 92.90%\n",
+ "INFO:mlp.optimisers:Epoch 48: Validation cost (ce) is 0.787. Accuracy is 91.25%\n",
+ "INFO:mlp.optimisers:Epoch 48: Took 10 seconds. Training speed 279 pps. Validation speed 1672 pps.\n",
+ "INFO:mlp.optimisers:Epoch 49: Training cost (ce) is 0.597. Accuracy is 91.70%\n",
+ "INFO:mlp.optimisers:Epoch 49: Validation cost (ce) is 0.794. Accuracy is 91.60%\n",
+ "INFO:mlp.optimisers:Epoch 49: Took 9 seconds. Training speed 285 pps. Validation speed 1698 pps.\n",
+ "INFO:mlp.optimisers:Epoch 50: Training cost (ce) is 0.472. Accuracy is 93.30%\n",
+ "INFO:mlp.optimisers:Epoch 50: Validation cost (ce) is 0.918. Accuracy is 90.65%\n",
+ "INFO:mlp.optimisers:Epoch 50: Took 9 seconds. Training speed 303 pps. Validation speed 1672 pps.\n",
+ "INFO:root:Testing the model on test set:\n",
+ "INFO:root:MNIST test set accuracy is 90.79 %, cost (ce) is 0.898\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n",
+ " %reload_ext autoreload\n"
+ ]
+ }
+ ],
+ "source": [
+ "%autoreload\n",
+ "\n",
+ "import numpy\n",
+ "import logging\n",
+ "\n",
+ "from mlp.layers import MLP, Linear, Sigmoid, Softmax #import required layer types\n",
+ "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
+ "from mlp.dataset import MNISTDataProvider #import data provider\n",
+ "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.schedulers import LearningRateFixed, DropoutFixed\n",
+ "from scipy.optimize import leastsq\n",
+ "\n",
+ "logger = logging.getLogger()\n",
+ "logger.setLevel(logging.INFO)\n",
+ "rng = numpy.random.RandomState([2015,10,10])\n",
+ "\n",
+ "#some hyper-parameters\n",
+ "nhid = 800\n",
+ "learning_rate = 0.5\n",
+ "max_epochs = 50\n",
+ "l1_weight = 0.0\n",
+ "l2_weight = 0.0\n",
+ "cost = CECost()\n",
+ " \n",
+ "stats = []\n",
+ "layer = 1\n",
+ "for i in xrange(1, 2):\n",
+ "\n",
+ " train_dp.reset()\n",
+ " valid_dp.reset()\n",
+ " test_dp.reset()\n",
+ " \n",
+ " #define the model\n",
+ " model = MLP(cost=cost)\n",
+ " model.add_layer(Sigmoid(idim=784, odim=nhid, irange=0.2, rng=rng))\n",
+ " for i in xrange(1, layer):\n",
+ " logger.info(\"Stacking hidden layer (%s)\" % str(i+1))\n",
+ " model.add_layer(Sigmoid(idim=nhid, odim=nhid, irange=0.2, rng=rng))\n",
+ " model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))\n",
+ "\n",
+ " # define the optimiser, here stochasitc gradient descent\n",
+ " # with fixed learning rate and max_epochs\n",
+ " lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)\n",
+ " dp_scheduler = DropoutFixed(0.5, 0.5)\n",
+ " optimiser = SGDOptimiser(lr_scheduler=lr_scheduler, \n",
+ " dp_scheduler=dp_scheduler,\n",
+ " l1_weight=l1_weight, \n",
+ " l2_weight=l2_weight)\n",
+ "\n",
+ " logger.info('Training started...')\n",
+ " tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)\n",
+ "\n",
+ " logger.info('Testing the model on test set:')\n",
+ " tst_cost, tst_accuracy = optimiser.validate(model, test_dp)\n",
+ " logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))\n",
+ " \n",
+ " stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
From c130785b793d754f0eb28d2996a4649ae0aec139 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sat, 14 Nov 2015 17:06:12 +0000
Subject: [PATCH 2/7] some progress with 05 solutions
---
04_Regularisation_solution.ipynb | 5 +-
05_Transfer_functions_solution.ipynb | 637 ++++++++++++++++++++++++++
06_MLP_Coursework2_Introduction.ipynb | 13 +-
mlp/layers.py | 138 +++++-
mlp/optimisers.py | 6 +-
mlp/schedulers.py | 15 +
6 files changed, 800 insertions(+), 14 deletions(-)
create mode 100644 05_Transfer_functions_solution.ipynb
diff --git a/04_Regularisation_solution.ipynb b/04_Regularisation_solution.ipynb
index cbae7c6..9537b96 100644
--- a/04_Regularisation_solution.ipynb
+++ b/04_Regularisation_solution.ipynb
@@ -165,7 +165,10 @@
"import logging\n",
"from mlp.dataset import MNISTDataProvider\n",
"\n",
+ "logger = logging.getLogger()\n",
+ "logger.setLevel(logging.INFO)\n",
"logger.info('Initialising data providers...')\n",
+ "\n",
"train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)\n",
"valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)\n",
"test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)"
@@ -467,8 +470,6 @@
"from mlp.schedulers import LearningRateFixed\n",
"from scipy.optimize import leastsq\n",
"\n",
- "logger = logging.getLogger()\n",
- "logger.setLevel(logging.INFO)\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
diff --git a/05_Transfer_functions_solution.ipynb b/05_Transfer_functions_solution.ipynb
new file mode 100644
index 0000000..8443e94
--- /dev/null
+++ b/05_Transfer_functions_solution.ipynb
@@ -0,0 +1,637 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Introduction\n",
+ "\n",
+ "This tutorial focuses on implementation of alternatives to sigmoid transfer functions for hidden units. (*Transfer functions* are also called *activation functions* or *nonlinearities*.) First, we will work with hyperboilc tangent (tanh) and then unbounded (or partially unbounded) piecewise linear functions: Rectifying Linear Units (ReLU) and Maxout.\n",
+ "\n",
+ "\n",
+ "## Virtual environments\n",
+ "\n",
+ "Before you proceed onwards, remember to activate your virtual environment by typing `activate_mlp` or `source ~/mlpractical/venv/bin/activate` (or if you did the original install the \"comfy way\" type: `workon mlpractical`).\n",
+ "\n",
+ "\n",
+ "## Syncing the git repository\n",
+ "\n",
+ "Look here for more details. But in short, we recommend to create a separate branch for this lab, as follows:\n",
+ "\n",
+ "1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n",
+ "2. List the branches and check which are currently active by typing: `git branch`\n",
+ "3. If you have followed our recommendations, you should be in the `lab4` branch, please commit your local changed to the repo index by typing:\n",
+ "```\n",
+ "git commit -am \"finished lab4\"\n",
+ "```\n",
+ "4. Now you can switch to `master` branch by typing: \n",
+ "```\n",
+ "git checkout master\n",
+ " ```\n",
+ "5. To update the repository (note, assuming master does not have any conflicts), if there are some, have a look here\n",
+ "```\n",
+ "git pull\n",
+ "```\n",
+ "6. And now, create the new branch & switch to it by typing:\n",
+ "```\n",
+ "git checkout -b lab5\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Overview of alternative transfer functions\n",
+ "\n",
+ "Now, we briefly summarise some other possible choices for hidden layer transfer functions.\n",
+ "\n",
+ "## Tanh\n",
+ "\n",
+ "Given a linear activation $a_{i}$ tanh implements the following operation:\n",
+ "\n",
+ "(1) $h_i(a_i) = \\mbox{tanh}(a_i) = \\frac{\\exp(a_i) - \\exp(-a_i)}{\\exp(a_i) + \\exp(-a_i)}$\n",
+ "\n",
+ "Hence, the derivative of $h_i$ with respect to $a_i$ is:\n",
+ "\n",
+ "(2) $\\begin{align}\n",
+ "\\frac{\\partial h_i}{\\partial a_i} &= 1 - h^2_i\n",
+ "\\end{align}\n",
+ "$\n",
+ "\n",
+ "\n",
+ "## ReLU\n",
+ "\n",
+ "Given a linear activation $a_{i}$ relu implements the following operation:\n",
+ "\n",
+ "(3) $h_i(a_i) = \\max(0, a_i)$\n",
+ "\n",
+ "Hence, the gradient is :\n",
+ "\n",
+ "(4) $\\begin{align}\n",
+ "\\frac{\\partial h_i}{\\partial a_i} &=\n",
+ "\\begin{cases}\n",
+ " 1 & \\quad \\text{if } a_i > 0 \\\\\n",
+ " 0 & \\quad \\text{if } a_i \\leq 0 \\\\\n",
+ "\\end{cases}\n",
+ "\\end{align}\n",
+ "$\n",
+ "\n",
+ "ReLU implements a form of data-driven sparsity, that is, on average the activations are sparse (many of them are 0) but the general sparsity pattern will depend on particular data-point. This is different from sparsity obtained in model's parameters one can obtain with $L1$ regularisation as the latter affect all data-points in the same way.\n",
+ "\n",
+ "## Maxout\n",
+ "\n",
+ "Maxout is an example of data-driven type of non-linearity in which the transfer function can be learned from data. That is, the model can build a non-linear transfer function from piecewise linear components. These linear components, depending on the number of linear regions used in the pooling operator (given by parameter $K$), can approximate arbitrary functions, such as ReLU, abs, etc.\n",
+ "\n",
+ "Given some subset (group, pool) of $K$ linear activations $a_{j}, a_{j+1}, \\ldots, a_{j+K}$ at the $l$-th layer, maxout implements the following operation:\n",
+ "\n",
+ "(5) $h_i(a_j, a_{j+1}, \\ldots, a_{j+K}) = \\max(a_j, a_{j+1}, \\ldots, a_{j+K})$\n",
+ "\n",
+ "Hence, the gradient of $h_i$ w.r.t to the pooling region $a_{j}, a_{j+1}, \\ldots, a_{j+K}$ is :\n",
+ "\n",
+ "(6) $\\begin{align}\n",
+ "\\frac{\\partial h_i}{\\partial (a_j, a_{j+1}, \\ldots, a_{j+K})} &=\n",
+ "\\begin{cases}\n",
+ " 1 & \\quad \\text{for the max activation} \\\\\n",
+ " 0 & \\quad \\text{otherwise} \\\\\n",
+ "\\end{cases}\n",
+ "\\end{align}\n",
+ "$\n",
+ "\n",
+ "Implementation tips are given in Exercise 3.\n",
+ "\n",
+ "# On weight initialisation\n",
+ "\n",
+ "Activation functions directly affect the \"network dynamics\", that is, the magnitudes of the statistics each layer is producing. For example, *slashing* non-linearities like sigmoid or tanh bring the linear activations to a certain bounded range. ReLU, on the contrary, has an unbounded positive side. This directly affects all statistics collected in forward and backward passes as well as the gradients w.r.t paramters - hence also the pace at which the model learns. That is why learning rate is usually required to be tuned for given the characterictics of the non-linearities used. \n",
+ "\n",
+ "Another important hyperparameter is the initial range used to initialise the weight matrices. We have largely ignored it so far (although if you did further experiments in coursework 1, you may have found setting it had an effect on training deeper networks with 4 or 5 hidden layers). However, for sigmoidal non-linearities (sigmoid, tanh) the initialisation range is an important hyperparameter and a considerable amount of research has been put into determining what is the best strategy for choosing it. In fact, one of the early triggers of the recent resurgence of deep learning was pre-training - techniques for initialising weights in an unsupervised manner so that one can effectively train deeper models in supervised fashion later. \n",
+ "\n",
+ "## Sigmoidal transfer functions\n",
+ "\n",
+ "Y. LeCun in [Efficient Backprop](http://link.springer.com/chapter/10.1007%2F3-540-49430-8_2) recommends the following setting of the initial range $r$ for sigmoidal units (assuming that the data has been normalised to zero mean, unit variance): \n",
+ "\n",
+ "(7) $ r = \\frac{1}{\\sqrt{N_{IN}}} $\n",
+ "\n",
+ "where $N_{IN}$ is the number of inputs to the given layer and the weights are then sampled from the (usually uniform) distribution $U(-r,r)$. The motivation is to keep the initial forward-pass signal in the linear region of the sigmoid non-linearity so that the gradients are large enough for training to proceed (note that the sigmoidal non-linearities saturate when activations are either very positive or very negative, leading to very small gradients and hence poor learning dynamics).\n",
+ "\n",
+ "The initialisation used in (7) however leads to different magnitudes of activations/gradients at different layers (due to multiplicative nature of the computations) and more recently, [Glorot et. al](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf) proposed the so-called *normalised initialisation*, which ensures the variance of the forward signal (activations) is approximately the same in each layer. The same applies to the gradients obtained in backward pass. \n",
+ "\n",
+ "The $r$ in the *normalised initialisation* for $\\mbox{tanh}$ non-linearity is then:\n",
+ "\n",
+ "(8) $ r = \\frac{\\sqrt{6}}{\\sqrt{N_{IN}+N_{OUT}}} $\n",
+ "\n",
+ "For the sigmoid (logistic) non-linearity, to get similiar characteristics, one should scale $r$ in (8) by 4, that is:\n",
+ "\n",
+ "(9) $ r = \\frac{4\\sqrt{6}}{\\sqrt{N_{IN}+N_{OUT}}} $\n",
+ "\n",
+ "## Piece-wise linear transfer functions (ReLU, Maxout)\n",
+ "\n",
+ "For unbounded transfer functions initialisation is not as crucial as for sigmoidal ones. This is due to the fact that their gradients do not diminish (they are acutally more likely to explode) and they do not saturate (ReLU saturates at 0, but not on the positive slope, where gradient is 1 everywhere). (In practice ReLU is sometimes \"clipped\" with a maximum value, typically 20).\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 1: Implement the tanh transfer function\n",
+ "\n",
+ "Your implementation should follow the code conventions used to build other layer types (for example, Sigmoid and Softmax). Test your solution by training a one-hidden-layer model with 100 hidden units, similiar to the one used in Task 3a in the coursework. \n",
+ "\n",
+ "Tune the learning rate and compare the initial ranges in equations (7) and (8). Note that there might not be much difference for one-hidden-layer model, but you can easily notice a substantial gain from using (8) (or (9) for logistic sigmoid activation) for deeper models, for example, the 5 hidden-layer network from the first coursework.\n",
+ "\n",
+ "Implementation tip: Use numpy.tanh() to compute the non-linearity. Use the irange argument when creating the given layer type to provide the initial sampling range."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:Initialising data providers...\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy\n",
+ "import logging\n",
+ "from mlp.dataset import MNISTDataProvider\n",
+ "\n",
+ "logger = logging.getLogger()\n",
+ "logger.setLevel(logging.INFO)\n",
+ "\n",
+ "# Note, you were asked to do run the experiments on all data. \n",
+ "# Here I am running those examples on 1000 training data-points only (similar to regularisation notebook)\n",
+ "logger.info('Initialising data providers...')\n",
+ "train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)\n",
+ "valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)\n",
+ "test_dp = MNISTDataProvider(dset='eval', batch_size=10000, max_num_batches=-10, randomize=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:Training started...\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.368. Accuracy is 7.80%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.347. Accuracy is 9.86%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.421. Accuracy is 64.70%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.479. Accuracy is 85.95%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 10 seconds. Training speed 233 pps. Validation speed 1624 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.571. Accuracy is 81.60%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.484. Accuracy is 85.23%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 11 seconds. Training speed 214 pps. Validation speed 1637 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.411. Accuracy is 87.40%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.507. Accuracy is 85.40%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 11 seconds. Training speed 226 pps. Validation speed 1640 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.318. Accuracy is 90.10%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.596. Accuracy is 84.40%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 10 seconds. Training speed 232 pps. Validation speed 1616 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.257. Accuracy is 91.80%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.468. Accuracy is 87.76%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 11 seconds. Training speed 229 pps. Validation speed 1629 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.244. Accuracy is 92.30%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.535. Accuracy is 86.31%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 11 seconds. Training speed 230 pps. Validation speed 1600 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.169. Accuracy is 94.30%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.554. Accuracy is 86.59%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 11 seconds. Training speed 226 pps. Validation speed 1631 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.130. Accuracy is 96.60%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.562. Accuracy is 86.83%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 11 seconds. Training speed 225 pps. Validation speed 1603 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.113. Accuracy is 96.90%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.605. Accuracy is 85.94%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 11 seconds. Training speed 231 pps. Validation speed 1616 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.087. Accuracy is 97.10%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.564. Accuracy is 87.50%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 11 seconds. Training speed 226 pps. Validation speed 1637 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.054. Accuracy is 98.70%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.599. Accuracy is 87.04%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 10 seconds. Training speed 232 pps. Validation speed 1640 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.045. Accuracy is 98.60%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.574. Accuracy is 87.75%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 10 seconds. Training speed 237 pps. Validation speed 1653 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.025. Accuracy is 99.30%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.615. Accuracy is 86.88%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 11 seconds. Training speed 232 pps. Validation speed 1616 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.610. Accuracy is 87.50%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 11 seconds. Training speed 201 pps. Validation speed 1634 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.599. Accuracy is 87.87%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 10 seconds. Training speed 233 pps. Validation speed 1637 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.612. Accuracy is 87.71%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 241 pps. Validation speed 1645 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.614. Accuracy is 87.73%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 237 pps. Validation speed 1634 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.620. Accuracy is 87.77%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 245 pps. Validation speed 1645 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.623. Accuracy is 87.94%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 234 pps. Validation speed 1631 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.625. Accuracy is 87.84%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 11 seconds. Training speed 217 pps. Validation speed 1631 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.633. Accuracy is 87.83%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 235 pps. Validation speed 1618 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.637. Accuracy is 87.93%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 225 pps. Validation speed 1648 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.639. Accuracy is 87.90%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 10 seconds. Training speed 238 pps. Validation speed 1626 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.642. Accuracy is 87.86%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 10 seconds. Training speed 233 pps. Validation speed 1659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.645. Accuracy is 87.91%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 12 seconds. Training speed 179 pps. Validation speed 1618 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.650. Accuracy is 87.90%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 10 seconds. Training speed 241 pps. Validation speed 1637 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.653. Accuracy is 87.98%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 10 seconds. Training speed 250 pps. Validation speed 1629 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.656. Accuracy is 87.89%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 10 seconds. Training speed 232 pps. Validation speed 1640 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.659. Accuracy is 87.92%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 10 seconds. Training speed 235 pps. Validation speed 1613 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.663. Accuracy is 87.91%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 11 seconds. Training speed 223 pps. Validation speed 1613 pps.\n",
+ "INFO:root:Testing the model on test set:\n",
+ "INFO:root:MNIST test set accuracy is 87.69 %, cost (ce) is 0.665\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "from mlp.layers import MLP, Tanh, Softmax #import required layer types\n",
+ "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
+ "\n",
+ "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.schedulers import LearningRateFixed\n",
+ "from scipy.optimize import leastsq\n",
+ "\n",
+ "rng = numpy.random.RandomState([2015,10,10])\n",
+ "\n",
+ "#some hyper-parameters\n",
+ "nhid = 800\n",
+ "learning_rate = 0.2\n",
+ "max_epochs = 30\n",
+ "cost = CECost()\n",
+ " \n",
+ "stats = []\n",
+ "for layer in xrange(1, 2):\n",
+ "\n",
+ " train_dp.reset()\n",
+ " valid_dp.reset()\n",
+ " test_dp.reset()\n",
+ " \n",
+ " #define the model\n",
+ " model = MLP(cost=cost)\n",
+ " model.add_layer(Tanh(idim=784, odim=nhid, irange=1./numpy.sqrt(784), rng=rng))\n",
+ " for i in xrange(1, layer):\n",
+ " logger.info(\"Stacking hidden layer (%s)\" % str(i+1))\n",
+ " model.add_layer(Tanh(idim=nhid, odim=nhid, irange=0.2, rng=rng))\n",
+ " model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))\n",
+ "\n",
+ " # define the optimiser, here stochasitc gradient descent\n",
+ " # with fixed learning rate and max_epochs\n",
+ " lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)\n",
+ " optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)\n",
+ "\n",
+ " logger.info('Training started...')\n",
+ " tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)\n",
+ "\n",
+ " logger.info('Testing the model on test set:')\n",
+ " tst_cost, tst_accuracy = optimiser.validate(model, test_dp)\n",
+ " logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))\n",
+ " \n",
+ " stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 2: Implement ReLU\n",
+ "\n",
+ "Again, your implementation should follow the conventions used to build Linear, Sigmoid and Softmax layers. As in exercise 1, test your solution by training a one-hidden-layer model with 100 hidden units, similiar to the one used in Task 3a in the coursework. Tune the learning rate (start with the initial one set to 0.1) with the initial weight range set to 0.05."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:Training started...\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.362. Accuracy is 9.30%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.338. Accuracy is 10.80%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.002. Accuracy is 68.60%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.623. Accuracy is 81.52%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 10 seconds. Training speed 227 pps. Validation speed 1698 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.483. Accuracy is 86.10%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.416. Accuracy is 88.84%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 10 seconds. Training speed 255 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.361. Accuracy is 90.20%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.388. Accuracy is 89.08%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 10 seconds. Training speed 232 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.294. Accuracy is 91.80%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.384. Accuracy is 88.91%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 10 seconds. Training speed 237 pps. Validation speed 1672 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.246. Accuracy is 94.10%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.375. Accuracy is 89.32%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 10 seconds. Training speed 236 pps. Validation speed 1672 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.217. Accuracy is 94.10%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.382. Accuracy is 88.88%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 10 seconds. Training speed 245 pps. Validation speed 1689 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.184. Accuracy is 96.10%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.420. Accuracy is 87.86%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 10 seconds. Training speed 234 pps. Validation speed 1692 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.148. Accuracy is 97.00%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.392. Accuracy is 88.87%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 11 seconds. Training speed 209 pps. Validation speed 1689 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.135. Accuracy is 97.60%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.381. Accuracy is 89.10%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 10 seconds. Training speed 238 pps. Validation speed 1667 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.109. Accuracy is 98.80%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.389. Accuracy is 89.04%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 10 seconds. Training speed 244 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.102. Accuracy is 98.40%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.406. Accuracy is 88.57%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 10 seconds. Training speed 236 pps. Validation speed 1667 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.085. Accuracy is 99.00%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.415. Accuracy is 88.49%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 11 seconds. Training speed 211 pps. Validation speed 1701 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.069. Accuracy is 99.40%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.423. Accuracy is 88.44%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 11 seconds. Training speed 209 pps. Validation speed 1704 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.057. Accuracy is 99.60%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.433. Accuracy is 88.47%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 10 seconds. Training speed 234 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.050. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.430. Accuracy is 88.60%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 10 seconds. Training speed 231 pps. Validation speed 1704 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.042. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.437. Accuracy is 88.57%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 241 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.039. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.452. Accuracy is 88.24%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 233 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.032. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.453. Accuracy is 88.39%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 236 pps. Validation speed 1712 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.028. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.447. Accuracy is 89.01%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 238 pps. Validation speed 1678 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.025. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.466. Accuracy is 88.41%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 10 seconds. Training speed 233 pps. Validation speed 1710 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.023. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.464. Accuracy is 88.72%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 220 pps. Validation speed 1695 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.021. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.465. Accuracy is 88.70%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 201 pps. Validation speed 1695 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.019. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.472. Accuracy is 88.55%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 11 seconds. Training speed 188 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.017. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.477. Accuracy is 88.53%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 11 seconds. Training speed 197 pps. Validation speed 1640 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.016. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.482. Accuracy is 88.59%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 11 seconds. Training speed 214 pps. Validation speed 1689 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.482. Accuracy is 88.73%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 11 seconds. Training speed 210 pps. Validation speed 1675 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.490. Accuracy is 88.65%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 12 seconds. Training speed 165 pps. Validation speed 1684 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.013. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.496. Accuracy is 88.47%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 12 seconds. Training speed 164 pps. Validation speed 1672 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.496. Accuracy is 88.55%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 12 seconds. Training speed 172 pps. Validation speed 1650 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.500. Accuracy is 88.56%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 10 seconds. Training speed 235 pps. Validation speed 1667 pps.\n",
+ "INFO:root:Testing the model on test set:\n",
+ "INFO:root:MNIST test set accuracy is 88.10 %, cost (ce) is 0.497\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "from mlp.layers import MLP, Relu, Softmax #import required layer types\n",
+ "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
+ "\n",
+ "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.schedulers import LearningRateFixed\n",
+ "from scipy.optimize import leastsq\n",
+ "\n",
+ "rng = numpy.random.RandomState([2015,10,10])\n",
+ "\n",
+ "#some hyper-parameters\n",
+ "nhid = 800\n",
+ "learning_rate = 0.1\n",
+ "max_epochs = 30\n",
+ "cost = CECost()\n",
+ " \n",
+ "stats = []\n",
+ "for layer in xrange(1, 2):\n",
+ "\n",
+ " train_dp.reset()\n",
+ " valid_dp.reset()\n",
+ " test_dp.reset()\n",
+ " \n",
+ " #define the model\n",
+ " model = MLP(cost=cost)\n",
+ " model.add_layer(Relu(idim=784, odim=nhid, irange=0.05, rng=rng))\n",
+ " for i in xrange(1, layer):\n",
+ " logger.info(\"Stacking hidden layer (%s)\" % str(i+1))\n",
+ " model.add_layer(Relu(idim=nhid, odim=nhid, irange=0.2, rng=rng))\n",
+ " model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))\n",
+ "\n",
+ " # define the optimiser, here stochasitc gradient descent\n",
+ " # with fixed learning rate and max_epochs\n",
+ " lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)\n",
+ " optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)\n",
+ "\n",
+ " logger.info('Training started...')\n",
+ " tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)\n",
+ "\n",
+ " logger.info('Testing the model on test set:')\n",
+ " tst_cost, tst_accuracy = optimiser.validate(model, test_dp)\n",
+ " logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))\n",
+ " \n",
+ " stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 3: Implement Maxout\n",
+ "\n",
+ "As with the previous two exercises, your implementation should follow the conventions used to build the Linear, Sigmoid and Softmax layers. For now implement only non-overlapping pools (i.e. the pool in which all activations $a_{j}, a_{j+1}, \\ldots, a_{j+K}$ belong to only one pool). As before, test your solution by training a one-hidden-layer model with 100 hidden units, similiar to the one used in Task 3a in the coursework. Use the same optimisation hyper-parameters (learning rate, initial weights range) as you used for ReLU models. Tune the pool size $K$ (but keep the number of total parameters fixed).\n",
+ "\n",
+ "Note: The Max operator reduces dimensionality, hence for example, to get 100 hidden maxout units with pooling size set to $K=2$ the size of linear part needs to be set to $100K$ (assuming non-overlapping pools). This affects how you compute the total number of weights in the model.\n",
+ "\n",
+ "Implementation tips: To back-propagate through the maxout layer, one needs to keep track of which linear activation $a_{j}, a_{j+1}, \\ldots, a_{j+K}$ was the maximum in each pool. The convenient way to do so is by storing the indices of the maximum units in the fprop function and then in the backprop stage pass the gradient only through those (i.e. for example, one can build an auxiliary matrix where each element is either 1 (if unit was maximum, and passed forward through the max operator for a given data-point) or 0 otherwise. Then in the backward pass it suffices to upsample the maxout *igrads* signal to the linear layer dimension and element-wise multiply by the aforemenioned auxiliary matrix.\n",
+ "\n",
+ "*Optional:* Implement the generic pooling mechanism by introducing an additional *stride* hyper-parameter $0\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Training started...'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 40\u001b[1;33m \u001b[0mtr_stats\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_stats\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moptimiser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_dp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_dp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 41\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 42\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Testing the model on test set:'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/optimisers.pyc\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(self, model, train_iterator, valid_iterator)\u001b[0m\n\u001b[0;32m 160\u001b[0m \u001b[1;31m# do the initial validation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 162\u001b[1;33m \u001b[0mtr_nll\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtr_acc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0ml1_weight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0ml2_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 163\u001b[0m logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'\n\u001b[0;32m 164\u001b[0m % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))\n",
+ "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/optimisers.pyc\u001b[0m in \u001b[0;36mvalidate\u001b[1;34m(self, model, valid_iterator, l1_weight, l2_weight)\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[0macc_list\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnll_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mvalid_iterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 36\u001b[1;33m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 37\u001b[0m \u001b[0mnll_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcost\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcost\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[0macc_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclassification_accuracy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/layers.pyc\u001b[0m in \u001b[0;36mfprop\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/layers.pyc\u001b[0m in \u001b[0;36mfprop\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[1;31m#get the linear activations\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 467\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mMaxout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 468\u001b[1;33m \u001b[0mar\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0modim\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 469\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mh_argmax\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmax_and_argmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mar\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeepdims_argmax\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 470\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mh_argmax\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mh_argmax\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mValueError\u001b[0m: total size of new array must be unchanged"
+ ]
+ }
+ ],
+ "source": [
+ "#%load_ext autoreload\n",
+ "%autoreload\n",
+ "from mlp.layers import MLP, Maxout, Softmax #import required layer types\n",
+ "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
+ "\n",
+ "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.schedulers import LearningRateFixed\n",
+ "from scipy.optimize import leastsq\n",
+ "\n",
+ "rng = numpy.random.RandomState([2015,10,10])\n",
+ "\n",
+ "#some hyper-parameters\n",
+ "nhid = 800\n",
+ "learning_rate = 0.1\n",
+ "k = 2\n",
+ "max_epochs = 30\n",
+ "cost = CECost()\n",
+ " \n",
+ "stats = []\n",
+ "for layer in xrange(1, 2):\n",
+ "\n",
+ " train_dp.reset()\n",
+ " valid_dp.reset()\n",
+ " test_dp.reset()\n",
+ " \n",
+ " #define the model\n",
+ " model = MLP(cost=cost)\n",
+ " model.add_layer(Maxout(idim=784, odim=nhid, k=k, irange=0.05, rng=rng))\n",
+ " for i in xrange(1, layer):\n",
+ " logger.info(\"Stacking hidden layer (%s)\" % str(i+1))\n",
+ " model.add_layer(Maxout(idim=nhid, odim=nhid, k=k, irange=0.2, rng=rng))\n",
+ " model.add_layer(Softmax(idim=nhid, odim=10, rng=rng))\n",
+ "\n",
+ " # define the optimiser, here stochasitc gradient descent\n",
+ " # with fixed learning rate and max_epochs\n",
+ " lr_scheduler = LearningRateFixed(learning_rate=learning_rate, max_epochs=max_epochs)\n",
+ " optimiser = SGDOptimiser(lr_scheduler=lr_scheduler)\n",
+ "\n",
+ " logger.info('Training started...')\n",
+ " tr_stats, valid_stats = optimiser.train(model, train_dp, valid_dp)\n",
+ "\n",
+ " logger.info('Testing the model on test set:')\n",
+ " tst_cost, tst_accuracy = optimiser.validate(model, test_dp)\n",
+ " logger.info('MNIST test set accuracy is %.2f %%, cost (%s) is %.3f'%(tst_accuracy*100., cost.get_name(), tst_cost))\n",
+ " \n",
+ " stats.append((tr_stats, valid_stats, (tst_cost, tst_accuracy)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Exercise 4: Train all the above models with dropout\n",
+ "\n",
+ "Try all of the above non-linearities with dropout training. Use the dropout hyper-parameters $\\{p_{inp}, p_{hid}\\}$ that worked best for sigmoid models from the previous lab.\n",
+ "\n",
+ "Note: the code for dropout you were asked to implement last week has not been given as a solution for this week - as a result you need to move/merge the required dropout parts from your previous *lab4* branch (or implement it if you haven't already done so). \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#This one is a simple merge of above experiments with last exercise in previous tutorial."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/06_MLP_Coursework2_Introduction.ipynb b/06_MLP_Coursework2_Introduction.ipynb
index 7c34daa..04cc798 100644
--- a/06_MLP_Coursework2_Introduction.ipynb
+++ b/06_MLP_Coursework2_Introduction.ipynb
@@ -10,7 +10,7 @@
"\n",
"# Store the intermediate results (check-pointing and pickling)\n",
"\n",
- "Once you have finished a certain task it is a good idea to check-point your current notebook's status (logs, plots and whatever else has been stored in the notebook). By doing this, you can always revert to this state later when necessary. You can do this by going to menus `File->Save and Checkpoint` and `File->Revert to Checkpoint`.\n",
+ "Once you have finished a certain task it is a good idea to check-point your current notebook's status (logs, plots and whatever else has been stored in the notebook). By doing this, you can always revert to this state later when necessary (without rerunning experimens). You can do this by going to menus `File->Save and Checkpoint` and `File->Revert to Checkpoint`.\n",
"\n",
"The other good practice would be to dump to disk models and produced statistics. You can easily do it in python by using pickles, as in the following example."
]
@@ -59,7 +59,8 @@
"* `numpy.amax` - the same as with sum\n",
"* `numpy.transpose` - can specify which axes you want to get transposed in a tensor\n",
"* `numpy.argmax` - gives you the argument (index) of the maximum value in a tensor\n",
- "* `numpy.flatten` - collapses the n-dimensional tensor into vector\n",
+ "* `numpy.flatten` - collapses the n-dimensional tensor into vector (copy)\n",
+ "* `numpy.ravel` - collapses the n-dimensional tensor into vector (creates a view)\n",
"* `numpy.reshape` - allows to reshape tensor into another (valid from data perspective) tensor (matrix, vector) with different shape (but the same number of total elements)\n",
"* `numpy.rot90` - rotate a matrix by 90 (or multiply of 90) degrees counter-clockwise\n",
"* `numpy.newaxis` - adds an axis with dimension 1 (handy for keeping tensor shapes compatible with expected broadcasting)\n",
@@ -181,7 +182,7 @@
"f(x) = \\frac{f(x+\\epsilon) - f(x)}{\\epsilon}\n",
"$\n",
"\n",
- "Because $\\epsilon$ is usually very small (1e-4 or smaller) it is recommended (due to finite precision of numerical machines) to use the centred variant (which was implemented in mlp.utils):\n",
+ "Because $\\epsilon$ is usually very small (1e-4 or smaller) it is recommended (due to finite precision of numerical machines) to use the centred variant (which was implemented in `mlp.utils`):\n",
"\n",
"$\n",
"f(x) = \\frac{f(x+\\epsilon) - f(x-\\epsilon)}{2\\epsilon}\n",
@@ -270,7 +271,7 @@
"\n",
"## Using Cython for the crucial bottleneck pieces\n",
"\n",
- "Cython will compile them to C and the code should be comparable in terms of efficiency to numpy using similar operations in numpy. Of course, one can only rely on numpy. Slicing numpy across many dimensions gets much more complicated than working than working with vectors and matrices and we do undersand those can be confusing for some people. Hence, we allow the basic implementation (with any penalty or preference from our side) to be loop only based (which is perhaps much easier to comprehend and debug).\n",
+ "Cython will compile them to C and the code should be comparable in terms of efficiency to numpy using similar operations in numpy. Of course, one can only rely on numpy. Slicing numpy across many dimensions gets much more complicated than working than working with vectors and matrices and we do undersand those can be confusing for some people. Hence, we allow the basic implementation of convolutiona and/or pooling (with any penalty or preference from our side) to be loop only based (which is perhaps much easier to comprehend and debug).\n",
"\n",
"Below we give an example cython code for matrix-matrix dot function from the second tutorial so you can see the basic differences and compare obtained speeds. They give you all the necessary pattern needed to implement naive (reasonably efficient) convolution. Naive looping in (native) python is gonna be *very* slow.\n",
"\n",
@@ -278,7 +279,7 @@
" * [Cython, language basics](http://docs.cython.org/src/userguide/language_basics.html#language-basics)\n",
" * [Cython, basic tutorial](http://docs.cython.org/src/tutorial/cython_tutorial.html)\n",
" * [Cython in ipython notebooks](http://docs.cython.org/src/quickstart/build.html)\n",
- " * [A tutorial on how to optimise the cython code](http://docs.cython.org/src/tutorial/numpy.html) (a working example is actually a simple convolution code)\n",
+ " * [A tutorial on how to optimise the cython code](http://docs.cython.org/src/tutorial/numpy.html) (a working example is actually a simple convolution code, do not use it `as is`)\n",
" \n",
"\n",
"Before you proceed, in case you do not have installed `cython` (it should be installed with scipy). But in case the below imports do not work, staying in the activated virtual environment type:\n",
@@ -420,7 +421,7 @@
"source": [
"You can optimise the code further as in the [linked](http://docs.cython.org/src/tutorial/numpy.html) tutorial. However, the above example seems to be a reasonable compromise for developing the code - it gives a reasonably accelerated code, with all the security checks one may expect to be existent under development (checking bounds of indices, wheter types of variables match, tracking overflows etc.). Look [here](http://docs.cython.org/src/reference/compilation.html) for more optimisation decorators one can use to speed things up.\n",
"\n",
- "Below we do some benchmarks on each of the above functions. Notice huge speed-up from going from non-optimised cython code to optimised one (on my machine, 643ms -> 6.35ms - this is 2 orders!). It's still around two times slower than BLAS accelerated numpy.dot routine (non-cached result is around 3.3ms). But our method just benchmarks the dot product, operation that has been optimised incredibly well in numerical libraries. Of course, we **do not** want you to use this code for dot products and you should rely on functions provided by numpy (whenever reasonably possible). The above code was just given as an example how to produce much more efficient code with very small effort. In many scenarios (convolution is an example) the code is more complex than a single dot product and some looping is necessary anyway, especially when dealing with multi-dimensional tensors where atom operations using direct loop-based indexing may be much easier to comprehend (and debug) than a direct multi-dimensional manipulation of numpy tensors."
+ "Below we do some benchmarks on each of the above functions. Notice huge speed-up from going from non-optimised cython code to optimised one (on my machine, 643ms -> 6.35ms - this is 2 orders!). It's still around two times slower than BLAS accelerated numpy.dot routine (non-cached result is around 3.3ms). But our method just benchmarks the dot product, operation that has been optimised incredibly well in numerical libraries. Of course, we **do not** want you to use this code for dot products and you should rely on functions provided by numpy (whenever reasonably possible). The above code was just given as an example how to produce much more efficient code with very small programming effort. In many scenarios (convolution is an example) the code is more complex than a single dot product and some looping is necessary anyway, especially when dealing with multi-dimensional tensors where atom operations using direct loop-based indexing may be much easier to comprehend (and debug) than a direct multi-dimensional manipulation of numpy tensors."
]
},
{
diff --git a/mlp/layers.py b/mlp/layers.py
index d548c9c..c80fd31 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -18,7 +18,7 @@ class MLP(object):
through the model (for a mini-batch), which is required to compute
the gradients for the parameters
"""
- def __init__(self, cost):
+ def __init__(self, cost, rng=None):
assert isinstance(cost, Cost), (
"Cost needs to be of type mlp.costs.Cost, got %s" % type(cost)
@@ -31,6 +31,11 @@ class MLP(object):
# for a given minibatch and each layer
self.cost = cost
+ if rng is None:
+ self.rng = numpy.random.RandomState([2015,11,11])
+ else:
+ self.rng = rng
+
def fprop(self, x):
"""
@@ -46,6 +51,32 @@ class MLP(object):
self.activations[i+1] = self.layers[i].fprop(self.activations[i])
return self.activations[-1]
+ def fprop_dropout(self, x, dp_scheduler):
+ """
+ :param x: mini-batch of data-points x
+ :param dp_scheduler: dropout scheduler
+ :return: y (top layer activation) which is an estimate of y given x
+ """
+
+ if len(self.activations) != len(self.layers) + 1:
+ self.activations = [None]*(len(self.layers) + 1)
+
+ p_inp, p_hid = dp_scheduler.get_rate()
+
+ d_inp = 1
+ p_inp_scaler, p_hid_scaler = 1.0/p_inp, 1.0/p_hid
+ if p_inp < 1:
+ d_inp = self.rng.binomial(1, p_inp, size=x.shape)
+
+ self.activations[0] = p_inp_scaler*d_inp*x
+ for i in xrange(0, len(self.layers)):
+ d_hid = 1
+ if p_hid < 1 and i > 0:
+ d_hid = self.rng.binomial(1, p_hid, size=self.activations[i].shape)
+ self.activations[i+1] = self.layers[i].fprop(p_hid_scaler*d_hid*self.activations[i])
+
+ return self.activations[-1]
+
def bprop(self, cost_grad):
"""
:param cost_grad: matrix -- grad of the cost w.r.t y
@@ -258,8 +289,20 @@ class Linear(Layer):
since W and b are only layer's parameters
"""
- grad_W = numpy.dot(inputs.T, deltas)
- grad_b = numpy.sum(deltas, axis=0)
+ #one could use different penalty scalers for the biases
+ #and the weights, but that is not implemented here
+ l2_W_penalty, l2_b_penalty = 0, 0
+ if l2_weight > 0:
+ l2_W_penalty = l2_weight*self.W
+ l2_b_penalty = l2_weight*self.b
+
+ l1_W_penalty, l1_b_penalty = 0, 0
+ if l1_weight > 0:
+ l1_W_penalty = l1_weight*numpy.sign(self.W)
+ l1_b_penalty = l1_weight*numpy.sign(self.b)
+
+ grad_W = numpy.dot(inputs.T, deltas) + l2_W_penalty + l1_W_penalty
+ grad_b = numpy.sum(deltas, axis=0) + l2_b_penalty + l1_b_penalty
return [grad_W, grad_b]
@@ -323,12 +366,12 @@ class Softmax(Linear):
odim,
rng=rng,
irange=irange)
-
+
def fprop(self, inputs):
# compute the linear outputs
a = super(Softmax, self).fprop(inputs)
- # apply numerical stabilisation by subtracting max
+ # apply numerical stabilisation by subtracting max
# from each row (not required for the coursework)
# then compute exponent
assert a.ndim in [1, 2], (
@@ -355,3 +398,88 @@ class Softmax(Linear):
def get_name(self):
return 'softmax'
+
+
+class Relu(Linear):
+ def __init__(self, idim, odim,
+ rng=None,
+ irange=0.1):
+
+ super(Relu, self).__init__(idim, odim, rng, irange)
+
+ def fprop(self, inputs):
+ #get the linear activations
+ a = super(Relu, self).fprop(inputs)
+ h = numpy.clip(a, 0, 20.0)
+ #h = numpy.maximum(a, 0)
+ return h
+
+ def bprop(self, h, igrads):
+ deltas = (h > 0)*igrads #gradient is 1 where the unit is active (h > 0) and 0 otherwise
+ ___, ograds = super(Relu, self).bprop(h=None, igrads=deltas)
+ return deltas, ograds
+
+ def cost_bprop(self, h, igrads, cost):
+ raise NotImplementedError('Relu.bprop_cost method not implemented '
+ 'for the %s cost' % cost.get_name())
+
+ def get_name(self):
+ return 'relu'
+
+
+class Tanh(Linear):
+ def __init__(self, idim, odim,
+ rng=None,
+ irange=0.1):
+
+ super(Tanh, self).__init__(idim, odim, rng, irange)
+
+ def fprop(self, inputs):
+ #get the linear activations
+ a = super(Tanh, self).fprop(inputs)
+ numpy.clip(a, -30.0, 30.0, out=a)
+ h = numpy.tanh(a)
+ return h
+
+ def bprop(self, h, igrads):
+ deltas = (1.0 - h**2) * igrads
+ ___, ograds = super(Tanh, self).bprop(h=None, igrads=deltas)
+ return deltas, ograds
+
+ def cost_bprop(self, h, igrads, cost):
+ raise NotImplementedError('Tanh.bprop_cost method not implemented '
+ 'for the %s cost' % cost.get_name())
+
+ def get_name(self):
+ return 'tanh'
+
+
+class Maxout(Linear):
+ def __init__(self, idim, odim, k,
+ rng=None,
+ irange=0.05):
+
+        #the underlying linear layer produces k candidate activations per maxout unit
+        super(Maxout, self).__init__(idim, odim*k, rng, irange)
+        self.max_odim = odim
+        self.k = k
+
+ def fprop(self, inputs):
+ #get the linear activations
+ a = super(Maxout, self).fprop(inputs)
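+        #group every k consecutive linear outputs and keep the maximum of each
+        #group (maxout pooling); h_argmax marks the winning piece for use in bprop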
+        ar = a.reshape(a.shape[0], self.max_odim, self.k)
+        h, h_argmax = max_and_argmax(ar, axes=2, keepdims_argmax=True)
+ self.h_argmax = h_argmax
+ return h
+
+ def bprop(self, h, igrads):
+        #upsample igrads back to the pre-pooling shape and route the gradients
+        #only through the winning (argmax) pieces
+        igrads_up = igrads.reshape(igrads.shape[0], -1, 1)
+        igrads_up = numpy.tile(igrads_up, (1, 1, self.k))
+        deltas = (igrads_up * self.h_argmax).reshape(igrads.shape[0], -1)
+ ___, ograds = super(Maxout, self).bprop(h=None, igrads=deltas)
+ return deltas, ograds
+
+ def cost_bprop(self, h, igrads, cost):
+ raise NotImplementedError('Maxout.bprop_cost method not implemented '
+ 'for the %s cost' % cost.get_name())
+
+ def get_name(self):
+ return 'maxout'
diff --git a/mlp/optimisers.py b/mlp/optimisers.py
index 1959717..5078821 100644
--- a/mlp/optimisers.py
+++ b/mlp/optimisers.py
@@ -112,8 +112,12 @@ class SGDOptimiser(Optimiser):
acc_list, nll_list = [], []
for x, t in train_iterator:
+
# get the prediction
- y = model.fprop(x)
+ if self.dp_scheduler is not None:
+ y = model.fprop_dropout(x, self.dp_scheduler)
+ else:
+ y = model.fprop(x)
# compute the cost and grad of the cost w.r.t y
cost = model.cost.cost(y, t)
diff --git a/mlp/schedulers.py b/mlp/schedulers.py
index f5499e6..914ea29 100644
--- a/mlp/schedulers.py
+++ b/mlp/schedulers.py
@@ -153,3 +153,18 @@ class LearningRateNewBob(LearningRateScheduler):
self.epoch += 1
return self.rate
+
+
+class DropoutFixed(LearningRateList):
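+    #returns a fixed pair of (input, hidden) 'keep' probabilities for every epoch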
+
+ def __init__(self, p_inp_keep, p_hid_keep):
+ assert 0 < p_inp_keep <= 1 and 0 < p_hid_keep <= 1, (
+            "Dropout 'keep' probabilities are supposed to be in the (0, 1] range"
+ )
+ super(DropoutFixed, self).__init__([(p_inp_keep, p_hid_keep)], max_epochs=999)
+
+ def get_rate(self):
+ return self.lr_list[0]
+
+ def get_next_rate(self, current_error=None):
+ return self.get_rate()
\ No newline at end of file
From cb6712578d61490e46390303e5a9d2a99944f354 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sun, 15 Nov 2015 16:00:58 +0000
Subject: [PATCH 3/7] solutions to 04 and 05
---
04_Regularisation_solution.ipynb | 4 -
05_Transfer_functions_solution.ipynb | 524 +++++++++++++++-----------
06_MLP_Coursework2_Introduction.ipynb | 30 +-
mlp/layers.py | 68 +++-
4 files changed, 388 insertions(+), 238 deletions(-)
diff --git a/04_Regularisation_solution.ipynb b/04_Regularisation_solution.ipynb
index 9537b96..265ad61 100644
--- a/04_Regularisation_solution.ipynb
+++ b/04_Regularisation_solution.ipynb
@@ -292,7 +292,6 @@
"\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
@@ -468,7 +467,6 @@
"from mlp.dataset import MNISTDataProvider #import data provider\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
@@ -644,7 +642,6 @@
"from mlp.dataset import MNISTDataProvider #import data provider\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
@@ -982,7 +979,6 @@
"from mlp.dataset import MNISTDataProvider #import data provider\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed, DropoutFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
diff --git a/05_Transfer_functions_solution.ipynb b/05_Transfer_functions_solution.ipynb
index 8443e94..17b5016 100644
--- a/05_Transfer_functions_solution.ipynb
+++ b/05_Transfer_functions_solution.ipynb
@@ -165,8 +165,8 @@
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"\n",
- "# Note, you were asked to do run the experiments on all data. \n",
- "# Here I am running those examples on 1000 training data-points only (similar to regularisation notebook)\n",
+ "# Note, you were asked to run the experiments on all data and with smaller models. \n",
+ "# Here I am running the exercises on 1000 training data-points only (similar to the regularisation notebook)\n",
"logger.info('Initialising data providers...')\n",
"train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)\n",
"valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)\n",
@@ -175,7 +175,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
@@ -186,100 +186,100 @@
"output_type": "stream",
"text": [
"INFO:root:Training started...\n",
- "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.368. Accuracy is 7.80%\n",
- "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.347. Accuracy is 9.86%\n",
- "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.421. Accuracy is 64.70%\n",
- "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.479. Accuracy is 85.95%\n",
- "INFO:mlp.optimisers:Epoch 1: Took 10 seconds. Training speed 233 pps. Validation speed 1624 pps.\n",
- "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.571. Accuracy is 81.60%\n",
- "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.484. Accuracy is 85.23%\n",
- "INFO:mlp.optimisers:Epoch 2: Took 11 seconds. Training speed 214 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.411. Accuracy is 87.40%\n",
- "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.507. Accuracy is 85.40%\n",
- "INFO:mlp.optimisers:Epoch 3: Took 11 seconds. Training speed 226 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.318. Accuracy is 90.10%\n",
- "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.596. Accuracy is 84.40%\n",
- "INFO:mlp.optimisers:Epoch 4: Took 10 seconds. Training speed 232 pps. Validation speed 1616 pps.\n",
- "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.257. Accuracy is 91.80%\n",
- "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.468. Accuracy is 87.76%\n",
- "INFO:mlp.optimisers:Epoch 5: Took 11 seconds. Training speed 229 pps. Validation speed 1629 pps.\n",
- "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.244. Accuracy is 92.30%\n",
- "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.535. Accuracy is 86.31%\n",
- "INFO:mlp.optimisers:Epoch 6: Took 11 seconds. Training speed 230 pps. Validation speed 1600 pps.\n",
- "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.169. Accuracy is 94.30%\n",
- "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.554. Accuracy is 86.59%\n",
- "INFO:mlp.optimisers:Epoch 7: Took 11 seconds. Training speed 226 pps. Validation speed 1631 pps.\n",
- "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.130. Accuracy is 96.60%\n",
- "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.562. Accuracy is 86.83%\n",
- "INFO:mlp.optimisers:Epoch 8: Took 11 seconds. Training speed 225 pps. Validation speed 1603 pps.\n",
- "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.113. Accuracy is 96.90%\n",
- "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.605. Accuracy is 85.94%\n",
- "INFO:mlp.optimisers:Epoch 9: Took 11 seconds. Training speed 231 pps. Validation speed 1616 pps.\n",
- "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.087. Accuracy is 97.10%\n",
- "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.564. Accuracy is 87.50%\n",
- "INFO:mlp.optimisers:Epoch 10: Took 11 seconds. Training speed 226 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.054. Accuracy is 98.70%\n",
- "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.599. Accuracy is 87.04%\n",
- "INFO:mlp.optimisers:Epoch 11: Took 10 seconds. Training speed 232 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.045. Accuracy is 98.60%\n",
- "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.574. Accuracy is 87.75%\n",
- "INFO:mlp.optimisers:Epoch 12: Took 10 seconds. Training speed 237 pps. Validation speed 1653 pps.\n",
- "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.025. Accuracy is 99.30%\n",
- "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.615. Accuracy is 86.88%\n",
- "INFO:mlp.optimisers:Epoch 13: Took 11 seconds. Training speed 232 pps. Validation speed 1616 pps.\n",
- "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.610. Accuracy is 87.50%\n",
- "INFO:mlp.optimisers:Epoch 14: Took 11 seconds. Training speed 201 pps. Validation speed 1634 pps.\n",
- "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.599. Accuracy is 87.87%\n",
- "INFO:mlp.optimisers:Epoch 15: Took 10 seconds. Training speed 233 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.612. Accuracy is 87.71%\n",
- "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 241 pps. Validation speed 1645 pps.\n",
- "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.614. Accuracy is 87.73%\n",
- "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 237 pps. Validation speed 1634 pps.\n",
- "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.620. Accuracy is 87.77%\n",
- "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 245 pps. Validation speed 1645 pps.\n",
- "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.623. Accuracy is 87.94%\n",
- "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 234 pps. Validation speed 1631 pps.\n",
- "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.625. Accuracy is 87.84%\n",
- "INFO:mlp.optimisers:Epoch 20: Took 11 seconds. Training speed 217 pps. Validation speed 1631 pps.\n",
- "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.633. Accuracy is 87.83%\n",
- "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 235 pps. Validation speed 1618 pps.\n",
- "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.637. Accuracy is 87.93%\n",
- "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 225 pps. Validation speed 1648 pps.\n",
- "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.639. Accuracy is 87.90%\n",
- "INFO:mlp.optimisers:Epoch 23: Took 10 seconds. Training speed 238 pps. Validation speed 1626 pps.\n",
- "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.642. Accuracy is 87.86%\n",
- "INFO:mlp.optimisers:Epoch 24: Took 10 seconds. Training speed 233 pps. Validation speed 1659 pps.\n",
- "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.645. Accuracy is 87.91%\n",
- "INFO:mlp.optimisers:Epoch 25: Took 12 seconds. Training speed 179 pps. Validation speed 1618 pps.\n",
- "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.650. Accuracy is 87.90%\n",
- "INFO:mlp.optimisers:Epoch 26: Took 10 seconds. Training speed 241 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.653. Accuracy is 87.98%\n",
- "INFO:mlp.optimisers:Epoch 27: Took 10 seconds. Training speed 250 pps. Validation speed 1629 pps.\n",
- "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.656. Accuracy is 87.89%\n",
- "INFO:mlp.optimisers:Epoch 28: Took 10 seconds. Training speed 232 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.659. Accuracy is 87.92%\n",
- "INFO:mlp.optimisers:Epoch 29: Took 10 seconds. Training speed 235 pps. Validation speed 1613 pps.\n",
- "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.663. Accuracy is 87.91%\n",
- "INFO:mlp.optimisers:Epoch 30: Took 11 seconds. Training speed 223 pps. Validation speed 1613 pps.\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.319. Accuracy is 10.50%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.315. Accuracy is 11.33%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.048. Accuracy is 66.30%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.571. Accuracy is 82.72%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 2 seconds. Training speed 764 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.485. Accuracy is 84.40%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.455. Accuracy is 86.58%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 2 seconds. Training speed 720 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.362. Accuracy is 87.70%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.435. Accuracy is 86.90%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 2 seconds. Training speed 788 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.251. Accuracy is 92.10%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.417. Accuracy is 88.09%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 2 seconds. Training speed 788 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.175. Accuracy is 95.40%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.405. Accuracy is 88.16%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 2 seconds. Training speed 776 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.121. Accuracy is 96.40%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.458. Accuracy is 87.24%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 2 seconds. Training speed 690 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.091. Accuracy is 97.90%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.418. Accuracy is 88.37%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 2 seconds. Training speed 841 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.065. Accuracy is 98.70%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.400. Accuracy is 89.44%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 2 seconds. Training speed 794 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.043. Accuracy is 99.30%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.406. Accuracy is 89.35%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 2 seconds. Training speed 747 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.029. Accuracy is 99.50%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.410. Accuracy is 89.69%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 2 seconds. Training speed 953 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.023. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.424. Accuracy is 89.41%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 2 seconds. Training speed 953 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.018. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.429. Accuracy is 89.50%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 2 seconds. Training speed 870 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.015. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.428. Accuracy is 89.58%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 2 seconds. Training speed 878 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.436. Accuracy is 89.41%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 2 seconds. Training speed 894 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.010. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.433. Accuracy is 89.64%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 2 seconds. Training speed 834 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.439. Accuracy is 89.63%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 2 seconds. Training speed 820 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.443. Accuracy is 89.78%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 2 seconds. Training speed 902 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.446. Accuracy is 89.72%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 2 seconds. Training speed 870 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.445. Accuracy is 89.83%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 2 seconds. Training speed 918 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.451. Accuracy is 89.75%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 2 seconds. Training speed 834 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.454. Accuracy is 89.80%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 2 seconds. Training speed 902 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.456. Accuracy is 89.77%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 2 seconds. Training speed 863 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.458. Accuracy is 89.84%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 2 seconds. Training speed 820 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.460. Accuracy is 89.80%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 2 seconds. Training speed 856 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.461. Accuracy is 89.86%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 2 seconds. Training speed 902 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.467. Accuracy is 89.86%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 2 seconds. Training speed 910 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.466. Accuracy is 89.81%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 2 seconds. Training speed 827 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.468. Accuracy is 89.84%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 2 seconds. Training speed 894 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.471. Accuracy is 89.83%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 2 seconds. Training speed 902 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.473. Accuracy is 89.81%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 2 seconds. Training speed 918 pps. Validation speed 11495 pps.\n",
"INFO:root:Testing the model on test set:\n",
- "INFO:root:MNIST test set accuracy is 87.69 %, cost (ce) is 0.665\n"
+ "INFO:root:MNIST test set accuracy is 89.33 %, cost (ce) is 0.480\n"
]
}
],
@@ -287,15 +287,13 @@
"\n",
"from mlp.layers import MLP, Tanh, Softmax #import required layer types\n",
"from mlp.optimisers import SGDOptimiser #import the optimiser\n",
- "\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
- "nhid = 800\n",
+ "nhid = 100\n",
"learning_rate = 0.2\n",
"max_epochs = 30\n",
"cost = CECost()\n",
@@ -341,7 +339,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {
"collapsed": false,
"scrolled": true
@@ -352,116 +350,114 @@
"output_type": "stream",
"text": [
"INFO:root:Training started...\n",
- "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.362. Accuracy is 9.30%\n",
- "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.338. Accuracy is 10.80%\n",
- "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.002. Accuracy is 68.60%\n",
- "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.623. Accuracy is 81.52%\n",
- "INFO:mlp.optimisers:Epoch 1: Took 10 seconds. Training speed 227 pps. Validation speed 1698 pps.\n",
- "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.483. Accuracy is 86.10%\n",
- "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.416. Accuracy is 88.84%\n",
- "INFO:mlp.optimisers:Epoch 2: Took 10 seconds. Training speed 255 pps. Validation speed 1710 pps.\n",
- "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.361. Accuracy is 90.20%\n",
- "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.388. Accuracy is 89.08%\n",
- "INFO:mlp.optimisers:Epoch 3: Took 10 seconds. Training speed 232 pps. Validation speed 1710 pps.\n",
- "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.294. Accuracy is 91.80%\n",
- "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.384. Accuracy is 88.91%\n",
- "INFO:mlp.optimisers:Epoch 4: Took 10 seconds. Training speed 237 pps. Validation speed 1672 pps.\n",
- "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.246. Accuracy is 94.10%\n",
- "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.375. Accuracy is 89.32%\n",
- "INFO:mlp.optimisers:Epoch 5: Took 10 seconds. Training speed 236 pps. Validation speed 1672 pps.\n",
- "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.217. Accuracy is 94.10%\n",
- "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.382. Accuracy is 88.88%\n",
- "INFO:mlp.optimisers:Epoch 6: Took 10 seconds. Training speed 245 pps. Validation speed 1689 pps.\n",
- "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.184. Accuracy is 96.10%\n",
- "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.420. Accuracy is 87.86%\n",
- "INFO:mlp.optimisers:Epoch 7: Took 10 seconds. Training speed 234 pps. Validation speed 1692 pps.\n",
- "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.148. Accuracy is 97.00%\n",
- "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.392. Accuracy is 88.87%\n",
- "INFO:mlp.optimisers:Epoch 8: Took 11 seconds. Training speed 209 pps. Validation speed 1689 pps.\n",
- "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.135. Accuracy is 97.60%\n",
- "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.381. Accuracy is 89.10%\n",
- "INFO:mlp.optimisers:Epoch 9: Took 10 seconds. Training speed 238 pps. Validation speed 1667 pps.\n",
- "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.109. Accuracy is 98.80%\n",
- "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.389. Accuracy is 89.04%\n",
- "INFO:mlp.optimisers:Epoch 10: Took 10 seconds. Training speed 244 pps. Validation speed 1675 pps.\n",
- "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.102. Accuracy is 98.40%\n",
- "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.406. Accuracy is 88.57%\n",
- "INFO:mlp.optimisers:Epoch 11: Took 10 seconds. Training speed 236 pps. Validation speed 1667 pps.\n",
- "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.085. Accuracy is 99.00%\n",
- "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.415. Accuracy is 88.49%\n",
- "INFO:mlp.optimisers:Epoch 12: Took 11 seconds. Training speed 211 pps. Validation speed 1701 pps.\n",
- "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.069. Accuracy is 99.40%\n",
- "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.423. Accuracy is 88.44%\n",
- "INFO:mlp.optimisers:Epoch 13: Took 11 seconds. Training speed 209 pps. Validation speed 1704 pps.\n",
- "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.057. Accuracy is 99.60%\n",
- "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.433. Accuracy is 88.47%\n",
- "INFO:mlp.optimisers:Epoch 14: Took 10 seconds. Training speed 234 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.050. Accuracy is 99.70%\n",
- "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.430. Accuracy is 88.60%\n",
- "INFO:mlp.optimisers:Epoch 15: Took 10 seconds. Training speed 231 pps. Validation speed 1704 pps.\n",
- "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.042. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.437. Accuracy is 88.57%\n",
- "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 241 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.039. Accuracy is 99.80%\n",
- "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.452. Accuracy is 88.24%\n",
- "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 233 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.032. Accuracy is 99.80%\n",
- "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.453. Accuracy is 88.39%\n",
- "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 236 pps. Validation speed 1712 pps.\n",
- "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.028. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.447. Accuracy is 89.01%\n",
- "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 238 pps. Validation speed 1678 pps.\n",
- "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.025. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.466. Accuracy is 88.41%\n",
- "INFO:mlp.optimisers:Epoch 20: Took 10 seconds. Training speed 233 pps. Validation speed 1710 pps.\n",
- "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.023. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.464. Accuracy is 88.72%\n",
- "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 220 pps. Validation speed 1695 pps.\n",
- "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.021. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.465. Accuracy is 88.70%\n",
- "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 201 pps. Validation speed 1695 pps.\n",
- "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.019. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.472. Accuracy is 88.55%\n",
- "INFO:mlp.optimisers:Epoch 23: Took 11 seconds. Training speed 188 pps. Validation speed 1675 pps.\n",
- "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.017. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.477. Accuracy is 88.53%\n",
- "INFO:mlp.optimisers:Epoch 24: Took 11 seconds. Training speed 197 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.016. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.482. Accuracy is 88.59%\n",
- "INFO:mlp.optimisers:Epoch 25: Took 11 seconds. Training speed 214 pps. Validation speed 1689 pps.\n",
- "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.482. Accuracy is 88.73%\n",
- "INFO:mlp.optimisers:Epoch 26: Took 11 seconds. Training speed 210 pps. Validation speed 1675 pps.\n",
- "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.490. Accuracy is 88.65%\n",
- "INFO:mlp.optimisers:Epoch 27: Took 12 seconds. Training speed 165 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.013. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.496. Accuracy is 88.47%\n",
- "INFO:mlp.optimisers:Epoch 28: Took 12 seconds. Training speed 164 pps. Validation speed 1672 pps.\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.317. Accuracy is 15.20%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.317. Accuracy is 13.98%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.452. Accuracy is 60.20%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.750. Accuracy is 81.69%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 2 seconds. Training speed 820 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.632. Accuracy is 82.40%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.503. Accuracy is 86.74%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 2 seconds. Training speed 788 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.446. Accuracy is 87.50%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.438. Accuracy is 87.24%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 2 seconds. Training speed 788 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.359. Accuracy is 90.00%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.444. Accuracy is 86.44%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 2 seconds. Training speed 710 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.304. Accuracy is 90.80%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.408. Accuracy is 87.90%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 2 seconds. Training speed 782 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.255. Accuracy is 93.80%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.390. Accuracy is 88.56%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 2 seconds. Training speed 782 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.225. Accuracy is 93.80%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.425. Accuracy is 87.46%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 2 seconds. Training speed 725 pps. Validation speed 13890 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.205. Accuracy is 95.00%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.399. Accuracy is 88.51%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 2 seconds. Training speed 834 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.163. Accuracy is 96.20%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.474. Accuracy is 85.74%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 2 seconds. Training speed 814 pps. Validation speed 13700 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.140. Accuracy is 96.40%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.418. Accuracy is 88.06%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 2 seconds. Training speed 788 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.120. Accuracy is 97.70%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.427. Accuracy is 87.93%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 2 seconds. Training speed 731 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.105. Accuracy is 98.10%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.449. Accuracy is 87.51%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 2 seconds. Training speed 725 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.088. Accuracy is 98.50%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.479. Accuracy is 87.14%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 2 seconds. Training speed 715 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.086. Accuracy is 98.30%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.455. Accuracy is 87.97%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 2 seconds. Training speed 681 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.070. Accuracy is 99.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.465. Accuracy is 87.76%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 2 seconds. Training speed 758 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.054. Accuracy is 99.50%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.467. Accuracy is 88.07%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 2 seconds. Training speed 776 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.052. Accuracy is 99.60%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.485. Accuracy is 87.69%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 2 seconds. Training speed 801 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.042. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.500. Accuracy is 87.61%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 2 seconds. Training speed 686 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.035. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.499. Accuracy is 87.76%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 2 seconds. Training speed 764 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.031. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.506. Accuracy is 87.77%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 2 seconds. Training speed 801 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.027. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.506. Accuracy is 87.61%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 2 seconds. Training speed 731 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.025. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.516. Accuracy is 87.68%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 2 seconds. Training speed 758 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.022. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.529. Accuracy is 87.33%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 2 seconds. Training speed 770 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.020. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.526. Accuracy is 87.70%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 2 seconds. Training speed 715 pps. Validation speed 13700 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.018. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.535. Accuracy is 87.55%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 2 seconds. Training speed 770 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.016. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.540. Accuracy is 87.55%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 2 seconds. Training speed 741 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.015. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.546. Accuracy is 87.57%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 2 seconds. Training speed 681 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.546. Accuracy is 87.78%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 2 seconds. Training speed 753 pps. Validation speed 13700 pps.\n",
"INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.496. Accuracy is 88.55%\n",
- "INFO:mlp.optimisers:Epoch 29: Took 12 seconds. Training speed 172 pps. Validation speed 1650 pps.\n",
- "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.500. Accuracy is 88.56%\n",
- "INFO:mlp.optimisers:Epoch 30: Took 10 seconds. Training speed 235 pps. Validation speed 1667 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.556. Accuracy is 87.56%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 2 seconds. Training speed 758 pps. Validation speed 13700 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.558. Accuracy is 87.74%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 2 seconds. Training speed 747 pps. Validation speed 13515 pps.\n",
"INFO:root:Testing the model on test set:\n",
- "INFO:root:MNIST test set accuracy is 88.10 %, cost (ce) is 0.497\n"
+ "INFO:root:MNIST test set accuracy is 87.19 %, cost (ce) is 0.554\n"
]
}
],
"source": [
"\n",
- "from mlp.layers import MLP, Relu, Softmax #import required layer types\n",
- "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
- "\n",
- "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.layers import MLP, Relu, Softmax \n",
+ "from mlp.optimisers import SGDOptimiser \n",
+ "from mlp.costs import CECost \n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
- "nhid = 800\n",
+ "nhid = 100\n",
"learning_rate = 0.1\n",
"max_epochs = 30\n",
"cost = CECost()\n",
@@ -513,50 +509,126 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:root:Training started...\n"
- ]
- },
- {
- "ename": "ValueError",
- "evalue": "total size of new array must be unchanged",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Training started...'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 40\u001b[1;33m \u001b[0mtr_stats\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_stats\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moptimiser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_dp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_dp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 41\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 42\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Testing the model on test set:'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/optimisers.pyc\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(self, model, train_iterator, valid_iterator)\u001b[0m\n\u001b[0;32m 160\u001b[0m \u001b[1;31m# do the initial validation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 162\u001b[1;33m \u001b[0mtr_nll\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtr_acc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0ml1_weight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0ml2_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 163\u001b[0m logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'\n\u001b[0;32m 164\u001b[0m % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/optimisers.pyc\u001b[0m in \u001b[0;36mvalidate\u001b[1;34m(self, model, valid_iterator, l1_weight, l2_weight)\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[0macc_list\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnll_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mvalid_iterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 36\u001b[1;33m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 37\u001b[0m \u001b[0mnll_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcost\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcost\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[0macc_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclassification_accuracy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/layers.pyc\u001b[0m in \u001b[0;36mfprop\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/layers.pyc\u001b[0m in \u001b[0;36mfprop\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[1;31m#get the linear activations\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 467\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mMaxout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 468\u001b[1;33m \u001b[0mar\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0modim\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 469\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mh_argmax\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmax_and_argmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mar\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeepdims_argmax\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 470\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mh_argmax\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mh_argmax\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;31mValueError\u001b[0m: total size of new array must be unchanged"
+ "ERROR: Line magic function `%autorelaod` not found.\n",
+ "INFO:root:Training started...\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.314. Accuracy is 9.30%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.323. Accuracy is 8.27%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.206. Accuracy is 64.20%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.628. Accuracy is 79.70%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 9 seconds. Training speed 394 pps. Validation speed 1527 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.514. Accuracy is 85.80%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.429. Accuracy is 88.16%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 9 seconds. Training speed 361 pps. Validation speed 1532 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.355. Accuracy is 89.70%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.407. Accuracy is 87.77%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 10 seconds. Training speed 422 pps. Validation speed 1387 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.262. Accuracy is 92.30%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.387. Accuracy is 88.78%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 9 seconds. Training speed 441 pps. Validation speed 1488 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.194. Accuracy is 94.70%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.349. Accuracy is 89.86%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 9 seconds. Training speed 389 pps. Validation speed 1527 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.134. Accuracy is 97.50%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.347. Accuracy is 89.79%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 9 seconds. Training speed 426 pps. Validation speed 1497 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.094. Accuracy is 98.70%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.429. Accuracy is 87.88%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 9 seconds. Training speed 449 pps. Validation speed 1473 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.071. Accuracy is 99.10%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.345. Accuracy is 90.31%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 9 seconds. Training speed 455 pps. Validation speed 1508 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.053. Accuracy is 99.40%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.357. Accuracy is 90.00%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 9 seconds. Training speed 375 pps. Validation speed 1532 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.042. Accuracy is 99.50%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.356. Accuracy is 90.27%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 9 seconds. Training speed 421 pps. Validation speed 1525 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.031. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.347. Accuracy is 90.57%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 9 seconds. Training speed 449 pps. Validation speed 1522 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.026. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.353. Accuracy is 90.50%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 9 seconds. Training speed 449 pps. Validation speed 1504 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.021. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.352. Accuracy is 90.51%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 9 seconds. Training speed 441 pps. Validation speed 1495 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.018. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.355. Accuracy is 90.59%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 9 seconds. Training speed 410 pps. Validation speed 1456 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.015. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.359. Accuracy is 90.66%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 9 seconds. Training speed 463 pps. Validation speed 1429 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.013. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.363. Accuracy is 90.52%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 365 pps. Validation speed 1403 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.364. Accuracy is 90.71%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 351 pps. Validation speed 1368 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.364. Accuracy is 90.65%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 348 pps. Validation speed 1439 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.010. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.367. Accuracy is 90.62%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 11 seconds. Training speed 271 pps. Validation speed 1441 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.366. Accuracy is 90.78%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 10 seconds. Training speed 309 pps. Validation speed 1387 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.371. Accuracy is 90.66%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 348 pps. Validation speed 1323 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.370. Accuracy is 90.68%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 9 seconds. Training speed 435 pps. Validation speed 1488 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.372. Accuracy is 90.70%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 9 seconds. Training speed 405 pps. Validation speed 1443 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.373. Accuracy is 90.80%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 9 seconds. Training speed 389 pps. Validation speed 1482 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.375. Accuracy is 90.71%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 9 seconds. Training speed 402 pps. Validation speed 1525 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.380. Accuracy is 90.65%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 9 seconds. Training speed 405 pps. Validation speed 1522 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.380. Accuracy is 90.75%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 9 seconds. Training speed 415 pps. Validation speed 1534 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.381. Accuracy is 90.66%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 9 seconds. Training speed 410 pps. Validation speed 1493 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.382. Accuracy is 90.67%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 9 seconds. Training speed 396 pps. Validation speed 1536 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.384. Accuracy is 90.75%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 9 seconds. Training speed 463 pps. Validation speed 1532 pps.\n",
+ "INFO:root:Testing the model on test set:\n",
+ "INFO:root:MNIST test set accuracy is 90.02 %, cost (ce) is 0.391\n"
]
}
],
"source": [
- "#%load_ext autoreload\n",
- "%autoreload\n",
- "from mlp.layers import MLP, Maxout, Softmax #import required layer types\n",
- "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
"\n",
- "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.layers import MLP, Maxout, Softmax \n",
+ "from mlp.optimisers import SGDOptimiser\n",
+ "from mlp.costs import CECost \n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
- "\n",
- "rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
- "nhid = 800\n",
+ "nhid = 100\n",
"learning_rate = 0.1\n",
- "k = 2\n",
+ "k = 2 #maxout pool size (stride is assumed k)\n",
"max_epochs = 30\n",
"cost = CECost()\n",
" \n",
diff --git a/06_MLP_Coursework2_Introduction.ipynb b/06_MLP_Coursework2_Introduction.ipynb
index 04cc798..0fd7d79 100644
--- a/06_MLP_Coursework2_Introduction.ipynb
+++ b/06_MLP_Coursework2_Introduction.ipynb
@@ -8,6 +8,33 @@
"\n",
"This notebook contains some extended versions of hints and some code examples that are suppose to make it easier to proceed with certain tasks in the Coursework #2.\n",
"\n",
+ "## Virtual environments\n",
+ "\n",
+ "Before you proceed onwards, remember to activate your virtual environment by typing `activate_mlp` or `source ~/mlpractical/venv/bin/activate` (or if you did the original install the \"comfy way\" type: `workon mlpractical`).\n",
+ "\n",
+ "## Syncing the git repository\n",
+ "\n",
+ "Look here for more details. But in short, we recommend to create a separate branch for this lab, as follows:\n",
+ "\n",
+ "1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n",
+ "2. List the branches and check which are currently active by typing: `git branch`\n",
+ "3. If you have followed our recommendations, you should be in the `lab5` branch, please commit your local changes to the repo index by typing:\n",
+ "```\n",
+ "git commit -am \"finished lab5\"\n",
+ "```\n",
+ "4. Now you can switch to `master` branch by typing: \n",
+ "```\n",
+ "git checkout master\n",
+ " ```\n",
+ "5. To update the repository (note, assuming master does not have any conflicts), if there are some, have a look here\n",
+ "```\n",
+ "git pull\n",
+ "```\n",
+ "6. And now, create the new branch & switch to it by typing:\n",
+ "```\n",
+ "git checkout -b coursework2\n",
+ "```\n",
+ "\n",
"# Store the intermediate results (check-pointing and pickling)\n",
"\n",
"Once you have finished a certain task it is a good idea to check-point your current notebook's status (logs, plots and whatever else has been stored in the notebook). By doing this, you can always revert to this state later when necessary (without rerunning experimens). You can do this by going to menus `File->Save and Checkpoint` and `File->Revert to Checkpoint`.\n",
@@ -67,6 +94,7 @@
"* `numpy.rollaxis` - allows to shuffle certain axis in a tensor\n",
"* `slice` - allows to specify a range (can be used when indexing numpy arrays)\n",
"* `ellipsis` - allows to pick an arbitrary number of dimensions (inferred)\n",
+ "* `max_and_argmax` - `(mlp.layers)` - an auxiliary function we have provided to get both max and argmax of a tensor across an arbitrary axes, possibly in the format preserving tensor's original shape (this is not trivial to do using numpy out-of-the-shelf functionality).\n",
"\n",
"Below cells contain some simple examples showing basics behind tensor manipulation in numpy (go through them if you haven't used numpy in this context before)."
]
@@ -231,7 +259,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "You can also check the backprop implementation in the layer. Notice, it **does not** necessairly check whether your layer implementation is correct but rather if the gradient computation is correct, given forward pass computation. If you get the forward pass wrong, and somehow get gradients right w.r.t what forward pass is computing, the below check will not capture it (obviously). "
+ "You can also check the backprop implementation in the layer. Notice, it **does not** necessairly check whether your layer implementation is correct but rather if the gradient computation is correct, given forward pass computation. If you get the forward pass wrong, and somehow get gradients right w.r.t what forward pass is computing, the below check will not capture it (obviously). Contrary to normal scenraio where 32 floating point precision is sufficient, when checking gradients please make sure 64bit precision is used (or tune the tolerance)."
]
},
{
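For example, a layer gradient check can be run along the following lines. This is a sketch assuming the `Linear` layer from `mlp.layers` and the `verify_layer_gradient` helper from `mlp.utils` (added later in this patch series); note the explicit 64-bit inputs:

```
import numpy
from mlp.layers import Linear
from mlp.utils import verify_layer_gradient

rng = numpy.random.RandomState([2015, 11, 15])
x = rng.uniform(-0.5, 0.5, (5, 10)).astype(numpy.float64)  # keep 64-bit precision
layer = Linear(10, 4, rng=rng, irange=0.05)
print(verify_layer_gradient(layer, x))  # True when analytic and numerical gradients agree
```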
diff --git a/mlp/layers.py b/mlp/layers.py
index c80fd31..f3a17e1 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -10,6 +10,51 @@ from mlp.costs import Cost
logger = logging.getLogger(__name__)
+def max_and_argmax(x, axes=None, keepdims_max=False, keepdims_argmax=False):
+ """
+ Return both max and argmax for the given multi-dimensional array, possibly
+ preserving the original shape
+ :param x: input tensor
+ :param axes: tuple of ints denoting axes across which
+ one should perform reduction
+ :param keepdims_max: boolean, if true, shape of x is preserved in result
+ :param keepdims_argmax: boolean, if true, shape of x is preserved in result
+ :return: max (number) and argmax (indices) of max element along certain axes
+ in multi-dimensional tensor
+ """
+ if axes is None:
+ rval_argmax = numpy.argmax(x)
+ if keepdims_argmax:
+ rval_argmax = numpy.unravel_index(rval_argmax, x.shape)
+ else:
+ if isinstance(axes, int):
+ axes = (axes,)
+ axes = tuple(axes)
+ keep_axes = numpy.array([i for i in range(x.ndim) if i not in axes])
+ transposed_x = numpy.transpose(x, numpy.concatenate((keep_axes, axes)))
+ reshaped_x = transposed_x.reshape(transposed_x.shape[:len(keep_axes)] + (-1,))
+ rval_argmax = numpy.asarray(numpy.argmax(reshaped_x, axis=-1), dtype=numpy.int64)
+
+ # rval_argmax keeps the index referring to the axis along which the reduction was performed (axis=-1)
+ # when keepdims_argmax is True we need to map it back to the original shape of tensor x
+ if keepdims_argmax:
+ dim = tuple([x.shape[a] for a in axes])
+ rval_argmax = numpy.array([idx + numpy.unravel_index(val, dim)
+ for idx, val in numpy.ndenumerate(rval_argmax)])
+ # convert to numpy indexing convention (row indices first, then columns)
+ rval_argmax = zip(*rval_argmax)
+
+ if keepdims_max is False and keepdims_argmax is True:
+ # this could potentially save O(N) steps by not traversing the array once more
+ # to get the max value (not benchmarked, though)
+ rval_max = x[rval_argmax]
+ else:
+ rval_max = numpy.asarray(numpy.amax(x, axis=axes, keepdims=keepdims_max))
+
+ return rval_max, rval_argmax
+
+
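# Illustrative usage of max_and_argmax: for a (batch, odim, k) tensor, reducing over
# axis 2 with both keepdims flags set returns the per-pool maxima with shape
# (batch, odim, 1), together with indices that can be used to index the original
# tensor directly -- this is how Maxout.fprop/bprop below use it:
#   a = numpy.arange(24).reshape(2, 4, 3)
#   h, h_argmax = max_and_argmax(a, axes=2, keepdims_max=True, keepdims_argmax=True)
#   h.shape == (2, 4, 1) and numpy.allclose(a[h_argmax], h.ravel())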
class MLP(object):
"""
This is a container for an arbitrary sequence of other transforms
@@ -459,21 +504,30 @@ class Maxout(Linear):
rng=None,
irange=0.05):
- super(Maxout, self).__init__(idim, odim, rng, irange)
+ super(Maxout, self).__init__(idim, odim*k, rng, irange)
+
+ self.max_odim = odim
self.k = k
def fprop(self, inputs):
#get the linear activations
a = super(Maxout, self).fprop(inputs)
- ar = a.reshape(a.shape[0], self.odim, self.k)
- h, h_argmax = max_and_argmax(ar, axes=3, keepdims_argmax=True)
+ ar = a.reshape(a.shape[0], self.max_odim, self.k)
+ h, h_argmax = max_and_argmax(ar, axes=2, keepdims_max=True, keepdims_argmax=True)
self.h_argmax = h_argmax
- return h
+ return h[:, :, 0] #get rid of the last reduced dimension (of size 1)
def bprop(self, h, igrads):
- igrads_up = igrads.reshape(a.shape[0], -1, 1)
- igrads_up = numpy.tile(a, 1, self.k)
- deltas = (igrads_up * self.h_argmax).reshape(a.shape[0], -1)
+ #convert into the shape where upsampling is easier
+ igrads_up = igrads.reshape(igrads.shape[0], self.max_odim, 1)
+ #upsample to the linear layer's dimensionality, i.e. shape (batch_size, max_odim, pool_size)
+ igrads_up = numpy.tile(igrads_up, (1, 1, self.k))
+ #generate a mask matrix and set the maxed elements to 1
+ mask = numpy.zeros_like(igrads_up)
+ mask[self.h_argmax] = 1.0
+ #do bprop through max operator and then reshape into 2D
+ deltas = (igrads_up * mask).reshape(igrads_up.shape[0], -1)
+ #and then do bprop through the linear part
___, ograds = super(Maxout, self).bprop(h=None, igrads=deltas)
return deltas, ograds
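To make the reshaping in `fprop` concrete, the maxout operation on its own amounts to grouping the linear outputs into pools of size `k` and taking a per-pool maximum; a tiny standalone numpy illustration:

```
import numpy

batch_size, odim, k = 2, 3, 2
a = numpy.arange(batch_size * odim * k).reshape(batch_size, odim * k)  # linear activations
ar = a.reshape(batch_size, odim, k)  # group units into pools of size k
h = ar.max(axis=2)                   # maxout output, shape (batch_size, odim)
print(h.shape)  # (2, 3)
```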
From 80661efae1dd1ffecfc2a32f8c19993abe474bb9 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sun, 15 Nov 2015 16:10:05 +0000
Subject: [PATCH 4/7] more changes to intro
---
06_MLP_Coursework2_Introduction.ipynb | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/06_MLP_Coursework2_Introduction.ipynb b/06_MLP_Coursework2_Introduction.ipynb
index 0fd7d79..b2637f1 100644
--- a/06_MLP_Coursework2_Introduction.ipynb
+++ b/06_MLP_Coursework2_Introduction.ipynb
@@ -14,7 +14,7 @@
"\n",
"## Syncing the git repository\n",
"\n",
- "Look here for more details. But in short, we recommend to create a separate branch for this lab, as follows:\n",
+ "Look here for more details. But in short, we recommend to create a separate branch for the coursework, as follows:\n",
"\n",
"1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n",
"2. List the branches and check which are currently active by typing: `git branch`\n",
@@ -94,7 +94,7 @@
"* `numpy.rollaxis` - allows to shuffle certain axis in a tensor\n",
"* `slice` - allows to specify a range (can be used when indexing numpy arrays)\n",
"* `ellipsis` - allows to pick an arbitrary number of dimensions (inferred)\n",
- "* `max_and_argmax` - `(mlp.layers)` - an auxiliary function we have provided to get both max and argmax of a tensor across an arbitrary axes, possibly in the format preserving tensor's original shape (this is not trivial to do using numpy out-of-the-shelf functionality).\n",
+ "* `max_and_argmax` - `(mlp.layers)` - an auxiliary function we have provided to get both max and argmax of a tensor across an arbitrary axes, possibly in the format preserving tensor's original shape (this is not trivial to do using numpy *out-of-the-shelf* functionality).\n",
"\n",
"Below cells contain some simple examples showing basics behind tensor manipulation in numpy (go through them if you haven't used numpy in this context before)."
]
From ed47b36873ed0f721f835634022bb755874d72b8 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sun, 15 Nov 2015 16:33:53 +0000
Subject: [PATCH 5/7] coursework2, labs and code
---
07_MLP_Coursework2.ipynb | 362 +++++++++++++++++++++++++++++++++++++++
mlp/conv.py | 126 ++++++++++++++
mlp/utils.py | 66 +++++++
3 files changed, 554 insertions(+)
create mode 100644 07_MLP_Coursework2.ipynb
create mode 100644 mlp/conv.py
create mode 100644 mlp/utils.py
diff --git a/07_MLP_Coursework2.ipynb b/07_MLP_Coursework2.ipynb
new file mode 100644
index 0000000..d772972
--- /dev/null
+++ b/07_MLP_Coursework2.ipynb
@@ -0,0 +1,362 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Please don't edit this cell!**\n",
+ "\n",
+ "# Marks and Feedback\n",
+ "\n",
+ "**Total Marks:** XX/100\n",
+ "\n",
+ "**Overall comments:**\n",
+ "\n",
+ "\n",
+ "## Part 1. Investigations into Neural Networks (35 marks)\n",
+ "\n",
+ "* **Task 1**: *Experiments with learning rate schedules* - XX/5\n",
+ " * learning rate schedulers implemented\n",
+ " * experiments carried out\n",
+ " * further comments\n",
+ "\n",
+ "\n",
+ "* **Task 2**: *Experiments with regularisation* - XX/5\n",
+ " * L1 experiments\n",
+ " * L2 experiments\n",
+ " * dropout experiments\n",
+ " * annealed dropout implmented\n",
+ " * further experiments carried out\n",
+ " * further comments\n",
+ " \n",
+ "\n",
+ "* **Task 3**: *Experiments with pretraining* - XX/15\n",
+ " * autoencoder pretraining implemented\n",
+ " * denoising autoencoder pretraining implemented\n",
+ " * CE layer-by-layer pretraining implemented\n",
+ " * experiments\n",
+ " * further comments\n",
+ "\n",
+ "\n",
+ "* **Task 4**: *Experiments with data augmentation* - XX/5\n",
+ " * training data augmneted using noise, rotation, ...\n",
+ " * any further augmnetations\n",
+ " * experiments \n",
+ " * further comments\n",
+ "\n",
+ "\n",
+ "* **Task 5**: *State of the art* - XX/5\n",
+ " * motivation for systems constructed\n",
+ " * experiments\n",
+ " * accuracy of best system\n",
+ " * further comments\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Part 2. Convolutional Neural Networks (55 marks)\n",
+ "\n",
+ "* **Task 6**: *Implement convolutional layer* - XX/20\n",
+ " * linear conv layer\n",
+ " * sigmoid conv layer\n",
+ " * relu conv layer\n",
+ " * any checks for correctness\n",
+ " * loop-based or vectorised implementations\n",
+ " * timing comparisons\n",
+ "\n",
+ "\n",
+ "* **Task 7**: *Implement maxpooling layer* - XX/10\n",
+ " * implementation of non-overlapping pooling\n",
+ " * generic implementation\n",
+ " * any checks for correctness\n",
+ "\n",
+ "\n",
+ "* **Task 8**: *Experiments with convolutional networks* - XX/25\n",
+ " * 1 conv layer (1 fmap)\n",
+ " * 1 conv layer (5 fmaps)\n",
+ " * 2 conv layers\n",
+ " * further experiments\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Presentation (10 marks)\n",
+ "\n",
+ "* ** Marks:** XX/10\n",
+ " * Concise description of each system constructed\n",
+ " * Experiment design and motivations for different systems\n",
+ " * Presentation of results - graphs, tables, diagrams\n",
+ " * Conclusions\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Coursework #2\n",
+ "\n",
+ "## Introduction\n",
+ "\n",
+ "\n",
+ "## Previous Tutorials\n",
+ "\n",
+ "Before starting this coursework make sure that you have completed the following labs:\n",
+ "\n",
+ "* [04_Regularisation.ipynb](https://github.com/CSTR-Edinburgh/mlpractical/blob/master/04_Regularisation.ipynb) - regularising the model\n",
+ "* [05_Transfer_functions.ipynb](https://github.com/CSTR-Edinburgh/mlpractical/blob/master/05_Transfer_functions.ipynb) - building and training different activation functions\n",
+ "* [06_MLP_Coursework2_Introduction.ipynb](https://github.com/CSTR-Edinburgh/mlpractical/blob/master/06_MLP_Coursework2_Introduction.ipynb) - Notes on numpy and tensors\n",
+ "\n",
+ "\n",
+ "## Submission\n",
+ "**Submission Deadline: Thursday 14 January 2016, 16:00** \n",
+ "\n",
+ "Submit the coursework as an ipython notebook file, using the `submit` command in the terminal on a DICE machine. If your file is `06_MLP_Coursework1.ipynb` then you would enter:\n",
+ "\n",
+ "`submit mlp 2 06_MLP_Coursework1.ipynb` \n",
+ "\n",
+ "where `mlp 2` indicates this is the second coursework of MLP.\n",
+ "\n",
+ "After submitting, you should receive an email of acknowledgment from the system confirming that your submission has been received successfully. Keep the email as evidence of your coursework submission.\n",
+ "\n",
+ "**Please make sure you submit a single `ipynb` file (and nothing else)!**\n",
+ "\n",
+ "**Submission Deadline: Thursday 14 January 2016, 16:00** \n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Getting Started\n",
+ "Please enter your student number and the date in the next code cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "#MLP Coursework 2\n",
+ "#Student number: \n",
+ "#Date: "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Part 1. Investigations into Neural Networks (35 marks)\n",
+ "\n",
+ "In this part you are may choose exactly what you implement. However, you are expected to express your motivations, observations, and findings in a clear and cohesive way. Try to make it clear why you decided to do certain things. Use graphs and/or tables of results to show trends and other characteristics you think are important. \n",
+ "\n",
+ "For example, in Task 1 you could experiment with different schedulers in order to compare their convergence properties. In Task 2 you could look into (and visualise) what happens to weights when applying L1 and/or L2 regularisation when training. For instance, you could create sorted histograms of weight magnitudes in in each layer, etc..\n",
+ "\n",
+ "**Before submission, please collapse all the log entries into smaller boxes (by clicking on the bar on the left hand side)**\n",
+ "\n",
+ "### Task 1 - Experiments with learning rate schedules (5 marks)\n",
+ "\n",
+ "Investigate the effect of learning rate schedules on training and accuracy. Implement at least one additional learning rate scheduler mentioned in the lectures. "
+ ]
+ },
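One schedule you might add is an exponential decay of the learning rate over epochs. A minimal standalone sketch (independent of the `mlp.schedulers` interface, which you would still need to wrap it in):

```
import math

def exponential_rate(epoch, start_rate=0.5, decay=0.1):
    # learning rate decays as start_rate * exp(-decay * epoch)
    return start_rate * math.exp(-decay * epoch)

print([round(exponential_rate(e), 4) for e in range(5)])  # rates for the first 5 epochs
```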
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "#load the corresponding code here, and also attach scripts that run the experiments ()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Task 2 - Experiments with regularisers (5 marks)\n",
+ "\n",
+ "Investigate the effect of different regularisation approaches (L1, L2, dropout). Implement the annealing dropout scheduler (mentioned in lecture 5). Do some further investigations and experiments with model structures (and regularisers) of your choice. "
+ ]
+ },
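For annealed dropout, one common scheme is to start with an aggressive dropout probability and gradually move the keep-probability towards 1 over training. A generic sketch of such a schedule and of applying an (inverted) dropout mask, not tied to the `mlp` package API:

```
import numpy

def annealed_keep_prob(epoch, p_start=0.5, anneal_epochs=20):
    # linearly anneal the keep-probability from p_start up to 1.0
    return min(1.0, p_start + (1.0 - p_start) * epoch / float(anneal_epochs))

def dropout(h, p_keep, rng):
    # inverted dropout: scale at training time so no rescaling is needed at test time
    mask = (rng.uniform(size=h.shape) < p_keep) / p_keep
    return h * mask

rng = numpy.random.RandomState(123)
h = rng.uniform(size=(4, 6))
print(dropout(h, annealed_keep_prob(epoch=5), rng).shape)  # (4, 6)
```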
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Task 3 - Experiments with pretraining (15 marks)\n",
+ "\n",
+ "Implement pretraining of multi-layer networks with autoencoders, denoising autoencoders, and using layer-by-layer cross-entropy training. \n",
+ "\n",
+ "Implementation tip: You could add the corresponding methods to `optimiser`, namely, `pretrain()` and `pretrain_epoch()`, for autoencoders. Simiilarly, `pretrain_discriminative()` and `pretrain_epoch_discriminative()` for cross-entropy layer-by-layer pretraining. Of course, you can modify any other necessary pieces, but include all the modified fragments below."
+ ]
+ },
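As a toy illustration of what pretraining a single layer involves, here is a tied-weight autoencoder trained with plain gradient descent on random data standing in for MNIST batches (a sketch only; the real implementation would live in the optimiser methods suggested above):

```
import numpy

def sigmoid(x):
    return 1.0 / (1.0 + numpy.exp(-x))

rng = numpy.random.RandomState(0)
X = rng.uniform(size=(100, 20))          # toy data standing in for MNIST
W = rng.uniform(-0.1, 0.1, (20, 8))      # encoder weights (decoder uses W.T, i.e. tied)
b, c = numpy.zeros(8), numpy.zeros(20)
lr = 0.1

for epoch in range(10):
    H = sigmoid(numpy.dot(X, W) + b)     # encode (for denoising, encode a corrupted copy of X)
    R = sigmoid(numpy.dot(H, W.T) + c)   # decode / reconstruct
    dR = (R - X) * R * (1 - R)           # grad of 0.5*||R - X||^2 through the decoder sigmoid
    dH = numpy.dot(dR, W) * H * (1 - H)
    W -= lr * (numpy.dot(X.T, dH) + numpy.dot(dR.T, H)) / X.shape[0]
    b -= lr * dH.mean(axis=0)
    c -= lr * dR.mean(axis=0)

# W and b would then initialise the corresponding hidden layer before fine-tuning
```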
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Task 4 - Experiments with data augmentation (5 marks)\n",
+ "\n",
+ "Using the standard MNIST training data, generate some augmented training examples (for example, using noise or rotation). Perform experiments on using this expanded training data."
+ ]
+ },
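As a starting point, augmented examples can be generated directly from the flat 784-dimensional MNIST vectors, for instance by small rotations plus additive noise (a sketch; `scipy.ndimage.rotate` is assumed to be available in your environment):

```
import numpy
from scipy.ndimage import rotate

rng = numpy.random.RandomState(42)

def augment(x, max_angle=15.0, noise_std=0.1):
    # x: a single example as a flat 784-dimensional vector in [0, 1]
    img = x.reshape(28, 28)
    angle = rng.uniform(-max_angle, max_angle)
    img = rotate(img, angle, reshape=False, mode='constant', cval=0.0)
    img = img + rng.normal(0.0, noise_std, img.shape)
    return numpy.clip(img, 0.0, 1.0).reshape(784)
```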
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Task 5 - State of the art (5 marks)\n",
+ "\n",
+ "Using any techniques you have learnt so far (combining any number of them), build and train the best model you can (no other constraints)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "# Part 2. Convolutional Neural Networks (55 marks)\n",
+ "\n",
+ "In this part of the coursework, you are required to implement deep convolutional networks. This includes code for forward prop, back prop, and weight updates for convolutional and max-pooling layers, and should support the stacking of convolutional + pooling layers. You should implement all the parts relating to the convolutional layer in the mlp/conv.py module; if you decide to implement some routines in cython, keep them in mlp/conv.pyx). Attach both files in this notebook.\n",
+ "\n",
+ "Implementation tips: Look at [lecture 7](http://www.inf.ed.ac.uk/teaching/courses/mlp/2015/mlp07-cnn.pdf) and [lecture 8](http://www.inf.ed.ac.uk/teaching/courses/mlp/2015/mlp08-cnn2.pdf), and the introductory tutorial, [06_MLP_Coursework2_Introduction.ipynb](https://github.com/CSTR-Edinburgh/mlpractical/blob/master/06_MLP_Coursework2_Introduction.ipynb)\n",
+ "\n",
+ "### Task 6 - Implement convolutional layer (20 marks)\n",
+ "\n",
+ "Implement linear convolutional layer, and then extend to sigmoid and ReLU transfer functions (do it in a similar way to fully-connected layers). Include all relevant code. It is recommended that you first implement in the naive way with nested loops (python and/or cython); optionally you may then implement in a vectorised way in numpy. Include logs for each way you implement the convolutional layer, as timings for different implementations are of interest. Include all relevant code."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "### Task 7 - Implement max-pooling layer (10 marks)\n",
+ "\n",
+ "Implement a max-pooling layer. Non-overlapping pooling (which was assumed in the lecture presentation) is required. You may also implement a more generic solution with striding as well. Include all relevant code."
+ ]
+ },
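For the non-overlapping case, a useful observation is that 2x2/stride-2 pooling of a `(batch, num_feat_maps, H, W)` tensor can be done with a single reshape followed by max-reductions (a sketch assuming H and W are divisible by the pool size); for back-prop you would additionally need to remember which element in each pool was the maximum:

```
import numpy

def maxpool_2x2(x):
    # x: (batch_size, num_feat_maps, H, W) with H and W divisible by 2
    b, f, h, w = x.shape
    xr = x.reshape(b, f, h // 2, 2, w // 2, 2)
    return xr.max(axis=5).max(axis=3)  # shape (batch_size, num_feat_maps, H/2, W/2)

x = numpy.arange(2 * 1 * 4 * 4).reshape(2, 1, 4, 4)
print(maxpool_2x2(x).shape)  # (2, 1, 2, 2)
```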
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Task 8 - Experiments with convolutional networks (25 marks)\n",
+ "\n",
+ "Construct convolutional networks with a softmax output layer and a single fully connected hidden layer. Your first experiments should use one convolutional+pooling layer. As a default use convolutional kernels of dimension 5x5 (stride 1) and pooling regions of 2x2 (stride 2, hence non-overlapping).\n",
+ "\n",
+ "* Implement and test a convolutional network with 1 feature map\n",
+ "* Implement and test a convolutional network with 5 feature maps\n",
+ "\n",
+ "Explore convolutional networks with two convolutional layers, by implementing, training, and evaluating a network with two convolutional+maxpooling layers with 5 feature maps in the first convolutional layer, and 10 feature maps in the second convolutional layer.\n",
+ "\n",
+ "Carry out further experiments to optimise the convolutional network architecture (you could explore kernel sizes and strides, number of feature maps, sizes and strides of pooling operator, etc. - it is up to you)."
+ ]
+ },
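For the default setting this gives the usual valid-convolution sizes, `out = (in - kernel) // stride + 1`: a 28x28 MNIST image produces 24x24 feature maps after a 5x5/stride-1 convolution, and 12x12 maps after 2x2/stride-2 pooling:

```
def valid_out(in_size, kernel, stride):
    return (in_size - kernel) // stride + 1

conv_out = valid_out(28, 5, 1)        # 24
pool_out = valid_out(conv_out, 2, 2)  # 12
print(conv_out, pool_out)
```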
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "**This is the end of coursework 2.**\n",
+ "\n",
+ "Please remember to save your notebook, and submit your notebook following the instructions at the top. Please make sure that you have executed all the code cells when you submit the notebook.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/mlp/conv.py b/mlp/conv.py
new file mode 100644
index 0000000..4bb9786
--- /dev/null
+++ b/mlp/conv.py
@@ -0,0 +1,126 @@
+
+# Machine Learning Practical (INFR11119),
+# Pawel Swietojanski, University of Edinburgh
+
+
+import numpy
+import logging
+from mlp.layers import Layer
+
+
+logger = logging.getLogger(__name__)
+
+"""
+You have been given a very initial skeleton below. Feel free to build on top of it and/or
+modify it according to your needs. Note that you can factor the convolution code out of
+the layer code, and just pass (possibly) different conv implementations for each of the stages
+in the model where you are expected to apply the convolutional operator. This will allow you to
+keep the layer implementation independent of the conv operator implementation, and you can easily
+swap it later, for example, for a more efficient implementation if you come up with one.
+"""
+
+def my1_conv2d(image, kernels, strides=(1, 1)):
+ """
+ Implements a 2d valid convolution of kernels with the image
+ Note: filter means the same as kernel, and convolution (correlation) of those with the input space
+ produces feature maps (sometimes also referred to as receptive fields). Also note that
+ feature maps are synonymous here with channels, and as such num_inp_channels == num_inp_feat_maps
+ :param image: 4D tensor of sizes (batch_size, num_input_channels, img_shape_x, img_shape_y)
+ :param kernels: 4D tensor of kernels of size (num_inp_feat_maps, num_out_feat_maps, kernel_shape_x, kernel_shape_y)
+ :param strides: a tuple (stride_x, stride_y), specifying the shift of the kernels in x and y dimensions
+ :return: 4D tensor of size (batch_size, num_out_feature_maps, feature_map_shape_x, feature_map_shape_y)
+ """
+ raise NotImplementedError('Write me!')
+
+
+class ConvLinear(Layer):
+ def __init__(self,
+ num_inp_feat_maps,
+ num_out_feat_maps,
+ image_shape=(28, 28),
+ kernel_shape=(5, 5),
+ stride=(1, 1),
+ irange=0.2,
+ rng=None,
+ conv_fwd=my1_conv2d,
+ conv_bck=my1_conv2d,
+ conv_grad=my1_conv2d):
+ """
+
+ :param num_inp_feat_maps: int, a number of input feature maps (channels)
+ :param num_out_feat_maps: int, a number of output feature maps (channels)
+ :param image_shape: tuple, a shape of the image
+ :param kernel_shape: tuple, a shape of the kernel
+ :param stride: tuple, shift of kernels in both dimensions
+ :param irange: float, initial range of the parameters
+ :param rng: RandomState object, random number generator
+ :param conv_fwd: handle to a convolution function used in fwd-prop
+ :param conv_bck: handle to a convolution function used in backward-prop
+ :param conv_grad: handle to a convolution function used in pgrads
+ :return:
+ """
+
+ super(ConvLinear, self).__init__(rng=rng)
+
+ raise NotImplementedError()
+
+ def fprop(self, inputs):
+ raise NotImplementedError()
+
+ def bprop(self, h, igrads):
+ raise NotImplementedError()
+
+ def bprop_cost(self, h, igrads, cost):
+ raise NotImplementedError('ConvLinear.bprop_cost method not implemented')
+
+ def pgrads(self, inputs, deltas, l1_weight=0, l2_weight=0):
+ raise NotImplementedError()
+
+ def get_params(self):
+ raise NotImplementedError()
+
+ def set_params(self, params):
+ raise NotImplementedError()
+
+ def get_name(self):
+ return 'convlinear'
+
+#you can derive here particular non-linear implementations:
+#class ConvSigmoid(ConvLinear):
+#...
+
+
+class ConvMaxPool2D(Layer):
+ def __init__(self,
+ num_feat_maps,
+ conv_shape,
+ pool_shape=(2, 2),
+ pool_stride=(2, 2)):
+ """
+
+ :param conv_shape: tuple, a shape of the lower convolutional feature maps output
+ :param pool_shape: tuple, a shape of pooling operator
+ :param pool_stride: tuple, a strides for pooling operator
+ :return:
+ """
+
+ super(ConvMaxPool2D, self).__init__(rng=None)
+ raise NotImplementedError()
+
+ def fprop(self, inputs):
+ raise NotImplementedError()
+
+ def bprop(self, h, igrads):
+ raise NotImplementedError()
+
+ def get_params(self):
+ return []
+
+ def pgrads(self, inputs, deltas, **kwargs):
+ return []
+
+ def set_params(self, params):
+ pass
+
+ def get_name(self):
+ return 'convmaxpool2d'
\ No newline at end of file
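For reference, a naive nested-loop implementation of the valid convolution (correlation) described in the docstring above could look roughly like the following. This is an illustrative sketch only, written against the shapes stated in `my1_conv2d`'s docstring, and is deliberately slow; the coursework still expects you to write (and ideally vectorise and benchmark) your own version:

```
import numpy

def naive_conv2d(image, kernels, strides=(1, 1)):
    # image:   (batch_size, num_inp_channels, img_x, img_y)
    # kernels: (num_inp_feat_maps, num_out_feat_maps, kernel_x, kernel_y)
    # returns: (batch_size, num_out_feat_maps, out_x, out_y), 'valid' borders,
    # computed as a correlation (no kernel flipping), as noted in the docstring
    b, ci, ix, iy = image.shape
    ci_k, co, kx, ky = kernels.shape
    assert ci == ci_k, 'channel dimensions of image and kernels must match'
    sx, sy = strides
    ox, oy = (ix - kx) // sx + 1, (iy - ky) // sy + 1
    out = numpy.zeros((b, co, ox, oy))
    for n in range(b):
        for o in range(co):
            for i in range(ci):
                for x in range(ox):
                    for y in range(oy):
                        patch = image[n, i, x * sx:x * sx + kx, y * sy:y * sy + ky]
                        out[n, o, x, y] += numpy.sum(patch * kernels[i, o])
    return out
```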
diff --git a/mlp/utils.py b/mlp/utils.py
new file mode 100644
index 0000000..f98dda8
--- /dev/null
+++ b/mlp/utils.py
@@ -0,0 +1,66 @@
+# Machine Learning Practical (INFR11119),
+# Pawel Swietojanski, University of Edinburgh
+
+import numpy
+from mlp.layers import Layer
+
+
+def numerical_gradient(f, x, eps=1e-4, **kwargs):
+ """
+ Implements the following numerical gradient rule
+ df(x)/dx = (f(x+eps)-f(x-eps))/(2eps)
+ """
+
+ xc = x.copy()
+ g = numpy.zeros_like(xc)
+ xf = xc.ravel()
+ gf = g.ravel()
+
+ for i in xrange(xf.shape[0]):
+ xx = xf[i]
+ xf[i] = xx + eps
+ fp_eps, ___ = f(xc, **kwargs)
+ xf[i] = xx - eps
+ fm_eps, ___ = f(xc, **kwargs)
+ xf[i] = xx
+ gf[i] = (fp_eps - fm_eps)/(2*eps)
+
+ return g
+
+
+def verify_gradient(f, x, eps=1e-4, tol=1e-6, **kwargs):
+ """
+ Compares the numerical and analytical gradients.
+ """
+ fval, fgrad = f(x=x, **kwargs)
+ ngrad = numerical_gradient(f=f, x=x, eps=eps, **kwargs)
+
+ fgradnorm = numpy.sqrt(numpy.sum(fgrad**2))
+ ngradnorm = numpy.sqrt(numpy.sum(ngrad**2))
+ diffnorm = numpy.sqrt(numpy.sum((fgrad-ngrad)**2))
+
+ if fgradnorm > 0 or ngradnorm > 0:
+ norm = numpy.maximum(fgradnorm, ngradnorm)
+ if not (diffnorm < tol or diffnorm/norm < tol):
+ raise Exception("Numerical and analytical gradients "
+ "are different: %s != %s!" % (ngrad, fgrad))
+ else:
+ if not (diffnorm < tol):
+ raise Exception("Numerical and analytical gradients "
+ "are different: %s != %s!" % (ngrad, fgrad))
+ return True
+
+
+def verify_layer_gradient(layer, x, eps=1e-4, tol=1e-6):
+
+ assert isinstance(layer, Layer), (
+ "Expected to get the instance of Layer class, got"
+ " %s " % type(layer)
+ )
+
+ def grad_layer_wrapper(x, **kwargs):
+ h = layer.fprop(x)
+ deltas, ograds = layer.bprop(h=h, igrads=numpy.ones_like(h))
+ return numpy.sum(h), ograds
+
+ return verify_gradient(f=grad_layer_wrapper, x=x, eps=eps, tol=tol, layer=layer)
\ No newline at end of file
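A quick way to see these helpers in action is to check a function whose gradient is known analytically (a minimal sketch; the `**kwargs` is only there so any extra keyword arguments forwarded by the helpers are harmless):

```
import numpy
from mlp.utils import verify_gradient

def quadratic(x, **kwargs):
    # f(x) = sum(x**2), with analytic gradient 2*x
    return numpy.sum(x ** 2), 2 * x

x = numpy.asarray([0.5, -1.0, 2.0], dtype=numpy.float64)
print(verify_gradient(quadratic, x))  # True when numerical and analytic gradients agree
```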
From fc2eecf1357da39f0021671a7bce2431148fe297 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sun, 15 Nov 2015 16:41:21 +0000
Subject: [PATCH 6/7] adding overlooked dataset changes
---
mlp/dataset.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/mlp/dataset.py b/mlp/dataset.py
index d081cc4..fdd8124 100644
--- a/mlp/dataset.py
+++ b/mlp/dataset.py
@@ -83,7 +83,8 @@ class MNISTDataProvider(DataProvider):
max_num_batches=-1,
max_num_examples=-1,
randomize=True,
- rng=None):
+ rng=None,
+ conv_reshape=False):
super(MNISTDataProvider, self).\
__init__(batch_size, randomize, rng)
@@ -119,6 +120,7 @@ class MNISTDataProvider(DataProvider):
self.x = x
self.t = t
self.num_classes = 10
+ self.conv_reshape = conv_reshape
self._rand_idx = None
if self.randomize:
@@ -162,6 +164,9 @@ class MNISTDataProvider(DataProvider):
self._curr_idx += self.batch_size
+ if self.conv_reshape:
+ rval_x = rval_x.reshape(self.batch_size, 1, 28, 28)
+
return rval_x, self.__to_one_of_k(rval_t)
def num_examples(self):
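With this flag set, a provider constructed as in the earlier labs hands out image-shaped batches directly. A sketch (the `dset` argument and the iteration call are assumed to be the same as in previous notebooks):

```
from mlp.dataset import MNISTDataProvider

valid_dp = MNISTDataProvider(dset='valid', batch_size=10, randomize=False, conv_reshape=True)
x, t = valid_dp.next()
print(x.shape)  # (10, 1, 28, 28) rather than (10, 784)
```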
From 689b78e4f418430dee80dc993331f9e3571f36eb Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sun, 15 Nov 2015 16:45:41 +0000
Subject: [PATCH 7/7] adding missing conv compatibility code in fc linear
transform
---
mlp/layers.py | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/mlp/layers.py b/mlp/layers.py
index f3a17e1..5c0af09 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -265,6 +265,11 @@ class Linear(Layer):
:param inputs: matrix of features (x) or the output of the previous layer h^{i-1}
:return: h^i, matrix of transformed by layer features
"""
+
+ #input comes from 4D convolutional tensor, reshape to expected shape
+ if inputs.ndim == 4:
+ inputs = inputs.reshape(inputs.shape[0], -1)
+
a = numpy.dot(inputs, self.W) + self.b
# here f() is an identity function, so just return a linear transformation
return a
@@ -334,6 +339,10 @@ class Linear(Layer):
since W and b are only layer's parameters
"""
+ #input comes from 4D convolutional tensor, reshape to expected shape
+ if inputs.ndim == 4:
+ inputs = inputs.reshape(inputs.shape[0], -1)
+
#you could basically use different scalers for biases
#and weights, but it is not implemented here like this
l2_W_penalty, l2_b_penalty = 0, 0