From cb6712578d61490e46390303e5a9d2a99944f354 Mon Sep 17 00:00:00 2001
From: pswietojanski
Date: Sun, 15 Nov 2015 16:00:58 +0000
Subject: [PATCH] solutions to 04 and 05
---
04_Regularisation_solution.ipynb | 4 -
05_Transfer_functions_solution.ipynb | 524 +++++++++++++++-----------
06_MLP_Coursework2_Introduction.ipynb | 30 +-
mlp/layers.py | 68 +++-
4 files changed, 388 insertions(+), 238 deletions(-)
diff --git a/04_Regularisation_solution.ipynb b/04_Regularisation_solution.ipynb
index 9537b96..265ad61 100644
--- a/04_Regularisation_solution.ipynb
+++ b/04_Regularisation_solution.ipynb
@@ -292,7 +292,6 @@
"\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
@@ -468,7 +467,6 @@
"from mlp.dataset import MNISTDataProvider #import data provider\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
@@ -644,7 +642,6 @@
"from mlp.dataset import MNISTDataProvider #import data provider\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
@@ -982,7 +979,6 @@
"from mlp.dataset import MNISTDataProvider #import data provider\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed, DropoutFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
diff --git a/05_Transfer_functions_solution.ipynb b/05_Transfer_functions_solution.ipynb
index 8443e94..17b5016 100644
--- a/05_Transfer_functions_solution.ipynb
+++ b/05_Transfer_functions_solution.ipynb
@@ -165,8 +165,8 @@
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"\n",
- "# Note, you were asked to do run the experiments on all data. \n",
- "# Here I am running those examples on 1000 training data-points only (similar to regularisation notebook)\n",
+ "# Note, you were asked to do run the experiments on all data and smaller models. \n",
+ "# Here I am running the exercises on 1000 training data-points only (similar to regularisation notebook)\n",
"logger.info('Initialising data providers...')\n",
"train_dp = MNISTDataProvider(dset='train', batch_size=10, max_num_batches=100, randomize=True)\n",
"valid_dp = MNISTDataProvider(dset='valid', batch_size=10000, max_num_batches=-10, randomize=False)\n",
@@ -175,7 +175,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
@@ -186,100 +186,100 @@
"output_type": "stream",
"text": [
"INFO:root:Training started...\n",
- "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.368. Accuracy is 7.80%\n",
- "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.347. Accuracy is 9.86%\n",
- "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.421. Accuracy is 64.70%\n",
- "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.479. Accuracy is 85.95%\n",
- "INFO:mlp.optimisers:Epoch 1: Took 10 seconds. Training speed 233 pps. Validation speed 1624 pps.\n",
- "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.571. Accuracy is 81.60%\n",
- "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.484. Accuracy is 85.23%\n",
- "INFO:mlp.optimisers:Epoch 2: Took 11 seconds. Training speed 214 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.411. Accuracy is 87.40%\n",
- "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.507. Accuracy is 85.40%\n",
- "INFO:mlp.optimisers:Epoch 3: Took 11 seconds. Training speed 226 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.318. Accuracy is 90.10%\n",
- "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.596. Accuracy is 84.40%\n",
- "INFO:mlp.optimisers:Epoch 4: Took 10 seconds. Training speed 232 pps. Validation speed 1616 pps.\n",
- "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.257. Accuracy is 91.80%\n",
- "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.468. Accuracy is 87.76%\n",
- "INFO:mlp.optimisers:Epoch 5: Took 11 seconds. Training speed 229 pps. Validation speed 1629 pps.\n",
- "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.244. Accuracy is 92.30%\n",
- "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.535. Accuracy is 86.31%\n",
- "INFO:mlp.optimisers:Epoch 6: Took 11 seconds. Training speed 230 pps. Validation speed 1600 pps.\n",
- "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.169. Accuracy is 94.30%\n",
- "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.554. Accuracy is 86.59%\n",
- "INFO:mlp.optimisers:Epoch 7: Took 11 seconds. Training speed 226 pps. Validation speed 1631 pps.\n",
- "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.130. Accuracy is 96.60%\n",
- "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.562. Accuracy is 86.83%\n",
- "INFO:mlp.optimisers:Epoch 8: Took 11 seconds. Training speed 225 pps. Validation speed 1603 pps.\n",
- "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.113. Accuracy is 96.90%\n",
- "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.605. Accuracy is 85.94%\n",
- "INFO:mlp.optimisers:Epoch 9: Took 11 seconds. Training speed 231 pps. Validation speed 1616 pps.\n",
- "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.087. Accuracy is 97.10%\n",
- "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.564. Accuracy is 87.50%\n",
- "INFO:mlp.optimisers:Epoch 10: Took 11 seconds. Training speed 226 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.054. Accuracy is 98.70%\n",
- "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.599. Accuracy is 87.04%\n",
- "INFO:mlp.optimisers:Epoch 11: Took 10 seconds. Training speed 232 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.045. Accuracy is 98.60%\n",
- "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.574. Accuracy is 87.75%\n",
- "INFO:mlp.optimisers:Epoch 12: Took 10 seconds. Training speed 237 pps. Validation speed 1653 pps.\n",
- "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.025. Accuracy is 99.30%\n",
- "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.615. Accuracy is 86.88%\n",
- "INFO:mlp.optimisers:Epoch 13: Took 11 seconds. Training speed 232 pps. Validation speed 1616 pps.\n",
- "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.610. Accuracy is 87.50%\n",
- "INFO:mlp.optimisers:Epoch 14: Took 11 seconds. Training speed 201 pps. Validation speed 1634 pps.\n",
- "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.599. Accuracy is 87.87%\n",
- "INFO:mlp.optimisers:Epoch 15: Took 10 seconds. Training speed 233 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.612. Accuracy is 87.71%\n",
- "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 241 pps. Validation speed 1645 pps.\n",
- "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.614. Accuracy is 87.73%\n",
- "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 237 pps. Validation speed 1634 pps.\n",
- "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.620. Accuracy is 87.77%\n",
- "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 245 pps. Validation speed 1645 pps.\n",
- "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.623. Accuracy is 87.94%\n",
- "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 234 pps. Validation speed 1631 pps.\n",
- "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.625. Accuracy is 87.84%\n",
- "INFO:mlp.optimisers:Epoch 20: Took 11 seconds. Training speed 217 pps. Validation speed 1631 pps.\n",
- "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.633. Accuracy is 87.83%\n",
- "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 235 pps. Validation speed 1618 pps.\n",
- "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.637. Accuracy is 87.93%\n",
- "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 225 pps. Validation speed 1648 pps.\n",
- "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.639. Accuracy is 87.90%\n",
- "INFO:mlp.optimisers:Epoch 23: Took 10 seconds. Training speed 238 pps. Validation speed 1626 pps.\n",
- "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.642. Accuracy is 87.86%\n",
- "INFO:mlp.optimisers:Epoch 24: Took 10 seconds. Training speed 233 pps. Validation speed 1659 pps.\n",
- "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.645. Accuracy is 87.91%\n",
- "INFO:mlp.optimisers:Epoch 25: Took 12 seconds. Training speed 179 pps. Validation speed 1618 pps.\n",
- "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.650. Accuracy is 87.90%\n",
- "INFO:mlp.optimisers:Epoch 26: Took 10 seconds. Training speed 241 pps. Validation speed 1637 pps.\n",
- "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.653. Accuracy is 87.98%\n",
- "INFO:mlp.optimisers:Epoch 27: Took 10 seconds. Training speed 250 pps. Validation speed 1629 pps.\n",
- "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.656. Accuracy is 87.89%\n",
- "INFO:mlp.optimisers:Epoch 28: Took 10 seconds. Training speed 232 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.659. Accuracy is 87.92%\n",
- "INFO:mlp.optimisers:Epoch 29: Took 10 seconds. Training speed 235 pps. Validation speed 1613 pps.\n",
- "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.003. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.663. Accuracy is 87.91%\n",
- "INFO:mlp.optimisers:Epoch 30: Took 11 seconds. Training speed 223 pps. Validation speed 1613 pps.\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.319. Accuracy is 10.50%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.315. Accuracy is 11.33%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.048. Accuracy is 66.30%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.571. Accuracy is 82.72%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 2 seconds. Training speed 764 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.485. Accuracy is 84.40%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.455. Accuracy is 86.58%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 2 seconds. Training speed 720 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.362. Accuracy is 87.70%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.435. Accuracy is 86.90%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 2 seconds. Training speed 788 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.251. Accuracy is 92.10%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.417. Accuracy is 88.09%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 2 seconds. Training speed 788 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.175. Accuracy is 95.40%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.405. Accuracy is 88.16%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 2 seconds. Training speed 776 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.121. Accuracy is 96.40%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.458. Accuracy is 87.24%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 2 seconds. Training speed 690 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.091. Accuracy is 97.90%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.418. Accuracy is 88.37%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 2 seconds. Training speed 841 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.065. Accuracy is 98.70%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.400. Accuracy is 89.44%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 2 seconds. Training speed 794 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.043. Accuracy is 99.30%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.406. Accuracy is 89.35%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 2 seconds. Training speed 747 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.029. Accuracy is 99.50%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.410. Accuracy is 89.69%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 2 seconds. Training speed 953 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.023. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.424. Accuracy is 89.41%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 2 seconds. Training speed 953 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.018. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.429. Accuracy is 89.50%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 2 seconds. Training speed 870 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.015. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.428. Accuracy is 89.58%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 2 seconds. Training speed 878 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.436. Accuracy is 89.41%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 2 seconds. Training speed 894 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.010. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.433. Accuracy is 89.64%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 2 seconds. Training speed 834 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.439. Accuracy is 89.63%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 2 seconds. Training speed 820 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.443. Accuracy is 89.78%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 2 seconds. Training speed 902 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.446. Accuracy is 89.72%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 2 seconds. Training speed 870 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.445. Accuracy is 89.83%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 2 seconds. Training speed 918 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.451. Accuracy is 89.75%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 2 seconds. Training speed 834 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.454. Accuracy is 89.80%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 2 seconds. Training speed 902 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.456. Accuracy is 89.77%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 2 seconds. Training speed 863 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.458. Accuracy is 89.84%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 2 seconds. Training speed 820 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.460. Accuracy is 89.80%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 2 seconds. Training speed 856 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.461. Accuracy is 89.86%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 2 seconds. Training speed 902 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.467. Accuracy is 89.86%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 2 seconds. Training speed 910 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.466. Accuracy is 89.81%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 2 seconds. Training speed 827 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.468. Accuracy is 89.84%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 2 seconds. Training speed 894 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.471. Accuracy is 89.83%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 2 seconds. Training speed 902 pps. Validation speed 12659 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.004. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.473. Accuracy is 89.81%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 2 seconds. Training speed 918 pps. Validation speed 11495 pps.\n",
"INFO:root:Testing the model on test set:\n",
- "INFO:root:MNIST test set accuracy is 87.69 %, cost (ce) is 0.665\n"
+ "INFO:root:MNIST test set accuracy is 89.33 %, cost (ce) is 0.480\n"
]
}
],
@@ -287,15 +287,13 @@
"\n",
"from mlp.layers import MLP, Tanh, Softmax #import required layer types\n",
"from mlp.optimisers import SGDOptimiser #import the optimiser\n",
- "\n",
"from mlp.costs import CECost #import the cost we want to use for optimisation\n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
- "nhid = 800\n",
+ "nhid = 100\n",
"learning_rate = 0.2\n",
"max_epochs = 30\n",
"cost = CECost()\n",
@@ -341,7 +339,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {
"collapsed": false,
"scrolled": true
@@ -352,116 +350,114 @@
"output_type": "stream",
"text": [
"INFO:root:Training started...\n",
- "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.362. Accuracy is 9.30%\n",
- "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.338. Accuracy is 10.80%\n",
- "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.002. Accuracy is 68.60%\n",
- "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.623. Accuracy is 81.52%\n",
- "INFO:mlp.optimisers:Epoch 1: Took 10 seconds. Training speed 227 pps. Validation speed 1698 pps.\n",
- "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.483. Accuracy is 86.10%\n",
- "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.416. Accuracy is 88.84%\n",
- "INFO:mlp.optimisers:Epoch 2: Took 10 seconds. Training speed 255 pps. Validation speed 1710 pps.\n",
- "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.361. Accuracy is 90.20%\n",
- "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.388. Accuracy is 89.08%\n",
- "INFO:mlp.optimisers:Epoch 3: Took 10 seconds. Training speed 232 pps. Validation speed 1710 pps.\n",
- "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.294. Accuracy is 91.80%\n",
- "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.384. Accuracy is 88.91%\n",
- "INFO:mlp.optimisers:Epoch 4: Took 10 seconds. Training speed 237 pps. Validation speed 1672 pps.\n",
- "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.246. Accuracy is 94.10%\n",
- "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.375. Accuracy is 89.32%\n",
- "INFO:mlp.optimisers:Epoch 5: Took 10 seconds. Training speed 236 pps. Validation speed 1672 pps.\n",
- "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.217. Accuracy is 94.10%\n",
- "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.382. Accuracy is 88.88%\n",
- "INFO:mlp.optimisers:Epoch 6: Took 10 seconds. Training speed 245 pps. Validation speed 1689 pps.\n",
- "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.184. Accuracy is 96.10%\n",
- "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.420. Accuracy is 87.86%\n",
- "INFO:mlp.optimisers:Epoch 7: Took 10 seconds. Training speed 234 pps. Validation speed 1692 pps.\n",
- "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.148. Accuracy is 97.00%\n",
- "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.392. Accuracy is 88.87%\n",
- "INFO:mlp.optimisers:Epoch 8: Took 11 seconds. Training speed 209 pps. Validation speed 1689 pps.\n",
- "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.135. Accuracy is 97.60%\n",
- "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.381. Accuracy is 89.10%\n",
- "INFO:mlp.optimisers:Epoch 9: Took 10 seconds. Training speed 238 pps. Validation speed 1667 pps.\n",
- "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.109. Accuracy is 98.80%\n",
- "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.389. Accuracy is 89.04%\n",
- "INFO:mlp.optimisers:Epoch 10: Took 10 seconds. Training speed 244 pps. Validation speed 1675 pps.\n",
- "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.102. Accuracy is 98.40%\n",
- "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.406. Accuracy is 88.57%\n",
- "INFO:mlp.optimisers:Epoch 11: Took 10 seconds. Training speed 236 pps. Validation speed 1667 pps.\n",
- "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.085. Accuracy is 99.00%\n",
- "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.415. Accuracy is 88.49%\n",
- "INFO:mlp.optimisers:Epoch 12: Took 11 seconds. Training speed 211 pps. Validation speed 1701 pps.\n",
- "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.069. Accuracy is 99.40%\n",
- "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.423. Accuracy is 88.44%\n",
- "INFO:mlp.optimisers:Epoch 13: Took 11 seconds. Training speed 209 pps. Validation speed 1704 pps.\n",
- "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.057. Accuracy is 99.60%\n",
- "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.433. Accuracy is 88.47%\n",
- "INFO:mlp.optimisers:Epoch 14: Took 10 seconds. Training speed 234 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.050. Accuracy is 99.70%\n",
- "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.430. Accuracy is 88.60%\n",
- "INFO:mlp.optimisers:Epoch 15: Took 10 seconds. Training speed 231 pps. Validation speed 1704 pps.\n",
- "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.042. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.437. Accuracy is 88.57%\n",
- "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 241 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.039. Accuracy is 99.80%\n",
- "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.452. Accuracy is 88.24%\n",
- "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 233 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.032. Accuracy is 99.80%\n",
- "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.453. Accuracy is 88.39%\n",
- "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 236 pps. Validation speed 1712 pps.\n",
- "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.028. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.447. Accuracy is 89.01%\n",
- "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 238 pps. Validation speed 1678 pps.\n",
- "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.025. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.466. Accuracy is 88.41%\n",
- "INFO:mlp.optimisers:Epoch 20: Took 10 seconds. Training speed 233 pps. Validation speed 1710 pps.\n",
- "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.023. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.464. Accuracy is 88.72%\n",
- "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 220 pps. Validation speed 1695 pps.\n",
- "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.021. Accuracy is 99.90%\n",
- "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.465. Accuracy is 88.70%\n",
- "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 201 pps. Validation speed 1695 pps.\n",
- "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.019. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.472. Accuracy is 88.55%\n",
- "INFO:mlp.optimisers:Epoch 23: Took 11 seconds. Training speed 188 pps. Validation speed 1675 pps.\n",
- "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.017. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.477. Accuracy is 88.53%\n",
- "INFO:mlp.optimisers:Epoch 24: Took 11 seconds. Training speed 197 pps. Validation speed 1640 pps.\n",
- "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.016. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.482. Accuracy is 88.59%\n",
- "INFO:mlp.optimisers:Epoch 25: Took 11 seconds. Training speed 214 pps. Validation speed 1689 pps.\n",
- "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.482. Accuracy is 88.73%\n",
- "INFO:mlp.optimisers:Epoch 26: Took 11 seconds. Training speed 210 pps. Validation speed 1675 pps.\n",
- "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.490. Accuracy is 88.65%\n",
- "INFO:mlp.optimisers:Epoch 27: Took 12 seconds. Training speed 165 pps. Validation speed 1684 pps.\n",
- "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.013. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.496. Accuracy is 88.47%\n",
- "INFO:mlp.optimisers:Epoch 28: Took 12 seconds. Training speed 164 pps. Validation speed 1672 pps.\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.317. Accuracy is 15.20%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.317. Accuracy is 13.98%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.452. Accuracy is 60.20%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.750. Accuracy is 81.69%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 2 seconds. Training speed 820 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.632. Accuracy is 82.40%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.503. Accuracy is 86.74%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 2 seconds. Training speed 788 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.446. Accuracy is 87.50%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.438. Accuracy is 87.24%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 2 seconds. Training speed 788 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.359. Accuracy is 90.00%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.444. Accuracy is 86.44%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 2 seconds. Training speed 710 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.304. Accuracy is 90.80%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.408. Accuracy is 87.90%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 2 seconds. Training speed 782 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.255. Accuracy is 93.80%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.390. Accuracy is 88.56%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 2 seconds. Training speed 782 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.225. Accuracy is 93.80%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.425. Accuracy is 87.46%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 2 seconds. Training speed 725 pps. Validation speed 13890 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.205. Accuracy is 95.00%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.399. Accuracy is 88.51%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 2 seconds. Training speed 834 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.163. Accuracy is 96.20%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.474. Accuracy is 85.74%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 2 seconds. Training speed 814 pps. Validation speed 13700 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.140. Accuracy is 96.40%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.418. Accuracy is 88.06%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 2 seconds. Training speed 788 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.120. Accuracy is 97.70%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.427. Accuracy is 87.93%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 2 seconds. Training speed 731 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.105. Accuracy is 98.10%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.449. Accuracy is 87.51%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 2 seconds. Training speed 725 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.088. Accuracy is 98.50%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.479. Accuracy is 87.14%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 2 seconds. Training speed 715 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.086. Accuracy is 98.30%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.455. Accuracy is 87.97%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 2 seconds. Training speed 681 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.070. Accuracy is 99.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.465. Accuracy is 87.76%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 2 seconds. Training speed 758 pps. Validation speed 12988 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.054. Accuracy is 99.50%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.467. Accuracy is 88.07%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 2 seconds. Training speed 776 pps. Validation speed 12501 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.052. Accuracy is 99.60%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.485. Accuracy is 87.69%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 2 seconds. Training speed 801 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.042. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.500. Accuracy is 87.61%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 2 seconds. Training speed 686 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.035. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.499. Accuracy is 87.76%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 2 seconds. Training speed 764 pps. Validation speed 12822 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.031. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.506. Accuracy is 87.77%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 2 seconds. Training speed 801 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.027. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.506. Accuracy is 87.61%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 2 seconds. Training speed 731 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.025. Accuracy is 99.80%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.516. Accuracy is 87.68%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 2 seconds. Training speed 758 pps. Validation speed 13335 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.022. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.529. Accuracy is 87.33%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 2 seconds. Training speed 770 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.020. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.526. Accuracy is 87.70%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 2 seconds. Training speed 715 pps. Validation speed 13700 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.018. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.535. Accuracy is 87.55%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 2 seconds. Training speed 770 pps. Validation speed 13159 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.016. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.540. Accuracy is 87.55%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 2 seconds. Training speed 741 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.015. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.546. Accuracy is 87.57%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 2 seconds. Training speed 681 pps. Validation speed 13515 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.014. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.546. Accuracy is 87.78%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 2 seconds. Training speed 753 pps. Validation speed 13700 pps.\n",
"INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.496. Accuracy is 88.55%\n",
- "INFO:mlp.optimisers:Epoch 29: Took 12 seconds. Training speed 172 pps. Validation speed 1650 pps.\n",
- "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
- "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.500. Accuracy is 88.56%\n",
- "INFO:mlp.optimisers:Epoch 30: Took 10 seconds. Training speed 235 pps. Validation speed 1667 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.556. Accuracy is 87.56%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 2 seconds. Training speed 758 pps. Validation speed 13700 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.558. Accuracy is 87.74%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 2 seconds. Training speed 747 pps. Validation speed 13515 pps.\n",
"INFO:root:Testing the model on test set:\n",
- "INFO:root:MNIST test set accuracy is 88.10 %, cost (ce) is 0.497\n"
+ "INFO:root:MNIST test set accuracy is 87.19 %, cost (ce) is 0.554\n"
]
}
],
"source": [
"\n",
- "from mlp.layers import MLP, Relu, Softmax #import required layer types\n",
- "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
- "\n",
- "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.layers import MLP, Relu, Softmax \n",
+ "from mlp.optimisers import SGDOptimiser \n",
+ "from mlp.costs import CECost \n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
"\n",
"rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
- "nhid = 800\n",
+ "nhid = 100\n",
"learning_rate = 0.1\n",
"max_epochs = 30\n",
"cost = CECost()\n",
@@ -513,50 +509,126 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "INFO:root:Training started...\n"
- ]
- },
- {
- "ename": "ValueError",
- "evalue": "total size of new array must be unchanged",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Training started...'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 40\u001b[1;33m \u001b[0mtr_stats\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_stats\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moptimiser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_dp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_dp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 41\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 42\u001b[0m \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Testing the model on test set:'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/optimisers.pyc\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(self, model, train_iterator, valid_iterator)\u001b[0m\n\u001b[0;32m 160\u001b[0m \u001b[1;31m# do the initial validation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 162\u001b[1;33m \u001b[0mtr_nll\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtr_acc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0ml1_weight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0ml2_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 163\u001b[0m logger.info('Epoch %i: Training cost (%s) for initial model is %.3f. Accuracy is %.2f%%'\n\u001b[0;32m 164\u001b[0m % (self.lr_scheduler.epoch, cost_name, tr_nll, tr_acc * 100.))\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/optimisers.pyc\u001b[0m in \u001b[0;36mvalidate\u001b[1;34m(self, model, valid_iterator, l1_weight, l2_weight)\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[0macc_list\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnll_list\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mvalid_iterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 36\u001b[1;33m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 37\u001b[0m \u001b[0mnll_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcost\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcost\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[0macc_list\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclassification_accuracy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/layers.pyc\u001b[0m in \u001b[0;36mfprop\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;32m/afs/inf.ed.ac.uk/user/s11/s1136550/Dropbox/repos/mlpractical/mlp/layers.pyc\u001b[0m in \u001b[0;36mfprop\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[1;31m#get the linear activations\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 467\u001b[0m \u001b[0ma\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mMaxout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfprop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 468\u001b[1;33m \u001b[0mar\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0modim\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mk\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 469\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mh_argmax\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmax_and_argmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mar\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeepdims_argmax\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 470\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mh_argmax\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mh_argmax\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
- "\u001b[1;31mValueError\u001b[0m: total size of new array must be unchanged"
+ "ERROR: Line magic function `%autorelaod` not found.\n",
+ "INFO:root:Training started...\n",
+ "INFO:mlp.optimisers:Epoch 0: Training cost (ce) for initial model is 2.314. Accuracy is 9.30%\n",
+ "INFO:mlp.optimisers:Epoch 0: Validation cost (ce) for initial model is 2.323. Accuracy is 8.27%\n",
+ "INFO:mlp.optimisers:Epoch 1: Training cost (ce) is 1.206. Accuracy is 64.20%\n",
+ "INFO:mlp.optimisers:Epoch 1: Validation cost (ce) is 0.628. Accuracy is 79.70%\n",
+ "INFO:mlp.optimisers:Epoch 1: Took 9 seconds. Training speed 394 pps. Validation speed 1527 pps.\n",
+ "INFO:mlp.optimisers:Epoch 2: Training cost (ce) is 0.514. Accuracy is 85.80%\n",
+ "INFO:mlp.optimisers:Epoch 2: Validation cost (ce) is 0.429. Accuracy is 88.16%\n",
+ "INFO:mlp.optimisers:Epoch 2: Took 9 seconds. Training speed 361 pps. Validation speed 1532 pps.\n",
+ "INFO:mlp.optimisers:Epoch 3: Training cost (ce) is 0.355. Accuracy is 89.70%\n",
+ "INFO:mlp.optimisers:Epoch 3: Validation cost (ce) is 0.407. Accuracy is 87.77%\n",
+ "INFO:mlp.optimisers:Epoch 3: Took 10 seconds. Training speed 422 pps. Validation speed 1387 pps.\n",
+ "INFO:mlp.optimisers:Epoch 4: Training cost (ce) is 0.262. Accuracy is 92.30%\n",
+ "INFO:mlp.optimisers:Epoch 4: Validation cost (ce) is 0.387. Accuracy is 88.78%\n",
+ "INFO:mlp.optimisers:Epoch 4: Took 9 seconds. Training speed 441 pps. Validation speed 1488 pps.\n",
+ "INFO:mlp.optimisers:Epoch 5: Training cost (ce) is 0.194. Accuracy is 94.70%\n",
+ "INFO:mlp.optimisers:Epoch 5: Validation cost (ce) is 0.349. Accuracy is 89.86%\n",
+ "INFO:mlp.optimisers:Epoch 5: Took 9 seconds. Training speed 389 pps. Validation speed 1527 pps.\n",
+ "INFO:mlp.optimisers:Epoch 6: Training cost (ce) is 0.134. Accuracy is 97.50%\n",
+ "INFO:mlp.optimisers:Epoch 6: Validation cost (ce) is 0.347. Accuracy is 89.79%\n",
+ "INFO:mlp.optimisers:Epoch 6: Took 9 seconds. Training speed 426 pps. Validation speed 1497 pps.\n",
+ "INFO:mlp.optimisers:Epoch 7: Training cost (ce) is 0.094. Accuracy is 98.70%\n",
+ "INFO:mlp.optimisers:Epoch 7: Validation cost (ce) is 0.429. Accuracy is 87.88%\n",
+ "INFO:mlp.optimisers:Epoch 7: Took 9 seconds. Training speed 449 pps. Validation speed 1473 pps.\n",
+ "INFO:mlp.optimisers:Epoch 8: Training cost (ce) is 0.071. Accuracy is 99.10%\n",
+ "INFO:mlp.optimisers:Epoch 8: Validation cost (ce) is 0.345. Accuracy is 90.31%\n",
+ "INFO:mlp.optimisers:Epoch 8: Took 9 seconds. Training speed 455 pps. Validation speed 1508 pps.\n",
+ "INFO:mlp.optimisers:Epoch 9: Training cost (ce) is 0.053. Accuracy is 99.40%\n",
+ "INFO:mlp.optimisers:Epoch 9: Validation cost (ce) is 0.357. Accuracy is 90.00%\n",
+ "INFO:mlp.optimisers:Epoch 9: Took 9 seconds. Training speed 375 pps. Validation speed 1532 pps.\n",
+ "INFO:mlp.optimisers:Epoch 10: Training cost (ce) is 0.042. Accuracy is 99.50%\n",
+ "INFO:mlp.optimisers:Epoch 10: Validation cost (ce) is 0.356. Accuracy is 90.27%\n",
+ "INFO:mlp.optimisers:Epoch 10: Took 9 seconds. Training speed 421 pps. Validation speed 1525 pps.\n",
+ "INFO:mlp.optimisers:Epoch 11: Training cost (ce) is 0.031. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 11: Validation cost (ce) is 0.347. Accuracy is 90.57%\n",
+ "INFO:mlp.optimisers:Epoch 11: Took 9 seconds. Training speed 449 pps. Validation speed 1522 pps.\n",
+ "INFO:mlp.optimisers:Epoch 12: Training cost (ce) is 0.026. Accuracy is 99.70%\n",
+ "INFO:mlp.optimisers:Epoch 12: Validation cost (ce) is 0.353. Accuracy is 90.50%\n",
+ "INFO:mlp.optimisers:Epoch 12: Took 9 seconds. Training speed 449 pps. Validation speed 1504 pps.\n",
+ "INFO:mlp.optimisers:Epoch 13: Training cost (ce) is 0.021. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 13: Validation cost (ce) is 0.352. Accuracy is 90.51%\n",
+ "INFO:mlp.optimisers:Epoch 13: Took 9 seconds. Training speed 441 pps. Validation speed 1495 pps.\n",
+ "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.018. Accuracy is 99.90%\n",
+ "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.355. Accuracy is 90.59%\n",
+ "INFO:mlp.optimisers:Epoch 14: Took 9 seconds. Training speed 410 pps. Validation speed 1456 pps.\n",
+ "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.015. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.359. Accuracy is 90.66%\n",
+ "INFO:mlp.optimisers:Epoch 15: Took 9 seconds. Training speed 463 pps. Validation speed 1429 pps.\n",
+ "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.013. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.363. Accuracy is 90.52%\n",
+ "INFO:mlp.optimisers:Epoch 16: Took 10 seconds. Training speed 365 pps. Validation speed 1403 pps.\n",
+ "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.012. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.364. Accuracy is 90.71%\n",
+ "INFO:mlp.optimisers:Epoch 17: Took 10 seconds. Training speed 351 pps. Validation speed 1368 pps.\n",
+ "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.011. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.364. Accuracy is 90.65%\n",
+ "INFO:mlp.optimisers:Epoch 18: Took 10 seconds. Training speed 348 pps. Validation speed 1439 pps.\n",
+ "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.010. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.367. Accuracy is 90.62%\n",
+ "INFO:mlp.optimisers:Epoch 19: Took 11 seconds. Training speed 271 pps. Validation speed 1441 pps.\n",
+ "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.009. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.366. Accuracy is 90.78%\n",
+ "INFO:mlp.optimisers:Epoch 20: Took 10 seconds. Training speed 309 pps. Validation speed 1387 pps.\n",
+ "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.371. Accuracy is 90.66%\n",
+ "INFO:mlp.optimisers:Epoch 21: Took 10 seconds. Training speed 348 pps. Validation speed 1323 pps.\n",
+ "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.008. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.370. Accuracy is 90.68%\n",
+ "INFO:mlp.optimisers:Epoch 22: Took 9 seconds. Training speed 435 pps. Validation speed 1488 pps.\n",
+ "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.372. Accuracy is 90.70%\n",
+ "INFO:mlp.optimisers:Epoch 23: Took 9 seconds. Training speed 405 pps. Validation speed 1443 pps.\n",
+ "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.007. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.373. Accuracy is 90.80%\n",
+ "INFO:mlp.optimisers:Epoch 24: Took 9 seconds. Training speed 389 pps. Validation speed 1482 pps.\n",
+ "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.375. Accuracy is 90.71%\n",
+ "INFO:mlp.optimisers:Epoch 25: Took 9 seconds. Training speed 402 pps. Validation speed 1525 pps.\n",
+ "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.380. Accuracy is 90.65%\n",
+ "INFO:mlp.optimisers:Epoch 26: Took 9 seconds. Training speed 405 pps. Validation speed 1522 pps.\n",
+ "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.006. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.380. Accuracy is 90.75%\n",
+ "INFO:mlp.optimisers:Epoch 27: Took 9 seconds. Training speed 415 pps. Validation speed 1534 pps.\n",
+ "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.381. Accuracy is 90.66%\n",
+ "INFO:mlp.optimisers:Epoch 28: Took 9 seconds. Training speed 410 pps. Validation speed 1493 pps.\n",
+ "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.382. Accuracy is 90.67%\n",
+ "INFO:mlp.optimisers:Epoch 29: Took 9 seconds. Training speed 396 pps. Validation speed 1536 pps.\n",
+ "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.005. Accuracy is 100.00%\n",
+ "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.384. Accuracy is 90.75%\n",
+ "INFO:mlp.optimisers:Epoch 30: Took 9 seconds. Training speed 463 pps. Validation speed 1532 pps.\n",
+ "INFO:root:Testing the model on test set:\n",
+ "INFO:root:MNIST test set accuracy is 90.02 %, cost (ce) is 0.391\n"
]
}
],
"source": [
- "#%load_ext autoreload\n",
- "%autoreload\n",
- "from mlp.layers import MLP, Maxout, Softmax #import required layer types\n",
- "from mlp.optimisers import SGDOptimiser #import the optimiser\n",
"\n",
- "from mlp.costs import CECost #import the cost we want to use for optimisation\n",
+ "from mlp.layers import MLP, Maxout, Softmax \n",
+ "from mlp.optimisers import SGDOptimiser\n",
+ "from mlp.costs import CECost \n",
"from mlp.schedulers import LearningRateFixed\n",
- "from scipy.optimize import leastsq\n",
- "\n",
- "rng = numpy.random.RandomState([2015,10,10])\n",
"\n",
"#some hyper-parameters\n",
- "nhid = 800\n",
+ "nhid = 100\n",
"learning_rate = 0.1\n",
- "k = 2\n",
+ "k = 2 #maxout pool size (stride is assumed k)\n",
"max_epochs = 30\n",
"cost = CECost()\n",
" \n",
diff --git a/06_MLP_Coursework2_Introduction.ipynb b/06_MLP_Coursework2_Introduction.ipynb
index 04cc798..0fd7d79 100644
--- a/06_MLP_Coursework2_Introduction.ipynb
+++ b/06_MLP_Coursework2_Introduction.ipynb
@@ -8,6 +8,33 @@
"\n",
"This notebook contains some extended versions of hints and some code examples that are suppose to make it easier to proceed with certain tasks in the Coursework #2.\n",
"\n",
+ "## Virtual environments\n",
+ "\n",
+    "Before you proceed, remember to activate your virtual environment by typing `activate_mlp` or `source ~/mlpractical/venv/bin/activate` (or, if you did the original install the \"comfy way\", type `workon mlpractical`).\n",
+ "\n",
+ "## Syncing the git repository\n",
+ "\n",
+    "Look here for more details. In short, we recommend creating a separate branch for this coursework, as follows:\n",
+ "\n",
+ "1. Enter the mlpractical directory `cd ~/mlpractical/repo-mlp`\n",
+ "2. List the branches and check which are currently active by typing: `git branch`\n",
+    "3. If you have followed our recommendations, you should be on the `lab5` branch; commit your local changes to the repo index by typing:\n",
+ "```\n",
+ "git commit -am \"finished lab5\"\n",
+ "```\n",
+ "4. Now you can switch to `master` branch by typing: \n",
+ "```\n",
+ "git checkout master\n",
+    "```\n",
+    "5. Update the repository (assuming `master` does not have any conflicts; if there are some, have a look here):\n",
+ "```\n",
+ "git pull\n",
+ "```\n",
+    "6. Now create the new branch and switch to it by typing:\n",
+ "```\n",
+ "git checkout -b coursework2\n",
+ "```\n",
+ "\n",
"# Store the intermediate results (check-pointing and pickling)\n",
"\n",
"Once you have finished a certain task it is a good idea to check-point your current notebook's status (logs, plots and whatever else has been stored in the notebook). By doing this, you can always revert to this state later when necessary (without rerunning experimens). You can do this by going to menus `File->Save and Checkpoint` and `File->Revert to Checkpoint`.\n",
@@ -67,6 +94,7 @@
"* `numpy.rollaxis` - allows to shuffle certain axis in a tensor\n",
"* `slice` - allows to specify a range (can be used when indexing numpy arrays)\n",
"* `ellipsis` - allows to pick an arbitrary number of dimensions (inferred)\n",
+    "* `max_and_argmax` - `(mlp.layers)` - an auxiliary function we have provided to get both the max and argmax of a tensor across arbitrary axes, optionally in a format that preserves the tensor's original shape (this is not trivial to do with numpy's off-the-shelf functionality).\n",
"\n",
"Below cells contain some simple examples showing basics behind tensor manipulation in numpy (go through them if you haven't used numpy in this context before)."
]
@@ -231,7 +259,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "You can also check the backprop implementation in the layer. Notice, it **does not** necessairly check whether your layer implementation is correct but rather if the gradient computation is correct, given forward pass computation. If you get the forward pass wrong, and somehow get gradients right w.r.t what forward pass is computing, the below check will not capture it (obviously). "
+    "You can also check the backprop implementation in the layer. Notice, it **does not** necessarily check whether your layer implementation is correct, but rather whether the gradient computation is correct given the forward-pass computation. If you get the forward pass wrong, and somehow get the gradients right w.r.t. what the forward pass is computing, the below check will not capture it (obviously). Contrary to the normal scenario, where 32-bit floating-point precision is sufficient, when checking gradients please make sure 64-bit precision is used (or tune the tolerance accordingly)."
]
},
{
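To make the 64-bit advice above concrete, here is a generic central-difference gradient checker one could run alongside the provided checks; this is an illustrative sketch (the scalar callable `f` and the tolerances are assumptions), not the course's own verification helper:

```python
import numpy

def fd_grad(f, x, eps=1e-6):
    """Central finite differences of a scalar function f at x, in float64."""
    x = numpy.asarray(x, dtype=numpy.float64)
    grad = numpy.zeros_like(x)
    it = numpy.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        x_plus, x_minus = x.copy(), x.copy()
        x_plus[idx] += eps
        x_minus[idx] -= eps
        grad[idx] = (f(x_plus) - f(x_minus)) / (2 * eps)
        it.iternext()
    return grad

# compare against the analytic gradient; with float32 inputs this check
# would often fail for precision reasons alone
# assert numpy.allclose(fd_grad(f, x), analytic_grad, rtol=1e-5, atol=1e-8)
```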
diff --git a/mlp/layers.py b/mlp/layers.py
index c80fd31..f3a17e1 100644
--- a/mlp/layers.py
+++ b/mlp/layers.py
@@ -10,6 +10,51 @@ from mlp.costs import Cost
logger = logging.getLogger(__name__)
+def max_and_argmax(x, axes=None, keepdims_max=False, keepdims_argmax=False):
+ """
+    Return both max and argmax for the given multi-dimensional array, optionally
+    preserving the original shape
+ :param x: input tensor
+ :param axes: tuple of ints denoting axes across which
+ one should perform reduction
+    :param keepdims_max: boolean, if true, the shape of x is preserved in the max result
+    :param keepdims_argmax: boolean, if true, the shape of x is preserved in the argmax result
+    :return: max (values) and argmax (indices) of the max elements along the given axes
+        of the multi-dimensional tensor
+ """
+ if axes is None:
+ rval_argmax = numpy.argmax(x)
+ if keepdims_argmax:
+ rval_argmax = numpy.unravel_index(rval_argmax, x.shape)
+ else:
+ if isinstance(axes, int):
+ axes = (axes,)
+ axes = tuple(axes)
+ keep_axes = numpy.array([i for i in range(x.ndim) if i not in axes])
+ transposed_x = numpy.transpose(x, numpy.concatenate((keep_axes, axes)))
+ reshaped_x = transposed_x.reshape(transposed_x.shape[:len(keep_axes)] + (-1,))
+ rval_argmax = numpy.asarray(numpy.argmax(reshaped_x, axis=-1), dtype=numpy.int64)
+
+        # rval_argmax keeps indices referencing the axis along which the reduction was performed (axis=-1);
+        # when keepdims_argmax is True we need to map them back to the original shape of tensor x
+ if keepdims_argmax:
+ dim = tuple([x.shape[a] for a in axes])
+ rval_argmax = numpy.array([idx + numpy.unravel_index(val, dim)
+ for idx, val in numpy.ndenumerate(rval_argmax)])
+ # convert to numpy indexing convention (row indices first, then columns)
+ rval_argmax = zip(*rval_argmax)
+
+ if keepdims_max is False and keepdims_argmax is True:
+        # this could potentially save O(N) steps by not traversing the array once more
+        # to get the max value; haven't benchmarked it, though
+ rval_max = x[rval_argmax]
+ else:
+ rval_max = numpy.asarray(numpy.amax(x, axis=axes, keepdims=keepdims_max))
+
+ return rval_max, rval_argmax
+
+
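For clarity, a small usage sketch of the helper above (the array is illustrative); this mirrors how `Maxout.fprop` calls it further down in this patch:

```python
import numpy
from mlp.layers import max_and_argmax

x = numpy.arange(24).reshape(2, 3, 4)
# reduce over the last axis; keepdims_max keeps the reduced dimension (size 1)
h, h_argmax = max_and_argmax(x, axes=2, keepdims_max=True, keepdims_argmax=True)
print h.shape      # (2, 3, 1)
print x[h_argmax]  # the same max values, recovered by fancy indexing (flattened)
```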
class MLP(object):
"""
This is a container for an arbitrary sequence of other transforms
@@ -459,21 +504,30 @@ class Maxout(Linear):
rng=None,
irange=0.05):
- super(Maxout, self).__init__(idim, odim, rng, irange)
+ super(Maxout, self).__init__(idim, odim*k, rng, irange)
+
+ self.max_odim = odim
self.k = k
def fprop(self, inputs):
#get the linear activations
a = super(Maxout, self).fprop(inputs)
- ar = a.reshape(a.shape[0], self.odim, self.k)
- h, h_argmax = max_and_argmax(ar, axes=3, keepdims_argmax=True)
+ ar = a.reshape(a.shape[0], self.max_odim, self.k)
+ h, h_argmax = max_and_argmax(ar, axes=2, keepdims_max=True, keepdims_argmax=True)
self.h_argmax = h_argmax
- return h
+        return h[:, :, 0] #get rid of the last reduced dimension (of size 1)
def bprop(self, h, igrads):
- igrads_up = igrads.reshape(a.shape[0], -1, 1)
- igrads_up = numpy.tile(a, 1, self.k)
- deltas = (igrads_up * self.h_argmax).reshape(a.shape[0], -1)
+        #convert into the shape where upsampling is easier
+        igrads_up = igrads.reshape(igrads.shape[0], self.max_odim, 1)
+        #upsample to the linear dimension (shape becomes (batch_size, max_odim, pool_size))
+        igrads_up = numpy.tile(igrads_up, (1, 1, self.k))
+        #generate a mask matrix and set the maxed elements to 1
+        mask = numpy.zeros_like(igrads_up)
+        mask[self.h_argmax] = 1.0
+        #bprop through the max operator and then reshape into 2D
+        deltas = (igrads_up * mask).reshape(igrads_up.shape[0], -1)
+        #and then bprop through the linear part
___, ograds = super(Maxout, self).bprop(h=None, igrads=deltas)
return deltas, ograds
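As a quick sanity check of the masking logic above, the following standalone sketch (shapes and names are illustrative) verifies that exactly one unit per pool, the max winner, receives the gradient:

```python
import numpy
from mlp.layers import max_and_argmax

batch, max_odim, k = 2, 3, 2
rng = numpy.random.RandomState(42)
a = rng.randn(batch, max_odim, k)       # linear activations grouped into pools of size k
h, h_argmax = max_and_argmax(a, axes=2, keepdims_max=True, keepdims_argmax=True)

igrads = numpy.ones((batch, max_odim))  # dummy gradients w.r.t. the maxout outputs
igrads_up = numpy.tile(igrads.reshape(batch, max_odim, 1), (1, 1, k))
mask = numpy.zeros_like(igrads_up)
mask[h_argmax] = 1.0
deltas = igrads_up * mask

# each pool propagates gradient only through its max element
assert numpy.all(deltas.sum(axis=2) == 1.0)
```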