diff --git a/04_Regularisation_solution.ipynb b/04_Regularisation_solution.ipynb index 63dd229..b070bba 100644 --- a/04_Regularisation_solution.ipynb +++ b/04_Regularisation_solution.ipynb @@ -794,7 +794,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": false, "scrolled": true @@ -849,7 +849,117 @@ "INFO:mlp.optimisers:Epoch 13: Took 12 seconds. Training speed 204 pps. Validation speed 1515 pps.\n", "INFO:mlp.optimisers:Epoch 14: Training cost (ce) is 0.423. Accuracy is 87.70%\n", "INFO:mlp.optimisers:Epoch 14: Validation cost (ce) is 0.412. Accuracy is 89.93%\n", - "INFO:mlp.optimisers:Epoch 14: Took 11 seconds. Training speed 229 pps. Validation speed 1525 pps.\n" + "INFO:mlp.optimisers:Epoch 14: Took 11 seconds. Training speed 229 pps. Validation speed 1525 pps.\n", + "INFO:mlp.optimisers:Epoch 15: Training cost (ce) is 0.439. Accuracy is 88.10%\n", + "INFO:mlp.optimisers:Epoch 15: Validation cost (ce) is 0.465. Accuracy is 89.30%\n", + "INFO:mlp.optimisers:Epoch 15: Took 11 seconds. Training speed 225 pps. Validation speed 1452 pps.\n", + "INFO:mlp.optimisers:Epoch 16: Training cost (ce) is 0.460. Accuracy is 87.30%\n", + "INFO:mlp.optimisers:Epoch 16: Validation cost (ce) is 0.392. Accuracy is 90.99%\n", + "INFO:mlp.optimisers:Epoch 16: Took 12 seconds. Training speed 202 pps. Validation speed 1527 pps.\n", + "INFO:mlp.optimisers:Epoch 17: Training cost (ce) is 0.427. Accuracy is 87.60%\n", + "INFO:mlp.optimisers:Epoch 17: Validation cost (ce) is 0.434. Accuracy is 90.41%\n", + "INFO:mlp.optimisers:Epoch 17: Took 11 seconds. Training speed 242 pps. Validation speed 1479 pps.\n", + "INFO:mlp.optimisers:Epoch 18: Training cost (ce) is 0.398. Accuracy is 89.20%\n", + "INFO:mlp.optimisers:Epoch 18: Validation cost (ce) is 0.376. Accuracy is 91.62%\n", + "INFO:mlp.optimisers:Epoch 18: Took 11 seconds. Training speed 206 pps. 
Validation speed 1543 pps.\n", + "INFO:mlp.optimisers:Epoch 19: Training cost (ce) is 0.342. Accuracy is 88.60%\n", + "INFO:mlp.optimisers:Epoch 19: Validation cost (ce) is 0.422. Accuracy is 91.39%\n", + "INFO:mlp.optimisers:Epoch 19: Took 10 seconds. Training speed 303 pps. Validation speed 1534 pps.\n", + "INFO:mlp.optimisers:Epoch 20: Training cost (ce) is 0.385. Accuracy is 89.60%\n", + "INFO:mlp.optimisers:Epoch 20: Validation cost (ce) is 0.397. Accuracy is 91.78%\n", + "INFO:mlp.optimisers:Epoch 20: Took 11 seconds. Training speed 242 pps. Validation speed 1486 pps.\n", + "INFO:mlp.optimisers:Epoch 21: Training cost (ce) is 0.343. Accuracy is 89.60%\n", + "INFO:mlp.optimisers:Epoch 21: Validation cost (ce) is 0.498. Accuracy is 90.57%\n", + "INFO:mlp.optimisers:Epoch 21: Took 11 seconds. Training speed 229 pps. Validation speed 1508 pps.\n", + "INFO:mlp.optimisers:Epoch 22: Training cost (ce) is 0.406. Accuracy is 88.60%\n", + "INFO:mlp.optimisers:Epoch 22: Validation cost (ce) is 0.378. Accuracy is 92.42%\n", + "INFO:mlp.optimisers:Epoch 22: Took 11 seconds. Training speed 226 pps. Validation speed 1499 pps.\n", + "INFO:mlp.optimisers:Epoch 23: Training cost (ce) is 0.315. Accuracy is 91.60%\n", + "INFO:mlp.optimisers:Epoch 23: Validation cost (ce) is 0.434. Accuracy is 91.37%\n", + "INFO:mlp.optimisers:Epoch 23: Took 12 seconds. Training speed 202 pps. Validation speed 1449 pps.\n", + "INFO:mlp.optimisers:Epoch 24: Training cost (ce) is 0.409. Accuracy is 90.00%\n", + "INFO:mlp.optimisers:Epoch 24: Validation cost (ce) is 0.480. Accuracy is 90.29%\n", + "INFO:mlp.optimisers:Epoch 24: Took 11 seconds. Training speed 209 pps. Validation speed 1529 pps.\n", + "INFO:mlp.optimisers:Epoch 25: Training cost (ce) is 0.328. Accuracy is 91.00%\n", + "INFO:mlp.optimisers:Epoch 25: Validation cost (ce) is 0.446. Accuracy is 91.48%\n", + "INFO:mlp.optimisers:Epoch 25: Took 12 seconds. Training speed 215 pps. 
Validation speed 1443 pps.\n", + "INFO:mlp.optimisers:Epoch 26: Training cost (ce) is 0.309. Accuracy is 91.40%\n", + "INFO:mlp.optimisers:Epoch 26: Validation cost (ce) is 0.477. Accuracy is 91.10%\n", + "INFO:mlp.optimisers:Epoch 26: Took 11 seconds. Training speed 301 pps. Validation speed 1342 pps.\n", + "INFO:mlp.optimisers:Epoch 27: Training cost (ce) is 0.351. Accuracy is 90.00%\n", + "INFO:mlp.optimisers:Epoch 27: Validation cost (ce) is 0.487. Accuracy is 91.21%\n", + "INFO:mlp.optimisers:Epoch 27: Took 10 seconds. Training speed 279 pps. Validation speed 1502 pps.\n", + "INFO:mlp.optimisers:Epoch 28: Training cost (ce) is 0.284. Accuracy is 92.70%\n", + "INFO:mlp.optimisers:Epoch 28: Validation cost (ce) is 0.463. Accuracy is 92.24%\n", + "INFO:mlp.optimisers:Epoch 28: Took 11 seconds. Training speed 264 pps. Validation speed 1311 pps.\n", + "INFO:mlp.optimisers:Epoch 29: Training cost (ce) is 0.360. Accuracy is 91.70%\n", + "INFO:mlp.optimisers:Epoch 29: Validation cost (ce) is 0.456. Accuracy is 92.37%\n", + "INFO:mlp.optimisers:Epoch 29: Took 11 seconds. Training speed 268 pps. Validation speed 1326 pps.\n", + "INFO:mlp.optimisers:Epoch 30: Training cost (ce) is 0.298. Accuracy is 92.30%\n", + "INFO:mlp.optimisers:Epoch 30: Validation cost (ce) is 0.466. Accuracy is 92.34%\n", + "INFO:mlp.optimisers:Epoch 30: Took 11 seconds. Training speed 242 pps. Validation speed 1473 pps.\n", + "INFO:mlp.optimisers:Epoch 31: Training cost (ce) is 0.316. Accuracy is 92.30%\n", + "INFO:mlp.optimisers:Epoch 31: Validation cost (ce) is 0.516. Accuracy is 91.59%\n", + "INFO:mlp.optimisers:Epoch 31: Took 10 seconds. Training speed 251 pps. Validation speed 1558 pps.\n", + "INFO:mlp.optimisers:Epoch 32: Training cost (ce) is 0.308. Accuracy is 92.50%\n", + "INFO:mlp.optimisers:Epoch 32: Validation cost (ce) is 0.481. Accuracy is 92.51%\n", + "INFO:mlp.optimisers:Epoch 32: Took 11 seconds. Training speed 255 pps. 
Validation speed 1421 pps.\n", + "INFO:mlp.optimisers:Epoch 33: Training cost (ce) is 0.270. Accuracy is 93.60%\n", + "INFO:mlp.optimisers:Epoch 33: Validation cost (ce) is 0.520. Accuracy is 91.91%\n", + "INFO:mlp.optimisers:Epoch 33: Took 10 seconds. Training speed 294 pps. Validation speed 1484 pps.\n", + "INFO:mlp.optimisers:Epoch 34: Training cost (ce) is 0.359. Accuracy is 92.60%\n", + "INFO:mlp.optimisers:Epoch 34: Validation cost (ce) is 0.665. Accuracy is 89.27%\n", + "INFO:mlp.optimisers:Epoch 34: Took 11 seconds. Training speed 263 pps. Validation speed 1397 pps.\n", + "INFO:mlp.optimisers:Epoch 35: Training cost (ce) is 0.423. Accuracy is 91.50%\n", + "INFO:mlp.optimisers:Epoch 35: Validation cost (ce) is 0.548. Accuracy is 92.05%\n", + "INFO:mlp.optimisers:Epoch 35: Took 11 seconds. Training speed 226 pps. Validation speed 1464 pps.\n", + "INFO:mlp.optimisers:Epoch 36: Training cost (ce) is 0.329. Accuracy is 92.40%\n", + "INFO:mlp.optimisers:Epoch 36: Validation cost (ce) is 0.528. Accuracy is 92.26%\n", + "INFO:mlp.optimisers:Epoch 36: Took 11 seconds. Training speed 248 pps. Validation speed 1511 pps.\n", + "INFO:mlp.optimisers:Epoch 37: Training cost (ce) is 0.342. Accuracy is 92.40%\n", + "INFO:mlp.optimisers:Epoch 37: Validation cost (ce) is 0.506. Accuracy is 92.50%\n", + "INFO:mlp.optimisers:Epoch 37: Took 11 seconds. Training speed 223 pps. Validation speed 1486 pps.\n", + "INFO:mlp.optimisers:Epoch 38: Training cost (ce) is 0.313. Accuracy is 92.60%\n", + "INFO:mlp.optimisers:Epoch 38: Validation cost (ce) is 0.599. Accuracy is 90.90%\n", + "INFO:mlp.optimisers:Epoch 38: Took 10 seconds. Training speed 268 pps. Validation speed 1515 pps.\n", + "INFO:mlp.optimisers:Epoch 39: Training cost (ce) is 0.291. Accuracy is 92.40%\n", + "INFO:mlp.optimisers:Epoch 39: Validation cost (ce) is 0.622. Accuracy is 91.38%\n", + "INFO:mlp.optimisers:Epoch 39: Took 11 seconds. Training speed 259 pps. 
Validation speed 1462 pps.\n", + "INFO:mlp.optimisers:Epoch 40: Training cost (ce) is 0.339. Accuracy is 92.90%\n", + "INFO:mlp.optimisers:Epoch 40: Validation cost (ce) is 0.525. Accuracy is 92.40%\n", + "INFO:mlp.optimisers:Epoch 40: Took 10 seconds. Training speed 303 pps. Validation speed 1421 pps.\n", + "INFO:mlp.optimisers:Epoch 41: Training cost (ce) is 0.317. Accuracy is 93.00%\n", + "INFO:mlp.optimisers:Epoch 41: Validation cost (ce) is 0.653. Accuracy is 90.91%\n", + "INFO:mlp.optimisers:Epoch 41: Took 10 seconds. Training speed 277 pps. Validation speed 1464 pps.\n", + "INFO:mlp.optimisers:Epoch 42: Training cost (ce) is 0.349. Accuracy is 93.20%\n", + "INFO:mlp.optimisers:Epoch 42: Validation cost (ce) is 0.529. Accuracy is 92.87%\n", + "INFO:mlp.optimisers:Epoch 42: Took 11 seconds. Training speed 278 pps. Validation speed 1447 pps.\n", + "INFO:mlp.optimisers:Epoch 43: Training cost (ce) is 0.278. Accuracy is 94.00%\n", + "INFO:mlp.optimisers:Epoch 43: Validation cost (ce) is 0.557. Accuracy is 92.57%\n", + "INFO:mlp.optimisers:Epoch 43: Took 11 seconds. Training speed 216 pps. Validation speed 1497 pps.\n", + "INFO:mlp.optimisers:Epoch 44: Training cost (ce) is 0.340. Accuracy is 93.10%\n", + "INFO:mlp.optimisers:Epoch 44: Validation cost (ce) is 0.640. Accuracy is 91.63%\n", + "INFO:mlp.optimisers:Epoch 44: Took 11 seconds. Training speed 228 pps. Validation speed 1548 pps.\n", + "INFO:mlp.optimisers:Epoch 45: Training cost (ce) is 0.367. Accuracy is 92.20%\n", + "INFO:mlp.optimisers:Epoch 45: Validation cost (ce) is 0.613. Accuracy is 91.58%\n", + "INFO:mlp.optimisers:Epoch 45: Took 11 seconds. Training speed 251 pps. Validation speed 1504 pps.\n", + "INFO:mlp.optimisers:Epoch 46: Training cost (ce) is 0.257. Accuracy is 95.40%\n", + "INFO:mlp.optimisers:Epoch 46: Validation cost (ce) is 0.610. Accuracy is 91.76%\n", + "INFO:mlp.optimisers:Epoch 46: Took 11 seconds. Training speed 237 pps. 
Validation speed 1493 pps.\n", + "INFO:mlp.optimisers:Epoch 47: Training cost (ce) is 0.282. Accuracy is 94.40%\n", + "INFO:mlp.optimisers:Epoch 47: Validation cost (ce) is 0.628. Accuracy is 91.62%\n", + "INFO:mlp.optimisers:Epoch 47: Took 10 seconds. Training speed 302 pps. Validation speed 1555 pps.\n", + "INFO:mlp.optimisers:Epoch 48: Training cost (ce) is 0.284. Accuracy is 94.50%\n", + "INFO:mlp.optimisers:Epoch 48: Validation cost (ce) is 0.556. Accuracy is 92.37%\n", + "INFO:mlp.optimisers:Epoch 48: Took 10 seconds. Training speed 266 pps. Validation speed 1488 pps.\n", + "INFO:mlp.optimisers:Epoch 49: Training cost (ce) is 0.263. Accuracy is 93.10%\n", + "INFO:mlp.optimisers:Epoch 49: Validation cost (ce) is 0.608. Accuracy is 92.42%\n", + "INFO:mlp.optimisers:Epoch 49: Took 12 seconds. Training speed 206 pps. Validation speed 1479 pps.\n", + "INFO:mlp.optimisers:Epoch 50: Training cost (ce) is 0.280. Accuracy is 94.30%\n", + "INFO:mlp.optimisers:Epoch 50: Validation cost (ce) is 0.628. Accuracy is 92.29%\n", + "INFO:mlp.optimisers:Epoch 50: Took 11 seconds. Training speed 237 pps. 
Validation speed 1479 pps.\n", + "INFO:root:Testing the model on test set:\n", + "INFO:root:MNIST test set accuracy is 91.75 %, cost (ce) is 0.624\n" ] } ], diff --git a/mlp/layers.py b/mlp/layers.py index c35b1be..777cda3 100644 --- a/mlp/layers.py +++ b/mlp/layers.py @@ -119,7 +119,7 @@ class MLP(object): d_hid = 1 if p_hid < 1: d_hid = self.rng.binomial(1, p_hid, size=self.activations[i].shape) - self.activations[i] = p_hid_scaler*d_hid*self.activations[i] + self.activations[i] *= p_hid_scaler * d_hid self.activations[i+1] = self.layers[i].fprop(self.activations[i]) return self.activations[-1] @@ -529,10 +529,11 @@ class Maxout(Linear): return h[:, :, 0] #get rid of the last reduced dimensison (of size 1) def bprop(self, h, igrads): - #hack for dropout backprop (ignore dropped neurons), note, this is not - #entirely correct when h fires at 0 exaclty (but is not dropped, when - #derivative should be 1. However, this is rather unlikely to happen and - #probably can be ignored right now + #hack for dropout backprop (ignore dropped neurons). Note, this is not + #entirely correct when h fires at 0 exactly (but is not dropped, in which case + #derivative should be 1). However, this is rather unlikely to happen (that h fires at 0) + #and probably can be ignored for now. Otherwise, one would have to keep the dropped unit + #indexes and zero grads according to them. igrads = (h != 0)*igrads #convert into the shape where upsampling is easier igrads_up = igrads.reshape(igrads.shape[0], self.max_odim, 1)