From 236fe5253bcca260604f3fe5bd71e92e31abdfc8 Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Thu, 22 Sep 2016 15:33:59 +0100 Subject: [PATCH] Adding more test code and expanding instructions in first lab notebook. --- notebooks/01_Introduction.ipynb | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/notebooks/01_Introduction.ipynb b/notebooks/01_Introduction.ipynb index 6e48440..fc6e823 100644 --- a/notebooks/01_Introduction.ipynb +++ b/notebooks/01_Introduction.ipynb @@ -276,15 +276,15 @@ }, "outputs": [], "source": [ + "%matplotlib inline\n", + "import numpy as np\n", "import matplotlib.pyplot as plt\n", - "import matplotlib.gridspec as gridspec\n", - "import matplotlib.cm as cm\n", "import mlp.data_providers as data_providers\n", "\n", "def show_single_image(img, fig_size=(2, 2)):\n", " fig = plt.figure(figsize=fig_size)\n", " ax = fig.add_subplot(111)\n", - " ax.imshow(img, cmap=cm.Greys_r)\n", + " ax.imshow(img, cmap='Greys')\n", " ax.axis('off')\n", " plt.show()\n", " return fig, ax\n", @@ -352,7 +352,7 @@ "```\n", "\n", " * Implement the `to_one_of_k` method of `MNISTDataProvider` class. \n", - " * Uncomment and modify an appropriate line in the `next` method, so the raw targets are converted to 1-of-K coding. \n", + " * Uncomment the overloaded `next` method, so the raw targets are converted to 1-of-K coding. \n", " * Test your code by running the the cell below." ] }, @@ -385,11 +385,11 @@ "source": [ "### Exercise 3\n", "\n", - "Write your own data provider `MetOfficeDataProvider` that wraps [weather data for south Scotland](http://www.metoffice.gov.uk/hadobs/hadukp/data/daily/HadSSP_daily_qc.txt). A previous version of this data has been stored in `data` directory for your convenience.\n", + "Here you will write your own data provider `MetOfficeDataProvider` that wraps [weather data for south Scotland](http://www.metoffice.gov.uk/hadobs/hadukp/data/daily/HadSSP_daily_qc.txt). 
A previous version of this data has been stored in the `data` directory for your convenience and skeleton code for the class is provided in `mlp/data_providers.py`.\n", "\n", "The data is organised in the text file as a table, with the first two columns indexing the year and month of the readings and the following 31 columns giving daily precipitation values for the corresponding month. As not all months have 31 days some of entries correspond to non-existing days. These values are indicated by a non-physical value of `-99.9`.\n", "\n", - " * You should read all of the data from the file (`np.loadtxt` may be useful for this) and then filter out the `-99.9` values and collapse the table to one-dimensional array corresponding to a sequence of daily measurements for the whole period data is available for.\n", + " * You should read all of the data from the file ([`np.loadtxt`](http://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html) may be useful for this) and then filter out the `-99.9` values and collapse the table to a one-dimensional array corresponding to a sequence of daily measurements for the whole period data is available for. [NumPy's boolean indexing feature](http://docs.scipy.org/doc/numpy/user/basics.indexing.html#boolean-or-mask-index-arrays) could be helpful here.\n", " * A common initial preprocessing step in machine learning tasks is to normalise data so that it has zero mean and a standard deviation of one. Normalise the data sequence so that its overall mean is zero and standard deviation one.\n", " * Each data point in the data provider should correspond to a window of length specified in the `__init__` method as `window_size` of this contiguous data sequence, with the model inputs being the first `window_size - 1` elements of the window and the target output being the last element of the window. 
For example if the original data sequence was `[1, 2, 3, 4, 5, 6]` and `window_size=3` then `input, target` pairs iterated over by the data provider should be\n", " ```\n", @@ -418,16 +418,27 @@ }, "outputs": [], "source": [ + "batch_size = 3\n", "for window_size in [2, 5, 10]:\n", " met_dp = data_providers.MetOfficeDataProvider(\n", - " window_size=window_size, batch_size=5, max_num_batches=5, shuffle_order=False)\n", + " window_size=window_size, batch_size=batch_size,\n", + " max_num_batches=1, shuffle_order=False)\n", + " fig = plt.figure(figsize=(6, 3))\n", + " ax = fig.add_subplot(111)\n", + " ax.set_title('Window size {0}'.format(window_size))\n", + " ax.set_xlabel('Day in window')\n", + " ax.set_ylabel('Normalised reading')\n", + " # iterate over data provider batches checking size and plotting\n", " for inputs, targets in met_dp:\n", - " assert inputs.shape == (5, window_size - 1)\n", - " assert targets.shape == (5, )" + " assert inputs.shape == (batch_size, window_size - 1)\n", + " assert targets.shape == (batch_size, )\n", + " ax.plot(np.c_[inputs, targets].T, '.-')\n", + " ax.plot([window_size - 1] * batch_size, targets, 'ko')" ] } ], "metadata": { + "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python",