diff --git a/notebooks/01_Introduction.ipynb b/notebooks/01_Introduction.ipynb index d0d35b9..1a7493e 100644 --- a/notebooks/01_Introduction.ipynb +++ b/notebooks/01_Introduction.ipynb @@ -138,37 +138,11 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "scrolled": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hello world!\n", - "Hello again!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Alarming hello!\n" - ] - }, - { - "data": { - "text/plain": [ - "'And again!'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from __future__ import print_function\n", "import sys\n", @@ -231,26 +205,13 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "nbpresent": { "id": "2bced39d-ae3a-4603-ac94-fbb6a6283a96" } }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# use the matplotlib magic to specify to display plots inline in the notebook\n", "%matplotlib inline\n", @@ -306,52 +267,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "nbpresent": { "id": "978c1095-a9ce-4626-a113-e0be5fe51ecb" } }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAHsAAAB7CAYAAABUx/9/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAEIElEQVR4nO2dvS8sURiHd69bUagkWoWCqER8NETUohIiUSGRSAR/gEZC6yMS4qtCR6LQKEQlOjQK0RAf0VEoNG632fdNdsfetePM/p6nml+OzB6enD3eM2dm0l9fXynQ4M9vdwDiA9lCIFsIZAuBbCGQLcTfiHbqsuSRztXAyBYC2UIgWwhkC4FsIZAtBLKFQLYQyBYC2UIgWwhkC4FsIZAtBLKFQLYQyBYC2UIgWwhkC4FsIZAtBLKFQLYQyBYC2UJE3f4jwefnp8nn5+cmn5ycmLy9vW3yy8uLyc3NzSbv7++bXF9f/1/9LBZGthDIFgLZQqQjnpZUlrfsbm5umry2tmby5eWlyf5vlE6nC2ofGBgweXd39/udLRxu2QVkS4FsIcqmzo6qlaenpzPHV1dXpq2ystLkzs5OkxcWFkxua2szeXl5OednpVKp1PPzc65uxwojWwhkC4FsIcqmzvbz5szMjMnZv6dfu15dXTW5tbW1oM+uqKgw2dfZ/n+Cu7s7k2tqagr6vAioswHZUiBbiMTU2b6OnpiYMHlra8tkP28ODg5mjjc2Nkybn1M9Dw8PJvs6O+ptDC0tLSZXVVXl/flSwcgWAtlCIFuIxNTZY2NjJu/s7Jjc29tr8tDQkMn9/f05z/3x8WHy0tKSySsrKya/vr6aHHU9++bmxuQS70GjzgZkS4FsIYKds6empkz2a9+1tbUmPz095T1fdp3ur3V3d3ebXOges7q6OpP39vZMLnStvUiYswHZUgS7XHp9fW2y/+ocGRkx+fj4OO/5ZmdnM8d+q7A/t88e3+77GrX8+lswsoVAthDIFiKYOdtfwnx/fzfZlz/z8/N52/OVT75s87fcerq6ukxeX183OdQ52sPIFgLZQiBbiGCWS9/e3kxubGw02c+rUUuafrvwwcFB5nhyctK0HR0d5T13zJcoi4XlUkC2FMgWIpg6u7q62uTHx8cfPX/2/wQXFxemzc/3i4uLJgc+R38bRrYQyBYC2UIEM2eXmp6ensyx3wo8Ojpq8vj4eCx9ihtGthDIFgLZQgSzNl4sfm29oaHB5Oy19b6+PtN2eHhYuo7FD2vjgGwpkC1E2dTZfi+4r6Wzr1HPzc3F0qfQYGQLgWwhkC1EYufs29tbk6Nuu83e693U1FS6jgUMI1sIZAuBbCESM2f7fWPDw8Mm+zn69PTUZP8qCEUY2UIgWwhkC5GYOdu/UtG/esE/nso/9hkY2VIgW4hgv8b917Z/G70vtfxTBZPy6Is4YWQLgWwhkC1EMFuJ7+/vTW5vbzfZv6n27OzMZJZDM7CVGJAtBbKFCKbO9rfv+K3A/u0/HR0dJe9TucHI
FgLZQiBbiGDqbPgxqLMB2VIgW4ioOjv/O48gUTCyhUC2EMgWAtlCIFsIZAvxD/qZFpZUkkmyAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Image target: [9]\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAHsAAAB7CAYAAABUx/9/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAEhUlEQVR4nO2dzSt8XwDGz+jrpSjMQojIy1bykqwURYqUjYUFNmLBgv/APyFZKIqNjaQoKREhsvGysbDxuvFW3pLv7n7vc37N3PEbd8x4ns/qPJ2Zc48+zpzOmXvPBD4/P43gIOmnOyBih2QTIdlESDYRkk2EZBPxx6Ne67LEIxCqQiObCMkmQrKJkGwiJJsIySZCsomQbCIkmwjJJkKyiZBsIiSbCMkmQrKJkGwiJJsIySZCsomQbCIkmwjJJsLrVuJfyc3NDeTl5eWwr29ra4McDAa/vU+xQCObCMkmQrKJ8HXOLikpccoHBwdQl5WV5eel/8PR0ZFTrq2thbrX19ew783OzoY8NTUFub29PcrexQaNbCIkmwjJJiLgcVpSVI/sJiX9+186PDyEuoqKimia9sSeh6uqqpzyyckJ1OXm5kK+vr4O27b77zLGmL29PciVlZUR99MH9MiukGwqJJuImO2Nb2xsQPZ7zr68vIR8enrqlHt7e6FufHwc8vz8POT+/n7ILy8vkG9vb/9vN2OKRjYRkk2EZBMRszk7PT09VpcyxhhzdnYW8WtTUlIgd3d3Q354eIA8OjoK2V6n29h7Gfv7+5Df39+dcn19ffjORoFGNhGSTYRkE+Hr3nhqaqpTPj4+hrrS0tJomvbk6ekJcnFxsVMuLy+Huq2tLciBQMjtZWOMMRcXF5Dz8/PDvv75+Rmy/be7v+v3mv8jQHvjQrKpkGwifF1n9/T0OGW/52ibtLQ0yO618s7ODtTZ++j2HPzx8QE5OTkZ8vb2NuSVlRXIc3NzkDMyMiB/wzwdERrZREg2EZJNhK/rbPd9YO4190/gvrd7aWkJ6gYGBiDn5eVBnpmZgfyVfXdjjCksLIS8vr4Ouaio6EvteaB1tpBsKnxdev30R7ebsrKykHUTExNfaqumpgZyXV0d5Orqasitra2Qc3JyvnS970IjmwjJJkKyifg1x2ycn59DHh4ehry4uOiUvX4gvqurC/LY2Bhk+yvSREEjmwjJJkKyifB1u9RPNjc3IXd0dEC+u7uDnJmZGbKt+/t7yLu7u5DtdXOco+1SIdlUSDYRCbPOtr9WtI+jsm8dXlhYgNzS0uKUJycnoW5oaOg7uhj3aGQTIdlESDYRCTNnDw4OQnY/5mqMMaurq5AbGhpCtvX29hb2Wj/1fbPfaGQTIdlESDYRcTtn24/F2vvVIyMjkMPN0TbT09OQm5ubIRcUFETcViKhkU2EZBMh2UTE7Zzt/pkHY4x5fHyEvLa2BrmpqQmyfcSk+zFa+zjs2dlZyF7HbCQqGtlESDYRkk1E3M7ZXthHWzQ2Nkb8XvfxH8YY09nZ+S19inc0somQbCIkm4i4vW/cXlf39fVBtu8xs39+yX18pf1+e1/dPoI6wdF940KyqZBsIuJ2zvbi6uoKsj3vBoPBWHYnntCcLSSbioT9GBch0ce4kGwqJJsIySZCsomQbCIkmwjJJkKyiZBsIiSbCMkmQrKJkGwiJJsIr8d/fuezq6RoZBMh2URINhGSTYRkEyHZRPwFDfX8wxWlSWcAAAAASUVORK5CYII=\n", - 
"text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Image target: [8]\n" - ] - } - ], + "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", @@ -373,7 +295,10 @@ " which_set='valid', batch_size=1, max_num_batches=2, shuffle_order=True)\n", "\n", "for inputs, target in mnist_dp:\n", - " show_single_image(inputs.reshape((28, 28)))\n", + " # The reshape operation reorganizes data from 1D array of size 784 to 2D array of size 28x28\n", + " # See notes in the next cell\n", + " square_inputs = inputs.reshape((28, 28))\n", + " show_single_image(square_inputs)\n", " print('Image target: {0}'.format(target))" ] }, @@ -394,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -417,7 +342,7 @@ "\n", "`MNISTDataProvider` as `targets` currently returns a vector of integers, each element in this vector represents an the integer ID of the class the corresponding data-point represents. \n", "\n", - "For training of neural networks a 1-of-K representation of multi-class targets is more useful. Instead of representing class identity by an integer ID, for each data point a vector of length equal to the number of classes is created, will all elements zero except for the element corresponding to the class ID. \n", + "It is easier to train neural networks using a 1-of-K representation of multi-class targets. 
Instead of representing class identity by an integer, each target is replaced by a vector of length equal to the number of classes whose values are zero everywhere except on the index corresponding to the class.\n", "\n", "For instance, given a batch of 5 integer targets `[2, 2, 0, 1, 0]` and assuming there are 3 different classes \n", "the corresponding 1-of-K encoded targets would be\n", @@ -444,9 +369,10 @@ " which_set='valid', batch_size=5, max_num_batches=5, shuffle_order=False)\n", "\n", "for inputs, targets in mnist_dp:\n", + " # Check that values are either 0 or 1\n", + " assert np.all(np.logical_or(targets == 0., targets == 1.))\n", + " # Check that there is exactly a single 1\n", " assert np.all(targets.sum(-1) == 1.)\n", - " assert np.all(targets >= 0.)\n", - " assert np.all(targets <= 1.)\n", " print(targets)" ] }, @@ -463,7 +389,7 @@ "\n", "Here you will write your own data provider `MetOfficeDataProvider` that wraps [weather data for south Scotland](http://www.metoffice.gov.uk/hadobs/hadukp/data/daily/HadSSP_daily_qc.txt). A previous version of this data has been stored in `data` directory for your convenience and skeleton code for the class provided in `mlp/data_providers.py`.\n", "\n", - "The data is organised in the text file as a table, with the first two columns indexing the year and month of the readings and the following 31 columns giving daily precipitation values for the corresponding month. As not all months have 31 days some of entries correspond to non-existing days. These values are indicated by a non-physical value of `-99.9`.\n", "\n", + "The data is organised in the text file as a table, with the first two columns indexing the year and month of the readings and the following 31 columns giving daily precipitation values for the corresponding month. As not all months have 31 days some of the entries correspond to non-existing days. 
These values are indicated by a non-physical value of `-99.9`.\n", "\n", " * You should read all of the data from the file ([`np.loadtxt`](http://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html) may be useful for this) and then filter out the `-99.9` values and collapse the table to a one-dimensional array corresponding to a sequence of daily measurements for the whole period data is available for. [NumPy's boolean indexing feature](http://docs.scipy.org/doc/numpy/user/basics.indexing.html#boolean-or-mask-index-arrays) could be helpful here.\n", " * A common initial preprocessing step in machine learning tasks is to normalise data so that it has zero mean and a standard deviation of one. Normalise the data sequence so that its overall mean is zero and standard deviation one.\n", @@ -472,7 +398,7 @@ " [1, 2], 3\n", " [4, 5], 6\n", " ```\n", - " * **Extension**: Have the data provider instead overlapping windows of the sequence so that more training data instances are produced. For example for the sequence `[1, 2, 3, 4, 5, 6]` the corresponding `input, target` pairs would be\n", + " * **Extension**: The current data provider only produces `len(data)/window_size` sample points. A better approach is to have it return overlapping windows of the sequence so that more training data instances are produced. For example for the sequence `[1, 2, 3, 4, 5, 6]` the corresponding `input, target` pairs would be\n", "\n", "```\n", "[1, 2], 3\n", @@ -489,7 +415,8 @@ "metadata": { "nbpresent": { "id": "c8553a56-9f25-4198-8a1a-d7e9572b4382" - } + }, + "scrolled": false }, "outputs": [], "source": [ @@ -529,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.9.7" } }, "nbformat": 4,