Merge pull request #17 from kungfujam/master

Clarify Exercise 1
2015-10-19 11:45:28 +01:00 · 2015-10-19 11:45:28 +01:00 · 929237301b
commit 929237301b
parent 3cb5f81736 80b025e525
2 changed files with 60 additions and 44 deletions
--- a/01_Linear_Models.ipynb
+++ b/01_Linear_Models.ipynb
@ -97,7 +97,7 @@
    "\\end{equation}\n",
    "$\n",
    "\n",
-    "where both $\\mathbf{X}\\in\\mathbb{R}^{B\\times D}$ and $\\mathbf{Y}\\in\\mathbb{R}^{B\\times K}$ are matrices, and $\\mathbf{b}$ needs to be <a href=\"http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html\">broadcasted</a> $B$ times (numpy will do this by default). However, we will not make an explicit distinction between a special case for $B=1$ and $B>1$ and simply use equation (3) instead, although $\\mathbf{x}$ and hence $\\mathbf{y}$ could be matrices. From an implementation point of view, it does not matter.\n",
+    "where $\\mathbf{W} \\in \\mathbb{R}^{D\\times K}$ and both $\\mathbf{X}\\in\\mathbb{R}^{B\\times D}$ and $\\mathbf{Y}\\in\\mathbb{R}^{B\\times K}$ are matrices, and $\\mathbf{b}\\in\\mathbb{R}^{1\\times K}$ needs to be <a href=\"http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html\">broadcasted</a> $B$ times (numpy will do this by default). However, we will not make an explicit distinction between a special case for $B=1$ and $B>1$ and simply use equation (3) instead, although $\\mathbf{x}$ and hence $\\mathbf{y}$ could be matrices. From an implementation point of view, it does not matter.\n",
    "\n",
    "The desired functionality for matrix multiplication in numpy is provided by the <a href=\"http://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html\">numpy.dot</a> function. If you haven't used it so far, get familiar with it as we will use it extensively."
   ]
@ -132,9 +132,16 @@
   "source": [
    "## Exercise 1 \n",
    "\n",
-    "Using `numpy.dot`, implement **forward** propagation through the linear transform defined by equations (3) and (4) for $B=1$ and $B>1$. As data ($\\mathbf{x}$) use `MNISTDataProvider` introduced last week. For the case when $B=1$, write a function to compute the 1st output ($y_1$) using equations (1) and (2). Check if the output is the same as the corresponding one obtained with numpy. \n",
+    "Using `numpy.dot`, implement **forward** propagation through the linear transform defined by equations (3) and (4) for $B=1$ and $B>1$ i.e. use parameters $\\mathbf{W}$ and $\\mathbf{b}$ with data $\\mathbf{X}$ to determine $\\mathbf{Y}$. Use `MNISTDataProvider` (introduced last week) to generate $\\mathbf{X}$. We are going to write a function for each equation:\n",
+    "1. `y1_equation_1`: Return the value of the $1^{st}$ dimension of $\\mathbf{y}$ (the output of the first output node) given a single training data point $\\mathbf{x}$ using a sum\n",
+    "1. `y1_equation_2`: Repeat above using vector multiplication (use `numpy.dot()`)\n",
+    "1. `y_equation_3`: Return the value of $\\mathbf{y}$ (the whole output layer) given a single training data point $\\mathbf{x}$\n",
+    "1. `Y_equation_4`: Return the value of $\\mathbf{Y}$ given $\\mathbf{X}$\n",
    "\n",
-    "Tip: To generate random data you can use `random_generator.uniform(-0.1, 0.1, (D, 10))` from above."
+    "We have initialised $\\mathbf{b}$ to zeros and randomly generated $\\mathbf{W}$ for you. The constants introduced above are:\n",
+    "* The number of data points $B = 3$\n",
+    "* The dimensionality of the input $D = 784$\n",
+    "* The dimensionality of the output $K = 10$"
   ]
  },
  {
@ -148,9 +155,11 @@
    "from mlp.dataset import MNISTDataProvider\n",
    "\n",
    "mnist_dp = MNISTDataProvider(dset='valid', batch_size=3, max_num_batches=1, randomize=False)\n",
-    "\n",
+    "B = 3\n",
+    "D = 784\n",
+    "K = 10\n",
    "irange = 0.1\n",
-    "W = random_generator.uniform(-irange, irange, (784,10)) \n",
+    "W = random_generator.uniform(-irange, irange, (D, K)) \n",
    "b = numpy.zeros((10,))\n"
   ]
  },
@ -176,20 +185,21 @@
    "    #use numpy.dot\n",
    "    raise NotImplementedError()\n",
    "\n",
-    "def y_equation_4(x, W, b):\n",
+    "def Y_equation_4(x, W, b):\n",
    "    #use numpy.dot\n",
    "    raise NotImplementedError()\n",
    "\n",
-    "for x, t in mnist_dp:\n",
-    "    y1e1 = y1_equation_1(x[0], W, b)\n",
-    "    y1e2 = y1_equation_2(x[0], W, b)\n",
-    "    ye3 = y_equation_3(x, W, b)\n",
-    "    ye4 = y_equation_4(x, W, b)\n",
+    "for X, t in mnist_dp:\n",
+    "    n = 0\n",
+    "    y1e1 = y1_equation_1(X[n], W, b)\n",
+    "    y1e2 = y1_equation_2(X[n], W, b)\n",
+    "    ye3 = y_equation_3(X[n], W, b)\n",
+    "    Ye4 = Y_equation_4(X, W, b)\n",
    "\n",
    "print 'y1e1', y1e1\n",
    "print 'y1e2', y1e2\n",
    "print 'ye3', ye3\n",
-    "print 'ye4', ye4\n",
+    "print 'Ye4', Ye4\n",
    "    "
   ]
  },
@ -632,7 +642,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
+   "version": "2.7.10"
  }
 },
 "nbformat": 4,
--- a/01_Linear_Models_solution.ipynb
+++ b/01_Linear_Models_solution.ipynb
@ -136,7 +136,7 @@
    "\\end{equation}\n",
    "$\n",
    "\n",
-    "where both $\\mathbf{X}\\in\\mathbb{R}^{B\\times D}$ and $\\mathbf{Y}\\in\\mathbb{R}^{B\\times K}$ are matrices, and $\\mathbf{b}$ needs to be <a href=\"http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html\">broadcasted</a> $B$ times (numpy will do this by default). However, we will not make an explicit distinction between a special case for $B=1$ and $B>1$ and simply use equation (3) instead, although $\\mathbf{x}$ and hence $\\mathbf{y}$ could be matrices. From an implementation point of view, it does not matter.\n",
+    "where $\\mathbf{W} \\in \\mathbb{R}^{D\\times K}$ and both $\\mathbf{X}\\in\\mathbb{R}^{B\\times D}$ and $\\mathbf{Y}\\in\\mathbb{R}^{B\\times K}$ are matrices, and $\\mathbf{b}\\in\\mathbb{R}^{1\\times K}$ needs to be <a href=\"http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html\">broadcasted</a> $B$ times (numpy will do this by default). However, we will not make an explicit distinction between a special case for $B=1$ and $B>1$ and simply use equation (3) instead, although $\\mathbf{x}$ and hence $\\mathbf{y}$ could be matrices. From an implementation point of view, it does not matter.\n",
    "\n",
    "The desired functionality for matrix multiplication in numpy is provided by the <a href=\"http://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html\">numpy.dot</a> function. If you haven't used it so far, get familiar with it as we will use it extensively."
   ]
@ -152,7 +152,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
@ -172,14 +172,21 @@
   "source": [
    "## Exercise 1 \n",
    "\n",
-    "Using numpy.dot, implement **forward** propagation through the linear transform defined by equations (3) and (4) for $B=1$ and $B>1$. As data ($\\mathbf{x}$) use `MNISTDataProvider` from previous laboratories. For case when $B=1$ write a function to compute the 1st output ($y_1$) using equations (1) and (2). Check if the output is the same as the corresponding one obtained with numpy. \n",
+    "Using `numpy.dot`, implement **forward** propagation through the linear transform defined by equations (3) and (4) for $B=1$ and $B>1$ i.e. use parameters $\\mathbf{W}$ and $\\mathbf{b}$ with data $\\mathbf{X}$ to determine $\\mathbf{Y}$. Use `MNISTDataProvider` (introduced last week) to generate $\\mathbf{X}$. We are going to write a function for each equation:\n",
+    "1. `y1_equation_1`: Return the value of the $1^{st}$ dimension of $\\mathbf{y}$ (the output of the first output node) given a single training data point $\\mathbf{x}$ using a sum\n",
+    "1. `y1_equation_2`: Repeat above using vector multiplication (use `numpy.dot()`)\n",
+    "1. `y_equation_3`: Return the value of $\\mathbf{y}$ (the whole output layer) given a single training data point $\\mathbf{x}$\n",
+    "1. `Y_equation_4`: Return the value of $\\mathbf{Y}$ given $\\mathbf{X}$\n",
    "\n",
-    "Tip: To generate random data you can use `random_generator.uniform(-0.1, 0.1, (D, 10))` from the preceeding cell."
+    "We have initialised $\\mathbf{b}$ to zeros and randomly generated $\\mathbf{W}$ for you. The constants introduced above are:\n",
+    "* The number of data points $B = 3$\n",
+    "* The dimensionality of the input $D = 784$\n",
+    "* The dimensionality of the output $K = 10$"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
@ -188,15 +195,17 @@
    "from mlp.dataset import MNISTDataProvider\n",
    "\n",
    "mnist_dp = MNISTDataProvider(dset='valid', batch_size=3, max_num_batches=1, randomize=False)\n",
-    "\n",
+    "B = 3\n",
+    "D = 784\n",
+    "K = 10\n",
    "irange = 0.1\n",
-    "W = random_generator.uniform(-irange, irange, (784,10)) \n",
+    "W = random_generator.uniform(-irange, irange, (D, K)) \n",
    "b = numpy.zeros((10,))\n"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
@ -207,13 +216,9 @@
     "text": [
      "y1e1 0.55861474982\n",
      "y1e2 0.55861474982\n",
-      "ye3 [[ 0.55861475  0.79450077  0.17439693  0.00265688  0.66272539 -0.09985686\n",
-      "   0.56468591  0.58105588 -0.18613727  0.08151257]\n",
-      " [-0.43965864  0.59573972 -0.22691119  0.26767124 -0.31343979  0.07224664\n",
-      "  -0.19616183  0.0851733  -0.24088286 -0.19305162]\n",
-      " [-0.20176359  0.42394166 -1.03984446  0.15492101  0.15694745 -0.53741022\n",
-      "   0.05887668 -0.21124527 -0.07870156 -0.00506471]]\n",
-      "ye4 [[ 0.55861475  0.79450077  0.17439693  0.00265688  0.66272539 -0.09985686\n",
+      "ye3 [ 0.55861475  0.79450077  0.17439693  0.00265688  0.66272539 -0.09985686\n",
+      "  0.56468591  0.58105588 -0.18613727  0.08151257]\n",
+      "Ye4 [[ 0.55861475  0.79450077  0.17439693  0.00265688  0.66272539 -0.09985686\n",
      "   0.56468591  0.58105588 -0.18613727  0.08151257]\n",
      " [-0.43965864  0.59573972 -0.22691119  0.26767124 -0.31343979  0.07224664\n",
      "  -0.19616183  0.0851733  -0.24088286 -0.19305162]\n",
@ -223,36 +228,37 @@
    }
   ],
   "source": [
-    "\n",
    "mnist_dp.reset()\n",
    "\n",
    "#implement following functions, then run the cell\n",
    "def y1_equation_1(x, W, b):\n",
-    "    y1=0\n",
-    "    for j in xrange(0, x.shape[0]):\n",
-    "      y1 += x[j]*W[j,0]\n",
-    "    return y1 + b[0]\n",
+    "    k = 0\n",
+    "    s = 0\n",
+    "    for j in xrange(len(x)):\n",
+    "        s += x[j] * W[j,k]\n",
+    "    return b[k] + s\n",
    "    \n",
    "def y1_equation_2(x, W, b):\n",
-    "    return numpy.dot(x, W[:,0].T) + b[0]\n",
+    "    k = 0\n",
+    "    return numpy.dot(x, W[:,k]) + b[k]\n",
    "\n",
    "def y_equation_3(x, W, b):\n",
-    "    return numpy.dot(x,W) + b\n",
+    "    return numpy.dot(x, W) + b\n",
    "\n",
    "def y_equation_4(x, W, b):\n",
-    "    return numpy.dot(x,W) + b\n",
+    "    return numpy.dot(x, W) + b\n",
    "\n",
-    "for x, t in mnist_dp:\n",
-    "    y1e1 = y1_equation_1(x[0], W, b)\n",
-    "    y1e2 = y1_equation_2(x[0], W, b)\n",
-    "    ye3 = y_equation_3(x, W, b)\n",
-    "    ye4 = y_equation_4(x, W, b)\n",
+    "for X, t in mnist_dp:\n",
+    "    n = 0\n",
+    "    y1e1 = y1_equation_1(X[n], W, b)\n",
+    "    y1e2 = y1_equation_2(X[n], W, b)\n",
+    "    ye3 = y_equation_3(X[n], W, b)\n",
+    "    Ye4 = y_equation_4(X, W, b)\n",
    "\n",
    "print 'y1e1', y1e1\n",
    "print 'y1e2', y1e2\n",
    "print 'ye3', ye3\n",
-    "print 'ye4', ye4\n",
-    "    "
+    "print 'Ye4', Ye4"
   ]
  },
  {
@ -882,7 +888,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
+   "version": "2.7.10"
  }
 },
 "nbformat": 4,