Minor fixes in the notebooks

This commit is contained in:
AntreasAntoniou 2017-10-02 01:21:49 +01:00
parent 78668638a5
commit a47ab50984
2 changed files with 312 additions and 100 deletions

File diff suppressed because one or more lines are too long

View File

@ -17,10 +17,8 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
@ -76,10 +74,8 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"size = 1000\n",
@ -96,11 +92,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.88 ms ± 26.5 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit -n 100 -r 3\n",
"c = np.empty(size)\n",
@ -117,11 +119,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.17 µs ± 1.87 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit -n 100 -r 3\n",
"c = a + b"
@ -160,10 +168,8 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def fprop(inputs, weights, biases):\n",
@ -181,7 +187,7 @@
" Returns:\n",
" outputs: Array of layer outputs of shape (batch_size, output_dim).\n",
" \"\"\"\n",
" raise NotImplementedError('Delete this and write your code here instead.')"
" raise NotImplementedError('Delete this raise statement and write your code here instead.')"
]
},
{
@ -193,11 +199,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "NotImplementedError",
"evalue": "Delete this raise command and write your code here instead.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-0f5e9427204b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mtrue_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m6.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m6.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m17.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m50.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mallclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfprop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbiases\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrue_outputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Wrong outputs computed.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-6-fb37a1b342aa>\u001b[0m in \u001b[0;36mfprop\u001b[0;34m(inputs, weights, biases)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mArray\u001b[0m \u001b[0mof\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0mof\u001b[0m \u001b[0mshape\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_dim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \"\"\"\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Delete this raise command and write your code here instead.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNotImplementedError\u001b[0m: Delete this raise command and write your code here instead."
]
}
],
"source": [
"inputs = np.array([[0., -1., 2.], [-6., 3., 1.]])\n",
"weights = np.array([[2., -3., -1.], [-5., 7., 2.]])\n",
@ -220,16 +237,30 @@
"\n",
"#### `numpy.dot` function\n",
"\n",
"Matrix-matrix, matrix-vector and vector-vector (dot) products can all be computed in NumPy using the [`dot`](http://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html) function. For example if `A` and `B` are both two dimensional arrays, then `C = np.dot(A, B)` or equivalently `C = A.dot(B)` will both compute the matrix product of `A` and `B` assuming `A` and `B` have compatible dimensions. Similarly if `a` and `b` are one dimensional arrays then `c = np.dot(a, b)` / `c = a.dot(b)` will compute the [scalar / dot product](https://en.wikipedia.org/wiki/Dot_product) of the two arrays. If `A` is a two-dimensional array and `b` a one-dimensional array `np.dot(A, b)` / `A.dot(b)` will compute the matrix-vector product of `A` and `b`. Examples of all three of these product types are shown in the cell below:"
"Matrix-matrix, matrix-vector and vector-vector (dot) products can all be computed in NumPy using the [`dot`](http://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html) function. For example if `A` and `B` are both two-dimensional arrays, then `C = np.dot(A, B)` or equivalently `C = A.dot(B)` will both compute the matrix product of `A` and `B` assuming `A` and `B` have compatible dimensions. Similarly if `a` and `b` are one-dimensional arrays then `c = np.dot(a, b)` (which is equivalent to `c = a.dot(b)`) will compute the [scalar / dot product](https://en.wikipedia.org/wiki/Dot_product) of the two arrays. If `A` is a two-dimensional array and `b` a one-dimensional array then `np.dot(A, b)` (which is equivalent to `A.dot(b)`) will compute the matrix-vector product of `A` and `b`. Examples of all three of these product types are shown in the cell below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 6. 6. 6.]\n",
" [ 24. 24. 24.]\n",
" [ 42. 42. 42.]]\n",
"[[ 18. 24. 30.]\n",
" [ 18. 24. 30.]\n",
" [ 18. 24. 30.]]\n",
"[ 0.8 2.6 4.4]\n",
"[ 2.4 3. 3.6]\n",
"0.2\n"
]
}
],
"source": [
"# Initialise arrays with arbitrary values\n",
"A = np.arange(9).reshape((3, 3))\n",
@ -254,11 +285,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.1 1.2]\n",
" [ 2.1 3.2]\n",
" [ 4.1 5.2]]\n",
"[[-1. 0.]\n",
" [ 2. 3.]\n",
" [ 5. 6.]]\n",
"[[ 0. 0.2]\n",
" [ 0.2 0.6]\n",
" [ 0.4 1. ]]\n"
]
}
],
"source": [
"# Initialise arrays with arbitrary values\n",
"A = np.arange(6).reshape((3, 2))\n",
@ -332,9 +377,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"weights_init_range = 0.5\n",
@ -427,7 +470,7 @@
" Returns:\n",
" Scalar error function value.\n",
" \"\"\"\n",
" raise NotImplementedError('Delete this and write your code here instead.')\n",
" raise NotImplementedError('Delete this raise statement and write your code here instead.')\n",
" \n",
"def error_grad(outputs, targets):\n",
" \"\"\"Calculates gradient of error function with respect to model outputs.\n",
@ -440,7 +483,7 @@
" Gradient of error function with respect to outputs.\n",
" This will be an array of shape (batch_size, output_dim).\n",
" \"\"\"\n",
" raise NotImplementedError('Delete this and write your code here instead.')"
" raise NotImplementedError('Delete this raise clause and write your code here instead.')"
]
},
{
@ -453,9 +496,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"outputs = np.array([[1., 2.], [-1., 0.], [6., -5.], [-1., 1.]])\n",
@ -482,9 +523,9 @@
"Using the [chain rule for derivatives](https://en.wikipedia.org/wiki/Chain_rule#Higher_dimensions) we can write the partial derivative of the error function with respect to single elements of the weight matrix and bias vector as\n",
"\n",
"\\begin{equation}\n",
" \\frac{\\partial \\bar{E}}{\\partial W_{kj}} = \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial \\bar{E}}{\\partial y^{(n)}_k} \\frac{\\partial y^{(n)}_k}{\\partial W_{kj}} \\right\\rbrace\n",
" \\frac{\\partial E}{\\partial W_{kj}} = \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial E}{\\partial y^{(n)}_k} \\frac{\\partial y^{(n)}_k}{\\partial W_{kj}} \\right\\rbrace\n",
" \\quad \\textrm{and} \\quad\n",
" \\frac{\\partial \\bar{E}}{\\partial b_k} = \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial \\bar{E}}{\\partial y^{(n)}_k} \\frac{\\partial y^{(n)}_k}{\\partial b_k} \\right\\rbrace.\n",
" \\frac{\\partial E}{\\partial b_k} = \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial E}{\\partial y^{(n)}_k} \\frac{\\partial y^{(n)}_k}{\\partial b_k} \\right\\rbrace.\n",
"\\end{equation}\n",
"\n",
"From the definition of our model at the beginning we have \n",
@ -500,11 +541,11 @@
"Putting this together we get that\n",
"\n",
"\\begin{equation}\n",
" \\frac{\\partial \\bar{E}}{\\partial W_{kj}} = \n",
" \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial \\bar{E}}{\\partial y^{(n)}_k} x^{(n)}_j \\right\\rbrace\n",
" \\frac{\\partial E}{\\partial W_{kj}} = \n",
" \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial E}{\\partial y^{(n)}_k} x^{(n)}_j \\right\\rbrace\n",
" \\quad \\textrm{and} \\quad\n",
" \\frac{\\partial \\bar{E}}{\\partial b_{k}} = \n",
" \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial \\bar{E}}{\\partial y^{(n)}_k} \\right\\rbrace.\n",
" \\frac{\\partial E}{\\partial b_{k}} = \n",
" \\sum_{n=1}^N \\left\\lbrace \\frac{\\partial E}{\\partial y^{(n)}_k} \\right\\rbrace.\n",
"\\end{equation}\n",
"\n",
"Although this may seem a bit of a roundabout way to get to these results, this method of decomposing the error gradient with respect to the parameters in terms of the gradient of the error function with respect to the model outputs and the derivatives of the model outputs with respect to the model parameters, will be key when calculating the parameter gradients of more complex models later in the course.\n",
@ -532,7 +573,7 @@
" list of arrays of gradients with respect to the model parameters\n",
" `[grads_wrt_weights, grads_wrt_biases]`.\n",
" \"\"\"\n",
" raise NotImplementedError('Delete this and write your code here instead.')"
" raise NotImplementedError('Delete this raise statement and write your code here instead.')"
]
},
{
@ -545,9 +586,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"inputs = np.array([[1., 2., 3.], [-1., 4., -9.]])\n",
@ -579,15 +618,13 @@
" * In the [`mlp.layers`](/edit/mlp/layers.py) module, use your implementations of `fprop` and `grad_wrt_params` above to implement the corresponding methods in the skeleton `AffineLayer` class provided.\n",
" * In the [`mlp.errors`](/edit/mlp/errors.py) module use your implementation of `error` and `error_grad` to implement the `__call__` and `grad` methods respectively of the skeleton `SumOfSquaredDiffsError` class provided. Note `__call__` is a special Python method that allows an object to be used with a function call syntax.\n",
"\n",
"Run the cell below to use your completed `AffineLayer` and `SumOfSquaredDiffsError` implementations to train a single-layer model using batch gradient descent on the CCCP dataset."
"Run the cell below to use your completed `AffineLayer` and `SumOfSquaredDiffsError` implementations to train a single-layer model using batch gradient descent on the CCPP dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"from mlp.layers import AffineLayer\n",
@ -650,9 +687,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"metadata": {},
"outputs": [],
"source": [
"data_provider = CCPPDataProvider(\n",
@ -706,7 +741,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
@ -811,23 +845,23 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda env:mlp]",
"display_name": "Python 3",
"language": "python",
"name": "conda-env-mlp-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}