diff --git a/notebooks/03_Multiple_layer_models.ipynb b/notebooks/03_Multiple_layer_models.ipynb
index 26efdae..151074b 100644
--- a/notebooks/03_Multiple_layer_models.ipynb
+++ b/notebooks/03_Multiple_layer_models.ipynb
@@ -341,7 +341,7 @@
     "\\begin{equation}\n",
     "  y^{(b)}_k = \\textrm{Softmax}_k\\lpa\\vct{x}^{(b)}\\rpa = \\frac{\\exp(x^{(b)}_k)}{\\sum_{d=1}^D \\lbr \\exp(x^{(b)}_d) \\rbr}\n",
     "  \\qquad\n",
-    "  E^{(b)} = \\textrm{CrossEntropy}\\lpa\\vct{y}^{(b)},\\,\\vct{t}^{(b)}\\rpa = -\\sum_{d=1}^D \\lbr t^{(b)}_k \\log(y^{(b)}_k) \\rbr\n",
+    "  E^{(b)} = \\textrm{CrossEntropy}\\lpa\\vct{y}^{(b)},\\,\\vct{t}^{(b)}\\rpa = -\\sum_{d=1}^D \\lbr t^{(b)}_d \\log(y^{(b)}_d) \\rbr\n",
     "\\end{equation}\n",
     "\n",
     "and it can be shown (this is an instructive mathematical exercise if you want a challenge!) that\n",
@@ -355,7 +355,7 @@
     "Rather than computing the gradient in two steps like this we can instead wrap the softmax transformation in to the definition of the error function and make use of the simpler gradient expression above. More explicitly we define an error function as follows\n",
     "\n",
     "\\begin{equation}\n",
-    "  E^{(b)} = \\textrm{CrossEntropySoftmax}\\lpa\\vct{y}^{(b)},\\,\\vct{t}^{(b)}\\rpa = -\\sum_{d=1}^D \\lbr t^{(b)}_k \\log\\lsb\\textrm{Softmax}_k\\lpa \\vct{y}^{(b)}\\rpa\\rsb\\rbr\n",
+    "  E^{(b)} = \\textrm{CrossEntropySoftmax}\\lpa\\vct{y}^{(b)},\\,\\vct{t}^{(b)}\\rpa = -\\sum_{d=1}^D \\lbr t^{(b)}_d \\log\\lsb\\textrm{Softmax}_d\\lpa \\vct{y}^{(b)}\\rpa\\rsb\\rbr\n",
     "\\end{equation}\n",
     "\n",
     "with corresponding gradient\n",
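To make the corrected sum-over-`d` definition concrete, below is a minimal standalone NumPy sketch of the combined softmax + cross-entropy error and its gradient, checked against a central finite-difference estimate. The function names (`softmax`, `cross_entropy_softmax_error`, `cross_entropy_softmax_grad`) are illustrative only and are not taken from the notebook's `mlp` framework; the sketch assumes one-hot targets and an error averaged over the batch.

```python
import numpy as np

def softmax(x):
    """Row-wise softmax of a (batch_size, D) array of pre-activations."""
    # Subtract the per-row maximum before exponentiating for numerical stability.
    exp_x = np.exp(x - x.max(axis=-1, keepdims=True))
    return exp_x / exp_x.sum(axis=-1, keepdims=True)

def cross_entropy_softmax_error(y, t):
    """E = mean over batch of -sum_d t_d * log(Softmax_d(y))."""
    probs = softmax(y)
    return -np.mean(np.sum(t * np.log(probs), axis=-1))

def cross_entropy_softmax_grad(y, t):
    """Gradient of the combined error w.r.t. the pre-softmax inputs y:
    dE/dy_d = Softmax_d(y) - t_d, scaled by 1/batch_size for the mean."""
    return (softmax(y) - t) / y.shape[0]

# Quick finite-difference check of the analytic gradient.
rng = np.random.RandomState(0)
y = rng.randn(5, 3)
t = np.eye(3)[rng.randint(0, 3, size=5)]  # one-hot targets
eps = 1e-6
num_grad = np.zeros_like(y)
for idx in np.ndindex(*y.shape):
    y_plus, y_minus = y.copy(), y.copy()
    y_plus[idx] += eps
    y_minus[idx] -= eps
    num_grad[idx] = (cross_entropy_softmax_error(y_plus, t) -
                     cross_entropy_softmax_error(y_minus, t)) / (2 * eps)
assert np.allclose(num_grad, cross_entropy_softmax_grad(y, t), atol=1e-6)
```

Fusing the softmax into the error function is what yields the simple `softmax(y) - t` gradient referred to in the notebook text: it avoids propagating through the full softmax Jacobian and sidesteps taking the log of very small softmax outputs in a separate step.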