progress
commit bad8e42630 (parent 1a45e7d596)
@ -80,6 +80,7 @@ plot coordinates {
\\\cline{1-4}\cline{6-9}
GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$&&GD$_{0.01}$&GD$_{0.05}$&GD$_{0.1}$&SGD$_{0.01}$
\\\cline{1-4}\cline{6-9}
\multicolumn{9}{c}{test}\\
0.265&0.633&0.203&0.989&&2.267&1.947&3.91&0.032
\end{tabu}
\caption{Performance metrics of the networks trained in
53	TeX/Plots/fashion_mnist.tex	Normal file
@ -0,0 +1,53 @@
\begin{figure}[h]
\centering
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist0.pdf}
\caption{T-shirt/top}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist1.pdf}
\caption{Trouser}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist2.pdf}
\caption{Pullover}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist3.pdf}
\caption{Dress}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist4.pdf}
\caption{Coat}
\end{subfigure}\\
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist5.pdf}
\caption{Sandal}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist6.pdf}
\caption{Shirt}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist7.pdf}
\caption{Sneaker}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist8.pdf}
\caption{Bag}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/fashion_mnist9.pdf}
\caption{Ankle boot}
\end{subfigure}
\caption{The fashion MNIST dataset contains 70,000 preprocessed
product images from Zalando, categorized as
T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt,
Sneaker, Bag, and Ankle boot. Of these images 60,000 are used as
training images, while the rest are used to validate the trained models.}
\label{fig:MNIST}
\end{figure}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End:
79	TeX/Plots/gen_dropout.tex	Normal file
@ -0,0 +1,79 @@
\pgfplotsset{
compat=1.11,
legend image code/.code={
\draw[mark repeat=2,mark phase=2]
plot coordinates {
(0cm,0cm)
(0.3cm,0cm) %% default is (0.3cm,0cm)
(0.6cm,0cm) %% default is (0.6cm,0cm)
};%
}
}
\begin{figure}
\begin{subfigure}[h]{\textwidth}
\begin{tikzpicture}
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
height = 0.6\textwidth, ymin = 0.988, legend style={at={(0.9825,0.0175)},anchor=south east},
xlabel = {epoch}, ylabel = {Classification Accuracy}, cycle list/Dark2]
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Plots/Data/adam_datagen_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Plots/Data/adam_datagen_dropout_02_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Plots/Data/adam_datagen_dropout_04_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Plots/Data/adam_dropout_02_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Plots/Data/adam_dropout_04_full_mean.log};
\addplot [dashed] table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Plots/Data/adam_full_mean.log};

\addlegendentry{\footnotesize{G.}}
\addlegendentry{\footnotesize{G. + D. 0.2}}
\addlegendentry{\footnotesize{G. + D. 0.4}}
\addlegendentry{\footnotesize{D. 0.2}}
\addlegendentry{\footnotesize{D. 0.4}}
\addlegendentry{\footnotesize{Default}}
\end{axis}
\end{tikzpicture}
\caption{Classification accuracy}
\vspace{.25cm}
\end{subfigure}
\begin{subfigure}[h]{1.0\linewidth}
\begin{tabu} to \textwidth {@{} l *6{X[c]} @{}}
\multicolumn{7}{c}{Classification Accuracy}\Bstrut
\\\hline
&\textsc{Adam}&D. 0.2&D. 0.4&G.&G.+D.~0.2&G.+D.~0.4 \Tstrut \Bstrut
\\\hline
mean&0.9914&0.9918&0.9928&0.9937&0.9938&0.9940 \Tstrut \\
max& \\
min& \\
\multicolumn{7}{c}{Training Accuracy}\Bstrut
\\\hline
mean&0.9994&0.9990&0.9989&0.9967&0.9954&0.9926 \Tstrut \\
max& \\
min& \\

\end{tabu}
\caption{Mean, maximum, and minimum accuracy after 48 epochs of training.}
\end{subfigure}
\caption{Accuracy for the net given in ... with Dropout (D.),
data generation (G.), a combination, or neither (Default) implemented and trained
with \textsc{Adam}. For each epoch the 60,000 training samples
were used, or, with data generation, 10,000 steps each using a
batch of 60 generated data points. For each configuration the
model was trained 5 times and the average accuracies at each epoch
are given in (a). Mean, maximum, and minimum values of the accuracy on
the test and training set are given in (b).}
\end{figure}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End:
@ -7,6 +7,10 @@
\usepackage{tabu}
\usepackage{graphicx}
\usetikzlibrary{calc, 3d}
\usepgfplotslibrary{colorbrewer}

\newcommand\Tstrut{\rule{0pt}{2.6ex}} % = `top' strut
\newcommand\Bstrut{\rule[-0.9ex]{0pt}{0pt}} % = `bottom' strut

\begin{document}
\pgfplotsset{
@ -15,71 +19,80 @@ legend image code/.code={
\draw[mark repeat=2,mark phase=2]
plot coordinates {
(0cm,0cm)
(0.0cm,0cm) %% default is (0.3cm,0cm)
(0.0cm,0cm) %% default is (0.6cm,0cm)
(0.3cm,0cm) %% default is (0.3cm,0cm)
(0.6cm,0cm) %% default is (0.6cm,0cm)
};%
}
}
\begin{figure}
\begin{subfigure}[b]{\textwidth}
\begin{subfigure}[h]{\textwidth}
\begin{tikzpicture}
\begin{axis}[tick style = {draw = none}, width = \textwidth,
height = 0.7\textwidth, ymin = 0.92, legend style={at={(0.9825,0.75)},anchor=north east},
xlabel = {epoch}, ylabel = {Classification Accuracy}]
\begin{axis}[legend cell align={left},yticklabel style={/pgf/number format/fixed,
/pgf/number format/precision=3},tick style = {draw = none}, width = \textwidth,
height = 0.6\textwidth, ymin = 0.988, legend style={at={(0.9825,0.0175)},anchor=south east},
xlabel = {epoch}, ylabel = {Classification Accuracy}, cycle list/Dark2]
% \addplot [dashed] table
% [x=epoch, y=accuracy, col sep=comma, mark = none]
% {Data/adam_datagen_full.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Data/adagrad.log};
{Data/adam_datagen_full_mean.log};
% \addplot [dashed] table
% [x=epoch, y=accuracy, col sep=comma, mark = none]
% {Data/adam_datagen_dropout_02_full.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Data/adadelta.log};
{Data/adam_datagen_dropout_02_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Data/adam.log};
{Data/adam_datagen_dropout_04_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Data/adam_dropout_02_full_mean.log};
\addplot table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Data/adam_dropout_04_full_mean.log};
\addplot [dashed] table
[x=epoch, y=val_accuracy, col sep=comma, mark = none]
{Data/adam_full_mean.log};

\addlegendentry{\footnotesize{ADAGRAD}}
\addlegendentry{\footnotesize{ADADELTA}}
\addlegendentry{\footnotesize{ADAM}}
\addlegendentry{SGD$_{0.01}$}
\addlegendentry{\footnotesize{G.}}
\addlegendentry{\footnotesize{G. + D. 0.2}}
\addlegendentry{\footnotesize{G. + D. 0.4}}
\addlegendentry{\footnotesize{D. 0.2}}
\addlegendentry{\footnotesize{D. 0.4}}
\addlegendentry{\footnotesize{Default}}
\end{axis}
\end{tikzpicture}
%\caption{Classification accuracy}
\caption{Classification accuracy}
\vspace{.25cm}
\end{subfigure}
\begin{subfigure}[b]{\textwidth}
\begin{tikzpicture}
\begin{axis}[tick style = {draw = none}, width = \textwidth,
height = 0.7\textwidth, ymax = 0.5,
xlabel = {epoch}, ylabel = {Error Measure\vphantom{y}},ytick ={0,0.1,0.2,0.3,0.4,0.45,0.5}, yticklabels =
{0,0.1,0.2,0.3,0.4,\phantom{0.94},0.5}]
\addplot table
[x=epoch, y=val_loss, col sep=comma, mark = none] {Data/adagrad.log};
\addplot table
[x=epoch, y=val_loss, col sep=comma, mark = none] {Data/adadelta.log};
\addplot table
[x=epoch, y=val_loss, col sep=comma, mark = none] {Data/adam.log};

\addlegendentry{\footnotesize{ADAGRAD}}
\addlegendentry{\footnotesize{ADADELTA}}
\addlegendentry{\footnotesize{ADAM}}
\addlegendentry{SGD$_{0.01}$}

\end{axis}
\end{tikzpicture}
\caption{Performance metrics during training}
\end{subfigure}
\\~\\
\begin{subfigure}[b]{1.0\linewidth}
\begin{tabu} to \textwidth {@{} *3{X[c]}c*3{X[c]} @{}}
\multicolumn{3}{c}{Classification Accuracy}
&~&\multicolumn{3}{c}{Error Measure}
\\\cline{1-3}\cline{5-7}
ADAGRAD&ADADELTA&ADAM&&ADAGRAD&ADADELTA&ADAM
\\\cline{1-3}\cline{5-7}
1&1&1&&1&1&1
\begin{subfigure}[h]{1.0\linewidth}
\begin{tabu} to \textwidth {@{} l *6{X[c]} @{}}
\multicolumn{7}{c}{Classification Accuracy}\Bstrut
\\\hline
&\textsc{Adam}&D. 0.2&D. 0.4&G.&G.+D.~0.2&G.+D.~0.4 \Tstrut \Bstrut
\\\hline
mean&0.9994&0.9990&0.9989&0.9937&0.9938&0.9940 \Tstrut \\
max& \\
min& \\
\multicolumn{7}{c}{Training Accuracy}\Bstrut
\\\hline
mean&0.9914&0.9918&0.9928&0.9937&0.9938&0.9940 \Tstrut \\
max& \\
min& \\

\end{tabu}
\caption{Performance metrics after 20 epochs}
\caption{Mean, maximum, and minimum accuracy after 48 epochs of training.}
\end{subfigure}
\caption{Performance metrics of the network given in ... trained
with different optimization algorithms}
\caption{Accuracy for the net given in ... with Dropout (D.),
data generation (G.), a combination, or neither (Default) implemented and trained
with \textsc{Adam}. For each epoch the 60,000 training samples
were used, or, with data generation, 10,000 steps each using a
batch of 60 generated data points. For each configuration the
model was trained 5 times and the average accuracies at each epoch
are given in (a). Mean, maximum, and minimum values of the accuracy on
the test and training set are given in (b).}
\end{figure}

\begin{center}
@ -87,18 +100,23 @@ plot coordinates {
\centering
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Data/mnist0.pdf}
\caption{original\\image}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Data/mnist1.pdf}
\includegraphics[width=\textwidth]{Data/mnist_gen_zoom.pdf}
\caption{random\\zoom}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Data/mnist2.pdf}
\includegraphics[width=\textwidth]{Data/mnist_gen_shear.pdf}
\caption{random\\shear}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Data/mnist3.pdf}
\includegraphics[width=\textwidth]{Data/mnist_gen_rotation.pdf}
\caption{random\\rotation}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Data/mnist4.pdf}
\includegraphics[width=\textwidth]{Data/mnist_gen_shift.pdf}
\caption{random\\positional shift}
\end{subfigure}\\
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Data/mnist5.pdf}
@ -67,7 +67,7 @@ plot coordinates {
\end{tabu}
\caption{Performance metrics after 20 epochs}
\end{subfigure}
\caption{Performance metrics of the network given in ... trained
\caption{Classification accuracy on the test set and ... Performance metrics of the network given in ... trained
with different optimization algorithms}
\end{figure}
%%% Local Variables:
@ -450,7 +450,7 @@ $\gamma$ is divided by the sum of the squares of the past partial
derivatives in this parameter. This results in a monotonically
decreasing learning rate for each parameter: the learning rate decays
faster for parameters with large updates, whereas
parameters with small updates experience smaller decay. The ADAGRAD
parameters with small updates experience smaller decay. The \textsc{AdaGrad}
algorithm is given in Algorithm~\ref{alg:ADAGRAD}.

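Written out for a single parameter $i$, the update described above reads
as follows (a sketch in the notation of this section; $g_s$ denotes the
gradient at step $s$ and $\varepsilon$ is a small constant assumed here
for numerical stability):
\[
  (\Delta x_t)_i = -\frac{\gamma}{\sqrt{\sum_{s=1}^{t} (g_s)_i^2} + \varepsilon}\, (g_t)_i.
\]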
\begin{algorithm}[H]
@ -465,15 +465,15 @@ algorithm is given in Algorithm~\ref{alg:ADAGRAD}.
1, \dots,p$\;
Apply Update: $x_{t+1} \leftarrow x_t + \Delta x_t$\;
}
\caption{\textls{ADAGRAD}}
\caption{\textls{\textsc{AdaGrad}}}
\label{alg:ADAGRAD}
\end{algorithm}

Building on \textsc{AdaGrad}, \textcite{ADADELTA} developed the ... (ADADELTA)
in order to improve upon the two main drawbacks of ADAGRAD, namely the
Building on \textsc{AdaGrad}, \textcite{ADADELTA} developed the ... (\textsc{AdaDelta})
in order to improve upon the two main drawbacks of \textsc{AdaGrad}, namely the
continual decay of the learning rate and the need for a manually
selected global learning rate $\gamma$.
As ADAGRAD accumulates the squared gradients the learning rate will
As \textsc{AdaGrad} divides by the accumulated squared gradients, the learning rate will
eventually become vanishingly small.
In order to ensure that learning continues to make progress even after a
significant number of iterations, instead of summing the gradients a
@ -500,7 +500,7 @@ by these of the parameter update $\Delta x_t$. This proper
x^2]_{t-1} + (1-\rho)\Delta x_t^2$\;
Apply Update: $x_{t+1} \leftarrow x_t + \Delta x_t$\;
}
\caption{ADADELTA, \textcite{ADADELTA}}
\caption{\textsc{AdaDelta}, \textcite{ADADELTA}}
\label{alg:gd}
\end{algorithm}

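The accumulation that replaces \textsc{AdaGrad}'s sum is an exponentially
decaying average, which together with the unit correction yields (a sketch
following \textcite{ADADELTA}; $\rho$ is the decay rate and $\varepsilon$
a small constant):
\[
  E[g^2]_t = \rho E[g^2]_{t-1} + (1-\rho)\, g_t^2, \qquad
  \Delta x_t = -\frac{\sqrt{E[\Delta x^2]_{t-1} + \varepsilon}}{\sqrt{E[g^2]_t + \varepsilon}}\, g_t.
\]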
@ -520,11 +520,11 @@ of the marble.
This results in the algorithm being able to escape ... due to the
momentum built up from approaching it.

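In symbols, a common form of the momentum update is (a sketch, not
necessarily the exact variant discussed here; $\beta$ is the momentum
coefficient and $\gamma$ the learning rate):
\[
  v_{t+1} = \beta v_t - \gamma \nabla f(x_t), \qquad
  x_{t+1} = x_t + v_{t+1},
\]
so past gradients keep contributing to the current step with
exponentially decaying weight.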
\begin{itemize}
\item ADAM
\item momentum
\item ADADELTA \textcite{ADADELTA}
\end{itemize}
% \begin{itemize}
% \item ADAM
% \item momentum
% \item ADADELTA \textcite{ADADELTA}
% \end{itemize}


\begin{algorithm}[H]
@ -665,7 +665,37 @@ When using this one has to be sure that the labels indeed remain the
same or else the network will not learn the desired ...
In the case of handwritten digits for example a too high rotation angle
will ... a nine or six.
The most common transformations are rotation, zoom, shear, brightness, mirroring.
The most common transformations are rotation, zoom, shear, brightness,
and mirroring.

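Such transformations can be generated on the fly during training. A
minimal sketch using Keras' ImageDataGenerator (that Keras was used, and
all parameter values below, are illustrative assumptions, not the
settings of the experiments in this work):

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Randomly rotate, shift, shear and zoom each image of a batch.
datagen = ImageDataGenerator(
    rotation_range=15,       # rotate by up to +/- 15 degrees
    width_shift_range=0.1,   # shift horizontally by up to 10% of width
    height_shift_range=0.1,  # shift vertically by up to 10% of height
    shear_range=0.2,         # shear intensity
    zoom_range=0.1)          # zoom in/out by up to 10%

# x_train: (N, 28, 28, 1) images, y_train: labels, model: a compiled net.
# model.fit(datagen.flow(x_train, y_train, batch_size=60),
#           steps_per_epoch=10000, epochs=48)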
\begin{figure}[h]
\centering
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist0.pdf}
\caption{original\\image}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist_gen_zoom.pdf}
\caption{random\\zoom}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist_gen_shear.pdf}
\caption{random\\shear}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist_gen_rotation.pdf}
\caption{random\\rotation}
\end{subfigure}
\begin{subfigure}{0.19\textwidth}
\includegraphics[width=\textwidth]{Plots/Data/mnist_gen_shift.pdf}
\caption{random\\positional shift}
\end{subfigure}
\caption{Examples of the manipulations used in ... As all images are
of the same intensity, brightness manipulation does not seem
... Additionally, mirroring is not used for ... reasons.}
\end{figure}

\input{Plots/gen_dropout.tex}

\todo{Compare different dropout rates on MNIST or similar; subset as
training set?}
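The dropout configurations (D.~0.2, D.~0.4) compared above correspond to
a dropout layer with the given rate. A hedged sketch of how this could
look in Keras (the actual architecture of the net "given in ..." is not
specified here, so this layer layout is an assumption):

from tensorflow.keras import layers, models

# Small convolutional net with dropout before the output layer.
model = models.Sequential([
    layers.Conv2D(32, 3, activation="relu", input_shape=(28, 28, 1)),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.2),  # drop 20% of activations in training (D. 0.2)
    layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])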
@ -674,10 +704,41 @@ training set?}

For some applications (medical problems with a small number of patients)
the available data can be highly limited.
In order to get an understanding of the accuracy achievable in such a
scenario, in the following we examine the ... and .. with a highly
reduced training set and the impact the above-mentioned strategies for
combating overfitting have.
In these problems the networks are highly ... for overfitting the
data. In order to get an understanding of the accuracies achievable and the
impact of the measures to prevent overfitting discussed above, we train
the network on datasets of varying sizes.
First we use the MNIST handwriting dataset and then a slightly harder
problem given by the MNIST fashion dataset, which contains preprocessed
pictures of clothes from 10 different categories.

\input{Plots/fashion_mnist.tex}

For training, a certain number of random data points is chosen per
class; a sketch of such a sampler follows this list. The sizes chosen are:\\
full dataset: ... per class\\
1000 per class\\
100 per class\\
10 per class
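Such per-class subsets can be drawn, for example, as follows (a sketch;
the function name and the fixed seed are illustrative):

import numpy as np

def sample_per_class(x, y, n_per_class, seed=0):
    """Pick n_per_class random examples of every class present in y."""
    rng = np.random.default_rng(seed)
    idx = np.concatenate([
        rng.choice(np.flatnonzero(y == c), n_per_class, replace=False)
        for c in np.unique(y)
    ])
    return x[idx], y[idx]

# Example: a training set with 100 images per class.
# x_small, y_small = sample_per_class(x_train, y_train, 100)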

The results for training .. are given in ... Here it can be seen ...

\begin{figure}[h]
\centering
\missingfigure{datagen digits}
\caption{Sample pictures of the MNIST fashion dataset, one per
class.}
\label{mnist fashion}
\end{figure}

\begin{figure}[h]
\centering
\missingfigure{datagen fashion}
\caption{Sample pictures of the MNIST fashion dataset, one per
class.}
\label{mnist fashion}
\end{figure}


\clearpage
\section{Bla}
@ -295,7 +295,7 @@ interpretation.
Commonly the nodes in the output layer each correspond to a class and
the class chosen as prediction is the one with the highest value at
the corresponding output node.
The naive transformation to achieve this is transforming the output
This corresponds to a transformation of the output
vector $o$ into a one-hot vector
\[
\text{pred}_i =
@ -92,6 +92,9 @@

\newcommand{\abs}[1]{\ensuremath{\left\vert#1\right\vert}}

\newcommand\Tstrut{\rule{0pt}{2.6ex}} % = `top' strut
\newcommand\Bstrut{\rule[-0.9ex]{0pt}{0pt}} % = `bottom' strut

\SetKwInput{KwInput}{Input}

%\newcommand{\myrightarrow}[1]{\xrightarrow{\makebox[2em][c]{$\scriptstyle#1$}}}
@ -6,6 +6,10 @@
%%% End:
\section{Shallow Neural Networks}

In order to get some understanding of the behavior of neural
networks, we study a simplified class of networks, called shallow neural
networks, in this chapter. We consider shallow neural networks consisting of a single
hidden layer and
In order to examine some of the behavior of neural networks, in this
chapter we consider a simple class of networks, the shallow ones. These
networks contain only one hidden layer and have a single output node.
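For a one-dimensional input such a network can be written compactly as
(a sketch; the symbols are assumptions of this summary, with $n$ hidden
nodes, activation function $\sigma$, weights $w_k, v_k$ and biases
$b_k, c$):
\[
  f(x) = c + \sum_{k=1}^{n} v_k\, \sigma(w_k x + b_k).
\]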