\pgfplotsset{
  compat=1.11,
  % Collapse all legend image coordinates onto the origin so that only a
  % single marker (no line segment) is shown in the legend.
  legend image code/.code={
    \draw[mark repeat=2,mark phase=2]
    plot coordinates {
      (0cm,0cm)
      (0.0cm,0cm) %% default is (0.3cm,0cm)
      (0.0cm,0cm) %% default is (0.6cm,0cm)
    };%
  }
}
\begin{figure}
  \begin{subfigure}[b]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[tick style = {draw = none},
          width = \textwidth,
          height = 0.6\textwidth,
          xtick = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19},
          xticklabels = {$2$, $4$, $6$, $8$, $10$, $12$, $14$, $16$, $18$, $20$},
          xlabel = {training epoch},
          ylabel = {classification accuracy}]
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_01.log};
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_05.log};
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_1.log};
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/SGD_01_b32.log};
        \addlegendentry{GD$_{0.01}$}
        \addlegendentry{GD$_{0.05}$}
        \addlegendentry{GD$_{0.1}$}
        \addlegendentry{SGD$_{0.01}$}
      \end{axis}
    \end{tikzpicture}
    %\caption{Classification accuracy}
  \end{subfigure}
  \begin{subfigure}[b]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[tick style = {draw = none},
          width = \textwidth,
          height = 0.6\textwidth,
          ytick = {0, 1, 2, 3, 4},
          yticklabels = {$0$, $1$, $\phantom{0.}2$, $3$, $4$},
          xtick = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19},
          xticklabels = {$2$, $4$, $6$, $8$, $10$, $12$, $14$, $16$, $18$, $20$},
          xlabel = {training epoch},
          ylabel = {error measure\vphantom{fy}}]
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_01.log};
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_05.log};
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_1.log};
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/SGD_01_b32.log};
        \addlegendentry{GD$_{0.01}$}
        \addlegendentry{GD$_{0.05}$}
        \addlegendentry{GD$_{0.1}$}
        \addlegendentry{SGD$_{0.01}$}
      \end{axis}
    \end{tikzpicture}
    \caption{Performance metrics during training}
  \end{subfigure}
  % \\~\\
  \caption[Performance comparison of SGD and GD]{The neural network given in
    Figure~\ref{fig:mnist_architecture} trained with different algorithms on
    the MNIST handwritten digits data set. For gradient descent the learning
    rates 0.01, 0.05, and 0.1 are used (GD$_{\cdot}$). For stochastic gradient
    descent a batch size of 32 and a learning rate of 0.01 are used
    (SGD$_{0.01}$).}
  \label{fig:sgd_vs_gd}
\end{figure}

\begin{table}[h]
  \begin{tabu} to \textwidth {@{} *4{X[c]}c*4{X[c]} @{}}
    \multicolumn{4}{c}{Classification Accuracy} &~& \multicolumn{4}{c}{Error Measure} \\
    \cline{1-4}\cline{6-9}
    GD$_{0.01}$ & GD$_{0.05}$ & GD$_{0.1}$ & SGD$_{0.01}$ && GD$_{0.01}$ & GD$_{0.05}$ & GD$_{0.1}$ & SGD$_{0.01}$ \\
    \cline{1-4}\cline{6-9}
    \multicolumn{9}{c}{test set}\\
    0.265 & 0.633 & 0.203 & 0.989 && 2.267 & 1.947 & 3.91 & 0.032
  \end{tabu}
  \caption{Performance metrics of the networks trained in
    Figure~\ref{fig:sgd_vs_gd} after 20 training epochs.}
  \label{table:sgd_vs_gd}
\end{table}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End: