\pgfplotsset{
  compat=1.11,
  % Collapse all legend image coordinates onto the origin so that only a
  % single marker (no line segment) is shown in the legend.
  legend image code/.code={
    \draw[mark repeat=2,mark phase=2]
    plot coordinates {
      (0cm,0cm)
      (0.0cm,0cm) %% default is (0.3cm,0cm)
      (0.0cm,0cm) %% default is (0.6cm,0cm)
    };%
  }
}
\begin{figure}
  \begin{subfigure}[b]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[tick style = {draw = none},
          width = \textwidth,
          height = 0.6\textwidth,
          xtick = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19},
          xticklabels = {$2$, $4$, $6$, $8$, $10$, $12$, $14$, $16$, $18$, $20$},
          xlabel = {training epoch},
          ylabel = {classification accuracy}]
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_01.log};
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_05.log};
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/GD_1.log};
        \addplot table [x=epoch, y=val_accuracy, col sep=comma] {Figures/Data/SGD_01_b32.log};
        \addlegendentry{GD$_{0.01}$}
        \addlegendentry{GD$_{0.05}$}
        \addlegendentry{GD$_{0.1}$}
        \addlegendentry{SGD$_{0.01}$}
      \end{axis}
    \end{tikzpicture}
    %\caption{Classification accuracy}
  \end{subfigure}
  \begin{subfigure}[b]{\textwidth}
    \begin{tikzpicture}
      \begin{axis}[tick style = {draw = none},
          width = \textwidth,
          height = 0.6\textwidth,
          ytick = {0, 1, 2, 3, 4},
          yticklabels = {$0$, $1$, $\phantom{0.}2$, $3$, $4$},
          xtick = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19},
          xticklabels = {$2$, $4$, $6$, $8$, $10$, $12$, $14$, $16$, $18$, $20$},
          xlabel = {training epoch},
          ylabel = {error measure\vphantom{fy}}]
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_01.log};
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_05.log};
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/GD_1.log};
        \addplot table [x=epoch, y=val_loss, col sep=comma] {Figures/Data/SGD_01_b32.log};
        \addlegendentry{GD$_{0.01}$}
        \addlegendentry{GD$_{0.05}$}
        \addlegendentry{GD$_{0.1}$}
        \addlegendentry{SGD$_{0.01}$}
      \end{axis}
    \end{tikzpicture}
    \caption{Performance metrics during training}
  \end{subfigure}
  % \\~\\
  \caption[Performance comparison of SGD and GD]{The neural network given in
    Figure~\ref{fig:mnist_architecture} trained with different algorithms on
    the MNIST handwritten digits data set. For gradient descent the learning
    rates 0.01, 0.05, and 0.1 are used (GD$_{\cdot}$). For stochastic gradient
    descent a batch size of 32 and a learning rate of 0.01 are used
    (SGD$_{0.01}$).}
  \label{fig:sgd_vs_gd}
\end{figure}

\begin{table}[h]
  \begin{tabu} to \textwidth {@{} *4{X[c]}c*4{X[c]} @{}}
    \multicolumn{4}{c}{Classification Accuracy} &~& \multicolumn{4}{c}{Error Measure} \\
    \cline{1-4}\cline{6-9}
    GD$_{0.01}$ & GD$_{0.05}$ & GD$_{0.1}$ & SGD$_{0.01}$ && GD$_{0.01}$ & GD$_{0.05}$ & GD$_{0.1}$ & SGD$_{0.01}$ \\
    \cline{1-4}\cline{6-9}
    \multicolumn{9}{c}{test set}\\
    0.265 & 0.633 & 0.203 & 0.989 && 2.267 & 1.947 & 3.91 & 0.032
  \end{tabu}
  \caption{Performance metrics of the networks trained in
    Figure~\ref{fig:sgd_vs_gd} after 20 training epochs.}
  \label{table:sgd_vs_gd}
\end{table}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% End: