This commit is contained in:
Visual Computing (VICO) Group 2024-10-14 09:56:47 +01:00
parent 207595b4a1
commit 6883eb77c2
29 changed files with 5330 additions and 17 deletions

12
.gitignore vendored
View File

@ -1,3 +1,4 @@
 #dropbox stuff
 *.dropbox*
@ -25,7 +26,6 @@ var/
 *.egg-info/
 .installed.cfg
 *.egg
-etc/
 # PyInstaller
 # Usually these files are written by a python script from a template
@ -60,9 +60,17 @@ docs/_build/
 # PyBuilder
 target/
+# Pycharm
+.idea/*
 # Notebook stuff
 notebooks/.ipynb_checkpoints/
 # Data folder
 data/
+solutions/
+# Latex stuff
+report/mlp-cw1-template.aux
+report/mlp-cw1-template.out
+report/mlp-cw1-template.pdf
+report/mlp-cw1-template.synctex.gz

View File

@ -6,19 +6,10 @@ This assignment-based course is focused on the implementation and evaluation of
 The code in this repository is split into:
 * a Python package `mlp`, a [NumPy](http://www.numpy.org/) based neural network package designed specifically for the course that students will implement parts of and extend during the course labs and assignments,
 * a series of [Jupyter](http://jupyter.org/) notebooks in the `notebooks` directory containing explanatory material and coding exercises to be completed during the course labs.
-## Remote working
-If you are working remotely, follow this [guide](notes/remote-working-guide.md).
-## Getting set up
-Detailed instructions for setting up a development environment for the course are given in [this file](notes/environment-set-up.md). Students doing the course will spend part of the first lab getting their own environment set up.
-## Exercises
-If you are first time users of jupyter notebook, check out `notebooks/00_notebook.ipynb` to understand its features.
-To get started with the exercises, go to the `notebooks` directory. For lab 1, work with the notebook starting with the prefix `01`, and so on.
+## Coursework 1
+This branch contains the Python code and LaTeX files for the first coursework. The code follows the same structure as the labs, in particular the `mlp` package, and a dedicated notebook is provided to help you run experiments.
+* Detailed instructions are given in MLP2024_25_CW1_Spec.pdf (see Learn, Assignment Submission, Coursework 1).
+* The [report directory](https://github.com/VICO-UoE/mlpractical/tree/mlp2024-25/coursework1/report) contains the LaTeX files that you will use to create your report.

90
mlp/penalties.py Normal file
View File

@ -0,0 +1,90 @@
import numpy as np
seed = 22102017
rng = np.random.RandomState(seed)
class L1Penalty(object):
"""L1 parameter penalty.
Term to add to the objective function penalising parameters
based on their L1 norm.
"""
def __init__(self, coefficient):
"""Create a new L1 penalty object.
Args:
coefficient: Positive constant to scale penalty term by.
"""
assert coefficient > 0., 'Penalty coefficient must be positive.'
self.coefficient = coefficient
def __call__(self, parameter):
"""Calculate L1 penalty value for a parameter.
Args:
parameter: Array corresponding to a model parameter.
Returns:
Value of penalty term.
"""
raise NotImplementedError
def grad(self, parameter):
"""Calculate the penalty gradient with respect to the parameter.
Args:
parameter: Array corresponding to a model parameter.
Returns:
Value of penalty gradient with respect to parameter. This
should be an array of the same shape as the parameter.
"""
raise NotImplementedError
def __repr__(self):
return 'L1Penalty({0})'.format(self.coefficient)
class L2Penalty(object):
"""L1 parameter penalty.
Term to add to the objective function penalising parameters
based on their L2 norm.
"""
def __init__(self, coefficient):
"""Create a new L2 penalty object.
Args:
coefficient: Positive constant to scale penalty term by.
"""
assert coefficient > 0., 'Penalty coefficient must be positive.'
self.coefficient = coefficient
def __call__(self, parameter):
"""Calculate L2 penalty value for a parameter.
Args:
parameter: Array corresponding to a model parameter.
Returns:
Value of penalty term.
"""
raise NotImplementedError
def grad(self, parameter):
"""Calculate the penalty gradient with respect to the parameter.
Args:
parameter: Array corresponding to a model parameter.
Returns:
Value of penalty gradient with respect to parameter. This
should be an array of the same shape as the parameter.
"""
raise NotImplementedError
def __repr__(self):
return 'L2Penalty({0})'.format(self.coefficient)
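For reference, a hedged sketch of one possible completion of the two classes above; the exact scaling convention (e.g. whether the L2 term carries a 1/2 factor) is fixed by the coursework spec, not by this sketch:

import numpy as np

# Hypothetical completion sketch for the bodies of __call__ and grad.
def l1_value(parameter, coefficient):
    # Penalty value: coefficient times the sum of absolute parameter values.
    return coefficient * np.abs(parameter).sum()

def l1_grad(parameter, coefficient):
    # Gradient: coefficient times sign(parameter), same shape as the parameter.
    return coefficient * np.sign(parameter)

def l2_value(parameter, coefficient):
    # Penalty value with the assumed 1/2 convention, chosen so that the
    # gradient below is simply coefficient * parameter.
    return 0.5 * coefficient * (parameter ** 2).sum()

def l2_grad(parameter, coefficient):
    # Gradient: coefficient times the parameter itself, same shape as the parameter.
    return coefficient * parameter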

80
mlp/test_methods.py Normal file
View File

@ -0,0 +1,80 @@
# from mlp.learning_rules import AdamLearningRuleWithWeightDecay
# from mlp.schedulers import CosineAnnealingWithWarmRestarts
from mlp.layers import DropoutLayer
from mlp.penalties import L1Penalty, L2Penalty
import numpy as np
import os
def test_dropout_layer():
# loaded = np.load("../data/correct_results.npz")
rng = np.random.RandomState(92019)
x = rng.normal(loc=0, scale=5.0, size=(50, 3, 64, 64))
correct_outputs = np.load(os.path.join(os.environ['MLP_DATA_DIR'], 'regularization_debug_pack.npy'), allow_pickle=True).item()
rng = np.random.RandomState(92019)
layer = DropoutLayer(rng=rng)
out = layer.fprop(x)
grads = layer.bprop(inputs=x, outputs=out, grads_wrt_outputs=np.ones(x.shape))
# correct_outputs = correct_outputs['dropout']
fprop_test = np.allclose(correct_outputs['DropoutLayer_fprop'], out)
bprop_test = np.allclose(correct_outputs['DropoutLayer_bprop'], grads)
return fprop_test, out, correct_outputs['DropoutLayer_fprop'], bprop_test, grads, correct_outputs['DropoutLayer_bprop']
def test_L1_Penalty():
rng = np.random.RandomState(92019)
x = rng.normal(loc=0, scale=5.0, size=(50, 3, 64, 64))
correct_outputs = np.load(os.path.join(os.environ['MLP_DATA_DIR'], 'regularization_debug_pack.npy'), allow_pickle=True).item()
layer = L1Penalty(1e-4)
out = layer(x)
grads = layer.grad(x)
# correct_outputs = correct_outputs['l1penalty']
__call__test = np.allclose(correct_outputs['L1Penalty___call__correct'], out)
grad_test = np.allclose(correct_outputs['L1Penalty_grad_correct'], grads)
return __call__test, out, correct_outputs['L1Penalty___call__correct'], grad_test, grads, correct_outputs['L1Penalty_grad_correct']
def test_L2_Penalty():
rng = np.random.RandomState(92019)
x = rng.normal(loc=0, scale=5.0, size=(50, 3, 64, 64))
correct_outputs = np.load(os.path.join(os.environ['MLP_DATA_DIR'], 'regularization_debug_pack.npy'), allow_pickle=True).item()
layer = L2Penalty(1e-4)
out = layer(x)
grads = layer.grad(x)
# correct_outputs = correct_outputs['l2penalty']
__call__test = np.allclose(correct_outputs['L2Penalty___call__correct'], out)
grad_test = np.allclose(correct_outputs['L2Penalty_grad_correct'], grads)
return __call__test, out, correct_outputs['L2Penalty___call__correct'], grad_test, grads, correct_outputs['L2Penalty_grad_correct']
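The DropoutLayer exercised by test_dropout_layer is defined in mlp/layers.py, which is not part of this diff. A minimal sketch of the semantics the tests assume follows: a stochastic binary mask sampled in fprop and reused in bprop. The default inclusion probability and the absence of rescaling are assumptions here, not facts taken from the course code.

import numpy as np

class DropoutSketch(object):
    """Hedged sketch of dropout; the real DropoutLayer may differ in details."""

    def __init__(self, rng=None, incl_prob=0.5):
        self.rng = rng if rng is not None else np.random.RandomState()
        self.incl_prob = incl_prob  # probability of *keeping* a unit

    def fprop(self, inputs):
        # Sample a binary mask and zero out the dropped units.
        self.mask = self.rng.uniform(size=inputs.shape) < self.incl_prob
        return inputs * self.mask

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        # Gradients flow only through the units that were kept in fprop.
        return grads_wrt_outputs * self.mask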

View File

@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Coursework 1\n",
"\n",
"This notebook is intended to be used as a starting point for your experiments. The instructions can be found in the MLP2023_24_CW1_Spec.pdf (see Learn, Assignment Submission, Coursework 1). The methods provided here are just helper functions. If you want more complex graphs such as side by side comparisons of different experiments you should learn more about matplotlib and implement them. Before each experiment remember to re-initialize neural network weights and reset the data providers so you get a properly initialized experiment. For each experiment try to keep most hyperparameters the same except the one under investigation so you can understand what the effects of each are."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.style.use('ggplot')\n",
"\n",
"def train_model_and_plot_stats(\n",
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):\n",
" \n",
" # As well as monitoring the error over training also monitor classification\n",
" # accuracy i.e. proportion of most-probable predicted classes being equal to targets\n",
" data_monitors={'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}\n",
"\n",
" # Use the created objects to initialise a new Optimiser instance.\n",
" optimiser = Optimiser(\n",
" model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)\n",
"\n",
" # Run the optimiser for num_epochs epochs (full passes through the training set)\n",
" # printing statistics every epoch.\n",
" stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)\n",
"\n",
" # Plot the change in the validation and training set error over training.\n",
" fig_1 = plt.figure(figsize=(8, 4))\n",
" ax_1 = fig_1.add_subplot(111)\n",
" for k in ['error(train)', 'error(valid)']:\n",
" ax_1.plot(np.arange(1, stats.shape[0]) * stats_interval, \n",
" stats[1:, keys[k]], label=k)\n",
" ax_1.legend(loc=0)\n",
" ax_1.set_xlabel('Epoch number')\n",
" ax_1.set_ylabel('Error')\n",
"\n",
" # Plot the change in the validation and training set accuracy over training.\n",
" fig_2 = plt.figure(figsize=(8, 4))\n",
" ax_2 = fig_2.add_subplot(111)\n",
" for k in ['acc(train)', 'acc(valid)']:\n",
" ax_2.plot(np.arange(1, stats.shape[0]) * stats_interval, \n",
" stats[1:, keys[k]], label=k)\n",
" ax_2.legend(loc=0)\n",
" ax_2.set_xlabel('Epoch number')\n",
" ax_2.set_xlabel('Accuracy')\n",
" \n",
" return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The below code will set up the data providers, random number\n",
"# generator and logger objects needed for training runs. As\n",
"# loading the data from file take a little while you generally\n",
"# will probably not want to reload the data providers on\n",
"# every training run. If you wish to reset their state you\n",
"# should instead use the .reset() method of the data providers.\n",
"import numpy as np\n",
"import logging\n",
"import sys\n",
"# sys.path.append('/path/to/mlpractical')\n",
"from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider\n",
"\n",
"# Seed a random number generator\n",
"seed = 11102019 \n",
"rng = np.random.RandomState(seed)\n",
"batch_size = 100\n",
"# Set up a logger object to print info about the training run to stdout\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"logger.handlers = [logging.StreamHandler()]\n",
"\n",
"# Create data provider objects for the MNIST data set\n",
"train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)\n",
"valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The model set up code below is provided as a starting point.\n",
"# You will probably want to add further code cells for the\n",
"# different experiments you run.\n",
"\n",
"%pip install tqdm\n",
"\n",
"from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer\n",
"from mlp.errors import CrossEntropySoftmaxError\n",
"from mlp.models import MultipleLayerModel\n",
"from mlp.initialisers import ConstantInit, GlorotUniformInit\n",
"from mlp.learning_rules import AdamLearningRule\n",
"from mlp.optimisers import Optimiser\n",
"\n",
"# Setup hyperparameters\n",
"learning_rate = 0.001\n",
"num_epochs = 100\n",
"stats_interval = 1\n",
"input_dim, output_dim, hidden_dim = 784, 47, 128\n",
"\n",
"weights_init = GlorotUniformInit(rng=rng)\n",
"biases_init = ConstantInit(0.)\n",
"\n",
"# Create model with ONE hidden layer\n",
"model = MultipleLayerModel([\n",
" AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # hidden layer\n",
" ReluLayer(),\n",
" AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer\n",
"])\n",
"\n",
"error = CrossEntropySoftmaxError()\n",
"# Use a Adam learning rule\n",
"learning_rule = AdamLearningRule(learning_rate=learning_rate)\n",
"\n",
"# Remember to use notebook=False when you write a script to be run in a terminal\n",
"_ = train_model_and_plot_stats(\n",
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create model with TWO hidden layers\n",
"model = MultipleLayerModel([\n",
" AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # first hidden layer\n",
" ReluLayer(),\n",
" AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), # second hidden layer\n",
" ReluLayer(),\n",
" AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer\n",
"])\n",
"\n",
"error = CrossEntropySoftmaxError()\n",
"# Use a Adam learning rule\n",
"learning_rule = AdamLearningRule(learning_rate=learning_rate)\n",
"\n",
"# Remember to use notebook=False when you write a script to be run in a terminal\n",
"_ = train_model_and_plot_stats(\n",
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
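Once the penalty classes are implemented, they can be attached to the model built in the cells above for the regularisation experiments. A hedged sketch: the weights_penalty and biases_penalty keyword arguments of AffineLayer are assumed here (as in previous years' mlp package) and are not confirmed by this diff; the coefficient is an arbitrary example value.

import numpy as np
from mlp.layers import AffineLayer, ReluLayer
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.penalties import L2Penalty

rng = np.random.RandomState(11102019)
input_dim, output_dim, hidden_dim = 784, 47, 128
weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)

penalty = L2Penalty(1e-3)  # example coefficient only
model = MultipleLayerModel([
    AffineLayer(input_dim, hidden_dim, weights_init, biases_init,
                weights_penalty=penalty, biases_penalty=penalty),  # assumed kwargs
    ReluLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init,
                weights_penalty=penalty, biases_penalty=penalty)   # assumed kwargs
])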

View File

@ -0,0 +1,122 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import sys\n",
"# sys.path.append('/path/to/mlpractical')\n",
"from mlp.test_methods import test_dropout_layer\n",
"import numpy as np\n",
"\n",
"fprop_test, fprop_output, fprop_correct, \\\n",
"bprop_test, bprop_output, bprop_correct = test_dropout_layer()\n",
"\n",
"assert fprop_test == 1.0, (\n",
"'The dropout layer fprop functionality test failed'\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(fprop_correct, fprop_output, fprop_output-fprop_correct)\n",
")\n",
"\n",
"print(\"Dropout Layer Fprop Functionality Test Passed\")\n",
"\n",
"assert bprop_test == 1.0, (\n",
"'The dropout layer bprop functionality test failed'\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(bprop_correct, bprop_output, bprop_output-bprop_correct)\n",
")\n",
"\n",
"print(\"Dropout Layer Bprop Test Passed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mlp.test_methods import test_L1_Penalty\n",
"\n",
"\n",
"call_test, call_output, call_correct, \\\n",
"grad_test, grad_output, grad_correct = test_L1_Penalty()\n",
"\n",
"\n",
"assert call_test == 1.0, (\n",
"'The call function for L1 Penalty test failed'\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(call_correct, call_output, call_output-call_correct)\n",
")\n",
"\n",
"print(\"L1 Penalty Call Functionality Test Passed\")\n",
"\n",
"assert grad_test == 1.0, (\n",
"'The grad function for L1 Penalty test failed'\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(grad_correct, grad_output, grad_output-grad_correct, grad_output/grad_correct)\n",
")\n",
"\n",
"\n",
"\n",
"print(\"L1 Penalty Grad Function Test Passed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mlp.test_methods import test_L2_Penalty\n",
"\n",
"\n",
"call_test, call_output, call_correct, \\\n",
"grad_test, grad_output, grad_correct = test_L2_Penalty()\n",
"\n",
"\n",
"assert call_test == 1.0, (\n",
"'The call function for L2 Penalty test failed'\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(call_correct, call_output, call_output-call_correct)\n",
")\n",
"\n",
"print(\"L2 Penalty Call Functionality Test Passed\")\n",
"\n",
"assert grad_test == 1.0, (\n",
"'The grad function for L2 Penalty test failed'\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(grad_correct, grad_output, grad_output-grad_correct, grad_output/grad_correct)\n",
")\n",
"\n",
"\n",
"\n",
"print(\"L2 Penalty Grad Function Test Passed\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
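Note that the test helpers load regularization_debug_pack.npy via the MLP_DATA_DIR environment variable, so it must be set before mlp.test_methods is imported. A sketch with a placeholder path:

import os

# Placeholder path: point this at the directory that contains
# regularization_debug_pack.npy before importing mlp.test_methods.
os.environ['MLP_DATA_DIR'] = '/path/to/mlpractical/data'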

1
report/README.txt Normal file
View File

@ -0,0 +1 @@
Most reasonable LaTeX distributions should have no problem building the document from what is in the provided LaTeX source directory. However, certain LaTeX distributions are missing certain files, so those files are included in this directory. If you get an error message saying one of these files is missing when you build the LaTeX document, move the relevant file into your LaTeX source directory.


View File

@ -0,0 +1,79 @@
% ALGORITHM STYLE -- Released 8 April 1996
% for LaTeX-2e
% Copyright -- 1994 Peter Williams
% E-mail Peter.Williams@dsto.defence.gov.au
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithm}
\typeout{Document Style `algorithm' - floating environment}
\RequirePackage{float}
\RequirePackage{ifthen}
\newcommand{\ALG@within}{nothing}
\newboolean{ALG@within}
\setboolean{ALG@within}{false}
\newcommand{\ALG@floatstyle}{ruled}
\newcommand{\ALG@name}{Algorithm}
\newcommand{\listalgorithmname}{List of \ALG@name s}
% Declare Options
% first appearance
\DeclareOption{plain}{
\renewcommand{\ALG@floatstyle}{plain}
}
\DeclareOption{ruled}{
\renewcommand{\ALG@floatstyle}{ruled}
}
\DeclareOption{boxed}{
\renewcommand{\ALG@floatstyle}{boxed}
}
% then numbering convention
\DeclareOption{part}{
\renewcommand{\ALG@within}{part}
\setboolean{ALG@within}{true}
}
\DeclareOption{chapter}{
\renewcommand{\ALG@within}{chapter}
\setboolean{ALG@within}{true}
}
\DeclareOption{section}{
\renewcommand{\ALG@within}{section}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsection}{
\renewcommand{\ALG@within}{subsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsubsection}{
\renewcommand{\ALG@within}{subsubsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{nothing}{
\renewcommand{\ALG@within}{nothing}
\setboolean{ALG@within}{true}
}
\DeclareOption*{\edef\ALG@name{\CurrentOption}}
% ALGORITHM
%
\ProcessOptions
\floatstyle{\ALG@floatstyle}
\ifthenelse{\boolean{ALG@within}}{
\ifthenelse{\equal{\ALG@within}{part}}
{\newfloat{algorithm}{htbp}{loa}[part]}{}
\ifthenelse{\equal{\ALG@within}{chapter}}
{\newfloat{algorithm}{htbp}{loa}[chapter]}{}
\ifthenelse{\equal{\ALG@within}{section}}
{\newfloat{algorithm}{htbp}{loa}[section]}{}
\ifthenelse{\equal{\ALG@within}{subsection}}
{\newfloat{algorithm}{htbp}{loa}[subsection]}{}
\ifthenelse{\equal{\ALG@within}{subsubsection}}
{\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
\ifthenelse{\equal{\ALG@within}{nothing}}
{\newfloat{algorithm}{htbp}{loa}}{}
}{
\newfloat{algorithm}{htbp}{loa}
}
\floatname{algorithm}{\ALG@name}
\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}

View File

@ -0,0 +1,201 @@
% ALGORITHMIC STYLE -- Released 8 APRIL 1996
% for LaTeX version 2e
% Copyright -- 1994 Peter Williams
% E-mail PeterWilliams@dsto.defence.gov.au
%
% Modified by Alex Smola (08/2000)
% E-mail Alex.Smola@anu.edu.au
%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithmic}
\typeout{Document Style `algorithmic' - environment}
%
\RequirePackage{ifthen}
\RequirePackage{calc}
\newboolean{ALC@noend}
\setboolean{ALC@noend}{false}
\newcounter{ALC@line}
\newcounter{ALC@rem}
\newlength{\ALC@tlm}
%
\DeclareOption{noend}{\setboolean{ALC@noend}{true}}
%
\ProcessOptions
%
% ALGORITHMIC
\newcommand{\algorithmicrequire}{\textbf{Require:}}
\newcommand{\algorithmicensure}{\textbf{Ensure:}}
\newcommand{\algorithmiccomment}[1]{\{#1\}}
\newcommand{\algorithmicend}{\textbf{end}}
\newcommand{\algorithmicif}{\textbf{if}}
\newcommand{\algorithmicthen}{\textbf{then}}
\newcommand{\algorithmicelse}{\textbf{else}}
\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif}
\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif}
\newcommand{\algorithmicfor}{\textbf{for}}
\newcommand{\algorithmicforall}{\textbf{for all}}
\newcommand{\algorithmicdo}{\textbf{do}}
\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor}
\newcommand{\algorithmicwhile}{\textbf{while}}
\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile}
\newcommand{\algorithmicloop}{\textbf{loop}}
\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop}
\newcommand{\algorithmicrepeat}{\textbf{repeat}}
\newcommand{\algorithmicuntil}{\textbf{until}}
%changed by alex smola
\newcommand{\algorithmicinput}{\textbf{input}}
\newcommand{\algorithmicoutput}{\textbf{output}}
\newcommand{\algorithmicset}{\textbf{set}}
\newcommand{\algorithmictrue}{\textbf{true}}
\newcommand{\algorithmicfalse}{\textbf{false}}
\newcommand{\algorithmicand}{\textbf{and\ }}
\newcommand{\algorithmicor}{\textbf{or\ }}
\newcommand{\algorithmicfunction}{\textbf{function}}
\newcommand{\algorithmicendfunction}{\algorithmicend\ \algorithmicfunction}
\newcommand{\algorithmicmain}{\textbf{main}}
\newcommand{\algorithmicendmain}{\algorithmicend\ \algorithmicmain}
%end changed by alex smola
\def\ALC@item[#1]{%
\if@noparitem \@donoparitem
\else \if@inlabel \indent \par \fi
\ifhmode \unskip\unskip \par \fi
\if@newlist \if@nobreak \@nbitem \else
\addpenalty\@beginparpenalty
\addvspace\@topsep \addvspace{-\parskip}\fi
\else \addpenalty\@itempenalty \addvspace\itemsep
\fi
\global\@inlabeltrue
\fi
\everypar{\global\@minipagefalse\global\@newlistfalse
\if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels
\penalty\z@ \fi
\everypar{}}\global\@nobreakfalse
\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi
\sbox\@tempboxa{\makelabel{#1}}%
\global\setbox\@labels
\hbox{\unhbox\@labels \hskip \itemindent
\hskip -\labelwidth \hskip -\ALC@tlm
\ifdim \wd\@tempboxa >\labelwidth
\box\@tempboxa
\else \hbox to\labelwidth {\unhbox\@tempboxa}\fi
\hskip \ALC@tlm}\ignorespaces}
%
\newenvironment{algorithmic}[1][0]{
\let\@item\ALC@item
\newcommand{\ALC@lno}{%
\ifthenelse{\equal{\arabic{ALC@rem}}{0}}
{{\footnotesize \arabic{ALC@line}:}}{}%
}
\let\@listii\@listi
\let\@listiii\@listi
\let\@listiv\@listi
\let\@listv\@listi
\let\@listvi\@listi
\let\@listvii\@listi
\newenvironment{ALC@g}{
\begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@
\listparindent\z@ \rightmargin\z@
\topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@
\leftmargin 1em
\addtolength{\ALC@tlm}{\leftmargin}
}
}
{\end{list}}
\newcommand{\ALC@it}{\addtocounter{ALC@line}{1}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item}
\newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}%
{}{\ \algorithmiccomment{##1}}}
\newcommand{\REQUIRE}{\item[\algorithmicrequire]}
\newcommand{\ENSURE}{\item[\algorithmicensure]}
\newcommand{\STATE}{\ALC@it}
\newcommand{\COMMENT}[1]{\algorithmiccomment{##1}}
%changes by alex smola
\newcommand{\INPUT}{\item[\algorithmicinput]}
\newcommand{\OUTPUT}{\item[\algorithmicoutput]}
\newcommand{\SET}{\item[\algorithmicset]}
% \newcommand{\TRUE}{\algorithmictrue}
% \newcommand{\FALSE}{\algorithmicfalse}
\newcommand{\AND}{\algorithmicand}
\newcommand{\OR}{\algorithmicor}
\newenvironment{ALC@func}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@main}{\begin{ALC@g}}{\end{ALC@g}}
%end changes by alex smola
\newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}}
\renewcommand{\\}{\@centercr}
\newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\SHORTIF}[2]{\ALC@it\algorithmicif\ ##1\
\algorithmicthen\ {##2}}
\newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\ELSIF}[2][default]%
{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\SHORTFORALL}[2]{\ALC@it\algorithmicforall\ ##1\ %
\algorithmicdo\ {##2}}
\newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@whl}}
\newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop%
\ALC@com{##1}\begin{ALC@loop}}
%changed by alex smola
\newcommand{\FUNCTION}[2][default]{\ALC@it\algorithmicfunction\ ##2\ %
\ALC@com{##1}\begin{ALC@func}}
\newcommand{\MAIN}[2][default]{\ALC@it\algorithmicmain\ ##2\ %
\ALC@com{##1}\begin{ALC@main}}
%end changed by alex smola
\newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat%
\ALC@com{##1}\begin{ALC@rpt}}
\newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1}
\ifthenelse{\boolean{ALC@noend}}{
\newcommand{\ENDIF}{\end{ALC@if}}
\newcommand{\ENDFOR}{\end{ALC@for}}
\newcommand{\ENDWHILE}{\end{ALC@whl}}
\newcommand{\ENDLOOP}{\end{ALC@loop}}
\newcommand{\ENDFUNCTION}{\end{ALC@func}}
\newcommand{\ENDMAIN}{\end{ALC@main}}
}{
\newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif}
\newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor}
\newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile}
\newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop}
\newcommand{\ENDFUNCTION}{\end{ALC@func}\ALC@it\algorithmicendfunction}
\newcommand{\ENDMAIN}{\end{ALC@main}\ALC@it\algorithmicendmain}
}
\renewcommand{\@toodeep}{}
\begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}%
\itemsep\z@ \itemindent\z@ \listparindent\z@%
\partopsep\z@ \parskip\z@ \parsep\z@%
\labelsep 0.5em \topsep 0.2em%
\ifthenelse{\equal{#1}{0}}
{\labelwidth 0.5em }
{\labelwidth 1.2em }
\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}
\ALC@tlm\labelsep
}
}
{\end{list}}
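The two style files above provide the algorithm float and the line-numbered algorithmic environment used by the report template. A minimal usage sketch (the algorithm content is placeholder text, not taken from the coursework):

\documentclass{article}
\usepackage{algorithm}   % the float defined above
\usepackage{algorithmic} % the environment defined above
\begin{document}
\begin{algorithm}[t]
\caption{One gradient descent step (placeholder)}
\begin{algorithmic}[1]
\REQUIRE learning rate $\eta$, parameters $\theta$, gradient $g$
\STATE $\theta \leftarrow \theta - \eta g$
\end{algorithmic}
\end{algorithm}
\end{document}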

View File

@ -0,0 +1,485 @@
% fancyhdr.sty version 3.2
% Fancy headers and footers for LaTeX.
% Piet van Oostrum,
% Dept of Computer and Information Sciences, University of Utrecht,
% Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
% Telephone: +31 30 2532180. Email: piet@cs.uu.nl
% ========================================================================
% LICENCE:
% This file may be distributed under the terms of the LaTeX Project Public
% License, as described in lppl.txt in the base LaTeX distribution.
% Either version 1 or, at your option, any later version.
% ========================================================================
% MODIFICATION HISTORY:
% Sep 16, 1994
% version 1.4: Correction for use with \reversemargin
% Sep 29, 1994:
% version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
% Oct 4, 1994:
% version 1.6: Reset single spacing in headers/footers for use with
% setspace.sty or doublespace.sty
% Oct 4, 1994:
% version 1.7: changed \let\@mkboth\markboth to
% \def\@mkboth{\protect\markboth} to make it more robust
% Dec 5, 1994:
% version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
% importantly) use the \chapter/sectionmark definitions from ps@headings if
% they exist (which should be true for all standard classes).
% May 31, 1995:
% version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
% construction in the doc did not work properly with the fancyplain style.
% June 1, 1995:
% version 1.91: The definition of \@mkboth wasn't restored on subsequent
% \pagestyle{fancy}'s.
% June 1, 1995:
% version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
% \pagestyle{fancy} would erroneously select the plain version.
% June 1, 1995:
% version 1.93: \fancypagestyle command added.
% Dec 11, 1995:
% version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
% CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
% position (old hardcoded value of .3\normalbaselineskip is far too high
% when used with very small footer fonts).
% Jan 31, 1996:
% version 1.95: call \@normalsize in the reset code if that is defined,
% otherwise \normalsize.
% this is to solve a problem with ucthesis.cls, as this doesn't
% define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
% work as this is optimized to do very little, so there \@normalsize should
% be called. Hopefully this code works for all versions of LaTeX known to
% mankind.
% April 25, 1996:
% version 1.96: initialize \headwidth to a magic (negative) value to catch
% most common cases that people change it before calling \pagestyle{fancy}.
% Note it can't be initialized when reading in this file, because
% \textwidth could be changed afterwards. This is quite probable.
% We also switch to \MakeUppercase rather than \uppercase and introduce a
% \nouppercase command for use in headers. and footers.
% May 3, 1996:
% version 1.97: Two changes:
% 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
% for the chapter and section marks. The current version of amsbook and
% amsart classes don't seem to need them anymore. Moreover the standard
% latex classes don't use \markboth if twoside isn't selected, and this is
% confusing as \leftmark doesn't work as expected.
% 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
% in the amsbook and amsart classes, that make global changes to \topskip,
% which are reset in \ps@empty. Hopefully this doesn't break other things.
% May 7, 1996:
% version 1.98:
% Added % after the line \def\nouppercase
% May 7, 1996:
% version 1.99: This is the alpha version of fancyhdr 2.0
% Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
% Changed \headrulewidth, \footrulewidth, \footruleskip to
% macros rather than length parameters, In this way they can be
% conditionalized and they don't consume length registers. There is no need
% to have them as length registers unless you want to do calculations with
% them, which is unlikely. Note that this may make some uses of them
% incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
% May 10, 1996:
% version 1.99a:
% Added a few more % signs
% May 10, 1996:
% version 1.99b:
% Changed the syntax of \f@nfor to be resistent to catcode changes of :=
% Removed the [1] from the defs of \lhead etc. because the parameter is
% consumed by the \@[xy]lhead etc. macros.
% June 24, 1997:
% version 1.99c:
% corrected \nouppercase to also include the protected form of \MakeUppercase
% \global added to manipulation of \headwidth.
% \iffootnote command added.
% Some comments added about \@fancyhead and \@fancyfoot.
% Aug 24, 1998
% version 1.99d
% Changed the default \ps@empty to \ps@@empty in order to allow
% \fancypagestyle{empty} redefinition.
% Oct 11, 2000
% version 2.0
% Added LPPL license clause.
%
% A check for \headheight is added. An errormessage is given (once) if the
% header is too large. Empty headers don't generate the error even if
% \headheight is very small or even 0pt.
% Warning added for the use of 'E' option when twoside option is not used.
% In this case the 'E' fields will never be used.
%
% Mar 10, 2002
% version 2.1beta
% New command: \fancyhfoffset[place]{length}
% defines offsets to be applied to the header/footer to let it stick into
% the margins (if length > 0).
% place is like in fancyhead, except that only E,O,L,R can be used.
% This replaces the old calculation based on \headwidth and the marginpar
% area.
% \headwidth will be dynamically calculated in the headers/footers when
% this is used.
%
% Mar 26, 2002
% version 2.1beta2
% \fancyhfoffset now also takes h,f as possible letters in the argument to
% allow the header and footer widths to be different.
% New commands \fancyheadoffset and \fancyfootoffset added comparable to
% \fancyhead and \fancyfoot.
% Errormessages and warnings have been made more informative.
%
% Dec 9, 2002
% version 2.1
% The defaults for \footrulewidth, \plainheadrulewidth and
% \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
% someone inadvertantly uses \setlength to change any of these, the value
% of \z@skip will not be changed, rather an errormessage will be given.
% March 3, 2004
% Release of version 3.0
% Oct 7, 2004
% version 3.1
% Added '\endlinechar=13' to \fancy@reset to prevent problems with
% includegraphics in header when verbatiminput is active.
% March 22, 2005
% version 3.2
% reset \everypar (the real one) in \fancy@reset because spanish.ldf does
% strange things with \everypar between << and >>.
\def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
\def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
\fancy@gbl\def#1{#2\strut}\fi}
\let\fancy@gbl\global
\def\@fancyerrmsg#1{%
\ifx\PackageError\undefined
\errmessage{#1}\else
\PackageError{Fancyhdr}{#1}{}\fi}
\def\@fancywarning#1{%
\ifx\PackageWarning\undefined
\errmessage{#1}\else
\PackageWarning{Fancyhdr}{#1}{}\fi}
% Usage: \@forc \var{charstring}{command to be executed for each char}
% This is similar to LaTeX's \@tfor, but expands the charstring.
\def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
\def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
\f@@rc#1#2\f@@rc{#3}\fi}
\def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
% Usage: \f@nfor\name:=list\do{body}
% Like LaTeX's \@for but an empty list is treated as a list with an empty
% element
\newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
\expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
% Usage: \def@ult \cs{defaults}{argument}
% sets \cs to the characters from defaults appearing in argument
% or defaults if it would be empty. All characters are lowercased.
\newcommand\def@ult[3]{%
\edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
\def#1{}%
\@forc\tmpf@ra{#2}%
{\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
\ifx\@empty#1\def#1{#2}\fi}
%
% \if@in <char><set><truecase><falsecase>
%
\newcommand{\if@in}[4]{%
\edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
\expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
\newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
{\f@ncyhf\fancyhead h[]}}
\newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
{\f@ncyhf\fancyfoot f[]}}
\newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
{\f@ncyhf\fancyhf{}[]}}
% New commands for offsets added
\newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
{\f@ncyhfoffs\fancyheadoffset h[]}}
\newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
{\f@ncyhfoffs\fancyfootoffset f[]}}
\newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
{\f@ncyhfoffs\fancyhfoffset{}[]}}
% The header and footer fields are stored in command sequences with
% names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
% and <z> from [hf].
\def\f@ncyhf#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lcr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\fancy@def\csname
f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}}
\def\f@ncyhfoffs#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\setlength\csname
f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}%
\fancy@setoffs}
% Fancyheadings version 1 commands. These are more or less deprecated,
% but they continue to work.
\newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
\def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
\def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
\newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
\def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
\def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
\newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
\def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
\def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
\newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
\def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
\def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
\newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
\def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
\def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
\newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
\def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
\def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
\newlength{\fancy@headwidth}
\let\headwidth\fancy@headwidth
\newlength{\f@ncyO@elh}
\newlength{\f@ncyO@erh}
\newlength{\f@ncyO@olh}
\newlength{\f@ncyO@orh}
\newlength{\f@ncyO@elf}
\newlength{\f@ncyO@erf}
\newlength{\f@ncyO@olf}
\newlength{\f@ncyO@orf}
\newcommand{\headrulewidth}{0.4pt}
\newcommand{\footrulewidth}{0pt}
\newcommand{\footruleskip}{.3\normalbaselineskip}
% Fancyplain stuff shouldn't be used anymore (rather
% \fancypagestyle{plain} should be used), but it must be present for
% compatibility reasons.
\newcommand{\plainheadrulewidth}{0pt}
\newcommand{\plainfootrulewidth}{0pt}
\newif\if@fancyplain \@fancyplainfalse
\def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
\headwidth=-123456789sp %magic constant
% Command to reset various things in the headers:
% a.o. single spacing (taken from setspace.sty)
% and the catcode of ^^M (so that epsf files in the header work if a
% verbatim crosses a page boundary)
% It also defines a \nouppercase command that disables \uppercase and
% \Makeuppercase. It can only be used in the headers and footers.
\let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
\def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
\def\baselinestretch{1}%
\def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
\expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
\ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
\ifx\@normalsize\undefined \normalsize % for ucthesis.cls
\else \@normalsize \fi
\else% NFSS (2.09) present
\@newbaseline%
\fi}
% Initialization of the head and foot text.
% The default values still contain \fancyplain for compatibility.
\fancyhf{} % clear all
% lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
% evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
\if@twoside
\fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
\fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
\else
\fancyhead[l]{\fancyplain{}{\sl\rightmark}}
\fancyhead[r]{\fancyplain{}{\sl\leftmark}}
\fi
\fancyfoot[c]{\rm\thepage} % page number
% Use box 0 as a temp box and dimen 0 as temp dimen.
% This can be done, because this code will always
% be used inside another box, and therefore the changes are local.
\def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
{\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
We now make it that large for the rest of the document.^^J
This may cause the page layout to be inconsistent, however\@gobble}%
\dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
\box0}
% Put together a header or footer given the left, center and
% right text, fillers at left and right and a rule.
% The \lap commands put the text into an hbox of zero size,
% so overlapping text does not generate an errormessage.
% These macros have 5 parameters:
% 1. LEFTSIDE BEARING % This determines at which side the header will stick
% out. When \fancyhfoffset is used this calculates \headwidth, otherwise
% it is \hss or \relax (after expansion).
% 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
% 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
% 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
% 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
\def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\headheight{\hbox
{\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
\parbox[b]{\headwidth}{\centering#3}\hfill
\llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
\def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\footskip{\footrule
\hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
\parbox[t]{\headwidth}{\centering#3}\hfill
\llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
\def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
\hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
\def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
\vskip-\footruleskip\vskip-\footrulewidth
\hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
\def\ps@fancy{%
\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
%
% Define \MakeUppercase for old LaTeXen.
% Note: we used \def rather than \let, so that \let\uppercase\relax (from
% the version 1 documentation) will still work.
%
\@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
\@ifundefined{chapter}{\def\sectionmark##1{\markboth
{\MakeUppercase{\ifnum \c@secnumdepth>\z@
\thesection\hskip 1em\relax \fi ##1}}{}}%
\def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
\thesubsection\hskip 1em\relax \fi ##1}}}%
{\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
\@chapapp\ \thechapter. \ \fi ##1}}{}}%
\def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
\thesection. \ \fi ##1}}}}%
%\csname ps@headings\endcsname % use \ps@headings defaults if they exist
\ps@@fancy
\gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
% Initialize \headwidth if the user didn't
%
\ifdim\headwidth<0sp
%
% This catches the case that \headwidth hasn't been initialized and the
% case that the user added something to \headwidth in the expectation that
% it was initialized to \textwidth. We compensate this now. This loses if
% the user intended to multiply it by a factor. But that case is more
% likely done by saying something like \headwidth=1.2\textwidth.
% The doc says you have to change \headwidth after the first call to
% \pagestyle{fancy}. This code is just to catch the most common cases were
% that requirement is violated.
%
\global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
\fi}
\def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
\def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
\let\ps@@empty\ps@empty
\def\ps@@fancy{%
\ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
\def\@mkboth{\protect\markboth}%
\def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
\def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
\def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
\def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
}
% Default definitions for compatibility mode:
% These cause the header/footer to take the defined \headwidth as width
% And to shift in the direction of the marginpar area
\def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
\def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
\let\fancy@Oelh\fancy@Oorh
\let\fancy@Oerh\fancy@Oolh
\let\fancy@Oolf\fancy@Oolh
\let\fancy@Oorf\fancy@Oorh
\let\fancy@Oelf\fancy@Oelh
\let\fancy@Oerf\fancy@Oerh
% New definitions for the use of \fancyhfoffset
% These calculate the \headwidth from \textwidth and the specified offsets.
\def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
\advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
\def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
\advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
\def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
\advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
\def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
\advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
\def\fancy@setoffs{%
% Just in case \let\headwidth\textwidth was used
\fancy@gbl\let\headwidth\fancy@headwidth
\fancy@gbl\let\fancy@Oolh\fancy@offsolh
\fancy@gbl\let\fancy@Oelh\fancy@offselh
\fancy@gbl\let\fancy@Oorh\hss
\fancy@gbl\let\fancy@Oerh\hss
\fancy@gbl\let\fancy@Oolf\fancy@offsolf
\fancy@gbl\let\fancy@Oelf\fancy@offself
\fancy@gbl\let\fancy@Oorf\hss
\fancy@gbl\let\fancy@Oerf\hss}
\newif\iffootnote
\let\latex@makecol\@makecol
\def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi
\let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
\def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
\def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
\def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
\newcommand{\fancypagestyle}[2]{%
\@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
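For reference, a minimal usage sketch for the fancyhdr package above, using only commands it defines (the header text is placeholder content):

\documentclass{article}
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{}                      % clear all header/footer fields
\fancyhead[L]{MLP Coursework 1} % placeholder left header
\fancyhead[R]{\thepage}
\renewcommand{\headrulewidth}{0.4pt}
\begin{document}
Placeholder body text.
\end{document}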

File diff suppressed because it is too large.

8 binary image files added (previews not shown). Sizes: 9.6 KiB, 12 KiB, 14 KiB, 10 KiB, 9.0 KiB, 11 KiB, 13 KiB, 12 KiB.

BIN report/figures/fig1_acc.png Normal file (binary image, 39 KiB; preview not shown)
BIN report/figures/fig1_err.png Normal file (binary image, 30 KiB; preview not shown)

1441
report/icml2017.bst Normal file

File diff suppressed because it is too large.

View File

@ -0,0 +1,231 @@
%% REPLACE SXXXXXX with your student number
\def\studentNumber{SXXXXXX}
%% START of YOUR ANSWERS
%% Add answers to the questions below by replacing the text inside the braces {} of \youranswer{ "Text to be replaced with your answer." }.
%
% Do not delete the commands for adding figures and tables. Instead fill in the missing values with your experiment results, and replace the images with your own respective figures.
%
% You can generally delete the placeholder text, for example "Question Figure 2 - Replace the images ...".
%
% There are 18 TEXT QUESTIONS (a few of the short first ones have their answers added to both the Introduction and the Abstract). Replace the text inside the brackets of the command \youranswer with your answer to the question.
%
% There are also 3 "questions" to replace some placeholder FIGURES with your own, and 3 "questions" asking you to fill in the missing entries in the TABLES provided.
%
% NOTE! Questions are ordered by the order of appearance of their answers in the text, not by the order in which you should tackle them. Specifically, you cannot answer Questions 2, 3, and 4 before concluding all of the relevant experiments and analysis. Similarly, you should fill in the TABLES and FIGURES before discussing the results presented there.
%
% NOTE! If for some reason you do not manage to produce results for some FIGURES and TABLES, then you can get partial marks by discussing your expectations of the results in the relevant TEXT QUESTIONS (for example Question 8 makes use of Table 1 and Figure 2).
%
% Please refer to the coursework specification for more details.
%% - - - - - - - - - - - - TEXT QUESTIONS - - - - - - - - - - - -
%% Question 1:
\newcommand{\questionOne} {
\youranswer{Question 1 - Explain what these figures contain and how the curves evolve, and spot where overfitting occurs. Reason based on the min/max points and velocities (direction and magnitude of change) of the accuracy and error curves}
}
%% Question 2:
\newcommand{\questionTwo} {
\youranswer{Question 2 - Present your network width experiment results by using the relevant figure and table}
}
%% Question 3:
\newcommand{\questionThree} {
\youranswer{Question 3 - Discuss whether varying width affects the results in a consistent way, and whether the results are expected and match well with the prior knowledge (by which we mean your expectations as are formed from the relevant Theory and literature)}
}
%% Question 4:
\newcommand{\questionFour} {
\youranswer{Question 4 - Present your network depth experiment results by using the relevant figure and table}
}
%% Question 5:
\newcommand{\questionFive} {
\youranswer{Question 5 - Discuss whether varying depth affects the results in a consistent way, and whether the results are expected and match well with the prior knowledge (by which we mean your expectations as are formed from the relevant Theory and literature)}
}
%% Question 6:
\newcommand{\questionSix} {
\youranswer{Question 6 - Explain the experimental details (e.g. hyperparameters), discuss the results in terms of their generalisation performance and overfitting. Select and test the best performing model as part of this analysis.}
}
%% Question 7:
\newcommand{\questionSeven} {
\youranswer{Question 7 - Assume you were able to run 8 further training instances (8 specific hyperparameter configurations) where you could combine Dropout and L1, and/or Dropout and L2 regularisation. Which 8 runs would you pick and what question(s) would you aim to answer? Make sure you define the experiment setup, including any relevant hyperparameters}
}
%% Question 8:
\newcommand{\questionEight} {
\youranswer{Question 8 - Briefly draw your conclusions based on the results from the previous sections (what are the take-away messages?), discussing them in the context of the overall literature, and conclude your report with a recommendation for future directions}
}
%% - - - - - - - - - - - - FIGURES - - - - - - - - - - - -
%% Question Figure 2:
\newcommand{\questionFigureTwo} {
\youranswer{Question Figure 2 - Replace the images in Figure 2 with figures depicting the accuracy and error, training and validation curves for your experiments varying the number of hidden units.
%
\begin{figure}[t]
\centering
\begin{subfigure}{\linewidth}
\includegraphics[width=\linewidth]{figures/empty_acc_curve_width.png}
\caption{accuracy by epoch}
\label{fig:width_acccurves}
\end{subfigure}
\begin{subfigure}{\linewidth}
\centering
\includegraphics[width=\linewidth]{figures/empty_error_curve_width.png}
\caption{error by epoch}
\label{fig:width_errorcurves}
\end{subfigure}
\caption{Training and validation curves in terms of classification accuracy (a) and cross-entropy error (b) on the EMNIST dataset for different network widths.}
\label{fig:width}
\end{figure}
}
}
%% Question Figure 3:
\newcommand{\questionFigureThree} {
\youranswer{Question Figure 3 - Replace these images with figures depicting the accuracy and error, training and validation curves for your experiments varying the number of hidden layers.
%
\begin{figure}[t]
\centering
\begin{subfigure}{\linewidth}
\includegraphics[width=\linewidth]{figures/empty_acc_curve_depth.png}
\caption{accuracy by epoch}
\label{fig:depth_acccurves}
\end{subfigure}
\begin{subfigure}{\linewidth}
\centering
\includegraphics[width=\linewidth]{figures/empty_error_curve_depth.png}
\caption{error by epoch}
\label{fig:depth_errorcurves}
\end{subfigure}
\caption{Training and validation curves in terms of classification accuracy (a) and cross-entropy error (b) on the EMNIST dataset for different network depths.}
\label{fig:depth}
\end{figure}
}
}
%% Question Figure 4:
\newcommand{\questionFigureFour} {
\youranswer{Question Figure 4 - Replace these images with figures depicting the Validation Accuracy and Generalisation Gap (difference between validation and training error) for each of the experiment results varying the Dropout inclusion rate, and L1/L2 weight penalty depicted in Table 3 (including any results you have filled in).
%
\begin{figure*}[t]
\centering
\begin{subfigure}{.475\linewidth}
\includegraphics[width=\linewidth]{figures/empty_dropout_plot.png}
\caption{Accuracy and error by inclusion probability.}
\label{fig:dropoutrates}
\end{subfigure}
\begin{subfigure}{.475\linewidth}
\centering
\includegraphics[width=\linewidth]{figures/empty_wd_plot.png}
\caption{Accuracy and error by weight penalty.}
\label{fig:weightrates}
\end{subfigure}
\caption{Accuracy and error by regularisation strength of each method (Dropout and L1/L2 Regularisation).}
\label{fig:hp_search}
\end{figure*}
}
}
%% - - - - - - - - - - - - TABLES - - - - - - - - - - - -
%% Question Table 1:
\newcommand{\questionTableOne} {
\youranswer{
Question Table 1 - Fill in Table 1 with the results from your experiments varying the number of hidden units.
%
\begin{table}[t]
\centering
\begin{tabular}{c|ccc}
\toprule
\# Hidden Units & Val. Acc. & Train Error & Val. Error\\
\midrule
32 & --.- & -.--- & -.--- \\
64 & --.- & -.--- & -.--- \\
128 & --.- & -.--- & -.--- \\
\bottomrule
\end{tabular}
\caption{Validation accuracy (\%) and training/validation error (in terms of cross-entropy error) for varying network widths on the EMNIST dataset.}
\label{tab:width_exp}
\end{table}
}
}
%% Question Table 2:
\newcommand{\questionTableTwo} {
\youranswer{
Question Table 2 - Fill in Table 2 with the results from your experiments varying the number of hidden layers.
%
\begin{table}[t]
\centering
\begin{tabular}{c|ccc}
\toprule
\# Hidden Layers & Val. Acc. & Train Error & Val. Error \\
\midrule
1 & --.- & -.--- & -.--- \\
2 & --.- & -.--- & -.--- \\
3 & --.- & -.--- & -.--- \\
\bottomrule
\end{tabular}
\caption{Validation accuracy (\%) and training/validation error (in terms of cross-entropy error) for varying network depths on the EMNIST dataset.}
\label{tab:depth_exps}
\end{table}
}
}
%% Question Table 3:
\newcommand{\questionTableThree} {
\youranswer{
Question Table 3 - Fill in Table 3 with the results from your experiments for the missing hyperparameter values for each of L1 regularisation, L2 regularisation, Dropout and label smoothing (use the values shown on the table).
%
\begin{table*}[t]
\centering
\begin{tabular}{c|c|ccc}
\toprule
Model & Hyperparameter value(s) & Validation accuracy & Train Error & Validation Error \\
\midrule
\midrule
Baseline & - & 83.7 & 0.241 & 0.533 \\
\midrule
\multirow{4}*{Dropout}
& 0.6 & 80.7 & 0.549 & 0.593 \\
& 0.7 & --.- & -.--- & -.--- \\
& 0.85 & 85.1 & 0.329 & 0.434 \\
& 0.97 & 85.4 & 0.244 & 0.457 \\
\midrule
\multirow{4}*{L1 penalty}
& 5e-4 & 79.5 & 0.642 & 0.658 \\
& 1e-3 & --.- & -.--- & -.--- \\
& 5e-3 & 2.41 & 3.850 & 3.850 \\
& 5e-2 & 2.20 & 3.850 & 3.850 \\
\midrule
\multirow{4}*{L2 penalty}
& 5e-4 & 85.1 & 0.306 & 0.460 \\
& 1e-3 & --.- & -.--- & -.--- \\
& 5e-3 & 81.3 & 0.586 & 0.607 \\
& 5e-2 & 39.2 & 2.258 & 2.256 \\
\midrule
Label smoothing & 0.1 & --.- & -.--- & -.--- \\
\bottomrule
\end{tabular}
\caption{Results of all hyperparameter search experiments. \emph{italics} indicate the best results per series (Dropout, L1 Regularisation, L2 Regularisation, Label smoothing) and \textbf{bold} indicates the best overall.}
\label{tab:hp_search}
\end{table*}
}
}
%% END of YOUR ANSWERS

238
report/mlp-cw1-template.tex Normal file
View File

@ -0,0 +1,238 @@
%% Template for MLP Coursework 1 / 14 October 2024
%% Based on LaTeX template for ICML 2017 - example_paper.tex at
%% https://2017.icml.cc/Conferences/2017/StyleAuthorInstructions
\documentclass{article}
\input{mlp2022_includes}
\definecolor{red}{rgb}{0.95,0.4,0.4}
\definecolor{blue}{rgb}{0.4,0.4,0.95}
\definecolor{orange}{rgb}{1, 0.65, 0}
\newcommand{\youranswer}[1]{{\color{red} \bf[#1]}} %your answer:
%% START of YOUR ANSWERS
\input{mlp-cw1-questions}
%% END of YOUR ANSWERS
%% Do not change anything in this file. Add your answers to mlp-cw1-questions.tex
\begin{document}
\twocolumn[
\mlptitle{MLP Coursework 1}
\centerline{\studentNumber}
\vskip 7mm
]
\begin{abstract}
In this report we study the problem of overfitting, which is the training regime where performance increases on the training set but decreases on validation data. Overfitting prevents our trained model from generalizing well to unseen data.
We first analyse the given example and discuss the probable causes of the underlying problem.
Then we investigate how the depth and width of a neural network can affect overfitting in a feedforward architecture and observe that increasing width and depth tend to enable further overfitting.
Next we discuss how two standard methods, Dropout and Weight Penalty, can
mitigate overfitting, then describe their implementation and use them in our experiments
to reduce overfitting on the EMNIST dataset.
Based on our results, we ultimately find that both dropout and weight penalty are able to mitigate overfitting.
Finally, we conclude the report with our observations and related work.
Our main findings indicate that preventing overfitting is achievable through regularization, although combining different methods is not straightforward.
\end{abstract}
\section{Introduction}
\label{sec:intro}
In this report we focus on a common and important problem while training machine learning models known as overfitting, or overtraining, which is the training regime where performance increases on the training set but decreases on unseen data.
We first start with analysing the given problem in Figure~\ref{fig:example}, study it in different architectures and then investigate different strategies to mitigate the problem.
In particular, Section~\ref{sec:task1} identifies and discusses the given problem, and investigates the effect of network width and depth in terms of generalization gap (see Ch.~5 in \citealt{Goodfellow-et-al-2016}) and generalization performance.
Section \ref{sec:task2.1} introduces two regularization techniques to alleviate overfitting: Dropout \cite{srivastava2014dropout} and L1/L2 Weight Penalties (see Section~7.1 in \citealt{Goodfellow-et-al-2016}).
We first explain them in detail and discuss why they are used for alleviating overfitting.
In Section~\ref{sec:task2.2}, we incorporate each of them and their various combinations into a three hidden layer\footnote{We denote all layers as hidden except the final (output) one. This means that the depth of a network is equal to the number of its hidden layers + 1.} neural network, train it on the EMNIST dataset, which contains 131,600 images of characters and digits, each of size $28\times28$, from 47 classes.
We evaluate them in terms of generalization gap and performance, and discuss the results and effectiveness of the tested regularization strategies.
Our results show that both dropout and weight penalty are able to mitigate overfitting.
Finally, we conclude our study in Section~\ref{sec:concl}, noting that preventing overfitting is achievable through regularization, although combining different methods is not straightforward.
\section{Problem identification}
\label{sec:task1}
\begin{figure}[t]
\centering
\begin{subfigure}{\linewidth}
\includegraphics[width=\linewidth]{figures/fig1_acc.png}
\caption{accuracy by epoch}
\label{fig:example_acccurves}
\end{subfigure}
\begin{subfigure}{\linewidth}
\centering
\includegraphics[width=\linewidth]{figures/fig1_err.png}
\caption{error by epoch}
\label{fig:example_errorcurves}
\end{subfigure}
\caption{Training and validation curves in terms of classification accuracy (a) and cross-entropy error (b) on the EMNIST dataset for the baseline model.}
\label{fig:example}
\end{figure}
Overfitting to training data is a very common and important issue that needs to be dealt with when training neural networks or other machine learning models in general (see Ch.~5 in \citealt{Goodfellow-et-al-2016}).
A model is said to be overfitting when, as training progresses, its performance on the training data keeps improving while its performance on validation data degrades.
Effectively, the model stops learning patterns that are relevant to the task and instead starts to memorize specificities of individual training samples that do not transfer to new samples.
Overfitting leads to poor generalization performance on unseen data, as performance on validation data is indicative of performance on test data and (to an extent) during deployment.
Although it eventually happens in all gradient-based training, it is most often caused by models that are too large with respect to the amount and diversity of the training data. The more free parameters the model has, the easier it is to memorize complex data patterns that only apply to a restricted number of samples.
A prominent symptom of overfitting is the generalization gap, defined as the difference between the validation and training error.
A steady increase in this quantity is usually interpreted as the model entering the overfitting regime.
Figure~\ref{fig:example_acccurves} and \ref{fig:example_errorcurves} show a prototypical example of overfitting.
We see in Figure~\ref{fig:example_acccurves} that \questionOne.
The extent to which our model overfits depends on many factors, for example the quality and quantity of the training set and the complexity of the model.
If we have sufficiently many diverse training samples, or if our model contains few hidden units, it will in general be less prone to overfitting.
Any form of regularization will also limit the extent to which the model overfits.
\subsection{Network width}
\questionTableOne
\questionFigureTwo
First we investigate the effect of increasing the number of hidden units in a single hidden layer network when training on the EMNIST dataset.
The network is trained using the Adam optimizer
with a learning rate of $9 \times 10^{-4}$ and a batch size of 100, for a total of 100 epochs.
The input layer is of size 784, and the output layer consists of 47 units.
Three different models were trained, with a single hidden layer of 32, 64 and 128 ReLU hidden units respectively.
Figure~\ref{fig:width} depicts the error and accuracy curves over 100 epochs for the models with varying numbers of hidden units.
Table~\ref{tab:width_exp} reports the final accuracy, training error, and validation error.
We observe that \questionTwo.
\questionThree.
\subsection{Network depth}
\questionTableTwo
\questionFigureThree
Next we investigate the effect of varying the number of hidden layers in the network.
Table~\ref{tab:depth_exps} and Figure~\ref{fig:depth} depict results from training three models with one, two and three hidden layers respectively, each with 128 ReLU hidden units.
As with previous experiments, they are trained with the Adam optimizer with a learning rate of $9 \times 10^{-4}$ and a batch size of 100.
We observe that \questionFour.
\questionFive.
\section{Regularization}
\label{sec:task2.1}
In this section, we investigate three regularization methods to alleviate the overfitting problem, specifically dropout layers, the L1 and L2 weight penalties and label smoothing.
\subsection{Dropout}
Dropout~\cite{srivastava2014dropout} is a stochastic method that randomly inactivates neurons in a neural network according to a hyperparameter, the inclusion rate (\textit{i.e.} the probability that a unit is included).
Dropout is commonly represented by an additional layer inserted between the linear layer and activation function.
Its forward pass during training is defined as follows:
\begin{align}
\text{mask} &\sim \text{bernoulli}(p)\\
\bm{y}' &= \text{mask} \odot \bm{y}
\end{align}
where $\bm{y}, \bm{y}' \in \mathbb{R}^d$ are the outputs of the linear layer before and after applying dropout, respectively.
$\text{mask} \in \{0, 1\}^d$ is a mask vector whose entries are sampled independently from the Bernoulli distribution with inclusion probability $p$, and $\odot$ denotes element-wise multiplication.
At inference time, stochasticity is not desired, so no neurons are dropped.
To account for the change in the expected value of the outputs, we instead scale them by the inclusion probability $p$, since $\mathbb{E}[\text{mask} \odot \bm{y}] = p \, \bm{y}$:
\begin{align}
\bm{y}' &= p \, \bm{y}
\end{align}
As there is no nonlinear calculation involved, the backward propagation is simply the element-wise product of the gradients with respect to the layer outputs and the mask created in the forward pass:
\begin{align}
\frac{\partial \bm{y}'}{\partial \bm{y}} = \text{mask}
\end{align}
Dropout is an easy-to-implement and highly scalable method.
It can be implemented as a self-contained layer and placed after any layer of the neural network at will.
Dropout reduces the co-dependence of hidden units between layers, so that the neurons of the next layer cannot rely on only a few features from the previous layer.
Instead, it forces the network to extract diverse features and to distribute information evenly among all of them.
By randomly dropping some neurons in training, dropout makes use of a subset of the whole architecture, so it can also be viewed as bagging different sub-networks and averaging their outputs.
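For concreteness, a minimal NumPy sketch of such a dropout layer is shown below; the \texttt{fprop}/\texttt{bprop} interface mirrors the layer convention of the course's \texttt{mlp} package, but the internals are an illustrative assumption rather than the coursework implementation.
\begin{verbatim}
import numpy as np

class DropoutSketch(object):
    """Illustrative dropout layer (not the coursework code)."""

    def __init__(self, rng, incl_prob=0.5):
        assert 0. < incl_prob <= 1.
        self.rng = rng
        self.incl_prob = incl_prob

    def fprop(self, inputs, stochastic=True):
        if stochastic:
            # Training: keep each unit with probability p,
            # zeroing out the dropped units.
            self.mask = self.rng.binomial(
                1, self.incl_prob, size=inputs.shape)
            return inputs * self.mask
        # Inference: keep all units but rescale by p, so the
        # deterministic output matches E[mask * y] = p * y.
        return inputs * self.incl_prob

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        # dy'/dy = mask: gradients flow only through kept units.
        return grads_wrt_outputs * self.mask
\end{verbatim}
Note that the mask sampled in \texttt{fprop} must be cached, since \texttt{bprop} reuses it.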
\subsection{Weight penalty}
L1 and L2 regularization~\cite{ng2004feature} are simple but effective methods to mitigate overfitting to training data.
The idea behind these methods is to penalize large weights by adding a term to the cost function that explicitly constrains the magnitude of the weights through either the L1 norm or the squared L2 norm, leaving the rest of the formulation unchanged.
The optimization problem then takes the form:
\begin{align}
\text{L1: } & \text{min}_{\bm{w}} \; E_{\text{data}}(\bm{X}, \bm{y}, \bm{w}) + \lambda ||\bm{w}||_1\\
\text{L2: } & \text{min}_{\bm{w}} \; E_{\text{data}}(\bm{X}, \bm{y}, \bm{w}) + \lambda ||\bm{w}||^2_2
\end{align}
where $E_{\text{data}}$ denotes the cross entropy error function, and $\{\bm{X}, \bm{y}\}$ denotes the input and target training pairs.
$\lambda$ controls the strength of regularization.
Weight penalties work by constraining the scale of the parameters and preventing them from growing too large, avoiding overly sensitive behaviour on unseen data.
While L1 and L2 regularization are similar in form, they have different effects.
The gradient magnitude in L1 regularization does not depend on the weight value and tends to drive small weights exactly to 0, which can act as a form of feature selection, whereas L2 regularization shrinks all weights proportionally towards a smaller scale.
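As an illustration, minimal NumPy sketches of the two penalty terms and their gradients are given below (the function names are ours; the coursework itself implements these as penalty classes):
\begin{verbatim}
import numpy as np

def l1_penalty(w, coeff):
    # Penalty term added to the data error: lambda * ||w||_1.
    return coeff * np.sum(np.abs(w))

def l1_penalty_grad(w, coeff):
    # Subgradient lambda * sign(w): its magnitude does not
    # depend on w, which drives small weights exactly to zero.
    return coeff * np.sign(w)

def l2_penalty(w, coeff):
    # Penalty term lambda * ||w||_2^2.
    return coeff * np.sum(w ** 2)

def l2_penalty_grad(w, coeff):
    # Gradient 2 * lambda * w: all weights shrink
    # proportionally to their value.
    return 2. * coeff * w
\end{verbatim}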
\subsection{Label smoothing}
Label smoothing regularizes a model based on a softmax with $K$ output values by replacing the hard 0 and 1 targets $t_k$ with the soft targets $\frac{\alpha}{K-1}$ and $1-\alpha$ respectively, where $\alpha$ is typically set to a small number such as $0.1$:
\begin{equation}
\tilde{t}_k =
\begin{cases}
\frac{\alpha}{K-1}, & \quad \text{if} \quad t_k=0\\
1 - \alpha, & \quad \text{if} \quad t_k=1
\end{cases}
\end{equation}
The standard cross-entropy error is typically used with these \emph{soft} targets to train the neural network.
Hence, implementing label smoothing requires only modifying the targets of the training set.
This strategy may prevent a neural network from obtaining very large weights by discouraging very high output values.
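A minimal NumPy sketch of this target transformation (the helper name is ours) is:
\begin{verbatim}
import numpy as np

def smooth_targets(onehot, alpha=0.1):
    # Map hard targets to 1 - alpha (where t_k = 1) and to
    # alpha / (K - 1) (where t_k = 0); rows still sum to 1.
    K = onehot.shape[-1]
    return (onehot * (1. - alpha)
            + (1. - onehot) * alpha / (K - 1))
\end{verbatim}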
\section{Balanced EMNIST Experiments}
\questionTableThree
\questionFigureFour
\label{sec:task2.2}
Here we evaluate the effectiveness of the given regularization methods for reducing overfitting on the EMNIST dataset.
We build a baseline architecture with three hidden layers, each with 128 neurons, which suffers from overfitting as shown in Section~\ref{sec:task1}.
We now train the network with a lower learning rate of $10^{-4}$, as the previous runs were overfitting after only a handful of epochs.
Results for the new baseline (cf.\ Table~\ref{tab:hp_search}) confirm that the lower learning rate helps, so all further experiments are run using it.
We then apply dropout and the L1 or L2 weight penalty to our baseline and search for good hyperparameters on the validation set.
Next, we apply label smoothing with $\alpha=0.1$ to our baseline.
We summarize all the experimental results in Table~\ref{tab:hp_search}. For each method except label smoothing, we plot the relationship between generalization gap and validation accuracy in Figure~\ref{fig:hp_search}.
First we analyze three methods separately, train each over a set of hyperparameters and compare their best performing results.
\questionSix.
\questionSeven.
\section{Conclusion}
\label{sec:concl}
\questionEight.
\newpage
\bibliography{refs}
\end{document}

720
report/mlp2022.sty Normal file
View File

@ -0,0 +1,720 @@
% File: mlp2017.sty (LaTeX style file for ICML-2017, version of 2017-05-31)
% Modified by Daniel Roy 2017: changed byline to use footnotes for affiliations, and removed emails
% This file contains the LaTeX formatting parameters for a two-column
% conference proceedings that is 8.5 inches wide by 11 inches high.
%
% Modified by Percy Liang 12/2/2013: changed the year, location from the previous template for ICML 2014
% Modified by Fei Sha 9/2/2013: changed the year, location form the previous template for ICML 2013
%
% Modified by Fei Sha 4/24/2013: (1) remove the extra whitespace after the first author's email address (in %the camera-ready version) (2) change the Proceeding ... of ICML 2010 to 2014 so PDF's metadata will show up % correctly
%
% Modified by Sanjoy Dasgupta, 2013: changed years, location
%
% Modified by Francesco Figari, 2012: changed years, location
%
% Modified by Christoph Sawade and Tobias Scheffer, 2011: added line
% numbers, changed years
%
% Modified by Hal Daume III, 2010: changed years, added hyperlinks
%
% Modified by Kiri Wagstaff, 2009: changed years
%
% Modified by Sam Roweis, 2008: changed years
%
% Modified by Ricardo Silva, 2007: update of the ifpdf verification
%
% Modified by Prasad Tadepalli and Andrew Moore, merely changing years.
%
% Modified by Kristian Kersting, 2005, based on Jennifer Dy's 2004 version
% - running title. If the original title is to long or is breaking a line,
% use \mlptitlerunning{...} in the preamble to supply a shorter form.
% Added fancyhdr package to get a running head.
% - Updated to store the page size because pdflatex does compile the
% page size into the pdf.
%
% Hacked by Terran Lane, 2003:
% - Updated to use LaTeX2e style file conventions (ProvidesPackage,
% etc.)
% - Added an ``appearing in'' block at the base of the first column
% (thus keeping the ``appearing in'' note out of the bottom margin
% where the printer should strip in the page numbers).
% - Added a package option [accepted] that selects between the ``Under
% review'' notice (default, when no option is specified) and the
% ``Appearing in'' notice (for use when the paper has been accepted
% and will appear).
%
% Originally created as: ml2k.sty (LaTeX style file for ICML-2000)
% by P. Langley (12/23/99)
%%%%%%%%%%%%%%%%%%%%
%% This version of the style file supports both a ``review'' version
%% and a ``final/accepted'' version. The difference is only in the
%% text that appears in the note at the bottom of the first column of
%% the first page. The default behavior is to print a note to the
%% effect that the paper is under review and don't distribute it. The
%% final/accepted version prints an ``Appearing in'' note. To get the
%% latter behavior, in the calling file change the ``usepackage'' line
%% from:
%% \usepackage{icml2017}
%% to
%% \usepackage[accepted]{icml2017}
%%%%%%%%%%%%%%%%%%%%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{mlp2022}[2022/10/16 MLP Coursework Style File]
% Use fancyhdr package
\RequirePackage{fancyhdr}
\RequirePackage{color}
\RequirePackage{algorithm}
\RequirePackage{algorithmic}
\RequirePackage{natbib}
\RequirePackage{eso-pic} % used by \AddToShipoutPicture
\RequirePackage{forloop}
%%%%%%%% Options
%\DeclareOption{accepted}{%
% \renewcommand{\Notice@String}{\ICML@appearing}
\gdef\isaccepted{1}
%}
\DeclareOption{nohyperref}{%
\gdef\nohyperref{1}
}
\ifdefined\nohyperref\else\ifdefined\hypersetup
\definecolor{mydarkblue}{rgb}{0,0.08,0.45}
\hypersetup{ %
pdftitle={},
pdfauthor={},
pdfsubject={MLP Coursework 2022-23},
pdfkeywords={},
pdfborder=0 0 0,
pdfpagemode=UseNone,
colorlinks=true,
linkcolor=mydarkblue,
citecolor=mydarkblue,
filecolor=mydarkblue,
urlcolor=mydarkblue,
pdfview=FitH}
\ifdefined\isaccepted \else
\hypersetup{pdfauthor={Anonymous Submission}}
\fi
\fi\fi
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the
% final/accepted version of the ``appearing in'' note. Modify it to
% change that text.
%%%%%%%%%%%%%%%%%%%%
\newcommand{\ICML@appearing}{\textit{MLP Coursework 1 2021--22}}
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the draft/under
% review version of the ``appearing in'' note. Modify it to change
% that text.
%%%%%%%%%%%%%%%%%%%%
\newcommand{\Notice@String}{MLP Coursework 1 2021--22}
% Cause the declared options to actually be parsed and activated
\ProcessOptions\relax
% Uncomment the following for debugging. It will cause LaTeX to dump
% the version of the ``appearing in'' string that will actually appear
% in the document.
%\typeout{>> Notice string='\Notice@String'}
% Change citation commands to be more like old ICML styles
\newcommand{\yrcite}[1]{\citeyearpar{#1}}
\renewcommand{\cite}[1]{\citep{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% to ensure the letter format is used. pdflatex does compile the
% page size into the pdf. This is done using \pdfpagewidth and
% \pdfpageheight. As Latex does not know this directives, we first
% check whether pdflatex or latex is used.
%
% Kristian Kersting 2005
%
% in order to account for the more recent use of pdfetex as the default
% compiler, I have changed the pdf verification.
%
% Ricardo Silva 2007
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paperwidth=210mm
\paperheight=297mm
% old PDFLaTex verification, circa 2005
%
%\newif\ifpdf\ifx\pdfoutput\undefined
% \pdffalse % we are not running PDFLaTeX
%\else
% \pdfoutput=1 % we are running PDFLaTeX
% \pdftrue
%\fi
\newif\ifpdf %adapted from ifpdf.sty
\ifx\pdfoutput\undefined
\else
\ifx\pdfoutput\relax
\else
\ifcase\pdfoutput
\else
\pdftrue
\fi
\fi
\fi
\ifpdf
% \pdfpagewidth=\paperwidth
% \pdfpageheight=\paperheight
\setlength{\pdfpagewidth}{210mm}
\setlength{\pdfpageheight}{297mm}
\fi
% Physical page layout
\evensidemargin -5.5mm
\oddsidemargin -5.5mm
\setlength\textheight{248mm}
\setlength\textwidth{170mm}
\setlength\columnsep{6.5mm}
\setlength\headheight{10pt}
\setlength\headsep{10pt}
\addtolength{\topmargin}{-20pt}
%\setlength\headheight{1em}
%\setlength\headsep{1em}
\addtolength{\topmargin}{-6mm}
%\addtolength{\topmargin}{-2em}
%% The following is adapted from code in the acmconf.sty conference
%% style file. The constants in it are somewhat magical, and appear
%% to work well with the two-column format on US letter paper that
%% ICML uses, but will break if you change that layout, or if you use
%% a longer block of text for the copyright notice string. Fiddle with
%% them if necessary to get the block to fit/look right.
%%
%% -- Terran Lane, 2003
%%
%% The following comments are included verbatim from acmconf.sty:
%%
%%% This section (written by KBT) handles the 1" box in the lower left
%%% corner of the left column of the first page by creating a picture,
%%% and inserting the predefined string at the bottom (with a negative
%%% displacement to offset the space allocated for a non-existent
%%% caption).
%%%
\def\ftype@copyrightbox{8}
\def\@copyrightspace{
% Create a float object positioned at the bottom of the column. Note
% that because of the mystical nature of floats, this has to be called
% before the first column is populated with text (e.g., from the title
% or abstract blocks). Otherwise, the text will force the float to
% the next column. -- TDRL.
\@float{copyrightbox}[b]
\begin{center}
\setlength{\unitlength}{1pc}
\begin{picture}(20,1.5)
% Create a line separating the main text from the note block.
% 4.818pc==0.8in.
\put(0,2.5){\line(1,0){4.818}}
% Insert the text string itself. Note that the string has to be
% enclosed in a parbox -- the \put call needs a box object to
% position. Without the parbox, the text gets splattered across the
% bottom of the page semi-randomly. The 19.75pc distance seems to be
% the width of the column, though I can't find an appropriate distance
% variable to substitute here. -- TDRL.
\put(0,0){\parbox[b]{19.75pc}{\small \Notice@String}}
\end{picture}
\end{center}
\end@float}
% Note: A few Latex versions need the next line instead of the former.
% \addtolength{\topmargin}{0.3in}
% \setlength\footheight{0pt}
\setlength\footskip{0pt}
%\pagestyle{empty}
\flushbottom \twocolumn
\sloppy
% Clear out the addcontentsline command
\def\addcontentsline#1#2#3{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% commands for formatting paper title, author names, and addresses.
%%start%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% title as running head -- Kristian Kersting 2005 %%%%%%%%%%%%%
%\makeatletter
%\newtoks\mytoksa
%\newtoks\mytoksb
%\newcommand\addtomylist[2]{%
% \mytoksa\expandafter{#1}%
% \mytoksb{#2}%
% \edef#1{\the\mytoksa\the\mytoksb}%
%}
%\makeatother
% box to check the size of the running head
\newbox\titrun
% general page style
\pagestyle{fancy}
\fancyhf{}
\fancyhead{}
\fancyfoot{}
% set the width of the head rule to 1 point
\renewcommand{\headrulewidth}{1pt}
% definition to set the head as running head in the preamble
\def\mlptitlerunning#1{\gdef\@mlptitlerunning{#1}}
% main definition adapting \mlptitle from 2004
\long\def\mlptitle#1{%
%check whether @mlptitlerunning exists
% if not \mlptitle is used as running head
\ifx\undefined\@mlptitlerunning%
\gdef\@mlptitlerunning{#1}
\fi
%add it to pdf information
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdftitle={#1}}
\fi\fi
%get the dimension of the running title
\global\setbox\titrun=\vbox{\small\bf\@mlptitlerunning}
% error flag
\gdef\@runningtitleerror{0}
% running title too long
\ifdim\wd\titrun>\textwidth%
{\gdef\@runningtitleerror{1}}%
% running title breaks a line
\else\ifdim\ht\titrun>6.25pt
{\gdef\@runningtitleerror{2}}%
\fi
\fi
% if there is something wrong with the running title
\ifnum\@runningtitleerror>0
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Title exceeds size limitations for running head.}%
\typeout{Please supply a shorter form for the running head}
\typeout{with \string\mlptitlerunning{...}\space prior to \string\begin{document}}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
% set default running title
\chead{\small\bf Title Suppressed Due to Excessive Size}%
\else
% 'everything' fine, set provided running title
\chead{\small\bf\@mlptitlerunning}%
\fi
% no running title on the first page of the paper
\thispagestyle{empty}
%%%%%%%%%%%%%%%%%%%% Kristian Kersting %%%%%%%%%%%%%%%%%%%%%%%%%
%end%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
{\center\baselineskip 18pt
\toptitlebar{\Large\bf #1}\bottomtitlebar}
}
\gdef\icmlfullauthorlist{}
\newcommand\addstringtofullauthorlist{\g@addto@macro\icmlfullauthorlist}
\newcommand\addtofullauthorlist[1]{%
\ifdefined\icmlanyauthors%
\addstringtofullauthorlist{, #1}%
\else%
\addstringtofullauthorlist{#1}%
\gdef\icmlanyauthors{1}%
\fi%
\ifdefined\nohyperref\else\ifdefined\hypersetup%
\hypersetup{pdfauthor=\icmlfullauthorlist}%
\fi\fi}
\def\toptitlebar{\hrule height1pt \vskip .25in}
\def\bottomtitlebar{\vskip .22in \hrule height1pt \vskip .3in}
\newenvironment{icmlauthorlist}{%
\setlength\topsep{0pt}
\setlength\parskip{0pt}
\begin{center}
}{%
\end{center}
}
\newcounter{@affiliationcounter}
\newcommand{\@pa}[1]{%
% ``#1''
\ifcsname the@affil#1\endcsname
% do nothing
\else
\ifcsname @icmlsymbol#1\endcsname
% nothing
\else
\stepcounter{@affiliationcounter}%
\newcounter{@affil#1}%
\setcounter{@affil#1}{\value{@affiliationcounter}}%
\fi
\fi%
\ifcsname @icmlsymbol#1\endcsname
\textsuperscript{\csname @icmlsymbol#1\endcsname\,}%
\else
%\expandafter\footnotemark[\arabic{@affil#1}\,]%
\textsuperscript{\arabic{@affil#1}\,}%
\fi
}
%\newcommand{\icmlauthor}[2]{%
%\addtofullauthorlist{#1}%
%#1\@for\theaffil:=#2\do{\pa{\theaffil}}%
%}
\newcommand{\icmlauthor}[2]{%
\ifdefined\isaccepted
\mbox{\bf #1}\,\@for\theaffil:=#2\do{\@pa{\theaffil}} \addtofullauthorlist{#1}%
\else
\ifdefined\@icmlfirsttime
\else
\gdef\@icmlfirsttime{1}
\mbox{\bf Anonymous Authors}\@pa{@anon} \addtofullauthorlist{Anonymous Authors}
\fi
\fi
}
\newcommand{\icmlsetsymbol}[2]{%
\expandafter\gdef\csname @icmlsymbol#1\endcsname{#2}
}
\newcommand{\icmlaffiliation}[2]{%
\ifdefined\isaccepted
\ifcsname the@affil#1\endcsname
\expandafter\gdef\csname @affilname\csname the@affil#1\endcsname\endcsname{#2}%
\else
{\bf AUTHORERR: Error in use of \textbackslash{}icmlaffiliation command. Label ``#1'' not mentioned in some \textbackslash{}icmlauthor\{author name\}\{labels here\} command beforehand. }
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Affiliation label undefined. }%
\typeout{Make sure \string\icmlaffiliation\space follows }
\typeout{all of \string\icmlauthor\space commands}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
\fi
\else % \isaccepted
% can be called multiple times... it's idempotent
\expandafter\gdef\csname @affilname1\endcsname{Anonymous Institution, Anonymous City, Anonymous Region, Anonymous Country}
\fi
}
\newcommand{\icmlcorrespondingauthor}[2]{
\ifdefined\isaccepted
\ifdefined\icmlcorrespondingauthor@text
\g@addto@macro\icmlcorrespondingauthor@text{, #1 \textless{}#2\textgreater{}}
\else
\gdef\icmlcorrespondingauthor@text{#1 \textless{}#2\textgreater{}}
\fi
\else
\gdef\icmlcorrespondingauthor@text{Anonymous Author \textless{}anon.email@domain.com\textgreater{}}
\fi
}
\newcommand{\icmlEqualContribution}{\textsuperscript{*}Equal contribution }
\newcounter{@affilnum}
\newcommand{\printAffiliationsAndNotice}[1]{%
\stepcounter{@affiliationcounter}%
{\let\thefootnote\relax\footnotetext{\hspace*{-\footnotesep}#1%
\forloop{@affilnum}{1}{\value{@affilnum} < \value{@affiliationcounter}}{
\textsuperscript{\arabic{@affilnum}}\ifcsname @affilname\the@affilnum\endcsname%
\csname @affilname\the@affilnum\endcsname%
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlaffiliation.}
\fi
}.
\ifdefined\icmlcorrespondingauthor@text
Correspondence to: \icmlcorrespondingauthor@text.
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlcorrespondingauthor.}
\fi
\ \\
\Notice@String
}
}
}
%\makeatother
\long\def\icmladdress#1{%
{\bf The \textbackslash{}icmladdress command is no longer used. See the example\_paper PDF .tex for usage of \textbackslash{}icmlauthor and \textbackslash{}icmlaffiliation.}
}
%% keywords as first class citizens
\def\icmlkeywords#1{%
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
% \ifdefined\nohyperref\else\ifdefined\hypersetup
% \hypersetup{pdfkeywords={#1}}
% \fi\fi
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdfkeywords={#1}}
\fi\fi
}
% modification to natbib citations
\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
% Redefinition of the abstract environment.
\renewenvironment{abstract}
{%
% Insert the ``appearing in'' copyright notice.
%\@copyrightspace
\centerline{\large\bf Abstract}
\vspace{-0.12in}\begin{quote}}
{\par\end{quote}\vskip 0.12in}
% numbered section headings with different treatment of numbers
\def\@startsection#1#2#3#4#5#6{\if@noskipsec \leavevmode \fi
\par \@tempskipa #4\relax
\@afterindenttrue
% Altered the following line to indent a section's first paragraph.
% \ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \@afterindentfalse\fi
\ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \fi
\if@nobreak \everypar{}\else
\addpenalty{\@secpenalty}\addvspace{\@tempskipa}\fi \@ifstar
{\@ssect{#3}{#4}{#5}{#6}}{\@dblarg{\@sict{#1}{#2}{#3}{#4}{#5}{#6}}}}
\def\@sict#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname}\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec.~}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
\def\@sect#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname\hskip 0.4em }\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
% section headings with less space above and below them
\def\thesection {\arabic{section}}
\def\thesubsection {\thesection.\arabic{subsection}}
\def\section{\@startsection{section}{1}{\z@}{-0.12in}{0.02in}
{\large\bf\raggedright}}
\def\subsection{\@startsection{subsection}{2}{\z@}{-0.10in}{0.01in}
{\normalsize\bf\raggedright}}
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-0.08in}{0.01in}
{\normalsize\sc\raggedright}}
\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
% Footnotes
\footnotesep 6.65pt %
\skip\footins 9pt
\def\footnoterule{\kern-3pt \hrule width 0.8in \kern 2.6pt }
\setcounter{footnote}{0}
% Lists and paragraphs
\parindent 0pt
\topsep 4pt plus 1pt minus 2pt
\partopsep 1pt plus 0.5pt minus 0.5pt
\itemsep 2pt plus 1pt minus 0.5pt
\parsep 2pt plus 1pt minus 0.5pt
\parskip 6pt
\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
\leftmarginvi .5em
\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
\def\@listi{\leftmargin\leftmargini}
\def\@listii{\leftmargin\leftmarginii
\labelwidth\leftmarginii\advance\labelwidth-\labelsep
\topsep 2pt plus 1pt minus 0.5pt
\parsep 1pt plus 0.5pt minus 0.5pt
\itemsep \parsep}
\def\@listiii{\leftmargin\leftmarginiii
\labelwidth\leftmarginiii\advance\labelwidth-\labelsep
\topsep 1pt plus 0.5pt minus 0.5pt
\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
\itemsep \topsep}
\def\@listiv{\leftmargin\leftmarginiv
\labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
\def\@listv{\leftmargin\leftmarginv
\labelwidth\leftmarginv\advance\labelwidth-\labelsep}
\def\@listvi{\leftmargin\leftmarginvi
\labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
\abovedisplayskip 7pt plus2pt minus5pt%
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip 0pt plus3pt%
\belowdisplayshortskip 4pt plus3pt minus3pt%
% Less leading in most fonts (due to the narrow columns)
% The choices were between 1-pt and 1.5-pt leading
\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
% Revised formatting for figure captions and table titles.
\newsavebox\newcaptionbox\newdimen\newcaptionboxwid
\long\def\@makecaption#1#2{
\vskip 10pt
\baselineskip 11pt
\setbox\@tempboxa\hbox{#1. #2}
\ifdim \wd\@tempboxa >\hsize
\sbox{\newcaptionbox}{\small\sl #1.~}
\newcaptionboxwid=\wd\newcaptionbox
\usebox\newcaptionbox {\footnotesize #2}
% \usebox\newcaptionbox {\small #2}
\else
\centerline{{\small\sl #1.} {\small #2}}
\fi}
\def\fnum@figure{Figure \thefigure}
\def\fnum@table{Table \thetable}
% Strut macros for skipping spaces above and below text in tables.
\def\abovestrut#1{\rule[0in]{0in}{#1}\ignorespaces}
\def\belowstrut#1{\rule[-#1]{0in}{#1}\ignorespaces}
\def\abovespace{\abovestrut{0.20in}}
\def\aroundspace{\abovestrut{0.20in}\belowstrut{0.10in}}
\def\belowspace{\belowstrut{0.10in}}
% Various personal itemization commands.
\def\texitem#1{\par\noindent\hangindent 12pt
\hbox to 12pt {\hss #1 ~}\ignorespaces}
\def\icmlitem{\texitem{$\bullet$}}
% To comment out multiple lines of text.
\long\def\comment#1{}
%% Line counter (not in final version). Adapted from NIPS style file by Christoph Sawade
% Vertical Ruler
% This code is, largely, from the CVPR 2010 conference style file
% ----- define vruler
\makeatletter
\newbox\icmlrulerbox
\newcount\icmlrulercount
\newdimen\icmlruleroffset
\newdimen\cv@lineheight
\newdimen\cv@boxheight
\newbox\cv@tmpbox
\newcount\cv@refno
\newcount\cv@tot
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
\def\makevruler[#1][#2][#3][#4][#5]{
\begingroup\offinterlineskip
\textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
\global\setbox\icmlrulerbox=\vbox to \textheight{%
{
\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
\cv@lineheight=#1\global\icmlrulercount=#2%
\cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
\cv@refno1\vskip-\cv@lineheight\vskip1ex%
\loop\setbox\cv@tmpbox=\hbox to0cm{ % side margin
\hfil {\hfil\fillzeros[#4]\icmlrulercount}
}%
\ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
\advance\cv@refno1\global\advance\icmlrulercount#3\relax
\ifnum\cv@refno<\cv@tot\repeat
}
}
\endgroup
}%
\makeatother
% ----- end of vruler
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
\def\icmlruler#1{\makevruler[12pt][#1][1][3][\textheight]\usebox{\icmlrulerbox}}
\AddToShipoutPicture{%
\icmlruleroffset=\textheight
\advance\icmlruleroffset by 5.2pt % top margin
\color[rgb]{.7,.7,.7}
\ifdefined\isaccepted \else
\AtTextUpperLeft{%
\put(\LenToUnit{-35pt},\LenToUnit{-\icmlruleroffset}){%left ruler
\icmlruler{\icmlrulercount}}
% \put(\LenToUnit{1.04\textwidth},\LenToUnit{-\icmlruleroffset}){%right ruler
% \icmlruler{\icmlrulercount}}
}
\fi
}
\endinput

49
report/mlp2022_includes.tex Normal file
View File

@ -0,0 +1,49 @@
\usepackage[T1]{fontenc}
\usepackage{amssymb,amsmath}
\usepackage{txfonts}
\usepackage{microtype}
% For figures
\usepackage{graphicx}
\usepackage{subcaption}
% For citations
\usepackage{natbib}
% For algorithms
\usepackage{algorithm}
\usepackage{algorithmic}
% the hyperref package is used to produce hyperlinks in the
% resulting PDF. If this breaks your system, please comment out the
% following usepackage line and replace \usepackage{mlp2022} with
% \usepackage[nohyperref]{mlp2022} below.
\usepackage{hyperref}
\usepackage{url}
\urlstyle{same}
\usepackage{color}
\usepackage{booktabs} % To thicken table lines
\usepackage{multirow} % Multirow cells in table
\usepackage{soul}
\usepackage{bm}
% Packages hyperref and algorithmic misbehave sometimes. We can fix
% this with the following command.
\newcommand{\theHalgorithm}{\arabic{algorithm}}
% Set up MLP coursework style (based on ICML style)
\usepackage{mlp2022}
\mlptitlerunning{MLP Coursework 1 (\studentNumber)}
\bibliographystyle{icml2017}
\DeclareMathOperator{\softmax}{softmax}
\DeclareMathOperator{\sigmoid}{sigmoid}
\DeclareMathOperator{\sgn}{sgn}
\DeclareMathOperator{\relu}{relu}
\DeclareMathOperator{\lrelu}{lrelu}
\DeclareMathOperator{\elu}{elu}
\DeclareMathOperator{\selu}{selu}
\DeclareMathOperator{\maxout}{maxout}

43
report/refs.bib Normal file
View File

@ -0,0 +1,43 @@
@inproceedings{goodfellow2013maxout,
title={Maxout networks},
author={Goodfellow, Ian and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
booktitle={International Conference on Machine Learning},
pages={1319--1327},
year={2013},
organization={PMLR}
}
@article{srivastava2014dropout,
title={Dropout: a simple way to prevent neural networks from overfitting},
author={Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
journal={Journal of Machine Learning Research},
volume={15},
number={1},
pages={1929--1958},
year={2014},
publisher={JMLR.org}
}
@book{Goodfellow-et-al-2016,
title={Deep Learning},
author={Ian Goodfellow and Yoshua Bengio and Aaron Courville},
publisher={MIT Press},
note={\url{http://www.deeplearningbook.org}},
year={2016}
}
@inproceedings{ng2004feature,
title={Feature selection, L1 vs. L2 regularization, and rotational invariance},
author={Ng, Andrew Y},
booktitle={Proceedings of the Twenty-First International Conference on Machine Learning},
pages={78},
year={2004}
}
@inproceedings{loshchilov2019decoupled,
title={Decoupled weight decay regularization},
author={Loshchilov, Ilya and Hutter, Frank},
booktitle={International Conference on Learning Representations (ICLR)},
year={2019}
}

View File

@ -0,0 +1,105 @@
import argparse
import os
import numpy as np
import sys
# sys.path.append('/path/to/mlpractical')
from mlp.layers import DropoutLayer
from mlp.penalties import L1Penalty, L2Penalty
parser = argparse.ArgumentParser(description='Welcome to regularization test script')
parser.add_argument('--student_id', nargs="?", type=str, help='Your student id in the format "Sxxxxxxx"')
args = parser.parse_args()
student_id = args.student_id
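# Run a layer's fprop/bprop (and, if it carries parameters, its parameter
# gradients) and collect the outputs for the test pack.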
def fprop_bprop_layer(inputs, dropout_layer, grads_wrt_outputs, weights, params=False):
if params:
dropout_layer.params = [weights]
fprop = dropout_layer.fprop(inputs)
bprop = dropout_layer.bprop(inputs, fprop, grads_wrt_outputs)
outputs = [fprop, bprop]
if params:
grads_wrt_weights = dropout_layer.grads_wrt_params(
inputs, grads_wrt_outputs)
outputs.append(grads_wrt_weights)
return outputs
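# Evaluate a penalty's value (__call__) and gradient (grad) for the given
# inputs, optionally including parameter gradients.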
def call_grad_layer(inputs, penalty_layer, grads_wrt_outputs, weights, params=False):
if params:
penalty_layer.params = [weights]
call = penalty_layer(inputs)
grad = penalty_layer.grad(inputs)
outputs = [call, grad]
if params:
grads_wrt_weights = penalty_layer.grads_wrt_params(
inputs, grads_wrt_outputs)
outputs.append(grads_wrt_weights)
return outputs
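# Derive a reproducible per-student seed from the numeric part of the id.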
def get_student_seed(student_id):
student_seed_number = int(student_id[1:])
return student_seed_number
seed = get_student_seed(student_id)
rng = np.random.RandomState(seed)
reg_output_dict = dict()
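# Fixed random inputs, upstream gradients and weights used by all the
# checks below, drawn from the student-seeded RNG.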
inputs = rng.normal(loc=0.0, scale=1.0, size=(32, 3, 8, 8))
grads_wrt_outputs = rng.normal(loc=0.0, scale=1.0, size=(32, 3, 8, 8))
weights = rng.normal(loc=0.0, scale=1.0, size=(1))
reg_output_dict['inputs'] = inputs
reg_output_dict['weights'] = weights
reg_output_dict['grads_wrt_outputs'] = grads_wrt_outputs
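# Forward/backward checks for the dropout layer, with a fixed RNG so the
# sampled masks are reproducible.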
for dropout_layer, params_flag in zip(
[DropoutLayer],
[False]):
if isinstance(dropout_layer(), DropoutLayer):
rng = np.random.RandomState(92019)
print(True)
outputs = fprop_bprop_layer(inputs, dropout_layer(
rng=rng), grads_wrt_outputs, weights, params_flag)
else:
outputs = fprop_bprop_layer(
inputs, dropout_layer(), grads_wrt_outputs, weights, params_flag)
reg_output_dict['{}_{}'.format(
dropout_layer.__name__, 'fprop')] = outputs[0]
reg_output_dict['{}_{}'.format(
dropout_layer.__name__, 'bprop')] = outputs[1]
if params_flag:
reg_output_dict['{}_{}'.format(
dropout_layer.__name__, 'grads_wrt_outputs')] = outputs[2]
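# Value/gradient checks for the L1 and L2 penalties.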
for penalty_layer, params_flag in zip(
[L1Penalty, L2Penalty], [False, False]):
outputs = call_grad_layer(inputs, penalty_layer(
1e-4), grads_wrt_outputs, weights, params_flag)
reg_output_dict['{}_{}'.format(
penalty_layer.__name__, '__call__correct')] = outputs[0]
reg_output_dict['{}_{}'.format(
penalty_layer.__name__, 'grad_correct')] = outputs[1]
if params_flag:
reg_output_dict['{}_{}'.format(
penalty_layer.__name__, 'grads_wrt_outputs')] = outputs[2]
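# Save the generated test pack to MLP_DATA_DIR, then reload and print it
# as a sanity check.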
np.save(os.path.join(os.environ['MLP_DATA_DIR'],
'{}_regularization_test_pack.npy'.format(seed)), reg_output_dict)
test_data = np.load(os.path.join(
os.environ['MLP_DATA_DIR'], '{}_regularization_test_pack.npy'.format(seed)), allow_pickle=True)
for key, value in test_data.item().items():
print(key, value)