reset repo for new year

This commit is contained in:
AntreasAntoniou 2018-09-13 02:14:28 +01:00
parent d14e05706f
commit 973201d585
63 changed files with 0 additions and 24376 deletions

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,6 +0,0 @@
# -*- coding: utf-8 -*-
"""Machine Learning Practical package."""
# Names of the package authors.
__authors__ = ['Pawel Swietojanski', 'Steve Renals', 'Matt Graham']
# Seed used for random number generators when the caller does not supply one,
# so that runs are reproducible by default.
DEFAULT_SEED = 123456 # Default random number generator seed if none provided.

View File

@ -1,401 +0,0 @@
# -*- coding: utf-8 -*-
"""Data providers.
This module provides classes for loading datasets and iterating over batches of
data points.
"""
import pickle
import gzip
import numpy as np
import os
from mlp import DEFAULT_SEED
class DataProvider(object):
    """Generic provider serving (inputs, targets) pairs in fixed-size batches.

    The object is its own iterator: iterating over it yields one batch at a
    time and, once an epoch is exhausted, prepares the next epoch before
    signalling the end of iteration.
    """

    def __init__(self, inputs, targets, batch_size, max_num_batches=-1,
                 shuffle_order=True, rng=None):
        """Set up a provider over the given data arrays.

        Args:
            inputs (ndarray): Input features, one data point per row, i.e.
                of shape (num_data, input_dim).
            targets (ndarray): Output targets of shape
                (num_data, output_dim), or (num_data,) for scalar targets.
            batch_size (int): How many data points go into each batch.
            max_num_batches (int): Upper limit on the number of batches
                served per epoch. The number actually served never exceeds
                the number of whole batches the data can be split into.
                A value of -1 means no limit.
            shuffle_order (bool): If true the data order is randomly
                permuted at the start of every epoch.
            rng (RandomState): Seeded random number generator used for
                shuffling. A generator seeded with `DEFAULT_SEED` is
                created when omitted.

        Raises:
            ValueError: If `batch_size` is below 1 or `max_num_batches` is
                neither -1 nor a positive number.
        """
        self.inputs = inputs
        self.targets = targets
        if batch_size < 1:
            raise ValueError('batch_size must be >= 1')
        self._batch_size = batch_size
        if max_num_batches < -1 or max_num_batches == 0:
            raise ValueError('max_num_batches must be -1 or > 0')
        self._max_num_batches = max_num_batches
        self._update_num_batches()
        self.shuffle_order = shuffle_order
        # tracks where each original data point currently sits so that the
        # original ordering can be restored by `reset`
        self._current_order = np.arange(inputs.shape[0])
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng
        self.new_epoch()

    @property
    def batch_size(self):
        """Number of data points to include in each batch."""
        return self._batch_size

    @batch_size.setter
    def batch_size(self, value):
        if value < 1:
            raise ValueError('batch_size must be >= 1')
        self._batch_size = value
        self._update_num_batches()

    @property
    def max_num_batches(self):
        """Maximum number of batches to iterate over in an epoch."""
        return self._max_num_batches

    @max_num_batches.setter
    def max_num_batches(self, value):
        if value < -1 or value == 0:
            raise ValueError('max_num_batches must be -1 or > 0')
        self._max_num_batches = value
        self._update_num_batches()

    def _update_num_batches(self):
        """Recomputes how many batches are served per epoch."""
        # only whole batches are served, so integer division gives the most
        # batches the data can supply
        whole_batches = self.inputs.shape[0] // self.batch_size
        limit = self.max_num_batches
        self.num_batches = (
            whole_batches if limit == -1 else min(limit, whole_batches))

    def __iter__(self):
        """Returns the provider itself, which acts as its own iterator."""
        return self

    def new_epoch(self):
        """Begins a fresh pass through the data, reshuffling if enabled."""
        self._curr_batch = 0
        if self.shuffle_order:
            self.shuffle()

    def __next__(self):
        """Python 3 iterator protocol hook; delegates to `next`."""
        return self.next()

    def reset(self):
        """Restores the original data order and starts a new epoch."""
        restore = np.argsort(self._current_order)
        self._current_order = self._current_order[restore]
        self.inputs = self.inputs[restore]
        self.targets = self.targets[restore]
        self.new_epoch()

    def shuffle(self):
        """Applies one random permutation to inputs, targets and order."""
        order = self.rng.permutation(self.inputs.shape[0])
        self._current_order = self._current_order[order]
        self.inputs = self.inputs[order]
        self.targets = self.targets[order]

    def next(self):
        """Returns the next (inputs, targets) batch.

        Raises:
            StopIteration: When all batches of the current epoch have been
                served. A new epoch is started before raising so the
                provider can be iterated over again immediately.
        """
        if self._curr_batch >= self.num_batches:
            self.new_epoch()
            raise StopIteration()
        start = self._curr_batch * self.batch_size
        window = slice(start, (self._curr_batch + 1) * self.batch_size)
        batch = (self.inputs[window], self.targets[window])
        self._curr_batch += 1
        return batch
class MNISTDataProvider(DataProvider):
    """Data provider for MNIST handwritten digit images."""

    def __init__(self, which_set='train', batch_size=100, max_num_batches=-1,
                 shuffle_order=True, rng=None):
        """Create a new MNIST data provider object.

        Args:
            which_set: One of 'train', 'valid' or 'test'. Determines which
                portion of the MNIST data this object should provide.
            batch_size (int): Number of data points to include in each batch.
            max_num_batches (int): Maximum number of batches to iterate over
                in an epoch. If `max_num_batches * batch_size > num_data` then
                only as many batches as the data can be split into will be
                used. If set to -1 all of the data will be used.
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.

        Raises:
            AssertionError: If `which_set` is not a recognised name or the
                data file is missing.
            KeyError: If the `MLP_DATA_DIR` environment variable is not set.
        """
        # check a valid which_set was provided
        assert which_set in ['train', 'valid', 'test'], (
            'Expected which_set to be either train, valid or test. '
            'Got {0}'.format(which_set)
        )
        self.which_set = which_set
        self.num_classes = 10
        # construct path to data using os.path.join to ensure the correct path
        # separator for the current platform / OS is used
        # MLP_DATA_DIR environment variable should point to the data directory
        data_path = os.path.join(
            os.environ['MLP_DATA_DIR'], 'mnist-{0}.npz'.format(which_set))
        assert os.path.isfile(data_path), (
            'Data file does not exist at expected path: ' + data_path
        )
        # load data from compressed numpy file; the context manager closes
        # the underlying file handle once the arrays have been read
        with np.load(data_path) as loaded:
            inputs, targets = loaded['inputs'], loaded['targets']
        inputs = inputs.astype(np.float32)
        # pass the loaded data to the parent class __init__
        super(MNISTDataProvider, self).__init__(
            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)

    def next(self):
        """Returns next data batch or raises `StopIteration` if at end.

        The integer targets of the batch are converted to a 1 of K coding
        before being returned.
        """
        inputs_batch, targets_batch = super(MNISTDataProvider, self).next()
        return inputs_batch, self.to_one_of_k(targets_batch)

    def to_one_of_k(self, int_targets):
        """Converts integer coded class target to 1 of K coded targets.

        Args:
            int_targets (ndarray): Array of integer coded class targets (i.e.
                where an integer from 0 to `num_classes` - 1 is used to
                indicate which is the correct class). This should be of shape
                (num_data,).

        Returns:
            Array of 1 of K coded targets i.e. an array of shape
            (num_data, num_classes) where for each row all elements are equal
            to zero except for the column corresponding to the correct class
            which is equal to one.
        """
        one_of_k_targets = np.zeros((int_targets.shape[0], self.num_classes))
        one_of_k_targets[range(int_targets.shape[0]), int_targets] = 1
        return one_of_k_targets
class EMNISTDataProvider(DataProvider):
    """Data provider for EMNIST handwritten character images (47 classes)."""

    def __init__(self, which_set='train', batch_size=100, max_num_batches=-1,
                 shuffle_order=True, rng=None):
        """Create a new EMNIST data provider object.

        Args:
            which_set: One of 'train', 'valid' or 'test'. Determines which
                portion of the EMNIST data this object should provide.
            batch_size (int): Number of data points to include in each batch.
            max_num_batches (int): Maximum number of batches to iterate over
                in an epoch. If `max_num_batches * batch_size > num_data` then
                only as many batches as the data can be split into will be
                used. If set to -1 all of the data will be used.
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.

        Raises:
            AssertionError: If `which_set` is not a recognised name or the
                data file is missing.
            KeyError: If the `MLP_DATA_DIR` environment variable is not set.
        """
        # check a valid which_set was provided
        assert which_set in ['train', 'valid', 'test'], (
            'Expected which_set to be either train, valid or test. '
            'Got {0}'.format(which_set)
        )
        self.which_set = which_set
        self.num_classes = 47
        # construct path to data using os.path.join to ensure the correct path
        # separator for the current platform / OS is used
        # MLP_DATA_DIR environment variable should point to the data directory
        data_path = os.path.join(
            os.environ['MLP_DATA_DIR'], 'emnist-{0}.npz'.format(which_set))
        assert os.path.isfile(data_path), (
            'Data file does not exist at expected path: ' + data_path
        )
        # load data from compressed numpy file; the context manager closes
        # the underlying file handle once the arrays have been read
        with np.load(data_path) as loaded:
            inputs, targets = loaded['inputs'], loaded['targets']
        inputs = inputs.astype(np.float32)
        # flatten each image to a 784-dimensional vector and rescale the
        # values by 1 / 255
        inputs = np.reshape(inputs, newshape=(-1, 28*28))
        inputs = inputs / 255.0
        # pass the loaded data to the parent class __init__
        super(EMNISTDataProvider, self).__init__(
            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)

    def next(self):
        """Returns next data batch or raises `StopIteration` if at end.

        The integer targets of the batch are converted to a 1 of K coding
        before being returned.
        """
        inputs_batch, targets_batch = super(EMNISTDataProvider, self).next()
        return inputs_batch, self.to_one_of_k(targets_batch)

    def to_one_of_k(self, int_targets):
        """Converts integer coded class target to 1 of K coded targets.

        Args:
            int_targets (ndarray): Array of integer coded class targets (i.e.
                where an integer from 0 to `num_classes` - 1 is used to
                indicate which is the correct class). This should be of shape
                (num_data,).

        Returns:
            Array of 1 of K coded targets i.e. an array of shape
            (num_data, num_classes) where for each row all elements are equal
            to zero except for the column corresponding to the correct class
            which is equal to one.
        """
        one_of_k_targets = np.zeros((int_targets.shape[0], self.num_classes))
        one_of_k_targets[range(int_targets.shape[0]), int_targets] = 1
        return one_of_k_targets
class MetOfficeDataProvider(DataProvider):
    """South Scotland Met Office weather data provider."""

    def __init__(self, window_size, batch_size=10, max_num_batches=-1,
                 shuffle_order=True, rng=None):
        """Build a provider of sliding windows over the weather series.

        Args:
            window_size (int): Length of each window the time series is cut
                into. The first `window_size - 1` values of a window form
                the input features and the final value is the target.
            batch_size (int): Number of data points to include in each batch.
            max_num_batches (int): Maximum number of batches to iterate over
                in an epoch. If `max_num_batches * batch_size > num_data` then
                only as many batches as the data can be split into will be
                used. If set to -1 all of the data will be used.
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.
        """
        data_dir = os.environ['MLP_DATA_DIR']
        data_path = os.path.join(data_dir, 'HadSSP_daily_qc.txt')
        assert os.path.isfile(data_path), (
            'Data file does not exist at expected path: ' + data_path
        )
        raw = np.loadtxt(data_path, skiprows=3, usecols=range(2, 32))
        assert window_size > 1, 'window_size must be at least 2.'
        self.window_size = window_size
        # negative entries are treated as missing; keep the rest as a
        # flat vector
        valid = raw[raw >= 0].flatten()
        # standardise to zero mean and unit standard deviation
        centre = np.mean(valid)
        scale = np.std(valid)
        normalised = (valid - centre) / scale
        # overlapping windows are exposed as a strided view rather than
        # copied: consecutive rows start one element apart
        num_windows = normalised.shape[-1] - self.window_size + 1
        window_strides = normalised.strides + (normalised.strides[-1],)
        windowed = np.lib.stride_tricks.as_strided(
            normalised,
            shape=(num_windows, self.window_size),
            strides=window_strides)
        # every column but the last is an input, the last is the target
        inputs, targets = windowed[:, :-1], windowed[:, -1]
        super(MetOfficeDataProvider, self).__init__(
            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)
class CCPPDataProvider(DataProvider):
    """Combined Cycle Power Plant data provider."""

    def __init__(self, which_set='train', input_dims=None, batch_size=10,
                 max_num_batches=-1, shuffle_order=True, rng=None):
        """Create a new Combined Cycle Power Plant data provider object.

        Args:
            which_set: One of 'train' or 'valid'. Determines which portion of
                data this object should provide.
            input_dims: Which of the four input dimension to use. If `None` all
                are used. If an iterable of integers are provided (consisting
                of a subset of {0, 1, 2, 3}) then only the corresponding
                input dimensions are included, in the order given.
            batch_size (int): Number of data points to include in each batch.
            max_num_batches (int): Maximum number of batches to iterate over
                in an epoch. If `max_num_batches * batch_size > num_data` then
                only as many batches as the data can be split into will be
                used. If set to -1 all of the data will be used.
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.

        Raises:
            AssertionError: If the data file is missing, `which_set` is not
                recognised or `input_dims` contains an invalid dimension.
            KeyError: If the `MLP_DATA_DIR` environment variable is not set.
        """
        data_path = os.path.join(
            os.environ['MLP_DATA_DIR'], 'ccpp_data.npz')
        assert os.path.isfile(data_path), (
            'Data file does not exist at expected path: ' + data_path
        )
        # check a valid which_set was provided
        assert which_set in ['train', 'valid'], (
            'Expected which_set to be either train or valid. '
            'Got {0}'.format(which_set)
        )
        # check input_dims are valid; only done when a selection was given
        # (the default `None` means "use every input dimension")
        if input_dims is not None:
            # a list keeps the caller's ordering and, unlike a set, can be
            # used directly as a column index below
            input_dims = list(input_dims)
            assert set(input_dims).issubset({0, 1, 2, 3}), (
                'input_dims should be a subset of {0, 1, 2, 3}'
            )
        # the context manager closes the underlying file handle once the
        # arrays have been read
        with np.load(data_path) as loaded:
            inputs = loaded[which_set + '_inputs']
            targets = loaded[which_set + '_targets']
        if input_dims is not None:
            inputs = inputs[:, input_dims]
        super(CCPPDataProvider, self).__init__(
            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)
class AugmentedMNISTDataProvider(MNISTDataProvider):
    """Data provider for MNIST dataset which randomly transforms images."""

    def __init__(self, which_set='train', batch_size=100, max_num_batches=-1,
                 shuffle_order=True, rng=None, transformer=None):
        """Create a new augmented MNIST data provider object.

        Args:
            which_set: One of 'train', 'valid' or 'test'. Determines which
                portion of the MNIST data this object should provide.
            batch_size (int): Number of data points to include in each batch.
            max_num_batches (int): Maximum number of batches to iterate over
                in an epoch. If `max_num_batches * batch_size > num_data` then
                only as many batches as the data can be split into will be
                used. If set to -1 all of the data will be used.
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.
            transformer: Function which takes an `inputs` array of shape
                (batch_size, input_dim) corresponding to a batch of input
                images and a `rng` random number generator object (i.e. a
                call signature `transformer(inputs, rng)`) and applies a
                potentially random set of transformations to some / all of
                the input images as each new batch is returned when iterating
                over the data provider. If `None` the batches are returned
                untransformed.
        """
        super(AugmentedMNISTDataProvider, self).__init__(
            which_set, batch_size, max_num_batches, shuffle_order, rng)
        self.transformer = transformer

    def next(self):
        """Returns next data batch or raises `StopIteration` if at end.

        The inputs of the batch are passed through `transformer` (together
        with this provider's random number generator) when one was given.
        """
        inputs_batch, targets_batch = super(
            AugmentedMNISTDataProvider, self).next()
        # the constructor default is no transformer; serve the plain batch
        # in that case rather than trying to call `None`
        if self.transformer is None:
            return inputs_batch, targets_batch
        transformed_inputs_batch = self.transformer(inputs_batch, self.rng)
        return transformed_inputs_batch, targets_batch

View File

@ -1,176 +0,0 @@
# -*- coding: utf-8 -*-
"""Error functions.
This module defines error functions, with the aim of model training being to
minimise the error function given a set of inputs and target outputs.
The error functions will typically measure some concept of distance between the
model outputs and target outputs, averaged over all data points in the data set
or batch.
"""
import numpy as np
class SumOfSquaredDiffsError(object):
    """Sum of squared differences (squared Euclidean distance) error."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        The value is half the squared Euclidean distance between outputs and
        targets, averaged over the batch.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar cost function value.
        """
        return 0.5 * np.mean(np.sum((outputs - targets)**2, axis=1))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        # division by the batch size matches the mean taken in `__call__`
        return (outputs - targets) / outputs.shape[0]

    def __repr__(self):
        # report the actual class name, as the other error classes do
        return 'SumOfSquaredDiffsError'
class BinaryCrossEntropyError(object):
    """Binary cross entropy error."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
                The logarithms of `outputs` and `1 - outputs` are taken, so
                values are expected to lie strictly between 0 and 1.
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        return -np.mean(
            targets * np.log(outputs) + (1. - targets) * np.log(1. - outputs))

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        return ((1. - targets) / (1. - outputs) -
                (targets / outputs)) / outputs.shape[0]

    def __repr__(self):
        return 'BinaryCrossEntropyError'
class BinaryCrossEntropySigmoidError(object):
    """Binary cross entropy error with logistic sigmoid applied to outputs."""

    def __call__(self, outputs, targets):
        """Calculates error function given a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
                These are pre-sigmoid values; the logistic sigmoid is applied
                as part of the error calculation.
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        # With p = sigmoid(x) the per-element error
        #     -(t * log(p) + (1 - t) * log(1 - p))
        # simplifies to softplus(x) - t * x. Using `logaddexp` for the
        # softplus avoids the overflow / log(0) that the direct form hits
        # for large magnitude outputs.
        return np.mean(np.logaddexp(0., outputs) - targets * outputs)

    def grad(self, outputs, targets):
        """Calculates gradient of error function with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        # sigmoid(x) = exp(-softplus(-x)), evaluated without overflowing
        probs = np.exp(-np.logaddexp(0., -outputs))
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'BinaryCrossEntropySigmoidError'
class CrossEntropyError(object):
    """Multi-class cross entropy error."""

    def __call__(self, outputs, targets):
        """Evaluates the error for a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value: the negative target-weighted log
            output summed over the output dimension and averaged over the
            batch.
        """
        weighted_log_outputs = targets * np.log(outputs)
        per_example = np.sum(weighted_log_outputs, axis=1)
        return -np.mean(per_example)

    def grad(self, outputs, targets):
        """Evaluates the gradient of the error with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        batch_size = outputs.shape[0]
        ratios = targets / outputs
        return -ratios / batch_size

    def __repr__(self):
        return 'CrossEntropyError'
class CrossEntropySoftmaxError(object):
    """Multi-class cross entropy error with Softmax applied to outputs."""

    def __call__(self, outputs, targets):
        """Evaluates the error for a batch of outputs and targets.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Scalar error function value.
        """
        # subtracting the row maximum leaves the softmax unchanged while
        # keeping the exponentials from growing large
        row_max = outputs.max(-1)[:, np.newaxis]
        shifted = outputs - row_max
        log_norm = np.log(np.sum(np.exp(shifted), axis=-1)[:, np.newaxis])
        log_probs = shifted - log_norm
        per_example = np.sum(targets * log_probs, axis=1)
        return -np.mean(per_example)

    def grad(self, outputs, targets):
        """Evaluates the gradient of the error with respect to outputs.

        Args:
            outputs: Array of model outputs of shape (batch_size, output_dim).
            targets: Array of target outputs of shape (batch_size, output_dim).

        Returns:
            Gradient of error function with respect to outputs.
        """
        exps = np.exp(outputs - outputs.max(-1)[:, np.newaxis])
        probs = exps / exps.sum(-1)[:, np.newaxis]
        return (probs - targets) / outputs.shape[0]

    def __repr__(self):
        return 'CrossEntropySoftmaxError'

View File

@ -1,143 +0,0 @@
# -*- coding: utf-8 -*-
"""Parameter initialisers.
This module defines classes to initialise the parameters in a layer.
"""
import numpy as np
from mlp import DEFAULT_SEED
class ConstantInit(object):
    """Constant parameter initialiser."""

    def __init__(self, value):
        """Store the constant every parameter entry will be set to.

        Args:
            value: Value to initialise parameter to.
        """
        self.value = value

    def __call__(self, shape):
        """Return an array of the requested shape filled with `value`."""
        filled = np.ones(shape=shape)
        return filled * self.value
class UniformInit(object):
    """Random uniform parameter initialiser."""

    def __init__(self, low, high, rng=None):
        """Set the sampling interval and random number generator.

        Args:
            low: Lower bound of interval to sample from.
            high: Upper bound of interval to sample from.
            rng (RandomState): Seeded random number generator. One seeded
                with `DEFAULT_SEED` is created when omitted.
        """
        self.low = low
        self.high = high
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng

    def __call__(self, shape):
        """Return an array of the given shape sampled uniformly."""
        bounds = dict(low=self.low, high=self.high)
        return self.rng.uniform(size=shape, **bounds)
class NormalInit(object):
    """Random normal parameter initialiser."""

    def __init__(self, mean, std, rng=None):
        """Set the distribution parameters and random number generator.

        Args:
            mean: Mean of distribution to sample from.
            std: Standard deviation of distribution to sample from.
            rng (RandomState): Seeded random number generator. One seeded
                with `DEFAULT_SEED` is created when omitted.
        """
        self.mean = mean
        self.std = std
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng

    def __call__(self, shape):
        """Return an array of the given shape sampled from the normal."""
        params = dict(loc=self.mean, scale=self.std)
        return self.rng.normal(size=shape, **params)
class GlorotUniformInit(object):
    """Glorot and Bengio (2010) random uniform weights initialiser.

    Initialises a two-dimensional parameter array using the 'normalized
    initialisation' scheme suggested in [1] which attempts to maintain a
    roughly constant variance in the activations and backpropagated gradients
    of a multi-layer model consisting of interleaved affine and logistic
    sigmoidal transformation layers.

    Weights are sampled from a zero-mean uniform distribution with standard
    deviation `gain * sqrt(2 / (shape[0] + shape[1]))` where `shape` is the
    shape of the weight matrix being initialised.

    References:
      [1]: Understanding the difficulty of training deep feedforward neural
           networks, Glorot and Bengio (2010)
    """

    def __init__(self, gain=1., rng=None):
        """Set the scaling gain and random number generator.

        Args:
            gain: Multiplicative factor to scale initialised weights by.
                Recommended values is 1 for affine layers followed by
                logistic sigmoid layers (or another affine layer).
            rng (RandomState): Seeded random number generator. One seeded
                with `DEFAULT_SEED` is created when omitted.
        """
        self.gain = gain
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng

    def __call__(self, shape):
        """Return a weight array of the given two-dimensional shape."""
        assert len(shape) == 2, (
            'Initialiser should only be used for two dimensional arrays.')
        dim_sum = shape[0] + shape[1]
        std = self.gain * (2. / dim_sum)**0.5
        # a uniform distribution on [-w, w] has standard deviation
        # w / sqrt(3), so w = sqrt(3) * std gives the required spread
        limit = 3.**0.5 * std
        return self.rng.uniform(low=-limit, high=limit, size=shape)
class GlorotNormalInit(object):
    """Glorot and Bengio (2010) random normal weights initialiser.

    Initialises a two-dimensional parameter array using the 'normalized
    initialisation' scheme suggested in [1] which attempts to maintain a
    roughly constant variance in the activations and backpropagated gradients
    of a multi-layer model consisting of interleaved affine and logistic
    sigmoidal transformation layers.

    Weights are sampled from a zero-mean normal distribution with standard
    deviation `gain * sqrt(2 / (shape[0] + shape[1]))` where `shape` is the
    shape of the weight matrix being initialised.

    References:
      [1]: Understanding the difficulty of training deep feedforward neural
           networks, Glorot and Bengio (2010)
    """

    def __init__(self, gain=1., rng=None):
        """Set the scaling gain and random number generator.

        Args:
            gain: Multiplicative factor to scale initialised weights by.
                Recommended values is 1 for affine layers followed by
                logistic sigmoid layers (or another affine layer).
            rng (RandomState): Seeded random number generator. One seeded
                with `DEFAULT_SEED` is created when omitted.
        """
        self.gain = gain
        self.rng = np.random.RandomState(DEFAULT_SEED) if rng is None else rng

    def __call__(self, shape):
        """Return a weight array of the given shape."""
        dim_sum = shape[0] + shape[1]
        scale = self.gain * (2. / dim_sum)**0.5
        return self.rng.normal(loc=0., scale=scale, size=shape)

File diff suppressed because it is too large Load Diff

View File

@ -1,162 +0,0 @@
# -*- coding: utf-8 -*-
"""Learning rules.
This module contains classes implementing gradient based learning rules.
"""
import numpy as np
class GradientDescentLearningRule(object):
    """Simple (stochastic) gradient descent learning rule.

    For a scalar error function `E(p[0], p[1] ... )` of some set of
    potentially multidimensional parameters this attempts to find a local
    minimum of the error function by repeatedly applying, to each parameter,
    an update of the form

        p[i] := p[i] - learning_rate * dE/dp[i]

    with `learning_rate` a positive scaling parameter.

    When the gradients supplied come from a stochastic estimate of the true
    error function (e.g. the error on only a subset of the data points) the
    rule corresponds to stochastic gradient descent.
    """

    def __init__(self, learning_rate=1e-3):
        """Creates a new learning rule object.

        Args:
            learning_rate: A positive scalar to scale gradient updates to the
                parameters by. This needs to be carefully set - if too large
                the learning dynamic will be unstable and may diverge, while
                if set too small learning will proceed very slowly.
        """
        assert learning_rate > 0., 'learning_rate should be positive.'
        self.learning_rate = learning_rate

    def initialise(self, params):
        """Registers the parameters this rule will optimise.

        This must be called before `update_params` is first called.

        Args:
            params: A list of the parameters to be optimised. Note these will
                be updated *in-place* to avoid reallocating arrays on each
                update.
        """
        self.params = params

    def reset(self):
        """Resets any additional state variables to their initial values.

        This rule keeps no state beyond the parameters themselves, so
        there is nothing to do.
        """
        return None

    def update_params(self, grads_wrt_params):
        """Applies a single gradient descent update to all parameters.

        All parameter updates are performed using in-place operations and so
        nothing is returned.

        Args:
            grads_wrt_params: A list of gradients of the scalar loss function
                with respect to each of the parameters passed to `initialise`
                previously, with this list expected to be in the same order.
        """
        rate = self.learning_rate
        for target, gradient in zip(self.params, grads_wrt_params):
            step = rate * gradient
            target -= step
class MomentumLearningRule(GradientDescentLearningRule):
    """Gradient descent with momentum learning rule.

    This extends the basic gradient learning rule by introducing extra
    momentum state variables for each parameter. These can help the learning
    dynamic overcome shallow local minima and speed convergence when
    making multiple successive steps in a similar direction in parameter space.

    For parameter p[i] and corresponding momentum m[i] the updates for a
    scalar loss function `L` are of the form

        m[i] := mom_coeff * m[i] - learning_rate * dL/dp[i]
        p[i] := p[i] + m[i]

    with `learning_rate` a positive scaling parameter for the gradient updates
    and `mom_coeff` a value in [0, 1] that determines how much 'friction' there
    is in the system and so how quickly previous momentum contributions decay.
    """

    def __init__(self, learning_rate=1e-3, mom_coeff=0.9):
        """Creates a new learning rule object.

        Args:
            learning_rate: A positive scalar to scale gradient updates to the
                parameters by. This needs to be carefully set - if too large
                the learning dynamic will be unstable and may diverge, while
                if set too small learning will proceed very slowly.
            mom_coeff: A scalar in the range [0, 1] inclusive. This determines
                the contribution of the previous momentum value to the value
                after each update. If equal to 0 the momentum is set to exactly
                the negative scaled gradient each update and so this rule
                collapses to standard gradient descent. If equal to 1 the
                momentum will just be decremented by the scaled gradient at
                each update. This is equivalent to simulating the dynamic in
                a frictionless system. Due to energy conservation the loss
                of 'potential energy' as the dynamics moves down the loss
                function surface will lead to an increasingly large 'kinetic
                energy' and so speed, meaning the updates will become
                increasingly large, potentially unstably so. Typically a value
                less than but close to 1 will avoid these issues and cause the
                dynamic to converge to a local minima where the gradients are
                by definition zero.
        """
        super(MomentumLearningRule, self).__init__(learning_rate)
        assert mom_coeff >= 0. and mom_coeff <= 1., (
            'mom_coeff should be in the range [0, 1].'
        )
        self.mom_coeff = mom_coeff

    def initialise(self, params):
        """Initialises the state of the learning rule for a set of parameters.

        This must be called before `update_params` is first called.

        Args:
            params: A list of the parameters to be optimised. Note these will
                be updated *in-place* to avoid reallocating arrays on each
                update.
        """
        super(MomentumLearningRule, self).initialise(params)
        # one zero-initialised momentum array per parameter, same shape
        self.moms = [np.zeros_like(param) for param in self.params]

    def reset(self):
        """Resets any additional state variables to their initial values.

        For this learning rule this corresponds to zeroing all the momenta.
        """
        # iterate over the momentum arrays directly so that each one is
        # zeroed in place
        for mom in self.moms:
            mom *= 0.

    def update_params(self, grads_wrt_params):
        """Applies a single update to all parameters.

        All parameter updates are performed using in-place operations and so
        nothing is returned.

        Args:
            grads_wrt_params: A list of gradients of the scalar loss function
                with respect to each of the parameters passed to `initialise`
                previously, with this list expected to be in the same order.
        """
        for param, mom, grad in zip(self.params, self.moms, grads_wrt_params):
            mom *= self.mom_coeff
            mom -= self.learning_rate * grad
            param += mom

View File

@ -1,145 +0,0 @@
# -*- coding: utf-8 -*-
"""Model definitions.
This module implements objects encapsulating learnable models of input-output
relationships. The model objects implement methods for forward propagating
the inputs through the transformation(s) defined by the model to produce
outputs (and intermediate states) and for calculating gradients of scalar
functions of the outputs with respect to the model parameters.
"""
from mlp.layers import LayerWithParameters, StochasticLayer, StochasticLayerWithParameters
class SingleLayerModel(object):
    """A model consisting of a single transformation layer."""

    def __init__(self, layer):
        """Wrap a single layer as a model.

        Args:
            layer: The layer object defining the model architecture.
        """
        self.layer = layer

    @property
    def params(self):
        """A list of all of the parameters of the model."""
        return self.layer.params

    def fprop(self, inputs):
        """Forward propagate a batch of inputs through the layer.

        Args:
            inputs: Batch of inputs to the model.

        Returns:
            Two-element list `[inputs, outputs]`. The inputs are included so
            the return value has the same form as that of multi-layer
            models, whose `fprop` returns the activations at every layer.
        """
        outputs = self.layer.fprop(inputs)
        return [inputs, outputs]

    def grads_wrt_params(self, activations, grads_wrt_outputs):
        """Calculates gradients with respect to the model parameters.

        Args:
            activations: List of all activations from forward pass through
                model using `fprop`. Only the first entry (the model inputs)
                is used.
            grads_wrt_outputs: Gradient with respect to the model outputs of
                the scalar function parameter gradients are being calculated
                for.

        Returns:
            List of gradients of the scalar function with respect to all model
            parameters.
        """
        model_inputs = activations[0]
        return self.layer.grads_wrt_params(model_inputs, grads_wrt_outputs)

    def __repr__(self):
        return ''.join(['SingleLayerModel(', str(self.layer), ')'])
class MultipleLayerModel(object):
    """A model consisting of multiple layers applied sequentially."""

    def __init__(self, layers):
        """Create a new multiple layer model instance.

        Args:
            layers: List of the layer objects defining the model in the
                order they should be applied from inputs to outputs.
        """
        self.layers = layers

    @property
    def params(self):
        """A list of all of the parameters of the model."""
        params = []
        for layer in self.layers:
            # Only layers of the parameterised kinds contribute parameters.
            if isinstance(layer, (LayerWithParameters,
                                  StochasticLayerWithParameters)):
                params += layer.params
        return params

    def fprop(self, inputs, evaluation=False):
        """Forward propagates a batch of inputs through the model.

        Args:
            inputs: Batch of inputs to the model.
            evaluation: If truthy, stochastic layers are called with
                `stochastic=False`; otherwise they are called with
                `stochastic=True`. Non-stochastic layers are called without
                the `stochastic` argument either way.

        Returns:
            List of the activations at the output of all layers of the model
            plus the inputs (to the first layer) as the first element. The
            last element of the list corresponds to the model outputs.
        """
        activations = [inputs]
        for layer in self.layers:
            # The most recent activation is the input to the current layer.
            layer_inputs = activations[-1]
            if isinstance(layer, (StochasticLayer,
                                  StochasticLayerWithParameters)):
                layer_outputs = layer.fprop(
                    layer_inputs, stochastic=not evaluation)
            else:
                layer_outputs = layer.fprop(layer_inputs)
            activations.append(layer_outputs)
        return activations

    def grads_wrt_params(self, activations, grads_wrt_outputs):
        """Calculates gradients with respect to the model parameters.

        Args:
            activations: List of all activations from forward pass through
                model using `fprop`.
            grads_wrt_outputs: Gradient with respect to the model outputs of
                the scalar function parameter gradients are being calculated
                for.

        Returns:
            List of gradients of the scalar function with respect to all model
            parameters.
        """
        grads_wrt_params = []
        # Walk the layers from last to first, pushing the gradient backwards.
        for i, layer in enumerate(self.layers[::-1]):
            inputs = activations[-i - 2]
            outputs = activations[-i - 1]
            grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)
            if isinstance(layer, (LayerWithParameters,
                                  StochasticLayerWithParameters)):
                # Each layer's gradients are stored reversed here because the
                # whole list is reversed once more on return, which restores
                # both the layer order and the per-layer parameter order.
                grads_wrt_params += layer.grads_wrt_params(
                    inputs, grads_wrt_outputs)[::-1]
            grads_wrt_outputs = grads_wrt_inputs
        return grads_wrt_params[::-1]

    def __repr__(self):
        # NOTE(review): the label reads 'MultiLayerModel' rather than the
        # class name; kept unchanged so existing printed output is preserved.
        return (
            'MultiLayerModel(\n ' +
            '\n '.join([str(layer) for layer in self.layers]) +
            '\n)'
        )

View File

@ -1,148 +0,0 @@
# -*- coding: utf-8 -*-
"""Model optimisers.
This module contains objects implementing (batched) stochastic gradient descent
based optimisation of models.
"""
import time
import logging
from collections import OrderedDict
import numpy as np
import tqdm
logger = logging.getLogger(__name__)
class Optimiser(object):
    """Basic model optimiser."""

    def __init__(self, model, error, learning_rule, train_dataset,
                 valid_dataset=None, data_monitors=None, notebook=False):
        """Create a new optimiser instance.

        Args:
            model: The model to optimise.
            error: The scalar error function to minimise.
            learning_rule: Gradient based learning rule to use to minimise
                error.
            train_dataset: Data provider for training set data batches.
            valid_dataset: Data provider for validation set data batches.
            data_monitors: Dictionary of functions evaluated on targets and
                model outputs (averaged across both full training and
                validation data sets) to monitor during training in addition
                to the error. Keys should correspond to a string label for
                the statistic being evaluated.
            notebook: If true, progress bars are created with
                `tqdm.tqdm_notebook`; otherwise `tqdm.tqdm` is used.
        """
        self.model = model
        self.error = error
        self.learning_rule = learning_rule
        self.learning_rule.initialise(self.model.params)
        self.train_dataset = train_dataset
        self.valid_dataset = valid_dataset
        # The error itself is always monitored, under the key 'error'.
        self.data_monitors = OrderedDict([('error', error)])
        if data_monitors is not None:
            self.data_monitors.update(data_monitors)
        self.notebook = notebook
        if notebook:
            self.tqdm_progress = tqdm.tqdm_notebook
        else:
            self.tqdm_progress = tqdm.tqdm

    def do_training_epoch(self):
        """Do a single training epoch.

        This iterates through all batches in training dataset, for each
        calculating the gradient of the estimated error given the batch with
        respect to all the model parameters and then updates the model
        parameters according to the learning rule.
        """
        with self.tqdm_progress(total=self.train_dataset.num_batches) as train_progress_bar:
            train_progress_bar.set_description("Epoch Progress")
            for inputs_batch, targets_batch in self.train_dataset:
                activations = self.model.fprop(inputs_batch)
                grads_wrt_outputs = self.error.grad(activations[-1], targets_batch)
                grads_wrt_params = self.model.grads_wrt_params(
                    activations, grads_wrt_outputs)
                self.learning_rule.update_params(grads_wrt_params)
                train_progress_bar.update(1)

    def eval_monitors(self, dataset, label):
        """Evaluates the monitors for the given dataset.

        Args:
            dataset: Dataset to perform evaluation with.
            label: Tag to add to end of monitor keys to identify dataset.

        Returns:
            OrderedDict of monitor values evaluated on dataset, each being
            the sum of the per-batch values divided by `dataset.num_batches`.
        """
        data_mon_vals = OrderedDict([(key + label, 0.) for key
                                     in self.data_monitors.keys()])
        for inputs_batch, targets_batch in dataset:
            activations = self.model.fprop(inputs_batch, evaluation=True)
            for key, data_monitor in self.data_monitors.items():
                data_mon_vals[key + label] += data_monitor(
                    activations[-1], targets_batch)
        for key in self.data_monitors:
            data_mon_vals[key + label] /= dataset.num_batches
        return data_mon_vals

    def get_epoch_stats(self):
        """Computes training statistics for an epoch.

        Returns:
            An OrderedDict with keys corresponding to the statistic labels and
            values corresponding to the value of the statistic.
        """
        epoch_stats = OrderedDict()
        epoch_stats.update(self.eval_monitors(self.train_dataset, '(train)'))
        if self.valid_dataset is not None:
            epoch_stats.update(self.eval_monitors(
                self.valid_dataset, '(valid)'))
        return epoch_stats

    def log_stats(self, epoch, epoch_time, stats):
        """Outputs stats for a training epoch to a logger.

        Args:
            epoch (int): Epoch counter.
            epoch_time: Time taken in seconds for the epoch to complete.
            stats: Monitored stats for the epoch.
        """
        logger.info('Epoch {0}: {1:.1f}s to complete\n {2}'.format(
            epoch, epoch_time,
            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
        ))

    def train(self, num_epochs, stats_interval=5):
        """Trains a model for a set number of epochs.

        Args:
            num_epochs: Number of epochs (complete passes through training
                dataset) to train for.
            stats_interval: Training statistics will be recorded and logged
                every `stats_interval` epochs.

        Returns:
            Tuple of three values: an array of training run statistics (the
            first row holds the statistics computed before any training), a
            dict mapping the labels for the statistics recorded to their
            column index in the array, and the total time in seconds taken
            by the call.
        """
        start_train_time = time.time()
        # Bind `stats` before the loop: the return statement below needs its
        # keys even when no epoch reaches a multiple of `stats_interval`
        # (e.g. num_epochs == 0 or num_epochs < stats_interval).
        stats = self.get_epoch_stats()
        run_stats = [list(stats.values())]
        with self.tqdm_progress(total=num_epochs) as progress_bar:
            progress_bar.set_description("Experiment Progress")
            for epoch in range(1, num_epochs + 1):
                start_time = time.time()
                self.do_training_epoch()
                epoch_time = time.time() - start_time
                if epoch % stats_interval == 0:
                    stats = self.get_epoch_stats()
                    self.log_stats(epoch, epoch_time, stats)
                    run_stats.append(list(stats.values()))
                progress_bar.update(1)
        finish_train_time = time.time()
        total_train_time = finish_train_time - start_train_time
        return np.array(run_stats), {k: i for i, k in enumerate(stats.keys())}, total_train_time

View File

@ -1,34 +0,0 @@
# -*- coding: utf-8 -*-
"""Training schedulers.
This module contains classes implementing schedulers which control the
evolution of learning rule hyperparameters (such as learning rate) over a
training run.
"""
import numpy as np
class ConstantLearningRateScheduler(object):
    """Minimal scheduler example: always applies one fixed learning rate."""

    def __init__(self, learning_rate):
        """Store the learning rate this scheduler will apply.

        Args:
            learning_rate: Learning rate to set on the learning rule.
        """
        self.learning_rate = learning_rate

    def update_learning_rule(self, learning_rule, epoch_number):
        """Set the learning rule's learning rate to the stored value.

        Intended to be run at the beginning of each epoch.

        Args:
            learning_rule: Learning rule object used in the training run;
                its `learning_rate` attribute is overwritten.
            epoch_number: Integer index of the training epoch about to be
                run. Not used by this scheduler.
        """
        fixed_rate = self.learning_rate
        learning_rule.learning_rate = fixed_rate

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,152 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from mlp.layers import BatchNormalizationLayer\n",
"test_inputs = np.array([[-1.38066782, -0.94725498, -3.05585424, 2.28644454, 0.85520889,\n",
" 0.10575624, 0.23618609, 0.84723205, 1.06569909, -2.21704034],\n",
" [ 0.11060968, -0.0747448 , 0.56809029, 2.45926149, -2.28677816,\n",
" -0.9964566 , 2.7356007 , 1.98002308, -0.39032315, 1.46515481]])\n",
"test_grads_wrt_outputs = np.array([[-0.43857052, 1.00380109, -1.18425494, 0.00486091, 0.21470207,\n",
" -0.12179054, -0.11508482, 0.738482 , -1.17249238, 0.69188295],\n",
" [ 1.07802015, 0.69901145, 0.81603688, -1.76743026, -1.24418692,\n",
" -0.65729963, -0.50834305, -0.49016145, 1.63749743, -0.71123104]])\n",
"\n",
"#produce BatchNorm fprop and bprop\n",
"activation_layer = BatchNormalizationLayer(input_dim=10)\n",
"\n",
"beta = np.array(10*[0.3])\n",
"gamma = np.array(10*[0.5])\n",
"\n",
"activation_layer.params = [gamma, beta]\n",
"BN_fprop = activation_layer.fprop(test_inputs)\n",
"BN_bprop = activation_layer.bprop(\n",
" test_inputs, BN_fprop, test_grads_wrt_outputs)\n",
"BN_grads_wrt_params = activation_layer.grads_wrt_params(\n",
" test_inputs, test_grads_wrt_outputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"true_fprop_outputs = np.array([[-0.1999955 , -0.19998686, -0.19999924, -0.1996655 , 0.79999899,\n",
" 0.79999177, -0.1999984 , -0.19999221, 0.79999528, -0.19999926],\n",
" [ 0.7999955 , 0.79998686, 0.79999924, 0.7996655 , -0.19999899,\n",
" -0.19999177, 0.7999984 , 0.79999221, -0.19999528, 0.79999926]])\n",
"assert BN_fprop.shape == true_fprop_outputs.shape, (\n",
" 'Layer bprop returns incorrect shaped array. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_fprop_outputs.shape, BN_fprop.shape)\n",
")\n",
"assert np.allclose(np.round(BN_fprop, decimals=2), np.round(true_fprop_outputs, decimals=2)), (\n",
"'Layer bprop does not return correct values. '\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(true_fprop_outputs, BN_fprop, BN_fprop-true_fprop_outputs)\n",
")\n",
"\n",
"print(\"Batch Normalization F-prop test passed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"true_bprop_outputs = np.array([[ -9.14558020e-06, 9.17665617e-06, -8.40575535e-07,\n",
" 6.85384297e-03, 9.40668131e-07, 7.99795574e-06,\n",
" 5.03719464e-07, 1.69038704e-05, -1.82061629e-05,\n",
" 5.62083224e-07],\n",
" [ 9.14558020e-06, -9.17665617e-06, 8.40575535e-07,\n",
" -6.85384297e-03, -9.40668131e-07, -7.99795574e-06,\n",
" -5.03719464e-07, -1.69038704e-05, 1.82061629e-05,\n",
" -5.62083224e-07]])\n",
"assert BN_bprop.shape == true_bprop_outputs.shape, (\n",
" 'Layer bprop returns incorrect shaped array. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_bprop_outputs.shape, BN_bprop.shape)\n",
")\n",
"assert np.allclose(np.round(BN_bprop, decimals=2), np.round(true_bprop_outputs, decimals=2)), (\n",
"'Layer bprop does not return correct values. '\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(true_bprop_outputs, BN_bprop, BN_bprop-true_bprop_outputs)\n",
")\n",
"\n",
"print(\"Batch Normalization B-prop test passed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"grads_wrt_gamma, grads_wrt_beta = BN_grads_wrt_params\n",
"true_grads_wrt_gamma = np.array(([ 1.51657703, -0.30478163, 2.00028878, -1.77110552, 1.45888603,\n",
" 0.53550028, -0.39325697, -1.2286243 , -2.8099633 , -1.40311192]))\n",
"true_grads_wrt_beta = np.array([ 0.63944963, 1.70281254, -0.36821806, -1.76256935, -1.02948485,\n",
" -0.77909018, -0.62342786, 0.24832055, 0.46500505, -0.01934809])\n",
"\n",
"assert grads_wrt_gamma.shape == true_grads_wrt_gamma.shape, (\n",
" 'Layer bprop returns incorrect shaped array. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_grads_wrt_gamma.shape, grads_wrt_gamma.shape)\n",
")\n",
"assert np.allclose(np.round(grads_wrt_gamma, decimals=2), np.round(true_grads_wrt_gamma, decimals=2)), (\n",
"'Layer bprop does not return correct values. '\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(true_grads_wrt_gamma, grads_wrt_gamma, grads_wrt_gamma-true_grads_wrt_gamma)\n",
")\n",
"\n",
"assert grads_wrt_beta.shape == true_grads_wrt_beta.shape, (\n",
" 'Layer bprop returns incorrect shaped array. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_grads_wrt_beta.shape, grads_wrt_beta.shape)\n",
")\n",
"assert np.allclose(np.round(grads_wrt_beta, decimals=2), np.round(true_grads_wrt_beta, decimals=2)), (\n",
"'Layer bprop does not return correct values. '\n",
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
".format(true_grads_wrt_beta, grads_wrt_beta, grads_wrt_beta-true_grads_wrt_beta)\n",
")\n",
"\n",
"print(\"Batch Normalization grads wrt to params test passed\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@ -1,307 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below a skeleton class and associated test functions for the `fprop`, `bprop` and `grads_wrt_params` methods of the ConvolutionalLayer class are included.\n",
"\n",
"The test functions assume that in your implementation of `fprop` for the convolutional layer, outputs are calculated only for 'valid' overlaps of the kernel filters with the input - i.e. without any padding.\n",
"\n",
"It is also assumed that if convolutions with non-unit strides are implemented the default behaviour is to take unit-strides, with the test cases only correct for unit strides in both directions."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The three test functions are defined in the cell below. All the functions take as first argument the *class* corresponding to the convolutional layer implementation to be tested (**not** an instance of the class). It is assumed the class being tested has an `__init__` method with at least all of the arguments defined in the skeleton definition above. A boolean second argument to each function can be used to specify if the layer implements a cross-correlation or convolution based operation (see note in [seventh lecture slides](http://www.inf.ed.ac.uk/teaching/courses/mlp/2016/mlp07-cnn.pdf))."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def test_conv_layer_fprop(layer_class, do_cross_correlation=False):\n",
" \"\"\"Tests `fprop` method of a convolutional layer.\n",
" \n",
" Checks the outputs of `fprop` method for a fixed input against known\n",
" reference values for the outputs and raises an AssertionError if\n",
" the outputted values are not consistent with the reference values. If\n",
" tests are all passed returns True.\n",
" \n",
" Args:\n",
" layer_class: Convolutional layer implementation following the \n",
" interface defined in the provided skeleton class.\n",
" do_cross_correlation: Whether the layer implements an operation\n",
" corresponding to cross-correlation (True) i.e kernels are\n",
" not flipped before sliding over inputs, or convolution\n",
" (False) with filters being flipped.\n",
"\n",
" Raises:\n",
" AssertionError: Raised if output of `layer.fprop` is inconsistent \n",
" with reference values either in shape or values.\n",
" \"\"\"\n",
" inputs = np.arange(96).reshape((2, 3, 4, 4))\n",
" kernels = np.arange(-12, 12).reshape((2, 3, 2, 2))\n",
" if do_cross_correlation:\n",
" kernels = kernels[:, :, ::-1, ::-1]\n",
" biases = np.arange(2)\n",
" true_output = np.array(\n",
" [[[[ -958., -1036., -1114.],\n",
" [-1270., -1348., -1426.],\n",
" [-1582., -1660., -1738.]],\n",
" [[ 1707., 1773., 1839.],\n",
" [ 1971., 2037., 2103.],\n",
" [ 2235., 2301., 2367.]]],\n",
" [[[-4702., -4780., -4858.],\n",
" [-5014., -5092., -5170.],\n",
" [-5326., -5404., -5482.]],\n",
" [[ 4875., 4941., 5007.],\n",
" [ 5139., 5205., 5271.],\n",
" [ 5403., 5469., 5535.]]]]\n",
" )\n",
" \n",
" layer = layer_class(\n",
" num_input_channels=kernels.shape[1], \n",
" num_output_channels=kernels.shape[0], \n",
" input_dim_1=inputs.shape[2], \n",
" input_dim_2=inputs.shape[3],\n",
" kernel_dim_1=kernels.shape[2],\n",
" kernel_dim_2=kernels.shape[3]\n",
" )\n",
" layer.params = [kernels, biases]\n",
" layer_output = layer.fprop(inputs)\n",
" \n",
" assert layer_output.shape == true_output.shape, (\n",
" 'Layer fprop gives incorrect shaped output. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_output.shape, layer_output.shape)\n",
" )\n",
" assert np.allclose(layer_output, true_output), (\n",
" 'Layer fprop does not give correct output. '\n",
" 'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}.'\n",
" .format(true_output, layer_output, true_output-layer_output)\n",
" )\n",
" return True\n",
"\n",
"def test_conv_layer_bprop(layer_class, do_cross_correlation=False):\n",
" \"\"\"Tests `bprop` method of a convolutional layer.\n",
" \n",
" Checks the outputs of `bprop` method for a fixed input against known\n",
" reference values for the gradients with respect to inputs and raises \n",
" an AssertionError if the returned values are not consistent with the\n",
" reference values. If tests are all passed returns True.\n",
" \n",
" Args:\n",
" layer_class: Convolutional layer implementation following the \n",
" interface defined in the provided skeleton class.\n",
" do_cross_correlation: Whether the layer implements an operation\n",
" corresponding to cross-correlation (True) i.e kernels are\n",
" not flipped before sliding over inputs, or convolution\n",
" (False) with filters being flipped.\n",
"\n",
" Raises:\n",
" AssertionError: Raised if output of `layer.bprop` is inconsistent \n",
" with reference values either in shape or values.\n",
" \"\"\"\n",
" inputs = np.arange(96).reshape((2, 3, 4, 4))\n",
" kernels = np.arange(-12, 12).reshape((2, 3, 2, 2))\n",
" if do_cross_correlation:\n",
" kernels = kernels[:, :, ::-1, ::-1]\n",
" biases = np.arange(2)\n",
" grads_wrt_outputs = np.arange(-20, 16).reshape((2, 2, 3, 3))\n",
" outputs = np.array(\n",
" [[[[ -958., -1036., -1114.],\n",
" [-1270., -1348., -1426.],\n",
" [-1582., -1660., -1738.]],\n",
" [[ 1707., 1773., 1839.],\n",
" [ 1971., 2037., 2103.],\n",
" [ 2235., 2301., 2367.]]],\n",
" [[[-4702., -4780., -4858.],\n",
" [-5014., -5092., -5170.],\n",
" [-5326., -5404., -5482.]],\n",
" [[ 4875., 4941., 5007.],\n",
" [ 5139., 5205., 5271.],\n",
" [ 5403., 5469., 5535.]]]]\n",
" )\n",
" true_grads_wrt_inputs = np.array(\n",
" [[[[ 147., 319., 305., 162.],\n",
" [ 338., 716., 680., 354.],\n",
" [ 290., 608., 572., 294.],\n",
" [ 149., 307., 285., 144.]],\n",
" [[ 23., 79., 81., 54.],\n",
" [ 114., 284., 280., 162.],\n",
" [ 114., 272., 268., 150.],\n",
" [ 73., 163., 157., 84.]],\n",
" [[-101., -161., -143., -54.],\n",
" [-110., -148., -120., -30.],\n",
" [ -62., -64., -36., 6.],\n",
" [ -3., 19., 29., 24.]]],\n",
" [[[ 39., 67., 53., 18.],\n",
" [ 50., 68., 32., -6.],\n",
" [ 2., -40., -76., -66.],\n",
" [ -31., -89., -111., -72.]],\n",
" [[ 59., 115., 117., 54.],\n",
" [ 114., 212., 208., 90.],\n",
" [ 114., 200., 196., 78.],\n",
" [ 37., 55., 49., 12.]],\n",
" [[ 79., 163., 181., 90.],\n",
" [ 178., 356., 384., 186.],\n",
" [ 226., 440., 468., 222.],\n",
" [ 105., 199., 209., 96.]]]])\n",
" layer = layer_class(\n",
" num_input_channels=kernels.shape[1], \n",
" num_output_channels=kernels.shape[0], \n",
" input_dim_1=inputs.shape[2], \n",
" input_dim_2=inputs.shape[3],\n",
" kernel_dim_1=kernels.shape[2],\n",
" kernel_dim_2=kernels.shape[3]\n",
" )\n",
" layer.params = [kernels, biases]\n",
" layer_grads_wrt_inputs = layer.bprop(inputs, outputs, grads_wrt_outputs)\n",
" assert layer_grads_wrt_inputs.shape == true_grads_wrt_inputs.shape, (\n",
" 'Layer bprop returns incorrect shaped array. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_grads_wrt_inputs.shape, layer_grads_wrt_inputs.shape)\n",
" )\n",
" assert np.allclose(layer_grads_wrt_inputs, true_grads_wrt_inputs), (\n",
" 'Layer bprop does not return correct values. '\n",
" 'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
" .format(true_grads_wrt_inputs, layer_grads_wrt_inputs, layer_grads_wrt_inputs-true_grads_wrt_inputs)\n",
" )\n",
" return True\n",
"\n",
"def test_conv_layer_grad_wrt_params(\n",
" layer_class, do_cross_correlation=False):\n",
" \"\"\"Tests `grad_wrt_params` method of a convolutional layer.\n",
" \n",
" Checks the outputs of `grad_wrt_params` method for fixed inputs \n",
" against known reference values for the gradients with respect to \n",
" kernels and biases, and raises an AssertionError if the returned\n",
" values are not consistent with the reference values. If tests\n",
" are all passed returns True.\n",
" \n",
" Args:\n",
" layer_class: Convolutional layer implementation following the \n",
" interface defined in the provided skeleton class.\n",
" do_cross_correlation: Whether the layer implements an operation\n",
" corresponding to cross-correlation (True) i.e kernels are\n",
" not flipped before sliding over inputs, or convolution\n",
" (False) with filters being flipped.\n",
"\n",
" Raises:\n",
" AssertionError: Raised if output of `layer.bprop` is inconsistent \n",
" with reference values either in shape or values.\n",
" \"\"\"\n",
" inputs = np.arange(96).reshape((2, 3, 4, 4))\n",
" kernels = np.arange(-12, 12).reshape((2, 3, 2, 2))\n",
" biases = np.arange(2)\n",
" grads_wrt_outputs = np.arange(-20, 16).reshape((2, 2, 3, 3))\n",
" true_kernel_grads = np.array(\n",
" [[[[ -240., -114.],\n",
" [ 264., 390.]],\n",
" [[-2256., -2130.],\n",
" [-1752., -1626.]],\n",
" [[-4272., -4146.],\n",
" [-3768., -3642.]]],\n",
" [[[ 5268., 5232.],\n",
" [ 5124., 5088.]],\n",
" [[ 5844., 5808.],\n",
" [ 5700., 5664.]],\n",
" [[ 6420., 6384.],\n",
" [ 6276., 6240.]]]])\n",
" if do_cross_correlation:\n",
" kernels = kernels[:, :, ::-1, ::-1]\n",
" true_kernel_grads = true_kernel_grads[:, :, ::-1, ::-1]\n",
" true_bias_grads = np.array([-126., 36.])\n",
" layer = layer_class(\n",
" num_input_channels=kernels.shape[1], \n",
" num_output_channels=kernels.shape[0], \n",
" input_dim_1=inputs.shape[2], \n",
" input_dim_2=inputs.shape[3],\n",
" kernel_dim_1=kernels.shape[2],\n",
" kernel_dim_2=kernels.shape[3]\n",
" )\n",
" layer.params = [kernels, biases]\n",
" layer_kernel_grads, layer_bias_grads = (\n",
" layer.grads_wrt_params(inputs, grads_wrt_outputs))\n",
" assert layer_kernel_grads.shape == true_kernel_grads.shape, (\n",
" 'grads_wrt_params gives incorrect shaped kernel gradients output. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_kernel_grads.shape, layer_kernel_grads.shape)\n",
" )\n",
" assert np.allclose(layer_kernel_grads, true_kernel_grads), (\n",
" 'grads_wrt_params does not give correct kernel gradients output. '\n",
" 'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}.'\n",
" .format(true_kernel_grads, layer_kernel_grads)\n",
" )\n",
" assert layer_bias_grads.shape == true_bias_grads.shape, (\n",
" 'grads_wrt_params gives incorrect shaped bias gradients output. '\n",
" 'Correct shape is \\n\\n{0}\\n\\n but returned shape is \\n\\n{1}.'\n",
" .format(true_bias_grads.shape, layer_bias_grads.shape)\n",
" )\n",
" assert np.allclose(layer_bias_grads, true_bias_grads), (\n",
" 'grads_wrt_params does not give correct bias gradients output. '\n",
" 'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}.'\n",
" .format(true_bias_grads, layer_bias_grads)\n",
" )\n",
" return True"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"An example of using the test functions is given in the cell below. This assumes you implement a convolution (rather than cross-correlation) operation. If the implementation is correct, the message 'All tests passed.' will be printed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mlp.layers import ConvolutionalLayer\n",
"fprop_correct = test_conv_layer_fprop(ConvolutionalLayer, False)\n",
"bprop_correct = test_conv_layer_bprop(ConvolutionalLayer, False)\n",
"grads_wrt_param_correct = test_conv_layer_grad_wrt_params(ConvolutionalLayer, False)\n",
"if fprop_correct and grads_wrt_param_correct and bprop_correct:\n",
" print('All tests passed.')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@ -1,147 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Coursework 2\n",
"\n",
"This notebook is intended to be used as a starting point for your experiments. The instructions can be found in the instructions file located under spec/coursework2.pdf. The methods provided here are just helper functions. If you want more complex graphs such as side by side comparisons of different experiments you should learn more about matplotlib and implement them. Before each experiment remember to re-initialize neural network weights and reset the data providers so you get a properly initialized experiment. For each experiment try to keep most hyperparameters the same except the one under investigation so you can understand what the effects of each are."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"plt.style.use('ggplot')\n",
"\n",
"def train_model_and_plot_stats(\n",
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):\n",
" \n",
" # As well as monitoring the error over training also monitor classification\n",
" # accuracy i.e. proportion of most-probable predicted classes being equal to targets\n",
" data_monitors={'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}\n",
"\n",
" # Use the created objects to initialise a new Optimiser instance.\n",
" optimiser = Optimiser(\n",
" model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)\n",
"\n",
" # Run the optimiser for num_epochs epochs (full passes through the training set)\n",
" # printing statistics every stats_interval epochs.\n",
" stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)\n",
"\n",
" # Plot the change in the validation and training set error over training.\n",
" fig_1 = plt.figure(figsize=(8, 4))\n",
" ax_1 = fig_1.add_subplot(111)\n",
" for k in ['error(train)', 'error(valid)']:\n",
" ax_1.plot(np.arange(1, stats.shape[0]) * stats_interval, \n",
" stats[1:, keys[k]], label=k)\n",
" ax_1.legend(loc=0)\n",
" ax_1.set_xlabel('Epoch number')\n",
"\n",
" # Plot the change in the validation and training set accuracy over training.\n",
" fig_2 = plt.figure(figsize=(8, 4))\n",
" ax_2 = fig_2.add_subplot(111)\n",
" for k in ['acc(train)', 'acc(valid)']:\n",
" ax_2.plot(np.arange(1, stats.shape[0]) * stats_interval, \n",
" stats[1:, keys[k]], label=k)\n",
" ax_2.legend(loc=0)\n",
" ax_2.set_xlabel('Epoch number')\n",
" \n",
" return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The below code will set up the data providers, random number\n",
"# generator and logger objects needed for training runs. As\n",
"# loading the data from file take a little while you generally\n",
"# will probably not want to reload the data providers on\n",
"# every training run. If you wish to reset their state you\n",
"# should instead use the .reset() method of the data providers.\n",
"import numpy as np\n",
"import logging\n",
"from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider\n",
"\n",
"# Seed a random number generator\n",
"seed = 10102016 \n",
"rng = np.random.RandomState(seed)\n",
"batch_size = 100\n",
"# Set up a logger object to print info about the training run to stdout\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.INFO)\n",
"logger.handlers = [logging.StreamHandler()]\n",
"\n",
"# Create data provider objects for the EMNIST data set\n",
"train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)\n",
"valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The model set up code below is provided as a starting point.\n",
"# You will probably want to add further code cells for the\n",
"# different experiments you run.\n",
"\n",
"from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, LeakyReluLayer, ELULayer, SELULayer\n",
"from mlp.errors import CrossEntropySoftmaxError\n",
"from mlp.models import MultipleLayerModel\n",
"from mlp.initialisers import ConstantInit, GlorotUniformInit\n",
"from mlp.learning_rules import GradientDescentLearningRule\n",
"from mlp.optimisers import Optimiser\n",
"\n",
"#setup hyperparameters\n",
"learning_rate = 0.1\n",
"num_epochs = 100\n",
"stats_interval = 1\n",
"input_dim, output_dim, hidden_dim = 784, 47, 100\n",
"\n",
"weights_init = GlorotUniformInit(rng=rng)\n",
"biases_init = ConstantInit(0.)\n",
"model = MultipleLayerModel([\n",
" AffineLayer(input_dim, hidden_dim, weights_init, biases_init), \n",
" ReluLayer(),\n",
" AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), \n",
" ReluLayer(),\n",
" AffineLayer(hidden_dim, output_dim, weights_init, biases_init)\n",
"])\n",
"\n",
"error = CrossEntropySoftmaxError()\n",
"# Use a basic gradient descent learning rule\n",
"learning_rule = GradientDescentLearningRule(learning_rate=learning_rate)\n",
"\n",
"#Remember to use notebook=False when you write a script to be run in a terminal\n",
"_ = train_model_and_plot_stats(\n",
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.9 KiB

View File

@ -1,65 +0,0 @@
% Standalone TikZ figure showing forward and backward propagation through a
% multi-layer model. The top row (y = 0) chains the `layers[l].fprop` boxes
% from the input x to the output y and the error function; the bottom row
% (y = -2) chains `error.grad` and the `layers[l].bprop` boxes back to the
% gradient with respect to the input.
\documentclass[tikz]{standalone}
\usepackage{amsmath}
\usepackage{tikz}
\usetikzlibrary{arrows}
\usetikzlibrary{calc}
\usepackage{ifthen}
% \vct typesets a bold vector symbol; \pd{a}{b} typesets the partial derivative of a w.r.t. b
\newcommand{\vct}[1]{\boldsymbol{#1}}
\newcommand{\pd}[2]{\frac{\partial #1}{\partial #2}}
% box styles: blue fill for forward-propagation boxes, red fill for backward-propagation boxes
\tikzstyle{fprop} = [draw,fill=blue!20,minimum size=2em,align=center]
\tikzstyle{bprop} = [draw,fill=red!20,minimum size=2em,align=center]
\begin{document}
\begin{tikzpicture}[xscale=1.75] %
% define number of layers
% (layers are indexed 0..\nl by the loops below, so \nl = 2 draws three layers)
\def\nl{2};
% model input
\node at (0, 0) (input) {$\vct{x}$};
% draw fprop through model layers
% layer l's fprop box sits at x = 2l + 1; for l > 0 the hidden activation h_l sits at x = 2l
\foreach \l in {0,...,\nl} {
\node[fprop] at (2 * \l + 1, 0) (fprop\l) {\texttt{layers[\l]} \\ \texttt{.fprop}};
\ifthenelse{\l > 0}{
\node at (2 * \l, 0) (hidden\l) {$\vct{h}_\l$};
\draw[->] (hidden\l) -- (fprop\l);
% calc `let` syntax: \n1 holds l - 1, used to refer to the previous layer's fprop box
\draw[->] let \n1={\l - 1} in (fprop\n1) -- (hidden\l);
}{
% first layer: fed directly by the model input
\draw[->] (input) -- (fprop\l);
}
}
% model output
\node at (2 * \nl + 2, 0) (output) {$\mathbf{y}$};
% error function
\node[fprop] at (2 * \nl + 3, 0) (errorfunc) {\texttt{error}};
% error value
\node at (2 * \nl + 3, -1) (error) {$\bar{E}$};
% targets
\node at (2 * \nl + 4, -1) (tgt) {$\vct{t}$};
% error gradient
\node[bprop] at (2 * \nl + 3, -2) (errorgrad) {\texttt{error} \\ \texttt{.grad}};
% gradient wrt outputs
\node at (2 * \nl + 2, -2) (gradoutput) {$\pd{\bar{E}}{\vct{y}}$};
% connect last layer -> output -> error function -> error value -> error gradient -> output gradient
\draw[->] (fprop\nl) -- (output);
\draw[->] (output) -- (errorfunc);
\draw[->] (errorfunc) -- (error);
\draw[->] (error) -- (errorgrad);
\draw[->] (errorgrad) -- (gradoutput);
% the targets feed both the error function and its gradient (|- draws vertical-then-horizontal paths)
\draw[->] (tgt) |- (errorfunc);
\draw[->] (tgt) |- (errorgrad);
% draw bprop through model layers, mirroring the fprop row at y = -2
\foreach \l in {0,...,\nl} {
\node[bprop] at (2 * \l + 1, -2) (bprop\l) {\texttt{layers[\l]} \\ \texttt{.bprop}};
\ifthenelse{\l > 0}{
\node at (2 * \l, -2) (grad\l) {$\pd{\bar{E}}{\vct{h}_\l}$};
% [<-] puts the arrow head at the path start, so gradients flow right-to-left:
% bprop l -> grad l -> bprop (l - 1)
\draw[<-] (grad\l) -- (bprop\l);
\draw[<-] let \n1={\l - 1} in (bprop\n1) -- (grad\l);
}{}
}
% gradient wrt the model input, produced by the first layer's bprop
\node at (0, -2) (gradinput) {$\pd{\bar{E}}{\vct{x}}$};
\draw[->] (bprop0) -- (gradinput);
\draw[->] (gradoutput) -- (bprop\nl);
\end{tikzpicture}
\end{document}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

View File

@ -1,441 +0,0 @@
# Environment set up
*The instructions below are intentionally verbose as they try to explain the reasoning behind our choice of environment set up and to explain what each command we are asking you to run does. If you are already confident using bash, Conda environments and Git you may wish to instead use the much shorter [minimal set-up instructions](#minimal-set-up-instructions-for-dice) at the end which skip the explanations.*
In this course we will be using [Python 3](https://www.python.org/) for all the labs and coursework assignments. In particular we will be making heavy use of the numerical computing libraries [NumPy](http://www.numpy.org/) and [SciPy](http://www.scipy.org/), and the interactive notebook application [Jupyter](http://jupyter.org/).
A common headache in software projects is ensuring the correct versions of all dependencies are available on the current development system. Often you may be working on several distinct projects simultaneously each with its own potentially conflicting dependencies on external libraries. Additionally you may be working across multiple different machines (for example a personal laptop and University computers) with possibly different operating systems. Further, as is the case in Informatics on DICE, you may not have root-level access to a system you are working on and so not be able to install software at a system-wide level and system updates may cause library versions to be changed to incompatible versions.
One way of overcoming these issues is to use project-specific *virtual environments*. In this context a virtual environment is an isolated development environment where the external dependencies of a project can be installed and managed independent of the system-wide versions (and those of the environments of other projects).
There are several virtual environment solutions available in the Python eco-system, including the native [pyvenv](https://docs.python.org/3/library/venv.html) in Python 3 and the popular [virtualenv](https://virtualenv.pypa.io/en/stable/). Also related is [pip](https://pip.pypa.io/en/stable/) a Python package manager natively included in Python 2.7.9 and above.
Here we will instead use the environment capabilities of the [Conda](http://conda.pydata.org/docs/) package management system. Unlike pip and virtualenv/pyvenv, Conda is not limited to managing Python packages but is a language and platform agnostic package manager. Both NumPy and SciPy have many non-Python external dependencies and their performance is very dependent on correctly linking to optimised linear algebra libraries.
Conda can handle installation of the Python libraries we will be using and all their external dependencies, in particular allowing easy installation of [optimised numerical computing libraries](https://docs.continuum.io/mkl-optimizations/). Further Conda can easily be installed on Linux, OSX and Windows systems meaning if you wish to set up an environment on a personal machine as well this should be easy to do whatever your operating system of choice is.
There are several options available for installing Conda on a system. Here we will use the Python 3 version of [Miniconda](http://conda.pydata.org/miniconda.html), which installs just Conda and its dependencies. An alternative is to install the [Anaconda Python distribution](https://docs.continuum.io/anaconda/), which installs Conda and a large selection of popular Python packages. As we will require only a small subset of these packages we will use the more barebones Miniconda to avoid eating into your DICE disk quota too much, however if installing on a personal machine you may wish to consider Anaconda if you want to explore other Python packages.
## Installing Miniconda
We provide instructions here for getting an environment with all the required dependencies running on computers running
the School of Informatics [DICE desktop](http://computing.help.inf.ed.ac.uk/dice-platform). The same instructions
should be able to be used on other Linux distributions such as Ubuntu and Linux Mint with minimal adjustments.
For those wishing to install on a personal Windows or OSX machine, the initial instructions for setting up Conda will
differ slightly - you should instead select the relevant installer for your system from [here](http://conda.pydata.org/miniconda.html) and follow the corresponding installation instructions from [here](http://conda.pydata.org/docs/install/quick.html). After Conda is installed the [remaining instructions](#creating-the-conda-environment) should be broadly the same across different systems.
*Note: Although we are happy for you to additionally set up an environment on a personal machine, you should still set up a DICE environment now as this will make sure you are able to use shared computing resources later in the course. Also although we have tried to note when the required commands will differ on non-DICE systems, these instructions have only been tested on DICE and we will not be able to offer any support in labs on getting set up on a non-DICE system.*
---
Open a bash terminal (`Applications > Terminal` on DICE).
We first need to download the latest 64-bit Python 3 Miniconda install script:
```
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
```
This uses `wget` a command-line tool for downloading files.
Now run the install script:
```
bash Miniconda3-latest-Linux-x86_64.sh
```
You will first be asked to review the software license agreement. Assuming you choose to agree, you will then be asked
to choose an install location for Miniconda. The default is to install in the root of your home directory
`~/miniconda3`. We recommend going with this default unless you have a particular reason to do otherwise.
You will then be asked whether to prepend the Miniconda binaries directory to the `PATH` system environment variable
definition in `.bashrc`. As the DICE bash start-up mechanism differs from the standard set up
([details here](http://computing.help.inf.ed.ac.uk/dice-bash)), on DICE you should respond `no` here as we will set up the addition to `PATH` manually in the next step. On other Linux distributions you may choose to accept the default.
On DICE, append the Miniconda binaries directory to `PATH` manually in `~/.benv` using
```
echo "export PATH=\""\$PATH":$HOME/miniconda3/bin\"" >> ~/.benv
```
If this command appears a bit opaque and you want to know what is going on, see the footnote <sup id="a1">[1](#f1)</sup>.
We now need to `source` the updated `~/.benv` so that the `PATH` variable in the current terminal session is updated:
```
source ~/.benv
```
From the next time you log in all future terminal sessions should have the updated `PATH` loaded by default.
## Creating the Conda environment
You should now have a working Conda installation. If you run
```
conda --help
```
from a terminal you should see the Conda help page displayed. If you get a `No command 'conda' found` error you should check you have set up your `PATH` variable correctly (you can get a demonstrator to help you do this).
Assuming Conda is working, we will now create our Conda environment:
```
conda create -n mlp python=3
```
This bootstraps a new Conda environment named `mlp` with a minimal Python 3 install. You will be presented with a 'package plan' listing the packages to be installed and asked whether to proceed: type `y` then enter.
We will now *activate* our created environment:
```
source activate mlp
```
or on Windows only
```
activate mlp
```
When an environment is activated its name will be prepended on to the prompt which should now look something like `(mlp) [machine-name]:~$` on DICE.
**You need to run this `source activate mlp` command every time you wish to activate the `mlp` environment in a terminal (for example at the beginning of each lab)**. When the environment is activated, the environment will be searched first when running commands so that e.g. `python` will launch the Python interpreter installed locally in the `mlp` environment rather than a system-wide version.
If you wish to deactivate an environment loaded in the current terminal e.g. to launch the system Python interpreter, you can run `source deactivate` (just `deactivate` on Windows).
We will now install the dependencies for the course into the new environment:
```
conda install numpy scipy matplotlib jupyter
```
Again you will be given a list of the packages to be installed and asked to confirm whether to proceed. Enter `y` then wait for the packages to install (this should take around five minutes). In addition to Jupyter, NumPy and SciPy which we have already mentioned, we are also installing [matplotlib](http://matplotlib.org/) a plotting and visualisation library.
Once the installation is finished, to recover some disk space we can clear the package tarballs Conda just downloaded:
```
conda clean -t
```
These tarballs are usually cached to allow quicker installation into additional environments however we will only be using a single environment here so there is no need to keep them on disk.
## Getting the course code and a short introduction to Git
The next step in getting our environment set up will be to download the course code. This is available in a Git repository on Github:
https://github.com/CSTR-Edinburgh/mlpractical
[Git](https://git-scm.com/) is a distributed version control system and [Github](https://github.com) a popular site for hosting Git repositories. We will be using Git to distribute the code for all the labs and assignments. We will explain all the necessary `git` commands as we go, though those new to Git may find [this concise guide by Roger Dudler](http://rogerdudler.github.io/git-guide/) or [this slightly longer one from Atlassian](https://www.atlassian.com/git/tutorials/) useful.
---
***Non-DICE systems only:***
Git is installed by default on DICE desktops. If you are running a system which does not have Git installed, you can use Conda to install it in your environment using:
```
conda install git
```
---
We will now go over the process of [cloning](https://www.atlassian.com/git/tutorials/setting-up-a-repository/git-clone) a local copy of the `mlpractical` repository.
---
**Confident Git users only:**
For those who have their own Github account and are confident Git users, you may wish to consider instead [creating a private fork](http://stackoverflow.com/a/30352360) of the `CSTR-Edinburgh/mlpractical` repository on Github. This is not required for the course, however it will allow you to push your local commits to Github making it easier to for example sync your work between DICE computers and a personal machine.
**Note you should NOT create a public fork using the default forking mechanism on Github as this will make any commits you push to the fork publicly available which creates a risk of plagiarism.**
If you are already familiar with Git you may wish to skip over the explanatory sections below, though you should read [the section on how we will use branches to separate the code for different labs](#branching-explanation).
---
By default we will assume here you are cloning to your home directory however if you have an existing system for organising your workspace feel free to keep to that. **If you clone the repository to a path other than `~/mlpractical` however you will need to adjust all references to `~/mlpractical` in the commands below accordingly.**
To clone the `mlpractical` repository to the home directory run
```
git clone https://github.com/CSTR-Edinburgh/mlpractical.git ~/mlpractical
```
This will create a new `mlpractical` subdirectory with a local copy of the repository in it. Enter the directory and list all its contents, including hidden files, by running:
```
cd ~/mlpractical
ls -a # Windows equivalent: dir /a
```
For the most part this will look much like any other directory, with there being the following three non-hidden sub-directories:
* `data`: Data files used in the labs and assignments.
* `mlp`: The custom Python package we will use in this course.
* `notebooks`: The Jupyter notebook files for each lab and coursework.
Additionally there exists a hidden `.git` subdirectory (on Unix systems by default files and directories prepended with a period '.' are hidden). This directory contains the repository history database and various configuration files and references. Unless you are sure you know what you are doing you generally should not edit any of the files in this directory directly. Generally most configuration options can be enacted more safely using a `git config` command.
For instance to globally set the user name and email used in commits you can run:
```
git config --global user.name "[your name]"
git config --global user.email "[matric-number]@sms.ed.ac.uk"
```
*Note this is meant as an example of a `git config` command - you do not need to run this command though there is no harm in doing so.*
From the `~/mlpractical` directory if you now run:
`git status`
a status message containing information about your local clone of the repository should be displayed.
Providing you have not made any changes yet, all that will be displayed is the name of the current *branch* (we will explain what a branch is to those new to Git in a little while), a message that the branch is up to date with the remote repository and that there is nothing to commit in the working directory.
The two key concepts you will need to know about Git for this course are *commits* and *branches*.
A *commit* in Git is a snapshot of the state of the project. The snapshots are recorded in the repository history and allow us to track changes to the code over time and rollback changes if necessary. In Git there is a three stage process to creating a new commit.
1. The relevant edits are made to files in the working directory and any new files created.
2. The files with changes to be committed (including any new files) are added to the *staging area* by running:
```
git add file1 file2 ...
```
3. Finally the *staged changes* are used to create a new commit by running
```
git commit -m "A commit message describing the changes."
```
This writes the staged changes as a new commit in the repository history. We can see a log of the details of previous commits by running:
```
git log
```
Although it is not a requirement of the course for you to make regular commits of your work, we strongly recommend you do as it is a good habit to get into and will make recovery from accidental deletions etc. much easier.
The other key Git concept you will need to know about is *branches*. A branch in Git represents an independent line of development of a project. When a repository is first created it will contain a single branch, named `master` by default. Commits to this branch form a linear series of snapshots of the project.
A new branch is created from a commit on an existing branch. Any commits made to this new branch then evolve as an independent and parallel line of changes - that is commits to the new branch will not affect the old branch and vice versa.
A typical Git workflow in a software development setting would be to create a new branch whenever making changes to a project, for example to fix a bug or implement a new feature. These changes are then isolated from the main code base allowing regular commits without worrying about making unstable changes to the main code base. Key to this workflow is the ability to *merge* commits from a branch into another branch, e.g. when it is decided a new feature is sufficiently developed to be added to the main code base. Although merging branches is a key aspect of using Git in many projects, as dealing with merge conflicts when two branches both make changes to the same parts of files can be a somewhat tricky process, we will here generally try to avoid the need for merges.
<p id='branching-explanation'>We will therefore use branches here in a slightly non-standard way. The code for each week's lab and for each of the assignments will be maintained in a separate branch. This will allow us to stage the release of the notebooks and code for each lab and assignment while allowing you to commit the changes you make to the code each week without having to merge those changes when new code is released. Similarly this structure will allow us to release updated notebooks from previous labs with proposed solutions without overwriting your own work.</p>
To list the branches present in the local repository, run:
```
git branch
```
This will display a list of branches with a `*` next to the current branch. To switch to a different existing branch in the local repository run
```
git checkout branch-name
```
This will change the code in the working directory to the current state of the checked out branch. Any files added to the staging area and committed will then create a new commit on this branch.
You should make sure you are on the first lab branch now by running:
```
git checkout mlp2017-8/lab1
```
## Installing the `mlp` Python package
In your local repository we noted above the presence of a `mlp` subdirectory. This contains the custom Python package implementing the NumPy based neural network framework we will be using in this course.
In order to make the modules in this package available in your environment we need to install it. A [setuptools](https://setuptools.readthedocs.io/en/latest/) `setup.py` script is provided in the root of the `mlpractical` directory for this purpose.
The standard way to install a Python package using a `setup.py` script is to run `python setup.py install`. This creates a copy of the package in the `site-packages` directory of the currently active Python environment.
As we will be updating the code in the `mlp` package during the course of the labs this would require you to re-run `python setup.py install` every time a change is made to the package. Instead therefore you should install the package in development mode by running:
```
python setup.py develop
```
Instead of copying the package, this will instead create a symbolic link to the copy in the local repository. This means any changes made will be immediately available without the need to reinstall the package.
---
**Aside on importing/reloading Python modules:**
Note that after the first time a Python module is loaded into an interpreter instance, using for example:
```
import mlp
```
Running the `import` statement any further times will have no effect even if the underlying module code has been changed. To reload an already imported module we instead need to use the [`importlib.reload`](https://docs.python.org/3/library/importlib.html#importlib.reload) function (in Python 3 `reload` is no longer a built-in and must be imported from `importlib`), e.g.
```
from importlib import reload
reload(mlp)
```
**Note: To be clear, as this has caused some confusion in previous labs, the above `import ...` / `reload(...)` statements should NOT be run directly in a bash terminal. They are example Python statements - you could run them in a terminal by first loading a Python interpreter using:**
```
python
```
**however you do not need to do so now. This is meant as information to help you later when importing modules as there was some confusion last year about the difference between `import` and `reload`.**
---
## Adding a data directory variable to the environment
We observed previously the presence of a `data` subdirectory in the local repository. This directory holds the data files that will be used in the course. To enable the data loaders in the `mlp` package to locate these data files we need to set a `MLP_DATA_DIR` environment variable pointing to this directory.
Assuming you used the recommended Miniconda install location and cloned the `mlpractical` repository to your home directory, this variable can be automatically defined when activating the environment by running the following commands (on non-Windows systems):
```
cd ~/miniconda3/envs/mlp
mkdir -p ./etc/conda/activate.d
mkdir -p ./etc/conda/deactivate.d
echo -e '#!/bin/sh\n' >> ./etc/conda/activate.d/env_vars.sh
echo "export MLP_DATA_DIR=$HOME/mlpractical/data" >> ./etc/conda/activate.d/env_vars.sh
echo -e '#!/bin/sh\n' >> ./etc/conda/deactivate.d/env_vars.sh
echo 'unset MLP_DATA_DIR' >> ./etc/conda/deactivate.d/env_vars.sh
export MLP_DATA_DIR=$HOME/mlpractical/data
```
And on Windows systems (replacing the `[]` placeholders with the relevant paths):
```
cd [path-to-conda-root]\envs\mlp
mkdir .\etc\conda\activate.d
mkdir .\etc\conda\deactivate.d
@echo "set MLP_DATA_DIR=[path-to-local-repository]\data" >> .\etc\conda\activate.d\env_vars.bat
@echo "set MLP_DATA_DIR=" >> .\etc\conda\deactivate.d\env_vars.bat
set MLP_DATA_DIR=[path-to-local-repository]\data
```
## Loading the first lab notebook
Your environment is now all set up so you can move on to the introductory exercises in the first lab notebook.
One of the dependencies you installed in your environment earlier was Jupyter. Jupyter notebooks allow combining formatted text with runnable code cells and visualisation of the code output in an intuitive web application interface. Although originally specific to Python (under the previous moniker IPython notebooks) the notebook interface has now been abstracted making them available to a wide range of languages.
There will be a Jupyter notebook available for each lab and assignment in this course, with a combination of explanatory sections for you to read through which will complement the material covered in lectures, as well as series of practical coding exercises to be written and run in the notebook interface. The first lab notebook will cover some of the basics of the notebook interface.
To open a notebook, you first need to launch a Jupyter notebook server instance. From within the `mlpractical` directory containing your local copy of the repository (and with the `mlp` environment activated) run:
```
jupyter notebook
```
This will start a notebook server instance in the current terminal (with a series of status messages being streamed to the terminal output) and launch a browser window which will load the notebook application interface.
By default the notebook interface will show a list of the files in the directory the notebook server was launched from when first loaded. If you click on the `notebooks` directory in this file list, a list of files in this directory should then be displayed. Click the `01_Introduction.ipynb` entry to load the first notebook.
# Minimal set-up instructions for DICE
Below are instructions for setting up the environment without additional explanation. These are intentionally terse and if you do not understand what a particular command is doing you might be better following the more detailed instructions above which explain each step.
---
Start a new bash terminal. Download the latest 64-bit Python 3 Miniconda install script:
```
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
```
Run the install script:
```
bash Miniconda3-latest-Linux-x86_64.sh
```
Review the software license agreement and choose whether to accept. Assuming you accept, you will be asked to choose an install location for Miniconda. The default is to install in the root of your home directory `~/miniconda3`. We will assume below you have used this default. **If you use a different path you will need to adjust the paths in the commands below to suit.**
You will then be asked whether to prepend the Miniconda binaries directory to the `PATH` system environment variable definition in `.bashrc`. You should respond `no` here as we will set up the addition to `PATH` manually in the next step.
Append the Miniconda binaries directory to `PATH` manually in `~/.benv`:
```
echo "export PATH=\""\$PATH":$HOME/miniconda3/bin\"" >> ~/.benv
```
`source` the updated `~/.benv`:
```
source ~/.benv
```
Create a new `mlp` Conda environment:
```
conda create -n mlp python=3
```
Activate our created environment:
```
source activate mlp
```
Install the dependencies for the course into the new environment:
```
conda install numpy scipy matplotlib jupyter
```
Clear the package tarballs Conda just downloaded:
```
conda clean -t
```
Clone the course repository to your home directory:
```
git clone https://github.com/CSTR-Edinburgh/mlpractical.git ~/mlpractical
```
Make sure we are on the first lab branch
```
cd ~/mlpractical
git checkout mlp2017-8/lab1
```
Install the `mlp` package in the environment in develop mode
```
python ~/mlpractical/setup.py develop
```
Add an `MLP_DATA_DIR` variable to the environment
```
cd ~/miniconda3/envs/mlp
mkdir -p ./etc/conda/activate.d
mkdir -p ./etc/conda/deactivate.d
echo -e '#!/bin/sh\n' >> ./etc/conda/activate.d/env_vars.sh
echo "export MLP_DATA_DIR=$HOME/mlpractical/data" >> ./etc/conda/activate.d/env_vars.sh
echo -e '#!/bin/sh\n' >> ./etc/conda/deactivate.d/env_vars.sh
echo 'unset MLP_DATA_DIR' >> ./etc/conda/deactivate.d/env_vars.sh
export MLP_DATA_DIR=$HOME/mlpractical/data
```
Environment is now set up. Load the notebook server from `mlpractical` directory
```
cd ~/mlpractical
jupyter notebook
```
and then open the first lab notebook from the `notebooks` directory.
---
<b id="f1">[1]</b> The `echo` command causes the following text to be streamed to an output (standard terminal output by default). Here we use the append redirection operator `>>` to redirect the `echo` output to a file `~/.benv`, with it being appended to the end of the current file. The text actually added is `export PATH="$PATH:[your-home-directory]/miniconda3/bin"` with the `\"` being used to escape the quote characters. The `export` command defines system-wide environment variables (more rigorously those inherited by child shells) with `PATH` being the environment variable defining where `bash` searches for executables as a colon-separated list of directories. Here we add the Miniconda binary directory to the end of the current `PATH` definition. [↩](#a1)

View File

@ -1,55 +0,0 @@
# Getting started in a lab on DICE computers
Once your [environment is set up](environment-set-up.md), at the beginning of each lab you should be able to follow the steps below to get the lab notebook for that session running.
Open a terminal window (`Applications > Terminal`).
We first need to activate our `mlp` Conda environment:
```
source activate mlp
```
We now need to fetch any new code for the lab from the Github repository and create a new branch for this lab's work. First change in to the `mlpractical` repository directory (if you cloned the repository to a different directory than the default you will need to adjust the command below accordingly):
```
cd ~/mlpractical
```
If you have not yet committed the changes you made to the current branch in the previous lab you should do so now. You can check if you have changes not yet committed by running `git status`. If there are files with changes to be committed (they will appear in red) you should first add them to the staging area using
```
git add path/to/file1 path/to/file2
```
then commit them with a descriptive commit message using
```
git commit -m "Description of changes e.g. Exercises for first lab notebook."
```
We are now ready to fetch any updated code from the remote repository on Github. This can be done by running
```
git fetch origin
```
This should display a message indicating a new branch has been found and fetched, named `origin/mlp2017-8/lab[n]` where `[n]` is the relevant lab number e.g. `origin/mlp2017-8/lab2` for the second lab.
We now need to create and checkout a new local branch from the remote branch fetched above. This can be done by running
```
git checkout -b lab[n] origin/mlp2017-8/lab[n]
```
where again `lab[n]` corresponds to the relevant lab number fetched above e.g. `lab2`. This command creates a new local branch named `lab[n]` from the fetched branch on the remote repository `origin/mlp2017-8/lab[n]`.
Inside the `notebooks` directory there should now be a new notebook for today's lab. The notebook for the previous lab will now also have proposed solutions filled in.
To get started with the new notebook from the `~/mlpractical` directory start up a Jupyter notebook server
```
jupyter notebook
```
then open the new notebook from the dashboard.

View File

@ -1,29 +0,0 @@
# Exceeded quota problems on DICE
Apologies to those who may have issues with having insufficient quota space on DICE in the labs on Monday (25th September).
This was caused by the [dynamic AFS quota system](http://computing.help.inf.ed.ac.uk/dynamic-afs-quotas) which only initially allocates users a subset of their maximum quota and then checks hourly to increase this quota as needed. Unfortunately the amount of disk space needed to store the temporary files used in installing the course dependencies exceeded the current dynamic quota for some people. This meant when running the `conda install ...` command it exited with a quota exceeded error.
Those who experienced that issue should now have sufficient quota space available. From any DICE computer, if you run the following in a terminal
```
source activate mlp
conda remove -y numpy scipy matplotlib jupyter
conda install -y numpy scipy matplotlib jupyter
conda clean -t -y
```
this should clean out the old partially installed packages and reinstall them from scratch which should now run to completion without a quota exceeded error.
Your homespace can be accessed from any Informatics computer running DICE (e.g. any of the computers in the [Forrest Hill labs](http://web.inf.ed.ac.uk/infweb/student-services/ito/students/year2/student-support/facilities/computer-labs) which are open-access outside of booked lab sessions or for those who know how to use SSH you can [log in remotely](http://computing.help.inf.ed.ac.uk/external-login)). You can therefore finish your environment set up prior to the next lab if you want though it is also fine to wait till the beginning of the next lab (it will take around 5 minutes to complete the installation).
At this point assuming you ran through the rest of the instructions to clone the Git repository to your homespace and install the `mlp` package (i.e. the instructions from [here](https://github.com/CSTR-Edinburgh/mlpractical/blob/mlp2016-7/lab1/environment-set-up.md#getting-the-course-code-and-a-short-introduction-to-git) on-wards), you should have a fully working environment.
Once your environment is set up in all future labs you will only need to activate it to get started. So at the beginning of each subsequent lab we will ask you to do something like the following
```
source activate mlp # Activate the mlp environment
cd ~/mlpractical # Change the current directory to mlpractical repository
git checkout mlp2017-8/lab[...] # Checkout the branch for this week's lab
jupyter notebook # Launch the notebook server
```

View File

@ -1,84 +0,0 @@
# Running Jupyter notebooks over SSH
Below is a guide for how to start a Jupyter notebook server remotely on one of the shared-use `student.compute` servers and to connect to it on a local machine by port-forwarding over SSH. It is assumed you already have an SSH client set up on the machine you are connecting from and that you are familiar with how to use SSH. These instructions have been written for use with an SSH client running within a terminal session - although it may be possible to replicate the relevant commands within a GUI-based SSH client, you will need to figure out how to do this yourself. They were written and tested on Ubuntu 14.04 and no attempt has been made to test them on other operating systems.
## Securing your notebook server
Before running a Jupyter notebook server instance on one of the shared compute servers you **must** make sure you have secured your server by configuring it to use a password and to communicate that password between the browser client and server by secure HTTP. This can be done by running the `secure-notebook-server.sh` bash script provided in the `scripts` directory of the `mlpractical` repository. You can either do this when logged on to DICE in one of the labs or after connecting to DICE remotely over SSH as described below.
To run the script, in a DICE terminal enter the `mlpractical` repository directory and run
```
bash scripts/secure-notebook-server.sh
```
As this script creates a self-signed certificate to set up the secure HTTP encrypted communication between the browser and server, you will be shown a security warning when you load up the URL the notebooks are being served on.
If you want to manually secure the notebook server yourself or to create a certificate which will stop the security warnings appearing you can refer to the [relevant official Jupyter documentation page](http://jupyter-notebook.readthedocs.io/en/latest/public_server.html).
## Connecting to a remote `student.compute` server over SSH
To start an SSH session, open a terminal window and run
```
ssh [dice-username]@student.ssh.inf.ed.ac.uk
```
If this is the first time you have logged on to the SSH gateway server from this computer you will be asked to confirm you wish to connect and an ECDSA key fingerprint will be printed. You can check this against the reference values on the [school help pages](http://computing.help.inf.ed.ac.uk/external-login).
You will then be asked to enter your password. This is the same password you usually use to log on to DICE.
Assuming you enter the correct password, you will at this point be logged in to the SSH *gateway server*. As the message printed when you log in points out this is intended only for accessing the Informatics network externally and you should **not** attempt to work on this server. You should log in to one of the `student.compute` shared-use servers by running
```
ssh student.compute
```
You should now be logged on to one of the shared-use compute servers. The name of the server you are logged on to will appear at the bash prompt e.g.
```
ashbury:~$
```
You will need to know the name of the remote server you are using later on.
## Starting a notebook server on the remote computer
You should now activate your `mlp` Conda environment by running
```
source activate mlp
```
Now move in to the `mlpractical` local repository directory e.g. by running
```
cd ~/mlpractical
```
if you chose the default of putting the repository in your home directory.
We will now launch a notebook server on the remote compute server. There are two key differences in the command we use to do this compared to how we usually start up a server on a local machine. First, as the server will be running remotely you should set the `--no-browser` option as this will prevent the remote server attempting to open a browser to connect to the notebook server.
Secondly we will prefix the command with `nice`. `nice` is a shell command which alters the scheduling priority of the process it is used to start. It's important to use `nice` when running on the shared `student.compute` servers to make sure they remain usable by all of the students who need to run jobs on them. You can set a priority level between 10 (highest priority) and 19 (lowest priority) using the `-n` argument. Running the command below will start up a notebook server at the lowest priority level.
```
nice -n 19 jupyter notebook --no-browser
```
Once the notebook server starts running you should take note of the port it is being served on as indicated in the `The Jupyter Notebook is running at: https://localhost:[port]/` message.
## Forwarding a connection to the notebook server over SSH
Now that the notebook server is running on the remote server you need to connect to it on your local machine. We will do this by forwarding the port the notebook server is being run on over SSH to your local machine. As all external connections from outside the `inf.ed.ac.uk` domain have to go via the SSH gateway server we need to go via this gateway server.
In a **new terminal window / tab** run the command below with the `[...]` placeholders substituted with the appropriate values to securely forward the specified port on the remote server to your local machine and bind it to a local port. You should choose `[remote-port]` to be the port the notebook server is running on on the remote server, `[local-port]` to be a currently unused port on your local machine and `[remote-server-name]` to be the host name of the remote server the notebook server is being run on.
```
ssh -N -o ProxyCommand="ssh -q [dice-username]@student.ssh.inf.ed.ac.uk nc [remote-server-name] 22" \
-L [local-port]:localhost:[remote-port] [dice-username]@[remote-server-name]
```
You will be asked to enter your (DICE) password twice, once to log on to the gateway server and a second time to log on to the remote compute server.
Assuming you enter your password both times correctly, the remote port will now be getting forwarded to the specified local port on your computer. If you now open up a browser on your computer and go to `https://localhost:[local-port]` you should (potentially after seeing a security warning about the self-signed certificate) now be asked to enter the notebook server password you specified earlier. Once you enter this password you should be able to access the notebook dashboard and open and edit notebooks as you usually do in laboratories.
When you are finished working you should both close down the notebook server by entering `Ctrl+C` twice in the terminal window in which the SSH session you used to start up the notebook server is running, and halt the port forwarding command by entering `Ctrl+C` in the terminal it is running in.

View File

@ -1,79 +0,0 @@
% ALGORITHM STYLE -- Released 8 April 1996
% for LaTeX-2e
% Copyright -- 1994 Peter Williams
% E-mail Peter.Williams@dsto.defence.gov.au
% --- Package identification and dependencies ---
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithm}
\typeout{Document Style `algorithm' - floating environment}
% float supplies \newfloat, \floatstyle, \floatname and \listof used below;
% ifthen supplies \newboolean, \setboolean, \ifthenelse and \equal.
\RequirePackage{float}
\RequirePackage{ifthen}
% --- Defaults, overridden by the package options declared below ---
% \ALG@within: name of the counter the algorithm float is numbered within
% (`nothing' means no parent counter).
\newcommand{\ALG@within}{nothing}
% Boolean ALG@within: set to true by each of the numbering options.
\newboolean{ALG@within}
\setboolean{ALG@within}{false}
% \ALG@floatstyle: style name handed to \floatstyle after option processing.
\newcommand{\ALG@floatstyle}{ruled}
% \ALG@name: caption name handed to \floatname after option processing.
\newcommand{\ALG@name}{Algorithm}
% Heading used by \listofalgorithms. \ALG@name is stored unexpanded here, so
% a name set through an unknown package option is picked up when it is used.
\newcommand{\listalgorithmname}{List of \ALG@name s}
% Declare Options
% first appearance
% plain / ruled / boxed only record the requested style name in
% \ALG@floatstyle; it is applied once, after \ProcessOptions.
\DeclareOption{plain}{
\renewcommand{\ALG@floatstyle}{plain}
}
\DeclareOption{ruled}{
\renewcommand{\ALG@floatstyle}{ruled}
}
\DeclareOption{boxed}{
\renewcommand{\ALG@floatstyle}{boxed}
}
% then numbering convention
% Each option stores the parent counter name in \ALG@within and sets the
% ALG@within boolean so the float is created with that counter.
\DeclareOption{part}{
\renewcommand{\ALG@within}{part}
\setboolean{ALG@within}{true}
}
\DeclareOption{chapter}{
\renewcommand{\ALG@within}{chapter}
\setboolean{ALG@within}{true}
}
\DeclareOption{section}{
\renewcommand{\ALG@within}{section}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsection}{
\renewcommand{\ALG@within}{subsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsubsection}{
\renewcommand{\ALG@within}{subsubsection}
\setboolean{ALG@within}{true}
}
% `nothing' also sets the boolean to true; the float-creation code below has
% a matching `nothing' case that creates the float without a parent counter.
\DeclareOption{nothing}{
\renewcommand{\ALG@within}{nothing}
\setboolean{ALG@within}{true}
}
% Any option not declared above is taken as the float's caption name.
\DeclareOption*{\edef\ALG@name{\CurrentOption}}
% ALGORITHM
%
% Apply the options, then create the `algorithm' float (placement htbp,
% list file extension loa) in the selected style.
\ProcessOptions
\floatstyle{\ALG@floatstyle}
% When a numbering option was given, compare \ALG@within against each
% supported counter name in turn; exactly one comparison matches and creates
% the float. With no numbering option the float is created unnumbered-within.
\ifthenelse{\boolean{ALG@within}}{
\ifthenelse{\equal{\ALG@within}{part}}
{\newfloat{algorithm}{htbp}{loa}[part]}{}
\ifthenelse{\equal{\ALG@within}{chapter}}
{\newfloat{algorithm}{htbp}{loa}[chapter]}{}
\ifthenelse{\equal{\ALG@within}{section}}
{\newfloat{algorithm}{htbp}{loa}[section]}{}
\ifthenelse{\equal{\ALG@within}{subsection}}
{\newfloat{algorithm}{htbp}{loa}[subsection]}{}
\ifthenelse{\equal{\ALG@within}{subsubsection}}
{\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
\ifthenelse{\equal{\ALG@within}{nothing}}
{\newfloat{algorithm}{htbp}{loa}}{}
}{
\newfloat{algorithm}{htbp}{loa}
}
% Caption label and the user command that prints the list of algorithms.
\floatname{algorithm}{\ALG@name}
\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}

View File

@ -1,201 +0,0 @@
% ALGORITHMIC STYLE -- Released 8 APRIL 1996
% for LaTeX version 2e
% Copyright -- 1994 Peter Williams
% E-mail PeterWilliams@dsto.defence.gov.au
%
% Modified by Alex Smola (08/2000)
% E-mail Alex.Smola@anu.edu.au
%
% --- Package identification and dependencies ---
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithmic}
\typeout{Document Style `algorithmic' - environment}
%
\RequirePackage{ifthen}
\RequirePackage{calc}
% ALC@noend: true when the `noend' option is given; the algorithmic
% environment then defines \ENDIF, \ENDFOR, ... without a printed "end" line.
\newboolean{ALC@noend}
\setboolean{ALC@noend}{false}
% ALC@line: running line number. ALC@rem: lines counted since the last
% printed number (reset when it reaches the environment's optional argument).
\newcounter{ALC@line}
\newcounter{ALC@rem}
% \ALC@tlm: length grown by \leftmargin at each nested block and used by
% \ALC@item when positioning the label.
\newlength{\ALC@tlm}
%
\DeclareOption{noend}{\setboolean{ALC@noend}{true}}
%
\ProcessOptions
%
% ALGORITHMIC
% Keyword text used by the structural commands of the algorithmic
% environment. Each is an ordinary macro, so a document can \renewcommand
% any of them to change the printed wording.
\newcommand{\algorithmicrequire}{\textbf{Require:}}
\newcommand{\algorithmicensure}{\textbf{Ensure:}}
% Comments are typeset inside literal braces.
\newcommand{\algorithmiccomment}[1]{\{#1\}}
\newcommand{\algorithmicend}{\textbf{end}}
\newcommand{\algorithmicif}{\textbf{if}}
\newcommand{\algorithmicthen}{\textbf{then}}
\newcommand{\algorithmicelse}{\textbf{else}}
% Compound keywords are built from the simple ones, so redefining
% \algorithmicend, \algorithmicif, etc. carries through to them.
\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif}
\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif}
\newcommand{\algorithmicfor}{\textbf{for}}
\newcommand{\algorithmicforall}{\textbf{for all}}
\newcommand{\algorithmicdo}{\textbf{do}}
\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor}
\newcommand{\algorithmicwhile}{\textbf{while}}
\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile}
\newcommand{\algorithmicloop}{\textbf{loop}}
\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop}
\newcommand{\algorithmicrepeat}{\textbf{repeat}}
\newcommand{\algorithmicuntil}{\textbf{until}}
%changed by alex smola
\newcommand{\algorithmicinput}{\textbf{input}}
\newcommand{\algorithmicoutput}{\textbf{output}}
\newcommand{\algorithmicset}{\textbf{set}}
\newcommand{\algorithmictrue}{\textbf{true}}
\newcommand{\algorithmicfalse}{\textbf{false}}
% Note: `and' and `or' carry a trailing control space inside the bold text.
\newcommand{\algorithmicand}{\textbf{and\ }}
\newcommand{\algorithmicor}{\textbf{or\ }}
\newcommand{\algorithmicfunction}{\textbf{function}}
\newcommand{\algorithmicendfunction}{\algorithmicend\ \algorithmicfunction}
\newcommand{\algorithmicmain}{\textbf{main}}
\newcommand{\algorithmicendmain}{\algorithmicend\ \algorithmicmain}
%end changed by alex smola
% \ALC@item[<label>]: item-starting macro installed in place of \@item by
% the algorithmic environment (via \let\@item\ALC@item).
% NOTE(review): this looks like an adaptation of the LaTeX kernel's \@item;
% confirm against the kernel source before changing it. The visible
% package-specific part is the pair of \hskip -\ALC@tlm / \hskip \ALC@tlm
% around the label box, which shifts the label left by the accumulated
% nesting margin and then restores the position.
% Statement order and the % line ends matter here; do not reformat.
\def\ALC@item[#1]{%
\if@noparitem \@donoparitem
\else \if@inlabel \indent \par \fi
\ifhmode \unskip\unskip \par \fi
\if@newlist \if@nobreak \@nbitem \else
\addpenalty\@beginparpenalty
\addvspace\@topsep \addvspace{-\parskip}\fi
\else \addpenalty\@itempenalty \addvspace\itemsep
\fi
\global\@inlabeltrue
\fi
\everypar{\global\@minipagefalse\global\@newlistfalse
\if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels
\penalty\z@ \fi
\everypar{}}\global\@nobreakfalse
\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi
\sbox\@tempboxa{\makelabel{#1}}%
\global\setbox\@labels
\hbox{\unhbox\@labels \hskip \itemindent
\hskip -\labelwidth \hskip -\ALC@tlm
\ifdim \wd\@tempboxa >\labelwidth
\box\@tempboxa
\else \hbox to\labelwidth {\unhbox\@tempboxa}\fi
\hskip \ALC@tlm}\ignorespaces}
%
% algorithmic environment: \begin{algorithmic}[<n>] ... \end{algorithmic}
% The optional argument (default 0) controls line numbering. Every \ALC@it
% steps ALC@line and ALC@rem; when ALC@rem equals <n> it is reset to 0, and
% the item label \ALC@lno prints the line number only while ALC@rem is 0.
% So <n>=1 numbers every line, <n>=5 every fifth line, and with the default
% 0 the reset never fires and no numbers are printed.
% All commands below are defined inside the environment, so they exist only
% between \begin{algorithmic} and \end{algorithmic}.
\newenvironment{algorithmic}[1][0]{
\let\@item\ALC@item
% Item label: the current line number, or nothing.
\newcommand{\ALC@lno}{%
\ifthenelse{\equal{\arabic{ALC@rem}}{0}}
{{\footnotesize \arabic{ALC@line}:}}{}%
}
% Make every deeper list level use the level-one list settings.
\let\@listii\@listi
\let\@listiii\@listi
\let\@listiv\@listi
\let\@listv\@listi
\let\@listvi\@listi
\let\@listvii\@listi
% ALC@g: generic nested block. A list with no vertical spacing, indented by
% 1em, which also adds that indent to \ALC@tlm for label placement.
\newenvironment{ALC@g}{
\begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@
\listparindent\z@ \rightmargin\z@
\topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@
\leftmargin 1em
\addtolength{\ALC@tlm}{\leftmargin}
}
}
{\end{list}}
% \ALC@it: start a new counted line (see the numbering rule above).
\newcommand{\ALC@it}{\addtocounter{ALC@line}{1}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item}
% \ALC@com: print an optional trailing comment; the placeholder value
% `default' (the default of the optional arguments below) prints nothing.
\newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}%
{}{\ \algorithmiccomment{##1}}}
% \REQUIRE and \ENSURE use an explicit label and are not counted as lines.
\newcommand{\REQUIRE}{\item[\algorithmicrequire]}
\newcommand{\ENSURE}{\item[\algorithmicensure]}
\newcommand{\STATE}{\ALC@it}
\newcommand{\COMMENT}[1]{\algorithmiccomment{##1}}
%changes by alex smola
\newcommand{\INPUT}{\item[\algorithmicinput]}
\newcommand{\OUTPUT}{\item[\algorithmicoutput]}
\newcommand{\SET}{\item[\algorithmicset]}
% \newcommand{\TRUE}{\algorithmictrue}
% \newcommand{\FALSE}{\algorithmicfalse}
\newcommand{\AND}{\algorithmicand}
\newcommand{\OR}{\algorithmicor}
\newenvironment{ALC@func}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@main}{\begin{ALC@g}}{\end{ALC@g}}
%end changes by alex smola
% One wrapper environment per construct, all thin aliases of ALC@g, so that
% a mismatched opener and closer fails on the environment name.
\newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}}
\renewcommand{\\}{\@centercr}
% Block openers: \IF[<comment>]{<condition>} etc. Each prints a counted
% header line with the optional comment and opens the matching block.
% The SHORT variants print condition and body on one line and open no block.
\newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\SHORTIF}[2]{\ALC@it\algorithmicif\ ##1\
\algorithmicthen\ {##2}}
% \ELSE and \ELSIF close the current if-block, print their own line and
% open a new if-block, so a single \ENDIF closes the whole chain.
\newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\ELSIF}[2][default]%
{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\SHORTFORALL}[2]{\ALC@it\algorithmicforall\ ##1\ %
\algorithmicdo\ {##2}}
\newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@whl}}
\newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop%
\ALC@com{##1}\begin{ALC@loop}}
%changed by alex smola
\newcommand{\FUNCTION}[2][default]{\ALC@it\algorithmicfunction\ ##2\ %
\ALC@com{##1}\begin{ALC@func}}
\newcommand{\MAIN}[2][default]{\ALC@it\algorithmicmain\ ##2\ %
\ALC@com{##1}\begin{ALC@main}}
%end changed by alex smola
\newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat%
\ALC@com{##1}\begin{ALC@rpt}}
% \UNTIL both closes the repeat-block and prints the terminating condition.
\newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1}
% Block closers. With the `noend' package option they only close the block;
% otherwise they also print a counted "end <keyword>" line.
\ifthenelse{\boolean{ALC@noend}}{
\newcommand{\ENDIF}{\end{ALC@if}}
\newcommand{\ENDFOR}{\end{ALC@for}}
\newcommand{\ENDWHILE}{\end{ALC@whl}}
\newcommand{\ENDLOOP}{\end{ALC@loop}}
\newcommand{\ENDFUNCTION}{\end{ALC@func}}
\newcommand{\ENDMAIN}{\end{ALC@main}}
}{
\newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif}
\newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor}
\newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile}
\newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop}
\newcommand{\ENDFUNCTION}{\end{ALC@func}\ALC@it\algorithmicendfunction}
\newcommand{\ENDMAIN}{\end{ALC@main}\ALC@it\algorithmicendmain}
}
% \@toodeep is redefined to do nothing.
% NOTE(review): presumably this silences the kernel's list-nesting-depth
% error so blocks can nest deeply -- confirm against the LaTeX kernel.
\renewcommand{\@toodeep}{}
% Outermost list: resets both counters and picks the label width from the
% optional argument (0.5em when numbering is off, 1.2em otherwise).
\begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}%
\itemsep\z@ \itemindent\z@ \listparindent\z@%
\partopsep\z@ \parskip\z@ \parsep\z@%
\labelsep 0.5em \topsep 0.2em%
\ifthenelse{\equal{#1}{0}}
{\labelwidth 0.5em }
{\labelwidth 1.2em }
\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}
\ALC@tlm\labelsep
}
}
{\end{list}}

View File

@ -1,75 +0,0 @@
@inproceedings{langley00,
author = {P. Langley},
title = {Crafting Papers on Machine Learning},
year = {2000},
pages = {1207--1216},
editor = {Pat Langley},
booktitle = {Proceedings of the 17th International Conference
on Machine Learning (ICML 2000)},
address = {Stanford, CA},
publisher = {Morgan Kaufmann}
}
@TechReport{mitchell80,
author = "T. M. Mitchell",
title = "The Need for Biases in Learning Generalizations",
institution = "Computer Science Department, Rutgers University",
year = "1980",
address = "New Brunswick, NJ",
}
@phdthesis{kearns89,
author = {M. J. Kearns},
title = {Computational Complexity of Machine Learning},
school = {Department of Computer Science, Harvard University},
year = {1989}
}
@Book{MachineLearningI,
editor = "R. S. Michalski and J. G. Carbonell and T.
M. Mitchell",
title = "Machine Learning: An Artificial Intelligence
Approach, Vol. I",
publisher = "Tioga",
year = "1983",
address = "Palo Alto, CA"
}
@Book{DudaHart2nd,
author = "R. O. Duda and P. E. Hart and D. G. Stork",
title = "Pattern Classification",
publisher = "John Wiley and Sons",
edition = "2nd",
year = "2000"
}
@misc{anonymous,
title= {Suppressed for Anonymity},
author= {Author, N. N.},
year= {2011},
}
@InCollection{Newell81,
author = "A. Newell and P. S. Rosenbloom",
title = "Mechanisms of Skill Acquisition and the Law of
Practice",
booktitle = "Cognitive Skills and Their Acquisition",
pages = "1--51",
publisher = "Lawrence Erlbaum Associates, Inc.",
year = "1981",
editor = "J. R. Anderson",
chapter = "1",
address = "Hillsdale, NJ"
}
@Article{Samuel59,
author = "A. L. Samuel",
title = "Some Studies in Machine Learning Using the Game of
Checkers",
journal = "IBM Journal of Research and Development",
year = "1959",
volume = "3",
number = "3",
pages = "211--229"
}

View File

@ -1,485 +0,0 @@
% fancyhdr.sty version 3.2
% Fancy headers and footers for LaTeX.
% Piet van Oostrum,
% Dept of Computer and Information Sciences, University of Utrecht,
% Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
% Telephone: +31 30 2532180. Email: piet@cs.uu.nl
% ========================================================================
% LICENCE:
% This file may be distributed under the terms of the LaTeX Project Public
% License, as described in lppl.txt in the base LaTeX distribution.
% Either version 1 or, at your option, any later version.
% ========================================================================
% MODIFICATION HISTORY:
% Sep 16, 1994
% version 1.4: Correction for use with \reversemargin
% Sep 29, 1994:
% version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
% Oct 4, 1994:
% version 1.6: Reset single spacing in headers/footers for use with
% setspace.sty or doublespace.sty
% Oct 4, 1994:
% version 1.7: changed \let\@mkboth\markboth to
% \def\@mkboth{\protect\markboth} to make it more robust
% Dec 5, 1994:
% version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
% importantly) use the \chapter/sectionmark definitions from ps@headings if
% they exist (which should be true for all standard classes).
% May 31, 1995:
% version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
% construction in the doc did not work properly with the fancyplain style.
% June 1, 1995:
% version 1.91: The definition of \@mkboth wasn't restored on subsequent
% \pagestyle{fancy}'s.
% June 1, 1995:
% version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
% \pagestyle{fancy} would erroneously select the plain version.
% June 1, 1995:
% version 1.93: \fancypagestyle command added.
% Dec 11, 1995:
% version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
% CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
% position (old hardcoded value of .3\normalbaselineskip is far too high
% when used with very small footer fonts).
% Jan 31, 1996:
% version 1.95: call \@normalsize in the reset code if that is defined,
% otherwise \normalsize.
% this is to solve a problem with ucthesis.cls, as this doesn't
% define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
% work as this is optimized to do very little, so there \@normalsize should
% be called. Hopefully this code works for all versions of LaTeX known to
% mankind.
% April 25, 1996:
% version 1.96: initialize \headwidth to a magic (negative) value to catch
% most common cases that people change it before calling \pagestyle{fancy}.
% Note it can't be initialized when reading in this file, because
% \textwidth could be changed afterwards. This is quite probable.
% We also switch to \MakeUppercase rather than \uppercase and introduce a
% \nouppercase command for use in headers and footers.
% May 3, 1996:
% version 1.97: Two changes:
% 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
% for the chapter and section marks. The current version of amsbook and
% amsart classes don't seem to need them anymore. Moreover the standard
% latex classes don't use \markboth if twoside isn't selected, and this is
% confusing as \leftmark doesn't work as expected.
% 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
% in the amsbook and amsart classes, that make global changes to \topskip,
% which are reset in \ps@empty. Hopefully this doesn't break other things.
% May 7, 1996:
% version 1.98:
% Added % after the line \def\nouppercase
% May 7, 1996:
% version 1.99: This is the alpha version of fancyhdr 2.0
% Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
% Changed \headrulewidth, \footrulewidth, \footruleskip to
% macros rather than length parameters, In this way they can be
% conditionalized and they don't consume length registers. There is no need
% to have them as length registers unless you want to do calculations with
% them, which is unlikely. Note that this may make some uses of them
% incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
% May 10, 1996:
% version 1.99a:
% Added a few more % signs
% May 10, 1996:
% version 1.99b:
% Changed the syntax of \f@nfor to be resistant to catcode changes of :=
% Removed the [1] from the defs of \lhead etc. because the parameter is
% consumed by the \@[xy]lhead etc. macros.
% June 24, 1997:
% version 1.99c:
% corrected \nouppercase to also include the protected form of \MakeUppercase
% \global added to manipulation of \headwidth.
% \iffootnote command added.
% Some comments added about \@fancyhead and \@fancyfoot.
% Aug 24, 1998
% version 1.99d
% Changed the default \ps@empty to \ps@@empty in order to allow
% \fancypagestyle{empty} redefinition.
% Oct 11, 2000
% version 2.0
% Added LPPL license clause.
%
% A check for \headheight is added. An errormessage is given (once) if the
% header is too large. Empty headers don't generate the error even if
% \headheight is very small or even 0pt.
% Warning added for the use of 'E' option when twoside option is not used.
% In this case the 'E' fields will never be used.
%
% Mar 10, 2002
% version 2.1beta
% New command: \fancyhfoffset[place]{length}
% defines offsets to be applied to the header/footer to let it stick into
% the margins (if length > 0).
% place is like in fancyhead, except that only E,O,L,R can be used.
% This replaces the old calculation based on \headwidth and the marginpar
% area.
% \headwidth will be dynamically calculated in the headers/footers when
% this is used.
%
% Mar 26, 2002
% version 2.1beta2
% \fancyhfoffset now also takes h,f as possible letters in the argument to
% allow the header and footer widths to be different.
% New commands \fancyheadoffset and \fancyfootoffset added comparable to
% \fancyhead and \fancyfoot.
% Errormessages and warnings have been made more informative.
%
% Dec 9, 2002
% version 2.1
% The defaults for \footrulewidth, \plainheadrulewidth and
% \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
% someone inadvertently uses \setlength to change any of these, the value
% of \z@skip will not be changed, rather an errormessage will be given.
% March 3, 2004
% Release of version 3.0
% Oct 7, 2004
% version 3.1
% Added '\endlinechar=13' to \fancy@reset to prevent problems with
% includegraphics in header when verbatiminput is active.
% March 22, 2005
% version 3.2
% reset \everypar (the real one) in \fancy@reset because spanish.ldf does
% strange things with \everypar between << and >>.
% \ifancy@mpty{<text>}: stores <text> in \temp@a and opens an \ifx that is
% true when <text> is empty. The caller supplies the \else / \fi.
\def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
% \fancy@def<cs>{<text>}: define <cs> (with the \fancy@gbl prefix) as
% \leavevmode when <text> is empty, otherwise as <text> followed by \strut.
\def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
\fancy@gbl\def#1{#2\strut}\fi}
% Definitions made through \fancy@def are global by default.
\let\fancy@gbl\global
% \@fancyerrmsg{<message>}: report an error through \PackageError when it
% is defined, falling back to the primitive \errmessage otherwise.
\def\@fancyerrmsg#1{%
\ifx\PackageError\undefined
\errmessage{#1}\else
\PackageError{Fancyhdr}{#1}{}\fi}
% \@fancywarning{<message>}: issue a \PackageWarning when it is defined.
% Note that the fallback branch uses \errmessage, i.e. when \PackageWarning
% is unavailable the warning is raised as an error.
\def\@fancywarning#1{%
\ifx\PackageWarning\undefined
\errmessage{#1}\else
\PackageWarning{Fancyhdr}{#1}{}\fi}
% Usage: \@forc \var{charstring}{command to be executed for each char}
% This is similar to LaTeX's \@tfor, but expands the charstring.
% \@forc expands the first token of the charstring once and hands over to
% \f@rc, which stops on an empty string and otherwise lets \f@@rc peel off
% the first character into \var, run the command, and recurse on the rest.
\def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
\def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
\f@@rc#1#2\f@@rc{#3}\fi}
\def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
% Usage: \f@nfor\name:=list\do{body}
% Like LaTeX's \@for but an empty list is treated as a list with an empty
% element
% NOTE(review): the definition below takes three plain arguments
% (\name, list, body); the `:=' / `\do' syntax in the usage line above
% appears to predate the version 1.99b change noted in the history.
\newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
\expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
% Usage: \def@ult \cs{defaults}{argument}
% sets \cs to the characters from defaults appearing in argument
% or defaults if it would be empty. All characters are lowercased.
% Steps: lowercase the expanded argument into \temp@a, clear \cs, append
% each character of defaults that \if@in finds in \temp@a, and finally fall
% back to the full defaults string when nothing matched.
\newcommand\def@ult[3]{%
\edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
\def#1{}%
\@forc\tmpf@ra{#2}%
{\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
\ifx\@empty#1\def#1{#2}\fi}
%
% \if@in <char><set><truecase><falsecase>
%
% Works by splitting <set> at the first <char> (a copy of <char> is appended
% as a sentinel): if the part before the split equals the whole set, <char>
% did not occur and <falsecase> is executed, otherwise <truecase>.
% Uses \temp@a and \temp@b as scratch macros.
\newcommand{\if@in}[4]{%
\edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
\expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
% User commands \fancyhead, \fancyfoot and \fancyhf, each taking an optional
% [places] argument followed by the field text. They all dispatch to
% \f@ncyhf, passing their own name (for messages) and a selector: h for
% headers, f for footers, empty for both. A missing optional argument is
% replaced by an empty [].
\newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
{\f@ncyhf\fancyhead h[]}}
\newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
{\f@ncyhf\fancyfoot f[]}}
\newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
{\f@ncyhf\fancyhf{}[]}}
% New commands for offsets added
% Same dispatch pattern as above, but to \f@ncyhfoffs, which sets offset
% lengths instead of field text.
\newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
{\f@ncyhfoffs\fancyheadoffset h[]}}
\newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
{\f@ncyhfoffs\fancyfootoffset f[]}}
\newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
{\f@ncyhfoffs\fancyhfoffset{}[]}}
% The header and footer fields are stored in command sequences with
% names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
% and <z> from [hf].
%
% \f@ncyhf <caller> <h|f|empty> [<places>] {<text>}
%   #1  calling user command, used only in messages
%   #2  header/footer selector supplied by the caller
%   #3  comma-separated place specifications, e.g. LE,RO
%   #4  text to store in every selected field
% Phase 1 collects every character of #3 outside `eolcrhf,EOLCRHF' and
% reports them as an error. Phase 2 loops over the comma-separated entries:
% for each it derives the even/odd, left/centre/right and head/foot letter
% sets with \def@ult (an absent category selects all of its letters) and
% defines every field in the cross product through \fancy@def.
% The `E' warning fires only when the entry selects even pages alone
% (\f@@@eo is exactly `e') and the document is not twoside.
\def\f@ncyhf#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lcr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\fancy@def\csname
f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}}
% \f@ncyhfoffs <caller> <h|f|empty> [<places>] {<length>}
% Offset counterpart of \f@ncyhf with the same argument layout. Differences
% visible here: `c' is not an accepted place letter (only eolrhf), the
% value #4 is assigned with \setlength to the registers
% \f@ncyO@<eo><lr><hf>, and \fancy@setoffs is called once at the end.
% \fancy@setoffs is defined elsewhere in the package (not in this part).
\def\f@ncyhfoffs#1#2[#3]#4{%
\def\temp@c{}%
\@forc\tmpf@ra{#3}%
{\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
{}{\edef\temp@c{\temp@c\tmpf@ra}}}%
\ifx\@empty\temp@c\else
\@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
[#3]}%
\fi
\f@nfor\temp@c{#3}%
{\def@ult\f@@@eo{eo}\temp@c
\if@twoside\else
\if\f@@@eo e\@fancywarning
{\string#1's `E' option without twoside option is useless}\fi\fi
\def@ult\f@@@lcr{lr}\temp@c
\def@ult\f@@@hf{hf}{#2\temp@c}%
\@forc\f@@eo\f@@@eo
{\@forc\f@@lcr\f@@@lcr
{\@forc\f@@hf\f@@@hf
{\expandafter\setlength\csname
f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
{#4}}}}}%
\fancy@setoffs}
% Fancyheadings version 1 commands. These are more or less deprecated,
% but they continue to work.
% Each of \lhead, \chead, \rhead, \lfoot, \cfoot, \rfoot accepts either
%   \xhead[<even-page text>]{<odd-page text>}   (handled by \@x...)
% or
%   \xhead{<text for both>}                     (handled by \@y...)
% and stores the text in the matching \f@ncy<e|o><l|c|r><h|f> field.
\newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
\def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
\def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
\newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
\def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
\def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
\newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
\def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
\def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
\newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
\def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
\def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
\newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
\def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
\def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
\newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
\def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
\def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
% \headwidth is the user-visible name for the internal length
% \fancy@headwidth (both names refer to the same register).
\newlength{\fancy@headwidth}
\let\headwidth\fancy@headwidth
% Offset registers set by \fancyhfoffset and friends, named
% \f@ncyO@<e|o><l|r><h|f> (even/odd page, left/right side, head/foot).
\newlength{\f@ncyO@elh}
\newlength{\f@ncyO@erh}
\newlength{\f@ncyO@olh}
\newlength{\f@ncyO@orh}
\newlength{\f@ncyO@elf}
\newlength{\f@ncyO@erf}
\newlength{\f@ncyO@olf}
\newlength{\f@ncyO@orf}
% Rule parameters are macros, not length registers: change them with
% \renewcommand rather than \setlength.
\newcommand{\headrulewidth}{0.4pt}
\newcommand{\footrulewidth}{0pt}
\newcommand{\footruleskip}{.3\normalbaselineskip}
% Fancyplain stuff shouldn't be used anymore (rather
% \fancypagestyle{plain} should be used), but it must be present for
% compatibility reasons.
\newcommand{\plainheadrulewidth}{0pt}
\newcommand{\plainfootrulewidth}{0pt}
\newif\if@fancyplain \@fancyplainfalse
% \fancyplain{<plain-page text>}{<normal text>}: selects by \if@fancyplain.
\def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
% Sentinel meaning "not set by the user yet"; it cannot be initialised from
% \textwidth here because \textwidth may still change after loading.
\headwidth=-123456789sp %magic constant
% Command to reset various things in the headers:
% a.o. single spacing (taken from setspace.sty)
% and the catcode of ^^M (so that epsf files in the header work if a
% verbatim crosses a page boundary)
% It also defines a \nouppercase command that disables \uppercase and
% \MakeUppercase. It can only be used in the headers and footers.
%
% \fnch@everypar keeps a handle on the primitive \everypar as it is when the
% package is loaded, so \fancy@reset clears the real token register even if
% another package redefines \everypar later.
\let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
\def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
\def\baselinestretch{1}%
% \nouppercase{<text>}: typeset <text> in a group where \uppercase,
% \MakeUppercase and its protected form (name with trailing space) are \relax.
\def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
\expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
% Re-select the normal font size using whichever mechanism exists.
\ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
\ifx\@normalsize\undefined \normalsize % for ucthesis.cls
\else \@normalsize \fi
\else% NFSS (2.09) present
\@newbaseline%
\fi}
% Initialization of the head and foot text.
% The default values still contain \fancyplain for compatibility.
\fancyhf{} % clear all
% lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
% evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
% The test on \if@twoside happens once, at package load time: two-sided
% documents get mirrored marks, one-sided documents get \rightmark on the
% left and \leftmark on the right of every page.
\if@twoside
\fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
\fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
\else
\fancyhead[l]{\fancyplain{}{\sl\rightmark}}
\fancyhead[r]{\fancyplain{}{\sl\leftmark}}
\fi
\fancyfoot[c]{\rm\thepage} % page number
% Use box 0 as a temp box and dimen 0 as temp dimen.
% This can be done, because this code will always
% be used inside another box, and therefore the changes are local.
%
% \@fancyvbox<length>{<material>}: typeset <material> in a \vbox and output
% it. If the box is taller than <length> (\headheight or \footskip at the
% call sites), warn, globally enlarge <length> to the box height so later
% pages fit, and set this box's height back to the old <length> value.
% \@gobble at the end of the message swallows the next token that
% \PackageWarning would otherwise add to it.
\def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
{\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
We now make it that large for the rest of the document.^^J
This may cause the page layout to be inconsistent, however\@gobble}%
\dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
\box0}
% Put together a header or footer given the left, center and
% right text, fillers at left and right and a rule.
% The \lap commands put the text into an hbox of zero size,
% so overlapping text does not generate an errormessage.
% These macros have 5 parameters:
% 1. LEFTSIDE BEARING % This determines at which side the header will stick
% out. When \fancyhfoffset is used this calculates \headwidth, otherwise
% it is \hss or \relax (after expansion).
% 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
% 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
% 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
% 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
% \@fancyhead{#1}{#2}{#3}{#4}{#5}: assemble one header line.
% #1 and #5 are placed immediately before and after an \hbox of width
% \headwidth. Inside that box \fancy@reset runs first; then \@fancyvbox
% (checked against \headheight) stacks an \hbox holding the three parts,
% followed by \headrule. Each part is a bottom-aligned \parbox of full
% \headwidth: #2 ragged-right inside \rlap, #3 centred, #4 ragged-left
% inside \llap, separated by \hfill.
\def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\headheight{\hbox
{\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
\parbox[b]{\headwidth}{\centering#3}\hfill
\llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
% \@fancyfoot{#1}{#2}{#3}{#4}{#5}: assemble one footer line.
% Same layout as \@fancyhead, with these differences visible below: the
% height check is made against \footskip, \footrule comes before the text
% instead of after it, and the three \parbox parts are top-aligned ([t]).
\def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
\@fancyvbox\footskip{\footrule
\hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
\parbox[t]{\headwidth}{\centering#3}\hfill
\llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
% \headrule: rule drawn below the header text. Draws an \hrule of thickness
% \headrulewidth and width \headwidth, then skips back up by \headrulewidth.
% When \if@fancyplain is true, \plainheadrulewidth is used as the thickness;
% the inner brace group keeps that \let local to this one rule.
\def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
\hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
% \footrule: rule drawn above the footer text. Skips up by \footruleskip and
% \footrulewidth, draws an \hrule of width \headwidth and thickness
% \footrulewidth, then skips down by \footruleskip. When \if@fancyplain is
% true, \plainfootrulewidth is used as the thickness; the inner brace group
% keeps that \let local to this one rule.
\def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
\vskip-\footruleskip\vskip-\footrulewidth
\hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
% \ps@fancy: the `fancy' page style (what \pagestyle{fancy} executes).
% This long definition is the first-use version. It
%   1. defines \@chapapp (as \chaptername) and \MakeUppercase (as \uppercase)
%      if they are undefined;
%   2. installs mark commands: when \chapter is undefined, \sectionmark feeds
%      \markboth and \subsectionmark feeds \markright; otherwise \chaptermark
%      feeds \markboth and \sectionmark feeds \markright;
%   3. activates the style through \ps@@fancy;
%   4. globally redefines \ps@fancy to the short form
%      `\@fancyplainfalse\ps@@fancy', so steps 1-2 and 5 run only once;
%   5. if \headwidth is negative, adds 123456789sp and \textwidth to it
%      (presumably \headwidth starts out at -123456789sp elsewhere in the
%      package; that initialisation is not visible here).
\def\ps@fancy{%
\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
%
% Define \MakeUppercase for old LaTeXen.
% Note: we used \def rather than \let, so that \let\uppercase\relax (from
% the version 1 documentation) will still work.
%
\@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
\@ifundefined{chapter}{\def\sectionmark##1{\markboth
{\MakeUppercase{\ifnum \c@secnumdepth>\z@
\thesection\hskip 1em\relax \fi ##1}}{}}%
\def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
\thesubsection\hskip 1em\relax \fi ##1}}}%
{\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
\@chapapp\ \thechapter. \ \fi ##1}}{}}%
\def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
\thesection. \ \fi ##1}}}}%
%\csname ps@headings\endcsname % use \ps@headings defaults if they exist
\ps@@fancy
\gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
% Initialize \headwidth if the user didn't
%
\ifdim\headwidth<0sp
%
% This catches the case that \headwidth hasn't been initialized and the
% case that the user added something to \headwidth in the expectation that
% it was initialized to \textwidth. We compensate this now. This loses if
% the user intended to multiply it by a factor. But that case is more
% likely done by saying something like \headwidth=1.2\textwidth.
% The doc says you have to change \headwidth after the first call to
% \pagestyle{fancy}. This code is just to catch the most common cases where
% that requirement is violated.
%
\global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
\fi}
% \ps@fancyplain: the `fancyplain' page style. Runs \ps@fancy and then makes
% \ps@plain an alias of \ps@plain@fancy, so that later uses of the plain
% page style go through the fancy machinery.
% \ps@plain@fancy: sets \if@fancyplain to true and activates \ps@@fancy.
\def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
\def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
% \ps@@empty: copy of \ps@empty as it is defined when this file is loaded.
% \ps@@fancy: the part shared by all fancy page styles. It first runs
% \ps@@empty, then defines \@mkboth as \protect\markboth and defines the four
% LaTeX output hooks \@oddhead, \@oddfoot, \@evenhead and \@evenfoot as calls
% to \@fancyhead / \@fancyfoot. Each call passes five arguments: the left
% bearing (\fancy@O?l?), the left, centre and right text components
% (\f@ncy?l?, \f@ncy?c?, \f@ncy?r?) and the right bearing (\fancy@O?r?).
\let\ps@@empty\ps@empty
\def\ps@@fancy{%
\ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
\def\@mkboth{\protect\markboth}%
\def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
\def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
\def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
\def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
}
% Default definitions for compatibility mode:
% These cause the header/footer to take the defined \headwidth as width
% And to shift in the direction of the marginpar area
% Bearing macros, named \fancy@O<o|e><l|r><h|f> for odd/even page, left/right
% side, header/footer.
% \fancy@Oolh expands to \hss when \if@reversemargin is true and to \relax
% otherwise; \fancy@Oorh is the opposite. The even-page header bearings are
% the odd-page ones swapped, and every footer bearing is a copy of the
% matching header bearing.
\def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
\def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
\let\fancy@Oelh\fancy@Oorh
\let\fancy@Oerh\fancy@Oolh
\let\fancy@Oolf\fancy@Oolh
\let\fancy@Oorf\fancy@Oorh
\let\fancy@Oelf\fancy@Oelh
\let\fancy@Oerf\fancy@Oerh
% New definitions for the use of \fancyhfoffset
% These calculate the \headwidth from \textwidth and the specified offsets.
% Left-bearing macros for offset mode, one each for odd header (olh), even
% header (elh), odd footer (olf) and even footer (elf). Each one sets
% \headwidth to \textwidth plus the left offset \f@ncyO@?l? plus the right
% offset \f@ncyO@?r?, then moves left by the left offset with a negative
% \hskip. \fancy@setoffs installs them as the \fancy@O?l? bearings.
\def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
\advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
\def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
\advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
\def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
\advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
\def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
\advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
% \fancy@setoffs: switch the bearing macros from the compatibility defaults
% to offset mode. \headwidth is re-pointed at \fancy@headwidth, every left
% bearing (\fancy@O?l?) becomes the matching \fancy@offs??? macro, and every
% right bearing (\fancy@O?r?) becomes \hss.
% Each \let is prefixed with \fancy@gbl. \fancypagestyle sets \fancy@gbl to
% \relax; its default meaning is defined outside this section (presumably
% \global -- confirm against the start of the package).
\def\fancy@setoffs{%
% Just in case \let\headwidth\textwidth was used
\fancy@gbl\let\headwidth\fancy@headwidth
\fancy@gbl\let\fancy@Oolh\fancy@offsolh
\fancy@gbl\let\fancy@Oelh\fancy@offselh
\fancy@gbl\let\fancy@Oorh\hss
\fancy@gbl\let\fancy@Oerh\hss
\fancy@gbl\let\fancy@Oolf\fancy@offsolf
\fancy@gbl\let\fancy@Oelf\fancy@offself
\fancy@gbl\let\fancy@Oorf\hss
\fancy@gbl\let\fancy@Oerf\hss}
% Page-content tests for use in headers and footers.
% \@makecol is wrapped: before calling the saved original (\latex@makecol)
% it records whether \footins is void in \iffootnote and copies \@toplist and
% \@botlist into \topfloat and \botfloat.
% NOTE(review): \footnotetrue is set when \footins IS void, i.e. when the
% insert holds no material, which looks inverted relative to the name
% \iffootnote -- confirm against upstream fancyhdr before relying on it.
% \iftopfloat{A}{B} / \ifbotfloat{A}{B}: expand to B when \topfloat /
% \botfloat equals \empty, otherwise to A.
% \iffloatpage{A}{B}: expands to A when \if@fcolmade is true, otherwise to B.
\newif\iffootnote
\let\latex@makecol\@makecol
\def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi
\let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
\def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
\def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
\def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
% \fancypagestyle{<name>}{<definitions>}: define the page style \ps@<name>.
% When that style is selected it sets \fancy@gbl to \relax (so assignments
% prefixed with \fancy@gbl, such as those in \fancy@setoffs, are performed
% without a prefix), executes <definitions>, and then runs \ps@fancy.
\newcommand{\fancypagestyle}[2]{%
\@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@ -1,207 +0,0 @@
%% Template for MLP Coursework 1 / 16 October 2017
%% Based on LaTeX template for ICML 2017 - example_paper.tex at
%% https://2017.icml.cc/Conferences/2017/StyleAuthorInstructions
\documentclass{article}
\usepackage[T1]{fontenc}
\usepackage{amssymb,amsmath}
\usepackage{txfonts}
\usepackage{microtype}
% For figures
\usepackage{graphicx}
\usepackage{subfigure}
% For citations
\usepackage{natbib}
% For algorithms
\usepackage{algorithm}
\usepackage{algorithmic}
% the hyperref package is used to produce hyperlinks in the
% resulting PDF.  If this breaks your system, please comment out the
% following usepackage line and replace \usepackage{mlp2017} with
% \usepackage[nohyperref]{mlp2017} below.
\usepackage{hyperref}
\usepackage{url}
\urlstyle{same}
% Packages hyperref and algorithmic misbehave sometimes. We can fix
% this with the following command.
\newcommand{\theHalgorithm}{\arabic{algorithm}}
% Set up MLP coursework style (based on ICML style)
\usepackage{mlp2017}
\mlptitlerunning{MLP Coursework 1 (\studentNumber)}
\bibliographystyle{icml2017}
\DeclareMathOperator{\softmax}{softmax}
\DeclareMathOperator{\sigmoid}{sigmoid}
\DeclareMathOperator{\sgn}{sgn}
\DeclareMathOperator{\relu}{relu}
\DeclareMathOperator{\lrelu}{lrelu}
\DeclareMathOperator{\elu}{elu}
\DeclareMathOperator{\selu}{selu}
\DeclareMathOperator{\maxout}{maxout}
%% You probably do not need to change anything above this comment
%% REPLACE this with your student number
\def\studentNumber{s1754321}
\begin{document}
\twocolumn[
\mlptitle{MLP Coursework 1: Activation Functions}
\centerline{\studentNumber}
\vskip 7mm
]
\begin{abstract}
The abstract should be 100--200 words long, providing a concise summary of the contents of your report.
\end{abstract}
\section{Introduction}
\label{sec:intro}
This document provides a template for the MLP coursework 1 report. In particular, it structures the document into five sections (plus an abstract and the references) -- you should keep to this structure for your report. If you want to use subsections within a section that is fine, but please do not use any deeper structuring. In this template the text in each section will include an outline of what you should include in each section, along with some practical LaTeX examples (for example figures, tables, algorithms). Your document should be no longer than \textbf{six pages}, with an additional page allowed for references.
The introduction should place your work in context, giving the overall motivation for the work, and clearly outlining the research questions you have explored -- in this case comparison of the behaviour of the different activation functions, experimental investigation of the impact of the depth of the network with respect to accuracy, and experimental investigation of different approaches to weight initialisation. This section should also include a concise description of the MNIST task and data -- be precise: for example state the size of the training and validation sets.
\section{Activation functions}
\label{sec:actfn}
This section should cover the theoretical methodology -- in this case you should present the four activation functions: ReLU, Leaky ReLU, ELU, and SELU. I didn't do it in this document, but the first time you use an acronym you should say what it stands for, for example Rectified Linear Unit (ReLU). You should use equations to concisely describe each activation function. For example, ReLU:
\begin{equation}
\relu(x) = \max(0, x) ,
\end{equation}
which has the gradient:
\begin{equation}
\frac{d}{dx} \relu(x) =
\begin{cases}
0 & \quad \text{if } x \leq 0 \\
1 & \quad \text{if } x > 0 .
\end{cases}
\end{equation}
The \LaTeX{} for the derivatives is slightly more complicated. We provided definitions near the top of the file (the part before \verb+\begin{document}+) for \verb+\relu+, \verb+\lrelu+, \verb+\elu+, and \verb+\selu+. There is no need to discuss the unit tests for these activation functions in this report.
It is probably not needed in this report, but if you would like to include an algorithm in your report, please use the \verb+algorithm+ and \verb+algorithmic+ environments to format pseudocode (for instance, Algorithm~\ref{alg:example}). These require the corresponding style files, \verb+algorithm.sty+ and \verb+algorithmic.sty+ which are supplied with this package.
\begin{algorithm}[ht]
\begin{algorithmic}
\STATE {\bfseries Input:} data $x_i$, size $m$
\REPEAT
\STATE Initialize $noChange = true$.
\FOR{$i=1$ {\bfseries to} $m-1$}
\IF{$x_i > x_{i+1}$}
\STATE Swap $x_i$ and $x_{i+1}$
\STATE $noChange = false$
\ENDIF
\ENDFOR
\UNTIL{$noChange$ is $true$}
\end{algorithmic}
\caption{Bubble Sort}
\label{alg:example}
\end{algorithm}
\section{Experimental comparison of activation functions}
\label{sec:actexpts}
In this section you should present the results and discussion of your experiments comparing networks using the different activation functions on the MNIST task. As explained in the coursework document, you should use 2 hidden layers with 100 hidden units per layer for these experiments. You can compare the learning curves (error vs epoch) for training and/or validation, and the validation set accuracies.
Your experimental sections should include graphs (for instance, figure~\ref{fig:sample-graph}) and/or tables (for instance, table~\ref{tab:sample-table})\footnote{These examples were taken from the ICML template paper.}, using the \verb+figure+ and \verb+table+ environments, in which you use \verb+\includegraphics+ to include an image (pdf, png, or jpg formats). Please export graphs as
\href{https://en.wikipedia.org/wiki/Vector_graphics}{vector graphics}
rather than \href{https://en.wikipedia.org/wiki/Raster_graphics}{raster
files} as this will make sure all detail in the plot is visible.
Matplotlib supports saving high quality figures in a wide range of
common image formats using the
\href{http://matplotlib.org/api/pyplot_api.html\#matplotlib.pyplot.savefig}{\texttt{savefig}}
function. \textbf{You should use \texttt{savefig} rather than copying
the screen-resolution raster images outputted in the notebook.} An
example of using \texttt{savefig} to save a figure as a PDF file (which
can be included as graphics in a \LaTeX{} document) is given in the coursework document.
If you need a figure or table to stretch across two columns use the \verb+figure*+ or \verb+table*+ environment instead of the \verb+figure+ or \verb+table+ environment. Use the \verb+subfigure+ environment if you want to include multiple graphics in a single figure.
\begin{figure}[tb]
\vskip 5mm
\begin{center}
\centerline{\includegraphics[width=\columnwidth]{icml_numpapers}}
\caption{Historical locations and number of accepted papers for International
Machine Learning Conferences (ICML 1993 -- ICML 2008) and
International Workshops on Machine Learning (ML 1988 -- ML
1992). At the time this figure was produced, the number of
accepted papers for ICML 2008 was unknown and instead estimated.}
\label{fig:sample-graph}
\end{center}
\vskip -5mm
\end{figure}
\begin{table}[tb]
\vskip 3mm
\begin{center}
\begin{small}
\begin{sc}
\begin{tabular}{lcccr}
\hline
\abovespace\belowspace
Data set & Naive & Flexible & Better? \\
\hline
\abovespace
Breast & 95.9$\pm$ 0.2& 96.7$\pm$ 0.2& $\surd$ \\
Cleveland & 83.3$\pm$ 0.6& 80.0$\pm$ 0.6& $\times$\\
Glass2 & 61.9$\pm$ 1.4& 83.8$\pm$ 0.7& $\surd$ \\
Credit & 74.8$\pm$ 0.5& 78.3$\pm$ 0.6& \\
Horse & 73.3$\pm$ 0.9& 69.7$\pm$ 1.0& $\times$\\
Meta & 67.1$\pm$ 0.6& 76.5$\pm$ 0.5& $\surd$ \\
Pima & 75.1$\pm$ 0.6& 73.9$\pm$ 0.5& \\
\belowspace
Vehicle & 44.9$\pm$ 0.6& 61.5$\pm$ 0.4& $\surd$ \\
\hline
\end{tabular}
\end{sc}
\end{small}
\caption{Classification accuracies for naive Bayes and flexible
Bayes on various data sets.}
\label{tab:sample-table}
\end{center}
\vskip -3mm
\end{table}
\section{Deep neural network experiments}
\label{sec:dnnexpts}
This section should report on your experiments on deeper networks for MNIST. The two sets of experiments are to explore the impact of the depth of the network (number of hidden layers), and a comparison of different approaches to weight initialisation.
In this section, and in the previous section, you should present your experimental results clearly and concisely, followed by an interpretation and discussion of results. You need to present your results in a way that makes it easy for a reader to understand what they mean. You should facilitate comparisons either using graphs with multiple curves or (if appropriate, e.g. for accuracies) a results table. You need to avoid having too many figures, poorly labelled graphs, and graphs which should be comparable but which use different axis scales. A good presentation will enable the reader to compare trends in the same graph -- each graph should summarise the results relating to a particular research (sub)question.
Your discussion should interpret the results, both in terms of summarising the outcomes of a particular experiment, and attempting to relate to the underlying models. A good report would have some analysis, resulting in an understanding of why particular results are observed, perhaps with reference to the literature. Use bibtex to organise your references -- in this case the references are in the file \verb+example-refs.bib+. Here is an example reference \citep{langley00}.
\section{Conclusions}
\label{sec:concl}
You should draw conclusions from the experiments, related to the research questions outlined in the introduction (section~\ref{sec:intro}). You should state the conclusions clearly and concisely. It is good if the conclusion from one experiment influenced what you did in later experiments -- your aim is to learn from your experiments. Extra credit if you relate your findings to what has been reported in the literature.
A good conclusions section would also include a further work discussion, building on work done so far, and referencing the literature where appropriate.
\bibliography{example-refs}
\end{document}
% This document was modified from the file originally made available by
% Pat Langley and Andrea Danyluk for ICML-2K. This version was
% created by Lise Getoor and Tobias Scheffer, it was slightly modified
% from the 2010 version by Thorsten Joachims & Johannes Fuernkranz,
% slightly modified from the 2009 version by Kiri Wagstaff and
% Sam Roweis's 2008 version, which is slightly modified from
% Prasad Tadepalli's 2007 version which is a lightly
% changed version of the previous year's version by Andrew Moore,
% which was in turn edited from those of Kristian Kersting and
% Codrina Lauth. Alex Smola contributed to the algorithmic style files.

Binary file not shown.

View File

@ -1,195 +0,0 @@
%% Template for MLP Coursework 2 / 6 November 2017
%% Based on LaTeX template for ICML 2017 - example_paper.tex at
%% https://2017.icml.cc/Conferences/2017/StyleAuthorInstructions
\documentclass{article}
\usepackage[T1]{fontenc}
\usepackage{amssymb,amsmath}
\usepackage{txfonts}
\usepackage{microtype}
% For figures
\usepackage{graphicx}
\usepackage{subfigure}
% For citations
\usepackage{natbib}
% For algorithms
\usepackage{algorithm}
\usepackage{algorithmic}
% the hyperref package is used to produce hyperlinks in the
% resulting PDF.  If this breaks your system, please comment out the
% following usepackage line and replace \usepackage{mlp2017} with
% \usepackage[nohyperref]{mlp2017} below.
\usepackage{hyperref}
\usepackage{url}
\urlstyle{same}
% Packages hyperref and algorithmic misbehave sometimes. We can fix
% this with the following command.
\newcommand{\theHalgorithm}{\arabic{algorithm}}
% Set up MLP coursework style (based on ICML style)
\usepackage{mlp2017}
\mlptitlerunning{MLP Coursework 2 (\studentNumber)}
\bibliographystyle{icml2017}
\DeclareMathOperator{\softmax}{softmax}
\DeclareMathOperator{\sigmoid}{sigmoid}
\DeclareMathOperator{\sgn}{sgn}
\DeclareMathOperator{\relu}{relu}
\DeclareMathOperator{\lrelu}{lrelu}
\DeclareMathOperator{\elu}{elu}
\DeclareMathOperator{\selu}{selu}
\DeclareMathOperator{\maxout}{maxout}
%% You probably do not need to change anything above this comment
%% REPLACE this with your student number
\def\studentNumber{sXXXXXXX}
\begin{document}
\twocolumn[
\mlptitle{MLP Coursework 2: Learning rules, BatchNorm, and ConvNets}
\centerline{\studentNumber}
\vskip 7mm
]
\begin{abstract}
The abstract should be 100--200 words long, providing a concise summary of the contents of your report.
\end{abstract}
\section{Introduction}
\label{sec:intro}
This document provides a template for the MLP coursework 2 report. This template structures the report into sections, which you are recommended to use, but can change if you wish. If you want to use subsections within a section that is fine, but please do not use any deeper structuring. In this template the text in each section will include an outline of what you should include in each section, along with some practical LaTeX examples (for example figures, tables, algorithms). Your document should be no longer than \textbf{seven pages}, with an additional page allowed for references.
The introduction should place your work in context, giving the overall motivation for the work, and clearly outlining the research questions you have explored. This section should also include a concise description of the Balanced EMNIST task and data -- be precise: for example state the size of the training, validation, and test sets.
\section{Baseline systems}
In this section you should report your baseline experiments for EMNIST. No need for theoretical explanations of things covered in the course, but should you go beyond what was covered please explain what you did with references to relevant paper(s) if appropriate. In this section you should aim to cover both the ``what'' and the ``why'': \emph{what} you did, giving sufficient information (hyperparameter settings, etc.) so that someone else (e.g. another student on the course) could reproduce your results; and \emph{why} you performed the experiments you are reporting -- what you are aiming to discover, and what the motivation is for the particular experiments you undertook. You should also provide some discussion and interpretation of your results.
As before, your experimental sections should include graphs (for instance, figure~\ref{fig:sample-graph}) and/or tables (for instance, table~\ref{tab:sample-table})\footnote{These examples were taken from the ICML template paper.}, using the \verb+figure+ and \verb+table+ environments, in which you use \verb+\includegraphics+ to include an image (pdf, png, or jpg formats). Please export graphs as
\href{https://en.wikipedia.org/wiki/Vector_graphics}{vector graphics}
rather than \href{https://en.wikipedia.org/wiki/Raster_graphics}{raster
files} as this will make sure all detail in the plot is visible.
Matplotlib supports saving high quality figures in a wide range of
common image formats using the
\href{http://matplotlib.org/api/pyplot_api.html\#matplotlib.pyplot.savefig}{\texttt{savefig}}
function. \textbf{You should use \texttt{savefig} rather than copying
the screen-resolution raster images outputted in the notebook.} An
example of using \texttt{savefig} to save a figure as a PDF file (which
can be included as graphics in a \LaTeX{} document) is given in the coursework 1 document.
If you need a figure or table to stretch across two columns use the \verb+figure*+ or \verb+table*+ environment instead of the \verb+figure+ or \verb+table+ environment. Use the \verb+subfigure+ environment if you want to include multiple graphics in a single figure.
\begin{figure}[tb]
\vskip 5mm
\begin{center}
\centerline{\includegraphics[width=\columnwidth]{icml_numpapers}}
\caption{Historical locations and number of accepted papers for International
Machine Learning Conferences (ICML 1993 -- ICML 2008) and
International Workshops on Machine Learning (ML 1988 -- ML
1992). At the time this figure was produced, the number of
accepted papers for ICML 2008 was unknown and instead estimated.}
\label{fig:sample-graph}
\end{center}
\vskip -5mm
\end{figure}
\begin{table}[tb]
\vskip 3mm
\begin{center}
\begin{small}
\begin{sc}
\begin{tabular}{lcccr}
\hline
\abovespace\belowspace
Data set & Naive & Flexible & Better? \\
\hline
\abovespace
Breast & 95.9$\pm$ 0.2& 96.7$\pm$ 0.2& $\surd$ \\
Cleveland & 83.3$\pm$ 0.6& 80.0$\pm$ 0.6& $\times$\\
Glass2 & 61.9$\pm$ 1.4& 83.8$\pm$ 0.7& $\surd$ \\
Credit & 74.8$\pm$ 0.5& 78.3$\pm$ 0.6& \\
Horse & 73.3$\pm$ 0.9& 69.7$\pm$ 1.0& $\times$\\
Meta & 67.1$\pm$ 0.6& 76.5$\pm$ 0.5& $\surd$ \\
Pima & 75.1$\pm$ 0.6& 73.9$\pm$ 0.5& \\
\belowspace
Vehicle & 44.9$\pm$ 0.6& 61.5$\pm$ 0.4& $\surd$ \\
\hline
\end{tabular}
\end{sc}
\end{small}
\caption{Classification accuracies for naive Bayes and flexible
Bayes on various data sets.}
\label{tab:sample-table}
\end{center}
\vskip -3mm
\end{table}
\section{Learning rules}
In this section you should compare RMSProp and Adam with gradient descent, introducing these learning rules either as equations or as algorithmic pseudocode. If you present the different approaches as algorithms, you can use the \verb+algorithm+ and \verb+algorithmic+ environments to format pseudocode (for instance, Algorithm~\ref{alg:example}). These require the corresponding style files, \verb+algorithm.sty+ and \verb+algorithmic.sty+ which are supplied with this package.
\begin{algorithm}[ht]
\begin{algorithmic}
\STATE {\bfseries Input:} data $x_i$, size $m$
\REPEAT
\STATE Initialize $noChange = true$.
\FOR{$i=1$ {\bfseries to} $m-1$}
\IF{$x_i > x_{i+1}$}
\STATE Swap $x_i$ and $x_{i+1}$
\STATE $noChange = false$
\ENDIF
\ENDFOR
\UNTIL{$noChange$ is $true$}
\end{algorithmic}
\caption{Bubble Sort}
\label{alg:example}
\end{algorithm}
You should, in your own words, explain what the different learning rules do, and how they differ. You should then present your experiments and results, comparing and contrasting stochastic gradient descent, RMSProp, and Adam. As before concentrate on the ``what'' (remember give enough information so someone can reproduce your experiments), the ``why'' (why did you choose the experiments that you performed -- you may have been motivated by your earlier results, by the literature, or by a specific research question), and the interpretation of your results.
In every section, you should present your results in a way that makes it easy for a reader to understand what they mean. You should facilitate comparisons either using graphs with multiple curves or (if appropriate, e.g. for accuracies) a results table. You need to avoid having too many figures, poorly labelled graphs, and graphs which should be comparable but which use different axis scales. A good presentation will enable the reader to compare trends in the same graph -- each graph should summarise the results relating to a particular research (sub)question.
Your discussion should interpret the results, both in terms of summarising the outcomes of a particular experiment, and attempting to relate to the underlying models. A good report would have some analysis, resulting in an understanding of why particular results are observed, perhaps with reference to the literature. Use bibtex to organise your references -- in this case the references are in the file \verb+example-refs.bib+. Here is an example reference \citep{langley00}.
\section{Batch normalisation}
In this section you should present batch normalisation, supported using equations or algorithmic pseudocode. Following this present your experiments, again remembering to include the ``what'', the ``why'', and the interpretation of results.
\section{Convolutional networks}
In this section you should present your experiments with convolutional networks. Explain the idea of convolutional layers and pooling layers, and briefly explain how you did the implementation. There is no need to include chunks of code. You should report the experiments you have undertaken, again remembering to include \emph{what} experiments you performed (include details of hyperparameters, etc.), \emph{why} you performed them (what was the motivation for the experiments, what research questions are you exploring), and the interpretation and discussion of your results.
\section{Test results}
The results reported in the previous sections should be on the validation set. You should finally report results on the EMNIST test set using what you judge to be the best deep neural network (without convolutional layers) and the best convolutional network. Again focus on what the experiments were (be precise), why you chose to do them (in particular, how did you choose the architectures/settings to use with the test set), and a discussion/interpretation of the results.
\section{Conclusions}
\label{sec:concl}
You should draw conclusions from the experiments, related to the research questions outlined in the introduction (section~\ref{sec:intro}). You should state the conclusions clearly and concisely. It is good if the conclusion from one experiment influenced what you did in later experiments -- your aim is to learn from your experiments. Extra credit if you relate your findings to what has been reported in the literature.
A good conclusions section would also include a further work discussion, building on work done so far, and referencing the literature where appropriate.
\bibliography{example-refs}
\end{document}
% This document was modified from the file originally made available by
% Pat Langley and Andrea Danyluk for ICML-2K. This version was
% created by Lise Getoor and Tobias Scheffer, it was slightly modified
% from the 2010 version by Thorsten Joachims & Johannes Fuernkranz,
% slightly modified from the 2009 version by Kiri Wagstaff and
% Sam Roweis's 2008 version, which is slightly modified from
% Prasad Tadepalli's 2007 version which is a lightly
% changed version of the previous year's version by Andrew Moore,
% which was in turn edited from those of Kristian Kersting and
% Codrina Lauth. Alex Smola contributed to the algorithmic style files.

View File

@ -1,720 +0,0 @@
% File: mlp2017.sty (LaTeX style file for ICML-2017, version of 2017-05-31)
% Modified by Daniel Roy 2017: changed byline to use footnotes for affiliations, and removed emails
% This file contains the LaTeX formatting parameters for a two-column
% conference proceedings that is 8.5 inches wide by 11 inches high.
%
% Modified by Percy Liang 12/2/2013: changed the year, location from the previous template for ICML 2014
% Modified by Fei Sha 9/2/2013: changed the year, location from the previous template for ICML 2013
%
% Modified by Fei Sha 4/24/2013: (1) remove the extra whitespace after the first author's email address (in %the camera-ready version) (2) change the Proceeding ... of ICML 2010 to 2014 so PDF's metadata will show up % correctly
%
% Modified by Sanjoy Dasgupta, 2013: changed years, location
%
% Modified by Francesco Figari, 2012: changed years, location
%
% Modified by Christoph Sawade and Tobias Scheffer, 2011: added line
% numbers, changed years
%
% Modified by Hal Daume III, 2010: changed years, added hyperlinks
%
% Modified by Kiri Wagstaff, 2009: changed years
%
% Modified by Sam Roweis, 2008: changed years
%
% Modified by Ricardo Silva, 2007: update of the ifpdf verification
%
% Modified by Prasad Tadepalli and Andrew Moore, merely changing years.
%
% Modified by Kristian Kersting, 2005, based on Jennifer Dy's 2004 version
% - running title. If the original title is too long or is breaking a line,
% use \mlptitlerunning{...} in the preamble to supply a shorter form.
% Added fancyhdr package to get a running head.
% - Updated to store the page size because pdflatex does compile the
% page size into the pdf.
%
% Hacked by Terran Lane, 2003:
% - Updated to use LaTeX2e style file conventions (ProvidesPackage,
% etc.)
% - Added an ``appearing in'' block at the base of the first column
% (thus keeping the ``appearing in'' note out of the bottom margin
% where the printer should strip in the page numbers).
% - Added a package option [accepted] that selects between the ``Under
% review'' notice (default, when no option is specified) and the
% ``Appearing in'' notice (for use when the paper has been accepted
% and will appear).
%
% Originally created as: ml2k.sty (LaTeX style file for ICML-2000)
% by P. Langley (12/23/99)
%%%%%%%%%%%%%%%%%%%%
%% The style file this one was derived from supports both a ``review''
%% version and a ``final/accepted'' version. The difference is only in the
%% text that appears in the note at the bottom of the first column of
%% the first page. The default behavior there is to print a note to the
%% effect that the paper is under review and should not be distributed. The
%% final/accepted version prints an ``Appearing in'' note. To get the
%% latter behavior, the ``usepackage'' line in the calling file is changed
%% from:
%% \usepackage{mlp2017}
%% to
%% \usepackage[accepted]{mlp2017}
%% NOTE: in this MLP version the declaration of the [accepted] option below
%% is commented out and \isaccepted is defined unconditionally, so the
%% final/accepted behavior is always in effect.
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{mlp2017}[2017/01/01 MLP Coursework Style File]
% Use fancyhdr package
\RequirePackage{fancyhdr}
\RequirePackage{color}
\RequirePackage{algorithm}
\RequirePackage{algorithmic}
\RequirePackage{natbib}
\RequirePackage{eso-pic} % used by \AddToShipoutPicture
\RequirePackage{forloop}
%%%%%%%% Options
%\DeclareOption{accepted}{%
% \renewcommand{\Notice@String}{\ICML@appearing}
\gdef\isaccepted{1}
%}
\DeclareOption{nohyperref}{%
\gdef\nohyperref{1}
}
\ifdefined\nohyperref\else\ifdefined\hypersetup
\definecolor{mydarkblue}{rgb}{0,0.08,0.45}
\hypersetup{ %
pdftitle={},
pdfauthor={},
pdfsubject={MLP Coursework 2017-18},
pdfkeywords={},
pdfborder=0 0 0,
pdfpagemode=UseNone,
colorlinks=true,
linkcolor=mydarkblue,
citecolor=mydarkblue,
filecolor=mydarkblue,
urlcolor=mydarkblue,
pdfview=FitH}
\ifdefined\isaccepted \else
\hypersetup{pdfauthor={Anonymous Submission}}
\fi
\fi\fi
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the
% final/accepted version of the ``appearing in'' note. Modify it to
% change that text.
%%%%%%%%%%%%%%%%%%%%
\newcommand{\ICML@appearing}{\textit{MLP Coursework 1 2017-18}}
%%%%%%%%%%%%%%%%%%%%
% This string is printed at the bottom of the page for the draft/under
% review version of the ``appearing in'' note. Modify it to change
% that text.
%%%%%%%%%%%%%%%%%%%%
\newcommand{\Notice@String}{MLP Coursework 1 2017-18}
% Cause the declared options to actually be parsed and activated
\ProcessOptions\relax
% Uncomment the following for debugging. It will cause LaTeX to dump
% the version of the ``appearing in'' string that will actually appear
% in the document.
%\typeout{>> Notice string='\Notice@String'}
% Change citation commands to be more like old ICML styles
\newcommand{\yrcite}[1]{\citeyearpar{#1}}
\renewcommand{\cite}[1]{\citep{#1}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% to ensure the A4 page format is used. pdflatex does compile the
% page size into the pdf. This is done using \pdfpagewidth and
% \pdfpageheight. As LaTeX does not know these directives, we first
% check whether pdflatex or latex is used.
%
% Kristian Kersting 2005
%
% in order to account for the more recent use of pdfetex as the default
% compiler, I have changed the pdf verification.
%
% Ricardo Silva 2007
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paperwidth=210mm
\paperheight=297mm
% old PDFLaTex verification, circa 2005
%
%\newif\ifpdf\ifx\pdfoutput\undefined
% \pdffalse % we are not running PDFLaTeX
%\else
% \pdfoutput=1 % we are running PDFLaTeX
% \pdftrue
%\fi
\newif\ifpdf %adapted from ifpdf.sty
\ifx\pdfoutput\undefined
\else
\ifx\pdfoutput\relax
\else
\ifcase\pdfoutput
\else
\pdftrue
\fi
\fi
\fi
\ifpdf
% \pdfpagewidth=\paperwidth
% \pdfpageheight=\paperheight
\setlength{\pdfpagewidth}{210mm}
\setlength{\pdfpageheight}{297mm}
\fi
% Physical page layout
\evensidemargin -5.5mm
\oddsidemargin -5.5mm
\setlength\textheight{248mm}
\setlength\textwidth{170mm}
\setlength\columnsep{6.5mm}
\setlength\headheight{10pt}
\setlength\headsep{10pt}
\addtolength{\topmargin}{-20pt}
%\setlength\headheight{1em}
%\setlength\headsep{1em}
\addtolength{\topmargin}{-6mm}
%\addtolength{\topmargin}{-2em}
%% The following is adapted from code in the acmconf.sty conference
%% style file. The constants in it are somewhat magical, and appear
%% to work well with the two-column format on US letter paper that
%% ICML uses, but will break if you change that layout, or if you use
%% a longer block of text for the copyright notice string. Fiddle with
%% them if necessary to get the block to fit/look right.
%%
%% -- Terran Lane, 2003
%%
%% The following comments are included verbatim from acmconf.sty:
%%
%%% This section (written by KBT) handles the 1" box in the lower left
%%% corner of the left column of the first page by creating a picture,
%%% and inserting the predefined string at the bottom (with a negative
%%% displacement to offset the space allocated for a non-existent
%%% caption).
%%%
\def\ftype@copyrightbox{8}
\def\@copyrightspace{
% Create a float object positioned at the bottom of the column. Note
% that because of the mystical nature of floats, this has to be called
% before the first column is populated with text (e.g., from the title
% or abstract blocks). Otherwise, the text will force the float to
% the next column. -- TDRL.
\@float{copyrightbox}[b]
\begin{center}
\setlength{\unitlength}{1pc}
\begin{picture}(20,1.5)
% Create a line separating the main text from the note block.
% 4.818pc==0.8in.
\put(0,2.5){\line(1,0){4.818}}
% Insert the text string itself. Note that the string has to be
% enclosed in a parbox -- the \put call needs a box object to
% position. Without the parbox, the text gets splattered across the
% bottom of the page semi-randomly. The 19.75pc distance seems to be
% the width of the column, though I can't find an appropriate distance
% variable to substitute here. -- TDRL.
\put(0,0){\parbox[b]{19.75pc}{\small \Notice@String}}
\end{picture}
\end{center}
\end@float}
% Note: A few Latex versions need the next line instead of the former.
% \addtolength{\topmargin}{0.3in}
% \setlength\footheight{0pt}
\setlength\footskip{0pt}
%\pagestyle{empty}
\flushbottom \twocolumn
\sloppy
% Clear out the addcontentsline command
\def\addcontentsline#1#2#3{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% commands for formatting paper title, author names, and addresses.
%%start%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% title as running head -- Kristian Kersting 2005 %%%%%%%%%%%%%
%\makeatletter
%\newtoks\mytoksa
%\newtoks\mytoksb
%\newcommand\addtomylist[2]{%
% \mytoksa\expandafter{#1}%
% \mytoksb{#2}%
% \edef#1{\the\mytoksa\the\mytoksb}%
%}
%\makeatother
% box to check the size of the running head
\newbox\titrun
% general page style
\pagestyle{fancy}
\fancyhf{}
\fancyhead{}
\fancyfoot{}
% set the width of the head rule to 1 point
\renewcommand{\headrulewidth}{1pt}
% definition to set the head as running head in the preamble
\def\mlptitlerunning#1{\gdef\@mlptitlerunning{#1}}
% main definition adapting \mlptitle from 2004
\long\def\mlptitle#1{%
%check whether @mlptitlerunning exists
% if not \mlptitle is used as running head
\ifx\undefined\@mlptitlerunning%
\gdef\@mlptitlerunning{#1}
\fi
%add it to pdf information
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdftitle={#1}}
\fi\fi
%get the dimension of the running title
\global\setbox\titrun=\vbox{\small\bf\@mlptitlerunning}
% error flag
\gdef\@runningtitleerror{0}
% running title too long
\ifdim\wd\titrun>\textwidth%
{\gdef\@runningtitleerror{1}}%
% running title breaks a line
\else\ifdim\ht\titrun>6.25pt
{\gdef\@runningtitleerror{2}}%
\fi
\fi
% if there is something wrong with the running title
\ifnum\@runningtitleerror>0
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Title exceeds size limitations for running head.}%
\typeout{Please supply a shorter form for the running head}
\typeout{with \string\mlptitlerunning{...}\space prior to \string\begin{document}}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
% set default running title
\chead{\small\bf Title Suppressed Due to Excessive Size}%
\else
% 'everything' fine, set provided running title
\chead{\small\bf\@mlptitlerunning}%
\fi
% no running title on the first page of the paper
\thispagestyle{empty}
%%%%%%%%%%%%%%%%%%%% Kristian Kersting %%%%%%%%%%%%%%%%%%%%%%%%%
%end%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
{\center\baselineskip 18pt
\toptitlebar{\Large\bf #1}\bottomtitlebar}
}
% Author-list accumulator.
% \icmlfullauthorlist holds the author names collected so far (globally).
% \addstringtofullauthorlist appends raw text to that macro.
% \addtofullauthorlist{name} appends the name, preceded by ``, '' for every
% author after the first (\icmlanyauthors records that one was already added),
% and then, unless \nohyperref is defined or \hypersetup is unavailable,
% refreshes the PDF author metadata with the accumulated list.
\gdef\icmlfullauthorlist{}
\newcommand\addstringtofullauthorlist{\g@addto@macro\icmlfullauthorlist}
\newcommand\addtofullauthorlist[1]{%
\ifdefined\icmlanyauthors%
\addstringtofullauthorlist{, #1}%
\else%
\addstringtofullauthorlist{#1}%
\gdef\icmlanyauthors{1}%
\fi%
\ifdefined\nohyperref\else\ifdefined\hypersetup%
\hypersetup{pdfauthor=\icmlfullauthorlist}%
\fi\fi}
\def\toptitlebar{\hrule height1pt \vskip .25in}
\def\bottomtitlebar{\vskip .22in \hrule height1pt \vskip .3in}
\newenvironment{icmlauthorlist}{%
\setlength\topsep{0pt}
\setlength\parskip{0pt}
\begin{center}
}{%
\end{center}
}
% \@pa{label}: print the superscript marker for one affiliation label.
% On first use of a label that has neither an @affil<label> counter nor a
% symbol registered through \icmlsetsymbol, the running @affiliationcounter is
% stepped and a new counter @affil<label> is created holding that number.
% The marker printed is the registered symbol when one exists, otherwise the
% arabic value of @affil<label>.
\newcounter{@affiliationcounter}
\newcommand{\@pa}[1]{%
% ``#1''
\ifcsname the@affil#1\endcsname
% do nothing
\else
\ifcsname @icmlsymbol#1\endcsname
% nothing
\else
\stepcounter{@affiliationcounter}%
\newcounter{@affil#1}%
\setcounter{@affil#1}{\value{@affiliationcounter}}%
\fi
\fi%
\ifcsname @icmlsymbol#1\endcsname
\textsuperscript{\csname @icmlsymbol#1\endcsname\,}%
\else
%\expandafter\footnotemark[\arabic{@affil#1}\,]%
\textsuperscript{\arabic{@affil#1}\,}%
\fi
}
%\newcommand{\icmlauthor}[2]{%
%\addtofullauthorlist{#1}%
%#1\@for\theaffil:=#2\do{\pa{\theaffil}}%
%}
% \icmlauthor{name}{labels}: typeset one author entry.
% When \isaccepted is defined: print the name in bold, followed by the
% affiliation marker (\@pa) of every entry in the comma-separated label list,
% and append the name to the full author list.
% Otherwise: print a single ``Anonymous Authors'' entry on the first call
% (guarded by \@icmlfirsttime) and nothing on later calls.
\newcommand{\icmlauthor}[2]{%
\ifdefined\isaccepted
\mbox{\bf #1}\,\@for\theaffil:=#2\do{\@pa{\theaffil}} \addtofullauthorlist{#1}%
\else
\ifdefined\@icmlfirsttime
\else
\gdef\@icmlfirsttime{1}
\mbox{\bf Anonymous Authors}\@pa{@anon} \addtofullauthorlist{Anonymous Authors}
\fi
\fi
}
\newcommand{\icmlsetsymbol}[2]{%
\expandafter\gdef\csname @icmlsymbol#1\endcsname{#2}
}
\newcommand{\icmlaffiliation}[2]{%
\ifdefined\isaccepted
\ifcsname the@affil#1\endcsname
\expandafter\gdef\csname @affilname\csname the@affil#1\endcsname\endcsname{#2}%
\else
{\bf AUTHORERR: Error in use of \textbackslash{}icmlaffiliation command. Label ``#1'' not mentioned in some \textbackslash{}icmlauthor\{author name\}\{labels here\} command beforehand. }
\typeout{}%
\typeout{}%
\typeout{*******************************************************}%
\typeout{Affiliation label undefined. }%
\typeout{Make sure \string\icmlaffiliation\space follows }
\typeout{all of \string\icmlauthor\space commands}%
\typeout{*******************************************************}%
\typeout{}%
\typeout{}%
\fi
\else % \isaccepted
% can be called multiple times... it's idempotent
\expandafter\gdef\csname @affilname1\endcsname{Anonymous Institution, Anonymous City, Anonymous Region, Anonymous Country}
\fi
}
\newcommand{\icmlcorrespondingauthor}[2]{
\ifdefined\isaccepted
\ifdefined\icmlcorrespondingauthor@text
\g@addto@macro\icmlcorrespondingauthor@text{, #1 \textless{}#2\textgreater{}}
\else
\gdef\icmlcorrespondingauthor@text{#1 \textless{}#2\textgreater{}}
\fi
\else
\gdef\icmlcorrespondingauthor@text{Anonymous Author \textless{}anon.email@domain.com\textgreater{}}
\fi
}
\newcommand{\icmlEqualContribution}{\textsuperscript{*}Equal contribution }
\newcounter{@affilnum}
\newcommand{\printAffiliationsAndNotice}[1]{%
\stepcounter{@affiliationcounter}%
{\let\thefootnote\relax\footnotetext{\hspace*{-\footnotesep}#1%
\forloop{@affilnum}{1}{\value{@affilnum} < \value{@affiliationcounter}}{
\textsuperscript{\arabic{@affilnum}}\ifcsname @affilname\the@affilnum\endcsname%
\csname @affilname\the@affilnum\endcsname%
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlaffiliation.}
\fi
}.
\ifdefined\icmlcorrespondingauthor@text
Correspondence to: \icmlcorrespondingauthor@text.
\else
{\bf AUTHORERR: Missing \textbackslash{}icmlcorrespondingauthor.}
\fi
\ \\
\Notice@String
}
}
}
%\makeatother
% \icmladdress{...}: deprecated command kept so that old documents still
% compile. Its argument is ignored; instead a bold notice is typeset that
% points the author at \icmlauthor and \icmlaffiliation.
\long\def\icmladdress#1{%
{\bf The \textbackslash{}icmladdress command is no longer used. See the example\_paper PDF .tex for usage of \textbackslash{}icmlauthor and \textbackslash{}icmlaffiliation.}
}
%% keywords as first class citizens
\def\icmlkeywords#1{%
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
% \ifdefined\nohyperref\else\ifdefined\hypersetup
% \hypersetup{pdfkeywords={#1}}
% \fi\fi
% \ifdefined\isaccepted \else
% \par {\bf Keywords:} #1%
% \fi
\ifdefined\nohyperref\else\ifdefined\hypersetup
\hypersetup{pdfkeywords={#1}}
\fi\fi
}
% modification to natbib citations
\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
% Redefinition of the abstract environment.
\renewenvironment{abstract}
{%
% Insert the ``appearing in'' copyright notice.
%\@copyrightspace
\centerline{\large\bf Abstract}
\vspace{-0.12in}\begin{quote}}
{\par\end{quote}\vskip 0.12in}
% numbered section headings with different treatment of numbers
\def\@startsection#1#2#3#4#5#6{\if@noskipsec \leavevmode \fi
\par \@tempskipa #4\relax
\@afterindenttrue
% Altered the following line to indent a section's first paragraph.
% \ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \@afterindentfalse\fi
\ifdim \@tempskipa <\z@ \@tempskipa -\@tempskipa \fi
\if@nobreak \everypar{}\else
\addpenalty{\@secpenalty}\addvspace{\@tempskipa}\fi \@ifstar
{\@ssect{#3}{#4}{#5}{#6}}{\@dblarg{\@sict{#1}{#2}{#3}{#4}{#5}{#6}}}}
\def\@sict#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname}\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec.~}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
\def\@sect#1#2#3#4#5#6[#7]#8{\ifnum #2>\c@secnumdepth
\def\@svsec{}\else
\refstepcounter{#1}\edef\@svsec{\csname the#1\endcsname\hskip 0.4em }\fi
\@tempskipa #5\relax
\ifdim \@tempskipa>\z@
\begingroup #6\relax
\@hangfrom{\hskip #3\relax\@svsec}{\interlinepenalty \@M #8\par}
\endgroup
\csname #1mark\endcsname{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}\else
\def\@svsechd{#6\hskip #3\@svsec #8\csname #1mark\endcsname
{#7}\addcontentsline
{toc}{#1}{\ifnum #2>\c@secnumdepth \else
\protect\numberline{\csname the#1\endcsname}\fi
#7}}\fi
\@xsect{#5}}
% section headings with less space above and below them
\def\thesection {\arabic{section}}
\def\thesubsection {\thesection.\arabic{subsection}}
\def\section{\@startsection{section}{1}{\z@}{-0.12in}{0.02in}
{\large\bf\raggedright}}
\def\subsection{\@startsection{subsection}{2}{\z@}{-0.10in}{0.01in}
{\normalsize\bf\raggedright}}
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-0.08in}{0.01in}
{\normalsize\sc\raggedright}}
\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
0.5ex minus .2ex}{-1em}{\normalsize\bf}}
% Footnotes
\footnotesep 6.65pt %
\skip\footins 9pt
\def\footnoterule{\kern-3pt \hrule width 0.8in \kern 2.6pt }
\setcounter{footnote}{0}
% Lists and paragraphs
\parindent 0pt
\topsep 4pt plus 1pt minus 2pt
\partopsep 1pt plus 0.5pt minus 0.5pt
\itemsep 2pt plus 1pt minus 0.5pt
\parsep 2pt plus 1pt minus 0.5pt
\parskip 6pt
\leftmargin 2em \leftmargini\leftmargin \leftmarginii 2em
\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
\leftmarginvi .5em
\labelwidth\leftmargini\advance\labelwidth-\labelsep \labelsep 5pt
\def\@listi{\leftmargin\leftmargini}
\def\@listii{\leftmargin\leftmarginii
\labelwidth\leftmarginii\advance\labelwidth-\labelsep
\topsep 2pt plus 1pt minus 0.5pt
\parsep 1pt plus 0.5pt minus 0.5pt
\itemsep \parsep}
\def\@listiii{\leftmargin\leftmarginiii
\labelwidth\leftmarginiii\advance\labelwidth-\labelsep
\topsep 1pt plus 0.5pt minus 0.5pt
\parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
\itemsep \topsep}
\def\@listiv{\leftmargin\leftmarginiv
\labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
\def\@listv{\leftmargin\leftmarginv
\labelwidth\leftmarginv\advance\labelwidth-\labelsep}
\def\@listvi{\leftmargin\leftmarginvi
\labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
\abovedisplayskip 7pt plus2pt minus5pt%
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip 0pt plus3pt%
\belowdisplayshortskip 4pt plus3pt minus3pt%
% Less leading in most fonts (due to the narrow columns)
% The choices were between 1-pt and 1.5-pt leading
\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
\def\small{\@setsize\small{10pt}\ixpt\@ixpt}
\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
\def\large{\@setsize\large{14pt}\xiipt\@xiipt}
\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
% Revised formatting for figure captions and table titles.
\newsavebox\newcaptionbox\newdimen\newcaptionboxwid
% \@makecaption{label}{text}: typeset a figure or table caption.
% The caption ``label. text'' is first measured in a temporary box. If it is
% wider than \hsize it is set as running text: the label in small slanted
% type (its width is recorded in \newcaptionboxwid) followed by the text in
% \footnotesize. Otherwise the whole caption is centred on a single line with
% a small slanted label and small text.
\long\def\@makecaption#1#2{
\vskip 10pt
\baselineskip 11pt
\setbox\@tempboxa\hbox{#1. #2}
\ifdim \wd\@tempboxa >\hsize
\sbox{\newcaptionbox}{\small\sl #1.~}
\newcaptionboxwid=\wd\newcaptionbox
\usebox\newcaptionbox {\footnotesize #2}
% \usebox\newcaptionbox {\small #2}
\else
\centerline{{\small\sl #1.} {\small #2}}
\fi}
\def\fnum@figure{Figure \thefigure}
\def\fnum@table{Table \thetable}
% Strut macros for skipping spaces above and below text in tables.
\def\abovestrut#1{\rule[0in]{0in}{#1}\ignorespaces}
\def\belowstrut#1{\rule[-#1]{0in}{#1}\ignorespaces}
\def\abovespace{\abovestrut{0.20in}}
\def\aroundspace{\abovestrut{0.20in}\belowstrut{0.10in}}
\def\belowspace{\belowstrut{0.10in}}
% Various personal itemization commands.
\def\texitem#1{\par\noindent\hangindent 12pt
\hbox to 12pt {\hss #1 ~}\ignorespaces}
\def\icmlitem{\texitem{$\bullet$}}
% To comment out multiple lines of text.
\long\def\comment#1{}
%% Line counter (not in final version). Adapted from NIPS style file by Christoph Sawade
% Vertical Ruler
% This code is, largely, from the CVPR 2010 conference style file
% ----- define vruler
\makeatletter
\newbox\icmlrulerbox
\newcount\icmlrulercount
\newdimen\icmlruleroffset
\newdimen\cv@lineheight
\newdimen\cv@boxheight
\newbox\cv@tmpbox
\newcount\cv@refno
\newcount\cv@tot
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
\def\makevruler[#1][#2][#3][#4][#5]{
\begingroup\offinterlineskip
\textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
\global\setbox\icmlrulerbox=\vbox to \textheight{%
{
\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
\cv@lineheight=#1\global\icmlrulercount=#2%
\cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
\cv@refno1\vskip-\cv@lineheight\vskip1ex%
\loop\setbox\cv@tmpbox=\hbox to0cm{ % side margin
\hfil {\hfil\fillzeros[#4]\icmlrulercount}
}%
\ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
\advance\cv@refno1\global\advance\icmlrulercount#3\relax
\ifnum\cv@refno<\cv@tot\repeat
}
}
\endgroup
}%
\makeatother
% ----- end of vruler
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
\def\icmlruler#1{\makevruler[12pt][#1][1][3][\textheight]\usebox{\icmlrulerbox}}
\AddToShipoutPicture{%
\icmlruleroffset=\textheight
\advance\icmlruleroffset by 5.2pt % top margin
\color[rgb]{.7,.7,.7}
\ifdefined\isaccepted \else
\AtTextUpperLeft{%
\put(\LenToUnit{-35pt},\LenToUnit{-\icmlruleroffset}){%left ruler
\icmlruler{\icmlrulercount}}
% \put(\LenToUnit{1.04\textwidth},\LenToUnit{-\icmlruleroffset}){%right ruler
% \icmlruler{\icmlrulercount}}
}
\fi
}
\endinput

File diff suppressed because it is too large Load Diff

View File

@ -1,43 +0,0 @@
import numpy as np
from mlp.layers import BatchNormalizationLayer
import argparse
# Command-line interface. The student ID is mandatory because it seeds the
# generated test inputs and names the output file; without it the script
# would previously fail later with an unrelated TypeError.
parser = argparse.ArgumentParser(
    description='Generate BatchNormalizationLayer test outputs for a given student ID')
parser.add_argument('--student_id', type=str, required=True,
                    help='Your student id in the format "sxxxxxxx"')
args = parser.parse_args()
student_id = args.student_id
def generate_inputs(student_id):
    """Build the student-specific test input array.

    Starts from ``np.arange(96)`` reshaped to ``(2, 3, 4, 4)`` and overwrites
    four regions with values derived from the characters of ``student_id``
    (each digit group is mapped to ``float(digits) / 10 - 5``).

    Args:
        student_id: Student ID string; characters 1-7 must be digits.

    Returns:
        Array of shape ``(2, 3, 4, 4)``.
    """
    grid = np.arange(96).reshape((2, 3, 4, 4))
    # (target region, characters of the ID that determine its fill value).
    # Applied in order: later regions overwrite earlier ones where they overlap.
    fills = (
        (np.s_[:, 0], slice(1, 3)),
        (np.s_[:, :, 1], slice(3, 5)),
        (np.s_[:, 2], slice(5, 7)),
        (np.s_[0, 1], 7),
    )
    for region, chars in fills:
        # NOTE(review): `grid` keeps the integer dtype of np.arange, so the
        # fractional fill value is truncated on assignment -- confirm this is
        # the intended behaviour.
        grid[region] = float(student_id[chars]) / 10 - 5
    return grid
# Build the test batch from the student ID and flatten each of the two
# examples to a vector. ndarray.reshape is used rather than
# np.reshape(..., newshape=...) because the `newshape` keyword is deprecated
# in recent NumPy releases.
test_inputs = generate_inputs(student_id).reshape((2, -1))
test_grads_wrt_outputs = np.arange(-48, 48).reshape((2, -1))

# produce BatchNorm Layer fprop and bprop
activation_layer = BatchNormalizationLayer(input_dim=48)
beta = np.array(48*[0.3])
gamma = np.array(48*[0.8])
activation_layer.params = [gamma, beta]
BN_fprop = activation_layer.fprop(test_inputs)
BN_bprop = activation_layer.bprop(
    test_inputs, BN_fprop, test_grads_wrt_outputs)
BN_grads_wrt_params = activation_layer.grads_wrt_params(
    test_inputs, test_grads_wrt_outputs)

# Collect the three results in one text report and write it to a file named
# after the student ID. The file is only written, so plain "w" mode suffices.
test_output = "BatchNormalization:\nFprop: {}\nBprop: {}\nGrads_wrt_params: {}\n"\
    .format(BN_fprop, BN_bprop, BN_grads_wrt_params)
with open("{}_batchnorm_test_file.txt".format(student_id), "w") as out_file:
    out_file.write(test_output)

View File

@ -1,59 +0,0 @@
import numpy as np
from mlp.layers import ConvolutionalLayer
import argparse
# Command-line interface. The student ID is mandatory because it seeds the
# generated test inputs and names the output file; without it the script
# would previously fail later with an unrelated TypeError.
parser = argparse.ArgumentParser(
    description='Generate ConvolutionalLayer test outputs for a given student ID')
parser.add_argument('--student_id', type=str, required=True,
                    help='Your student id in the format "sxxxxxxx"')
args = parser.parse_args()
student_id = args.student_id
def generate_inputs(student_id):
    """Return the ``(2, 3, 4, 4)`` test inputs derived from ``student_id``.

    The array is initialised with ``np.arange(96)`` and then four regions are
    overwritten, in order, with fill values computed from digit groups of the
    ID. Characters 1-7 of ``student_id`` must therefore be digits.
    """
    def fill_value(digits):
        # Map a run of ID digits to the value written into the array.
        return float(digits) / 10 - 5

    tests = np.arange(96).reshape((2, 3, 4, 4))
    # NOTE(review): `tests` keeps the integer dtype of np.arange, so the
    # fractional fill values are truncated on assignment -- confirm intended.
    tests[:, 0] = fill_value(student_id[1:3])
    tests[:, :, 1] = fill_value(student_id[3:5])
    tests[:, 2] = fill_value(student_id[5:7])
    tests[0, 1] = fill_value(student_id[7])
    return tests
def _conv_layer_report(layer_kernels, template):
    """Run one ConvolutionalLayer configuration and format its results.

    Builds a fresh layer with the fixed test geometry, installs
    ``layer_kernels`` and the module-level ``biases`` as its parameters, runs
    fprop, bprop and grads_wrt_params on the module-level test inputs and
    gradients, and substitutes the three results into ``template``.
    """
    layer = ConvolutionalLayer(num_input_channels=3, num_output_channels=2,
                               input_dim_1=4, input_dim_2=4,
                               kernel_dim_1=2, kernel_dim_2=2)
    layer.params = [layer_kernels, biases]
    fprop = layer.fprop(test_inputs)
    bprop = layer.bprop(test_inputs, fprop, test_grads_wrt_outputs)
    grads_wrt_params = layer.grads_wrt_params(test_inputs,
                                              test_grads_wrt_outputs)
    return template.format(fprop, bprop, grads_wrt_params)


# Fixed test data: student-specific inputs plus deterministic gradients,
# kernels and biases.
test_inputs = generate_inputs(student_id)
test_grads_wrt_outputs = np.arange(-20, 16).reshape((2, 2, 3, 3))
kernels = np.arange(-12, 12).reshape((2, 3, 2, 2))
biases = np.arange(2)

# produce ConvolutionalLayer fprop, bprop and grads_wrt_params
test_output = _conv_layer_report(
    kernels,
    "ConvolutionalLayer:\nFprop: {}\nBprop: {}\n"
    "Grads_wrt_params: {}\n")

# Repeat with the kernels flipped along both spatial axes.
cross_correlation_kernels = kernels[:, :, ::-1, ::-1]
test_cross_correlation_output = _conv_layer_report(
    cross_correlation_kernels,
    "Cross_Correlation_ConvolutionalLayer:\nFprop: {}\nBprop: {}\n"
    "Grads_wrt_params: {}\n")

test_output = test_output + "\n" + test_cross_correlation_output
# The file is only written, so plain "w" mode suffices.
with open("{}_conv_test_file.txt".format(student_id), "w") as out_file:
    out_file.write(test_output)

View File

@ -1,73 +0,0 @@
#!/bin/bash
# Configure Jupyter notebook server to use password authentication and
# HTTPS with a self-signed certificate.
#
# Usage: bash secure-notebook-server.sh [jupyter-path] [open-ssl-config-path]
#   jupyter-path          Jupyter configuration directory
#                         (default: $HOME/.jupyter)
#   open-ssl-config-path  OpenSSL configuration file
#                         (default: $CONDA_PREFIX/ssl/openssl.cnf)

# Make sure Conda environment is active as will assume it is later
[ -z "$CONDA_PREFIX" ] && echo "Need to have Conda environment activated." && exit 1
if [ "$#" -gt 2 ]; then
    echo "Usage: bash secure-notebook-server.sh [jupyter-path] [open-ssl-config-path]"
    exit 1
fi
# If specified read Jupyter directory from passed argument
JUPYTER_DIR=${1:-"$HOME/.jupyter"}
CONFIG_FILE="$JUPYTER_DIR/jupyter_notebook_config.py"
# If specified read OpenSSL config file path from passed argument
# This is needed due to bug in how Conda handles config path
export OPENSSL_CONF=${2:-"$CONDA_PREFIX/ssl/openssl.cnf"}
SEPARATOR="=================================================================\n"

# set_config_option NAME VALUE
# Make the config file contain "c.NotebookApp.NAME = u'VALUE'".
# An existing (possibly commented-out) assignment of a quoted string to that
# option is rewritten in place; if there is none, the line is appended.
# VALUE must not contain '|' or '&', which are special in the sed expression.
set_config_option() {
    local name="$1" value="$2"
    local new_line="c.NotebookApp.${name} = u'${value}'"
    local quote="['\"]"
    local pattern="^#\?c\.NotebookApp\.${name}[ ]*=[ ]*u\?${quote}[^'\"]*${quote}"
    if grep -qs "$pattern" "$CONFIG_FILE"; then
        sed -i "s|$pattern|$new_line|" "$CONFIG_FILE"
    else
        echo "$new_line" >> "$CONFIG_FILE"
    fi
}

# Create default config file if one does not already exist
if [ ! -f "$CONFIG_FILE" ]; then
    echo "No existing notebook configuration file found, creating new one ..."
    printf "$SEPARATOR"
    jupyter notebook --generate-config
    printf "$SEPARATOR"
    echo "... notebook configuration file created."
fi
# Get user to enter notebook server password
echo "Getting notebook server password hash. Enter password when prompted ..."
printf "$SEPARATOR"
HASH=$(python -c "from notebook.auth import passwd; print(passwd());")
printf "$SEPARATOR"
# Stop rather than write an empty password into the configuration.
if [ -z "$HASH" ]; then
    echo "Failed to obtain password hash." >&2
    exit 1
fi
echo "... got password hash."
# Generate self-signed OpenSSL certificate and key file
echo "Creating certificate file ..."
printf "$SEPARATOR"
mkdir -p "$JUPYTER_DIR"
CERT_INFO="/C=UK/ST=Scotland/L=Edinburgh/O=University of Edinburgh/OU=School of Informatics/CN=$USER/emailAddress=$USER@sms.ed.ac.uk"
# 2048-bit key: 1024-bit RSA keys are considered too weak by current
# OpenSSL/TLS defaults.
openssl req \
    -x509 -nodes -days 365 \
    -subj "$CERT_INFO" \
    -newkey rsa:2048 -keyout "$JUPYTER_DIR/key.key" \
    -out "$JUPYTER_DIR/cert.pem"
printf "$SEPARATOR"
echo "... certificate created."
# Setting permissions on key file
chmod 600 "$JUPYTER_DIR/key.key"
# Add password hash and certificate + key file paths to config file
echo "Setting up configuration file..."
printf "$SEPARATOR"
echo " adding password hash"
set_config_option "password" "$HASH"
echo " adding certificate file path"
set_config_option "certfile" "$JUPYTER_DIR/cert.pem"
echo " adding key file path"
set_config_option "keyfile" "$JUPYTER_DIR/key.key"
printf "$SEPARATOR"
echo "... finished setting up configuration file."

Binary file not shown.

View File

@ -1,493 +0,0 @@
\documentclass[11pt,]{article}
\usepackage[T1]{fontenc}
\usepackage{amssymb,amsmath}
\usepackage{txfonts}
\usepackage{microtype}
\usepackage{amssymb,amsmath}
\usepackage{graphicx}
\usepackage{subfigure}
\usepackage{natbib}
\usepackage{paralist}
\usepackage{hyperref}
\usepackage{url}
\urlstyle{same}
\usepackage{color}
\usepackage{fancyvrb}
\newcommand{\VerbBar}{|}
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\newenvironment{Shaded}{}{}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}
\hypersetup{breaklinks=true,
pdfauthor={},
pdftitle={},
colorlinks=true,
citecolor=blue,
urlcolor=blue,
linkcolor=magenta,
pdfborder={0 0 0}}
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
\setlength{\emergencystretch}{3em} % prevent overfull lines
\setcounter{secnumdepth}{0}
\usepackage[a4paper,body={170mm,250mm},top=25mm,left=25mm]{geometry}
\usepackage[sf,bf,small]{titlesec}
\usepackage{fancyhdr}
\pagestyle{fancy}
\lhead{\sffamily MLP Coursework 1}
\rhead{\sffamily Due: 30 October 2017}
\cfoot{\sffamily \thepage}
\author{}
\date{}
\DeclareMathOperator{\softmax}{softmax}
\DeclareMathOperator{\sigmoid}{sigmoid}
\DeclareMathOperator{\sgn}{sgn}
\DeclareMathOperator{\relu}{relu}
\DeclareMathOperator{\lrelu}{lrelu}
\DeclareMathOperator{\elu}{elu}
\DeclareMathOperator{\selu}{selu}
\DeclareMathOperator{\maxout}{maxout}
\begin{document}
\section{Machine Learning Practical: Coursework
1}
\label{sec:machine-learning-practical-coursework-1}
\textbf{Release date: Monday 16th October 2017}\\
\textbf{Due date: 16:00 Monday 30th October 2017}
\subsection{Introduction}
\label{sec:introduction}
This coursework is concerned with training multi-layer networks to
address the MNIST digit classification problem. It builds on the
material covered in the first three lab notebooks and the first four
lectures. \textbf{You should complete the first three lab
notebooks before starting the coursework.} The aim of the coursework is
to investigate variants of the ReLU activation function for hidden units
in multi-layer networks, with respect to the validation set accuracies
achieved by the trained models.
\subsection{Code}
\label{sec:code}
You should run all of the experiments for the coursework inside the
Conda environment you set up in the first labs. The code for the coursework is available on the course
\href{https://github.com/CSTR-Edinburgh/mlpractical/}{Github repository}
on a branch \texttt{mlp2017-8/coursework1}. To create a local working
copy of this branch in your local repository you need to do the
following.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\itemsep1pt\parskip0pt\parsep0pt
\item
Make sure all modified files on the branch you are currently on have been
committed
(\href{https://github.com/CSTR-Edinburgh/mlpractical/blob/mlp2017-8/master/notes/getting-started-in-a-lab.md}{see
details here} if you are unsure how to do this).
\item
Fetch changes to the upstream \texttt{origin} repository by running\\
\texttt{git fetch origin}
\item
Checkout a new local branch from the fetched branch using\\
\texttt{git checkout -b coursework1 origin/mlp2017-8/coursework1}
\end{enumerate}
You will now have a new branch in your local repository with all the
code necessary for the coursework in it. In the \texttt{notebooks}
directory there is a notebook \texttt{Coursework\_1.ipynb} which is
intended as a starting point for structuring the code for your
experiments. You will probably want to add additional code cells to this
as you go along and run new experiments (e.g.~doing each new training
run in a new cell). You may also wish to use Markdown cells to keep
notes on the results of experiments.
There will also be a \verb+report+ directory which contains the LaTeX template and style files for the report. You should copy all these files into the directory which will contain your report.
\subsection{Standard network
architecture}
\label{sec:standard-network-architecture}
To make the results of your experiments more easily comparable, you
should try to keep as many of the free choices in the specification of
the model and learning problem the same across different experiments. If
you vary only a small number of aspects of the problem at a time this
will make it easier to interpret the effect of those changes.
In these experiments you should use a multi-layer network with two hidden layers
(corresponding to three affine transformations) and a softmax output layer. The initial baseline
should use a sigmoid activation function for the hidden layers; other experiments will explore
different nonlinear activation functions. The hidden layers should each contain 100 hidden units.
The baseline network can thus be defined with the following code (which should be familiar to you from Lab 3):
\begin{Shaded}
\begin{Highlighting}[]
\CharTok{import} \NormalTok{numpy }\CharTok{as} \NormalTok{np}
\CharTok{from} \NormalTok{mlp.layers }\CharTok{import} \NormalTok{AffineLayer, SoftmaxLayer, SigmoidLayer}
\CharTok{from} \NormalTok{mlp.errors }\CharTok{import} \NormalTok{CrossEntropySoftmaxError}
\CharTok{from} \NormalTok{mlp.models }\CharTok{import} \NormalTok{MultipleLayerModel}
\CharTok{from} \NormalTok{mlp.initialisers }\CharTok{import} \NormalTok{ConstantInit, GlorotUniformInit}
\NormalTok{seed = }\DecValTok{10102016}
\NormalTok{rng = np.random.RandomState(seed)}
\NormalTok{input_dim, output_dim, hidden_dim = }\DecValTok{784}\NormalTok{, }\DecValTok{10}\NormalTok{, }\DecValTok{100}
\NormalTok{weights_init = GlorotUniformInit(rng=rng)}
\NormalTok{biases_init = ConstantInit(}\DecValTok{0}\NormalTok{.)}
\NormalTok{model = MultipleLayerModel([}
\NormalTok{AffineLayer(input_dim, hidden_dim, weights_init, biases_init),}
\NormalTok{SigmoidLayer(),}
\NormalTok{AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init),}
\NormalTok{SigmoidLayer(),}
\NormalTok{AffineLayer(hidden_dim, output_dim, weights_init, biases_init)}
\NormalTok{])}
\NormalTok{error = CrossEntropySoftmaxError()}
\end{Highlighting}
\end{Shaded}
Here we are using the Glorot initialisation scheme, discussed in lecture 4. In part 2B of this coursework you will explore the effect of different initialisation schemes.
The above code creates a network using sigmoid hidden layers; you should modify it to also create a network using ReLU activation functions (see Lab 3). These two networks will form your baseline systems.
As well as standardising the network architecture, you should also fix
the hyperparameters of the training procedure not being investigated to
be the same across different runs. In particular for all experiments you
should use a \textbf{batch size of 50 and train for a total of 100
epochs} for all reported runs. You may of course use a smaller number of
epochs for initial pilot runs.
\subsection{Part 1: Implementing Activation Functions}
\label{sec:actfns}
In the first part of the assignment you will implement three further
activation functions, each of which is related to ReLU \citep{nair2010rectified}: Leaky ReLU, ELU (Exponential Linear Unit), and SELU (Scaled Exponential Linear Unit). Each of these units defines an activation function for which $f(x) = x$ when $x>0$, as for ReLU, but avoids having a zero gradient when $x<0$.
\textbf{Leaky ReLU} ($\lrelu(x)$) \citep{maas2013rectifier} has the following form:
\begin{equation}
\lrelu(x) =
\begin{cases}
\alpha x & \quad \text{if } x \leq 0 \\
x & \quad \text{if } x > 0 \\
\end{cases}
\end{equation}
Where $\alpha$ is a constant; typically $\alpha=0.01$, and you can use this value in this coursework. Note that $\alpha$ can be taken to be a parameter which is learned by back-propagation along with the weights and biases -- this is called Parametric ReLU (PReLU).
\textbf{ELU} ($\elu(x)$) \citep{clevert2015fast} has the following form:
\begin{equation}
\elu(x) =
\begin{cases}
\alpha (\exp(x) - 1) & \quad \text{if } x \leq 0 \\
x & \quad \text{if } x > 0 \\
\end{cases}
\end{equation}
Again $\alpha$ can be taken as a constant or a tunable parameter. Typically $\alpha=1$, which results in a smooth function, and you can use this value in this coursework.
\textbf{SELU} ($\selu(x)$) \citep{klambauer2017self} has the following form:
\begin{equation}
\selu(x) =
\lambda \begin{cases}
\alpha (\exp(x) - 1) & \quad \text{if } x \leq 0 \\
x & \quad \text{if } x > 0 \\
\end{cases}
\end{equation}
In the case of SELU, there is a theoretical argument for optimal values of the two parameters: $\alpha \approx 1.6733$ and $\lambda \approx 1.0507$, and you can use these values in this coursework.
\begin{enumerate}
\item Implement each of these activation functions as classes \verb+LeakyReluLayer+, \verb+EluLayer+ and \verb+SeluLayer+. You need to implement \verb+fprop+ and \verb+bprop+ methods for each class.
\item Verify the correctness of your implementation using the supplied unit tests in \verb+Activation_Tests.ipynb+.
\item Automatically create a test file \verb+sXXXXXXX_test_file.txt+, by running the provided program \verb+generate_inputs.py+ which uses your code for \verb+LeakyReluLayer+, \verb+EluLayer+ and \verb+SeluLayer+ to run your fprop and bprop methods for each layer on a unique test vector generated using your student ID number.
\end{enumerate}
For Part 1 of the coursework you need to submit the test file \verb+sXXXXXXX_test_file.txt+ (where sXXXXXXX is replaced with your student number) created in step 3 above.
\subsection{Part 2: MNIST Experiments}
\label{sec:expts}
In Part 2 of the coursework you will experiment with \verb+LeakyReluLayer+, \verb+EluLayer+ and \verb+SeluLayer+ in multi-layer networks trained on MNIST.
\subsubsection{2A: Comparing activation functions}
In this sub-part you should compare the behaviour of Leaky ReLU, ELU, and SELU activation functions on the MNIST task. Carry out all experiments using 2 hidden layers, with 100 units per hidden layer. You should compare the results with baseline systems of the same architecture using sigmoid units and using ReLU units.
\subsubsection{2B: Deep neural network experiments}
In this subpart you will explore the behaviour of deeper networks. Based on the results of Part 2A, choose one activation function, and compare networks with 2--8 hidden layers, using 100 hidden units per hidden layer.
Also compare the effect of different initialisation strategies, as discussed in lecture 4. First look at the effect of weight initialisation based on
\begin{compactitem}
\item Fan-in: $w_i \sim U\left(-\sqrt{3/n_{in}}, \sqrt{3/n_{in}}\right)$
\item Fan-out: $w_i \sim U\left(-\sqrt{3/n_{out}}, \sqrt{3/n_{out}}\right)$
\item Fan-in and Fan-out: $w_i \sim U \left(-\sqrt{6/(n_{in}+n_{out})}, \sqrt{6/(n_{in}+n_{out})}\right)$
\end{compactitem}
where $U$ is the uniform distribution. The first of these corresponds to constraining the estimated variance of a unit to be independent of the number of incoming connections ($n_{in}$); the second to constraining the estimated variance of a unit's gradient to be independent of the number of outgoing connections ($n_{out}$); the third corresponds to Glorot and Bengio's combined initialisation.
Additionally you could also explore the effect of drawing from a Gaussian distribution compared with a uniform distribution. In particular you might like to explore initialising a SELU layer drawing from a Gaussian with mean 0 and variance $1/n_{out}$ as recommended by \cite{klambauer2017self}.
For Part 2 of the coursework you need to write and submit a report, using the template provided, in the directory \verb+report+. Please read the template document \verb+mlp-cw1-template.pdf+ very carefully, as it provides advice and instructions on writing your report. You can use the LaTeX source file \verb+mlp-cw1-template.tex+ as a template for your report (see below, in the section 'Report').
It is highly recommended that you use LaTeX for your report. If you have not used LaTeX previously, now is a good time to learn how to use it!
\subsection{Backing up your work}
\label{sec:backing-up-your-work}
It is \textbf{strongly recommended} you use some method for backing up
your work. Those working in their AFS homespace on DICE will have their
work automatically backed up as part of the
\href{http://computing.help.inf.ed.ac.uk/backups-and-mirrors}{routine
backup} of all user homespaces. If you are working on a personal
computer you should have your own backup method in place (e.g.~saving
additional copies to an external drive, syncing to a cloud service or
pushing commits to your local Git repository to a private repository on
Github). \textbf{Loss of work through failure to back up
\href{http://tinyurl.com/edinflate}{does not constitute a good reason for
late submission}}.
You may \emph{additionally} wish to keep your coursework under version
control in your local Git repository on the \texttt{coursework1} branch.
% This does not need to be limited to the coursework notebook and
% \texttt{mlp} Python modules - you can also add your report document to
% the repository.
If you make regular commits of your work on the coursework this will
allow you to better keep track of the changes you have made and if
necessary revert to previous versions of files and/or restore
accidentally deleted work. This is not however required and you should
note that keeping your work under version control is a distinct issue
from backing up to guard against hard drive failure. If you are working
on a personal computer you should still keep an additional back up of
your work as described above.
\subsection{Report}
\label{sec:report}
Part two of your coursework submission, worth 70 marks, will be a report. The directory
\verb+coursework1/report+ contains a template for your report (\verb+mlp-cw1-template.tex+); the generated pdf file (\verb+mlp-cw1-template.pdf+) is also provided, and you should read this file carefully as it contains information about the required structure and experimentation. The template is written in LaTeX, and we strongly recommend that you write your own report using LaTeX, using the supplied document style \verb+mlp2017+ (as in the template).
You should copy the files in the \verb+report+ directory to the directory containing the LaTeX file of your report, as \verb+pdflatex+ will need to access these files when building the pdf document from the LaTeX source file.
Your report should be in a 2-column format, based on the document format used for the ICML conference. The report should be a \textbf{maximum of 6 pages long}, with a further page for references. We will not read or assess any parts of the report beyond the allowed 6+1 pages.
Ideally, all figures should be included in your report file as
\href{https://en.wikipedia.org/wiki/Vector_graphics}{vector graphics}
rather than \href{https://en.wikipedia.org/wiki/Raster_graphics}{raster
files} as this will make sure all detail in the plot is visible.
Matplotlib supports saving high quality figures in a wide range of
common image formats using the
\href{http://matplotlib.org/api/pyplot_api.html\#matplotlib.pyplot.savefig}{\texttt{savefig}}
function. \textbf{You should use \texttt{savefig} rather than copying
the screen-resolution raster images outputted in the notebook.} An
example of using \texttt{savefig} to save a figure as a PDF file (which
can be included as graphics in
\href{https://en.wikibooks.org/wiki/LaTeX/Importing_Graphics}{LaTeX}
compiled with \texttt{pdflatex} and in Apple Pages and
\href{https://support.office.com/en-us/article/Add-a-PDF-to-your-Office-file-74819342-8f00-4ab4-bcbe-0f3df15ab0dc}{Microsoft
Word} documents) is given below.
\begin{Shaded}
\begin{Highlighting}[]
\CharTok{import} \NormalTok{matplotlib.pyplot }\CharTok{as} \NormalTok{plt}
\CharTok{import} \NormalTok{numpy }\CharTok{as} \NormalTok{np}
\CommentTok{# Generate some example data to plot}
\NormalTok{x = np.linspace(}\DecValTok{0}\NormalTok{., }\DecValTok{1}\NormalTok{., }\DecValTok{100}\NormalTok{)}
\NormalTok{y1 = np.sin(}\DecValTok{2}\NormalTok{. * np.pi * x)}
\NormalTok{y2 = np.cos(}\DecValTok{2}\NormalTok{. * np.pi * x)}
\NormalTok{fig_size = (}\DecValTok{6}\NormalTok{, }\DecValTok{3}\NormalTok{) }\CommentTok{# Set figure size in inches (width, height)}
\NormalTok{fig = plt.figure(figsize=fig_size) }\CommentTok{# Create a new figure object}
\NormalTok{ax = fig.add_subplot(}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{) }\CommentTok{# Add a single axes to the figure}
\CommentTok{# Plot lines giving each a label for the legend and setting line width to 2}
\NormalTok{ax.plot(x, y1, linewidth=}\DecValTok{2}\NormalTok{, label=}\StringTok{'$y = \textbackslash{}sin(2\textbackslash{}pi x)$'}\NormalTok{)}
\NormalTok{ax.plot(x, y2, linewidth=}\DecValTok{2}\NormalTok{, label=}\StringTok{'$y = \textbackslash{}cos(2\textbackslash{}pi x)$'}\NormalTok{)}
\CommentTok{# Set the axes labels. Can use LaTeX in labels within $...$ delimiters.}
\NormalTok{ax.set_xlabel(}\StringTok{'$x$'}\NormalTok{, fontsize=}\DecValTok{12}\NormalTok{)}
\NormalTok{ax.set_ylabel(}\StringTok{'$y$'}\NormalTok{, fontsize=}\DecValTok{12}\NormalTok{)}
\NormalTok{ax.grid(}\StringTok{'on'}\NormalTok{) }\CommentTok{# Turn axes grid on}
\NormalTok{ax.legend(loc=}\StringTok{'best'}\NormalTok{, fontsize=}\DecValTok{11}\NormalTok{) }\CommentTok{# Add a legend}
\NormalTok{fig.tight_layout() }\CommentTok{# This minimises whitespace around the axes.}
\NormalTok{fig.savefig(}\StringTok{'file-name.pdf'}\NormalTok{) }\CommentTok{# Save figure to current directory in PDF format}
\end{Highlighting}
\end{Shaded}
(If you are using Libre/OpenOffice you should use Scalable Vector Graphics (SVG)
plots instead using \\
\texttt{fig.savefig('file-name.svg')}. If the
document editor you are using for the report does not support including
either PDF or SVG graphics you can instead output high-resolution raster
images using \texttt{fig.savefig('file-name.png', dpi=200)} however note
these files will generally be larger than either SVG or PDF formatted
graphics.)
However to emphasise again: \textbf{It is highly recommended that you use LaTeX.}
If you make use of any books, articles, web pages or other resources
you should appropriately cite these in your report. You do not need to
cite material from the course lecture slides or lab notebooks.
To create a pdf file \verb+mlp-cw1-template.pdf+ from a LaTeX source file (\verb+mlp-cw1-template.tex+), you can run the following in a terminal:
\begin{verbatim}
pdflatex mlp-cw1-template
bibtex mlp-cw1-template
pdflatex mlp-cw1-template
pdflatex mlp-cw1-template
\end{verbatim}
(Yes, you have to run pdflatex multiple times, in order for latex to construct the internal document references.)
An alternative, simpler approach uses the \verb+latexmk+ program:
\begin{verbatim}
latexmk -pdf mlp-cw1-template
\end{verbatim}
It is worth learning how to use LaTeX effectively, as it is particularly powerful for mathematical and academic writing. There are many tutorials on the web.
\subsection{Mechanics}
\label{sec:mechanics}
\textbf{Marks:}
This assignment will be assessed out of 100 marks and
forms 10\% of your final grade for the course.
\textbf{Academic conduct:}
Assessed work is subject to University
regulations on academic
conduct:\\\url{http://web.inf.ed.ac.uk/infweb/admin/policies/academic-misconduct}
\textbf{Submission:}
You can submit more than once up until the submission deadline. All
submissions are timestamped automatically. Identically named files
will overwrite earlier submitted versions, so we will mark the latest
submission that comes in before the deadline.
If you submit anything before the deadline, you may not resubmit
afterward. (This policy allows us to begin marking submissions
immediately after the deadline, without having to worry that some may
need to be re-marked).
If you do not submit anything before the deadline, you may submit {\em
exactly once} after the deadline, and a late penalty will be applied
to this submission unless you have received an approved extension.
Please be aware that late submissions may receive lower priority for
marking, and marks may not be returned within the same timeframe as
for on-time submissions.
{\em Warning:} Unfortunately the \verb+submit+ command will technically
allow you to submit late even if you submitted before the deadline
(i.e.\ it does not enforce the above policy). Don't do this! We will
mark the version that we retrieve just after the deadline, and (even
worse) you may still be penalized for submitting late because the
timestamp will update.
For additional information about late penalties and extension
requests, see the School web page below. Do {\bf not} email any course
staff directly about extension requests; you must follow the
instructions on the web page.
\url{http://web.inf.ed.ac.uk/infweb/student-services/ito/admin/coursework-projects/late-coursework-extension-requests}
\textbf{Late submission penalty:}
Following the University guidelines,
late coursework submitted without an authorised extension will be
recorded as late and the following penalties will apply: 5
percentage points will be deducted for every calendar day or part
thereof it is late, up to a maximum of 7 calendar days. After this
time a mark of zero will be recorded.
\subsection{Submission}
\label{sec:submission}
Your coursework submission should be done electronically using the
\href{http://computing.help.inf.ed.ac.uk/submit}{\texttt{submit}}
command available on DICE machines.
Your submission should include
\begin{itemize}
\itemsep1pt\parskip0pt\parsep0pt
\item
the unit test file generated in part 1, \verb+sXXXXXXX_test_file.txt+, where your student number replaces \verb+sXXXXXXX+
\item
your completed report as a PDF file, using the provided template
\item
the notebook (\verb+.ipynb+) file you used to run the experiments in
\item
and your local version of the \texttt{mlp} code including any changes
you made to the modules (\texttt{.py} files).
\end{itemize}
You should copy all of the files to a single directory, \verb+coursework1+, e.g.
\begin{verbatim}
mkdir coursework1
cp notebooks/Coursework_1.ipynb mlp/*.py coursework1
cp reports/coursework1.pdf reports/sXXXXXXX_test_file.txt coursework1
\end{verbatim}
and then submit this directory using
\begin{verbatim}
submit mlp cw1 coursework1
\end{verbatim}
The \texttt{submit} command will prompt you with the details of the
submission including the name of the files / directories you are
submitting and the name of the course and exercise you are submitting
for and ask you to check if these details are correct. You should check
these carefully and reply \texttt{y} to submit if you are sure the files
are correct and \texttt{n} otherwise.
You can amend an existing submission by rerunning the \texttt{submit}
command any time up to the deadline. It is therefore a good idea
(particularly if this is your first time using the DICE submit
mechanism) to do an initial run of the \texttt{submit} command early on
and then rerun the command if you make any further updates to your
submission rather than leaving submission to the last minute.
\subsection{Marking Scheme}
\label{sec:marking-scheme}
\begin{itemize}
\item
Part 1, Activation function implementation (30 marks). Based on your submitted test file.
\item
Part 2, Report (70 marks). The following aspects will contribute to the mark for your report:
\begin{itemize}
\item Abstract - how clear is it? does it cover what is reported in the document?
\item Introduction - do you clearly outline and motivate the paper, and describe the research questions investigated?
\item Description of activation functions -- is it clear and correct?
\item Experiments -- did you carry out the experiments correctly? are the results clearly presented and described?
\item Interpretation and discussion of results
\item Conclusions
\item Presentation and clarity of report
\end{itemize}
\end{itemize}
\bibliographystyle{plainnat}
\bibliography{cw1-references}
\end{document}

Binary file not shown.

View File

@ -1,408 +0,0 @@
\documentclass[11pt,]{article}
\usepackage[T1]{fontenc}
\usepackage{amssymb,amsmath}
\usepackage{txfonts}
\usepackage{microtype}
\usepackage{amssymb,amsmath}
\usepackage{graphicx}
\usepackage{subfigure}
\usepackage{natbib}
\usepackage{paralist}
\usepackage{hyperref}
\usepackage{url}
\urlstyle{same}
\usepackage{color}
\usepackage{fancyvrb}
\newcommand{\VerbBar}{|}
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\newenvironment{Shaded}{}{}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}
\hypersetup{breaklinks=true,
pdfauthor={},
pdftitle={},
colorlinks=true,
citecolor=blue,
urlcolor=blue,
linkcolor=magenta,
pdfborder={0 0 0}}
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
\setlength{\emergencystretch}{3em} % prevent overfull lines
\setcounter{secnumdepth}{1}
\usepackage[a4paper,body={170mm,250mm},top=25mm,left=25mm]{geometry}
\usepackage[sf,bf,small]{titlesec}
\usepackage{fancyhdr}
\pagestyle{fancy}
\lhead{\sffamily MLP Coursework 2}
\rhead{\sffamily Due: 28 November 2017}
\cfoot{\sffamily \thepage}
\author{}
\date{}
\DeclareMathOperator{\softmax}{softmax}
\DeclareMathOperator{\sigmoid}{sigmoid}
\DeclareMathOperator{\sgn}{sgn}
\DeclareMathOperator{\relu}{relu}
\DeclareMathOperator{\lrelu}{lrelu}
\DeclareMathOperator{\elu}{elu}
\DeclareMathOperator{\selu}{selu}
\DeclareMathOperator{\maxout}{maxout}
\begin{document}
\begin{center}
\textsf{\textbf{\Large Machine Learning Practical: Coursework 2}}
\bigskip
\textbf{Release date: Monday 6th November 2017}
\textbf{Due date: 16:00 Tuesday 28th November 2017}
\end{center}
\section{Introduction}
\label{sec:introduction}
% This coursework is concerned with training multi-layer networks to
% address the MNIST digit classification problem. It builds on the
% material covered in the first three lab notebooks and the first four
% lectures. \textbf{You should complete the first three lab
% notebooks before starting the coursework.} The aim of the coursework is
% to investigate variants of the ReLU activation function for hidden units
% in multi-layer networks, with respect to the validation set accuracies
% achieved by the trained models.
The aim of this coursework is to further explore the classification of images of handwritten digits using neural networks. We'll be using an extended version of the MNIST database, the EMNIST Balanced dataset, described in Section~\ref{sec:emnist}. Part A of the coursework will consist of building baseline deep neural networks for the EMNIST classification task, implementation and experimentation of the Adam and RMSProp learning rules, and implementation and experimentation of Batch Normalisation. Part B will concern implementation and experimentation of convolutional networks. As with the previous coursework, you will need to hand in test files generated from your code, and a report.
\section{Dataset}
\label{sec:emnist}
In this coursework we shall use the EMNIST (Extended MNIST) Balanced dataset, \url{https://www.nist.gov/itl/iad/image-group/emnist-dataset} \citep{cohen2017emnist}. EMNIST extends MNIST by including images of handwritten letters (upper and lower case) as well as handwritten digits. Both EMNIST and MNIST are extracted from the same underlying dataset, referred to as NIST Special Database 19. Both use the same conversion process resulting in centred images of dimension 28$\times$28.
Although there are 62 potential classes for EMNIST (10 digits, 26 lower case letters, and 26 upper case letters) we shall use a reduced label set of 47 different labels. This is because of confusions which arise when trying to discriminate upper-case and lower-case versions of the same letter, following the data conversion process. In the 47 label set, upper- and lower-case labels are merged for the following letters: C, I, J, K, L, M, O, P, S, U, V, W, X, Y and Z.
The training set for Balanced EMNIST has about twice the number of examples as MNIST, thus you should expect the run-time of your experiments to be about twice as long. The expected accuracy rates are lower for EMNIST than for MNIST (as EMNIST has more classes, and more confusable examples), and differences in accuracy between different systems should be larger. See \citet{cohen2017emnist} for some baseline results on EMNIST, as well as a description of the dataset.
You don't need to download the EMNIST database from the NIST website; it will be part of the \verb+coursework_2+ branch from the \verb+mlpractical+ Github repository, discussed in Section~\ref{sec:code} below.
\section{Code}
\label{sec:code}
You should run all of the experiments for the coursework inside the
Conda environment you set up in the first labs. The code for the coursework
is available on the course
\href{https://github.com/CSTR-Edinburgh/mlpractical/}{Github repository}
on a branch \verb+mlp2017-8/coursework_2+. To create a local working
copy of this branch in your local repository you need to do the
following.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\itemsep1pt\parskip0pt\parsep0pt
\item
Make sure all modified files on the branch you are currently on have been
committed
(\href{https://github.com/CSTR-Edinburgh/mlpractical/blob/mlp2017-8/master/notes/getting-started-in-a-lab.md}{see
details here} if you are unsure how to do this).
\item
Fetch changes to the upstream \texttt{origin} repository by running\\
\texttt{git fetch origin}
\item
Checkout a new local branch from the fetched branch using\\
\verb+git checkout -b coursework_2 origin/mlp2017-8/coursework_2+
\end{enumerate}
You will now have a new branch in your local repository with all the
code necessary for the coursework in it.
This branch includes the following additions to your setup:
\begin{itemize}
\itemsep1pt\parskip0pt\parsep0pt
\item
A notebook \verb+BatchNormalizationLayer_tests.ipynb+ which includes
test functions to check the implementations of the BatchNorm layer
\texttt{fprop}, \texttt{bprop} and \texttt{grads\_wrt\_params}
methods. The BatchNormalizationLayer skeleton code can be found in mlp.layers.
The tests use the mlp.layers implementation so be sure to reload your notebook
when you update your mlp.layers code.
\item
A notebook \verb+ConvolutionalLayer_tests.ipynb+ which includes
test functions to check the implementations of the Convolutional layer
\texttt{fprop}, \texttt{bprop} and \texttt{grads\_wrt\_params}
methods. The ConvolutionalLayer skeleton code can be found in mlp.layers.
The tests use the mlp.layers implementation so be sure to reload your notebook
when you update your mlp.layers code.
\item
A new \texttt{ReshapeLayer} class in the \verb+mlp.layers+ module.
When included in a multiple layer model, this allows the output of
the previous layer to be reshaped before being forward propagated to
the next layer.
\item
A new \texttt{EMNISTDataProvider} class in the \verb+mlp.data_providers+ module.
This class is a small change to the \texttt{MNISTDataProvider} class, linking to the Balanced EMNIST data, and setting the number of classes to 47.
\item
Training, validation, and test sets for the \texttt{EMNIST Balanced} dataset that
you will use in this coursework
\end{itemize}
% In the \texttt{notebooks}
% directory there is a notebook \verb+Coursework_1.ipynb+ which is
% intended as a starting point for structuring the code for your
% experiments. You will probably want to add additional code cells to this
% as you go along and run new experiments (e.g.~doing each new training
% run in a new cell). You may also wish to use Markdown cells to keep
% notes on the results of experiments.
There will also be a \verb+coursework_2/report+ directory which contains the LaTeX template and style files for the report. You should copy all these files into the directory which will contain your report.
\section{Tasks}
\subsection*{Part A: Deep Neural Networks}
In part A of the coursework you will focus on using deep neural networks on EMNIST, and you should implement the Adam and RMSProp learning rules, and Batch Normalisation.
\begin{enumerate}
\item Perform baseline experiments using DNNs trained on EMNIST. Obviously there are a lot of things that could be explored including hidden unit activation functions, network architectures, training hyperparameters, and the use of regularisation and dropout. You cannot explore everything and it is best to carefully investigate a few things in depth.
\item Implement the RMSProp \citep{tieleman2012rmsprop} and Adam \citep{kingma2015adam} learning rules, by defining new classes inheriting from \texttt{GradientDescentLearningRule} in the \texttt{mlp/learning\_rules.py} module. The \texttt{MomentumLearningRule} class is an example of how to define a learning rule which uses an additional state variable to calculate the updates to the parameters.
\item Perform experiments to compare stochastic gradient descent, RMSProp, and Adam for deep neural network training on EMNIST, building on your earlier baseline experiments.
\item Implement batch normalisation \citep{ioffe2015batch} as a class \verb+BatchNormalizationLayer+. You need to implement \texttt{fprop}, \texttt{bprop} and \texttt{grads\_wrt\_params} methods for this class.
\item Verify the correctness of your implementation using the supplied unit tests in\\\verb+BatchNormalizationLayer_tests.ipynb+.
\item Automatically create a test file \verb+sXXXXXXX_batchnorm_test.txt+, by running the provided program \verb+generate_batchnorm_test.py+ which uses your \verb+BatchNormalizationLayer+ class methods on a unique test vector generated using your student ID number.
\item Perform experiments on EMNIST to investigate the impact of using batch normalisation in deep neural networks, building on your earlier experiments.
\end{enumerate}
In the above experiments you should use the validation set to assess accuracy. Use the test set at the end to assess the accuracy of the deep neural network architecture and training setup that you judge to be the best.
\subsection*{Part B: Convolutional Networks}
In part B of the coursework you should implement convolutional and max-pooling layers, and carry out experiments using convolutional networks with one and two convolutional layers.
\begin{enumerate}
\item Implement a convolutional layer as a class \verb+ConvolutionalLayer+. You need to implement \texttt{fprop}, \texttt{bprop} and \texttt{grads\_wrt\_params} methods for this class.
\item Verify the correctness of your implementation using the supplied unit tests in\\\verb+ConvolutionalLayer_tests.ipynb+.
\item Automatically create a test file \verb+sXXXXXXX_conv_test.txt+, by running the provided program \verb+generate_conv_test.py+ which uses your \verb+ConvolutionalLayer+ class methods on a unique test vector generated using your student ID number.
\item Implement a max-pooling layer. Non-overlapping pooling (which was assumed in the lecture presentation) is required. You may also implement a more generic solution with striding as well.
\item Construct and train networks containing one and two convolutional layers, and max-pooling layers, using the Balanced EMNIST data, reporting your experimental results. As a default use convolutional kernels of dimension 5x5 (stride 1) and pooling regions of 2x2 (stride 2, hence non-overlapping). As a default for convolutional networks with two convolutional layers, investigate a network with two convolutional+maxpooling layers with 5 feature maps in the first convolutional layer, and 10 feature maps in the second convolutional layer.
\end{enumerate}
As before you should mainly use the validation set to assess accuracy, using the test set to assess the accuracy of the convolutional network you judge to be the best.
\section{Unit Tests}
\label{sec:tests}
Part one of your coursework submission will be the test files generated for batch normalisation (\verb+sXXXXXXX_batchnorm_test.txt+) and for the convolutional layer (\verb+sXXXXXXX_conv_test.txt+), as described above. Please do not change the names of these files as they will be automatically verified.
\section{Report}
\label{sec:report}
Part two of your coursework submission, worth 70 marks, will be a report. The directory
\verb+coursework_2/report+ contains a template for your report (\verb+mlp-cw2-template.tex+); the generated pdf file (\verb+mlp-cw2-template.pdf+) is also provided, and you should read this file carefully as it contains information about the required structure and experimentation. The template is written in LaTeX, and we strongly recommend that you write your own report using LaTeX, using the supplied document style \verb+mlp2017+ (as in the template).
You should copy the files in the \verb+report+ directory to the directory containing the LaTeX file of your report, as \verb+pdflatex+ will need to access these files when building the pdf document from the LaTeX source file.
Your report should be in a 2-column format, based on the document format used for the ICML conference. The report should be a \textbf{maximum of 7 pages long}, with a further page for references. We will not read or assess any parts of the report beyond the allowed 7+1 pages.
As before, all figures should be included in your report file as vector graphics;
please see the section in \verb+coursework1.pdf+ about how to do this.
If you make use of any books, articles, web pages or other resources
you should appropriately cite these in your report. You do not need to
cite material from the course lecture slides or lab notebooks.
To create a pdf file \verb+mlp-cw2-template.pdf+ from a LaTeX source file (\verb+mlp-cw2-template.tex+), you can run the following in a terminal:
\begin{verbatim}
pdflatex mlp-cw2-template
bibtex mlp-cw2-template
pdflatex mlp-cw2-template
pdflatex mlp-cw2-template
\end{verbatim}
(Yes, you have to run pdflatex multiple times, in order for latex to construct the internal document references.)
An alternative, simpler approach uses the \verb+latexmk+ program:
\begin{verbatim}
latexmk -pdf mlp-cw2-template
\end{verbatim}
It is worth learning how to use LaTeX effectively, as it is particularly powerful for mathematical and academic writing. There are many tutorials on the web.
\section{Mechanics}
\label{sec:mechanics}
\textbf{Marks:}
This assignment will be assessed out of 100 marks and
forms 25\% of your final grade for the course.
\textbf{Academic conduct:}
Assessed work is subject to University
regulations on academic
conduct:\\\url{http://web.inf.ed.ac.uk/infweb/admin/policies/academic-misconduct}
\textbf{Submission:}
You can submit more than once up until the submission deadline. All
submissions are timestamped automatically. Identically named files
will overwrite earlier submitted versions, so we will mark the latest
submission that comes in before the deadline.
If you submit anything before the deadline, you may not resubmit
afterward. (This policy allows us to begin marking submissions
immediately after the deadline, without having to worry that some may
need to be re-marked).
If you do not submit anything before the deadline, you may submit {\em
exactly once} after the deadline, and a late penalty will be applied
to this submission unless you have received an approved extension.
Please be aware that late submissions may receive lower priority for
marking, and marks may not be returned within the same timeframe as
for on-time submissions.
{\em Warning:} Unfortunately the \verb+submit+ command will technically
allow you to submit late even if you submitted before the deadline
(i.e.\ it does not enforce the above policy). Don't do this! We will
mark the version that we retrieve just after the deadline, and (even
worse) you may still be penalized for submitting late because the
timestamp will update.
For additional information about late penalties and extension
requests, see the School web page below. Do {\bf not} email any course
staff directly about extension requests; you must follow the
instructions on the web page.
\url{http://web.inf.ed.ac.uk/infweb/student-services/ito/admin/coursework-projects/late-coursework-extension-requests}
\textbf{Late submission penalty:}
Following the University guidelines,
late coursework submitted without an authorised extension will be
recorded as late and the following penalties will apply: 5
percentage points will be deducted for every calendar day or part
thereof it is late, up to a maximum of 7 calendar days. After this
time a mark of zero will be recorded.
\section{Backing up your work}
\label{sec:backing-up-your-work}
It is \textbf{strongly recommended} you use some method for backing up
your work. Those working in their AFS homespace on DICE will have their
work automatically backed up as part of the
\href{http://computing.help.inf.ed.ac.uk/backups-and-mirrors}{routine
backup} of all user homespaces. If you are working on a personal
computer you should have your own backup method in place (e.g.~saving
additional copies to an external drive, syncing to a cloud service or
pushing commits from your local Git repository to a private repository on
Github). \textbf{Loss of work through failure to back up
\href{http://tinyurl.com/edinflate}{does not constitute a good reason for
late submission}}.
You may \emph{additionally} wish to keep your coursework under version
control in your local Git repository on the \verb+coursework_2+ branch.
If you make regular commits of your work on the coursework this will
allow you to better keep track of the changes you have made and if
necessary revert to previous versions of files and/or restore
accidentally deleted work. This is not however required and you should
note that keeping your work under version control is a distinct issue
from backing up to guard against hard drive failure. If you are working
on a personal computer you should still keep an additional back up of
your work as described above.
\section{Submission}
\label{sec:submission}
Your coursework submission should be done electronically using the
\href{http://computing.help.inf.ed.ac.uk/submit}{\texttt{submit}}
command available on DICE machines.
Your submission should include
\begin{itemize}
\itemsep1pt\parskip0pt\parsep0pt
\item
the unit test files generated for part 1, \verb+sXXXXXXX_batchnorm_test.txt+ and \verb+sXXXXXXX_conv_test.txt+, where your student number replaces \verb+sXXXXXXX+. Please do not
change the names of these files.
\item
your completed report as a PDF file, using the provided template
\item
any notebook (\verb+.ipynb+) files you used to run the experiments in
\item
and your local version of the \texttt{mlp} code including any changes
you made to the modules (\texttt{.py} files).
\end{itemize}
Please do not submit anything else (e.g. log files).
You should copy all of the files to a single directory, \verb+coursework2+, e.g.
\begin{verbatim}
mkdir coursework2
cp reports/coursework2.pdf sXXXXXXX_batchnorm_test.txt sXXXXXXX_conv_test.txt coursework2
\end{verbatim}
and then submit this directory using
\begin{verbatim}
submit mlp cw2 coursework2
\end{verbatim}
Please submit the directory, not a zip file, not a tar file.
The \texttt{submit} command will prompt you with the details of the
submission including the name of the files / directories you are
submitting and the name of the course and exercise you are submitting
for and ask you to check if these details are correct. You should check
these carefully and reply \texttt{y} to submit if you are sure the files
are correct and \texttt{n} otherwise.
You can amend an existing submission by rerunning the \texttt{submit}
command any time up to the deadline. It is therefore a good idea
(particularly if this is your first time using the DICE submit
mechanism) to do an initial run of the \texttt{submit} command early on
and then rerun the command if you make any further updates to your
submission rather than leaving submission to the last minute.
\section{Marking Scheme}
\label{sec:marking-scheme}
\begin{itemize}
\item
Part 1, Unit tests (30 marks).
\item
Part 2, Report (70 marks). The following aspects will contribute to the mark for your report:
\begin{itemize}
\item Abstract - how clear is it? does it cover what is reported in the document?
\item Introduction - do you clearly outline and motivate the paper, and describe the research questions investigated?
\item Methods -- have you carefully described the approaches you have used?
\item Experiments -- did you carry out the experiments correctly? are the results clearly presented and described?
\item Interpretation and discussion of results
\item Conclusions
\item Presentation and clarity of report
\end{itemize}
\end{itemize}
\bibliographystyle{plainnat}
\bibliography{cw2-references}
\end{document}

View File

@ -1,29 +0,0 @@
@inproceedings{maas2013rectifier,
title={Rectifier nonlinearities improve neural network acoustic models},
author={Maas, Andrew L and Hannun, Awni Y and Ng, Andrew Y},
booktitle={Proc. ICML},
volume={30},
number={1},
year={2013}
}
@inproceedings{nair2010rectified,
title={Rectified linear units improve restricted {Boltzmann} machines},
author={Nair, Vinod and Hinton, Geoffrey E},
booktitle={Proc. ICML},
pages={807--814},
year={2010}
}
@article{clevert2015fast,
title={Fast and accurate deep network learning by exponential linear units ({ELU}s)},
author={Clevert, Djork-Arn{\'e} and Unterthiner, Thomas and Hochreiter, Sepp},
journal={arXiv preprint arXiv:1511.07289},
year={2015}
}
@article{klambauer2017self,
title={Self-Normalizing Neural Networks},
author={Klambauer, G{\"u}nter and Unterthiner, Thomas and Mayr, Andreas and Hochreiter, Sepp},
journal={arXiv preprint arXiv:1706.02515},
year={2017}
}

View File

@ -1,64 +0,0 @@
@inproceedings{maas2013rectifier,
title={Rectifier nonlinearities improve neural network acoustic models},
author={Maas, Andrew L and Hannun, Awni Y and Ng, Andrew Y},
booktitle={Proc. ICML},
volume={30},
number={1},
year={2013}
}
@inproceedings{nair2010rectified,
title={Rectified linear units improve restricted {Boltzmann} machines},
author={Nair, Vinod and Hinton, Geoffrey E},
booktitle={Proc. ICML},
pages={807--814},
year={2010}
}
@article{clevert2015fast,
title={Fast and accurate deep network learning by exponential linear units ({ELU}s)},
author={Clevert, Djork-Arn{\'e} and Unterthiner, Thomas and Hochreiter, Sepp},
journal={arXiv preprint arXiv:1511.07289},
year={2015}
}
@article{klambauer2017self,
title={Self-Normalizing Neural Networks},
author={Klambauer, G{\"u}nter and Unterthiner, Thomas and Mayr, Andreas and Hochreiter, Sepp},
journal={arXiv preprint arXiv:1706.02515},
year={2017}
}
@article{cohen2017emnist,
title = {{EMNIST}: an extension of {MNIST} to handwritten letters},
author = {Cohen, G. and Afshar, S. and Tapson, J. and van Schaik, A.},
journal={arXiv preprint arXiv:1702.05373},
year={2017},
url = {https://arxiv.org/abs/1702.05373}
}
@inproceedings{kingma2015adam,
title = {Adam: A Method for Stochastic Optimization},
author = {Diederik P. Kingma and Jimmy Ba},
booktitle = {ICLR},
year = {2015},
url = {https://arxiv.org/abs/1412.6980}
}
@article{tieleman2012rmsprop,
title={Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
author={Tieleman, T. and Hinton, G. E.},
journal={COURSERA: Neural Networks for Machine Learning},
volume={4},
number={2},
year={2012},
url = {https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf}
}
@inproceedings{ioffe2015batch,
title={Batch normalization: Accelerating deep network training by reducing internal covariate shift},
author={Ioffe, Sergey and Szegedy, Christian},
booktitle={ICML},
pages={448--456},
year={2015},
url = {http://proceedings.mlr.press/v37/ioffe15.html}
}