Update
This commit is contained in:
parent 9b9a7d50fa
commit be1f124dff
@@ -9,7 +9,7 @@ The code in this repository is split into:

* a Python package `mlp`, a [NumPy](http://www.numpy.org/) based neural network package designed specifically for the course that students will implement parts of and extend during the course labs and assignments,

* a series of [Jupyter](http://jupyter.org/) notebooks in the `notebooks` directory containing explanatory material and coding exercises to be completed during the course labs.

-## Coursework 1
+## Coursework 2

This branch contains the Python code and LaTeX files of the second coursework. The code follows the same structure as the labs, in particular the mlp package, and a specific notebook is provided to help you run experiments.

-* Detailed instructions are given in MLP2024_25_CW1_Spec.pdf (see Learn, Assessment, CW1).
-* The [report directory](https://github.com/VICO-UoE/mlpractical/tree/mlp2024-25/coursework1/report) contains the LaTeX files that you will use to create your report.
+* Detailed instructions are given in MLP2024_25_CW2_Spec.pdf (see Learn, Assessment, CW2).
+* The [report directory](https://github.com/VICO-UoE/mlpractical/tree/mlp2024-25/coursework2/report) contains the LaTeX files that you will use to create your report.
@@ -1,6 +1,6 @@

# -*- coding: utf-8 -*-
"""Machine Learning Practical package."""

-__authors__ = ['Pawel Swietojanski', 'Steve Renals', 'Matt Graham']
+__authors__ = ['Pawel Swietojanski', 'Steve Renals', 'Matt Graham', 'Antreas Antoniou']

DEFAULT_SEED = 123456  # Default random number generator seed if none provided.
@@ -7,8 +7,17 @@ data points.

import pickle
import gzip
import sys

import numpy as np
import os

from PIL import Image
from torch.utils import data
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.datasets.utils import download_url, check_integrity

from mlp import DEFAULT_SEED
@@ -16,7 +25,7 @@ class DataProvider(object):

    """Generic data provider."""

    def __init__(self, inputs, targets, batch_size, max_num_batches=-1,
-                 shuffle_order=True, rng=None, smooth_labels=False):
+                 shuffle_order=True, rng=None):
        """Create a new data provider object.

        Args:
@@ -32,7 +41,6 @@ class DataProvider(object):
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.
-            smooth_labels (bool): turn on label smoothing
        """
        self.inputs = inputs
        self.targets = targets
@@ -44,12 +52,10 @@ class DataProvider(object):
        self._max_num_batches = max_num_batches
        self._update_num_batches()
        self.shuffle_order = shuffle_order

        self._current_order = np.arange(inputs.shape[0])
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng
-        self.smooth_labels = smooth_labels
        self.new_epoch()

    @property
@@ -140,7 +146,7 @@ class MNISTDataProvider(DataProvider):

    """Data provider for MNIST handwritten digit images."""

    def __init__(self, which_set='train', batch_size=100, max_num_batches=-1,
-                 shuffle_order=True, rng=None, smooth_labels=False):
+                 shuffle_order=True, rng=None):
        """Create a new MNIST data provider object.

        Args:
@@ -154,7 +160,6 @@ class MNISTDataProvider(DataProvider):
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.
-            smooth_labels (bool): enable/disable label smoothing
        """
        # check a valid which_set was provided
        assert which_set in ['train', 'valid', 'test'], (
@@ -177,7 +182,7 @@ class MNISTDataProvider(DataProvider):
        inputs = inputs.astype(np.float32)
        # pass the loaded data to the parent class __init__
        super(MNISTDataProvider, self).__init__(
-            inputs, targets, batch_size, max_num_batches, shuffle_order, rng, smooth_labels)
+            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)

    def next(self):
        """Returns next data batch or raises `StopIteration` if at end."""
@@ -207,7 +212,7 @@ class EMNISTDataProvider(DataProvider):

    """Data provider for EMNIST handwritten digit images."""

    def __init__(self, which_set='train', batch_size=100, max_num_batches=-1,
-                 shuffle_order=True, rng=None, smooth_labels=False):
+                 shuffle_order=True, rng=None, flatten=False):
        """Create a new EMNIST data provider object.

        Args:
@@ -221,7 +226,6 @@ class EMNISTDataProvider(DataProvider):
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.
-            smooth_labels (bool): enable/disable label smoothing
        """
        # check a valid which_set was provided
        assert which_set in ['train', 'valid', 'test'], (
@@ -243,23 +247,20 @@ class EMNISTDataProvider(DataProvider):
        print(loaded.keys())
        inputs, targets = loaded['inputs'], loaded['targets']
        inputs = inputs.astype(np.float32)
        targets = targets.astype(np.int)
        if flatten:
            inputs = np.reshape(inputs, newshape=(-1, 28*28))
        else:
            inputs = np.reshape(inputs, newshape=(-1, 28, 28, 1))
        inputs = inputs / 255.0
        # pass the loaded data to the parent class __init__
        super(EMNISTDataProvider, self).__init__(
-            inputs, targets, batch_size, max_num_batches, shuffle_order, rng, smooth_labels)
+            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)

    def next(self):
        """Returns next data batch or raises `StopIteration` if at end."""
        inputs_batch, targets_batch = super(EMNISTDataProvider, self).next()
-        if self.smooth_labels:
-            targets_batch_mat = self.label_smoothing(targets_batch)
-        else:
-            targets_batch_mat = self.to_one_of_k(targets_batch)
-        return inputs_batch, targets_batch_mat
+        return inputs_batch, self.to_one_of_k(targets_batch)

    def to_one_of_k(self, int_targets):
        """Converts integer coded class target to 1 of K coded targets.
@@ -280,26 +281,6 @@ class EMNISTDataProvider(DataProvider):
        one_of_k_targets[range(int_targets.shape[0]), int_targets] = 1
        return one_of_k_targets
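
For reference, a standalone sketch of what the 1-of-K coding above produces on a small batch (a hypothetical 4-class example, not part of the diff):

```python
import numpy as np

int_targets = np.array([0, 2, 3])
one_of_k_targets = np.zeros((int_targets.shape[0], 4))
one_of_k_targets[range(int_targets.shape[0]), int_targets] = 1
# [[1. 0. 0. 0.]
#  [0. 0. 1. 0.]
#  [0. 0. 0. 1.]]
```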

    def label_smoothing(self, int_targets, alpha=0.1):
        """Converts integer coded class target to 1 of K coded targets with label smoothing.

        Args:
            int_targets (ndarray): Array of integer coded class targets (i.e.
                where an integer from 0 to `num_classes` - 1 is used to
                indicate which is the correct class). This should be of shape
                (num_data,).
            alpha (float): Smoothing factor.

        Returns:
            Array of 1 of K coded targets with label smoothing i.e. an array of shape
            (num_data, num_classes)
        """
        raise NotImplementedError
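
The body is left for students to implement. For reference, a sketch of the standard formulation (an assumption on our part, not the official solution): blend the 1-of-K targets with a uniform distribution over the classes.

```python
import numpy as np

def label_smoothing_sketch(int_targets, num_classes, alpha=0.1):
    # one-hot targets first, exactly as in to_one_of_k above
    one_of_k = np.zeros((int_targets.shape[0], num_classes))
    one_of_k[range(int_targets.shape[0]), int_targets] = 1
    # smoothed target: (1 - alpha) on the true class, alpha spread uniformly
    return (1. - alpha) * one_of_k + alpha / num_classes
```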

class MetOfficeDataProvider(DataProvider):

    """South Scotland Met Office weather data provider."""

@@ -393,6 +374,21 @@ class CCPPDataProvider(DataProvider):
        super(CCPPDataProvider, self).__init__(
            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)


class EMNISTPytorchDataProvider(Dataset):
    def __init__(self, which_set='train', batch_size=100, max_num_batches=-1,
                 shuffle_order=True, rng=None, flatten=False, transforms=None):
        self.numpy_data_provider = EMNISTDataProvider(which_set=which_set, batch_size=batch_size,
                                                      max_num_batches=max_num_batches,
                                                      shuffle_order=shuffle_order, rng=rng, flatten=flatten)
        self.transforms = transforms

    def __getitem__(self, item):
        x = self.numpy_data_provider.inputs[item]
        for augmentation in self.transforms:
            x = augmentation(x)
        return x, int(self.numpy_data_provider.targets[item])

    def __len__(self):
        return len(self.numpy_data_provider.targets)
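
A hypothetical usage sketch of the PyTorch wrapper above, pairing it with a torchvision transform and a `DataLoader` (the transform pipeline is illustrative):

```python
from torch.utils.data import DataLoader
from torchvision import transforms

dataset = EMNISTPytorchDataProvider(which_set='train',
                                    transforms=[transforms.ToTensor()])
loader = DataLoader(dataset, batch_size=100, shuffle=True)
for x_batch, y_batch in loader:
    break  # x_batch: tensor batch of images, y_batch: integer class labels
```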
@@ -430,3 +426,321 @@ class AugmentedMNISTDataProvider(MNISTDataProvider):
            AugmentedMNISTDataProvider, self).next()
        transformed_inputs_batch = self.transformer(inputs_batch, self.rng)
        return transformed_inputs_batch, targets_batch


class Omniglot(data.Dataset):
    """Omniglot Dataset.
    Args:
        root (string): Root directory of dataset where the
            ``omniglot_dataset`` directory exists or will be saved to if
            download is set to True.
        set_name (string): Which split to load: 'train', 'val' or 'test'.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """
    def collect_data_paths(self, root):
        data_dict = dict()
        print(root)
        for subdir, dirs, files in os.walk(root):
            for file in files:
                if file.endswith('.png'):
                    filepath = os.path.join(subdir, file)
                    class_label = '_'.join(subdir.split("/")[-2:])
                    if class_label in data_dict:
                        data_dict[class_label].append(filepath)
                    else:
                        data_dict[class_label] = [filepath]

        return data_dict

    def __init__(self, root, set_name,
                 transform=None, target_transform=None,
                 download=False):
        self.root = os.path.expanduser(root)
        self.root = os.path.abspath(os.path.join(self.root, 'omniglot_dataset'))
        self.transform = transform
        self.target_transform = target_transform
        self.set_name = set_name  # training, validation or test set
        self.data_dict = self.collect_data_paths(root=self.root)

        x = []
        label_to_idx = {label: idx for idx, label in enumerate(self.data_dict.keys())}
        y = []

        for key, value in self.data_dict.items():
            x.extend(value)
            y.extend(len(value) * [label_to_idx[key]])

        y = np.array(y)

        rng = np.random.RandomState(seed=0)

        idx = np.arange(len(x))
        rng.shuffle(idx)

        x = [x[current_idx] for current_idx in idx]
        y = y[idx]

        train_sample_idx = rng.choice(a=[i for i in range(len(x))], size=int(len(x) * 0.80), replace=False)
        evaluation_sample_idx = [i for i in range(len(x)) if i not in train_sample_idx]
        validation_sample_idx = rng.choice(a=evaluation_sample_idx, size=int(len(evaluation_sample_idx) * 0.40), replace=False)
        # indices held out from training and not used for validation form the test split
        test_sample_idx = [i for i in evaluation_sample_idx if i not in validation_sample_idx]

        if self.set_name == 'train':
            self.data = [item for idx, item in enumerate(x) if idx in train_sample_idx]
            self.labels = y[train_sample_idx]

        elif self.set_name == 'val':
            self.data = [item for idx, item in enumerate(x) if idx in validation_sample_idx]
            self.labels = y[validation_sample_idx]

        else:
            self.data = [item for idx, item in enumerate(x) if idx in test_sample_idx]
            self.labels = y[test_sample_idx]

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img, target = self.data[index], self.labels[index]

        img = Image.open(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        return len(self.data)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        tmp = self.set_name
        fmt_str += '    Split: {}\n'.format(tmp)
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str


class CIFAR10(data.Dataset):
    """`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
    Args:
        root (string): Root directory of dataset where directory
            ``cifar-10-batches-py`` exists or will be saved to if download is set to True.
        set_name (string): Which split to load: 'train', 'val' or 'test'.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """
    base_folder = 'cifar-10-batches-py'
    url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    filename = "cifar-10-python.tar.gz"
    tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
    train_list = [
        ['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
        ['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
        ['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
        ['data_batch_4', '634d18415352ddfa80567beed471001a'],
        ['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
    ]

    test_list = [
        ['test_batch', '40351d587109b95175f43aff81a1287e'],
    ]

    def __init__(self, root, set_name,
                 transform=None, target_transform=None,
                 download=False):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform
        self.set_name = set_name  # training set or test set

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You can use download=True to download it')

        # now load the pickled numpy arrays
        rng = np.random.RandomState(seed=0)

        train_sample_idx = rng.choice(a=[i for i in range(50000)], size=47500, replace=False)
        val_sample_idx = [i for i in range(50000) if i not in train_sample_idx]

        if self.set_name == 'train':
            self.data = []
            self.labels = []
            for fentry in self.train_list:
                f = fentry[0]
                file = os.path.join(self.root, self.base_folder, f)
                fo = open(file, 'rb')
                if sys.version_info[0] == 2:
                    entry = pickle.load(fo)
                else:
                    entry = pickle.load(fo, encoding='latin1')
                self.data.append(entry['data'])
                if 'labels' in entry:
                    self.labels += entry['labels']
                else:
                    self.labels += entry['fine_labels']
                fo.close()

            self.data = np.concatenate(self.data)

            self.data = self.data.reshape((50000, 3, 32, 32))
            self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC
            self.data = self.data[train_sample_idx]
            self.labels = np.array(self.labels)[train_sample_idx]
            print(set_name, self.data.shape)
            print(set_name, self.labels.shape)

        elif self.set_name == 'val':
            self.data = []
            self.labels = []
            for fentry in self.train_list:
                f = fentry[0]
                file = os.path.join(self.root, self.base_folder, f)
                fo = open(file, 'rb')
                if sys.version_info[0] == 2:
                    entry = pickle.load(fo)
                else:
                    entry = pickle.load(fo, encoding='latin1')
                self.data.append(entry['data'])
                if 'labels' in entry:
                    self.labels += entry['labels']
                else:
                    self.labels += entry['fine_labels']
                fo.close()

            self.data = np.concatenate(self.data)
            self.data = self.data.reshape((50000, 3, 32, 32))
            self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC
            self.data = self.data[val_sample_idx]
            self.labels = np.array(self.labels)[val_sample_idx]
            print(set_name, self.data.shape)
            print(set_name, self.labels.shape)

        else:
            f = self.test_list[0][0]
            file = os.path.join(self.root, self.base_folder, f)
            fo = open(file, 'rb')
            if sys.version_info[0] == 2:
                entry = pickle.load(fo)
            else:
                entry = pickle.load(fo, encoding='latin1')
            self.data = entry['data']
            if 'labels' in entry:
                self.labels = entry['labels']
            else:
                self.labels = entry['fine_labels']
            fo.close()
            self.data = self.data.reshape((10000, 3, 32, 32))
            self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC
            self.labels = np.array(self.labels)
            print(set_name, self.data.shape)
            print(set_name, self.labels.shape)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img, target = self.data[index], self.labels[index]

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        return len(self.data)

    def _check_integrity(self):
        root = self.root
        for fentry in (self.train_list + self.test_list):
            filename, md5 = fentry[0], fentry[1]
            fpath = os.path.join(root, self.base_folder, filename)
            if not check_integrity(fpath, md5):
                return False
        return True

    def download(self):
        import tarfile

        if self._check_integrity():
            print('Files already downloaded and verified')
            return

        root = self.root
        download_url(self.url, root, self.filename, self.tgz_md5)

        # extract file
        cwd = os.getcwd()
        tar = tarfile.open(os.path.join(root, self.filename), "r:gz")
        os.chdir(root)
        tar.extractall()
        tar.close()
        os.chdir(cwd)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        tmp = self.set_name
        fmt_str += '    Split: {}\n'.format(tmp)
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        return fmt_str
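
A quick usage sketch for the provider above (the root path is illustrative; `download=True` fetches and MD5-verifies the archive on first use):

```python
# Usage sketch: assumes the CIFAR10 class above is in scope.
dataset = CIFAR10(root='~/data', set_name='train', download=True)
img, label = dataset[0]   # PIL image and integer class label
print(len(dataset))       # 47500: the 47500/2500 train/val split above
```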


class CIFAR100(CIFAR10):
    """`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
    This is a subclass of the `CIFAR10` Dataset.
    """
    base_folder = 'cifar-100-python'
    url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    filename = "cifar-100-python.tar.gz"
    tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'
    train_list = [
        ['train', '16019d7e3df5f24257cddd939b257f8d'],
    ]

    test_list = [
        ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],
    ]

mlp/layers.py (252 changed lines)

@@ -16,6 +16,7 @@ import numpy as np

import mlp.initialisers as init
from mlp import DEFAULT_SEED


class Layer(object):
    """Abstract class defining the interface for a layer."""

@@ -95,6 +96,7 @@ class LayerWithParameters(Layer):
        """
        raise NotImplementedError()


class StochasticLayerWithParameters(Layer):
    """Specialised layer which uses a stochastic forward propagation."""

@@ -124,6 +126,7 @@ class StochasticLayerWithParameters(Layer):
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
        raise NotImplementedError()

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.

@@ -166,6 +169,7 @@ class StochasticLayerWithParameters(Layer):
        """
        raise NotImplementedError()


class StochasticLayer(Layer):
    """Specialised layer which uses a stochastic forward propagation."""

@@ -329,6 +333,7 @@ class AffineLayer(LayerWithParameters):
        return 'AffineLayer(input_dim={0}, output_dim={1})'.format(
            self.input_dim, self.output_dim)


class SigmoidLayer(Layer):
    """Layer implementing an element-wise logistic sigmoid transformation."""

@@ -368,6 +373,152 @@ class SigmoidLayer(Layer):
    def __repr__(self):
        return 'SigmoidLayer'


class ConvolutionalLayer(LayerWithParameters):
    """Layer implementing a 2D convolution-based transformation of its inputs.
    The layer is parameterised by a set of 2D convolutional kernels, a four
    dimensional array of shape
        (num_output_channels, num_input_channels, kernel_height, kernel_width)
    and a bias vector, a one dimensional array of shape
        (num_output_channels,)
    i.e. one shared bias per output channel.
    Assuming no padding is applied to the inputs so that outputs are only
    calculated for positions where the kernel filters fully overlap with the
    inputs, and that unit strides are used, the outputs will have spatial extent
        output_height = input_height - kernel_height + 1
        output_width = input_width - kernel_width + 1
    """

    def __init__(self, num_input_channels, num_output_channels,
                 input_height, input_width,
                 kernel_height, kernel_width,
                 kernels_init=init.UniformInit(-0.01, 0.01),
                 biases_init=init.ConstantInit(0.),
                 kernels_penalty=None, biases_penalty=None):
        """Initialises a parameterised convolutional layer.
        Args:
            num_input_channels (int): Number of channels in inputs to
                layer (this may be number of colour channels in the input
                images if used as the first layer in a model, or the
                number of output channels, a.k.a. feature maps, from a
                previous convolutional layer).
            num_output_channels (int): Number of channels in outputs
                from the layer, a.k.a. number of feature maps.
            input_height (int): Size of first input dimension of each 2D
                channel of inputs.
            input_width (int): Size of second input dimension of each 2D
                channel of inputs.
            kernel_height (int): Size of first dimension of each 2D channel of
                kernels.
            kernel_width (int): Size of second dimension of each 2D channel of
                kernels.
            kernels_init: Initialiser for the kernel parameters.
            biases_init: Initialiser for the bias parameters.
            kernels_penalty: Kernel-dependent penalty term (regulariser) or
                None if no regularisation is to be applied to the kernels.
            biases_penalty: Biases-dependent penalty term (regulariser) or
                None if no regularisation is to be applied to the biases.
        """
        self.num_input_channels = num_input_channels
        self.num_output_channels = num_output_channels
        self.input_height = input_height
        self.input_width = input_width
        self.kernel_height = kernel_height
        self.kernel_width = kernel_width
        self.kernels_init = kernels_init
        self.biases_init = biases_init
        self.kernels_shape = (
            num_output_channels, num_input_channels, kernel_height, kernel_width
        )
        self.inputs_shape = (
            None, num_input_channels, input_height, input_width
        )
        self.kernels = self.kernels_init(self.kernels_shape)
        self.biases = self.biases_init(num_output_channels)
        self.kernels_penalty = kernels_penalty
        self.biases_penalty = biases_penalty

        self.cache = None

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.
        For inputs `x`, outputs `y`, kernels `K` and biases `b` the layer
        corresponds to `y = conv2d(x, K) + b`.
        Args:
            inputs: Array of layer inputs of shape (batch_size, num_input_channels, image_height, image_width).
        Returns:
            outputs: Array of layer outputs of shape (batch_size, num_output_channels, output_height, output_width).
        """
        raise NotImplementedError

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.
        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.
        Args:
            inputs: Array of layer inputs of shape
                (batch_size, num_input_channels, input_height, input_width).
            outputs: Array of layer outputs calculated in forward pass of
                shape
                (batch_size, num_output_channels, output_height, output_width).
            grads_wrt_outputs: Array of gradients with respect to the layer
                outputs of shape
                (batch_size, num_output_channels, output_height, output_width).
        Returns:
            Array of gradients with respect to the layer inputs of shape
            (batch_size, num_input_channels, input_height, input_width).
        """
        # Pad the grads_wrt_outputs
        raise NotImplementedError

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.
        Args:
            inputs: array of inputs to layer of shape (batch_size, input_dim)
            grads_wrt_outputs: array of gradients with respect to the layer
                outputs of shape
                (batch_size, num_output_channels, output_height, output_width).
        Returns:
            list of arrays of gradients with respect to the layer parameters
            `[grads_wrt_kernels, grads_wrt_biases]`.
        """
        # Get inputs_col from previous fprop
        raise NotImplementedError

    def params_penalty(self):
        """Returns the parameter dependent penalty term for this layer.
        If no parameter-dependent penalty terms are set this returns zero.
        """
        params_penalty = 0
        if self.kernels_penalty is not None:
            params_penalty += self.kernels_penalty(self.kernels)
        if self.biases_penalty is not None:
            params_penalty += self.biases_penalty(self.biases)
        return params_penalty

    @property
    def params(self):
        """A list of layer parameter values: `[kernels, biases]`."""
        return [self.kernels, self.biases]

    @params.setter
    def params(self, values):
        self.kernels = values[0]
        self.biases = values[1]

    def __repr__(self):
        return (
            'ConvolutionalLayer(\n'
            '    num_input_channels={0}, num_output_channels={1},\n'
            '    input_height={2}, input_width={3},\n'
            '    kernel_height={4}, kernel_width={5}\n'
            ')'
            .format(self.num_input_channels, self.num_output_channels,
                    self.input_height, self.input_width, self.kernel_height,
                    self.kernel_width)
        )
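
A quick check of the valid-convolution output extents quoted in the class docstring, for a 28x28 input and 5x5 kernels:

```python
input_height = input_width = 28
kernel_height = kernel_width = 5
output_height = input_height - kernel_height + 1  # 24
output_width = input_width - kernel_width + 1     # 24
```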


class ReluLayer(Layer):
    """Layer implementing an element-wise rectified linear transformation."""

@@ -406,84 +557,6 @@ class ReluLayer(Layer):
    def __repr__(self):
        return 'ReluLayer'


class LeakyReluLayer(Layer):
    """Layer implementing an element-wise leaky rectified linear transformation."""
    def __init__(self, alpha=0.01):
        self.alpha = alpha

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to `y = ..., else`.
        """
        raise NotImplementedError

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.
        """
        raise NotImplementedError

    def __repr__(self):
        return 'LeakyReluLayer'


class ParametricReluLayer(LayerWithParameters):
    """Layer implementing an element-wise parametric rectified linear transformation."""

    def __init__(self, alpha=0.25):
        self.alpha = np.array([alpha])

    @property
    def params(self):
        """A list of layer parameter values: `[alpha]`."""
        return [self.alpha]

    def fprop(self, inputs):
        """Forward propagates activations through the layer transformation.

        For inputs `x` and outputs `y` this corresponds to `y = ..., else`.
        """
        raise NotImplementedError

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.

        Given gradients with respect to the outputs of the layer calculates the
        gradients with respect to the layer inputs.
        """
        raise NotImplementedError

    def grads_wrt_params(self, inputs, grads_wrt_outputs):
        """Calculates gradients with respect to layer parameters.

        Args:
            inputs: array of inputs to layer of shape (batch_size, input_dim)
            grads_wrt_outputs: array of gradients with respect to the layer
                outputs of shape (batch_size, output_dim)

        Returns:
            list of arrays of gradients with respect to the layer parameters
            `[grads_wrt_params]`. Where params is the alpha parameter.
        """
        raise NotImplementedError

    @property
    def params(self):
        """A list of layer parameter values: `[alpha]`."""
        return [self.alpha]

    @params.setter
    def params(self, values):
        self.alpha = values[0]

    def __repr__(self):
        return 'ParametricReluLayer'


class TanhLayer(Layer):
    """Layer implementing an element-wise hyperbolic tangent transformation."""

@@ -518,11 +591,12 @@ class TanhLayer(Layer):
        Array of gradients with respect to the layer inputs of shape
        (batch_size, input_dim).
        """
-        return (1. - outputs**2) * grads_wrt_outputs
+        return (1. - outputs ** 2) * grads_wrt_outputs

    def __repr__(self):
        return 'TanhLayer'
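
The tanh bprop rule relies on d tanh(x)/dx = 1 - tanh(x)^2; a standalone finite-difference sanity sketch:

```python
import numpy as np

x = np.array([0.3])
y = np.tanh(x)
analytic = 1. - y ** 2                    # the bprop factor above
eps = 1e-6
numeric = (np.tanh(x + eps) - np.tanh(x - eps)) / (2 * eps)
print(np.allclose(analytic, numeric))     # True
```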


class SoftmaxLayer(Layer):
    """Layer implementing a softmax transformation."""

@@ -567,6 +641,7 @@ class SoftmaxLayer(Layer):
    def __repr__(self):
        return 'SoftmaxLayer'


class RadialBasisFunctionLayer(Layer):
    """Layer implementing projection to a grid of radial basis functions."""

@@ -582,7 +657,7 @@ class RadialBasisFunctionLayer(Layer):
                tile basis functions in grid across. For example for a 2D input
                space spanning [0, 1] x [0, 1] use intervals=[[0, 1], [0, 1]].
        """
-        num_basis = grid_dim**len(intervals)
+        num_basis = grid_dim ** len(intervals)
        self.centres = np.array(np.meshgrid(*[
            np.linspace(low, high, grid_dim) for (low, high) in intervals])
        ).reshape((len(intervals), -1))
@@ -598,8 +673,8 @@ class RadialBasisFunctionLayer(Layer):
        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
-        return np.exp(-(inputs[..., None] - self.centres[None, ...])**2 /
-                      self.scales**2).reshape((inputs.shape[0], -1))
+        return np.exp(-(inputs[..., None] - self.centres[None, ...]) ** 2 /
+                      self.scales ** 2).reshape((inputs.shape[0], -1))

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.
@@ -620,13 +695,14 @@ class RadialBasisFunctionLayer(Layer):
        """
        num_basis = self.centres.shape[1]
        return -2 * (
-            ((inputs[..., None] - self.centres[None, ...]) / self.scales**2) *
+            ((inputs[..., None] - self.centres[None, ...]) / self.scales ** 2) *
            grads_wrt_outputs.reshape((inputs.shape[0], -1, num_basis))
        ).sum(-1)

    def __repr__(self):
        return 'RadialBasisFunctionLayer(grid_dim={0})'.format(self.grid_dim)


class DropoutLayer(StochasticLayer):
    """Layer which stochastically drops input dimensions in its output."""

@@ -661,7 +737,12 @@ class DropoutLayer(StochasticLayer):
        Returns:
            outputs: Array of layer outputs of shape (batch_size, output_dim).
        """
-        raise NotImplementedError
+        if stochastic:
+            mask_shape = (1,) + inputs.shape[1:] if self.share_across_batch else inputs.shape
+            self._mask = (self.rng.uniform(size=mask_shape) < self.incl_prob)
+            return inputs * self._mask
+        else:
+            return inputs * self.incl_prob

    def bprop(self, inputs, outputs, grads_wrt_outputs):
        """Back propagates gradients through a layer.
@@ -681,11 +762,12 @@ class DropoutLayer(StochasticLayer):
        Array of gradients with respect to the layer inputs of shape
        (batch_size, input_dim).
        """
-        raise NotImplementedError
+        return grads_wrt_outputs * self._mask

    def __repr__(self):
        return 'DropoutLayer(incl_prob={0:.1f})'.format(self.incl_prob)
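
The evaluation-mode scaling by `incl_prob` in the new `fprop` keeps the expected activation the same in both modes; a standalone sketch of that identity:

```python
import numpy as np

rng = np.random.RandomState(0)
x = np.ones((1, 100000))
incl_prob = 0.5
mask = rng.uniform(size=x.shape) < incl_prob
print((x * mask).mean())       # ~0.5 on average: stochastic mode
print((x * incl_prob).mean())  # exactly 0.5: evaluation mode
```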


class ReshapeLayer(Layer):
    """Layer which reshapes dimensions of inputs."""

@@ -315,3 +315,74 @@ class AdaGradLearningRule(GradientDescentLearningRule):
            param -= (self.learning_rate * grad /
                      (sum_sq_grad + self.epsilon) ** 0.5)


class RMSPropLearningRule(GradientDescentLearningRule):
    """Root mean squared gradient normalised learning rule (RMSProp).
    First-order gradient-descent based learning rule which normalises gradient
    updates by an exponentially smoothed estimate of the gradient second
    moments.
    References:
        [1]: Neural Networks for Machine Learning: Lecture 6a slides,
        University of Toronto, Computer Science Course CSC321
        http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
    """

    def __init__(self, learning_rate=1e-3, beta=0.9, epsilon=1e-8):
        """Creates a new learning rule object.
        Args:
            learning_rate: A positive scalar to scale gradient updates to the
                parameters by. This needs to be carefully set - if too large
                the learning dynamic will be unstable and may diverge, while
                if set too small learning will proceed very slowly.
            beta: Exponential decay rate for gradient second moment
                estimates. This should be a scalar value in [0, 1]. The running
                gradient second moment estimate is calculated using
                    `m_2 = beta * m_2_prev + (1 - beta) * g**2`
                where `m_2_prev` is the previous estimate and `g` the current
                parameter gradients.
            epsilon: 'Softening' parameter to stop updates diverging when
                gradient second moment estimates are close to zero. Should be
                set to a small positive value.
        """
        super(RMSPropLearningRule, self).__init__(learning_rate)
        assert beta >= 0. and beta <= 1., 'beta should be in [0, 1].'
        assert epsilon > 0., 'epsilon should be > 0.'
        self.beta = beta
        self.epsilon = epsilon

    def initialise(self, params):
        """Initialises the state of the learning rule for a set of parameters.
        This must be called before `update_params` is first called.
        Args:
            params: A list of the parameters to be optimised. Note these will
                be updated *in-place* to avoid reallocating arrays on each
                update.
        """
        super(RMSPropLearningRule, self).initialise(params)
        self.moms_2 = []
        for param in self.params:
            self.moms_2.append(np.zeros_like(param))

    def reset(self):
        """Resets any additional state variables to their initial values.
        For this learning rule this corresponds to zeroing all gradient
        second moment estimates.
        """
        for mom_2 in self.moms_2:
            mom_2 *= 0.

    def update_params(self, grads_wrt_params):
        """Applies a single update to all parameters.
        All parameter updates are performed using in-place operations and so
        nothing is returned.
        Args:
            grads_wrt_params: A list of gradients of the scalar loss function
                with respect to each of the parameters passed to `initialise`
                previously, with this list expected to be in the same order.
        """
        for param, mom_2, grad in zip(
                self.params, self.moms_2, grads_wrt_params):
            mom_2 *= self.beta
            mom_2 += (1. - self.beta) * grad ** 2
            param -= (self.learning_rate * grad /
                      (mom_2 + self.epsilon) ** 0.5)
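
A worked single step of the update above on one scalar parameter, to make the second-moment recurrence concrete:

```python
learning_rate, beta, epsilon = 1e-3, 0.9, 1e-8
param, mom_2, grad = 1.0, 0.0, 2.0
mom_2 = beta * mom_2 + (1. - beta) * grad ** 2             # 0.4
param -= learning_rate * grad / (mom_2 + epsilon) ** 0.5   # step ~ 3.16e-3
print(param)  # ~0.99684: the step is normalised by the gradient scale
```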

@@ -27,7 +27,7 @@ class SingleLayerModel(object):
        """A list of all of the parameters of the model."""
        return self.layer.params

-    def fprop(self, inputs):
+    def fprop(self, inputs, evaluation=False):
        """Calculate the model outputs corresponding to a batch of inputs.

        Args:
@@ -58,7 +58,7 @@ class Optimiser(object):
        parameters according to the learning rule.
        """
        with self.tqdm_progress(total=self.train_dataset.num_batches) as train_progress_bar:
-            train_progress_bar.set_description("Ep Prog")
+            train_progress_bar.set_description("Epoch Progress")
            for inputs_batch, targets_batch in self.train_dataset:
                activations = self.model.fprop(inputs_batch)
                grads_wrt_outputs = self.error.grad(activations[-1], targets_batch)
@@ -112,7 +112,7 @@ class Optimiser(object):
        """
        logger.info('Epoch {0}: {1:.1f}s to complete\n {2}'.format(
            epoch, epoch_time,
-            ', '.join(['{}={:.2e}'.format(k, v) for (k, v) in stats.items()])
+            ', '.join(['{0}={1:.2e}'.format(k, v) for (k, v) in stats.items()])
        ))

    def train(self, num_epochs, stats_interval=5):
@@ -132,7 +132,7 @@ class Optimiser(object):
        start_train_time = time.time()
        run_stats = [list(self.get_epoch_stats().values())]
        with self.tqdm_progress(total=num_epochs) as progress_bar:
-            progress_bar.set_description("Exp Prog")
+            progress_bar.set_description("Experiment Progress")
            for epoch in range(1, num_epochs + 1):
                start_time = time.time()
                self.do_training_epoch()

@@ -29,7 +29,7 @@ class L1Penalty(object):
        Returns:
            Value of penalty term.
        """
-        raise NotImplementedError
+        return self.coefficient * abs(parameter).sum()

    def grad(self, parameter):
        """Calculate the penalty gradient with respect to the parameter.
@@ -41,7 +41,7 @@ class L1Penalty(object):
            Value of penalty gradient with respect to parameter. This
            should be an array of the same shape as the parameter.
        """
-        raise NotImplementedError
+        return self.coefficient * np.sign(parameter)

    def __repr__(self):
        return 'L1Penalty({0})'.format(self.coefficient)
@@ -72,7 +72,7 @@ class L2Penalty(object):
        Returns:
            Value of penalty term.
        """
-        raise NotImplementedError
+        return 0.5 * self.coefficient * (parameter ** 2).sum()

    def grad(self, parameter):
        """Calculate the penalty gradient with respect to the parameter.
@@ -84,7 +84,7 @@ class L2Penalty(object):
            Value of penalty gradient with respect to parameter. This
            should be an array of the same shape as the parameter.
        """
-        raise NotImplementedError
+        return self.coefficient * parameter

    def __repr__(self):
        return 'L2Penalty({0})'.format(self.coefficient)
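
A small numerical check of the penalties implemented above (values computed by hand):

```python
import numpy as np
from mlp.penalties import L1Penalty, L2Penalty

w = np.array([-2.0, 0.5, 1.0])
l1, l2 = L1Penalty(1e-2), L2Penalty(1e-2)
print(l1(w))       # 1e-2 * (2 + 0.5 + 1) = 0.035
print(l1.grad(w))  # 1e-2 * sign(w) = [-0.01, 0.01, 0.01]
print(l2(w))       # 0.5 * 1e-2 * (4 + 0.25 + 1) = 0.02625
print(l2.grad(w))  # 1e-2 * w = [-0.02, 0.005, 0.01]
```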
@@ -32,42 +32,3 @@ class ConstantLearningRateScheduler(object):
            epoch_number: Integer index of training epoch about to be run.
        """
        learning_rule.learning_rate = self.learning_rate


class CosineAnnealingWithWarmRestarts(object):
    """Cosine annealing scheduler, implemented as in https://arxiv.org/pdf/1608.03983.pdf"""

    def __init__(self, min_learning_rate, max_learning_rate, total_iters_per_period,
                 max_learning_rate_discount_factor, period_iteration_expansion_factor):
        """
        Instantiates a new cosine annealing with warm restarts learning rate scheduler.
        :param min_learning_rate: The minimum learning rate the scheduler can assign
        :param max_learning_rate: The maximum learning rate the scheduler can assign
        :param total_iters_per_period: The number of epochs in a period
        :param max_learning_rate_discount_factor: The rate of discount for the maximum
            learning rate after each restart, i.e. how many times smaller the max
            learning rate will be after a restart compared to the previous one
        :param period_iteration_expansion_factor: The rate of expansion of the period
            epochs, e.g. if it is set to 1 then all periods have the same number of
            epochs, if it is larger than 1 then each subsequent period will have more
            epochs and vice versa.
        """
        self.min_learning_rate = min_learning_rate
        self.max_learning_rate = max_learning_rate
        self.total_epochs_per_period = total_iters_per_period

        self.max_learning_rate_discount_factor = max_learning_rate_discount_factor
        self.period_iteration_expansion_factor = period_iteration_expansion_factor

    def update_learning_rule(self, learning_rule, epoch_number):
        """Update the hyperparameters of the learning rule.

        Run at the beginning of each epoch.

        Args:
            learning_rule: Learning rule object being used in training run,
                any scheduled hyperparameters to be altered should be
                attributes of this object.
            epoch_number: Integer index of training epoch about to be run.
        Returns:
            effective_learning_rate at step 'epoch_number'
        """
        raise NotImplementedError
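
The method is left unimplemented for the coursework; for reference, a sketch of the cosine annealing formula from the linked paper (Loshchilov and Hutter, 2016), without the restart and period-expansion bookkeeping:

```python
import numpy as np

def cosine_annealed_lr(epoch_in_period, epochs_per_period,
                       min_learning_rate, max_learning_rate):
    # eta_t = eta_min + 0.5 * (eta_max - eta_min) * (1 + cos(pi * T_cur / T_i))
    return min_learning_rate + 0.5 * (max_learning_rate - min_learning_rate) * (
        1 + np.cos(np.pi * epoch_in_period / epochs_per_period))
```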

@@ -1,80 +0,0 @@
# from mlp.learning_rules import AdamLearningRuleWithWeightDecay
# from mlp.schedulers import CosineAnnealingWithWarmRestarts
from mlp.layers import DropoutLayer
from mlp.penalties import L1Penalty, L2Penalty
import numpy as np
import os


def test_dropout_layer():
    # loaded = np.load("../data/correct_results.npz")
    rng = np.random.RandomState(92019)

    x = rng.normal(loc=0, scale=5.0, size=(50, 3, 64, 64))

    correct_outputs = np.load(os.path.join(os.environ['MLP_DATA_DIR'], 'regularization_debug_pack.npy'), allow_pickle=True).item()

    rng = np.random.RandomState(92019)
    layer = DropoutLayer(rng=rng)

    out = layer.fprop(x)

    grads = layer.bprop(inputs=x, outputs=out, grads_wrt_outputs=np.ones(x.shape))

    # correct_outputs = correct_outputs['dropout']

    fprop_test = np.allclose(correct_outputs['DropoutLayer_fprop'], out)

    bprop_test = np.allclose(correct_outputs['DropoutLayer_bprop'], grads)

    return fprop_test, out, correct_outputs['DropoutLayer_fprop'], bprop_test, grads, correct_outputs['DropoutLayer_bprop']


def test_L1_Penalty():

    rng = np.random.RandomState(92019)

    x = rng.normal(loc=0, scale=5.0, size=(50, 3, 64, 64))

    correct_outputs = np.load(os.path.join(os.environ['MLP_DATA_DIR'], 'regularization_debug_pack.npy'), allow_pickle=True).item()

    layer = L1Penalty(1e-4)

    out = layer(x)

    grads = layer.grad(x)

    # correct_outputs = correct_outputs['l1penalty']

    __call__test = np.allclose(correct_outputs['L1Penalty___call__correct'], out)

    grad_test = np.allclose(correct_outputs['L1Penalty_grad_correct'], grads)

    return __call__test, out, correct_outputs['L1Penalty___call__correct'], grad_test, grads, correct_outputs['L1Penalty_grad_correct']


def test_L2_Penalty():

    rng = np.random.RandomState(92019)

    x = rng.normal(loc=0, scale=5.0, size=(50, 3, 64, 64))

    correct_outputs = np.load(os.path.join(os.environ['MLP_DATA_DIR'], 'regularization_debug_pack.npy'), allow_pickle=True).item()

    layer = L2Penalty(1e-4)

    out = layer(x)

    grads = layer.grad(x)

    # correct_outputs = correct_outputs['l2penalty']

    __call__test = np.allclose(correct_outputs['L2Penalty___call__correct'], out)

    grad_test = np.allclose(correct_outputs['L2Penalty_grad_correct'], grads)

    return __call__test, out, correct_outputs['L2Penalty___call__correct'], grad_test, grads, correct_outputs['L2Penalty_grad_correct']
|
||||
|
@ -1,242 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Introduction\n",
|
||||
"\n",
|
||||
"## Getting started with Jupyter notebooks\n",
|
||||
"\n",
|
||||
"The majority of your work in this course will be done using Jupyter notebooks so we will here introduce some of the basics of the notebook system. If you are already comfortable using notebooks or just would rather get on with some coding feel free to [skip straight to the exercises below](#Exercises).\n",
|
||||
"\n",
|
||||
"*Note: Jupyter notebooks are also known as IPython notebooks. The Jupyter system now supports languages other than Python [hence the name was changed to make it more language agnostic](https://ipython.org/#jupyter-and-the-future-of-ipython) however IPython notebook is still commonly used.*\n",
|
||||
"\n",
|
||||
"### Jupyter basics: the server, dashboard and kernels\n",
|
||||
"\n",
|
||||
"In launching this notebook you will have already come across two of the other key components of the Jupyter system - the notebook *server* and *dashboard* interface.\n",
|
||||
"\n",
|
||||
"We began by starting a notebook server instance in the terminal by running\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"jupyter notebook\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will have begun printing a series of log messages to terminal output similar to\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ jupyter notebook\n",
|
||||
"[I 08:58:24.417 NotebookApp] Serving notebooks from local directory: ~/mlpractical\n",
|
||||
"[I 08:58:24.417 NotebookApp] 0 active kernels\n",
|
||||
"[I 08:58:24.417 NotebookApp] The Jupyter Notebook is running at: http://localhost:8888/\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The last message included here indicates the URL the application is being served at. The default behaviour of the `jupyter notebook` command is to open a tab in a web browser pointing to this address after the server has started up. The server can be launched without opening a browser window by running `jupyter notebook --no-browser`. This can be useful for example when running a notebook server on a remote machine over SSH. Descriptions of various other command options can be found by displaying the command help page using\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"jupyter notebook --help\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"While the notebook server is running it will continue printing log messages to terminal it was started from. Unless you detach the process from the terminal session you will need to keep the session open to keep the notebook server alive. If you want to close down a running server instance from the terminal you can use `Ctrl+C` - this will bring up a confirmation message asking you to confirm you wish to shut the server down. You can either enter `y` or skip the confirmation by hitting `Ctrl+C` again.\n",
|
||||
"\n",
|
||||
"When the notebook application first opens in your browser you are taken to the notebook *dashboard*. This will appear something like this\n",
|
||||
"\n",
|
||||
"<img src='res/jupyter-dashboard.png' />\n",
|
||||
"\n",
|
||||
"The dashboard above is showing the `Files` tab, a list of files in the directory the notebook server was launched from. We can navigate in to a sub-directory by clicking on a directory name and back up to the parent directory by clicking the `..` link. An important point to note is that the top-most level that you will be able to navigate to is the directory you run the server from. This is a security feature and generally you should try to limit the access the server has by launching it in the highest level directory which gives you access to all the files you need to work with.\n",
|
||||
"\n",
|
||||
"As well as allowing you to launch existing notebooks, the `Files` tab of the dashboard also allows new notebooks to be created using the `New` drop-down on the right. It can also perform basic file-management tasks such as renaming and deleting files (select a file by checking the box alongside it to bring up a context menu toolbar).\n",
|
||||
"\n",
|
||||
"In addition to opening notebook files, we can also edit text files such as `.py` source files, directly in the browser by opening them from the dashboard. The in-built text-editor is less-featured than a full IDE but is useful for quick edits of source files and previewing data files.\n",
|
||||
"\n",
|
||||
"The `Running` tab of the dashboard gives a list of the currently running notebook instances. This can be useful to keep track of which notebooks are still running and to shutdown (or reopen) old notebook processes when the corresponding tab has been closed.\n",
|
||||
"\n",
|
||||
"### The notebook interface\n",
|
||||
"\n",
|
||||
"The top of your notebook window should appear something like this:\n",
|
||||
"\n",
|
||||
"<img src='res/jupyter-notebook-interface.png' />\n",
|
||||
"\n",
|
||||
"The name of the current notebook is displayed at the top of the page and can be edited by clicking on the text of the name. Displayed alongside this is an indication of the last manual *checkpoint* of the notebook file. On-going changes are auto-saved at regular intervals; the check-point mechanism is mainly meant as a way to recover an earlier version of a notebook after making unwanted changes. Note the default system only currently supports storing a single previous checkpoint despite the `Revert to checkpoint` dropdown under the `File` menu perhaps suggesting otherwise.\n",
|
||||
"\n",
|
||||
"As well as having options to save and revert to checkpoints, the `File` menu also allows new notebooks to be created in same directory as the current notebook, a copy of the current notebook to be made and the ability to export the current notebook to various formats.\n",
|
||||
"\n",
|
||||
"The `Edit` menu contains standard clipboard functions as well as options for reorganising notebook *cells*. Cells are the basic units of notebooks, and can contain formatted text like the one you are reading at the moment or runnable code as we will see below. The `Edit` and `Insert` drop down menus offer various options for moving cells around the notebook, merging and splitting cells and inserting new ones, while the `Cell` menu allow running of code cells and changing cell types.\n",
|
||||
"\n",
|
||||
"The `Kernel` menu offers some useful commands for managing the Python process (kernel) running in the notebook. In particular it provides options for interrupting a busy kernel (useful for example if you realise you have set a slow code cell running with incorrect parameters) and to restart the current kernel. This will cause all variables currently defined in the workspace to be lost but may be necessary to get the kernel back to a consistent state after polluting the namespace with lots of global variables or when trying to run code from an updated module and `reload` is failing to work. \n",
|
||||
"\n",
|
||||
"To the far right of the menu toolbar is a kernel status indicator. When a dark filled circle is shown this means the kernel is currently busy and any further code cell run commands will be queued to happen after the currently running cell has completed. An open status circle indicates the kernel is currently idle.\n",
|
||||
"\n",
|
||||
"The final row of the top notebook interface is the notebook toolbar which contains shortcut buttons to some common commands such as clipboard actions and cell / kernel management. If you are interested in learning more about the notebook user interface you may wish to run through the `User Interface Tour` under the `Help` menu drop down.\n",
|
||||
"\n",
|
||||
"### Markdown cells: easy text formatting\n",
|
||||
"\n",
|
||||
"This entire introduction has been written in what is termed a *Markdown* cell of a notebook. [Markdown](https://en.wikipedia.org/wiki/Markdown) is a lightweight markup language intended to be readable in plain-text. As you may wish to use Markdown cells to keep your own formatted notes in notebooks, a small sampling of the formatting syntax available is below (escaped mark-up on top and corresponding rendered output below that); there are many much more extensive syntax guides - for example [this cheatsheet](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet).\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"## Level 2 heading\n",
|
||||
"### Level 3 heading\n",
|
||||
"\n",
|
||||
"*Italicised* and **bold** text.\n",
|
||||
"\n",
|
||||
" * bulleted\n",
|
||||
" * lists\n",
|
||||
" \n",
|
||||
"and\n",
|
||||
"\n",
|
||||
" 1. enumerated\n",
|
||||
" 2. lists\n",
|
||||
"\n",
|
||||
"Inline maths $y = mx + c$ using [MathJax](https://www.mathjax.org/) as well as display style\n",
|
||||
"\n",
|
||||
"$$ ax^2 + bx + c = 0 \\qquad \\Rightarrow \\qquad x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} $$\n",
|
||||
"```\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Level 2 heading\n",
|
||||
"### Level 3 heading\n",
|
||||
"\n",
|
||||
"*Italicised* and **bold** text.\n",
|
||||
"\n",
|
||||
" * bulleted\n",
|
||||
" * lists\n",
|
||||
" \n",
|
||||
"and\n",
|
||||
"\n",
|
||||
" 1. enumerated\n",
|
||||
" 2. lists\n",
|
||||
"\n",
|
||||
"Inline maths $y = mx + c$ using [MathJax]() as well as display maths\n",
|
||||
"\n",
|
||||
"$$ ax^2 + bx + c = 0 \\qquad \\Rightarrow \\qquad x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a} $$\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"We can also directly use HTML tags in Markdown cells to embed rich content such as images and videos.\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"```\n",
|
||||
"<img src=\"http://placehold.it/350x150\" />\n",
|
||||
"```\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"<img src=\"http://placehold.it/350x150\" />\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"### Code cells: in browser code execution\n",
|
||||
"\n",
|
||||
"Up to now we have not seen any runnable code. An example of a executable code cell is below. To run it first click on the cell so that it is highlighted, then either click the <i class=\"fa-step-forward fa\"></i> button on the notebook toolbar, go to `Cell > Run Cells` or use the keyboard shortcut `Ctrl+Enter`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from __future__ import print_function\n",
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"print('Hello world!')\n",
|
||||
"print('Alarming hello!', file=sys.stderr)\n",
|
||||
"print('Hello again!')\n",
|
||||
"'And again!'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This example shows the three main components of a code cell.\n",
|
||||
"\n",
|
||||
"The most obvious is the input area. This (unsuprisingly) is used to enter the code to be run which will be automatically syntax highlighted.\n",
|
||||
"\n",
|
||||
"To the immediate left of the input area is the execution indicator / counter. Before a code cell is first run this will display `In [ ]:`. After the cell is run this is updated to `In [n]:` where `n` is a number corresponding to the current execution counter which is incremented whenever any code cell in the notebook is run. This can therefore be used to keep track of the relative order in which cells were last run. There is no fundamental requirement to run cells in the order they are organised in the notebook, though things will usually be more readable if you keep things in roughly in order!\n",
|
||||
"\n",
|
||||
"Immediately below the input area is the output area. This shows any output produced by the code in the cell. This is dealt with a little bit confusingly in the current Jupyter version. At the top any output to [`stdout`](https://en.wikipedia.org/wiki/Standard_streams#Standard_output_.28stdout.29) is displayed. Immediately below that output to [`stderr`](https://en.wikipedia.org/wiki/Standard_streams#Standard_error_.28stderr.29) is displayed. All of the output to `stdout` is displayed together even if there has been output to `stderr` between as shown by the suprising ordering in the output here. \n",
|
||||
"\n",
|
||||
"The final part of the output area is the *display* area. By default this will just display the returned output of the last Python statement as would usually be the case in a (I)Python interpreter run in a terminal. What is displayed for a particular object is by default determined by its special `__repr__` method e.g. for a string it is just the quote enclosed value of the string itself.\n",
|
||||
"\n",
|
||||
"### Useful keyboard shortcuts\n",
|
||||
"\n",
|
||||
"There are a wealth of keyboard shortcuts available in the notebook interface. For an exhaustive list see the `Keyboard Shortcuts` option under the `Help` menu. We will cover a few of those we find most useful below.\n",
|
||||
"\n",
|
||||
"Shortcuts come in two flavours: those applicable in *command mode*, active when no cell is currently being edited and indicated by a blue highlight around the current cell; those applicable in *edit mode* when the content of a cell is being edited, indicated by a green current cell highlight.\n",
|
||||
"\n",
|
||||
"In edit mode of a code cell, two of the more generically useful keyboard shortcuts are offered by the `Tab` key.\n",
|
||||
"\n",
|
||||
" * Pressing `Tab` a single time while editing code will bring up suggested completions of what you have typed so far. This is done in a scope aware manner so for example typing `a` + `[Tab]` in a code cell will come up with a list of objects beginning with `a` in the current global namespace, while typing `np.a` + `[Tab]` (assuming `import numpy as np` has been run already) will bring up a list of objects in the root NumPy namespace beginning with `a`.\n",
|
||||
" * Pressing `Shift+Tab` once immediately after opening parenthesis of a function or method will cause a tool-tip to appear with the function signature (including argument names and defaults) and its docstring. Pressing `Shift+Tab` twice in succession will cause an expanded version of the same tooltip to appear, useful for longer docstrings. Pressing `Shift+Tab` four times in succession will cause the information to be instead displayed in a pager docked to bottom of the notebook interface which stays attached even when making further edits to the code cell and so can be useful for keeping documentation visible when editing e.g. to help remember the name of arguments to a function and their purposes.\n",
|
||||
"\n",
|
||||
"A series of useful shortcuts available in both command and edit mode are `[modifier]+Enter` where `[modifier]` is one of `Ctrl` (run selected cell), `Shift` (run selected cell and select next) or `Alt` (run selected cell and insert a new cell after).\n",
|
||||
"\n",
|
||||
"A useful command mode shortcut to know about is the ability to toggle line numbers on and off for a cell by pressing `L` which can be useful when trying to diagnose stack traces printed when an exception is raised or when referring someone else to a section of code.\n",
|
||||
" \n",
|
||||
"### Magics\n",
|
||||
"\n",
|
||||
"There are a range of *magic* commands in IPython notebooks, than provide helpful tools outside of the usual Python syntax. A full list of the inbuilt magic commands is given [here](http://ipython.readthedocs.io/en/stable/interactive/magics.html), however three that are particularly useful for this course:\n",
|
||||
"\n",
|
||||
" * [`%%timeit`](http://ipython.readthedocs.io/en/stable/interactive/magics.html?highlight=matplotlib#magic-timeit) Put at the beginning of a cell to time its execution and print the resulting timing statistics.\n",
|
||||
" * [`%precision`](http://ipython.readthedocs.io/en/stable/interactive/magics.html?highlight=matplotlib#magic-precision) Set the precision for pretty printing of floating point values and NumPy arrays.\n",
|
||||
" * [`%debug`](http://ipython.readthedocs.io/en/stable/interactive/magics.html?highlight=matplotlib#magic-debug) Activates the interactive debugger in a cell. Run after an exception has been occured to help diagnose the issue.\n",
|
||||
" \n",
|
||||
"### Plotting with `matplotlib`\n",
|
||||
"\n",
|
||||
"When setting up your environment one of the dependencies we asked you to install was `matplotlib`. This is an extensive plotting and data visualisation library which is tightly integrated with NumPy and Jupyter notebooks.\n",
|
||||
"\n",
|
||||
"When using `matplotlib` in a notebook you should first run the [magic command](http://ipython.readthedocs.io/en/stable/interactive/magics.html?highlight=matplotlib)\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"%matplotlib inline\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will cause all plots to be automatically displayed as images in the output area of the cell they are created in. Below we give a toy example of plotting two sinusoids using `matplotlib` to show case some of the basic plot options. To see the output produced select the cell and then run it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# generate a pair of sinusoids\n",
|
||||
"x = np.linspace(0., 2. * np.pi, 100)\n",
|
||||
"y1 = np.sin(x)\n",
|
||||
"y2 = np.cos(x)\n",
|
||||
"\n",
|
||||
"# produce a new figure object with a defined (width, height) in inches\n",
|
||||
"fig = plt.figure(figsize=(8, 4))\n",
|
||||
"# add a single axis to the figure\n",
|
||||
"ax = fig.add_subplot(111)\n",
|
||||
"# plot the two sinusoidal traces on the axis, adjusting the line width\n",
|
||||
"# and adding LaTeX legend labels\n",
|
||||
"ax.plot(x, y1, linewidth=2, label=r'$\\sin(x)$')\n",
|
||||
"ax.plot(x, y2, linewidth=2, label=r'$\\cos(x)$')\n",
|
||||
"# set the axis labels\n",
|
||||
"ax.set_xlabel('$x$', fontsize=16)\n",
|
||||
"ax.set_ylabel('$y$', fontsize=16)\n",
|
||||
"# force the legend to be displayed\n",
|
||||
"ax.legend()\n",
|
||||
"# adjust the limits of the horizontal axis\n",
|
||||
"ax.set_xlim(0., 2. * np.pi)\n",
|
||||
"# make a grid be displayed in the axis background\n",
|
||||
"ax.grid(True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,614 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Generalisation and overfitting\n",
|
||||
"\n",
|
||||
"In this notebook we will explore the issue of overfitting and how we can measure how well the models we train generalise their predictions to unseen data. This will build upon the introduction to generalisation given in the fourth lecture."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exercise 1: overfitting and model complexity in a 1D regression problem\n",
|
||||
"\n",
|
||||
"As an exercise we will consider a regression problem. In particular, given a fixed set of (noisy) observations of the underlying functional relationship between inputs and outputs, we will attempt to use a multiple layer network model to learn to predict output values from inputs. The aim of the exercise will be to visualise how increasing the complexity of the model we fit to the training data effects the ability of the model to make predictions across the input space.\n",
|
||||
"\n",
|
||||
"### Function\n",
|
||||
"\n",
|
||||
"To keep things simple we will consider a single input-output function defined by a fourth degree polynomial (quartic)\n",
|
||||
"\n",
|
||||
"$$ f(x) = 10 x^4 - 17 x^3 + 8 x^2 - x $$\n",
|
||||
"\n",
|
||||
"with the observed values being the function values plus zero-mean Gaussian noise\n",
|
||||
"\n",
|
||||
"$$ y = f(x) + 0.01 \\epsilon \\qquad \\epsilon \\sim \\mathcal{N}\\left(\\cdot;\\,0,\\,1\\right) $$\n",
|
||||
"\n",
|
||||
"The inputs will be drawn from the uniform distribution on $[0, 1]$.\n",
|
||||
"\n",
|
||||
"First import the necessary modules and seed the random number generator by running the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"plt.style.use('ggplot')\n",
|
||||
"seed = 17102016 \n",
|
||||
"rng = np.random.RandomState(seed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Your Tasks:**\n",
|
||||
"\n",
|
||||
"Write code in the cell below to calculate a polynomial function of one dimensional inputs. \n",
|
||||
"\n",
|
||||
"If $\\boldsymbol{c}$ is a length $P$ vector of coefficients corresponding to increasing powers in the polynomial (starting from the constant zero power term up to the $P-1^{\\textrm{th}}$ power) the function should correspond to the following\n",
|
||||
"\n",
|
||||
"\\begin{equation}\n",
|
||||
" f_{\\textrm{polynomial}}(x,\\ \\boldsymbol{c}) = \\sum_{p=0}^{P-1} \\left( c_p x^p \\right)\n",
|
||||
"\\end{equation}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def polynomial_function(inputs, coefficients):\n",
|
||||
" \"\"\"Calculates polynomial with given coefficients of an array of inputs.\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" inputs: One-dimensional array of input values of shape (num_inputs,)\n",
|
||||
" coefficients: One-dimensional array of polynomial coefficient terms\n",
|
||||
" with `coefficients[0]` corresponding to the coefficient for the\n",
|
||||
" zero order term in the polynomial (constant) and `coefficients[-1]`\n",
|
||||
" corresponding to the highest order term.\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" One dimensional array of output values of shape (num_inputs,)\n",
|
||||
" \n",
|
||||
" \"\"\"\n",
|
||||
" raise NotImplementedError(\"TODO Implement this function\") "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the cell below to test your implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_coefficients = np.array([-1., 3., 4.])\n",
|
||||
"test_inputs = np.array([0., 0.5, 1., 2.])\n",
|
||||
"test_outputs = np.array([-1., 1.5, 6., 21.])\n",
|
||||
"assert polynomial_function(test_inputs, test_coefficients).shape == (4,), (\n",
|
||||
" 'Function gives wrong shape output.'\n",
|
||||
")\n",
|
||||
"assert np.allclose(polynomial_function(test_inputs, test_coefficients), test_outputs), (\n",
|
||||
" 'Function gives incorrect output values.'\n",
|
||||
")\n",
|
||||
"print(\"Function is correct!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now need to use the random number generator to sample input values and calculate the corresponding target outputs using your polynomial implementation with the relevant coefficients for our function. Do this by running the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"coefficients = np.array([0, -1., 8., -17., 10.])\n",
|
||||
"input_dim, output_dim = 1, 1\n",
|
||||
"noise_std = 0.01\n",
|
||||
"num_data = 80\n",
|
||||
"inputs = rng.uniform(size=(num_data, input_dim))\n",
|
||||
"epsilons = rng.normal(size=num_data)\n",
|
||||
"targets = (polynomial_function(inputs[:, 0], coefficients) + \n",
|
||||
" epsilons * noise_std)[:, None]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will split the generated data points in to equal sized training and validation data sets and use these to create data provider objects which we can use to train models in our framework. As the dataset is small here we will use a batch size equal to the size of the data set. Run the cell below to split the data and set up the data provider objects."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from mlp.data_providers import DataProvider\n",
|
||||
"num_train = num_data // 2\n",
|
||||
"batch_size = num_train\n",
|
||||
"inputs_train, targets_train = inputs[:num_train], targets[:num_train]\n",
|
||||
"inputs_valid, targets_valid = inputs[num_train:], targets[num_train:]\n",
|
||||
"train_data = DataProvider(inputs_train, targets_train, batch_size=batch_size, rng=rng)\n",
|
||||
"valid_data = DataProvider(inputs_valid, targets_valid, batch_size=batch_size, rng=rng)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now visualise the data we will be modelling. Run the cell below to plot the target outputs against inputs for both the training and validation sets. Note the clear underlying smooth functional relationship evident in the noisy data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig = plt.figure(figsize=(8, 4))\n",
|
||||
"ax = fig.add_subplot(111)\n",
|
||||
"ax.plot(inputs_train[:, 0], targets_train[:, 0], '.', label='training data')\n",
|
||||
"ax.plot(inputs_valid[:, 0], targets_valid[:, 0], '.', label='validation data')\n",
|
||||
"ax.set_xlabel('Inputs $x$', fontsize=14)\n",
|
||||
"ax.set_ylabel('Ouputs $y$', fontsize=14)\n",
|
||||
"ax.legend(loc='best')\n",
|
||||
"fig.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model\n",
|
||||
"\n",
|
||||
"We will fit models with a varying number of parameters to the training data. As multi-layer logistic sigmoid models tend to perform poorly in regressions tasks like this we will instead use a [radial basis function (RBF) network](https://en.wikipedia.org/wiki/Radial_basis_function_network).\n",
|
||||
"\n",
|
||||
"This model predicts the output as the weighted sum of basis functions (here Gaussian like bumps) tiled across the input space. The cell below generates a random set of weights and bias for a RBF network and plots the modelled input-output function across inputs $[0, 1]$. Run the cell below for several different number of weight parameters (specified with `num_weights` variable) to get a feel for the sort of predictions the RBF network models produce."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_weights = 15\n",
|
||||
"weights_scale = 1.\n",
|
||||
"bias_scale = 1.\n",
|
||||
"\n",
|
||||
"def basis_function(x, centre, scale):\n",
|
||||
" return np.exp(-(x - centre)**2 / scale**2)\n",
|
||||
"\n",
|
||||
"weights = rng.normal(size=num_weights) * weights_scale\n",
|
||||
"bias = rng.normal() * bias_scale\n",
|
||||
"\n",
|
||||
"centres = np.linspace(0, 1, weights.shape[0])\n",
|
||||
"scale = 1. / weights.shape[0]\n",
|
||||
"\n",
|
||||
"xs = np.linspace(0, 1, 200)\n",
|
||||
"ys = np.zeros(xs.shape[0])\n",
|
||||
"\n",
|
||||
"fig = plt.figure(figsize=(12, 4))\n",
|
||||
"ax = fig.add_subplot(1, 1, 1)\n",
|
||||
"for weight, centre in zip(weights, centres):\n",
|
||||
" ys += weight * basis_function(xs, centre, scale)\n",
|
||||
"ys += bias\n",
|
||||
"ax.plot(xs, ys)\n",
|
||||
"ax.set_xlabel('Input', fontsize=14)\n",
|
||||
"ax.set_ylabel('Output', fontsize=14)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You do not need to study the details of how to implement this model. All of the additional code you need to fit RBF networks is provided in the `RadialBasisFunctionLayer` in the `mlp.layers` module. The `RadialBasisFunctionLayer` class has the same interface as the layer classes we encountered in the previous lab, defining both `fprop` and `bprop` methods, and we can therefore include it as a layer in a `MultipleLayerModel` as with any other layer. \n",
|
||||
"\n",
|
||||
"Here we will use the `RadialBasisFunctionLayer` as the first layer in a two layer model. This first layer calculates the basis function terms which are then be weighted and summed together in an `AffineLayer`, the second and final layer. This illustrates the advantage of using a modular framework - we can reuse the code we previously implemented to train a quite different model architecture just by defining a new layer class. \n",
|
||||
"\n",
|
||||
"Run the cell below which contains some necessary setup code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from mlp.models import MultipleLayerModel\n",
|
||||
"from mlp.layers import AffineLayer, RadialBasisFunctionLayer\n",
|
||||
"from mlp.errors import SumOfSquaredDiffsError\n",
|
||||
"from mlp.initialisers import ConstantInit, UniformInit\n",
|
||||
"from mlp.learning_rules import GradientDescentLearningRule\n",
|
||||
"from mlp.optimisers import Optimiser\n",
|
||||
"\n",
|
||||
"# Regression problem therefore use sum of squared differences error\n",
|
||||
"error = SumOfSquaredDiffsError()\n",
|
||||
"# Use basic gradient descent learning rule with fixed learning rate\n",
|
||||
"learning_rule = GradientDescentLearningRule(0.1)\n",
|
||||
"# Initialise weights from uniform distribution and zero bias\n",
|
||||
"weights_init = UniformInit(-0.1, 0.1)\n",
|
||||
"biases_init = ConstantInit(0.)\n",
|
||||
"# Train all models for 2000 epochs\n",
|
||||
"num_epoch = 2000"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The next cell defines RBF network models with varying number of weight parameters (equal to the number of basis functions) and fits each to the training set, recording the final training and validation set errors for the fitted models. Run it now to fit the models and calculate the error values."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_weight_list = [2, 5, 10, 25, 50, 100]\n",
|
||||
"models = []\n",
|
||||
"train_errors = []\n",
|
||||
"valid_errors = []\n",
|
||||
"for num_weight in num_weight_list:\n",
|
||||
" model = MultipleLayerModel([\n",
|
||||
" RadialBasisFunctionLayer(num_weight),\n",
|
||||
" AffineLayer(input_dim * num_weight, output_dim, \n",
|
||||
" weights_init, biases_init)\n",
|
||||
" ])\n",
|
||||
" optimiser = Optimiser(model, error, learning_rule, \n",
|
||||
" train_data, valid_data)\n",
|
||||
" print('-' * 80)\n",
|
||||
" print('Training model with {0} weights'.format(num_weight))\n",
|
||||
" print('-' * 80)\n",
|
||||
" _ = optimiser.train(num_epoch, -1)\n",
|
||||
" outputs_train = model.fprop(inputs_train)[-1]\n",
|
||||
" outputs_valid = model.fprop(inputs_valid)[-1]\n",
|
||||
" models.append(model)\n",
|
||||
" train_errors.append(error(outputs_train, targets_train))\n",
|
||||
" valid_errors.append(error(outputs_valid, targets_valid))\n",
|
||||
" print(' Final training set error: {0:.1e}'.format(train_errors[-1]))\n",
|
||||
" print(' Final validation set error: {0:.1e}'.format(valid_errors[-1]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Your Tasks**\n",
|
||||
"\n",
|
||||
"In the cell below write code to [plot bar charts](http://matplotlib.org/examples/api/barchart_demo.html) of the training and validation set errors for the different fitted models.\n",
|
||||
"\n",
|
||||
"Some questions to think about from the plots:\n",
|
||||
"\n",
|
||||
" * Do the models with more free parameters fit the training data better or worse?\n",
|
||||
" * What does the validation set error value tell us about the models?\n",
|
||||
" * Of the models fitted here which would you say seems like it is most likely to generalise well to unseen data? \n",
|
||||
" * Do any of the models seem to be overfitting?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TODO plot the bar charts here"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's visualise what the fitted model's predictions look like across the whole input space compared to the 'true' function we were trying to fit. \n",
|
||||
"\n",
|
||||
"**Your Tasks:** \n",
|
||||
"\n",
|
||||
"In the cell below, for each of the fitted models stored in the `models` list above:\n",
|
||||
" * Compute output predictions for the model across a linearly spaced series of 500 input points between 0 and 1 in the input space.\n",
|
||||
" * Plot the computed predicted outputs and true function values at the corresponding inputs as line plots on the same axis (use a new axis for each model).\n",
|
||||
" * On the same axis plot the training data sets input-target pairs as points."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#TODO plot the graphs here"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You should be able to relate your answers to the questions above to what you see in these plots - ask a demonstrator if you are unsure what is going on. In particular for the models which appeared to be overfitting and generalising poorly you should now have an idea how this looks in terms of the model's predictions and how these relate to the training data points and true function values.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyTorch\n",
|
||||
"\n",
|
||||
"As we have seen in the [previous lab](https://github.com/VICO-UoE/mlpractical/tree/mlp2023-24/lab3/notebooks/03_Multiple_layer_models.ipynb), our model shows signs of overfitting after $15$ epochs. \n",
|
||||
"\n",
|
||||
"Overfitting occurs when the model learns the training data too well, and fails to generalise to unseen data. In this case, the model learns the noise in the training data, and fails to learn the underlying function. \n",
|
||||
"\n",
|
||||
"The model may be too complex, and we can reduce the complexity by reducing the number of parameters. However, this may not be the best solution, as we may not be able to learn the underlying function with a simple model.\n",
|
||||
"\n",
|
||||
"In practice, a model that overfits the training data will have a low training error, but a high validation error.\n",
|
||||
"\n",
|
||||
"*What can we deduce if we observe a high training error and a high validation error?*\n",
|
||||
"\n",
|
||||
"Overfitting is a common problem in machine learning, and there are many techniques to avoid it. In this lab, we will explore one of them: *early stopping*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.optim as optim\n",
|
||||
"from torchvision import datasets,transforms\n",
|
||||
"from torch.utils.data.sampler import SubsetRandomSampler\n",
|
||||
"\n",
|
||||
"torch.manual_seed(seed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Device configuration\n",
|
||||
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
|
||||
"\n",
|
||||
"# Set training run hyperparameters\n",
|
||||
"batch_size = 128 # number of data points in a batch\n",
|
||||
"learning_rate = 0.001 # learning rate for gradient descent\n",
|
||||
"num_epochs = 50 # number of training epochs to perform\n",
|
||||
"stats_interval = 1 # epoch interval between recording and printing stats"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"transform=transforms.Compose([\n",
|
||||
" transforms.ToTensor(),\n",
|
||||
" transforms.Normalize((0.1307,), (0.3081,))\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
"train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)\n",
|
||||
"test_dataset = datasets.MNIST('../data', train=False, download=True, transform=transform)\n",
|
||||
"\n",
|
||||
"valid_size=0.2 # Leave 20% of training set as validation set\n",
|
||||
"num_train = len(train_dataset)\n",
|
||||
"indices = list(range(num_train))\n",
|
||||
"split = int(np.floor(valid_size * num_train))\n",
|
||||
"np.random.shuffle(indices) # Shuffle indices in-place\n",
|
||||
"train_idx, valid_idx = indices[split:], indices[:split] # Split indices into training and validation sets\n",
|
||||
"train_sampler = SubsetRandomSampler(train_idx)\n",
|
||||
"valid_sampler = SubsetRandomSampler(valid_idx)\n",
|
||||
"\n",
|
||||
"# Create the dataloaders\n",
|
||||
"train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, pin_memory=True)\n",
|
||||
"valid_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=valid_sampler, pin_memory=True)\n",
|
||||
"test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class MultipleLayerModel(nn.Module):\n",
|
||||
" \"\"\"Multiple layer model.\"\"\"\n",
|
||||
" def __init__(self, input_dim, output_dim, hidden_dim):\n",
|
||||
" super().__init__()\n",
|
||||
" self.flatten = nn.Flatten()\n",
|
||||
" self.linear_relu_stack = nn.Sequential(\n",
|
||||
" nn.Linear(input_dim, hidden_dim),\n",
|
||||
" nn.ReLU(),\n",
|
||||
" nn.Linear(hidden_dim, hidden_dim),\n",
|
||||
" nn.ReLU(),\n",
|
||||
" nn.Linear(hidden_dim, output_dim),\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" def forward(self, x):\n",
|
||||
" x = self.flatten(x)\n",
|
||||
" logits = self.linear_relu_stack(x)\n",
|
||||
" return logits\n",
|
||||
" \n",
|
||||
"input_dim = 1*28*28\n",
|
||||
"output_dim = 10\n",
|
||||
"hidden_dim = 100\n",
|
||||
"\n",
|
||||
"model = MultipleLayerModel(input_dim, output_dim, hidden_dim).to(device)\n",
|
||||
"\n",
|
||||
"loss = nn.CrossEntropyLoss() # Cross-entropy loss function\n",
|
||||
"optimizer = optim.Adam(model.parameters(), lr=learning_rate) # Adam optimiser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Early stopping is a simple technique to avoid overfitting. The idea is to stop training when the validation error starts to increase. This is usually done by monitoring the validation error during training, and stopping when it has not decreased for a certain number of epochs.\n",
|
||||
"\n",
|
||||
"*Can we state that overfitting is ultimatelly inevitable given training over a very large number of epochs?*\n",
|
||||
"\n",
|
||||
"In this section, we will implement early stopping in PyTorch. We will use the same model as in the previous lab, but we will train it for $50$ epochs with an early stopping."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class EarlyStopping:\n",
|
||||
" \"\"\"Early stops the training if validation loss doesn't improve after a given patience.\"\"\"\n",
|
||||
" def __init__(self, patience=5, min_delta=0):\n",
|
||||
"\n",
|
||||
" self.patience = patience # Number of epochs with no improvement after which training will be stopped\n",
|
||||
" self.min_delta = min_delta # Minimum change in the monitored quantity to qualify as an improvement\n",
|
||||
" self.counter = 0\n",
|
||||
" self.min_validation_loss = float('inf')\n",
|
||||
" self.early_stop = False\n",
|
||||
"\n",
|
||||
" def __call__(self, validation_loss):\n",
|
||||
" # If validation loss is lower than minimum validation loss so far,\n",
|
||||
" # reset counter and set minimum validation loss to current validation loss\n",
|
||||
" if validation_loss < self.min_validation_loss:\n",
|
||||
" self.min_validation_loss = validation_loss\n",
|
||||
" self.counter = 0\n",
|
||||
" # If validation loss hasn't improved since minimum validation loss,\n",
|
||||
" # increment counter\n",
|
||||
" elif validation_loss > (self.min_validation_loss + self.min_delta):\n",
|
||||
" self.counter += 1\n",
|
||||
" # If counter has reached patience, set early_stop flag to True\n",
|
||||
" if self.counter >= self.patience: \n",
|
||||
" self.early_stop = True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialise early stopping object\n",
|
||||
"early_stopping = EarlyStopping(patience=5, min_delta=0.01)\n",
|
||||
"\n",
|
||||
"# Keep track of the loss values over training\n",
|
||||
"train_loss = [] \n",
|
||||
"valid_loss = []\n",
|
||||
"\n",
|
||||
"# Train model\n",
|
||||
"for i in range(num_epochs+1):\n",
|
||||
" # Training\n",
|
||||
" model.train()\n",
|
||||
" batch_loss = []\n",
|
||||
" for batch_idx, (x, t) in enumerate(train_loader):\n",
|
||||
" x = x.to(device)\n",
|
||||
" t = t.to(device)\n",
|
||||
" \n",
|
||||
" # Forward pass\n",
|
||||
" y = model(x)\n",
|
||||
" E_value = loss(y, t)\n",
|
||||
" \n",
|
||||
" # Backward pass\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" E_value.backward()\n",
|
||||
" optimizer.step()\n",
|
||||
" \n",
|
||||
" # Logging\n",
|
||||
" batch_loss.append(E_value.item())\n",
|
||||
" \n",
|
||||
" train_loss.append(np.mean(batch_loss))\n",
|
||||
"\n",
|
||||
" # Validation\n",
|
||||
" model.eval()\n",
|
||||
" batch_loss = []\n",
|
||||
" for batch_idx, (x, t) in enumerate(valid_loader):\n",
|
||||
" x = x.to(device)\n",
|
||||
" t = t.to(device)\n",
|
||||
" \n",
|
||||
" # Forward pass\n",
|
||||
" y = model(x)\n",
|
||||
" E_value = loss(y, t)\n",
|
||||
" \n",
|
||||
" # Logging\n",
|
||||
" batch_loss.append(E_value.item())\n",
|
||||
" \n",
|
||||
" valid_loss.append(np.mean(batch_loss))\n",
|
||||
"\n",
|
||||
" if i % stats_interval == 0:\n",
|
||||
" print('Epoch: {} \\tError(train): {:.6f} \\tError(valid): {:.6f} '.format(\n",
|
||||
" i, train_loss[-1], valid_loss[-1]))\n",
|
||||
" \n",
|
||||
" # Check for early stopping\n",
|
||||
" early_stopping(valid_loss[-1])\n",
|
||||
"\n",
|
||||
" if early_stopping.early_stop:\n",
|
||||
" print(\"Early stopping\")\n",
|
||||
" break # Stop training"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Plot the change in the validation and training set error over training.\n",
|
||||
"fig_1 = plt.figure(figsize=(8, 4))\n",
|
||||
"ax_1 = fig_1.add_subplot(111)\n",
|
||||
"ax_1.plot(train_loss, label='Error(train)')\n",
|
||||
"ax_1.plot(valid_loss, label='Error(valid)')\n",
|
||||
"ax_1.legend(loc=0)\n",
|
||||
"ax_1.set_xlabel('Epoch number')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "mlp",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
@ -1,182 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Coursework 1\n",
|
||||
"\n",
|
||||
"This notebook is intended to be used as a starting point for your experiments. The instructions can be found in the MLP2024_25_CW1_Spec.pdf (see Learn, Assignment Submission, Coursework 1). The methods provided here are just helper functions. If you want more complex graphs such as side by side comparisons of different experiments you should learn more about matplotlib and implement them. Before each experiment remember to re-initialize neural network weights and reset the data providers so you get a properly initialized experiment. For each experiment try to keep most hyperparameters the same except the one under investigation so you can understand what the effects of each are."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline\n",
|
||||
"plt.style.use('ggplot')\n",
|
||||
"\n",
|
||||
"def train_model_and_plot_stats(\n",
|
||||
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):\n",
|
||||
" \n",
|
||||
" # As well as monitoring the error over training also monitor classification\n",
|
||||
" # accuracy i.e. proportion of most-probable predicted classes being equal to targets\n",
|
||||
" data_monitors={'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}\n",
|
||||
"\n",
|
||||
" # Use the created objects to initialise a new Optimiser instance.\n",
|
||||
" optimiser = Optimiser(\n",
|
||||
" model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)\n",
|
||||
"\n",
|
||||
" # Run the optimiser for num_epochs epochs (full passes through the training set)\n",
|
||||
" # printing statistics every epoch.\n",
|
||||
" stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)\n",
|
||||
"\n",
|
||||
" # Plot the change in the validation and training set error over training.\n",
|
||||
" fig_1 = plt.figure(figsize=(8, 4))\n",
|
||||
" ax_1 = fig_1.add_subplot(111)\n",
|
||||
" for k in ['error(train)', 'error(valid)']:\n",
|
||||
" ax_1.plot(np.arange(1, stats.shape[0]) * stats_interval, \n",
|
||||
" stats[1:, keys[k]], label=k)\n",
|
||||
" ax_1.legend(loc=0)\n",
|
||||
" ax_1.set_xlabel('Epoch number')\n",
|
||||
" ax_1.set_ylabel('Error')\n",
|
||||
"\n",
|
||||
" # Plot the change in the validation and training set accuracy over training.\n",
|
||||
" fig_2 = plt.figure(figsize=(8, 4))\n",
|
||||
" ax_2 = fig_2.add_subplot(111)\n",
|
||||
" for k in ['acc(train)', 'acc(valid)']:\n",
|
||||
" ax_2.plot(np.arange(1, stats.shape[0]) * stats_interval, \n",
|
||||
" stats[1:, keys[k]], label=k)\n",
|
||||
" ax_2.legend(loc=0)\n",
|
||||
" ax_2.set_xlabel('Epoch number')\n",
|
||||
" ax_2.set_xlabel('Accuracy')\n",
|
||||
" \n",
|
||||
" return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The below code will set up the data providers, random number\n",
|
||||
"# generator and logger objects needed for training runs. As\n",
|
||||
"# loading the data from file take a little while you generally\n",
|
||||
"# will probably not want to reload the data providers on\n",
|
||||
"# every training run. If you wish to reset their state you\n",
|
||||
"# should instead use the .reset() method of the data providers.\n",
|
||||
"import numpy as np\n",
|
||||
"import logging\n",
|
||||
"import sys\n",
|
||||
"# sys.path.append('/path/to/mlpractical')\n",
|
||||
"from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider\n",
|
||||
"\n",
|
||||
"# Seed a random number generator\n",
|
||||
"seed = 11102019 \n",
|
||||
"rng = np.random.RandomState(seed)\n",
|
||||
"batch_size = 100\n",
|
||||
"# Set up a logger object to print info about the training run to stdout\n",
|
||||
"logger = logging.getLogger()\n",
|
||||
"logger.setLevel(logging.INFO)\n",
|
||||
"logger.handlers = [logging.StreamHandler()]\n",
|
||||
"\n",
|
||||
"# Create data provider objects for the MNIST data set\n",
|
||||
"train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)\n",
|
||||
"valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The model set up code below is provided as a starting point.\n",
|
||||
"# You will probably want to add further code cells for the\n",
|
||||
"# different experiments you run.\n",
|
||||
"\n",
|
||||
"%pip install tqdm\n",
|
||||
"\n",
|
||||
"from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer\n",
|
||||
"from mlp.errors import CrossEntropySoftmaxError\n",
|
||||
"from mlp.models import MultipleLayerModel\n",
|
||||
"from mlp.initialisers import ConstantInit, GlorotUniformInit\n",
|
||||
"from mlp.learning_rules import AdamLearningRule\n",
|
||||
"from mlp.optimisers import Optimiser\n",
|
||||
"\n",
|
||||
"# Setup hyperparameters\n",
|
||||
"learning_rate = 0.001\n",
|
||||
"num_epochs = 100\n",
|
||||
"stats_interval = 1\n",
|
||||
"input_dim, output_dim, hidden_dim = 784, 47, 128\n",
|
||||
"\n",
|
||||
"weights_init = GlorotUniformInit(rng=rng)\n",
|
||||
"biases_init = ConstantInit(0.)\n",
|
||||
"\n",
|
||||
"# Create model with ONE hidden layer\n",
|
||||
"model = MultipleLayerModel([\n",
|
||||
" AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # hidden layer\n",
|
||||
" ReluLayer(),\n",
|
||||
" AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"error = CrossEntropySoftmaxError()\n",
|
||||
"# Use a Adam learning rule\n",
|
||||
"learning_rule = AdamLearningRule(learning_rate=learning_rate)\n",
|
||||
"\n",
|
||||
"# Remember to use notebook=False when you write a script to be run in a terminal\n",
|
||||
"_ = train_model_and_plot_stats(\n",
|
||||
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create model with TWO hidden layers\n",
|
||||
"model = MultipleLayerModel([\n",
|
||||
" AffineLayer(input_dim, hidden_dim, weights_init, biases_init), # first hidden layer\n",
|
||||
" ReluLayer(),\n",
|
||||
" AffineLayer(hidden_dim, hidden_dim, weights_init, biases_init), # second hidden layer\n",
|
||||
" ReluLayer(),\n",
|
||||
" AffineLayer(hidden_dim, output_dim, weights_init, biases_init) # output layer\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"error = CrossEntropySoftmaxError()\n",
|
||||
"# Use a Adam learning rule\n",
|
||||
"learning_rule = AdamLearningRule(learning_rate=learning_rate)\n",
|
||||
"\n",
|
||||
"# Remember to use notebook=False when you write a script to be run in a terminal\n",
|
||||
"_ = train_model_and_plot_stats(\n",
|
||||
" model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
665
notebooks/Coursework_2_Pytorch_Introduction.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,122 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"# sys.path.append('/path/to/mlpractical')\n",
|
||||
"from mlp.test_methods import test_dropout_layer\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"fprop_test, fprop_output, fprop_correct, \\\n",
|
||||
"bprop_test, bprop_output, bprop_correct = test_dropout_layer()\n",
|
||||
"\n",
|
||||
"assert fprop_test == 1.0, (\n",
|
||||
"'The dropout layer fprop functionality test failed'\n",
|
||||
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
|
||||
".format(fprop_correct, fprop_output, fprop_output-fprop_correct)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Dropout Layer Fprop Functionality Test Passed\")\n",
|
||||
"\n",
|
||||
"assert bprop_test == 1.0, (\n",
|
||||
"'The dropout layer bprop functionality test failed'\n",
|
||||
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
|
||||
".format(bprop_correct, bprop_output, bprop_output-bprop_correct)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"Dropout Layer Bprop Test Passed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from mlp.test_methods import test_L1_Penalty\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"call_test, call_output, call_correct, \\\n",
|
||||
"grad_test, grad_output, grad_correct = test_L1_Penalty()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"assert call_test == 1.0, (\n",
|
||||
"'The call function for L1 Penalty test failed'\n",
|
||||
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
|
||||
".format(call_correct, call_output, call_output-call_correct)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"L1 Penalty Call Functionality Test Passed\")\n",
|
||||
"\n",
|
||||
"assert grad_test == 1.0, (\n",
|
||||
"'The grad function for L1 Penalty test failed'\n",
|
||||
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
|
||||
".format(grad_correct, grad_output, grad_output-grad_correct, grad_output/grad_correct)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"L1 Penalty Grad Function Test Passed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from mlp.test_methods import test_L2_Penalty\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"call_test, call_output, call_correct, \\\n",
|
||||
"grad_test, grad_output, grad_correct = test_L2_Penalty()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"assert call_test == 1.0, (\n",
|
||||
"'The call function for L2 Penalty test failed'\n",
|
||||
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
|
||||
".format(call_correct, call_output, call_output-call_correct)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"L2 Penalty Call Functionality Test Passed\")\n",
|
||||
"\n",
|
||||
"assert grad_test == 1.0, (\n",
|
||||
"'The grad function for L2 Penalty test failed'\n",
|
||||
"'Correct output is \\n\\n{0}\\n\\n but returned output is \\n\\n{1}\\n\\n difference is \\n\\n{2}'\n",
|
||||
".format(grad_correct, grad_output, grad_output-grad_correct, grad_output/grad_correct)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"L2 Penalty Grad Function Test Passed\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
200
notebooks/Plot_Results.ipynb
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,105 +0,0 @@
import argparse
import os
import numpy as np

import sys
# sys.path.append('/path/to/mlpractical')

from mlp.layers import DropoutLayer
from mlp.penalties import L1Penalty, L2Penalty

parser = argparse.ArgumentParser(description='Welcome to regularization test script')

parser.add_argument('--student_id', nargs="?", type=str, help='Your student id in the format "Sxxxxxxx"')

args = parser.parse_args()

student_id = args.student_id


def fprop_bprop_layer(inputs, dropout_layer, grads_wrt_outputs, weights, params=False):
    if params:
        dropout_layer.params = [weights]

    fprop = dropout_layer.fprop(inputs)
    bprop = dropout_layer.bprop(inputs, fprop, grads_wrt_outputs)

    outputs = [fprop, bprop]
    if params:
        grads_wrt_weights = dropout_layer.grads_wrt_params(
            inputs, grads_wrt_outputs)
        outputs.append(grads_wrt_weights)

    return outputs


def call_grad_layer(inputs, penalty_layer, grads_wrt_outputs, weights, params=False):
    if params:
        penalty_layer.params = [weights]

    call = penalty_layer(inputs)
    grad = penalty_layer.grad(inputs)

    outputs = [call, grad]
    if params:
        grads_wrt_weights = penalty_layer.grads_wrt_params(
            inputs, grads_wrt_outputs)
        outputs.append(grads_wrt_weights)

    return outputs


def get_student_seed(student_id):
    student_seed_number = int(student_id[1:])
    return student_seed_number


seed = get_student_seed(student_id)
rng = np.random.RandomState(seed)

reg_output_dict = dict()

inputs = rng.normal(loc=0.0, scale=1.0, size=(32, 3, 8, 8))
grads_wrt_outputs = rng.normal(loc=0.0, scale=1.0, size=(32, 3, 8, 8))
weights = rng.normal(loc=0.0, scale=1.0, size=(1))

reg_output_dict['inputs'] = inputs
reg_output_dict['weights'] = weights
reg_output_dict['grads_wrt_outputs'] = grads_wrt_outputs

for dropout_layer, params_flag in zip(
        [DropoutLayer],
        [False]):
    if isinstance(dropout_layer(), DropoutLayer):
        # DropoutLayer is given a fixed-seed RNG so the masks are reproducible
        rng = np.random.RandomState(92019)
        outputs = fprop_bprop_layer(inputs, dropout_layer(
            rng=rng), grads_wrt_outputs, weights, params_flag)
    else:
        outputs = fprop_bprop_layer(
            inputs, dropout_layer(), grads_wrt_outputs, weights, params_flag)
    reg_output_dict['{}_{}'.format(
        dropout_layer.__name__, 'fprop')] = outputs[0]
    reg_output_dict['{}_{}'.format(
        dropout_layer.__name__, 'bprop')] = outputs[1]
    if params_flag:
        reg_output_dict['{}_{}'.format(
            dropout_layer.__name__, 'grads_wrt_outputs')] = outputs[2]

for penalty_layer, params_flag in zip(
        [L1Penalty, L2Penalty], [False, False]):
    outputs = call_grad_layer(inputs, penalty_layer(
        1e-4), grads_wrt_outputs, weights, params_flag)
    reg_output_dict['{}_{}'.format(
        penalty_layer.__name__, '__call__correct')] = outputs[0]
    reg_output_dict['{}_{}'.format(
        penalty_layer.__name__, 'grad_correct')] = outputs[1]
    if params_flag:
        reg_output_dict['{}_{}'.format(
            penalty_layer.__name__, 'grads_wrt_outputs')] = outputs[2]

np.save(os.path.join(os.environ['MLP_DATA_DIR'],
                     '{}_regularization_test_pack.npy'.format(seed)), reg_output_dict)

test_data = np.load(os.path.join(
    os.environ['MLP_DATA_DIR'], '{}_regularization_test_pack.npy'.format(seed)), allow_pickle=True)

for key, value in test_data.item().items():
    print(key, value)