1st labs
This commit is contained in:
parent
fd35da1b86
commit
c35e81a211
1
data/.dropbox.attr
Executable file
1
data/.dropbox.attr
Executable file
@ -0,0 +1 @@
|
||||
{}
|
1023
data/HadSSP_daily_qc.txt
Normal file
1023
data/HadSSP_daily_qc.txt
Normal file
File diff suppressed because it is too large
Load Diff
BIN
data/mnist_eval.pkl.gz
Normal file
BIN
data/mnist_eval.pkl.gz
Normal file
Binary file not shown.
BIN
data/mnist_train.pkl.gz
Normal file
BIN
data/mnist_train.pkl.gz
Normal file
Binary file not shown.
BIN
data/mnist_valid.pkl.gz
Normal file
BIN
data/mnist_valid.pkl.gz
Normal file
Binary file not shown.
0
mlp/__init__.py
Normal file
0
mlp/__init__.py
Normal file
187
mlp/dataset.py
Normal file
187
mlp/dataset.py
Normal file
@ -0,0 +1,187 @@
|
||||
|
||||
# Machine Learning Practical (INFR11119),
|
||||
# Pawel Swietojanski, University of Edinburgh
|
||||
|
||||
import cPickle
|
||||
import gzip
|
||||
import numpy
|
||||
import os
|
||||
|
||||
|
||||
class DataProvider(object):
|
||||
"""
|
||||
Data provider defines an interface for our
|
||||
generic data-independent readers.
|
||||
"""
|
||||
def __init__(self, batch_size, randomize=True):
|
||||
"""
|
||||
:param batch_size: int, specifies the number
|
||||
of elements returned at each step
|
||||
:param randomize: bool, shuffles examples prior
|
||||
to iteration, so they are presented in random
|
||||
order for stochastic gradient descent training
|
||||
:return:
|
||||
"""
|
||||
self.batch_size = batch_size
|
||||
self.randomize = randomize
|
||||
self._curr_idx = 0
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Resets the provider to the initial state to
|
||||
use in another epoch
|
||||
:return: None
|
||||
"""
|
||||
self._curr_idx = 0
|
||||
|
||||
def __randomize(self):
|
||||
"""
|
||||
Data-specific implementation of shuffling mechanism
|
||||
:return:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
"""
|
||||
Data-specific iteration mechanism.
|
||||
:return:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class MNISTDataProvider(DataProvider):
|
||||
"""
|
||||
The class iterates over MNIST digits dataset, in possibly
|
||||
random order.
|
||||
"""
|
||||
def __init__(self, dset,
|
||||
batch_size=10,
|
||||
max_num_examples=-1,
|
||||
randomize=True):
|
||||
|
||||
super(MNISTDataProvider, self).\
|
||||
__init__(batch_size, randomize)
|
||||
|
||||
assert dset in ['train', 'valid', 'eval'], (
|
||||
"Expected dset to be either 'train', "
|
||||
"'valid' or 'eval' got %s" % dset
|
||||
)
|
||||
|
||||
dset_path = './data/mnist_%s.pkl.gz' % dset
|
||||
assert os.path.isfile(dset_path), (
|
||||
"File %s was expected to exist!." % dset_path
|
||||
)
|
||||
|
||||
with gzip.open(dset_path) as f:
|
||||
x, t = cPickle.load(f)
|
||||
|
||||
self._max_num_examples = max_num_examples
|
||||
self.x = x
|
||||
self.t = t
|
||||
self.num_classes = 10
|
||||
|
||||
self._rand_idx = None
|
||||
if self.randomize:
|
||||
self._rand_idx = self.__randomize()
|
||||
|
||||
def reset(self):
|
||||
super(MNISTDataProvider, self).reset()
|
||||
if self.randomize:
|
||||
self._rand_idx = self.__randomize()
|
||||
|
||||
def __randomize(self):
|
||||
assert isinstance(self.x, numpy.ndarray)
|
||||
return numpy.random.permute(numpy.arange(0, self.x.shape[0]))
|
||||
|
||||
def next(self):
|
||||
|
||||
has_enough = (self._curr_idx + self.batch_size) <= self.x.shape[0]
|
||||
presented_max = (self._max_num_examples > 0 and
|
||||
self._curr_idx + self.batch_size > self._max_num_examples)
|
||||
|
||||
if not has_enough or presented_max:
|
||||
raise StopIteration()
|
||||
|
||||
if self._rand_idx is not None:
|
||||
range_idx = \
|
||||
self._rand_idx[self._curr_idx:self._curr_idx + self.batch_size]
|
||||
else:
|
||||
range_idx = \
|
||||
numpy.arange(self._curr_idx, self._curr_idx + self.batch_size)
|
||||
|
||||
rval_x = self.x[range_idx]
|
||||
rval_t = self.t[range_idx]
|
||||
|
||||
self._curr_idx += self.batch_size
|
||||
|
||||
#return rval_x, self.__to_one_of_k(rval_y)
|
||||
return rval_x, rval_t
|
||||
|
||||
def __to_one_of_k(self, y):
|
||||
raise NotImplementedError('Write me!')
|
||||
|
||||
|
||||
class FuncDataProvider(DataProvider):
|
||||
"""
|
||||
Function gets as an argument a list of functions random samples
|
||||
drawn from normal distribution which means are defined by those
|
||||
functions.
|
||||
"""
|
||||
def __init__(self,
|
||||
fn_list=[lambda x: x ** 2, lambda x: numpy.sin(x)],
|
||||
std_list=[0.1, 0.1],
|
||||
x_from = 0.0,
|
||||
x_to = 1.0,
|
||||
points_per_fn=200,
|
||||
batch_size=10,
|
||||
randomize=True):
|
||||
|
||||
super(FuncDataProvider, self).__init__(batch_size, randomize)
|
||||
|
||||
def sample_points(y, std):
|
||||
ys = numpy.zeros_like(y)
|
||||
for i in xrange(y.shape[0]):
|
||||
ys[i] = numpy.random.normal(y[i], std)
|
||||
return ys
|
||||
|
||||
x = numpy.linspace(x_from, x_to, points_per_fn, dtype=numpy.float32)
|
||||
means = [fn(x) for fn in fn_list]
|
||||
y = [sample_points(mean, std) for mean, std in zip(means, std_list)]
|
||||
|
||||
self.x_orig = x
|
||||
self.y_class = y
|
||||
|
||||
self.x = numpy.concatenate([x for ys in y])
|
||||
self.y = numpy.concatenate([ys for ys in y])
|
||||
|
||||
if self.randomize:
|
||||
self._rand_idx = self.__randomize()
|
||||
else:
|
||||
self._rand_idx = None
|
||||
|
||||
def __randomize(self):
|
||||
assert isinstance(self.x, numpy.ndarray)
|
||||
return numpy.random.permute(numpy.arange(0, self.x.shape[0]))
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
if (self._curr_idx + self.batch_size) >= self.x.shape[0]:
|
||||
raise StopIteration()
|
||||
|
||||
if self._rand_idx is not None:
|
||||
range_idx = self._rand_idx[self._curr_idx:self._curr_idx + self.batch_size]
|
||||
else:
|
||||
range_idx = numpy.arange(self._curr_idx, self._curr_idx + self.batch_size)
|
||||
|
||||
x = self.x[range_idx]
|
||||
y = self.y[range_idx]
|
||||
|
||||
self._curr_idx += self.batch_size
|
||||
|
||||
return x, y
|
||||
|
Loading…
Reference in New Issue
Block a user