Adding CCPP data and data provider.
This commit is contained in:
parent
15425cff0e
commit
2f9253bd21
BIN
data/ccpp_data.npz
Normal file
BIN
data/ccpp_data.npz
Normal file
Binary file not shown.
@ -163,7 +163,7 @@ class MetOfficeDataProvider(DataProvider):
|
|||||||
|
|
||||||
def __init__(self, window_size, batch_size=10, max_num_batches=-1,
|
def __init__(self, window_size, batch_size=10, max_num_batches=-1,
|
||||||
shuffle_order=True, rng=None):
|
shuffle_order=True, rng=None):
|
||||||
"""Create a new Met Offfice data provider object.
|
"""Create a new Met Office data provider object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
window_size (int): Size of windows to split weather time series
|
window_size (int): Size of windows to split weather time series
|
||||||
@ -204,3 +204,50 @@ class MetOfficeDataProvider(DataProvider):
|
|||||||
targets = windowed[:, -1]
|
targets = windowed[:, -1]
|
||||||
super(MetOfficeDataProvider, self).__init__(
|
super(MetOfficeDataProvider, self).__init__(
|
||||||
inputs, targets, batch_size, max_num_batches, shuffle_order, rng)
|
inputs, targets, batch_size, max_num_batches, shuffle_order, rng)
|
||||||
|
|
||||||
|
|
||||||
|
class CCPPDataProvider(DataProvider):
    """Data provider for the Combined Cycle Power Plant (CCPP) data set."""

    def __init__(self, which_set='train', input_dims=None, batch_size=10,
                 max_num_batches=-1, shuffle_order=True, rng=None):
        """Create a new Combined Cycle Power Plant data provider object.

        The data is read from the file `ccpp_data.npz` located in the
        directory named by the `MLP_DATA_DIR` environment variable. The
        arrays stored under the keys `<which_set>_inputs` and
        `<which_set>_targets` are passed on to the base data provider.

        Args:
            which_set: One of 'train' or 'valid'. Determines which portion of
                data this object should provide.
            input_dims: Which of the four input dimensions to use. If `None`
                all are used. If an iterable of integers is provided
                (consisting of a subset of {0, 1, 2, 3}) then only the
                corresponding input dimensions are included, in the order
                given.
            batch_size (int): Number of data points to include in each batch.
            max_num_batches (int): Maximum number of batches to iterate over
                in an epoch. If `max_num_batches * batch_size > num_data` then
                only as many batches as the data can be split into will be
                used. If set to -1 all of the data will be used.
            shuffle_order (bool): Whether to randomly permute the order of
                the data before each epoch.
            rng (RandomState): A seeded random number generator.

        Raises:
            KeyError: If the `MLP_DATA_DIR` environment variable is not set.
            AssertionError: If the data file is missing, `which_set` is not
                'train' or 'valid', or `input_dims` is not a subset of
                {0, 1, 2, 3}.
        """
        data_path = os.path.join(
            os.environ['MLP_DATA_DIR'], 'ccpp_data.npz')
        assert os.path.isfile(data_path), (
            'Data file does not exist at expected path: ' + data_path
        )
        # check a valid which_set was provided
        assert which_set in ['train', 'valid'], (
            'Expected which_set to be either train or valid '
            'Got {0}'.format(which_set)
        )
        # check input_dims are valid; only applies when a selection was
        # actually requested (None means "use every input dimension")
        if input_dims is not None:
            # Materialise as a list: this keeps the caller's column order,
            # accepts any iterable, and (unlike a set) is a valid NumPy index.
            input_dims = list(input_dims)
            assert set(input_dims).issubset({0, 1, 2, 3}), (
                'input_dims should be a subset of {0, 1, 2, 3}'
            )
        # Use the archive as a context manager so the underlying file handle
        # is closed once the arrays have been read into memory.
        with np.load(data_path) as loaded:
            inputs = loaded[which_set + '_inputs']
            targets = loaded[which_set + '_targets']
        if input_dims is not None:
            inputs = inputs[:, input_dims]
        super(CCPPDataProvider, self).__init__(
            inputs, targets, batch_size, max_num_batches, shuffle_order, rng)
|
||||||
|
Loading…
Reference in New Issue
Block a user