# -*- coding: utf-8 -*-
"""Parameter initialisers.

This module defines classes to initialise the parameters in a layer.
"""

import numpy as np

from mlp import DEFAULT_SEED


class ConstantInit(object):
    """Constant parameter initialiser."""

    def __init__(self, value):
        """Construct a constant parameter initialiser.

        Args:
            value: Value to initialise parameter to.
        """
        self.value = value

    def __call__(self, shape):
        return np.ones(shape=shape) * self.value
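
# Usage sketch (an illustrative addition, not part of the original module):
# an initialiser is a callable that maps a parameter shape to a NumPy array
# of that shape, e.g.
#
#     biases_init = ConstantInit(0.)
#     biases = biases_init(10)  # vector of ten zeros

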
class UniformInit(object):
    """Random uniform parameter initialiser."""

    def __init__(self, low, high, rng=None):
        """Construct a random uniform parameter initialiser.

        Args:
            low: Lower bound of interval to sample from.
            high: Upper bound of interval to sample from.
            rng (RandomState): Seeded random number generator.
        """
        self.low = low
        self.high = high
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def __call__(self, shape):
        return self.rng.uniform(low=self.low, high=self.high, size=shape)
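
# Usage sketch (illustrative addition): supplying a seeded generator makes
# repeated initialisations reproducible; if `rng` is omitted the module-wide
# `DEFAULT_SEED` is used instead, e.g.
#
#     rng = np.random.RandomState(27)
#     weights_init = UniformInit(-0.1, 0.1, rng=rng)
#     weights = weights_init((784, 100))  # entries drawn from [-0.1, 0.1)

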
class NormalInit(object):
    """Random normal parameter initialiser."""

    def __init__(self, mean, std, rng=None):
        """Construct a random normal parameter initialiser.

        Args:
            mean: Mean of distribution to sample from.
            std: Standard deviation of distribution to sample from.
            rng (RandomState): Seeded random number generator.
        """
        self.mean = mean
        self.std = std
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def __call__(self, shape):
        return self.rng.normal(loc=self.mean, scale=self.std, size=shape)
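
# Usage sketch (illustrative addition): these initialisers are intended to be
# passed to layer constructors. Assuming a layer class with the hypothetical
# signature `AffineLayer(input_dim, output_dim, weights_initialiser,
# biases_initialiser)`, one might write
#
#     layer = AffineLayer(784, 100,
#                         weights_initialiser=NormalInit(0., 0.01),
#                         biases_initialiser=ConstantInit(0.))

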
class GlorotUniformInit(object):
    """Glorot and Bengio (2010) random uniform weights initialiser.

    Initialises a two-dimensional parameter array using the 'normalized
    initialisation' scheme suggested in [1] which attempts to maintain a
    roughly constant variance in the activations and backpropagated gradients
    of a multi-layer model consisting of interleaved affine and logistic
    sigmoidal transformation layers.

    Weights are sampled from a zero-mean uniform distribution with standard
    deviation `sqrt(2 / (input_dim + output_dim))` where `input_dim` and
    `output_dim` are the input and output dimensions of the weight matrix
    respectively.

    References:
        [1]: Understanding the difficulty of training deep feedforward neural
             networks, Glorot and Bengio (2010)
    """

    def __init__(self, gain=1., rng=None):
        """Construct a normalised initialisation random initialiser object.

        Args:
            gain: Multiplicative factor to scale initialised weights by.
                Recommended value is 1 for affine layers followed by
                logistic sigmoid layers (or another affine layer).
            rng (RandomState): Seeded random number generator.
        """
        self.gain = gain
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def __call__(self, shape):
        assert len(shape) == 2, (
            'Initialiser should only be used for two dimensional arrays.')
        std = self.gain * (2. / (shape[0] + shape[1]))**0.5
        # A uniform distribution on [-a, a] has standard deviation a / sqrt(3),
        # so sampling on [-sqrt(3) * std, sqrt(3) * std] gives the target std.
        half_width = 3.**0.5 * std
        return self.rng.uniform(low=-half_width, high=half_width, size=shape)
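
# Worked example (illustrative addition): for a 100 -> 50 weight matrix with
# gain 1, std = sqrt(2 / (100 + 50)) ~= 0.1155, so values are drawn uniformly
# from [-sqrt(3) * 0.1155, sqrt(3) * 0.1155] ~= [-0.2, 0.2]:
#
#     weights = GlorotUniformInit()((100, 50))

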
class GlorotNormalInit(object):
    """Glorot and Bengio (2010) random normal weights initialiser.

    Initialises a two-dimensional parameter array using the 'normalized
    initialisation' scheme suggested in [1] which attempts to maintain a
    roughly constant variance in the activations and backpropagated gradients
    of a multi-layer model consisting of interleaved affine and logistic
    sigmoidal transformation layers.

    Weights are sampled from a zero-mean normal distribution with standard
    deviation `sqrt(2 / (input_dim + output_dim))` where `input_dim` and
    `output_dim` are the input and output dimensions of the weight matrix
    respectively.

    References:
        [1]: Understanding the difficulty of training deep feedforward neural
             networks, Glorot and Bengio (2010)
    """

    def __init__(self, gain=1., rng=None):
        """Construct a normalised initialisation random initialiser object.

        Args:
            gain: Multiplicative factor to scale initialised weights by.
                Recommended value is 1 for affine layers followed by
                logistic sigmoid layers (or another affine layer).
            rng (RandomState): Seeded random number generator.
        """
        self.gain = gain
        if rng is None:
            rng = np.random.RandomState(DEFAULT_SEED)
        self.rng = rng

    def __call__(self, shape):
        # Mirror the two-dimensional check in GlorotUniformInit for consistency.
        assert len(shape) == 2, (
            'Initialiser should only be used for two dimensional arrays.')
        std = self.gain * (2. / (shape[0] + shape[1]))**0.5
        return self.rng.normal(loc=0., scale=std, size=shape)
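

# Minimal self-check (an illustrative addition, not part of the original
# module): verifies each initialiser returns an array of the requested shape
# and reports the empirical standard deviation alongside the Glorot target
# `sqrt(2 / (input_dim + output_dim))`.
if __name__ == '__main__':
    shape = (1000, 500)
    for init in [ConstantInit(0.5), UniformInit(-0.1, 0.1),
                 NormalInit(0., 0.01), GlorotUniformInit(),
                 GlorotNormalInit()]:
        params = init(shape)
        assert params.shape == shape
        print('{0}: empirical std = {1:.4f}'.format(
            type(init).__name__, params.std()))
    print('Glorot target std = {0:.4f}'.format(
        (2. / (shape[0] + shape[1]))**0.5))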