# mlpractical/network_architectures.py

import tensorflow as tf
from tensorflow.contrib.layers import batch_norm
from tensorflow.python.ops.nn_ops import leaky_relu
from utils.network_summary import count_parameters


class VGGClassifier:
    def __init__(self, batch_size, layer_stage_sizes, name, num_classes, batch_norm_use=False,
                 inner_layer_depth=2, strided_dim_reduction=True):
        """
        Initializes a VGG-style classifier architecture.
        :param batch_size: The size of the data batch.
        :param layer_stage_sizes: A list of filter counts, one per layer stage. A layer stage is a series of
            stride-1 convolutional layers followed by a dimensionality-reducing step, which is either a
            stride-1 convolution followed by max pooling or a stride-2 (strided) convolution. For example,
            with layer_stage_sizes=[64, 128, 256] and inner_layer_depth=2, stage 0 consists of two 3x3
            convolutions with 64 filters each, the last of which reduces dimensionality (via stride 2 or
            max pooling), and the remaining stages follow the same pattern with 128 and 256 filters
            (see the commented sketch after this class for the full expansion).
        :param name: Name of the network, used as its variable scope.
        :param num_classes: Number of classes to classify.
        :param batch_norm_use: Whether to use batch normalization between layers.
        :param inner_layer_depth: Number of convolutional layers per stage; the last layer of each stage
            performs the dimensionality reduction.
        :param strided_dim_reduction: Whether to use strided convolutions instead of max pooling for
            dimensionality reduction.
        """
        self.reuse = False
        self.batch_size = batch_size
        self.layer_stage_sizes = layer_stage_sizes
        self.name = name
        self.num_classes = num_classes
        self.batch_norm_use = batch_norm_use
        self.inner_layer_depth = inner_layer_depth
        self.strided_dim_reduction = strided_dim_reduction
        self.build_completed = False

    def __call__(self, image_input, training=False, dropout_rate=0.0):
        """
        Runs the CNN on a batch of images and produces the class logits.
        :param image_input: Image input to produce embeddings for, e.g. for EMNIST [batch_size, 28, 28, 1]
        :param training: A flag indicating training or evaluation
        :param dropout_rate: A tf placeholder of type tf.float32 indicating the amount of dropout to apply
        :return: A tuple of (logits of shape [batch_size, self.num_classes], list of per-layer feature tensors)
        """
        with tf.variable_scope(self.name, reuse=self.reuse):
            layer_features = []
            with tf.variable_scope('VGGNet'):
                outputs = image_input
                for i in range(len(self.layer_stage_sizes)):
                    with tf.variable_scope('conv_stage_{}'.format(i)):
                        for j in range(self.inner_layer_depth):
                            with tf.variable_scope('conv_{}_{}'.format(i, j)):
                                # The last convolution of each stage is strided when strided
                                # dimensionality reduction is requested.
                                if (j == self.inner_layer_depth - 1) and self.strided_dim_reduction:
                                    stride = 2
                                else:
                                    stride = 1
                                outputs = tf.layers.conv2d(outputs, self.layer_stage_sizes[i], [3, 3],
                                                           strides=(stride, stride),
                                                           padding='SAME', activation=None)
                                outputs = leaky_relu(outputs, name="leaky_relu{}".format(i))
                                layer_features.append(outputs)
                                if self.batch_norm_use:
                                    outputs = batch_norm(outputs, decay=0.99, scale=True,
                                                         center=True, is_training=training, renorm=False)

                        if not self.strided_dim_reduction:
                            outputs = tf.layers.max_pooling2d(outputs, pool_size=(2, 2), strides=2)

                        # Apply dropout only at the dimensionality-reducing step,
                        # i.e. after the last layer of every stage.
                        outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)

            c_conv_encoder = outputs
            c_conv_encoder = tf.contrib.layers.flatten(c_conv_encoder)
            c_conv_encoder = tf.layers.dense(c_conv_encoder, units=self.num_classes)

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

        if not self.build_completed:
            self.build_completed = True
            count_parameters(self.variables, "VGGNet")

        return c_conv_encoder, layer_features
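
# An illustrative expansion of the stage logic above (a descriptive sketch of what __call__
# builds, using example hyperparameters rather than values fixed by this module): with
# layer_stage_sizes=[64, 128, 256], inner_layer_depth=2 and strided_dim_reduction=True,
# the network is
#
#   stage 0: conv 3x3, 64 filters, stride 1  -> conv 3x3, 64 filters, stride 2  -> dropout
#   stage 1: conv 3x3, 128 filters, stride 1 -> conv 3x3, 128 filters, stride 2 -> dropout
#   stage 2: conv 3x3, 256 filters, stride 1 -> conv 3x3, 256 filters, stride 2 -> dropout
#
# with each convolution followed by a leaky ReLU (and batch norm when batch_norm_use=True),
# and a final flatten plus dense layer with num_classes units. With strided_dim_reduction=False,
# every convolution uses stride 1 and each stage ends with 2x2 max pooling instead.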


class FCCLayerClassifier:
    def __init__(self, batch_size, layer_stage_sizes, name, num_classes, batch_norm_use=False,
                 inner_layer_depth=2, strided_dim_reduction=True):
        """
        Initializes a fully connected (dense-layer) classifier architecture.
        :param batch_size: The size of the data batch.
        :param layer_stage_sizes: A list of unit counts, one per layer stage, where a layer stage is a series
            of fully connected layers of the given width. For example, with layer_stage_sizes=[64, 128, 256]
            and inner_layer_depth=2, stage 0 consists of two dense layers with 64 units each, and the
            remaining stages follow the same pattern with 128 and 256 units. A usage sketch for both
            classifiers appears at the bottom of this file.
        :param name: Name of the network, used as its variable scope.
        :param num_classes: Number of classes to classify.
        :param batch_norm_use: Whether to use batch normalization between layers.
        :param inner_layer_depth: Number of fully connected layers per stage.
        :param strided_dim_reduction: Unused in this fully connected variant.
        """
        self.reuse = False
        self.batch_size = batch_size
        self.layer_stage_sizes = layer_stage_sizes
        self.name = name
        self.num_classes = num_classes
        self.batch_norm_use = batch_norm_use
        self.inner_layer_depth = inner_layer_depth
        self.strided_dim_reduction = strided_dim_reduction
        self.build_completed = False

    def __call__(self, image_input, training=False, dropout_rate=0.0):
        """
        Runs the fully connected network on a batch of images and produces the class logits.
        :param image_input: Image input to produce embeddings for, e.g. for EMNIST [batch_size, 28, 28, 1]
        :param training: A flag indicating training or evaluation
        :param dropout_rate: A tf placeholder of type tf.float32 indicating the amount of dropout to apply
        :return: A tuple of (logits of shape [batch_size, self.num_classes], list of per-layer feature tensors)
        """
        with tf.variable_scope(self.name, reuse=self.reuse):
            layer_features = []
            with tf.variable_scope('FCCLayerNet'):
                outputs = image_input
                for i in range(len(self.layer_stage_sizes)):
                    with tf.variable_scope('conv_stage_{}'.format(i)):
                        for j in range(self.inner_layer_depth):
                            with tf.variable_scope('conv_{}_{}'.format(i, j)):
                                # tf.layers.dense acts on the last dimension of its input, so a 4D
                                # image tensor is transformed per spatial position here.
                                outputs = tf.layers.dense(outputs, units=self.layer_stage_sizes[i])
                                outputs = leaky_relu(outputs, name="leaky_relu{}".format(i))
                                layer_features.append(outputs)
                                if self.batch_norm_use:
                                    outputs = batch_norm(outputs, decay=0.99, scale=True,
                                                         center=True, is_training=training, renorm=False)

                        # Apply dropout once per stage, after its last layer.
                        outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)

            c_conv_encoder = outputs
            c_conv_encoder = tf.contrib.layers.flatten(c_conv_encoder)
            c_conv_encoder = tf.layers.dense(c_conv_encoder, units=self.num_classes)

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

        if not self.build_completed:
            self.build_completed = True
            count_parameters(self.variables, "FCCLayerNet")

        return c_conv_encoder, layer_features
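

# A minimal usage sketch (illustrative only: the batch size, image shape, class count and
# stage sizes below are assumptions for an EMNIST-like setup, not values fixed by this module).
if __name__ == "__main__":
    inputs = tf.placeholder(tf.float32, shape=[32, 28, 28, 1], name="inputs")
    dropout_rate = tf.placeholder(tf.float32, name="dropout_rate")

    vgg = VGGClassifier(batch_size=32, layer_stage_sizes=[64, 128, 256], name="vgg_net",
                        num_classes=47, batch_norm_use=True, inner_layer_depth=2,
                        strided_dim_reduction=True)
    # logits: [32, 47]; layer_features: one tensor per convolutional layer.
    logits, layer_features = vgg(inputs, training=True, dropout_rate=dropout_rate)

    fcc = FCCLayerClassifier(batch_size=32, layer_stage_sizes=[128, 128], name="fcc_net",
                             num_classes=47, batch_norm_use=False, inner_layer_depth=1)
    fcc_logits, fcc_features = fcc(inputs, training=True, dropout_rate=dropout_rate)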