import tensorflow as tf

from tensorflow.contrib.layers import batch_norm
from tensorflow.python.ops.nn_ops import leaky_relu

from utils.network_summary import count_parameters
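# Editor's note: this module assumes a TensorFlow 1.x environment; tf.contrib,
# tf.layers and tf.variable_scope were removed or moved to tf.compat.v1 in
# TensorFlow 2.x.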

class VGGClassifier:
    def __init__(self, batch_size, layer_stage_sizes, name, num_classes, num_channels=1, batch_norm_use=False,
                 inner_layer_depth=2, strided_dim_reduction=True):
        """
        Initializes a VGG-style classifier architecture.
        :param batch_size: The size of the data batch.
        :param layer_stage_sizes: A list with one filter count per layer stage. A stage is a series of 3x3
            convolutions with stride=1, followed by a dimensionality-reducing step that is either a strided
            convolution (stride=2) or a stride=1 convolution followed by 2x2 max pooling, depending on
            strided_dim_reduction. For example, [64, 128, 256] with inner_layer_depth=2 gives three stages of
            two convolutions each, using 64, 128 and 256 filters respectively, where the last convolution of
            every stage performs the dimensionality reduction.
        :param name: Name of the network, used as its variable scope.
        :param num_classes: Number of classes to classify.
        :param num_channels: Number of channels of the image data.
        :param batch_norm_use: Whether to apply batch normalization after each layer.
        :param inner_layer_depth: Number of convolutional layers per stage, including the final
            dimensionality-reducing one.
        :param strided_dim_reduction: Whether to use strided convolutions instead of max pooling for
            dimensionality reduction.
        """
        self.reuse = False
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.layer_stage_sizes = layer_stage_sizes
        self.name = name
        self.num_classes = num_classes
        self.batch_norm_use = batch_norm_use
        self.inner_layer_depth = inner_layer_depth
        self.strided_dim_reduction = strided_dim_reduction
        self.build_completed = False

    def __call__(self, image_input, training=False, dropout_rate=0.0):
        """
        Runs the CNN and produces the class logits and per-layer features.
        :param image_input: Image input to produce embeddings for, e.g. for EMNIST [batch_size, 28, 28, 1]
        :param training: A flag indicating training or evaluation
        :param dropout_rate: A tf placeholder of type tf.float32 indicating the amount of dropout applied
        :return: A tuple of the class logits of shape [batch_size, self.num_classes] and a list of the
            feature tensors produced by each layer.
        """
        with tf.variable_scope(self.name, reuse=self.reuse):
            layer_features = []
            with tf.variable_scope('VGGNet'):
                outputs = image_input
                for i in range(len(self.layer_stage_sizes)):
                    with tf.variable_scope('conv_stage_{}'.format(i)):
                        for j in range(self.inner_layer_depth):
                            with tf.variable_scope('conv_{}_{}'.format(i, j)):
                                # The last convolution of a stage is strided when strided
                                # convolutions handle the dimensionality reduction.
                                if (j == self.inner_layer_depth - 1) and self.strided_dim_reduction:
                                    stride = 2
                                else:
                                    stride = 1
                                outputs = tf.layers.conv2d(outputs, self.layer_stage_sizes[i], [3, 3],
                                                           strides=(stride, stride),
                                                           padding='SAME', activation=None)
                                outputs = leaky_relu(outputs, name="leaky_relu{}".format(i))
                                layer_features.append(outputs)
                                if self.batch_norm_use:
                                    outputs = batch_norm(outputs, decay=0.99, scale=True,
                                                         center=True, is_training=training, renorm=False)

                        if not self.strided_dim_reduction:
                            outputs = tf.layers.max_pooling2d(outputs, pool_size=(2, 2), strides=2)

                        # Apply dropout only at the dimensionality-reducing steps,
                        # i.e. after the last layer of every stage.
                        outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)

            c_conv_encoder = outputs
            c_conv_encoder = tf.contrib.layers.flatten(c_conv_encoder)
            c_conv_encoder = tf.layers.dense(c_conv_encoder, units=self.num_classes)

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

        if not self.build_completed:
            self.build_completed = True
            count_parameters(self.variables, "VGGNet")

        return c_conv_encoder, layer_features
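
# Shape sketch (editorial note, assuming the EMNIST-style 28x28x1 input mentioned in
# the docstring): with layer_stage_sizes=[64, 128, 256], inner_layer_depth=2 and
# strided_dim_reduction=True, each stage applies two 3x3 convolutions with the second
# one strided, so the spatial resolution goes 28 -> 14 -> 7 -> 4 before the flatten
# and the final dense layer that produces the num_classes logits.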

class FCCLayerClassifier:
    def __init__(self, batch_size, layer_stage_sizes, name, num_classes, num_channels=1, batch_norm_use=False,
                 inner_layer_depth=2, strided_dim_reduction=True):
        """
        Initializes a fully connected classifier architecture.
        :param batch_size: The size of the data batch.
        :param layer_stage_sizes: A list with one hidden-unit count per layer stage; each stage consists of
            inner_layer_depth fully connected layers with that many units. For example, [64, 128, 256] with
            inner_layer_depth=2 gives three stages of two dense layers each.
        :param name: Name of the network, used as its variable scope.
        :param num_classes: Number of classes to classify.
        :param num_channels: Number of channels of the image data.
        :param batch_norm_use: Whether to apply batch normalization after each layer.
        :param inner_layer_depth: Number of fully connected layers per stage.
        :param strided_dim_reduction: Unused by this network; kept so the constructor matches
            VGGClassifier's interface.
        """
        self.reuse = False
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.layer_stage_sizes = layer_stage_sizes
        self.name = name
        self.num_classes = num_classes
        self.batch_norm_use = batch_norm_use
        self.inner_layer_depth = inner_layer_depth
        self.strided_dim_reduction = strided_dim_reduction
        self.build_completed = False

    def __call__(self, image_input, training=False, dropout_rate=0.0):
        """
        Runs the fully connected network and produces the class logits and per-layer features.
        :param image_input: Image input to produce embeddings for, e.g. for EMNIST [batch_size, 28, 28, 1]
        :param training: A flag indicating training or evaluation
        :param dropout_rate: A tf placeholder of type tf.float32 indicating the amount of dropout applied
        :return: A tuple of the class logits of shape [batch_size, self.num_classes] and a list of the
            feature tensors produced by each layer.
        """
        with tf.variable_scope(self.name, reuse=self.reuse):
            layer_features = []
            with tf.variable_scope('FCCLayerNet'):
                outputs = image_input
                for i in range(len(self.layer_stage_sizes)):
                    with tf.variable_scope('conv_stage_{}'.format(i)):
                        for j in range(self.inner_layer_depth):
                            with tf.variable_scope('conv_{}_{}'.format(i, j)):
                                outputs = tf.layers.dense(outputs, units=self.layer_stage_sizes[i])
                                outputs = leaky_relu(outputs, name="leaky_relu{}".format(i))
                                layer_features.append(outputs)
                                if self.batch_norm_use:
                                    outputs = batch_norm(outputs, decay=0.99, scale=True,
                                                         center=True, is_training=training, renorm=False)

                        # Apply dropout once per stage, after its last layer.
                        outputs = tf.layers.dropout(outputs, rate=dropout_rate, training=training)

            c_conv_encoder = outputs
            c_conv_encoder = tf.contrib.layers.flatten(c_conv_encoder)
            c_conv_encoder = tf.layers.dense(c_conv_encoder, units=self.num_classes)

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name)

        if not self.build_completed:
            self.build_completed = True
            count_parameters(self.variables, "FCCLayerNet")

        return c_conv_encoder, layer_features
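
# A minimal smoke test (editorial sketch, not part of the original module).
# Assumptions: TensorFlow 1.x graph mode, EMNIST-style 28x28x1 inputs, 47 output
# classes, and utils.network_summary importable; the hyperparameters below are
# illustrative only.
if __name__ == '__main__':
    image_batch = tf.placeholder(tf.float32, shape=[32, 28, 28, 1], name='image_batch')
    is_training = tf.placeholder(tf.bool, name='is_training')
    dropout_rate = tf.placeholder(tf.float32, name='dropout_rate')

    vgg_net = VGGClassifier(batch_size=32, layer_stage_sizes=[64, 128, 256],
                            name='vgg_classifier', num_classes=47,
                            batch_norm_use=True, strided_dim_reduction=True)
    fcc_net = FCCLayerClassifier(batch_size=32, layer_stage_sizes=[64, 128, 256],
                                 name='fcc_classifier', num_classes=47)

    vgg_logits, vgg_features = vgg_net(image_batch, training=is_training,
                                       dropout_rate=dropout_rate)
    fcc_logits, fcc_features = fcc_net(image_batch, training=is_training,
                                       dropout_rate=dropout_rate)

    # Both networks return [batch_size, num_classes] logits plus the list of
    # per-layer feature tensors collected during the forward pass.
    print(vgg_logits.get_shape(), len(vgg_features))
    print(fcc_logits.get_shape(), len(fcc_features))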