formatting and BN

This commit is contained in:
parent 92fccb8eb2
commit cb5c6f4e19
@@ -2,12 +2,12 @@ import argparse


 def str2bool(v):
-    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+    if v.lower() in ("yes", "true", "t", "y", "1"):
         return True
-    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+    elif v.lower() in ("no", "false", "f", "n", "0"):
         return False
     else:
-        raise argparse.ArgumentTypeError('Boolean value expected.')
+        raise argparse.ArgumentTypeError("Boolean value expected.")


 def get_args():
@@ -16,38 +16,111 @@ def get_args():
     :return: A namedtuple with arguments
     """
     parser = argparse.ArgumentParser(
-        description='Welcome to the MLP course\'s Pytorch training and inference helper script')
+        description="Welcome to the MLP course's Pytorch training and inference helper script"
+    )

-    parser.add_argument('--batch_size', nargs="?", type=int, default=100, help='Batch_size for experiment')
-    parser.add_argument('--continue_from_epoch', nargs="?", type=int, default=-1, help='Epoch you want to continue training from while restarting an experiment')
-    parser.add_argument('--seed', nargs="?", type=int, default=7112018,
-                        help='Seed to use for random number generator for experiment')
-    parser.add_argument('--image_num_channels', nargs="?", type=int, default=3,
-                        help='The channel dimensionality of our image-data')
-    parser.add_argument('--image_height', nargs="?", type=int, default=32, help='Height of image data')
-    parser.add_argument('--image_width', nargs="?", type=int, default=32, help='Width of image data')
-    parser.add_argument('--num_stages', nargs="?", type=int, default=3,
-                        help='Number of convolutional stages in the network. A stage is considered a sequence of '
-                             'convolutional layers where the input volume remains the same in the spacial dimension and'
-                             ' is always terminated by a dimensionality reduction stage')
-    parser.add_argument('--num_blocks_per_stage', nargs="?", type=int, default=5,
-                        help='Number of convolutional blocks in each stage, not including the reduction stage.'
-                             ' A convolutional block is made up of two convolutional layers activated using the '
-                             ' leaky-relu non-linearity')
-    parser.add_argument('--num_filters', nargs="?", type=int, default=16,
-                        help='Number of convolutional filters per convolutional layer in the network (excluding '
-                             'dimensionality reduction layers)')
-    parser.add_argument('--num_epochs', nargs="?", type=int, default=100, help='Total number of epochs for model training')
-    parser.add_argument('--num_classes', nargs="?", type=int, default=100, help='Number of classes in the dataset')
-    parser.add_argument('--experiment_name', nargs="?", type=str, default="exp_1",
-                        help='Experiment name - to be used for building the experiment folder')
-    parser.add_argument('--use_gpu', nargs="?", type=str2bool, default=True,
-                        help='A flag indicating whether we will use GPU acceleration or not')
-    parser.add_argument('--weight_decay_coefficient', nargs="?", type=float, default=0,
-                        help='Weight decay to use for Adam')
-    parser.add_argument('--block_type', type=str, default='conv_block',
-                        help='Type of convolutional blocks to use in our network '
-                             '(This argument will be useful in running experiments to debug your network)')
+    parser.add_argument(
+        "--batch_size",
+        nargs="?",
+        type=int,
+        default=100,
+        help="Batch_size for experiment",
+    )
+    parser.add_argument(
+        "--continue_from_epoch",
+        nargs="?",
+        type=int,
+        default=-1,
+        help="Epoch you want to continue training from while restarting an experiment",
+    )
+    parser.add_argument(
+        "--seed",
+        nargs="?",
+        type=int,
+        default=7112018,
+        help="Seed to use for random number generator for experiment",
+    )
+    parser.add_argument(
+        "--image_num_channels",
+        nargs="?",
+        type=int,
+        default=3,
+        help="The channel dimensionality of our image-data",
+    )
+    parser.add_argument(
+        "--image_height", nargs="?", type=int, default=32, help="Height of image data"
+    )
+    parser.add_argument(
+        "--image_width", nargs="?", type=int, default=32, help="Width of image data"
+    )
+    parser.add_argument(
+        "--num_stages",
+        nargs="?",
+        type=int,
+        default=3,
+        help="Number of convolutional stages in the network. A stage is considered a sequence of "
+        "convolutional layers where the input volume remains the same in the spacial dimension and"
+        " is always terminated by a dimensionality reduction stage",
+    )
+    parser.add_argument(
+        "--num_blocks_per_stage",
+        nargs="?",
+        type=int,
+        default=5,
+        help="Number of convolutional blocks in each stage, not including the reduction stage."
+        " A convolutional block is made up of two convolutional layers activated using the "
+        " leaky-relu non-linearity",
+    )
+    parser.add_argument(
+        "--num_filters",
+        nargs="?",
+        type=int,
+        default=16,
+        help="Number of convolutional filters per convolutional layer in the network (excluding "
+        "dimensionality reduction layers)",
+    )
+    parser.add_argument(
+        "--num_epochs",
+        nargs="?",
+        type=int,
+        default=100,
+        help="Total number of epochs for model training",
+    )
+    parser.add_argument(
+        "--num_classes",
+        nargs="?",
+        type=int,
+        default=100,
+        help="Number of classes in the dataset",
+    )
+    parser.add_argument(
+        "--experiment_name",
+        nargs="?",
+        type=str,
+        default="exp_1",
+        help="Experiment name - to be used for building the experiment folder",
+    )
+    parser.add_argument(
+        "--use_gpu",
+        nargs="?",
+        type=str2bool,
+        default=True,
+        help="A flag indicating whether we will use GPU acceleration or not",
+    )
+    parser.add_argument(
+        "--weight_decay_coefficient",
+        nargs="?",
+        type=float,
+        default=0,
+        help="Weight decay to use for Adam",
+    )
+    parser.add_argument(
+        "--block_type",
+        type=str,
+        default="conv_block",
+        help="Type of convolutional blocks to use in our network "
+        "(This argument will be useful in running experiments to debug your network)",
+    )
     args = parser.parse_args()
     print(args)
     return args
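Side note on the hunk above: `str2bool` exists because argparse's `type=bool` would treat any non-empty string, including "False", as truthy. A minimal, self-contained sketch of the intended behaviour (the `--use_gpu false` invocation is a made-up example, not part of the commit):

import argparse

def str2bool(v):
    # Same helper as in the diff above: inspect the string instead of bool().
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    elif v.lower() in ("no", "false", "f", "n", "0"):
        return False
    else:
        raise argparse.ArgumentTypeError("Boolean value expected.")

parser = argparse.ArgumentParser()
parser.add_argument("--use_gpu", nargs="?", type=str2bool, default=True)

# False with str2bool; with type=bool this would be True, since bool("false") is truthy.
print(parser.parse_args(["--use_gpu", "false"]).use_gpu)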
@@ -10,11 +10,23 @@ import time
 from pytorch_mlp_framework.storage_utils import save_statistics
 from matplotlib import pyplot as plt
 import matplotlib
-matplotlib.rcParams.update({'font.size': 8})
+
+matplotlib.rcParams.update({"font.size": 8})
+

 class ExperimentBuilder(nn.Module):
-    def __init__(self, network_model, experiment_name, num_epochs, train_data, val_data,
-                 test_data, weight_decay_coefficient, use_gpu, continue_from_epoch=-1):
+    def __init__(
+        self,
+        network_model,
+        experiment_name,
+        num_epochs,
+        train_data,
+        val_data,
+        test_data,
+        weight_decay_coefficient,
+        use_gpu,
+        continue_from_epoch=-1,
+    ):
         """
         Initializes an ExperimentBuilder object. Such an object takes care of running training and evaluation of a deep net
         on a given dataset. It also takes care of saving per epoch models and automatically inferring the best val model
@@ -31,75 +43,95 @@ class ExperimentBuilder(nn.Module):
         """
         super(ExperimentBuilder, self).__init__()


         self.experiment_name = experiment_name
         self.model = network_model

         if torch.cuda.device_count() >= 1 and use_gpu:
-            self.device = torch.device('cuda')
+            self.device = torch.device("cuda")
             self.model.to(self.device)  # sends the model from the cpu to the gpu
-            print('Use GPU', self.device)
+            print("Use GPU", self.device)
         else:
             print("use CPU")
-            self.device = torch.device('cpu')  # sets the device to be CPU
+            self.device = torch.device("cpu")  # sets the device to be CPU
             print(self.device)

-        print('here')
+        print("here")

         self.model.reset_parameters()  # re-initialize network parameters
         self.train_data = train_data
         self.val_data = val_data
         self.test_data = test_data

-        print('System learnable parameters')
+        print("System learnable parameters")
         num_conv_layers = 0
         num_linear_layers = 0
         total_num_parameters = 0
         for name, value in self.named_parameters():
             print(name, value.shape)
-            if all(item in name for item in ['conv', 'weight']):
+            if all(item in name for item in ["conv", "weight"]):
                 num_conv_layers += 1
-            if all(item in name for item in ['linear', 'weight']):
+            if all(item in name for item in ["linear", "weight"]):
                 num_linear_layers += 1
             total_num_parameters += np.prod(value.shape)

-        print('Total number of parameters', total_num_parameters)
-        print('Total number of conv layers', num_conv_layers)
-        print('Total number of linear layers', num_linear_layers)
+        print("Total number of parameters", total_num_parameters)
+        print("Total number of conv layers", num_conv_layers)
+        print("Total number of linear layers", num_linear_layers)

-        self.optimizer = optim.Adam(self.parameters(), amsgrad=False,
-                                    weight_decay=weight_decay_coefficient)
-        self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer,
-                                                                            T_max=num_epochs,
-                                                                            eta_min=0.00002)
+        self.optimizer = optim.Adam(
+            self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
+        )
+        self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
+            self.optimizer, T_max=num_epochs, eta_min=0.00002
+        )
         # Generate the directory names
         self.experiment_folder = os.path.abspath(experiment_name)
-        self.experiment_logs = os.path.abspath(os.path.join(self.experiment_folder, "result_outputs"))
-        self.experiment_saved_models = os.path.abspath(os.path.join(self.experiment_folder, "saved_models"))
+        self.experiment_logs = os.path.abspath(
+            os.path.join(self.experiment_folder, "result_outputs")
+        )
+        self.experiment_saved_models = os.path.abspath(
+            os.path.join(self.experiment_folder, "saved_models")
+        )

         # Set best models to be at 0 since we are just starting
         self.best_val_model_idx = 0
-        self.best_val_model_acc = 0.
+        self.best_val_model_acc = 0.0

-        if not os.path.exists(self.experiment_folder):  # If experiment directory does not exist
+        if not os.path.exists(
+            self.experiment_folder
+        ):  # If experiment directory does not exist
             os.mkdir(self.experiment_folder)  # create the experiment directory
             os.mkdir(self.experiment_logs)  # create the experiment log directory
-            os.mkdir(self.experiment_saved_models)  # create the experiment saved models directory
+            os.mkdir(
+                self.experiment_saved_models
+            )  # create the experiment saved models directory

         self.num_epochs = num_epochs
-        self.criterion = nn.CrossEntropyLoss().to(self.device)  # send the loss computation to the GPU
+        self.criterion = nn.CrossEntropyLoss().to(
+            self.device
+        )  # send the loss computation to the GPU

-        if continue_from_epoch == -2:  # if continue from epoch is -2 then continue from latest saved model
-            self.state, self.best_val_model_idx, self.best_val_model_acc = self.load_model(
-                model_save_dir=self.experiment_saved_models, model_save_name="train_model",
-                model_idx='latest')  # reload existing model from epoch and return best val model index
+        if (
+            continue_from_epoch == -2
+        ):  # if continue from epoch is -2 then continue from latest saved model
+            self.state, self.best_val_model_idx, self.best_val_model_acc = (
+                self.load_model(
+                    model_save_dir=self.experiment_saved_models,
+                    model_save_name="train_model",
+                    model_idx="latest",
+                )
+            )  # reload existing model from epoch and return best val model index
             # and the best val acc of that model
-            self.starting_epoch = int(self.state['model_epoch'])
+            self.starting_epoch = int(self.state["model_epoch"])

         elif continue_from_epoch > -1:  # if continue from epoch is greater than -1 then
-            self.state, self.best_val_model_idx, self.best_val_model_acc = self.load_model(
-                model_save_dir=self.experiment_saved_models, model_save_name="train_model",
-                model_idx=continue_from_epoch)  # reload existing model from epoch and return best val model index
+            self.state, self.best_val_model_idx, self.best_val_model_acc = (
+                self.load_model(
+                    model_save_dir=self.experiment_saved_models,
+                    model_save_name="train_model",
+                    model_idx=continue_from_epoch,
+                )
+            )  # reload existing model from epoch and return best val model index
             # and the best val acc of that model
             self.starting_epoch = continue_from_epoch
         else:
@@ -113,10 +145,7 @@ class ExperimentBuilder(nn.Module):

         return total_num_params

-
     def plot_func_def(self, all_grads, layers):
-
-
         """
         Plot function definition to plot the average gradient with respect to the number of layers in the given model
         :param all_grads: Gradients wrt weights for each layer in the model.
@@ -124,34 +153,33 @@ class ExperimentBuilder(nn.Module):
         :return: plot for gradient flow
         """
         plt.plot(all_grads, alpha=0.3, color="b")
-        plt.hlines(0, 0, len(all_grads)+1, linewidth=1, color="k" )
-        plt.xticks(range(0,len(all_grads), 1), layers, rotation="vertical")
+        plt.hlines(0, 0, len(all_grads) + 1, linewidth=1, color="k")
+        plt.xticks(range(0, len(all_grads), 1), layers, rotation="vertical")
         plt.xlim(xmin=0, xmax=len(all_grads))
         plt.xlabel("Layers")
         plt.ylabel("Average Gradient")
         plt.title("Gradient flow")
         plt.grid(True)
         plt.tight_layout()

         return plt

-
     def plot_grad_flow(self, named_parameters):
         """
         The function is being called in Line 298 of this file.
         Receives the parameters of the model being trained. Returns plot of gradient flow for the given model parameters.

         """
         all_grads = []
         layers = []

         """
         Complete the code in the block below to collect absolute mean of the gradients for each layer in all_grads with the layer names in layers.
         """

         for name, param in named_parameters:
             # Check if the parameter requires gradient and has a gradient
             if param.requires_grad and param.grad is not None:
                 try:
                     _, a, _, b, _ = name.split(".", 4)
                 except:
@@ -165,23 +193,22 @@ class ExperimentBuilder(nn.Module):

         return plt

-
     def run_train_iter(self, x, y):

         self.train()  # sets model to training mode (in case batch normalization or other methods have different procedures for training and evaluation)
         x, y = x.float().to(device=self.device), y.long().to(
-            device=self.device)  # send data to device as torch tensors
+            device=self.device
+        )  # send data to device as torch tensors
         out = self.model.forward(x)  # forward the data in the model

-
         loss = F.cross_entropy(input=out, target=y)  # compute loss

         self.optimizer.zero_grad()  # set all weight grads from previous training iters to 0
         loss.backward()  # backpropagate to compute gradients for current iter loss

         self.optimizer.step()  # update network parameters
         self.learning_rate_scheduler.step()  # update learning rate scheduler

         _, predicted = torch.max(out.data, 1)  # get argmax of predictions
         accuracy = np.mean(list(predicted.eq(y.data).cpu()))  # compute accuracy
         return loss.cpu().data.numpy(), accuracy
@@ -195,7 +222,8 @@ class ExperimentBuilder(nn.Module):
         """
         self.eval()  # sets the system to validation mode
         x, y = x.float().to(device=self.device), y.long().to(
-            device=self.device)  # convert data to pytorch tensors and send to the computation device
+            device=self.device
+        )  # convert data to pytorch tensors and send to the computation device
         out = self.model.forward(x)  # forward the data in the model

         loss = F.cross_entropy(input=out, target=y)  # compute loss
@@ -204,8 +232,14 @@ class ExperimentBuilder(nn.Module):
         accuracy = np.mean(list(predicted.eq(y.data).cpu()))  # compute accuracy
         return loss.cpu().data.numpy(), accuracy

-    def save_model(self, model_save_dir, model_save_name, model_idx, best_validation_model_idx,
-                   best_validation_model_acc):
+    def save_model(
+        self,
+        model_save_dir,
+        model_save_name,
+        model_idx,
+        best_validation_model_idx,
+        best_validation_model_acc,
+    ):
         """
         Save the network parameter state and current best val epoch idx and best val accuracy.
         :param model_save_name: Name to use to save model without the epoch index
@@ -216,11 +250,21 @@ class ExperimentBuilder(nn.Module):
         :param state: The dictionary containing the system state.

         """
-        self.state['network'] = self.state_dict()  # save network parameter and other variables.
-        self.state['best_val_model_idx'] = best_validation_model_idx  # save current best val idx
-        self.state['best_val_model_acc'] = best_validation_model_acc  # save current best val acc
-        torch.save(self.state, f=os.path.join(model_save_dir, "{}_{}".format(model_save_name, str(
-            model_idx))))  # save state at prespecified filepath
+        self.state["network"] = (
+            self.state_dict()
+        )  # save network parameter and other variables.
+        self.state["best_val_model_idx"] = (
+            best_validation_model_idx  # save current best val idx
+        )
+        self.state["best_val_model_acc"] = (
+            best_validation_model_acc  # save current best val acc
+        )
+        torch.save(
+            self.state,
+            f=os.path.join(
+                model_save_dir, "{}_{}".format(model_save_name, str(model_idx))
+            ),
+        )  # save state at prespecified filepath

     def load_model(self, model_save_dir, model_save_name, model_idx):
         """
@@ -230,98 +274,182 @@ class ExperimentBuilder(nn.Module):
         :param model_idx: The index to save the model with.
         :return: best val idx and best val model acc, also it loads the network state into the system state without returning it
         """
-        state = torch.load(f=os.path.join(model_save_dir, "{}_{}".format(model_save_name, str(model_idx))))
-        self.load_state_dict(state_dict=state['network'])
-        return state, state['best_val_model_idx'], state['best_val_model_acc']
+        state = torch.load(
+            f=os.path.join(
+                model_save_dir, "{}_{}".format(model_save_name, str(model_idx))
+            )
+        )
+        self.load_state_dict(state_dict=state["network"])
+        return state, state["best_val_model_idx"], state["best_val_model_acc"]

     def run_experiment(self):
         """
         Runs experiment train and evaluation iterations, saving the model and best val model and val model accuracy after each epoch
         :return: The summary current_epoch_losses from starting epoch to total_epochs.
         """
-        total_losses = {"train_acc": [], "train_loss": [], "val_acc": [],
-                        "val_loss": []}  # initialize a dict to keep the per-epoch metrics
+        total_losses = {
+            "train_acc": [],
+            "train_loss": [],
+            "val_acc": [],
+            "val_loss": [],
+        }  # initialize a dict to keep the per-epoch metrics
         for i, epoch_idx in enumerate(range(self.starting_epoch, self.num_epochs)):
             epoch_start_time = time.time()
-            current_epoch_losses = {"train_acc": [], "train_loss": [], "val_acc": [], "val_loss": []}
+            current_epoch_losses = {
+                "train_acc": [],
+                "train_loss": [],
+                "val_acc": [],
+                "val_loss": [],
+            }
             self.current_epoch = epoch_idx
-            with tqdm.tqdm(total=len(self.train_data)) as pbar_train:  # create a progress bar for training
+            with tqdm.tqdm(
+                total=len(self.train_data)
+            ) as pbar_train:  # create a progress bar for training
                 for idx, (x, y) in enumerate(self.train_data):  # get data batches
-                    loss, accuracy = self.run_train_iter(x=x, y=y)  # take a training iter step
-                    current_epoch_losses["train_loss"].append(loss)  # add current iter loss to the train loss list
-                    current_epoch_losses["train_acc"].append(accuracy)  # add current iter acc to the train acc list
+                    loss, accuracy = self.run_train_iter(
+                        x=x, y=y
+                    )  # take a training iter step
+                    current_epoch_losses["train_loss"].append(
+                        loss
+                    )  # add current iter loss to the train loss list
+                    current_epoch_losses["train_acc"].append(
+                        accuracy
+                    )  # add current iter acc to the train acc list
                     pbar_train.update(1)
-                    pbar_train.set_description("loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy))
+                    pbar_train.set_description(
+                        "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
+                    )

-            with tqdm.tqdm(total=len(self.val_data)) as pbar_val:  # create a progress bar for validation
+            with tqdm.tqdm(
+                total=len(self.val_data)
+            ) as pbar_val:  # create a progress bar for validation
                 for x, y in self.val_data:  # get data batches
-                    loss, accuracy = self.run_evaluation_iter(x=x, y=y)  # run a validation iter
-                    current_epoch_losses["val_loss"].append(loss)  # add current iter loss to val loss list.
-                    current_epoch_losses["val_acc"].append(accuracy)  # add current iter acc to val acc lst.
+                    loss, accuracy = self.run_evaluation_iter(
+                        x=x, y=y
+                    )  # run a validation iter
+                    current_epoch_losses["val_loss"].append(
+                        loss
+                    )  # add current iter loss to val loss list.
+                    current_epoch_losses["val_acc"].append(
+                        accuracy
+                    )  # add current iter acc to val acc lst.
                     pbar_val.update(1)  # add 1 step to the progress bar
-                    pbar_val.set_description("loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy))
-            val_mean_accuracy = np.mean(current_epoch_losses['val_acc'])
-            if val_mean_accuracy > self.best_val_model_acc:  # if current epoch's mean val acc is greater than the saved best val acc then
+                    pbar_val.set_description(
+                        "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
+                    )
+            val_mean_accuracy = np.mean(current_epoch_losses["val_acc"])
+            if (
+                val_mean_accuracy > self.best_val_model_acc
+            ):  # if current epoch's mean val acc is greater than the saved best val acc then
                 self.best_val_model_acc = val_mean_accuracy  # set the best val model acc to be current epoch's val accuracy
                 self.best_val_model_idx = epoch_idx  # set the experiment-wise best val idx to be the current epoch's idx

             for key, value in current_epoch_losses.items():
-                total_losses[key].append(np.mean(
-                    value))  # get mean of all metrics of current epoch metrics dict, to get them ready for storage and output on the terminal.
+                total_losses[key].append(
+                    np.mean(value)
+                )  # get mean of all metrics of current epoch metrics dict, to get them ready for storage and output on the terminal.

-            save_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv',
-                            stats_dict=total_losses, current_epoch=i,
-                            continue_from_mode=True if (self.starting_epoch != 0 or i > 0) else False)  # save statistics to stats file.
+            save_statistics(
+                experiment_log_dir=self.experiment_logs,
+                filename="summary.csv",
+                stats_dict=total_losses,
+                current_epoch=i,
+                continue_from_mode=(
+                    True if (self.starting_epoch != 0 or i > 0) else False
+                ),
+            )  # save statistics to stats file.

             # load_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv') # How to load a csv file if you need to

             out_string = "_".join(
-                ["{}_{:.4f}".format(key, np.mean(value)) for key, value in current_epoch_losses.items()])
+                [
+                    "{}_{:.4f}".format(key, np.mean(value))
+                    for key, value in current_epoch_losses.items()
+                ]
+            )
             # create a string to use to report our epoch metrics
-            epoch_elapsed_time = time.time() - epoch_start_time  # calculate time taken for epoch
+            epoch_elapsed_time = (
+                time.time() - epoch_start_time
+            )  # calculate time taken for epoch
             epoch_elapsed_time = "{:.4f}".format(epoch_elapsed_time)
-            print("Epoch {}:".format(epoch_idx), out_string, "epoch time", epoch_elapsed_time, "seconds")
-            self.state['model_epoch'] = epoch_idx
-            self.save_model(model_save_dir=self.experiment_saved_models,
-                            # save model and best val idx and best val acc, using the model dir, model name and model idx
-                            model_save_name="train_model", model_idx=epoch_idx,
-                            best_validation_model_idx=self.best_val_model_idx,
-                            best_validation_model_acc=self.best_val_model_acc)
-            self.save_model(model_save_dir=self.experiment_saved_models,
-                            # save model and best val idx and best val acc, using the model dir, model name and model idx
-                            model_save_name="train_model", model_idx='latest',
-                            best_validation_model_idx=self.best_val_model_idx,
-                            best_validation_model_acc=self.best_val_model_acc)
+            print(
+                "Epoch {}:".format(epoch_idx),
+                out_string,
+                "epoch time",
+                epoch_elapsed_time,
+                "seconds",
+            )
+            self.state["model_epoch"] = epoch_idx
+            self.save_model(
+                model_save_dir=self.experiment_saved_models,
+                # save model and best val idx and best val acc, using the model dir, model name and model idx
+                model_save_name="train_model",
+                model_idx=epoch_idx,
+                best_validation_model_idx=self.best_val_model_idx,
+                best_validation_model_acc=self.best_val_model_acc,
+            )
+            self.save_model(
+                model_save_dir=self.experiment_saved_models,
+                # save model and best val idx and best val acc, using the model dir, model name and model idx
+                model_save_name="train_model",
+                model_idx="latest",
+                best_validation_model_idx=self.best_val_model_idx,
+                best_validation_model_acc=self.best_val_model_acc,
+            )

             ################################################################
             ##### Plot Gradient Flow at each Epoch during Training ######
             print("Generating Gradient Flow Plot at epoch {}".format(epoch_idx))
             plt = self.plot_grad_flow(self.model.named_parameters())
-            if not os.path.exists(os.path.join(self.experiment_saved_models, 'gradient_flow_plots')):
-                os.mkdir(os.path.join(self.experiment_saved_models, 'gradient_flow_plots'))
+            if not os.path.exists(
+                os.path.join(self.experiment_saved_models, "gradient_flow_plots")
+            ):
+                os.mkdir(
+                    os.path.join(self.experiment_saved_models, "gradient_flow_plots")
+                )
             # plt.legend(loc="best")
-            plt.savefig(os.path.join(self.experiment_saved_models, 'gradient_flow_plots', "epoch{}.pdf".format(str(epoch_idx))))
+            plt.savefig(
+                os.path.join(
+                    self.experiment_saved_models,
+                    "gradient_flow_plots",
+                    "epoch{}.pdf".format(str(epoch_idx)),
+                )
+            )
             ################################################################

         print("Generating test set evaluation metrics")
-        self.load_model(model_save_dir=self.experiment_saved_models, model_idx=self.best_val_model_idx,
-                        # load best validation model
-                        model_save_name="train_model")
-        current_epoch_losses = {"test_acc": [], "test_loss": []}  # initialize a statistics dict
+        self.load_model(
+            model_save_dir=self.experiment_saved_models,
+            model_idx=self.best_val_model_idx,
+            # load best validation model
+            model_save_name="train_model",
+        )
+        current_epoch_losses = {
+            "test_acc": [],
+            "test_loss": [],
+        }  # initialize a statistics dict
         with tqdm.tqdm(total=len(self.test_data)) as pbar_test:  # ini a progress bar
             for x, y in self.test_data:  # sample batch
-                loss, accuracy = self.run_evaluation_iter(x=x,
-                                                          y=y)  # compute loss and accuracy by running an evaluation step
+                loss, accuracy = self.run_evaluation_iter(
+                    x=x, y=y
+                )  # compute loss and accuracy by running an evaluation step
                 current_epoch_losses["test_loss"].append(loss)  # save test loss
                 current_epoch_losses["test_acc"].append(accuracy)  # save test accuracy
                 pbar_test.update(1)  # update progress bar status
                 pbar_test.set_description(
-                    "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy))  # update progress bar string output
+                    "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
+                )  # update progress bar string output

-        test_losses = {key: [np.mean(value)] for key, value in
-                       current_epoch_losses.items()}  # save test set metrics in dict format
-        save_statistics(experiment_log_dir=self.experiment_logs, filename='test_summary.csv',
-                        # save test set metrics on disk in .csv format
-                        stats_dict=test_losses, current_epoch=0, continue_from_mode=False)
+        test_losses = {
+            key: [np.mean(value)] for key, value in current_epoch_losses.items()
+        }  # save test set metrics in dict format
+        save_statistics(
+            experiment_log_dir=self.experiment_logs,
+            filename="test_summary.csv",
+            # save test set metrics on disk in .csv format
+            stats_dict=test_losses,
+            current_epoch=0,
+            continue_from_mode=False,
+        )

         return total_losses, test_losses
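Between the two files it may help to spell out the checkpoint contract that `save_model`/`load_model` in the hunks above rely on: a single dict saved under the name `<model_save_name>_<model_idx>`, holding the network weights plus three bookkeeping keys. A minimal sketch under that assumption; the `/tmp/saved_models` path and the dummy values are hypothetical, not from the commit:

import os
import torch

save_dir = "/tmp/saved_models"  # hypothetical location
os.makedirs(save_dir, exist_ok=True)

state = {
    "network": {},               # in the real code this is self.state_dict()
    "best_val_model_idx": 3,     # epoch index of the best validation model so far
    "best_val_model_acc": 0.42,  # its validation accuracy
    "model_epoch": 3,            # read back when continue_from_epoch == -2
}
# Same "<name>_<idx>" filename scheme as save_model above.
torch.save(state, f=os.path.join(save_dir, "{}_{}".format("train_model", "latest")))

reloaded = torch.load(f=os.path.join(save_dir, "train_model_latest"))
assert reloaded["best_val_model_idx"] == 3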
@@ -4,7 +4,9 @@ import torch.nn.functional as F


 class FCCNetwork(nn.Module):
-    def __init__(self, input_shape, num_output_classes, num_filters, num_layers, use_bias=False):
+    def __init__(
+        self, input_shape, num_output_classes, num_filters, num_layers, use_bias=False
+    ):
         """
         Initializes a fully connected network similar to the ones implemented previously in the MLP package.
         :param input_shape: The shape of the inputs going in to the network.
@@ -35,17 +37,25 @@ class FCCNetwork(nn.Module):
         # shapes of all dimensions after the 0th dim

         for i in range(self.num_layers):
-            self.layer_dict['fcc_{}'.format(i)] = nn.Linear(in_features=out.shape[1],  # initialize a fcc layer
-                                                            out_features=self.num_filters,
-                                                            bias=self.use_bias)
+            self.layer_dict["fcc_{}".format(i)] = nn.Linear(
+                in_features=out.shape[1],  # initialize a fcc layer
+                out_features=self.num_filters,
+                bias=self.use_bias,
+            )

-            out = self.layer_dict['fcc_{}'.format(i)](out)  # apply ith fcc layer to the previous layers outputs
+            out = self.layer_dict["fcc_{}".format(i)](
+                out
+            )  # apply ith fcc layer to the previous layers outputs
             out = F.relu(out)  # apply a ReLU on the outputs

-        self.logits_linear_layer = nn.Linear(in_features=out.shape[1],  # initialize the prediction output linear layer
-                                             out_features=self.num_output_classes,
-                                             bias=self.use_bias)
-        out = self.logits_linear_layer(out)  # apply the layer to the previous layer's outputs
+        self.logits_linear_layer = nn.Linear(
+            in_features=out.shape[1],  # initialize the prediction output linear layer
+            out_features=self.num_output_classes,
+            bias=self.use_bias,
+        )
+        out = self.logits_linear_layer(
+            out
+        )  # apply the layer to the previous layer's outputs
         print("Block is built, output volume is", out.shape)
         return out
@@ -61,10 +71,14 @@ class FCCNetwork(nn.Module):
         # shapes of all dimensions after the 0th dim

         for i in range(self.num_layers):
-            out = self.layer_dict['fcc_{}'.format(i)](out)  # apply ith fcc layer to the previous layers outputs
+            out = self.layer_dict["fcc_{}".format(i)](
+                out
+            )  # apply ith fcc layer to the previous layers outputs
             out = F.relu(out)  # apply a ReLU on the outputs

-        out = self.logits_linear_layer(out)  # apply the layer to the previous layer's outputs
+        out = self.logits_linear_layer(
+            out
+        )  # apply the layer to the previous layer's outputs
         return out

     def reset_parameters(self):
@@ -78,8 +92,16 @@ class FCCNetwork(nn.Module):


 class EmptyBlock(nn.Module):
-    def __init__(self, input_shape=None, num_filters=None, kernel_size=None, padding=None, bias=None, dilation=None,
-                 reduction_factor=None):
+    def __init__(
+        self,
+        input_shape=None,
+        num_filters=None,
+        kernel_size=None,
+        padding=None,
+        bias=None,
+        dilation=None,
+        reduction_factor=None,
+    ):
         super(EmptyBlock, self).__init__()

         self.num_filters = num_filters
@@ -94,12 +116,12 @@ class EmptyBlock(nn.Module):
     def build_module(self):
         self.layer_dict = nn.ModuleDict()
         x = torch.zeros(self.input_shape)
-        self.layer_dict['Identity'] = nn.Identity()
+        self.layer_dict["Identity"] = nn.Identity()

     def forward(self, x):
         out = x

-        out = self.layer_dict['Identity'].forward(out)
+        out = self.layer_dict["Identity"].forward(out)

         return out

@@ -122,21 +144,27 @@ class EntryConvolutionalBlock(nn.Module):
         x = torch.zeros(self.input_shape)
         out = x

-        self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_0'].forward(out)
-        self.layer_dict['bn_0'] = nn.BatchNorm2d(num_features=out.shape[1])
-        out = F.leaky_relu(self.layer_dict['bn_0'].forward(out))
+        out = self.layer_dict["conv_0"].forward(out)
+        self.layer_dict["bn_0"] = nn.BatchNorm2d(num_features=out.shape[1])
+        out = F.leaky_relu(self.layer_dict["bn_0"].forward(out))

         print(out.shape)

     def forward(self, x):
         out = x

-        out = self.layer_dict['conv_0'].forward(out)
-        out = F.leaky_relu(self.layer_dict['bn_0'].forward(out))
+        out = self.layer_dict["conv_0"].forward(out)
+        out = F.leaky_relu(self.layer_dict["bn_0"].forward(out))

         return out

@@ -159,18 +187,30 @@ class ConvolutionalProcessingBlock(nn.Module):
         x = torch.zeros(self.input_shape)
         out = x

-        self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

-        self.layer_dict['conv_1'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_1"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         print(out.shape)
@@ -178,17 +218,26 @@ class ConvolutionalProcessingBlock(nn.Module):
     def forward(self, x):
         out = x

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         return out


 class ConvolutionalDimensionalityReductionBlock(nn.Module):
-    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation, reduction_factor):
+    def __init__(
+        self,
+        input_shape,
+        num_filters,
+        kernel_size,
+        padding,
+        bias,
+        dilation,
+        reduction_factor,
+    ):
         super(ConvolutionalDimensionalityReductionBlock, self).__init__()

         self.num_filters = num_filters
@@ -205,20 +254,32 @@ class ConvolutionalDimensionalityReductionBlock(nn.Module):
         x = torch.zeros(self.input_shape)
         out = x

-        self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

         out = F.avg_pool2d(out, self.reduction_factor)

-        self.layer_dict['conv_1'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_1"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         print(out.shape)
@@ -226,21 +287,29 @@ class ConvolutionalDimensionalityReductionBlock(nn.Module):
     def forward(self, x):
         out = x

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

         out = F.avg_pool2d(out, self.reduction_factor)

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         return out


 class ConvolutionalNetwork(nn.Module):
-    def __init__(self, input_shape, num_output_classes, num_filters,
-                 num_blocks_per_stage, num_stages, use_bias=False, processing_block_type=ConvolutionalProcessingBlock,
-                 dimensionality_reduction_block_type=ConvolutionalDimensionalityReductionBlock):
+    def __init__(
+        self,
+        input_shape,
+        num_output_classes,
+        num_filters,
+        num_blocks_per_stage,
+        num_stages,
+        use_bias=False,
+        processing_block_type=ConvolutionalProcessingBlock,
+        dimensionality_reduction_block_type=ConvolutionalDimensionalityReductionBlock,
+    ):
         """
         Initializes a convolutional network module
         :param input_shape: The shape of the tensor to be passed into this network
@@ -274,37 +343,59 @@ class ConvolutionalNetwork(nn.Module):
         """
         self.layer_dict = nn.ModuleDict()
         # initialize a module dict, which is effectively a dictionary that can collect layers and integrate them into pytorch
-        print("Building basic block of ConvolutionalNetwork using input shape", self.input_shape)
-        x = torch.zeros((self.input_shape))  # create dummy inputs to be used to infer shapes of layers
+        print(
+            "Building basic block of ConvolutionalNetwork using input shape",
+            self.input_shape,
+        )
+        x = torch.zeros(
+            (self.input_shape)
+        )  # create dummy inputs to be used to infer shapes of layers

         out = x
-        self.layer_dict['input_conv'] = EntryConvolutionalBlock(input_shape=out.shape, num_filters=self.num_filters,
-                                                                kernel_size=3, padding=1, bias=self.use_bias,
-                                                                dilation=1)
-        out = self.layer_dict['input_conv'].forward(out)
+        self.layer_dict["input_conv"] = EntryConvolutionalBlock(
+            input_shape=out.shape,
+            num_filters=self.num_filters,
+            kernel_size=3,
+            padding=1,
+            bias=self.use_bias,
+            dilation=1,
+        )
+        out = self.layer_dict["input_conv"].forward(out)
         # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
         for i in range(self.num_stages):  # for number of layers times
             for j in range(self.num_blocks_per_stage):
-                self.layer_dict['block_{}_{}'.format(i, j)] = self.processing_block_type(input_shape=out.shape,
-                                                                                         num_filters=self.num_filters,
-                                                                                         bias=self.use_bias,
-                                                                                         kernel_size=3, dilation=1,
-                                                                                         padding=1)
-                out = self.layer_dict['block_{}_{}'.format(i, j)].forward(out)
-            self.layer_dict['reduction_block_{}'.format(i)] = self.dimensionality_reduction_block_type(
-                input_shape=out.shape,
-                num_filters=self.num_filters, bias=True,
-                kernel_size=3, dilation=1,
-                padding=1,
-                reduction_factor=2)
-            out = self.layer_dict['reduction_block_{}'.format(i)].forward(out)
+                self.layer_dict["block_{}_{}".format(i, j)] = (
+                    self.processing_block_type(
+                        input_shape=out.shape,
+                        num_filters=self.num_filters,
+                        bias=self.use_bias,
+                        kernel_size=3,
+                        dilation=1,
+                        padding=1,
+                    )
+                )
+                out = self.layer_dict["block_{}_{}".format(i, j)].forward(out)
+            self.layer_dict["reduction_block_{}".format(i)] = (
+                self.dimensionality_reduction_block_type(
+                    input_shape=out.shape,
+                    num_filters=self.num_filters,
+                    bias=True,
+                    kernel_size=3,
+                    dilation=1,
+                    padding=1,
+                    reduction_factor=2,
+                )
+            )
+            out = self.layer_dict["reduction_block_{}".format(i)].forward(out)

         out = F.avg_pool2d(out, out.shape[-1])
-        print('shape before final linear layer', out.shape)
+        print("shape before final linear layer", out.shape)
         out = out.view(out.shape[0], -1)
-        self.logit_linear_layer = nn.Linear(in_features=out.shape[1],  # add a linear layer
-                                            out_features=self.num_output_classes,
-                                            bias=True)
+        self.logit_linear_layer = nn.Linear(
+            in_features=out.shape[1],  # add a linear layer
+            out_features=self.num_output_classes,
+            bias=True,
+        )
         out = self.logit_linear_layer(out)  # apply linear layer on flattened inputs
         print("Block is built, output volume is", out.shape)
         return out
@@ -316,15 +407,19 @@ class ConvolutionalNetwork(nn.Module):
         :return: preds (b, num_classes)
         """
         out = x
-        out = self.layer_dict['input_conv'].forward(out)
+        out = self.layer_dict["input_conv"].forward(out)
         for i in range(self.num_stages):  # for number of layers times
             for j in range(self.num_blocks_per_stage):
-                out = self.layer_dict['block_{}_{}'.format(i, j)].forward(out)
-            out = self.layer_dict['reduction_block_{}'.format(i)].forward(out)
+                out = self.layer_dict["block_{}_{}".format(i, j)].forward(out)
+            out = self.layer_dict["reduction_block_{}".format(i)].forward(out)

         out = F.avg_pool2d(out, out.shape[-1])
-        out = out.view(out.shape[0], -1)  # flatten outputs from (b, c, h, w) to (b, c*h*w)
-        out = self.logit_linear_layer(out)  # pass through a linear layer to get logits/preds
+        out = out.view(
+            out.shape[0], -1
+        )  # flatten outputs from (b, c, h, w) to (b, c*h*w)
+        out = self.logit_linear_layer(
+            out
+        )  # pass through a linear layer to get logits/preds
         return out

     def reset_parameters(self):
@ -338,3 +433,138 @@ class ConvolutionalNetwork(nn.Module):
|
|||||||
        pass

        self.logit_linear_layer.reset_parameters()


# My Implementation:


class ConvolutionalProcessingBlockBN(nn.Module):
    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
        super().__init__()

        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation

        self.build_module()

    def build_module(self):
        self.layer_dict = nn.ModuleDict()
        x = torch.zeros(self.input_shape)
        out = x

        # First convolutional layer with batch normalization
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Second convolutional layer with batch normalization
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        print(out.shape)

    def forward(self, x):
        out = x

        # Apply first conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Apply second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        return out
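As a quick sanity check of the new block (not part of the commit), a dummy forward pass confirms that BN leaves the spatial dimensions intact; the shape values below are arbitrary assumptions:

# Hypothetical smoke test; shape values are illustrative assumptions.
import torch

block = ConvolutionalProcessingBlockBN(
    input_shape=(2, 3, 32, 32),  # (batch, channels, height, width)
    num_filters=16,
    kernel_size=3,
    padding=1,
    bias=False,  # bias is redundant when BN follows the convolution
    dilation=1,
)
out = block.forward(torch.randn(2, 3, 32, 32))
print(out.shape)  # torch.Size([2, 16, 32, 32]): kernel 3 + padding 1 preserve H and W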


class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
    def __init__(
        self,
        input_shape,
        num_filters,
        kernel_size,
        padding,
        bias,
        dilation,
        reduction_factor,
    ):
        super().__init__()

        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation
        self.reduction_factor = reduction_factor

        self.build_module()

    def build_module(self):
        self.layer_dict = nn.ModuleDict()
        x = torch.zeros(self.input_shape)
        out = x

        # First convolutional layer with batch normalization
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Dimensionality reduction through average pooling
        out = F.avg_pool2d(out, self.reduction_factor)

        # Second convolutional layer with batch normalization
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        print(out.shape)

    def forward(self, x):
        out = x

        # Apply first conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Dimensionality reduction through average pooling
        out = F.avg_pool2d(out, self.reduction_factor)

        # Apply second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        return out
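The same kind of check shows the effect of reduction_factor: F.avg_pool2d with kernel size 2 (and its default stride equal to the kernel size) halves both spatial dimensions. Again a hypothetical sketch with arbitrary shapes:

# Hypothetical smoke test; shape values are illustrative assumptions.
import torch

red_block = ConvolutionalDimensionalityReductionBlockBN(
    input_shape=(2, 16, 32, 32),
    num_filters=16,
    kernel_size=3,
    padding=1,
    bias=False,
    dilation=1,
    reduction_factor=2,
)
out = red_block.forward(torch.randn(2, 16, 32, 32))
print(out.shape)  # torch.Size([2, 16, 16, 16]): 32 / reduction_factor = 16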
@@ -17,7 +17,14 @@ def load_from_stats_pkl_file(experiment_log_filepath, filename):
    return stats


def save_statistics(
    experiment_log_dir,
    filename,
    stats_dict,
    current_epoch,
    continue_from_mode=False,
    save_full_dict=False,
):
    """
    Saves the statistics in stats_dict into a csv file, using the keys as the header entries and the values as the
    columns of a particular header entry
@@ -29,7 +36,7 @@ def save_statistics(experiment_log_dir, filename, stats_dict, current_epoch, con
    :return: The filepath to the summary file
    """
    summary_filename = os.path.join(experiment_log_dir, filename)
    mode = "a" if continue_from_mode else "w"
    with open(summary_filename, mode) as f:
        writer = csv.writer(f)
        if not continue_from_mode:
@@ -57,7 +64,7 @@ def load_statistics(experiment_log_dir, filename):
"""
|
"""
|
||||||
summary_filename = os.path.join(experiment_log_dir, filename)
|
summary_filename = os.path.join(experiment_log_dir, filename)
|
||||||
|
|
||||||
with open(summary_filename, 'r+') as f:
|
with open(summary_filename, "r+") as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
|
|
||||||
keys = lines[0].split(",")
|
keys = lines[0].split(",")
|
||||||
@@ -7,7 +7,8 @@ import mlp.data_providers as data_providers
from pytorch_mlp_framework.arg_extractor import get_args
from pytorch_mlp_framework.experiment_builder import ExperimentBuilder
from pytorch_mlp_framework.model_architectures import *
import os

# os.environ["CUDA_VISIBLE_DEVICES"]="0"

args = get_args()  # get arguments from command line
@@ -15,54 +16,83 @@ rng = np.random.RandomState(seed=args.seed)  # set the seeds for the experiment
torch.manual_seed(seed=args.seed)  # sets pytorch's seed

# set up data augmentation transforms for training and testing
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

train_data = data_providers.CIFAR100(
    root="data", set_name="train", transform=transform_train, download=True
)  # create the training dataset
val_data = data_providers.CIFAR100(
    root="data", set_name="val", transform=transform_test, download=True
)  # create the validation dataset
test_data = data_providers.CIFAR100(
    root="data", set_name="test", transform=transform_test, download=True
)  # create the test dataset

train_data_loader = DataLoader(
    train_data, batch_size=args.batch_size, shuffle=True, num_workers=2
)
val_data_loader = DataLoader(
    val_data, batch_size=args.batch_size, shuffle=True, num_workers=2
)
test_data_loader = DataLoader(
    test_data, batch_size=args.batch_size, shuffle=True, num_workers=2
)

if args.block_type == "conv_block":
    processing_block_type = ConvolutionalProcessingBlock
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlock
elif args.block_type == "empty_block":
    processing_block_type = EmptyBlock
    dim_reduction_block_type = EmptyBlock
elif args.block_type == "conv_bn":
    processing_block_type = ConvolutionalProcessingBlockBN
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
else:
    raise ModuleNotFoundError
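With the new branch in place, the BN variants can be selected from the command line. A hypothetical invocation (the script path is an assumption, not shown in this diff):

python pytorch_mlp_framework/train_evaluate_image_classification_system.py \
    --block_type conv_bn --num_stages 3 --num_blocks_per_stage 5 \
    --num_filters 16 --num_epochs 100 --experiment_name exp_conv_bn --use_gpu True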
|
||||||
|
|
||||||
custom_conv_net = ConvolutionalNetwork( # initialize our network object, in this case a ConvNet
|
custom_conv_net = (
|
||||||
input_shape=(args.batch_size, args.image_num_channels, args.image_height, args.image_width),
|
ConvolutionalNetwork( # initialize our network object, in this case a ConvNet
|
||||||
num_output_classes=args.num_classes, num_filters=args.num_filters, use_bias=False,
|
input_shape=(
|
||||||
num_blocks_per_stage=args.num_blocks_per_stage, num_stages=args.num_stages,
|
args.batch_size,
|
||||||
processing_block_type=processing_block_type,
|
args.image_num_channels,
|
||||||
dimensionality_reduction_block_type=dim_reduction_block_type)
|
args.image_height,
|
||||||
|
args.image_width,
|
||||||
|
),
|
||||||
|
num_output_classes=args.num_classes,
|
||||||
|
num_filters=args.num_filters,
|
||||||
|
use_bias=False,
|
||||||
|
num_blocks_per_stage=args.num_blocks_per_stage,
|
||||||
|
num_stages=args.num_stages,
|
||||||
|
processing_block_type=processing_block_type,
|
||||||
|
dimensionality_reduction_block_type=dim_reduction_block_type,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
conv_experiment = ExperimentBuilder(network_model=custom_conv_net,
|
conv_experiment = ExperimentBuilder(
|
||||||
experiment_name=args.experiment_name,
|
network_model=custom_conv_net,
|
||||||
num_epochs=args.num_epochs,
|
experiment_name=args.experiment_name,
|
||||||
weight_decay_coefficient=args.weight_decay_coefficient,
|
num_epochs=args.num_epochs,
|
||||||
use_gpu=args.use_gpu,
|
weight_decay_coefficient=args.weight_decay_coefficient,
|
||||||
continue_from_epoch=args.continue_from_epoch,
|
use_gpu=args.use_gpu,
|
||||||
train_data=train_data_loader, val_data=val_data_loader,
|
continue_from_epoch=args.continue_from_epoch,
|
||||||
test_data=test_data_loader) # build an experiment object
|
train_data=train_data_loader,
|
||||||
experiment_metrics, test_metrics = conv_experiment.run_experiment() # run experiment and return experiment metrics
|
val_data=val_data_loader,
|
||||||
|
test_data=test_data_loader,
|
||||||
|
) # build an experiment object
|
||||||
|
experiment_metrics, test_metrics = (
|
||||||
|
conv_experiment.run_experiment()
|
||||||
|
) # run experiment and return experiment metrics