formatting and BN
This commit is contained in:
parent
92fccb8eb2
commit
cb5c6f4e19
@ -2,12 +2,12 @@ import argparse
|
||||
|
||||
|
||||
def str2bool(v):
    """
    Interpret a command-line style value as a boolean (usable as an argparse ``type``).

    :param v: The value to interpret. Strings are compared case-insensitively against
        yes/true/t/y/1 and no/false/f/n/0. A value that is already a bool is returned unchanged.
    :return: True or False.
    :raises argparse.ArgumentTypeError: If v is a string that matches neither set of spellings.
    """
    if isinstance(v, bool):
        # Nothing to parse; calling .lower() on a bool would raise AttributeError.
        return v

    lowered = v.lower()  # lower-case once instead of once per branch
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
|
||||
|
||||
|
||||
def get_args(argv=None):
    """
    Builds the experiment argument parser, parses the arguments, prints them and returns them.

    :param argv: Optional list of argument strings to parse. When None (the default) the
        arguments are taken from the command line, exactly as before this parameter existed.
    :return: An argparse.Namespace holding one attribute per argument defined below.
    """
    parser = argparse.ArgumentParser(
        description="Welcome to the MLP course's Pytorch training and inference helper script"
    )

    parser.add_argument(
        "--batch_size",
        nargs="?",
        type=int,
        default=100,
        help="Batch_size for experiment",
    )
    parser.add_argument(
        "--continue_from_epoch",
        nargs="?",
        type=int,
        default=-1,
        help="Epoch you want to continue training from while restarting an experiment",
    )
    parser.add_argument(
        "--seed",
        nargs="?",
        type=int,
        default=7112018,
        help="Seed to use for random number generator for experiment",
    )
    parser.add_argument(
        "--image_num_channels",
        nargs="?",
        type=int,
        default=3,
        help="The channel dimensionality of our image-data",
    )
    parser.add_argument(
        "--image_height", nargs="?", type=int, default=32, help="Height of image data"
    )
    parser.add_argument(
        "--image_width", nargs="?", type=int, default=32, help="Width of image data"
    )
    parser.add_argument(
        "--num_stages",
        nargs="?",
        type=int,
        default=3,
        help="Number of convolutional stages in the network. A stage is considered a sequence of "
        "convolutional layers where the input volume remains the same in the spacial dimension and"
        " is always terminated by a dimensionality reduction stage",
    )
    parser.add_argument(
        "--num_blocks_per_stage",
        nargs="?",
        type=int,
        default=5,
        help="Number of convolutional blocks in each stage, not including the reduction stage."
        " A convolutional block is made up of two convolutional layers activated using the "
        " leaky-relu non-linearity",
    )
    parser.add_argument(
        "--num_filters",
        nargs="?",
        type=int,
        default=16,
        help="Number of convolutional filters per convolutional layer in the network (excluding "
        "dimensionality reduction layers)",
    )
    parser.add_argument(
        "--num_epochs",
        nargs="?",
        type=int,
        default=100,
        help="Total number of epochs for model training",
    )
    parser.add_argument(
        "--num_classes",
        nargs="?",
        type=int,
        default=100,
        help="Number of classes in the dataset",
    )
    parser.add_argument(
        "--experiment_name",
        nargs="?",
        type=str,
        default="exp_1",
        help="Experiment name - to be used for building the experiment folder",
    )
    parser.add_argument(
        "--use_gpu",
        nargs="?",
        type=str2bool,
        default=True,
        help="A flag indicating whether we will use GPU acceleration or not",
    )
    parser.add_argument(
        "--weight_decay_coefficient",
        nargs="?",
        type=float,
        # float default so the attribute has the same type whether or not the flag is given
        default=0.0,
        help="Weight decay to use for Adam",
    )
    parser.add_argument(
        "--block_type",
        type=str,
        default="conv_block",
        help="Type of convolutional blocks to use in our network "
        "(This argument will be useful in running experiments to debug your network)",
    )

    # parse_args(None) falls back to sys.argv[1:], preserving the original behaviour.
    args = parser.parse_args(argv)
    print(args)
    return args
|
||||
|
@ -10,11 +10,23 @@ import time
|
||||
from pytorch_mlp_framework.storage_utils import save_statistics
|
||||
from matplotlib import pyplot as plt
|
||||
import matplotlib
|
||||
matplotlib.rcParams.update({'font.size': 8})
|
||||
|
||||
matplotlib.rcParams.update({"font.size": 8})
|
||||
|
||||
|
||||
class ExperimentBuilder(nn.Module):
|
||||
def __init__(self, network_model, experiment_name, num_epochs, train_data, val_data,
|
||||
test_data, weight_decay_coefficient, use_gpu, continue_from_epoch=-1):
|
||||
def __init__(
|
||||
self,
|
||||
network_model,
|
||||
experiment_name,
|
||||
num_epochs,
|
||||
train_data,
|
||||
val_data,
|
||||
test_data,
|
||||
weight_decay_coefficient,
|
||||
use_gpu,
|
||||
continue_from_epoch=-1,
|
||||
):
|
||||
"""
|
||||
Initializes an ExperimentBuilder object. Such an object takes care of running training and evaluation of a deep net
|
||||
on a given dataset. It also takes care of saving per epoch models and automatically inferring the best val model
|
||||
@ -31,75 +43,95 @@ class ExperimentBuilder(nn.Module):
|
||||
"""
|
||||
super(ExperimentBuilder, self).__init__()
|
||||
|
||||
|
||||
self.experiment_name = experiment_name
|
||||
self.model = network_model
|
||||
|
||||
if torch.cuda.device_count() >= 1 and use_gpu:
|
||||
self.device = torch.device('cuda')
|
||||
self.device = torch.device("cuda")
|
||||
self.model.to(self.device) # sends the model from the cpu to the gpu
|
||||
print('Use GPU', self.device)
|
||||
print("Use GPU", self.device)
|
||||
else:
|
||||
print("use CPU")
|
||||
self.device = torch.device('cpu') # sets the device to be CPU
|
||||
self.device = torch.device("cpu") # sets the device to be CPU
|
||||
print(self.device)
|
||||
|
||||
print('here')
|
||||
print("here")
|
||||
|
||||
self.model.reset_parameters() # re-initialize network parameters
|
||||
self.train_data = train_data
|
||||
self.val_data = val_data
|
||||
self.test_data = test_data
|
||||
|
||||
print('System learnable parameters')
|
||||
print("System learnable parameters")
|
||||
num_conv_layers = 0
|
||||
num_linear_layers = 0
|
||||
total_num_parameters = 0
|
||||
for name, value in self.named_parameters():
|
||||
print(name, value.shape)
|
||||
if all(item in name for item in ['conv', 'weight']):
|
||||
if all(item in name for item in ["conv", "weight"]):
|
||||
num_conv_layers += 1
|
||||
if all(item in name for item in ['linear', 'weight']):
|
||||
if all(item in name for item in ["linear", "weight"]):
|
||||
num_linear_layers += 1
|
||||
total_num_parameters += np.prod(value.shape)
|
||||
|
||||
print('Total number of parameters', total_num_parameters)
|
||||
print('Total number of conv layers', num_conv_layers)
|
||||
print('Total number of linear layers', num_linear_layers)
|
||||
print("Total number of parameters", total_num_parameters)
|
||||
print("Total number of conv layers", num_conv_layers)
|
||||
print("Total number of linear layers", num_linear_layers)
|
||||
|
||||
self.optimizer = optim.Adam(self.parameters(), amsgrad=False,
|
||||
weight_decay=weight_decay_coefficient)
|
||||
self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer,
|
||||
T_max=num_epochs,
|
||||
eta_min=0.00002)
|
||||
self.optimizer = optim.Adam(
|
||||
self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
|
||||
)
|
||||
self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
|
||||
self.optimizer, T_max=num_epochs, eta_min=0.00002
|
||||
)
|
||||
# Generate the directory names
|
||||
self.experiment_folder = os.path.abspath(experiment_name)
|
||||
self.experiment_logs = os.path.abspath(os.path.join(self.experiment_folder, "result_outputs"))
|
||||
self.experiment_saved_models = os.path.abspath(os.path.join(self.experiment_folder, "saved_models"))
|
||||
self.experiment_logs = os.path.abspath(
|
||||
os.path.join(self.experiment_folder, "result_outputs")
|
||||
)
|
||||
self.experiment_saved_models = os.path.abspath(
|
||||
os.path.join(self.experiment_folder, "saved_models")
|
||||
)
|
||||
|
||||
# Set best models to be at 0 since we are just starting
|
||||
self.best_val_model_idx = 0
|
||||
self.best_val_model_acc = 0.
|
||||
self.best_val_model_acc = 0.0
|
||||
|
||||
if not os.path.exists(self.experiment_folder): # If experiment directory does not exist
|
||||
if not os.path.exists(
|
||||
self.experiment_folder
|
||||
): # If experiment directory does not exist
|
||||
os.mkdir(self.experiment_folder) # create the experiment directory
|
||||
os.mkdir(self.experiment_logs) # create the experiment log directory
|
||||
os.mkdir(self.experiment_saved_models) # create the experiment saved models directory
|
||||
os.mkdir(
|
||||
self.experiment_saved_models
|
||||
) # create the experiment saved models directory
|
||||
|
||||
self.num_epochs = num_epochs
|
||||
self.criterion = nn.CrossEntropyLoss().to(self.device) # send the loss computation to the GPU
|
||||
self.criterion = nn.CrossEntropyLoss().to(
|
||||
self.device
|
||||
) # send the loss computation to the GPU
|
||||
|
||||
if continue_from_epoch == -2: # if continue from epoch is -2 then continue from latest saved model
|
||||
self.state, self.best_val_model_idx, self.best_val_model_acc = self.load_model(
|
||||
model_save_dir=self.experiment_saved_models, model_save_name="train_model",
|
||||
model_idx='latest') # reload existing model from epoch and return best val model index
|
||||
if (
|
||||
continue_from_epoch == -2
|
||||
): # if continue from epoch is -2 then continue from latest saved model
|
||||
self.state, self.best_val_model_idx, self.best_val_model_acc = (
|
||||
self.load_model(
|
||||
model_save_dir=self.experiment_saved_models,
|
||||
model_save_name="train_model",
|
||||
model_idx="latest",
|
||||
)
|
||||
) # reload existing model from epoch and return best val model index
|
||||
# and the best val acc of that model
|
||||
self.starting_epoch = int(self.state['model_epoch'])
|
||||
self.starting_epoch = int(self.state["model_epoch"])
|
||||
|
||||
elif continue_from_epoch > -1: # if continue from epoch is greater than -1 then
|
||||
self.state, self.best_val_model_idx, self.best_val_model_acc = self.load_model(
|
||||
model_save_dir=self.experiment_saved_models, model_save_name="train_model",
|
||||
model_idx=continue_from_epoch) # reload existing model from epoch and return best val model index
|
||||
self.state, self.best_val_model_idx, self.best_val_model_acc = (
|
||||
self.load_model(
|
||||
model_save_dir=self.experiment_saved_models,
|
||||
model_save_name="train_model",
|
||||
model_idx=continue_from_epoch,
|
||||
)
|
||||
) # reload existing model from epoch and return best val model index
|
||||
# and the best val acc of that model
|
||||
self.starting_epoch = continue_from_epoch
|
||||
else:
|
||||
@ -113,10 +145,7 @@ class ExperimentBuilder(nn.Module):
|
||||
|
||||
return total_num_params
|
||||
|
||||
|
||||
def plot_func_def(self, all_grads, layers):
|
||||
|
||||
|
||||
"""
|
||||
Plot function definition to plot the average gradient with respect to the number of layers in the given model
|
||||
:param all_grads: Gradients wrt weights for each layer in the model.
|
||||
@ -135,7 +164,6 @@ class ExperimentBuilder(nn.Module):
|
||||
|
||||
return plt
|
||||
|
||||
|
||||
def plot_grad_flow(self, named_parameters):
|
||||
"""
|
||||
The function is being called in Line 298 of this file.
|
||||
@ -165,15 +193,14 @@ class ExperimentBuilder(nn.Module):
|
||||
|
||||
return plt
|
||||
|
||||
|
||||
def run_train_iter(self, x, y):
|
||||
|
||||
self.train() # sets model to training mode (in case batch normalization or other methods have different procedures for training and evaluation)
|
||||
x, y = x.float().to(device=self.device), y.long().to(
|
||||
device=self.device) # send data to device as torch tensors
|
||||
device=self.device
|
||||
) # send data to device as torch tensors
|
||||
out = self.model.forward(x) # forward the data in the model
|
||||
|
||||
|
||||
loss = F.cross_entropy(input=out, target=y) # compute loss
|
||||
|
||||
self.optimizer.zero_grad() # set all weight grads from previous training iters to 0
|
||||
@ -195,7 +222,8 @@ class ExperimentBuilder(nn.Module):
|
||||
"""
|
||||
self.eval() # sets the system to validation mode
|
||||
x, y = x.float().to(device=self.device), y.long().to(
|
||||
device=self.device) # convert data to pytorch tensors and send to the computation device
|
||||
device=self.device
|
||||
) # convert data to pytorch tensors and send to the computation device
|
||||
out = self.model.forward(x) # forward the data in the model
|
||||
|
||||
loss = F.cross_entropy(input=out, target=y) # compute loss
|
||||
@ -204,8 +232,14 @@ class ExperimentBuilder(nn.Module):
|
||||
accuracy = np.mean(list(predicted.eq(y.data).cpu())) # compute accuracy
|
||||
return loss.cpu().data.numpy(), accuracy
|
||||
|
||||
def save_model(
    self,
    model_save_dir,
    model_save_name,
    model_idx,
    best_validation_model_idx,
    best_validation_model_acc,
):
    """
    Save the network parameter state and current best val epoch idx and best val accuracy.

    The values are written into self.state (alongside whatever it already holds), which is
    then serialised to "<model_save_dir>/<model_save_name>_<model_idx>".

    :param model_save_dir: Directory the checkpoint file is written to.
    :param model_save_name: Name to use to save model without the epoch index
    :param model_idx: Suffix appended to the file name (an epoch index or a label such as "latest").
    :param best_validation_model_idx: Index of the best validation model so far.
    :param best_validation_model_acc: Validation accuracy of that best model.
    """
    self.state["network"] = self.state_dict()  # network parameters and buffers
    self.state["best_val_model_idx"] = best_validation_model_idx
    # Callers pass the result of np.mean here. Store a plain Python float so the checkpoint
    # holds only basic types and tensors; a NumPy scalar in the file is rejected by
    # torch.load when it runs in weights-only mode.
    self.state["best_val_model_acc"] = float(best_validation_model_acc)

    checkpoint_path = os.path.join(
        model_save_dir, "{}_{}".format(model_save_name, str(model_idx))
    )
    torch.save(self.state, f=checkpoint_path)  # save state at prespecified filepath
|
||||
|
||||
def load_model(self, model_save_dir, model_save_name, model_idx):
    """
    Load a saved checkpoint and restore the network parameters from it.

    :param model_save_dir: Directory the checkpoint file is read from.
    :param model_save_name: Name of the saved model without the index suffix.
    :param model_idx: Suffix of the file name (an epoch index or a label such as "latest").
    :return: A tuple (state, best val model idx, best val model acc), where state is the
        full dictionary read from disk. The network parameters are also loaded into this object.
    """
    checkpoint_path = os.path.join(
        model_save_dir, "{}_{}".format(model_save_name, str(model_idx))
    )
    # Remap stored tensors onto the device this run uses, so a checkpoint written on a GPU
    # can still be read when the current run is on the CPU (and vice versa).
    state = torch.load(f=checkpoint_path, map_location=self.device)
    self.load_state_dict(state_dict=state["network"])
    return state, state["best_val_model_idx"], state["best_val_model_acc"]
|
||||
|
||||
def run_experiment(self):
    """
    Runs experiment train and evaluation iterations, saving the model and best val model and val model accuracy after each epoch

    After the last epoch the checkpoint with the best validation accuracy is reloaded and
    evaluated on the test data.

    :return: A tuple (total_losses, test_losses). total_losses maps "train_acc", "train_loss",
        "val_acc" and "val_loss" to lists with one mean value per epoch run; test_losses maps
        "test_acc" and "test_loss" to single-element lists.
    """
    total_losses = {
        "train_acc": [],
        "train_loss": [],
        "val_acc": [],
        "val_loss": [],
    }  # initialize a dict to keep the per-epoch metrics
    grad_plot_dir = os.path.join(self.experiment_saved_models, "gradient_flow_plots")

    for i, epoch_idx in enumerate(range(self.starting_epoch, self.num_epochs)):
        epoch_start_time = time.time()
        current_epoch_losses = {
            "train_acc": [],
            "train_loss": [],
            "val_acc": [],
            "val_loss": [],
        }
        self.current_epoch = epoch_idx

        with tqdm.tqdm(total=len(self.train_data)) as pbar_train:  # progress bar for training
            for x, y in self.train_data:  # get data batches
                loss, accuracy = self.run_train_iter(x=x, y=y)  # take a training iter step
                current_epoch_losses["train_loss"].append(loss)
                current_epoch_losses["train_acc"].append(accuracy)
                pbar_train.update(1)
                pbar_train.set_description(
                    "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
                )

        with tqdm.tqdm(total=len(self.val_data)) as pbar_val:  # progress bar for validation
            for x, y in self.val_data:  # get data batches
                loss, accuracy = self.run_evaluation_iter(x=x, y=y)  # run a validation iter
                current_epoch_losses["val_loss"].append(loss)
                current_epoch_losses["val_acc"].append(accuracy)
                pbar_val.update(1)  # add 1 step to the progress bar
                pbar_val.set_description(
                    "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
                )

        val_mean_accuracy = np.mean(current_epoch_losses["val_acc"])
        if val_mean_accuracy > self.best_val_model_acc:
            # this epoch is the best seen so far: remember its accuracy and index
            self.best_val_model_acc = val_mean_accuracy
            self.best_val_model_idx = epoch_idx

        # reduce the per-batch metrics of this epoch to their means for storage and reporting
        for key, value in current_epoch_losses.items():
            total_losses[key].append(np.mean(value))

        save_statistics(
            experiment_log_dir=self.experiment_logs,
            filename="summary.csv",
            stats_dict=total_losses,
            current_epoch=i,
            # False only for the very first epoch of a run that starts at epoch 0
            continue_from_mode=(self.starting_epoch != 0 or i > 0),
        )  # save statistics to stats file.

        # load_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv') # How to load a csv file if you need to

        # create a string to use to report our epoch metrics
        out_string = "_".join(
            "{}_{:.4f}".format(key, np.mean(value))
            for key, value in current_epoch_losses.items()
        )
        epoch_elapsed_time = "{:.4f}".format(time.time() - epoch_start_time)
        print(
            "Epoch {}:".format(epoch_idx),
            out_string,
            "epoch time",
            epoch_elapsed_time,
            "seconds",
        )

        self.state["model_epoch"] = epoch_idx
        # save the model together with the best val idx and best val acc, once under this
        # epoch's index and once under the fixed name "latest"
        for checkpoint_idx in (epoch_idx, "latest"):
            self.save_model(
                model_save_dir=self.experiment_saved_models,
                model_save_name="train_model",
                model_idx=checkpoint_idx,
                best_validation_model_idx=self.best_val_model_idx,
                best_validation_model_acc=self.best_val_model_acc,
            )

        ################################################################
        ##### Plot Gradient Flow at each Epoch during Training  ######
        print("Generating Gradient Flow Plot at epoch {}".format(epoch_idx))
        # Bound to its own name so the module-level `plt` alias is not shadowed in this method.
        grad_flow_plot = self.plot_grad_flow(self.model.named_parameters())
        os.makedirs(grad_plot_dir, exist_ok=True)  # no separate exists() check needed
        # plt.legend(loc="best")
        grad_flow_plot.savefig(
            os.path.join(grad_plot_dir, "epoch{}.pdf".format(str(epoch_idx)))
        )
        # NOTE(review): nothing closes the figure here; whether plot_grad_flow opens a new
        # figure on every call is not visible from this method - confirm.
        ################################################################

    print("Generating test set evaluation metrics")
    # load best validation model
    self.load_model(
        model_save_dir=self.experiment_saved_models,
        model_idx=self.best_val_model_idx,
        model_save_name="train_model",
    )
    current_epoch_losses = {
        "test_acc": [],
        "test_loss": [],
    }  # initialize a statistics dict
    with tqdm.tqdm(total=len(self.test_data)) as pbar_test:  # ini a progress bar
        for x, y in self.test_data:  # sample batch
            # compute loss and accuracy by running an evaluation step
            loss, accuracy = self.run_evaluation_iter(x=x, y=y)
            current_epoch_losses["test_loss"].append(loss)  # save test loss
            current_epoch_losses["test_acc"].append(accuracy)  # save test accuracy
            pbar_test.update(1)  # update progress bar status
            pbar_test.set_description(
                "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
            )  # update progress bar string output

    test_losses = {
        key: [np.mean(value)] for key, value in current_epoch_losses.items()
    }  # save test set metrics in dict format
    # save test set metrics on disk in .csv format
    save_statistics(
        experiment_log_dir=self.experiment_logs,
        filename="test_summary.csv",
        stats_dict=test_losses,
        current_epoch=0,
        continue_from_mode=False,
    )

    return total_losses, test_losses
|
||||
|
@ -4,7 +4,9 @@ import torch.nn.functional as F
|
||||
|
||||
|
||||
class FCCNetwork(nn.Module):
|
||||
def __init__(self, input_shape, num_output_classes, num_filters, num_layers, use_bias=False):
|
||||
def __init__(
|
||||
self, input_shape, num_output_classes, num_filters, num_layers, use_bias=False
|
||||
):
|
||||
"""
|
||||
Initializes a fully connected network similar to the ones implemented previously in the MLP package.
|
||||
:param input_shape: The shape of the inputs going in to the network.
|
||||
@ -35,17 +37,25 @@ class FCCNetwork(nn.Module):
|
||||
# shapes of all dimensions after the 0th dim
|
||||
|
||||
for i in range(self.num_layers):
|
||||
self.layer_dict['fcc_{}'.format(i)] = nn.Linear(in_features=out.shape[1], # initialize a fcc layer
|
||||
self.layer_dict["fcc_{}".format(i)] = nn.Linear(
|
||||
in_features=out.shape[1], # initialize a fcc layer
|
||||
out_features=self.num_filters,
|
||||
bias=self.use_bias)
|
||||
bias=self.use_bias,
|
||||
)
|
||||
|
||||
out = self.layer_dict['fcc_{}'.format(i)](out) # apply ith fcc layer to the previous layers outputs
|
||||
out = self.layer_dict["fcc_{}".format(i)](
|
||||
out
|
||||
) # apply ith fcc layer to the previous layers outputs
|
||||
out = F.relu(out) # apply a ReLU on the outputs
|
||||
|
||||
self.logits_linear_layer = nn.Linear(in_features=out.shape[1], # initialize the prediction output linear layer
|
||||
self.logits_linear_layer = nn.Linear(
|
||||
in_features=out.shape[1], # initialize the prediction output linear layer
|
||||
out_features=self.num_output_classes,
|
||||
bias=self.use_bias)
|
||||
out = self.logits_linear_layer(out) # apply the layer to the previous layer's outputs
|
||||
bias=self.use_bias,
|
||||
)
|
||||
out = self.logits_linear_layer(
|
||||
out
|
||||
) # apply the layer to the previous layer's outputs
|
||||
print("Block is built, output volume is", out.shape)
|
||||
return out
|
||||
|
||||
@ -61,10 +71,14 @@ class FCCNetwork(nn.Module):
|
||||
# shapes of all dimensions after the 0th dim
|
||||
|
||||
for i in range(self.num_layers):
|
||||
out = self.layer_dict['fcc_{}'.format(i)](out) # apply ith fcc layer to the previous layers outputs
|
||||
out = self.layer_dict["fcc_{}".format(i)](
|
||||
out
|
||||
) # apply ith fcc layer to the previous layers outputs
|
||||
out = F.relu(out) # apply a ReLU on the outputs
|
||||
|
||||
out = self.logits_linear_layer(out) # apply the layer to the previous layer's outputs
|
||||
out = self.logits_linear_layer(
|
||||
out
|
||||
) # apply the layer to the previous layer's outputs
|
||||
return out
|
||||
|
||||
def reset_parameters(self):
|
||||
@ -78,8 +92,16 @@ class FCCNetwork(nn.Module):
|
||||
|
||||
|
||||
class EmptyBlock(nn.Module):
|
||||
def __init__(self, input_shape=None, num_filters=None, kernel_size=None, padding=None, bias=None, dilation=None,
|
||||
reduction_factor=None):
|
||||
def __init__(
|
||||
self,
|
||||
input_shape=None,
|
||||
num_filters=None,
|
||||
kernel_size=None,
|
||||
padding=None,
|
||||
bias=None,
|
||||
dilation=None,
|
||||
reduction_factor=None,
|
||||
):
|
||||
super(EmptyBlock, self).__init__()
|
||||
|
||||
self.num_filters = num_filters
|
||||
@ -94,12 +116,12 @@ class EmptyBlock(nn.Module):
|
||||
def build_module(self):
    """
    Builds the block's layers: a module dictionary holding a single identity layer
    under the key "Identity".
    """
    # No dummy input tensor is created here: the identity layer needs no shape information,
    # and torch.zeros(self.input_shape) fails when input_shape is the constructor default None.
    self.layer_dict = nn.ModuleDict()
    self.layer_dict["Identity"] = nn.Identity()
|
||||
|
||||
def forward(self, x):
    """
    Passes the input through the block's identity layer.

    :param x: The input to the block.
    :return: The result of applying the "Identity" layer to x.
    """
    # Call the layer itself rather than its .forward method so that any hooks
    # registered on it are run.
    return self.layer_dict["Identity"](x)
|
||||
|
||||
@ -122,21 +144,27 @@ class EntryConvolutionalBlock(nn.Module):
|
||||
x = torch.zeros(self.input_shape)
|
||||
out = x
|
||||
|
||||
self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
|
||||
kernel_size=self.kernel_size, dilation=self.dilation,
|
||||
padding=self.padding, stride=1)
|
||||
self.layer_dict["conv_0"] = nn.Conv2d(
|
||||
in_channels=out.shape[1],
|
||||
out_channels=self.num_filters,
|
||||
bias=self.bias,
|
||||
kernel_size=self.kernel_size,
|
||||
dilation=self.dilation,
|
||||
padding=self.padding,
|
||||
stride=1,
|
||||
)
|
||||
|
||||
out = self.layer_dict['conv_0'].forward(out)
|
||||
self.layer_dict['bn_0'] = nn.BatchNorm2d(num_features=out.shape[1])
|
||||
out = F.leaky_relu(self.layer_dict['bn_0'].forward(out))
|
||||
out = self.layer_dict["conv_0"].forward(out)
|
||||
self.layer_dict["bn_0"] = nn.BatchNorm2d(num_features=out.shape[1])
|
||||
out = F.leaky_relu(self.layer_dict["bn_0"].forward(out))
|
||||
|
||||
print(out.shape)
|
||||
|
||||
def forward(self, x):
|
||||
out = x
|
||||
|
||||
out = self.layer_dict['conv_0'].forward(out)
|
||||
out = F.leaky_relu(self.layer_dict['bn_0'].forward(out))
|
||||
out = self.layer_dict["conv_0"].forward(out)
|
||||
out = F.leaky_relu(self.layer_dict["bn_0"].forward(out))
|
||||
|
||||
return out
|
||||
|
||||
@ -159,18 +187,30 @@ class ConvolutionalProcessingBlock(nn.Module):
|
||||
x = torch.zeros(self.input_shape)
|
||||
out = x
|
||||
|
||||
self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
|
||||
kernel_size=self.kernel_size, dilation=self.dilation,
|
||||
padding=self.padding, stride=1)
|
||||
self.layer_dict["conv_0"] = nn.Conv2d(
|
||||
in_channels=out.shape[1],
|
||||
out_channels=self.num_filters,
|
||||
bias=self.bias,
|
||||
kernel_size=self.kernel_size,
|
||||
dilation=self.dilation,
|
||||
padding=self.padding,
|
||||
stride=1,
|
||||
)
|
||||
|
||||
out = self.layer_dict['conv_0'].forward(out)
|
||||
out = self.layer_dict["conv_0"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
self.layer_dict['conv_1'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
|
||||
kernel_size=self.kernel_size, dilation=self.dilation,
|
||||
padding=self.padding, stride=1)
|
||||
self.layer_dict["conv_1"] = nn.Conv2d(
|
||||
in_channels=out.shape[1],
|
||||
out_channels=self.num_filters,
|
||||
bias=self.bias,
|
||||
kernel_size=self.kernel_size,
|
||||
dilation=self.dilation,
|
||||
padding=self.padding,
|
||||
stride=1,
|
||||
)
|
||||
|
||||
out = self.layer_dict['conv_1'].forward(out)
|
||||
out = self.layer_dict["conv_1"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
print(out.shape)
|
||||
@ -178,17 +218,26 @@ class ConvolutionalProcessingBlock(nn.Module):
|
||||
def forward(self, x):
|
||||
out = x
|
||||
|
||||
out = self.layer_dict['conv_0'].forward(out)
|
||||
out = self.layer_dict["conv_0"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
out = self.layer_dict['conv_1'].forward(out)
|
||||
out = self.layer_dict["conv_1"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class ConvolutionalDimensionalityReductionBlock(nn.Module):
|
||||
def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation, reduction_factor):
|
||||
def __init__(
|
||||
self,
|
||||
input_shape,
|
||||
num_filters,
|
||||
kernel_size,
|
||||
padding,
|
||||
bias,
|
||||
dilation,
|
||||
reduction_factor,
|
||||
):
|
||||
super(ConvolutionalDimensionalityReductionBlock, self).__init__()
|
||||
|
||||
self.num_filters = num_filters
|
||||
@ -205,20 +254,32 @@ class ConvolutionalDimensionalityReductionBlock(nn.Module):
|
||||
x = torch.zeros(self.input_shape)
|
||||
out = x
|
||||
|
||||
self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
|
||||
kernel_size=self.kernel_size, dilation=self.dilation,
|
||||
padding=self.padding, stride=1)
|
||||
self.layer_dict["conv_0"] = nn.Conv2d(
|
||||
in_channels=out.shape[1],
|
||||
out_channels=self.num_filters,
|
||||
bias=self.bias,
|
||||
kernel_size=self.kernel_size,
|
||||
dilation=self.dilation,
|
||||
padding=self.padding,
|
||||
stride=1,
|
||||
)
|
||||
|
||||
out = self.layer_dict['conv_0'].forward(out)
|
||||
out = self.layer_dict["conv_0"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
out = F.avg_pool2d(out, self.reduction_factor)
|
||||
|
||||
self.layer_dict['conv_1'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
|
||||
kernel_size=self.kernel_size, dilation=self.dilation,
|
||||
padding=self.padding, stride=1)
|
||||
self.layer_dict["conv_1"] = nn.Conv2d(
|
||||
in_channels=out.shape[1],
|
||||
out_channels=self.num_filters,
|
||||
bias=self.bias,
|
||||
kernel_size=self.kernel_size,
|
||||
dilation=self.dilation,
|
||||
padding=self.padding,
|
||||
stride=1,
|
||||
)
|
||||
|
||||
out = self.layer_dict['conv_1'].forward(out)
|
||||
out = self.layer_dict["conv_1"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
print(out.shape)
|
||||
@ -226,21 +287,29 @@ class ConvolutionalDimensionalityReductionBlock(nn.Module):
|
||||
def forward(self, x):
|
||||
out = x
|
||||
|
||||
out = self.layer_dict['conv_0'].forward(out)
|
||||
out = self.layer_dict["conv_0"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
out = F.avg_pool2d(out, self.reduction_factor)
|
||||
|
||||
out = self.layer_dict['conv_1'].forward(out)
|
||||
out = self.layer_dict["conv_1"].forward(out)
|
||||
out = F.leaky_relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class ConvolutionalNetwork(nn.Module):
|
||||
def __init__(self, input_shape, num_output_classes, num_filters,
|
||||
num_blocks_per_stage, num_stages, use_bias=False, processing_block_type=ConvolutionalProcessingBlock,
|
||||
dimensionality_reduction_block_type=ConvolutionalDimensionalityReductionBlock):
|
||||
def __init__(
|
||||
self,
|
||||
input_shape,
|
||||
num_output_classes,
|
||||
num_filters,
|
||||
num_blocks_per_stage,
|
||||
num_stages,
|
||||
use_bias=False,
|
||||
processing_block_type=ConvolutionalProcessingBlock,
|
||||
dimensionality_reduction_block_type=ConvolutionalDimensionalityReductionBlock,
|
||||
):
|
||||
"""
|
||||
Initializes a convolutional network module
|
||||
:param input_shape: The shape of the tensor to be passed into this network
|
||||
@ -274,37 +343,59 @@ class ConvolutionalNetwork(nn.Module):
|
||||
"""
|
||||
self.layer_dict = nn.ModuleDict()
|
||||
# initialize a module dict, which is effectively a dictionary that can collect layers and integrate them into pytorch
|
||||
print("Building basic block of ConvolutionalNetwork using input shape", self.input_shape)
|
||||
x = torch.zeros((self.input_shape)) # create dummy inputs to be used to infer shapes of layers
|
||||
print(
|
||||
"Building basic block of ConvolutionalNetwork using input shape",
|
||||
self.input_shape,
|
||||
)
|
||||
x = torch.zeros(
|
||||
(self.input_shape)
|
||||
) # create dummy inputs to be used to infer shapes of layers
|
||||
|
||||
out = x
|
||||
self.layer_dict['input_conv'] = EntryConvolutionalBlock(input_shape=out.shape, num_filters=self.num_filters,
|
||||
kernel_size=3, padding=1, bias=self.use_bias,
|
||||
dilation=1)
|
||||
out = self.layer_dict['input_conv'].forward(out)
|
||||
self.layer_dict["input_conv"] = EntryConvolutionalBlock(
|
||||
input_shape=out.shape,
|
||||
num_filters=self.num_filters,
|
||||
kernel_size=3,
|
||||
padding=1,
|
||||
bias=self.use_bias,
|
||||
dilation=1,
|
||||
)
|
||||
out = self.layer_dict["input_conv"].forward(out)
|
||||
# torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
|
||||
for i in range(self.num_stages): # for number of layers times
|
||||
for j in range(self.num_blocks_per_stage):
|
||||
self.layer_dict['block_{}_{}'.format(i, j)] = self.processing_block_type(input_shape=out.shape,
|
||||
self.layer_dict["block_{}_{}".format(i, j)] = (
|
||||
self.processing_block_type(
|
||||
input_shape=out.shape,
|
||||
num_filters=self.num_filters,
|
||||
bias=self.use_bias,
|
||||
kernel_size=3, dilation=1,
|
||||
padding=1)
|
||||
out = self.layer_dict['block_{}_{}'.format(i, j)].forward(out)
|
||||
self.layer_dict['reduction_block_{}'.format(i)] = self.dimensionality_reduction_block_type(
|
||||
input_shape=out.shape,
|
||||
num_filters=self.num_filters, bias=True,
|
||||
kernel_size=3, dilation=1,
|
||||
kernel_size=3,
|
||||
dilation=1,
|
||||
padding=1,
|
||||
reduction_factor=2)
|
||||
out = self.layer_dict['reduction_block_{}'.format(i)].forward(out)
|
||||
)
|
||||
)
|
||||
out = self.layer_dict["block_{}_{}".format(i, j)].forward(out)
|
||||
self.layer_dict["reduction_block_{}".format(i)] = (
|
||||
self.dimensionality_reduction_block_type(
|
||||
input_shape=out.shape,
|
||||
num_filters=self.num_filters,
|
||||
bias=True,
|
||||
kernel_size=3,
|
||||
dilation=1,
|
||||
padding=1,
|
||||
reduction_factor=2,
|
||||
)
|
||||
)
|
||||
out = self.layer_dict["reduction_block_{}".format(i)].forward(out)
|
||||
|
||||
out = F.avg_pool2d(out, out.shape[-1])
|
||||
print('shape before final linear layer', out.shape)
|
||||
print("shape before final linear layer", out.shape)
|
||||
out = out.view(out.shape[0], -1)
|
||||
self.logit_linear_layer = nn.Linear(in_features=out.shape[1], # add a linear layer
|
||||
self.logit_linear_layer = nn.Linear(
|
||||
in_features=out.shape[1], # add a linear layer
|
||||
out_features=self.num_output_classes,
|
||||
bias=True)
|
||||
bias=True,
|
||||
)
|
||||
out = self.logit_linear_layer(out) # apply linear layer on flattened inputs
|
||||
print("Block is built, output volume is", out.shape)
|
||||
return out
|
||||
@ -316,15 +407,19 @@ class ConvolutionalNetwork(nn.Module):
|
||||
:return: preds (b, num_classes)
|
||||
"""
|
||||
out = x
|
||||
out = self.layer_dict['input_conv'].forward(out)
|
||||
out = self.layer_dict["input_conv"].forward(out)
|
||||
for i in range(self.num_stages): # for number of layers times
|
||||
for j in range(self.num_blocks_per_stage):
|
||||
out = self.layer_dict['block_{}_{}'.format(i, j)].forward(out)
|
||||
out = self.layer_dict['reduction_block_{}'.format(i)].forward(out)
|
||||
out = self.layer_dict["block_{}_{}".format(i, j)].forward(out)
|
||||
out = self.layer_dict["reduction_block_{}".format(i)].forward(out)
|
||||
|
||||
out = F.avg_pool2d(out, out.shape[-1])
|
||||
out = out.view(out.shape[0], -1) # flatten outputs from (b, c, h, w) to (b, c*h*w)
|
||||
out = self.logit_linear_layer(out) # pass through a linear layer to get logits/preds
|
||||
out = out.view(
|
||||
out.shape[0], -1
|
||||
) # flatten outputs from (b, c, h, w) to (b, c*h*w)
|
||||
out = self.logit_linear_layer(
|
||||
out
|
||||
) # pass through a linear layer to get logits/preds
|
||||
return out
|
||||
|
||||
def reset_parameters(self):
|
||||
@ -338,3 +433,138 @@ class ConvolutionalNetwork(nn.Module):
|
||||
pass
|
||||
|
||||
self.logit_linear_layer.reset_parameters()
|
||||
|
||||
|
||||
# My Implementation:
|
||||
|
||||
|
||||
class ConvolutionalProcessingBlockBN(nn.Module):
    """
    Processing block made of two convolutions, each followed by batch normalisation and a leaky ReLU:
    conv_0 -> bn_0 -> leaky_relu -> conv_1 -> bn_1 -> leaky_relu.

    Both convolutions use stride 1 and share the same kernel size, padding, dilation and bias setting.

    :param input_shape: Shape of the tensor this block will receive; index 1 is read as the number of
        input channels of the first convolution.
    :param num_filters: Number of output channels of both convolutions (and features of both BN layers).
    :param kernel_size: Kernel size passed to both convolutions.
    :param padding: Padding passed to both convolutions.
    :param bias: Whether the convolutions carry a bias term.
    :param dilation: Dilation passed to both convolutions.
    """

    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
        super().__init__()

        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation

        self.build_module()

    def build_module(self):
        """
        Creates the layers of the block, inferring the input channels of each convolution from a dummy
        tensor of shape ``self.input_shape``.

        Only the convolutions are applied to the dummy tensor: batch normalisation and leaky ReLU keep the
        shape unchanged, and running a freshly created BN layer (training mode) on an all-zeros dummy would
        both overwrite its running statistics with meaningless values and fail when the dummy holds a single
        value per channel.
        """
        self.layer_dict = nn.ModuleDict()
        out = torch.zeros(self.input_shape)

        # First convolutional layer with Batch Normalization
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        with torch.no_grad():  # shape inference only, no autograd graph needed
            out = self.layer_dict["conv_0"](out)

        # Second convolutional layer with Batch Normalization
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        with torch.no_grad():
            out = self.layer_dict["conv_1"](out)

        print(out.shape)

    def forward(self, x):
        """
        Runs the block on ``x``.

        :param x: Input tensor whose channel dimension (index 1) matches ``input_shape[1]``.
        :return: Output of the second conv -> BN -> leaky ReLU stage.
        """
        out = x

        # Apply first conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Apply second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        return out
|
||||
|
||||
|
||||
class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
    """
    Dimensionality reduction block with batch normalisation:
    conv_0 -> bn_0 -> leaky_relu -> avg_pool2d(reduction_factor) -> conv_1 -> bn_1 -> leaky_relu.

    Both convolutions use stride 1 and share the same kernel size, padding, dilation and bias setting;
    the spatial reduction comes from the average pooling between them.

    :param input_shape: Shape of the tensor this block will receive; index 1 is read as the number of
        input channels of the first convolution.
    :param num_filters: Number of output channels of both convolutions (and features of both BN layers).
    :param kernel_size: Kernel size passed to both convolutions.
    :param padding: Padding passed to both convolutions.
    :param bias: Whether the convolutions carry a bias term.
    :param dilation: Dilation passed to both convolutions.
    :param reduction_factor: Kernel size handed to ``F.avg_pool2d`` between the two convolutions.
    """

    def __init__(
        self,
        input_shape,
        num_filters,
        kernel_size,
        padding,
        bias,
        dilation,
        reduction_factor,
    ):
        super().__init__()

        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation
        self.reduction_factor = reduction_factor

        self.build_module()

    def build_module(self):
        """
        Creates the layers of the block, inferring the input channels of each convolution from a dummy
        tensor of shape ``self.input_shape``.

        The dummy tensor goes through the convolutions and the pooling only: batch normalisation and leaky
        ReLU keep the shape unchanged, and running a freshly created BN layer (training mode) on an
        all-zeros dummy would both overwrite its running statistics with meaningless values and fail when
        the pooled dummy holds a single value per channel.
        """
        self.layer_dict = nn.ModuleDict()
        out = torch.zeros(self.input_shape)

        # First convolutional layer with Batch Normalization
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        with torch.no_grad():  # shape inference only, no autograd graph needed
            out = self.layer_dict["conv_0"](out)

            # Dimensionality reduction through average pooling
            out = F.avg_pool2d(out, self.reduction_factor)

        # Second convolutional layer with Batch Normalization
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        with torch.no_grad():
            out = self.layer_dict["conv_1"](out)

        print(out.shape)

    def forward(self, x):
        """
        Runs the block on ``x``.

        :param x: Input tensor whose channel dimension (index 1) matches ``input_shape[1]``.
        :return: Output of the second conv -> BN -> leaky ReLU stage, computed on the pooled features.
        """
        out = x

        # Apply first conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Dimensionality reduction through average pooling
        out = F.avg_pool2d(out, self.reduction_factor)

        # Apply second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        return out
|
||||
|
@ -17,7 +17,14 @@ def load_from_stats_pkl_file(experiment_log_filepath, filename):
|
||||
return stats
|
||||
|
||||
|
||||
def save_statistics(experiment_log_dir, filename, stats_dict, current_epoch, continue_from_mode=False, save_full_dict=False):
|
||||
def save_statistics(
|
||||
experiment_log_dir,
|
||||
filename,
|
||||
stats_dict,
|
||||
current_epoch,
|
||||
continue_from_mode=False,
|
||||
save_full_dict=False,
|
||||
):
|
||||
"""
|
||||
Saves the statistics in stats dict into a csv file. Using the keys as the header entries and the values as the
|
||||
columns of a particular header entry
|
||||
@ -29,7 +36,7 @@ def save_statistics(experiment_log_dir, filename, stats_dict, current_epoch, con
|
||||
:return: The filepath to the summary file
|
||||
"""
|
||||
summary_filename = os.path.join(experiment_log_dir, filename)
|
||||
mode = 'a' if continue_from_mode else 'w'
|
||||
mode = "a" if continue_from_mode else "w"
|
||||
with open(summary_filename, mode) as f:
|
||||
writer = csv.writer(f)
|
||||
if not continue_from_mode:
|
||||
@ -57,7 +64,7 @@ def load_statistics(experiment_log_dir, filename):
|
||||
"""
|
||||
summary_filename = os.path.join(experiment_log_dir, filename)
|
||||
|
||||
with open(summary_filename, 'r+') as f:
|
||||
with open(summary_filename, "r+") as f:
|
||||
lines = f.readlines()
|
||||
|
||||
keys = lines[0].split(",")
|
||||
|
@ -8,6 +8,7 @@ from pytorch_mlp_framework.arg_extractor import get_args
|
||||
from pytorch_mlp_framework.experiment_builder import ExperimentBuilder
|
||||
from pytorch_mlp_framework.model_architectures import *
|
||||
import os
|
||||
|
||||
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
|
||||
|
||||
args = get_args() # get arguments from command line
|
||||
@ -15,54 +16,83 @@ rng = np.random.RandomState(seed=args.seed) # set the seeds for the experiment
|
||||
torch.manual_seed(seed=args.seed) # sets pytorch's seed
|
||||
|
||||
# set up data augmentation transforms for training and testing
|
||||
transform_train = transforms.Compose([
|
||||
transform_train = transforms.Compose(
|
||||
[
|
||||
transforms.RandomCrop(32, padding=4),
|
||||
transforms.RandomHorizontalFlip(),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
transform_test = transforms.Compose([
|
||||
transform_test = transforms.Compose(
|
||||
[
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
train_data = data_providers.CIFAR100(root='data', set_name='train',
|
||||
transform=transform_train,
|
||||
download=True) # initialize our rngs using the argument set seed
|
||||
val_data = data_providers.CIFAR100(root='data', set_name='val',
|
||||
transform=transform_test,
|
||||
download=True) # initialize our rngs using the argument set seed
|
||||
test_data = data_providers.CIFAR100(root='data', set_name='test',
|
||||
transform=transform_test,
|
||||
download=True) # initialize our rngs using the argument set seed
|
||||
train_data = data_providers.CIFAR100(
|
||||
root="data", set_name="train", transform=transform_train, download=True
|
||||
) # initialize our rngs using the argument set seed
|
||||
val_data = data_providers.CIFAR100(
|
||||
root="data", set_name="val", transform=transform_test, download=True
|
||||
) # initialize our rngs using the argument set seed
|
||||
test_data = data_providers.CIFAR100(
|
||||
root="data", set_name="test", transform=transform_test, download=True
|
||||
) # initialize our rngs using the argument set seed
|
||||
|
||||
train_data_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, num_workers=2)
|
||||
val_data_loader = DataLoader(val_data, batch_size=args.batch_size, shuffle=True, num_workers=2)
|
||||
test_data_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=True, num_workers=2)
|
||||
train_data_loader = DataLoader(
|
||||
train_data, batch_size=args.batch_size, shuffle=True, num_workers=2
|
||||
)
|
||||
val_data_loader = DataLoader(
|
||||
val_data, batch_size=args.batch_size, shuffle=True, num_workers=2
|
||||
)
|
||||
test_data_loader = DataLoader(
|
||||
test_data, batch_size=args.batch_size, shuffle=True, num_workers=2
|
||||
)
|
||||
|
||||
if args.block_type == 'conv_block':
|
||||
if args.block_type == "conv_block":
|
||||
processing_block_type = ConvolutionalProcessingBlock
|
||||
dim_reduction_block_type = ConvolutionalDimensionalityReductionBlock
|
||||
elif args.block_type == 'empty_block':
|
||||
elif args.block_type == "empty_block":
|
||||
processing_block_type = EmptyBlock
|
||||
dim_reduction_block_type = EmptyBlock
|
||||
elif args.block_type == "conv_bn":
|
||||
processing_block_type = ConvolutionalProcessingBlockBN
|
||||
dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
|
||||
else:
|
||||
raise ModuleNotFoundError
|
||||
|
||||
custom_conv_net = ConvolutionalNetwork( # initialize our network object, in this case a ConvNet
|
||||
input_shape=(args.batch_size, args.image_num_channels, args.image_height, args.image_width),
|
||||
num_output_classes=args.num_classes, num_filters=args.num_filters, use_bias=False,
|
||||
num_blocks_per_stage=args.num_blocks_per_stage, num_stages=args.num_stages,
|
||||
custom_conv_net = (
|
||||
ConvolutionalNetwork( # initialize our network object, in this case a ConvNet
|
||||
input_shape=(
|
||||
args.batch_size,
|
||||
args.image_num_channels,
|
||||
args.image_height,
|
||||
args.image_width,
|
||||
),
|
||||
num_output_classes=args.num_classes,
|
||||
num_filters=args.num_filters,
|
||||
use_bias=False,
|
||||
num_blocks_per_stage=args.num_blocks_per_stage,
|
||||
num_stages=args.num_stages,
|
||||
processing_block_type=processing_block_type,
|
||||
dimensionality_reduction_block_type=dim_reduction_block_type)
|
||||
dimensionality_reduction_block_type=dim_reduction_block_type,
|
||||
)
|
||||
)
|
||||
|
||||
conv_experiment = ExperimentBuilder(network_model=custom_conv_net,
|
||||
conv_experiment = ExperimentBuilder(
|
||||
network_model=custom_conv_net,
|
||||
experiment_name=args.experiment_name,
|
||||
num_epochs=args.num_epochs,
|
||||
weight_decay_coefficient=args.weight_decay_coefficient,
|
||||
use_gpu=args.use_gpu,
|
||||
continue_from_epoch=args.continue_from_epoch,
|
||||
train_data=train_data_loader, val_data=val_data_loader,
|
||||
test_data=test_data_loader) # build an experiment object
|
||||
experiment_metrics, test_metrics = conv_experiment.run_experiment() # run experiment and return experiment metrics
|
||||
train_data=train_data_loader,
|
||||
val_data=val_data_loader,
|
||||
test_data=test_data_loader,
|
||||
) # build an experiment object
|
||||
experiment_metrics, test_metrics = (
|
||||
conv_experiment.run_experiment()
|
||||
) # run experiment and return experiment metrics
|
||||
|
Loading…
Reference in New Issue
Block a user