formatting and BN

This commit is contained in:
parent 92fccb8eb2
commit cb5c6f4e19
@@ -2,12 +2,12 @@ import argparse


 def str2bool(v):
-    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+    if v.lower() in ("yes", "true", "t", "y", "1"):
         return True
-    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+    elif v.lower() in ("no", "false", "f", "n", "0"):
         return False
     else:
-        raise argparse.ArgumentTypeError('Boolean value expected.')
+        raise argparse.ArgumentTypeError("Boolean value expected.")


 def get_args():
@@ -16,38 +16,111 @@ def get_args():
     :return: A namedtuple with arguments
     """
     parser = argparse.ArgumentParser(
-        description='Welcome to the MLP course\'s Pytorch training and inference helper script')
+        description="Welcome to the MLP course's Pytorch training and inference helper script"
+    )

-    parser.add_argument('--batch_size', nargs="?", type=int, default=100, help='Batch_size for experiment')
-    parser.add_argument('--continue_from_epoch', nargs="?", type=int, default=-1, help='Epoch you want to continue training from while restarting an experiment')
-    parser.add_argument('--seed', nargs="?", type=int, default=7112018,
-                        help='Seed to use for random number generator for experiment')
-    parser.add_argument('--image_num_channels', nargs="?", type=int, default=3,
-                        help='The channel dimensionality of our image-data')
-    parser.add_argument('--image_height', nargs="?", type=int, default=32, help='Height of image data')
-    parser.add_argument('--image_width', nargs="?", type=int, default=32, help='Width of image data')
-    parser.add_argument('--num_stages', nargs="?", type=int, default=3,
-                        help='Number of convolutional stages in the network. A stage is considered a sequence of '
-                             'convolutional layers where the input volume remains the same in the spacial dimension and'
-                             ' is always terminated by a dimensionality reduction stage')
-    parser.add_argument('--num_blocks_per_stage', nargs="?", type=int, default=5,
-                        help='Number of convolutional blocks in each stage, not including the reduction stage.'
-                             ' A convolutional block is made up of two convolutional layers activated using the '
-                             ' leaky-relu non-linearity')
-    parser.add_argument('--num_filters', nargs="?", type=int, default=16,
-                        help='Number of convolutional filters per convolutional layer in the network (excluding '
-                             'dimensionality reduction layers)')
-    parser.add_argument('--num_epochs', nargs="?", type=int, default=100, help='Total number of epochs for model training')
-    parser.add_argument('--num_classes', nargs="?", type=int, default=100, help='Number of classes in the dataset')
-    parser.add_argument('--experiment_name', nargs="?", type=str, default="exp_1",
-                        help='Experiment name - to be used for building the experiment folder')
-    parser.add_argument('--use_gpu', nargs="?", type=str2bool, default=True,
-                        help='A flag indicating whether we will use GPU acceleration or not')
-    parser.add_argument('--weight_decay_coefficient', nargs="?", type=float, default=0,
-                        help='Weight decay to use for Adam')
-    parser.add_argument('--block_type', type=str, default='conv_block',
-                        help='Type of convolutional blocks to use in our network '
-                             '(This argument will be useful in running experiments to debug your network)')
+    parser.add_argument(
+        "--batch_size",
+        nargs="?",
+        type=int,
+        default=100,
+        help="Batch_size for experiment",
+    )
+    parser.add_argument(
+        "--continue_from_epoch",
+        nargs="?",
+        type=int,
+        default=-1,
+        help="Epoch you want to continue training from while restarting an experiment",
+    )
+    parser.add_argument(
+        "--seed",
+        nargs="?",
+        type=int,
+        default=7112018,
+        help="Seed to use for random number generator for experiment",
+    )
+    parser.add_argument(
+        "--image_num_channels",
+        nargs="?",
+        type=int,
+        default=3,
+        help="The channel dimensionality of our image-data",
+    )
+    parser.add_argument(
+        "--image_height", nargs="?", type=int, default=32, help="Height of image data"
+    )
+    parser.add_argument(
+        "--image_width", nargs="?", type=int, default=32, help="Width of image data"
+    )
+    parser.add_argument(
+        "--num_stages",
+        nargs="?",
+        type=int,
+        default=3,
+        help="Number of convolutional stages in the network. A stage is considered a sequence of "
+        "convolutional layers where the input volume remains the same in the spacial dimension and"
+        " is always terminated by a dimensionality reduction stage",
+    )
+    parser.add_argument(
+        "--num_blocks_per_stage",
+        nargs="?",
+        type=int,
+        default=5,
+        help="Number of convolutional blocks in each stage, not including the reduction stage."
+        " A convolutional block is made up of two convolutional layers activated using the "
+        " leaky-relu non-linearity",
+    )
+    parser.add_argument(
+        "--num_filters",
+        nargs="?",
+        type=int,
+        default=16,
+        help="Number of convolutional filters per convolutional layer in the network (excluding "
+        "dimensionality reduction layers)",
+    )
+    parser.add_argument(
+        "--num_epochs",
+        nargs="?",
+        type=int,
+        default=100,
+        help="Total number of epochs for model training",
+    )
+    parser.add_argument(
+        "--num_classes",
+        nargs="?",
+        type=int,
+        default=100,
+        help="Number of classes in the dataset",
+    )
+    parser.add_argument(
+        "--experiment_name",
+        nargs="?",
+        type=str,
+        default="exp_1",
+        help="Experiment name - to be used for building the experiment folder",
+    )
+    parser.add_argument(
+        "--use_gpu",
+        nargs="?",
+        type=str2bool,
+        default=True,
+        help="A flag indicating whether we will use GPU acceleration or not",
+    )
+    parser.add_argument(
+        "--weight_decay_coefficient",
+        nargs="?",
+        type=float,
+        default=0,
+        help="Weight decay to use for Adam",
+    )
+    parser.add_argument(
+        "--block_type",
+        type=str,
+        default="conv_block",
+        help="Type of convolutional blocks to use in our network "
+        "(This argument will be useful in running experiments to debug your network)",
+    )
     args = parser.parse_args()
     print(args)
     return args
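Side note on the hunk above: `str2bool` exists because argparse's `type=bool` would treat any non-empty string, including "False", as truthy. A minimal, self-contained sketch of the intended behaviour (the `--use_gpu false` invocation is a made-up example, not part of the commit):

import argparse

def str2bool(v):
    # Same helper as in the diff above: inspect the string instead of bool().
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    elif v.lower() in ("no", "false", "f", "n", "0"):
        return False
    else:
        raise argparse.ArgumentTypeError("Boolean value expected.")

parser = argparse.ArgumentParser()
parser.add_argument("--use_gpu", nargs="?", type=str2bool, default=True)

# False with str2bool; with type=bool this would be True, since bool("false") is truthy.
print(parser.parse_args(["--use_gpu", "false"]).use_gpu)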
@@ -10,11 +10,23 @@ import time
 from pytorch_mlp_framework.storage_utils import save_statistics
 from matplotlib import pyplot as plt
 import matplotlib
-matplotlib.rcParams.update({'font.size': 8})
+
+matplotlib.rcParams.update({"font.size": 8})
+

 class ExperimentBuilder(nn.Module):
-    def __init__(self, network_model, experiment_name, num_epochs, train_data, val_data,
-                 test_data, weight_decay_coefficient, use_gpu, continue_from_epoch=-1):
+    def __init__(
+        self,
+        network_model,
+        experiment_name,
+        num_epochs,
+        train_data,
+        val_data,
+        test_data,
+        weight_decay_coefficient,
+        use_gpu,
+        continue_from_epoch=-1,
+    ):
         """
         Initializes an ExperimentBuilder object. Such an object takes care of running training and evaluation of a deep net
         on a given dataset. It also takes care of saving per epoch models and automatically inferring the best val model
@@ -31,75 +43,95 @@ class ExperimentBuilder(nn.Module):
         """
         super(ExperimentBuilder, self).__init__()


         self.experiment_name = experiment_name
         self.model = network_model

         if torch.cuda.device_count() >= 1 and use_gpu:
-            self.device = torch.device('cuda')
+            self.device = torch.device("cuda")
             self.model.to(self.device)  # sends the model from the cpu to the gpu
-            print('Use GPU', self.device)
+            print("Use GPU", self.device)
         else:
             print("use CPU")
-            self.device = torch.device('cpu')  # sets the device to be CPU
+            self.device = torch.device("cpu")  # sets the device to be CPU
             print(self.device)

-        print('here')
+        print("here")

         self.model.reset_parameters()  # re-initialize network parameters
         self.train_data = train_data
         self.val_data = val_data
         self.test_data = test_data

-        print('System learnable parameters')
+        print("System learnable parameters")
         num_conv_layers = 0
         num_linear_layers = 0
         total_num_parameters = 0
         for name, value in self.named_parameters():
             print(name, value.shape)
-            if all(item in name for item in ['conv', 'weight']):
+            if all(item in name for item in ["conv", "weight"]):
                 num_conv_layers += 1
-            if all(item in name for item in ['linear', 'weight']):
+            if all(item in name for item in ["linear", "weight"]):
                 num_linear_layers += 1
             total_num_parameters += np.prod(value.shape)

-        print('Total number of parameters', total_num_parameters)
-        print('Total number of conv layers', num_conv_layers)
-        print('Total number of linear layers', num_linear_layers)
+        print("Total number of parameters", total_num_parameters)
+        print("Total number of conv layers", num_conv_layers)
+        print("Total number of linear layers", num_linear_layers)

-        self.optimizer = optim.Adam(self.parameters(), amsgrad=False,
-                                    weight_decay=weight_decay_coefficient)
-        self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(self.optimizer,
-                                                                            T_max=num_epochs,
-                                                                            eta_min=0.00002)
+        self.optimizer = optim.Adam(
+            self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
+        )
+        self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
+            self.optimizer, T_max=num_epochs, eta_min=0.00002
+        )
         # Generate the directory names
         self.experiment_folder = os.path.abspath(experiment_name)
-        self.experiment_logs = os.path.abspath(os.path.join(self.experiment_folder, "result_outputs"))
-        self.experiment_saved_models = os.path.abspath(os.path.join(self.experiment_folder, "saved_models"))
+        self.experiment_logs = os.path.abspath(
+            os.path.join(self.experiment_folder, "result_outputs")
+        )
+        self.experiment_saved_models = os.path.abspath(
+            os.path.join(self.experiment_folder, "saved_models")
+        )

         # Set best models to be at 0 since we are just starting
         self.best_val_model_idx = 0
-        self.best_val_model_acc = 0.
+        self.best_val_model_acc = 0.0

-        if not os.path.exists(self.experiment_folder):  # If experiment directory does not exist
+        if not os.path.exists(
+            self.experiment_folder
+        ):  # If experiment directory does not exist
             os.mkdir(self.experiment_folder)  # create the experiment directory
             os.mkdir(self.experiment_logs)  # create the experiment log directory
-            os.mkdir(self.experiment_saved_models)  # create the experiment saved models directory
+            os.mkdir(
+                self.experiment_saved_models
+            )  # create the experiment saved models directory

         self.num_epochs = num_epochs
-        self.criterion = nn.CrossEntropyLoss().to(self.device)  # send the loss computation to the GPU
+        self.criterion = nn.CrossEntropyLoss().to(
+            self.device
+        )  # send the loss computation to the GPU

-        if continue_from_epoch == -2:  # if continue from epoch is -2 then continue from latest saved model
-            self.state, self.best_val_model_idx, self.best_val_model_acc = self.load_model(
-                model_save_dir=self.experiment_saved_models, model_save_name="train_model",
-                model_idx='latest')  # reload existing model from epoch and return best val model index
+        if (
+            continue_from_epoch == -2
+        ):  # if continue from epoch is -2 then continue from latest saved model
+            self.state, self.best_val_model_idx, self.best_val_model_acc = (
+                self.load_model(
+                    model_save_dir=self.experiment_saved_models,
+                    model_save_name="train_model",
+                    model_idx="latest",
+                )
+            )  # reload existing model from epoch and return best val model index
             # and the best val acc of that model
-            self.starting_epoch = int(self.state['model_epoch'])
+            self.starting_epoch = int(self.state["model_epoch"])

         elif continue_from_epoch > -1:  # if continue from epoch is greater than -1 then
-            self.state, self.best_val_model_idx, self.best_val_model_acc = self.load_model(
-                model_save_dir=self.experiment_saved_models, model_save_name="train_model",
-                model_idx=continue_from_epoch)  # reload existing model from epoch and return best val model index
+            self.state, self.best_val_model_idx, self.best_val_model_acc = (
+                self.load_model(
+                    model_save_dir=self.experiment_saved_models,
+                    model_save_name="train_model",
+                    model_idx=continue_from_epoch,
+                )
+            )  # reload existing model from epoch and return best val model index
             # and the best val acc of that model
             self.starting_epoch = continue_from_epoch
         else:
@@ -113,10 +145,7 @@ class ExperimentBuilder(nn.Module):

         return total_num_params

-
     def plot_func_def(self, all_grads, layers):
-
-
         """
         Plot function definition to plot the average gradient with respect to the number of layers in the given model
         :param all_grads: Gradients wrt weights for each layer in the model.
@@ -124,34 +153,33 @@ class ExperimentBuilder(nn.Module):
         :return: plot for gradient flow
         """
         plt.plot(all_grads, alpha=0.3, color="b")
-        plt.hlines(0, 0, len(all_grads)+1, linewidth=1, color="k" )
-        plt.xticks(range(0,len(all_grads), 1), layers, rotation="vertical")
+        plt.hlines(0, 0, len(all_grads) + 1, linewidth=1, color="k")
+        plt.xticks(range(0, len(all_grads), 1), layers, rotation="vertical")
         plt.xlim(xmin=0, xmax=len(all_grads))
         plt.xlabel("Layers")
         plt.ylabel("Average Gradient")
         plt.title("Gradient flow")
         plt.grid(True)
         plt.tight_layout()

         return plt

-
     def plot_grad_flow(self, named_parameters):
         """
         The function is being called in Line 298 of this file.
         Receives the parameters of the model being trained. Returns plot of gradient flow for the given model parameters.

         """
         all_grads = []
         layers = []

         """
         Complete the code in the block below to collect absolute mean of the gradients for each layer in all_grads with the layer names in layers.
         """

         for name, param in named_parameters:
             # Check if the parameter requires gradient and has a gradient
             if param.requires_grad and param.grad is not None:
                 try:
                     _, a, _, b, _ = name.split(".", 4)
                 except:
@@ -165,23 +193,22 @@ class ExperimentBuilder(nn.Module):

         return plt

-
     def run_train_iter(self, x, y):

         self.train()  # sets model to training mode (in case batch normalization or other methods have different procedures for training and evaluation)
         x, y = x.float().to(device=self.device), y.long().to(
-            device=self.device)  # send data to device as torch tensors
+            device=self.device
+        )  # send data to device as torch tensors
         out = self.model.forward(x)  # forward the data in the model

-
         loss = F.cross_entropy(input=out, target=y)  # compute loss

         self.optimizer.zero_grad()  # set all weight grads from previous training iters to 0
         loss.backward()  # backpropagate to compute gradients for current iter loss

         self.optimizer.step()  # update network parameters
         self.learning_rate_scheduler.step()  # update learning rate scheduler

         _, predicted = torch.max(out.data, 1)  # get argmax of predictions
         accuracy = np.mean(list(predicted.eq(y.data).cpu()))  # compute accuracy
         return loss.cpu().data.numpy(), accuracy
@@ -195,7 +222,8 @@ class ExperimentBuilder(nn.Module):
         """
         self.eval()  # sets the system to validation mode
         x, y = x.float().to(device=self.device), y.long().to(
-            device=self.device)  # convert data to pytorch tensors and send to the computation device
+            device=self.device
+        )  # convert data to pytorch tensors and send to the computation device
         out = self.model.forward(x)  # forward the data in the model

         loss = F.cross_entropy(input=out, target=y)  # compute loss
@@ -204,8 +232,14 @@ class ExperimentBuilder(nn.Module):
         accuracy = np.mean(list(predicted.eq(y.data).cpu()))  # compute accuracy
         return loss.cpu().data.numpy(), accuracy

-    def save_model(self, model_save_dir, model_save_name, model_idx, best_validation_model_idx,
-                   best_validation_model_acc):
+    def save_model(
+        self,
+        model_save_dir,
+        model_save_name,
+        model_idx,
+        best_validation_model_idx,
+        best_validation_model_acc,
+    ):
         """
         Save the network parameter state and current best val epoch idx and best val accuracy.
         :param model_save_name: Name to use to save model without the epoch index
@@ -216,11 +250,21 @@ class ExperimentBuilder(nn.Module):
         :param state: The dictionary containing the system state.

         """
-        self.state['network'] = self.state_dict()  # save network parameter and other variables.
-        self.state['best_val_model_idx'] = best_validation_model_idx  # save current best val idx
-        self.state['best_val_model_acc'] = best_validation_model_acc  # save current best val acc
-        torch.save(self.state, f=os.path.join(model_save_dir, "{}_{}".format(model_save_name, str(
-            model_idx))))  # save state at prespecified filepath
+        self.state["network"] = (
+            self.state_dict()
+        )  # save network parameter and other variables.
+        self.state["best_val_model_idx"] = (
+            best_validation_model_idx  # save current best val idx
+        )
+        self.state["best_val_model_acc"] = (
+            best_validation_model_acc  # save current best val acc
+        )
+        torch.save(
+            self.state,
+            f=os.path.join(
+                model_save_dir, "{}_{}".format(model_save_name, str(model_idx))
+            ),
+        )  # save state at prespecified filepath

     def load_model(self, model_save_dir, model_save_name, model_idx):
         """
@@ -230,98 +274,182 @@ class ExperimentBuilder(nn.Module):
         :param model_idx: The index to save the model with.
         :return: best val idx and best val model acc, also it loads the network state into the system state without returning it
         """
-        state = torch.load(f=os.path.join(model_save_dir, "{}_{}".format(model_save_name, str(model_idx))))
-        self.load_state_dict(state_dict=state['network'])
-        return state, state['best_val_model_idx'], state['best_val_model_acc']
+        state = torch.load(
+            f=os.path.join(
+                model_save_dir, "{}_{}".format(model_save_name, str(model_idx))
+            )
+        )
+        self.load_state_dict(state_dict=state["network"])
+        return state, state["best_val_model_idx"], state["best_val_model_acc"]

     def run_experiment(self):
         """
         Runs experiment train and evaluation iterations, saving the model and best val model and val model accuracy after each epoch
         :return: The summary current_epoch_losses from starting epoch to total_epochs.
         """
-        total_losses = {"train_acc": [], "train_loss": [], "val_acc": [],
-                        "val_loss": []}  # initialize a dict to keep the per-epoch metrics
+        total_losses = {
+            "train_acc": [],
+            "train_loss": [],
+            "val_acc": [],
+            "val_loss": [],
+        }  # initialize a dict to keep the per-epoch metrics
         for i, epoch_idx in enumerate(range(self.starting_epoch, self.num_epochs)):
             epoch_start_time = time.time()
-            current_epoch_losses = {"train_acc": [], "train_loss": [], "val_acc": [], "val_loss": []}
+            current_epoch_losses = {
+                "train_acc": [],
+                "train_loss": [],
+                "val_acc": [],
+                "val_loss": [],
+            }
             self.current_epoch = epoch_idx
-            with tqdm.tqdm(total=len(self.train_data)) as pbar_train:  # create a progress bar for training
+            with tqdm.tqdm(
+                total=len(self.train_data)
+            ) as pbar_train:  # create a progress bar for training
                 for idx, (x, y) in enumerate(self.train_data):  # get data batches
-                    loss, accuracy = self.run_train_iter(x=x, y=y)  # take a training iter step
-                    current_epoch_losses["train_loss"].append(loss)  # add current iter loss to the train loss list
-                    current_epoch_losses["train_acc"].append(accuracy)  # add current iter acc to the train acc list
+                    loss, accuracy = self.run_train_iter(
+                        x=x, y=y
+                    )  # take a training iter step
+                    current_epoch_losses["train_loss"].append(
+                        loss
+                    )  # add current iter loss to the train loss list
+                    current_epoch_losses["train_acc"].append(
+                        accuracy
+                    )  # add current iter acc to the train acc list
                     pbar_train.update(1)
-                    pbar_train.set_description("loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy))
+                    pbar_train.set_description(
+                        "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
+                    )

-            with tqdm.tqdm(total=len(self.val_data)) as pbar_val:  # create a progress bar for validation
+            with tqdm.tqdm(
+                total=len(self.val_data)
+            ) as pbar_val:  # create a progress bar for validation
                 for x, y in self.val_data:  # get data batches
-                    loss, accuracy = self.run_evaluation_iter(x=x, y=y)  # run a validation iter
-                    current_epoch_losses["val_loss"].append(loss)  # add current iter loss to val loss list.
-                    current_epoch_losses["val_acc"].append(accuracy)  # add current iter acc to val acc lst.
+                    loss, accuracy = self.run_evaluation_iter(
+                        x=x, y=y
+                    )  # run a validation iter
+                    current_epoch_losses["val_loss"].append(
+                        loss
+                    )  # add current iter loss to val loss list.
+                    current_epoch_losses["val_acc"].append(
+                        accuracy
+                    )  # add current iter acc to val acc lst.
                     pbar_val.update(1)  # add 1 step to the progress bar
-                    pbar_val.set_description("loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy))
-            val_mean_accuracy = np.mean(current_epoch_losses['val_acc'])
-            if val_mean_accuracy > self.best_val_model_acc:  # if current epoch's mean val acc is greater than the saved best val acc then
+                    pbar_val.set_description(
+                        "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
+                    )
+            val_mean_accuracy = np.mean(current_epoch_losses["val_acc"])
+            if (
+                val_mean_accuracy > self.best_val_model_acc
+            ):  # if current epoch's mean val acc is greater than the saved best val acc then
                 self.best_val_model_acc = val_mean_accuracy  # set the best val model acc to be current epoch's val accuracy
                 self.best_val_model_idx = epoch_idx  # set the experiment-wise best val idx to be the current epoch's idx

             for key, value in current_epoch_losses.items():
-                total_losses[key].append(np.mean(
-                    value))  # get mean of all metrics of current epoch metrics dict, to get them ready for storage and output on the terminal.
+                total_losses[key].append(
+                    np.mean(value)
+                )  # get mean of all metrics of current epoch metrics dict, to get them ready for storage and output on the terminal.

-            save_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv',
-                            stats_dict=total_losses, current_epoch=i,
-                            continue_from_mode=True if (self.starting_epoch != 0 or i > 0) else False)  # save statistics to stats file.
+            save_statistics(
+                experiment_log_dir=self.experiment_logs,
+                filename="summary.csv",
+                stats_dict=total_losses,
+                current_epoch=i,
+                continue_from_mode=(
+                    True if (self.starting_epoch != 0 or i > 0) else False
+                ),
+            )  # save statistics to stats file.

             # load_statistics(experiment_log_dir=self.experiment_logs, filename='summary.csv') # How to load a csv file if you need to

             out_string = "_".join(
-                ["{}_{:.4f}".format(key, np.mean(value)) for key, value in current_epoch_losses.items()])
+                [
+                    "{}_{:.4f}".format(key, np.mean(value))
+                    for key, value in current_epoch_losses.items()
+                ]
+            )
             # create a string to use to report our epoch metrics
-            epoch_elapsed_time = time.time() - epoch_start_time  # calculate time taken for epoch
+            epoch_elapsed_time = (
+                time.time() - epoch_start_time
+            )  # calculate time taken for epoch
             epoch_elapsed_time = "{:.4f}".format(epoch_elapsed_time)
-            print("Epoch {}:".format(epoch_idx), out_string, "epoch time", epoch_elapsed_time, "seconds")
-            self.state['model_epoch'] = epoch_idx
-            self.save_model(model_save_dir=self.experiment_saved_models,
-                            # save model and best val idx and best val acc, using the model dir, model name and model idx
-                            model_save_name="train_model", model_idx=epoch_idx,
-                            best_validation_model_idx=self.best_val_model_idx,
-                            best_validation_model_acc=self.best_val_model_acc)
-            self.save_model(model_save_dir=self.experiment_saved_models,
-                            # save model and best val idx and best val acc, using the model dir, model name and model idx
-                            model_save_name="train_model", model_idx='latest',
-                            best_validation_model_idx=self.best_val_model_idx,
-                            best_validation_model_acc=self.best_val_model_acc)
+            print(
+                "Epoch {}:".format(epoch_idx),
+                out_string,
+                "epoch time",
+                epoch_elapsed_time,
+                "seconds",
+            )
+            self.state["model_epoch"] = epoch_idx
+            self.save_model(
+                model_save_dir=self.experiment_saved_models,
+                # save model and best val idx and best val acc, using the model dir, model name and model idx
+                model_save_name="train_model",
+                model_idx=epoch_idx,
+                best_validation_model_idx=self.best_val_model_idx,
+                best_validation_model_acc=self.best_val_model_acc,
+            )
+            self.save_model(
+                model_save_dir=self.experiment_saved_models,
+                # save model and best val idx and best val acc, using the model dir, model name and model idx
+                model_save_name="train_model",
+                model_idx="latest",
+                best_validation_model_idx=self.best_val_model_idx,
+                best_validation_model_acc=self.best_val_model_acc,
+            )

             ################################################################
             ##### Plot Gradient Flow at each Epoch during Training ######
             print("Generating Gradient Flow Plot at epoch {}".format(epoch_idx))
             plt = self.plot_grad_flow(self.model.named_parameters())
-            if not os.path.exists(os.path.join(self.experiment_saved_models, 'gradient_flow_plots')):
-                os.mkdir(os.path.join(self.experiment_saved_models, 'gradient_flow_plots'))
+            if not os.path.exists(
+                os.path.join(self.experiment_saved_models, "gradient_flow_plots")
+            ):
+                os.mkdir(
+                    os.path.join(self.experiment_saved_models, "gradient_flow_plots")
+                )
             # plt.legend(loc="best")
-            plt.savefig(os.path.join(self.experiment_saved_models, 'gradient_flow_plots', "epoch{}.pdf".format(str(epoch_idx))))
+            plt.savefig(
+                os.path.join(
+                    self.experiment_saved_models,
+                    "gradient_flow_plots",
+                    "epoch{}.pdf".format(str(epoch_idx)),
+                )
+            )
             ################################################################

         print("Generating test set evaluation metrics")
-        self.load_model(model_save_dir=self.experiment_saved_models, model_idx=self.best_val_model_idx,
-                        # load best validation model
-                        model_save_name="train_model")
-        current_epoch_losses = {"test_acc": [], "test_loss": []}  # initialize a statistics dict
+        self.load_model(
+            model_save_dir=self.experiment_saved_models,
+            model_idx=self.best_val_model_idx,
+            # load best validation model
+            model_save_name="train_model",
+        )
+        current_epoch_losses = {
+            "test_acc": [],
+            "test_loss": [],
+        }  # initialize a statistics dict
         with tqdm.tqdm(total=len(self.test_data)) as pbar_test:  # ini a progress bar
             for x, y in self.test_data:  # sample batch
-                loss, accuracy = self.run_evaluation_iter(x=x,
-                                                          y=y)  # compute loss and accuracy by running an evaluation step
+                loss, accuracy = self.run_evaluation_iter(
+                    x=x, y=y
+                )  # compute loss and accuracy by running an evaluation step
                 current_epoch_losses["test_loss"].append(loss)  # save test loss
                 current_epoch_losses["test_acc"].append(accuracy)  # save test accuracy
                 pbar_test.update(1)  # update progress bar status
                 pbar_test.set_description(
-                    "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy))  # update progress bar string output
+                    "loss: {:.4f}, accuracy: {:.4f}".format(loss, accuracy)
+                )  # update progress bar string output

-        test_losses = {key: [np.mean(value)] for key, value in
-                       current_epoch_losses.items()}  # save test set metrics in dict format
-        save_statistics(experiment_log_dir=self.experiment_logs, filename='test_summary.csv',
-                        # save test set metrics on disk in .csv format
-                        stats_dict=test_losses, current_epoch=0, continue_from_mode=False)
+        test_losses = {
+            key: [np.mean(value)] for key, value in current_epoch_losses.items()
+        }  # save test set metrics in dict format
+        save_statistics(
+            experiment_log_dir=self.experiment_logs,
+            filename="test_summary.csv",
+            # save test set metrics on disk in .csv format
+            stats_dict=test_losses,
+            current_epoch=0,
+            continue_from_mode=False,
+        )

         return total_losses, test_losses
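Between the two files it may help to spell out the checkpoint contract that `save_model`/`load_model` in the hunks above rely on: a single dict saved under the name `<model_save_name>_<model_idx>`, holding the network weights plus three bookkeeping keys. A minimal sketch under that assumption; the `/tmp/saved_models` path and the dummy values are hypothetical, not from the commit:

import os
import torch

save_dir = "/tmp/saved_models"  # hypothetical location
os.makedirs(save_dir, exist_ok=True)

state = {
    "network": {},               # in the real code this is self.state_dict()
    "best_val_model_idx": 3,     # epoch index of the best validation model so far
    "best_val_model_acc": 0.42,  # its validation accuracy
    "model_epoch": 3,            # read back when continue_from_epoch == -2
}
# Same "<name>_<idx>" filename scheme as save_model above.
torch.save(state, f=os.path.join(save_dir, "{}_{}".format("train_model", "latest")))

reloaded = torch.load(f=os.path.join(save_dir, "train_model_latest"))
assert reloaded["best_val_model_idx"] == 3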
@@ -4,7 +4,9 @@ import torch.nn.functional as F


 class FCCNetwork(nn.Module):
-    def __init__(self, input_shape, num_output_classes, num_filters, num_layers, use_bias=False):
+    def __init__(
+        self, input_shape, num_output_classes, num_filters, num_layers, use_bias=False
+    ):
         """
         Initializes a fully connected network similar to the ones implemented previously in the MLP package.
         :param input_shape: The shape of the inputs going in to the network.
@@ -35,17 +37,25 @@ class FCCNetwork(nn.Module):
         # shapes of all dimensions after the 0th dim

         for i in range(self.num_layers):
-            self.layer_dict['fcc_{}'.format(i)] = nn.Linear(in_features=out.shape[1],  # initialize a fcc layer
-                                                            out_features=self.num_filters,
-                                                            bias=self.use_bias)
+            self.layer_dict["fcc_{}".format(i)] = nn.Linear(
+                in_features=out.shape[1],  # initialize a fcc layer
+                out_features=self.num_filters,
+                bias=self.use_bias,
+            )

-            out = self.layer_dict['fcc_{}'.format(i)](out)  # apply ith fcc layer to the previous layers outputs
+            out = self.layer_dict["fcc_{}".format(i)](
+                out
+            )  # apply ith fcc layer to the previous layers outputs
             out = F.relu(out)  # apply a ReLU on the outputs

-        self.logits_linear_layer = nn.Linear(in_features=out.shape[1],  # initialize the prediction output linear layer
-                                             out_features=self.num_output_classes,
-                                             bias=self.use_bias)
-        out = self.logits_linear_layer(out)  # apply the layer to the previous layer's outputs
+        self.logits_linear_layer = nn.Linear(
+            in_features=out.shape[1],  # initialize the prediction output linear layer
+            out_features=self.num_output_classes,
+            bias=self.use_bias,
+        )
+        out = self.logits_linear_layer(
+            out
+        )  # apply the layer to the previous layer's outputs
         print("Block is built, output volume is", out.shape)
         return out
@@ -61,10 +71,14 @@ class FCCNetwork(nn.Module):
         # shapes of all dimensions after the 0th dim

         for i in range(self.num_layers):
-            out = self.layer_dict['fcc_{}'.format(i)](out)  # apply ith fcc layer to the previous layers outputs
+            out = self.layer_dict["fcc_{}".format(i)](
+                out
+            )  # apply ith fcc layer to the previous layers outputs
             out = F.relu(out)  # apply a ReLU on the outputs

-        out = self.logits_linear_layer(out)  # apply the layer to the previous layer's outputs
+        out = self.logits_linear_layer(
+            out
+        )  # apply the layer to the previous layer's outputs
         return out

     def reset_parameters(self):
@@ -78,8 +92,16 @@ class FCCNetwork(nn.Module):


 class EmptyBlock(nn.Module):
-    def __init__(self, input_shape=None, num_filters=None, kernel_size=None, padding=None, bias=None, dilation=None,
-                 reduction_factor=None):
+    def __init__(
+        self,
+        input_shape=None,
+        num_filters=None,
+        kernel_size=None,
+        padding=None,
+        bias=None,
+        dilation=None,
+        reduction_factor=None,
+    ):
         super(EmptyBlock, self).__init__()

         self.num_filters = num_filters
@@ -94,12 +116,12 @@ class EmptyBlock(nn.Module):
     def build_module(self):
         self.layer_dict = nn.ModuleDict()
         x = torch.zeros(self.input_shape)
-        self.layer_dict['Identity'] = nn.Identity()
+        self.layer_dict["Identity"] = nn.Identity()

     def forward(self, x):
         out = x

-        out = self.layer_dict['Identity'].forward(out)
+        out = self.layer_dict["Identity"].forward(out)

         return out

@@ -122,21 +144,27 @@ class EntryConvolutionalBlock(nn.Module):
         x = torch.zeros(self.input_shape)
         out = x

-        self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_0'].forward(out)
-        self.layer_dict['bn_0'] = nn.BatchNorm2d(num_features=out.shape[1])
-        out = F.leaky_relu(self.layer_dict['bn_0'].forward(out))
+        out = self.layer_dict["conv_0"].forward(out)
+        self.layer_dict["bn_0"] = nn.BatchNorm2d(num_features=out.shape[1])
+        out = F.leaky_relu(self.layer_dict["bn_0"].forward(out))

         print(out.shape)

     def forward(self, x):
         out = x

-        out = self.layer_dict['conv_0'].forward(out)
-        out = F.leaky_relu(self.layer_dict['bn_0'].forward(out))
+        out = self.layer_dict["conv_0"].forward(out)
+        out = F.leaky_relu(self.layer_dict["bn_0"].forward(out))

         return out

@@ -159,18 +187,30 @@ class ConvolutionalProcessingBlock(nn.Module):
         x = torch.zeros(self.input_shape)
         out = x

-        self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

-        self.layer_dict['conv_1'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_1"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         print(out.shape)
@@ -178,17 +218,26 @@ class ConvolutionalProcessingBlock(nn.Module):
     def forward(self, x):
         out = x

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         return out


 class ConvolutionalDimensionalityReductionBlock(nn.Module):
-    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation, reduction_factor):
+    def __init__(
+        self,
+        input_shape,
+        num_filters,
+        kernel_size,
+        padding,
+        bias,
+        dilation,
+        reduction_factor,
+    ):
         super(ConvolutionalDimensionalityReductionBlock, self).__init__()

         self.num_filters = num_filters
@@ -205,20 +254,32 @@ class ConvolutionalDimensionalityReductionBlock(nn.Module):
         x = torch.zeros(self.input_shape)
         out = x

-        self.layer_dict['conv_0'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

         out = F.avg_pool2d(out, self.reduction_factor)

-        self.layer_dict['conv_1'] = nn.Conv2d(in_channels=out.shape[1], out_channels=self.num_filters, bias=self.bias,
-                                              kernel_size=self.kernel_size, dilation=self.dilation,
-                                              padding=self.padding, stride=1)
+        self.layer_dict["conv_1"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         print(out.shape)
@@ -226,21 +287,29 @@ class ConvolutionalDimensionalityReductionBlock(nn.Module):
     def forward(self, x):
         out = x

-        out = self.layer_dict['conv_0'].forward(out)
+        out = self.layer_dict["conv_0"].forward(out)
         out = F.leaky_relu(out)

         out = F.avg_pool2d(out, self.reduction_factor)

-        out = self.layer_dict['conv_1'].forward(out)
+        out = self.layer_dict["conv_1"].forward(out)
         out = F.leaky_relu(out)

         return out


 class ConvolutionalNetwork(nn.Module):
-    def __init__(self, input_shape, num_output_classes, num_filters,
-                 num_blocks_per_stage, num_stages, use_bias=False, processing_block_type=ConvolutionalProcessingBlock,
-                 dimensionality_reduction_block_type=ConvolutionalDimensionalityReductionBlock):
+    def __init__(
+        self,
+        input_shape,
+        num_output_classes,
+        num_filters,
+        num_blocks_per_stage,
+        num_stages,
+        use_bias=False,
+        processing_block_type=ConvolutionalProcessingBlock,
+        dimensionality_reduction_block_type=ConvolutionalDimensionalityReductionBlock,
+    ):
         """
         Initializes a convolutional network module
         :param input_shape: The shape of the tensor to be passed into this network
@@ -274,37 +343,59 @@ class ConvolutionalNetwork(nn.Module):
         """
         self.layer_dict = nn.ModuleDict()
         # initialize a module dict, which is effectively a dictionary that can collect layers and integrate them into pytorch
-        print("Building basic block of ConvolutionalNetwork using input shape", self.input_shape)
-        x = torch.zeros((self.input_shape))  # create dummy inputs to be used to infer shapes of layers
+        print(
+            "Building basic block of ConvolutionalNetwork using input shape",
+            self.input_shape,
+        )
+        x = torch.zeros(
+            (self.input_shape)
+        )  # create dummy inputs to be used to infer shapes of layers

         out = x
-        self.layer_dict['input_conv'] = EntryConvolutionalBlock(input_shape=out.shape, num_filters=self.num_filters,
-                                                                kernel_size=3, padding=1, bias=self.use_bias,
-                                                                dilation=1)
-        out = self.layer_dict['input_conv'].forward(out)
+        self.layer_dict["input_conv"] = EntryConvolutionalBlock(
+            input_shape=out.shape,
+            num_filters=self.num_filters,
+            kernel_size=3,
+            padding=1,
+            bias=self.use_bias,
+            dilation=1,
+        )
+        out = self.layer_dict["input_conv"].forward(out)
         # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
         for i in range(self.num_stages):  # for number of layers times
             for j in range(self.num_blocks_per_stage):
-                self.layer_dict['block_{}_{}'.format(i, j)] = self.processing_block_type(input_shape=out.shape,
-                                                                                         num_filters=self.num_filters,
-                                                                                         bias=self.use_bias,
-                                                                                         kernel_size=3, dilation=1,
-                                                                                         padding=1)
-                out = self.layer_dict['block_{}_{}'.format(i, j)].forward(out)
-            self.layer_dict['reduction_block_{}'.format(i)] = self.dimensionality_reduction_block_type(
-                input_shape=out.shape,
-                num_filters=self.num_filters, bias=True,
-                kernel_size=3, dilation=1,
-                padding=1,
-                reduction_factor=2)
-            out = self.layer_dict['reduction_block_{}'.format(i)].forward(out)
+                self.layer_dict["block_{}_{}".format(i, j)] = (
+                    self.processing_block_type(
+                        input_shape=out.shape,
+                        num_filters=self.num_filters,
+                        bias=self.use_bias,
+                        kernel_size=3,
+                        dilation=1,
+                        padding=1,
+                    )
+                )
+                out = self.layer_dict["block_{}_{}".format(i, j)].forward(out)
+            self.layer_dict["reduction_block_{}".format(i)] = (
+                self.dimensionality_reduction_block_type(
+                    input_shape=out.shape,
+                    num_filters=self.num_filters,
+                    bias=True,
+                    kernel_size=3,
+                    dilation=1,
+                    padding=1,
+                    reduction_factor=2,
+                )
+            )
+            out = self.layer_dict["reduction_block_{}".format(i)].forward(out)

         out = F.avg_pool2d(out, out.shape[-1])
-        print('shape before final linear layer', out.shape)
+        print("shape before final linear layer", out.shape)
         out = out.view(out.shape[0], -1)
-        self.logit_linear_layer = nn.Linear(in_features=out.shape[1],  # add a linear layer
-                                            out_features=self.num_output_classes,
-                                            bias=True)
+        self.logit_linear_layer = nn.Linear(
+            in_features=out.shape[1],  # add a linear layer
+            out_features=self.num_output_classes,
+            bias=True,
+        )
         out = self.logit_linear_layer(out)  # apply linear layer on flattened inputs
         print("Block is built, output volume is", out.shape)
         return out
@@ -316,15 +407,19 @@ class ConvolutionalNetwork(nn.Module):
         :return: preds (b, num_classes)
         """
         out = x
-        out = self.layer_dict['input_conv'].forward(out)
+        out = self.layer_dict["input_conv"].forward(out)
         for i in range(self.num_stages):  # for number of layers times
             for j in range(self.num_blocks_per_stage):
-                out = self.layer_dict['block_{}_{}'.format(i, j)].forward(out)
-            out = self.layer_dict['reduction_block_{}'.format(i)].forward(out)
+                out = self.layer_dict["block_{}_{}".format(i, j)].forward(out)
+            out = self.layer_dict["reduction_block_{}".format(i)].forward(out)

         out = F.avg_pool2d(out, out.shape[-1])
-        out = out.view(out.shape[0], -1)  # flatten outputs from (b, c, h, w) to (b, c*h*w)
-        out = self.logit_linear_layer(out)  # pass through a linear layer to get logits/preds
+        out = out.view(
+            out.shape[0], -1
+        )  # flatten outputs from (b, c, h, w) to (b, c*h*w)
+        out = self.logit_linear_layer(
+            out
+        )  # pass through a linear layer to get logits/preds
         return out

     def reset_parameters(self):
@ -338,3 +433,138 @@ class ConvolutionalNetwork(nn.Module):
|
|||||||
        pass

        self.logit_linear_layer.reset_parameters()


# My Implementation:


class ConvolutionalProcessingBlockBN(nn.Module):
    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
        super().__init__()

        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation

        self.build_module()

    def build_module(self):
        self.layer_dict = nn.ModuleDict()
        x = torch.zeros(self.input_shape)
        out = x

        # First convolutional layer with batch normalization
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Second convolutional layer with batch normalization
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        print(out.shape)

    def forward(self, x):
        out = x

        # Apply first conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Apply second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        return out
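As a quick sanity check of the new block (not part of the commit), a dummy forward pass confirms that BN leaves the spatial dimensions intact; the shape values below are arbitrary assumptions:

# Hypothetical smoke test; shape values are illustrative assumptions.
import torch

block = ConvolutionalProcessingBlockBN(
    input_shape=(2, 3, 32, 32),  # (batch, channels, height, width)
    num_filters=16,
    kernel_size=3,
    padding=1,
    bias=False,  # bias is redundant when BN follows the convolution
    dilation=1,
)
out = block.forward(torch.randn(2, 3, 32, 32))
print(out.shape)  # torch.Size([2, 16, 32, 32]): kernel 3 + padding 1 preserve H and W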


class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
    def __init__(
        self,
        input_shape,
        num_filters,
        kernel_size,
        padding,
        bias,
        dilation,
        reduction_factor,
    ):
        super().__init__()

        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation
        self.reduction_factor = reduction_factor

        self.build_module()

    def build_module(self):
        self.layer_dict = nn.ModuleDict()
        x = torch.zeros(self.input_shape)
        out = x

        # First convolutional layer with batch normalization
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Dimensionality reduction through average pooling
        out = F.avg_pool2d(out, self.reduction_factor)

        # Second convolutional layer with batch normalization
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        print(out.shape)

    def forward(self, x):
        out = x

        # Apply first conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Dimensionality reduction through average pooling
        out = F.avg_pool2d(out, self.reduction_factor)

        # Apply second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        return out
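The same kind of check shows the effect of reduction_factor: F.avg_pool2d with kernel size 2 (and its default stride equal to the kernel size) halves both spatial dimensions. Again a hypothetical sketch with arbitrary shapes:

# Hypothetical smoke test; shape values are illustrative assumptions.
import torch

red_block = ConvolutionalDimensionalityReductionBlockBN(
    input_shape=(2, 16, 32, 32),
    num_filters=16,
    kernel_size=3,
    padding=1,
    bias=False,
    dilation=1,
    reduction_factor=2,
)
out = red_block.forward(torch.randn(2, 16, 32, 32))
print(out.shape)  # torch.Size([2, 16, 16, 16]): 32 / reduction_factor = 16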
@@ -17,7 +17,14 @@ def load_from_stats_pkl_file(experiment_log_filepath, filename):
    return stats


def save_statistics(
    experiment_log_dir,
    filename,
    stats_dict,
    current_epoch,
    continue_from_mode=False,
    save_full_dict=False,
):
    """
    Saves the statistics in stats_dict into a csv file, using the keys as the header entries and the values as the
    columns of a particular header entry
@@ -29,7 +36,7 @@ def save_statistics(experiment_log_dir, filename, stats_dict, current_epoch, con
    :return: The filepath to the summary file
    """
    summary_filename = os.path.join(experiment_log_dir, filename)
    mode = "a" if continue_from_mode else "w"
    with open(summary_filename, mode) as f:
        writer = csv.writer(f)
        if not continue_from_mode:
@@ -57,7 +64,7 @@ def load_statistics(experiment_log_dir, filename):
"""
|
"""
|
||||||
summary_filename = os.path.join(experiment_log_dir, filename)
|
summary_filename = os.path.join(experiment_log_dir, filename)
|
||||||
|
|
||||||
with open(summary_filename, 'r+') as f:
|
with open(summary_filename, "r+") as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
|
|
||||||
keys = lines[0].split(",")
|
keys = lines[0].split(",")
|
||||||
@@ -7,7 +7,8 @@ import mlp.data_providers as data_providers
from pytorch_mlp_framework.arg_extractor import get_args
from pytorch_mlp_framework.experiment_builder import ExperimentBuilder
from pytorch_mlp_framework.model_architectures import *
import os

# os.environ["CUDA_VISIBLE_DEVICES"]="0"

args = get_args()  # get arguments from command line
@@ -15,54 +16,83 @@ rng = np.random.RandomState(seed=args.seed)  # set the seeds for the experiment
torch.manual_seed(seed=args.seed)  # sets pytorch's seed

# set up data augmentation transforms for training and testing
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

train_data = data_providers.CIFAR100(
    root="data", set_name="train", transform=transform_train, download=True
)  # create the training dataset
val_data = data_providers.CIFAR100(
    root="data", set_name="val", transform=transform_test, download=True
)  # create the validation dataset
test_data = data_providers.CIFAR100(
    root="data", set_name="test", transform=transform_test, download=True
)  # create the test dataset

train_data_loader = DataLoader(
    train_data, batch_size=args.batch_size, shuffle=True, num_workers=2
)
val_data_loader = DataLoader(
    val_data, batch_size=args.batch_size, shuffle=True, num_workers=2
)
test_data_loader = DataLoader(
    test_data, batch_size=args.batch_size, shuffle=True, num_workers=2
)

if args.block_type == "conv_block":
    processing_block_type = ConvolutionalProcessingBlock
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlock
elif args.block_type == "empty_block":
    processing_block_type = EmptyBlock
    dim_reduction_block_type = EmptyBlock
elif args.block_type == "conv_bn":
    processing_block_type = ConvolutionalProcessingBlockBN
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
else:
    raise ModuleNotFoundError
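With the new branch in place, the BN variants can be selected from the command line. A hypothetical invocation (the script path is an assumption, not shown in this diff):

python pytorch_mlp_framework/train_evaluate_image_classification_system.py \
    --block_type conv_bn --num_stages 3 --num_blocks_per_stage 5 \
    --num_filters 16 --num_epochs 100 --experiment_name exp_conv_bn --use_gpu True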
|
||||||
|
|
||||||
custom_conv_net = ConvolutionalNetwork( # initialize our network object, in this case a ConvNet
|
custom_conv_net = (
|
||||||
input_shape=(args.batch_size, args.image_num_channels, args.image_height, args.image_width),
|
ConvolutionalNetwork( # initialize our network object, in this case a ConvNet
|
||||||
num_output_classes=args.num_classes, num_filters=args.num_filters, use_bias=False,
|
input_shape=(
|
||||||
num_blocks_per_stage=args.num_blocks_per_stage, num_stages=args.num_stages,
|
args.batch_size,
|
||||||
processing_block_type=processing_block_type,
|
args.image_num_channels,
|
||||||
dimensionality_reduction_block_type=dim_reduction_block_type)
|
args.image_height,
|
||||||
|
args.image_width,
|
||||||
|
),
|
||||||
|
num_output_classes=args.num_classes,
|
||||||
|
num_filters=args.num_filters,
|
||||||
|
use_bias=False,
|
||||||
|
num_blocks_per_stage=args.num_blocks_per_stage,
|
||||||
|
num_stages=args.num_stages,
|
||||||
|
processing_block_type=processing_block_type,
|
||||||
|
dimensionality_reduction_block_type=dim_reduction_block_type,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
conv_experiment = ExperimentBuilder(network_model=custom_conv_net,
|
conv_experiment = ExperimentBuilder(
|
||||||
experiment_name=args.experiment_name,
|
network_model=custom_conv_net,
|
||||||
num_epochs=args.num_epochs,
|
experiment_name=args.experiment_name,
|
||||||
weight_decay_coefficient=args.weight_decay_coefficient,
|
num_epochs=args.num_epochs,
|
||||||
use_gpu=args.use_gpu,
|
weight_decay_coefficient=args.weight_decay_coefficient,
|
||||||
continue_from_epoch=args.continue_from_epoch,
|
use_gpu=args.use_gpu,
|
||||||
train_data=train_data_loader, val_data=val_data_loader,
|
continue_from_epoch=args.continue_from_epoch,
|
||||||
test_data=test_data_loader) # build an experiment object
|
train_data=train_data_loader,
|
||||||
experiment_metrics, test_metrics = conv_experiment.run_experiment() # run experiment and return experiment metrics
|
val_data=val_data_loader,
|
||||||
|
test_data=test_data_loader,
|
||||||
|
) # build an experiment object
|
||||||
|
experiment_metrics, test_metrics = (
|
||||||
|
conv_experiment.run_experiment()
|
||||||
|
) # run experiment and return experiment metrics