diff --git a/pytorch_mlp_framework/experiment_builder.py b/pytorch_mlp_framework/experiment_builder.py
index 8aadf16..846e9bf 100644
--- a/pytorch_mlp_framework/experiment_builder.py
+++ b/pytorch_mlp_framework/experiment_builder.py
@@ -114,7 +114,7 @@ class ExperimentBuilder(nn.Module):
 
         return total_num_params
 
-    def plot_func_def(self,all_grads, layers):
+    def plot_func_def(self, all_grads, layers):
         """
 
@@ -148,19 +148,24 @@ class ExperimentBuilder(nn.Module):
         """
         Complete the code in the block below to collect absolute mean of the gradients for each layer in all_grads with the layer names in layers.
         """
-        ########################################
-        #TODO write your code here
-
-        ########################################
-
-
+
+        for name, param in named_parameters:
+            # Check if the parameter requires gradient and has a gradient
+            if param.requires_grad and param.grad is not None:
+                try:
+                    _, a, _, b, _ = name.split(".", 4)
+                except ValueError:
+                    b, a = name.split(".", 1)
+
+                layers.append(f"{a}_{b}")
+                # Collect the mean of the absolute gradients
+                all_grads.append(param.grad.abs().mean().item())
+
         plt = self.plot_func_def(all_grads, layers)
-
+
         return plt
-
-
-
-
+
+
     def run_train_iter(self, x, y):
         self.train()  # sets model to training mode (in case batch normalization or other methods have different procedures for training and evaluation)
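
For context, the added loop follows the common PyTorch gradient-flow recipe: iterate over `named_parameters()`, keep parameters that actually received a gradient, and record the mean absolute gradient per layer together with a label. Below is a minimal self-contained sketch of that pattern outside the framework; the toy `nn.Sequential` model and the simple `name.replace(".", "_")` labelling are illustrative assumptions, not part of this patch.

```python
import torch
import torch.nn as nn

# Hypothetical toy model purely for illustration; the real framework's
# parameter names and label format differ and are handled by the patch above.
model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))

# Run one backward pass so that .grad is populated.
x = torch.randn(4, 8)
loss = model(x).sum()
loss.backward()

all_grads, layers = [], []
for name, param in model.named_parameters():
    if param.requires_grad and param.grad is not None:
        # Mean of the absolute gradient for this parameter tensor.
        all_grads.append(param.grad.abs().mean().item())
        layers.append(name.replace(".", "_"))

print(list(zip(layers, all_grads)))
```

The `try`/`except ValueError` in the patch only differs from this sketch in how it shortens the dotted parameter names into plot labels before they are passed to `plot_func_def`.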