diff --git a/pytorch_mlp_framework/arg_extractor.py b/pytorch_mlp_framework/arg_extractor.py
index 2351100..a56de45 100644
--- a/pytorch_mlp_framework/arg_extractor.py
+++ b/pytorch_mlp_framework/arg_extractor.py
@@ -47,6 +47,13 @@ def get_args():
         default=3,
         help="The channel dimensionality of our image-data",
     )
+    parser.add_argument(
+        "--learning_rate",
+        nargs="?",
+        type=float,
+        default=1e-3,
+        help="The learning rate (default 1e-3)",
+    )
     parser.add_argument(
         "--image_height", nargs="?", type=int, default=32, help="Height of image data"
     )
diff --git a/pytorch_mlp_framework/experiment_builder.py b/pytorch_mlp_framework/experiment_builder.py
index e3d0146..aeceeb8 100644
--- a/pytorch_mlp_framework/experiment_builder.py
+++ b/pytorch_mlp_framework/experiment_builder.py
@@ -24,6 +24,7 @@ class ExperimentBuilder(nn.Module):
         val_data,
         test_data,
         weight_decay_coefficient,
+        learning_rate,
         use_gpu,
         continue_from_epoch=-1,
     ):
@@ -79,7 +80,10 @@
         print("Total number of linear layers", num_linear_layers)
         self.optimizer = optim.Adam(
-            self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
+            self.parameters(),
+            amsgrad=False,
+            weight_decay=weight_decay_coefficient,
+            lr=learning_rate,
         )
         self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
             self.optimizer, T_max=num_epochs, eta_min=0.00002
         )
@@ -178,7 +182,7 @@
         """

         for name, param in named_parameters:
-            if 'bias' in name:
+            if "bias" in name:
                 continue
             # Check if the parameter requires gradient and has a gradient
             if param.requires_grad and param.grad is not None:
diff --git a/pytorch_mlp_framework/model_architectures.py b/pytorch_mlp_framework/model_architectures.py
index 4b54a41..6c6c6c1 100644
--- a/pytorch_mlp_framework/model_architectures.py
+++ b/pytorch_mlp_framework/model_architectures.py
@@ -568,3 +568,72 @@ class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
         out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

         return out
+
+
+class ConvolutionalProcessingBlockBNRC(nn.Module):
+    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
+        super().__init__()
+        self.num_filters = num_filters
+        self.kernel_size = kernel_size
+        self.input_shape = input_shape
+        self.padding = padding
+        self.bias = bias
+        self.dilation = dilation
+        self.build_module()
+
+    def build_module(self):
+        self.layer_dict = nn.ModuleDict()
+        x = torch.zeros(self.input_shape)
+        out = x
+
+        # First convolutional layer with BN
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )
+        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
+
+        out = self.layer_dict["conv_0"].forward(out)
+        out = self.layer_dict["bn_0"].forward(out)
+        out = F.leaky_relu(out)
+
+        # Second convolutional layer with BN
+        self.layer_dict["conv_1"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )
+        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
+
+        out = self.layer_dict["conv_1"].forward(out)
+        out = self.layer_dict["bn_1"].forward(out)
+        out = F.leaky_relu(out)
+
+        # Print final output shape for debugging
+        print(out.shape)
+
+    def forward(self, x):
+        residual = x  # Save input for residual connection
+        out = x
+
+        # Apply first conv layer + BN + LeakyReLU
+        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))
+
+        # Apply second conv layer + BN + LeakyReLU
+        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))
+
+        # Add residual connection
+        # The block must preserve the input shape for the skip connection
+        assert residual.shape == out.shape
+        out += residual
+
+        return out
diff --git a/pytorch_mlp_framework/tests.py b/pytorch_mlp_framework/tests.py
new file mode 100644
index 0000000..088ba78
--- /dev/null
+++ b/pytorch_mlp_framework/tests.py
@@ -0,0 +1,87 @@
+import unittest
+import torch
+from model_architectures import (
+    ConvolutionalProcessingBlockBN,
+    ConvolutionalDimensionalityReductionBlockBN,
+    ConvolutionalProcessingBlockBNRC,
+)
+
+
+class TestBatchNormalizationBlocks(unittest.TestCase):
+    def setUp(self):
+        # Common parameters
+        self.input_shape = (1, 3, 32, 32)  # Batch size 1, 3 channels, 32x32 input
+        self.num_filters = 16
+        self.kernel_size = 3
+        self.padding = 1
+        self.bias = False
+        self.dilation = 1
+        self.reduction_factor = 2
+
+    def test_convolutional_processing_block(self):
+        # Create a ConvolutionalProcessingBlockBN instance
+        block = ConvolutionalProcessingBlockBN(
+            input_shape=self.input_shape,
+            num_filters=self.num_filters,
+            kernel_size=self.kernel_size,
+            padding=self.padding,
+            bias=self.bias,
+            dilation=self.dilation,
+        )
+
+        # Generate a random tensor matching the input shape
+        input_tensor = torch.randn(self.input_shape)
+
+        # Forward pass
+        try:
+            output = block(input_tensor)
+            self.assertIsNotNone(output, "Output should not be None.")
+        except Exception as e:
+            self.fail(f"ConvolutionalProcessingBlockBN raised an error: {e}")
+
+    def test_convolutional_processing_block_with_rc(self):
+        # Create a ConvolutionalProcessingBlockBNRC instance
+        block = ConvolutionalProcessingBlockBNRC(
+            input_shape=(1, self.num_filters, 32, 32),  # channels must equal num_filters for the residual add
+            num_filters=self.num_filters,
+            kernel_size=self.kernel_size,
+            padding=self.padding,
+            bias=self.bias,
+            dilation=self.dilation,
+        )
+
+        # Generate a random tensor matching the input shape
+        input_tensor = torch.randn((1, self.num_filters, 32, 32))
+
+        # Forward pass
+        try:
+            output = block(input_tensor)
+            self.assertIsNotNone(output, "Output should not be None.")
+        except Exception as e:
+            self.fail(f"ConvolutionalProcessingBlockBNRC raised an error: {e}")
+
+    def test_convolutional_dimensionality_reduction_block(self):
+        # Create a ConvolutionalDimensionalityReductionBlockBN instance
+        block = ConvolutionalDimensionalityReductionBlockBN(
+            input_shape=self.input_shape,
+            num_filters=self.num_filters,
+            kernel_size=self.kernel_size,
+            padding=self.padding,
+            bias=self.bias,
+            dilation=self.dilation,
+            reduction_factor=self.reduction_factor,
+        )
+
+        # Generate a random tensor matching the input shape
+        input_tensor = torch.randn(self.input_shape)
+
+        # Forward pass
+        try:
+            output = block(input_tensor)
+            self.assertIsNotNone(output, "Output should not be None.")
+        except Exception as e:
+            self.fail(f"ConvolutionalDimensionalityReductionBlockBN raised an error: {e}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pytorch_mlp_framework/train_evaluate_image_classification_system.py b/pytorch_mlp_framework/train_evaluate_image_classification_system.py
index afd9b50..c1e39ff 100644
--- a/pytorch_mlp_framework/train_evaluate_image_classification_system.py
+++ b/pytorch_mlp_framework/train_evaluate_image_classification_system.py
@@ -61,6 +61,9 @@ elif args.block_type == "empty_block":
 elif args.block_type == "conv_bn":
     processing_block_type = ConvolutionalProcessingBlockBN
     dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
+elif args.block_type == "conv_bn_rc":
+    processing_block_type = ConvolutionalProcessingBlockBNRC
+    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
 else:
     raise ModuleNotFoundError

@@ -87,6 +90,7 @@ conv_experiment = ExperimentBuilder(
     experiment_name=args.experiment_name,
     num_epochs=args.num_epochs,
     weight_decay_coefficient=args.weight_decay_coefficient,
+    learning_rate=args.learning_rate,
     use_gpu=args.use_gpu,
     continue_from_epoch=args.continue_from_epoch,
     train_data=train_data_loader,
diff --git a/run_vgg_38_bn_rc.sh b/run_vgg_38_bn_rc.sh
new file mode 100644
index 0000000..1b09df5
--- /dev/null
+++ b/run_vgg_38_bn_rc.sh
@@ -0,0 +1 @@
+python pytorch_mlp_framework/train_evaluate_image_classification_system.py --batch_size 100 --seed 0 --num_filters 32 --num_stages 3 --num_blocks_per_stage 5 --experiment_name VGG_38_BN_RC_experiment --use_gpu True --num_classes 100 --block_type 'conv_bn_rc' --continue_from_epoch -1 --learning_rate 0.01
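
Note (not part of the patch): the residual add in ConvolutionalProcessingBlockBNRC.forward requires the block to preserve its input shape, i.e. the input channel count must equal num_filters and the convolutions must be shape-preserving (e.g. kernel_size=3, padding=1, dilation=1, stride=1). A minimal sketch of that constraint, assuming it is run from inside pytorch_mlp_framework so the import resolves:

import torch
from model_architectures import ConvolutionalProcessingBlockBNRC

# Shape-preserving configuration: in_channels == num_filters, and
# kernel_size=3 with padding=1, dilation=1, stride=1 keeps H and W fixed.
block = ConvolutionalProcessingBlockBNRC(
    input_shape=(1, 16, 32, 32),
    num_filters=16,
    kernel_size=3,
    padding=1,
    bias=False,
    dilation=1,
)

x = torch.randn(1, 16, 32, 32)
out = block(x)
assert out.shape == x.shape  # the residual add needs matching shapes

# A mismatched configuration (e.g. 3 input channels with num_filters=16)
# would trip the assert inside forward(), because residual and out would
# then differ in channel count.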
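Usage note (assumes a working PyTorch install): tests.py imports model_architectures directly, so the tests are run from inside the package directory, e.g. cd pytorch_mlp_framework && python tests.py. A full training run with the new block type and learning-rate flag is scripted in run_vgg_38_bn_rc.sh above.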