add BN+RC layer

Anton Lydike 2024-11-19 10:38:54 +00:00
parent 7861133463
commit ae0e14b5fb
6 changed files with 175 additions and 2 deletions


@@ -47,6 +47,13 @@ def get_args():
        default=3,
        help="The channel dimensionality of our image-data",
    )
    parser.add_argument(
        "--learning-rate",
        nargs="?",
        type=float,
        default=1e-3,
        help="The learning rate (default 1e-3)",
    )
    parser.add_argument(
        "--image_height", nargs="?", type=int, default=32, help="Height of image data"
    )
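
A note on how the new option is consumed: argparse maps the hyphenated --learning-rate flag to the underscore attribute args.learning_rate, which is what the training script reads later. A minimal, self-contained sketch (not the framework's actual get_args(), just the new option in isolation):

    import argparse

    # Hypothetical stand-in for get_args(); only the new option is shown.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--learning-rate",
        nargs="?",
        type=float,
        default=1e-3,
        help="The learning rate (default 1e-3)",
    )
    args = parser.parse_args(["--learning-rate", "0.01"])
    print(args.learning_rate)  # -> 0.01 (hyphen becomes underscore)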


@@ -24,6 +24,7 @@ class ExperimentBuilder(nn.Module):
        val_data,
        test_data,
        weight_decay_coefficient,
        learning_rate,
        use_gpu,
        continue_from_epoch=-1,
    ):
@@ -79,7 +80,10 @@
        print("Total number of linear layers", num_linear_layers)
        self.optimizer = optim.Adam(
            self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
            self.parameters(),
            amsgrad=False,
            weight_decay=weight_decay_coefficient,
            lr=learning_rate,
        )
        self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=num_epochs, eta_min=0.00002
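
With this change the optimizer now starts from the configured learning rate and CosineAnnealingLR decays it towards eta_min=0.00002 over num_epochs. A minimal sketch of that interaction, assuming the usual pattern of one scheduler step per epoch (the model and loop below are illustrative, not the framework's):

    import torch
    from torch import nn, optim

    model = nn.Linear(8, 2)  # stand-in for the real network
    optimizer = optim.Adam(
        model.parameters(), amsgrad=False, weight_decay=1e-4, lr=1e-3
    )
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0.00002)

    for epoch in range(10):
        loss = model(torch.randn(4, 8)).sum()  # stand-in for one training epoch
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()  # one scheduler step per epoch
        print(epoch, scheduler.get_last_lr())  # lr follows a cosine curve from 1e-3 towards 2e-5
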
@@ -178,7 +182,7 @@
        """
        for name, param in named_parameters:
            if 'bias' in name:
            if "bias" in name:
                continue
            # Check if the parameter requires gradient and has a gradient
            if param.requires_grad and param.grad is not None:


@@ -568,3 +568,73 @@ class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))
        return out


class ConvolutionalProcessingBlockBNRC(nn.Module):
    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
        super().__init__()
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation
        self.build_module()

    def build_module(self):
        self.layer_dict = nn.ModuleDict()
        x = torch.zeros(self.input_shape)
        out = x

        # First convolutional layer with BN
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        out = self.layer_dict["conv_0"].forward(out)
        out = self.layer_dict["bn_0"].forward(out)
        out = F.leaky_relu(out)

        # Second convolutional layer with BN
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        out = self.layer_dict["conv_1"].forward(out)
        out = self.layer_dict["bn_1"].forward(out)
        out = F.leaky_relu(out)

        # Print final output shape for debugging
        print(out.shape)

    def forward(self, x):
        residual = x  # save the block input for the residual connection
        out = x

        # First conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        # Residual connection; input and output shapes must match for the addition
        assert residual.shape == out.shape
        out += residual

        return out
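
Usage note on the new block: because conv_1's output is added back onto the block input, the residual path only works when the input already has num_filters channels and the convolutions preserve the spatial size (e.g. kernel_size=3, padding=1, dilation=1, stride=1). A small illustrative instantiation, with shapes chosen to satisfy the assert (not taken from the framework's configs):

    import torch
    from model_architectures import ConvolutionalProcessingBlockBNRC

    block = ConvolutionalProcessingBlockBNRC(
        input_shape=(1, 32, 16, 16),  # channel count matches num_filters below
        num_filters=32,
        kernel_size=3,
        padding=1,  # keeps the 16x16 spatial size, so residual shapes match
        bias=False,
        dilation=1,
    )
    out = block(torch.randn(1, 32, 16, 16))
    print(out.shape)  # torch.Size([1, 32, 16, 16])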


@@ -0,0 +1,87 @@
import unittest

import torch

from model_architectures import (
    ConvolutionalProcessingBlockBN,
    ConvolutionalDimensionalityReductionBlockBN,
    ConvolutionalProcessingBlockBNRC,
)


class TestBatchNormalizationBlocks(unittest.TestCase):
    def setUp(self):
        # Common parameters
        self.input_shape = (1, 3, 32, 32)  # Batch size 1, 3 channels, 32x32 input
        self.num_filters = 16
        self.kernel_size = 3
        self.padding = 1
        self.bias = False
        self.dilation = 1
        self.reduction_factor = 2

    def test_convolutional_processing_block(self):
        # Create a ConvolutionalProcessingBlockBN instance
        block = ConvolutionalProcessingBlockBN(
            input_shape=self.input_shape,
            num_filters=self.num_filters,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
            dilation=self.dilation,
        )

        # Generate a random tensor matching the input shape
        input_tensor = torch.randn(self.input_shape)

        # Forward pass
        try:
            output = block(input_tensor)
            self.assertIsNotNone(output, "Output should not be None.")
        except Exception as e:
            self.fail(f"ConvolutionalProcessingBlockBN raised an error: {e}")

    def test_convolutional_processing_block_with_rc(self):
        # The residual connection requires the input channel count to equal
        # num_filters, so this test uses an input shape with num_filters channels.
        rc_input_shape = (1, self.num_filters, 32, 32)

        # Create a ConvolutionalProcessingBlockBNRC instance
        block = ConvolutionalProcessingBlockBNRC(
            input_shape=rc_input_shape,
            num_filters=self.num_filters,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
            dilation=self.dilation,
        )

        # Generate a random tensor matching the input shape
        input_tensor = torch.randn(rc_input_shape)

        # Forward pass
        try:
            output = block(input_tensor)
            self.assertIsNotNone(output, "Output should not be None.")
        except Exception as e:
            self.fail(f"ConvolutionalProcessingBlockBNRC raised an error: {e}")

    def test_convolutional_dimensionality_reduction_block(self):
        # Create a ConvolutionalDimensionalityReductionBlockBN instance
        block = ConvolutionalDimensionalityReductionBlockBN(
            input_shape=self.input_shape,
            num_filters=self.num_filters,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
            dilation=self.dilation,
            reduction_factor=self.reduction_factor,
        )

        # Generate a random tensor matching the input shape
        input_tensor = torch.randn(self.input_shape)

        # Forward pass
        try:
            output = block(input_tensor)
            self.assertIsNotNone(output, "Output should not be None.")
        except Exception as e:
            self.fail(f"ConvolutionalDimensionalityReductionBlockBN raised an error: {e}")


if __name__ == "__main__":
    unittest.main()
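
Since the module ends in a __main__ guard, the tests can be run directly once the file is saved next to model_architectures.py; the filename below is only a placeholder, as the commit page does not show one:

    python test_bn_blocks.py -v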


@@ -61,6 +61,9 @@ elif args.block_type == "empty_block":
elif args.block_type == "conv_bn":
    processing_block_type = ConvolutionalProcessingBlockBN
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
elif args.block_type == "conv_bn_rc":
    processing_block_type = ConvolutionalProcessingBlockBNRC
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
else:
    raise ModuleNotFoundError
@@ -87,6 +90,7 @@ conv_experiment = ExperimentBuilder(
    experiment_name=args.experiment_name,
    num_epochs=args.num_epochs,
    weight_decay_coefficient=args.weight_decay_coefficient,
    learning_rate=args.learning_rate,
    use_gpu=args.use_gpu,
    continue_from_epoch=args.continue_from_epoch,
    train_data=train_data_loader,

run_vgg_38_bn_rc.sh (new file)

@@ -0,0 +1 @@
python pytorch_mlp_framework/train_evaluate_image_classification_system.py --batch_size 100 --seed 0 --num_filters 32 --num_stages 3 --num_blocks_per_stage 5 --experiment_name VGG_38_experiment --use_gpu True --num_classes 100 --block_type 'conv_bn_rc' --continue_from_epoch -1 --learning-rate 0.01