add BN+RC layer

parent 7861133463
commit ae0e14b5fb
@@ -47,6 +47,13 @@ def get_args():
         default=3,
         help="The channel dimensionality of our image-data",
     )
+    parser.add_argument(
+        "--learning-rate",
+        nargs="?",
+        type=float,
+        default=1e-3,
+        help="The learning rate (default 1e-3)",
+    )
     parser.add_argument(
         "--image_height", nargs="?", type=int, default=32, help="Height of image data"
     )
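Aside, not part of the diff: argparse normalizes the hyphen in "--learning-rate" to an underscore, so the value added above is read back as args.learning_rate at the call sites further down. A minimal sketch of that behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--learning-rate", nargs="?", type=float, default=1e-3)

# argparse converts the hyphen to an underscore in the attribute name
args = parser.parse_args(["--learning-rate", "0.01"])
print(args.learning_rate)  # 0.01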
@@ -24,6 +24,7 @@ class ExperimentBuilder(nn.Module):
         val_data,
         test_data,
         weight_decay_coefficient,
+        learning_rate,
         use_gpu,
         continue_from_epoch=-1,
     ):
@@ -79,7 +80,10 @@ class ExperimentBuilder(nn.Module):
         print("Total number of linear layers", num_linear_layers)
 
         self.optimizer = optim.Adam(
-            self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
+            self.parameters(),
+            amsgrad=False,
+            weight_decay=weight_decay_coefficient,
+            lr=learning_rate,
         )
         self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
             self.optimizer, T_max=num_epochs, eta_min=0.00002
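Aside, not part of the diff: the lr now passed to Adam is the starting value that CosineAnnealingLR decays toward eta_min over T_max calls to scheduler.step(). A minimal, self-contained sketch (the T_max value here is illustrative, not taken from the commit):

import torch
import torch.optim as optim

# A throwaway parameter so the optimizer has something to manage
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.Adam(params, amsgrad=False, weight_decay=0.0, lr=1e-3)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=0.00002)

for epoch in range(100):
    optimizer.step()   # the training step would go here
    scheduler.step()   # cosine-decay the lr once per epoch

print(optimizer.param_groups[0]["lr"])  # reaches eta_min after T_max epochs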
@@ -178,7 +182,7 @@ class ExperimentBuilder(nn.Module):
         """
 
         for name, param in named_parameters:
-            if 'bias' in name:
+            if "bias" in name:
                 continue
             # Check if the parameter requires gradient and has a gradient
             if param.requires_grad and param.grad is not None:
@@ -568,3 +568,73 @@ class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
         out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))
 
         return out
+
+
+class ConvolutionalProcessingBlockBNRC(nn.Module):
+    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
+        super().__init__()
+        self.num_filters = num_filters
+        self.kernel_size = kernel_size
+        self.input_shape = input_shape
+        self.padding = padding
+        self.bias = bias
+        self.dilation = dilation
+        self.build_module()
+
+    def build_module(self):
+        self.layer_dict = nn.ModuleDict()
+        x = torch.zeros(self.input_shape)
+        out = x
+
+        # First convolutional layer with BN
+        self.layer_dict["conv_0"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )
+        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
+
+        out = self.layer_dict["conv_0"].forward(out)
+        out = self.layer_dict["bn_0"].forward(out)
+        out = F.leaky_relu(out)
+
+        # Second convolutional layer with BN
+        self.layer_dict["conv_1"] = nn.Conv2d(
+            in_channels=out.shape[1],
+            out_channels=self.num_filters,
+            bias=self.bias,
+            kernel_size=self.kernel_size,
+            dilation=self.dilation,
+            padding=self.padding,
+            stride=1,
+        )
+        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
+
+        out = self.layer_dict["conv_1"].forward(out)
+        out = self.layer_dict["bn_1"].forward(out)
+        out = F.leaky_relu(out)
+
+        # Print final output shape for debugging
+        print(out.shape)
+
+    def forward(self, x):
+        residual = x  # Save input for residual connection
+        out = x
+
+        # Apply first conv layer + BN + leaky ReLU
+        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))
+
+        # Apply second conv layer + BN + leaky ReLU
+        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))
+
+        # Add residual connection; the identity skip requires matching shapes
+        assert residual.shape == out.shape
+        out += residual
+
+        return out
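Aside, not part of the diff: because forward adds the unmodified input to the block output, ConvolutionalProcessingBlockBNRC only works when the block preserves shape, i.e. the input channel count equals num_filters and the padding offsets the kernel at stride 1. A usage sketch under those assumptions (the concrete sizes are illustrative):

import torch
from model_architectures import ConvolutionalProcessingBlockBNRC

block = ConvolutionalProcessingBlockBNRC(
    input_shape=(1, 32, 16, 16),  # in_channels == num_filters, so the assert holds
    num_filters=32,
    kernel_size=3,
    padding=1,  # (kernel_size - 1) // 2 keeps H and W unchanged at stride 1
    bias=False,
    dilation=1,
)

x = torch.randn(1, 32, 16, 16)
y = block(x)  # computes F(x) + x; same shape as x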
pytorch_mlp_framework/tests.py (new file, 87 lines)
@@ -0,0 +1,87 @@
+import unittest
+
+import torch
+
+from model_architectures import (
+    ConvolutionalProcessingBlockBN,
+    ConvolutionalDimensionalityReductionBlockBN,
+    ConvolutionalProcessingBlockBNRC,
+)
+
+
+class TestBatchNormalizationBlocks(unittest.TestCase):
+    def setUp(self):
+        # Common parameters
+        self.input_shape = (1, 3, 32, 32)  # Batch size 1, 3 channels, 32x32 input
+        self.num_filters = 16
+        self.kernel_size = 3
+        self.padding = 1
+        self.bias = False
+        self.dilation = 1
+        self.reduction_factor = 2
+
+    def test_convolutional_processing_block(self):
+        # Create a ConvolutionalProcessingBlockBN instance
+        block = ConvolutionalProcessingBlockBN(
+            input_shape=self.input_shape,
+            num_filters=self.num_filters,
+            kernel_size=self.kernel_size,
+            padding=self.padding,
+            bias=self.bias,
+            dilation=self.dilation,
+        )
+
+        # Generate a random tensor matching the input shape
+        input_tensor = torch.randn(self.input_shape)
+
+        # Forward pass
+        try:
+            output = block(input_tensor)
+            self.assertIsNotNone(output, "Output should not be None.")
+        except Exception as e:
+            self.fail(f"ConvolutionalProcessingBlockBN raised an error: {e}")
+
+    def test_convolutional_processing_block_with_rc(self):
+        # Create a ConvolutionalProcessingBlockBNRC instance; the channel count
+        # must equal num_filters so the residual addition is shape-compatible
+        rc_input_shape = (1, self.num_filters, 32, 32)
+        block = ConvolutionalProcessingBlockBNRC(
+            input_shape=rc_input_shape,
+            num_filters=self.num_filters,
+            kernel_size=self.kernel_size,
+            padding=self.padding,
+            bias=self.bias,
+            dilation=self.dilation,
+        )
+
+        # Generate a random tensor matching the input shape
+        input_tensor = torch.randn(rc_input_shape)
+
+        # Forward pass
+        try:
+            output = block(input_tensor)
+            self.assertIsNotNone(output, "Output should not be None.")
+        except Exception as e:
+            self.fail(f"ConvolutionalProcessingBlockBNRC raised an error: {e}")
+
+    def test_convolutional_dimensionality_reduction_block(self):
+        # Create a ConvolutionalDimensionalityReductionBlockBN instance
+        block = ConvolutionalDimensionalityReductionBlockBN(
+            input_shape=self.input_shape,
+            num_filters=self.num_filters,
+            kernel_size=self.kernel_size,
+            padding=self.padding,
+            bias=self.bias,
+            dilation=self.dilation,
+            reduction_factor=self.reduction_factor,
+        )
+
+        # Generate a random tensor matching the input shape
+        input_tensor = torch.randn(self.input_shape)
+
+        # Forward pass
+        try:
+            output = block(input_tensor)
+            self.assertIsNotNone(output, "Output should not be None.")
+        except Exception as e:
+            self.fail(f"ConvolutionalDimensionalityReductionBlockBN raised an error: {e}")
+
+
+if __name__ == "__main__":
+    unittest.main()
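Aside, not part of the diff: since the test module imports model_architectures directly rather than through the package, the suite presumably has to be launched from inside the framework directory, e.g. with cd pytorch_mlp_framework && python tests.py.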
@@ -61,6 +61,9 @@ elif args.block_type == "empty_block":
 elif args.block_type == "conv_bn":
     processing_block_type = ConvolutionalProcessingBlockBN
     dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
+elif args.block_type == "conv_bn_rc":
+    processing_block_type = ConvolutionalProcessingBlockBNRC
+    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
 else:
     raise ModuleNotFoundError
 
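Note that the new conv_bn_rc branch pairs the residual processing block with the plain BN dimensionality-reduction block, presumably because an identity shortcut cannot span a stage that changes the spatial resolution or channel count (the shape assert in the residual block's forward would fail there), so the downsampling stages stay residual-free.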
@@ -87,6 +90,7 @@ conv_experiment = ExperimentBuilder(
     experiment_name=args.experiment_name,
     num_epochs=args.num_epochs,
     weight_decay_coefficient=args.weight_decay_coefficient,
+    learning_rate=args.learning_rate,
     use_gpu=args.use_gpu,
     continue_from_epoch=args.continue_from_epoch,
     train_data=train_data_loader,
run_vgg_38_bn_rc.sh (new file, 1 line)
@@ -0,0 +1 @@
+python pytorch_mlp_framework/train_evaluate_image_classification_system.py --batch_size 100 --seed 0 --num_filters 32 --num_stages 3 --num_blocks_per_stage 5 --experiment_name VGG_38_experiment --use_gpu True --num_classes 100 --block_type 'conv_bn_rc' --continue_from_epoch -1 --learning-rate 0.01