add BN+RC layer

Anton Lydike 2024-11-19 10:38:54 +00:00
parent 7861133463
commit ae0e14b5fb
6 changed files with 175 additions and 2 deletions


@@ -47,6 +47,13 @@ def get_args():
        default=3,
        help="The channel dimensionality of our image-data",
    )
    parser.add_argument(
        "--learning-rate",
        nargs="?",
        type=float,
        default=1e-3,
        help="The learning rate (default 1e-3)",
    )
    parser.add_argument(
        "--image_height", nargs="?", type=int, default=32, help="Height of image data"
    )
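
A note on how the new option is consumed: argparse maps the hyphenated --learning-rate flag to the underscore attribute args.learning_rate, which is what the training script reads later. A minimal, self-contained sketch (not the framework's actual get_args(), just the new option in isolation):

    import argparse

    # Hypothetical stand-in for get_args(); only the new option is shown.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--learning-rate",
        nargs="?",
        type=float,
        default=1e-3,
        help="The learning rate (default 1e-3)",
    )
    args = parser.parse_args(["--learning-rate", "0.01"])
    print(args.learning_rate)  # -> 0.01 (hyphen becomes underscore)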


@@ -24,6 +24,7 @@ class ExperimentBuilder(nn.Module):
        val_data,
        test_data,
        weight_decay_coefficient,
        learning_rate,
        use_gpu,
        continue_from_epoch=-1,
    ):
@@ -79,7 +80,10 @@
        print("Total number of linear layers", num_linear_layers)
        self.optimizer = optim.Adam(
            self.parameters(), amsgrad=False, weight_decay=weight_decay_coefficient
            self.parameters(),
            amsgrad=False,
            weight_decay=weight_decay_coefficient,
            lr=learning_rate,
        )
        self.learning_rate_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=num_epochs, eta_min=0.00002
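
With this change the optimizer now starts from the configured learning rate and CosineAnnealingLR decays it towards eta_min=0.00002 over num_epochs. A minimal sketch of that interaction, assuming the usual pattern of one scheduler step per epoch (the model and loop below are illustrative, not the framework's):

    import torch
    from torch import nn, optim

    model = nn.Linear(8, 2)  # stand-in for the real network
    optimizer = optim.Adam(
        model.parameters(), amsgrad=False, weight_decay=1e-4, lr=1e-3
    )
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0.00002)

    for epoch in range(10):
        loss = model(torch.randn(4, 8)).sum()  # stand-in for one training epoch
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()  # one scheduler step per epoch
        print(epoch, scheduler.get_last_lr())  # lr follows a cosine curve from 1e-3 towards 2e-5
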
@@ -178,7 +182,7 @@
        """
        for name, param in named_parameters:
            if 'bias' in name:
            if "bias" in name:
                continue
            # Check if the parameter requires gradient and has a gradient
            if param.requires_grad and param.grad is not None:


@@ -568,3 +568,73 @@ class ConvolutionalDimensionalityReductionBlockBN(nn.Module):
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))
        return out


class ConvolutionalProcessingBlockBNRC(nn.Module):
    def __init__(self, input_shape, num_filters, kernel_size, padding, bias, dilation):
        super().__init__()
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_shape = input_shape
        self.padding = padding
        self.bias = bias
        self.dilation = dilation
        self.build_module()

    def build_module(self):
        self.layer_dict = nn.ModuleDict()
        x = torch.zeros(self.input_shape)
        out = x

        # First convolutional layer with BN
        self.layer_dict["conv_0"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_0"] = nn.BatchNorm2d(self.num_filters)
        out = self.layer_dict["conv_0"].forward(out)
        out = self.layer_dict["bn_0"].forward(out)
        out = F.leaky_relu(out)

        # Second convolutional layer with BN
        self.layer_dict["conv_1"] = nn.Conv2d(
            in_channels=out.shape[1],
            out_channels=self.num_filters,
            bias=self.bias,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            padding=self.padding,
            stride=1,
        )
        self.layer_dict["bn_1"] = nn.BatchNorm2d(self.num_filters)
        out = self.layer_dict["conv_1"].forward(out)
        out = self.layer_dict["bn_1"].forward(out)
        out = F.leaky_relu(out)

        # Print final output shape for debugging
        print(out.shape)

    def forward(self, x):
        residual = x  # save the block input for the residual connection
        out = x

        # First conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_0"](self.layer_dict["conv_0"](out)))

        # Second conv layer + BN + leaky ReLU
        out = F.leaky_relu(self.layer_dict["bn_1"](self.layer_dict["conv_1"](out)))

        # Residual connection; input and output shapes must match for the addition
        assert residual.shape == out.shape
        out += residual

        return out
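
Usage note on the new block: because conv_1's output is added back onto the block input, the residual path only works when the input already has num_filters channels and the convolutions preserve the spatial size (e.g. kernel_size=3, padding=1, dilation=1, stride=1). A small illustrative instantiation, with shapes chosen to satisfy the assert (not taken from the framework's configs):

    import torch
    from model_architectures import ConvolutionalProcessingBlockBNRC

    block = ConvolutionalProcessingBlockBNRC(
        input_shape=(1, 32, 16, 16),  # channel count matches num_filters below
        num_filters=32,
        kernel_size=3,
        padding=1,  # keeps the 16x16 spatial size, so residual shapes match
        bias=False,
        dilation=1,
    )
    out = block(torch.randn(1, 32, 16, 16))
    print(out.shape)  # torch.Size([1, 32, 16, 16])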


@@ -0,0 +1,87 @@
import unittest

import torch

from model_architectures import (
    ConvolutionalProcessingBlockBN,
    ConvolutionalDimensionalityReductionBlockBN,
    ConvolutionalProcessingBlockBNRC,
)


class TestBatchNormalizationBlocks(unittest.TestCase):
    def setUp(self):
        # Common parameters
        self.input_shape = (1, 3, 32, 32)  # Batch size 1, 3 channels, 32x32 input
        self.num_filters = 16
        self.kernel_size = 3
        self.padding = 1
        self.bias = False
        self.dilation = 1
        self.reduction_factor = 2

    def test_convolutional_processing_block(self):
        # Create a ConvolutionalProcessingBlockBN instance
        block = ConvolutionalProcessingBlockBN(
            input_shape=self.input_shape,
            num_filters=self.num_filters,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
            dilation=self.dilation,
        )

        # Generate a random tensor matching the input shape
        input_tensor = torch.randn(self.input_shape)

        # Forward pass
        try:
            output = block(input_tensor)
            self.assertIsNotNone(output, "Output should not be None.")
        except Exception as e:
            self.fail(f"ConvolutionalProcessingBlockBN raised an error: {e}")

    def test_convolutional_processing_block_with_rc(self):
        # The residual connection requires the input channel count to equal
        # num_filters, so this test uses an input shape with num_filters channels.
        rc_input_shape = (1, self.num_filters, 32, 32)

        # Create a ConvolutionalProcessingBlockBNRC instance
        block = ConvolutionalProcessingBlockBNRC(
            input_shape=rc_input_shape,
            num_filters=self.num_filters,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
            dilation=self.dilation,
        )

        # Generate a random tensor matching the input shape
        input_tensor = torch.randn(rc_input_shape)

        # Forward pass
        try:
            output = block(input_tensor)
            self.assertIsNotNone(output, "Output should not be None.")
        except Exception as e:
            self.fail(f"ConvolutionalProcessingBlockBNRC raised an error: {e}")

    def test_convolutional_dimensionality_reduction_block(self):
        # Create a ConvolutionalDimensionalityReductionBlockBN instance
        block = ConvolutionalDimensionalityReductionBlockBN(
            input_shape=self.input_shape,
            num_filters=self.num_filters,
            kernel_size=self.kernel_size,
            padding=self.padding,
            bias=self.bias,
            dilation=self.dilation,
            reduction_factor=self.reduction_factor,
        )

        # Generate a random tensor matching the input shape
        input_tensor = torch.randn(self.input_shape)

        # Forward pass
        try:
            output = block(input_tensor)
            self.assertIsNotNone(output, "Output should not be None.")
        except Exception as e:
            self.fail(f"ConvolutionalDimensionalityReductionBlockBN raised an error: {e}")


if __name__ == "__main__":
    unittest.main()
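
Since the module ends in a __main__ guard, the tests can be run directly once the file is saved next to model_architectures.py; the filename below is only a placeholder, as the commit page does not show one:

    python test_bn_blocks.py -v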


@@ -61,6 +61,9 @@ elif args.block_type == "empty_block":
elif args.block_type == "conv_bn":
    processing_block_type = ConvolutionalProcessingBlockBN
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
elif args.block_type == "conv_bn_rc":
    processing_block_type = ConvolutionalProcessingBlockBNRC
    dim_reduction_block_type = ConvolutionalDimensionalityReductionBlockBN
else:
    raise ModuleNotFoundError
@@ -87,6 +90,7 @@ conv_experiment = ExperimentBuilder(
    experiment_name=args.experiment_name,
    num_epochs=args.num_epochs,
    weight_decay_coefficient=args.weight_decay_coefficient,
    learning_rate=args.learning_rate,
    use_gpu=args.use_gpu,
    continue_from_epoch=args.continue_from_epoch,
    train_data=train_data_loader,

run_vgg_38_bn_rc.sh (new file)

@@ -0,0 +1 @@
python pytorch_mlp_framework/train_evaluate_image_classification_system.py --batch_size 100 --seed 0 --num_filters 32 --num_stages 3 --num_blocks_per_stage 5 --experiment_name VGG_38_experiment --use_gpu True --num_classes 100 --block_type 'conv_bn_rc' --continue_from_epoch -1 --learning-rate 0.01