HW3: Convolutional Neural Network
In this homework, you are required to build a convolutional neural network for image classification, possibly with some advanced training tips.
There are three levels here:
Easy : Build a simple convolutional neural network as the baseline. (2 pts)
Medium : Design a better architecture or adopt different data augmentations to improve the performance. (2 pts)
Hard : Utilize provided unlabeled data to obtain better results. (2 pts)
https://www.kaggle.com/competitions/ml2021spring-hw3
0.Prepare
get Data: Food 11 Class
# Download the Food-11 archive from Google Drive and unpack it quietly.
# Requires the `gdown` and `unzip` command-line tools to be on PATH.
!gdown --id '1awF7pZ9Dz7X1jn1_QAiKN-_v56veCEKy' --output food-11.zip
!unzip -q food-11.zip
'gdown' 不是内部或外部命令,也不是可运行的程序
或批处理文件。
unzip: cannot find either food-11.zip or food-11.zip.zip.
1 2 3 4 5 6 7 8 9 10 11 12 import numpy as npimport torchimport torch.nn as nnimport torchvision.transforms as transformsimport torchvision.models as models from PIL import Imagefrom torch.utils.data import DataLoader, ConcatDataset, Subsetfrom torchvision.datasets import DatasetFolderfrom tqdm.auto import tqdm
torchvision for:
Image Preprocessing
Data Wrapping
Data Augmentation 很重要,但是需要思考如何对food进行augmentation
# Candidate AutoAugment policies; exactly one is picked at random per image.
_auto_augment_choices = [
    transforms.AutoAugment(),  # library-default policy
    transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
    transforms.AutoAugment(transforms.AutoAugmentPolicy.SVHN),
]

# Training pipeline: random crop to 128x128, one AutoAugment policy, then
# light geometric / brightness jitter, and finally conversion to a tensor.
_train_steps = [
    transforms.RandomResizedCrop((128, 128)),
    transforms.RandomChoice(_auto_augment_choices),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.5),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
]
train_tfm = transforms.Compose(_train_steps)

# Evaluation pipeline: no augmentation, only a resize to the same 128x128
# resolution followed by conversion to a tensor.
_test_steps = [
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
]
test_tfm = transforms.Compose(_test_steps)
Dataset 与 DataLoader
def _load_rgb(path):
    """Open the image at *path* and return it fully loaded in RGB mode.

    ``Image.open`` is lazy, so the context manager makes sure the underlying
    file is closed as soon as the pixel data has been read, and ``convert``
    guarantees three channels regardless of how the file was encoded.
    """
    with Image.open(path) as img:
        return img.convert("RGB")


batch_size = 32
test_batch_size = 512

# Labeled training data and unlabeled data both go through the augmenting
# transform; validation and test data only get resized.
train_set = DatasetFolder(
    "food-11/training/labeled",
    loader=_load_rgb,
    extensions="jpg",
    transform=train_tfm,
)
valid_set = DatasetFolder(
    "food-11/validation",
    loader=_load_rgb,
    extensions="jpg",
    transform=test_tfm,
)
unlabeled_set = DatasetFolder(
    "food-11/training/unlabeled",
    loader=_load_rgb,
    extensions="jpg",
    transform=train_tfm,
)
test_set = DatasetFolder(
    "food-11/testing",
    loader=_load_rgb,
    extensions="jpg",
    transform=test_tfm,
)

train_loader = DataLoader(
    train_set, batch_size=batch_size, shuffle=True, pin_memory=True
)
# Validation is not shuffled: the batch composition (and therefore the
# mean-of-batch-means accuracy) stays identical from epoch to epoch.
valid_loader = DataLoader(
    valid_set, batch_size=test_batch_size, shuffle=False, pin_memory=True
)
# Test order must be stable because predictions are written out by index.
test_loader = DataLoader(test_set, batch_size=test_batch_size, shuffle=False)
# Materialise every unlabeled image once (the transform is applied while
# iterating) and discard the placeholder label that the dataset yields.
unlabeled_set_list = [img for img, _ in unlabeled_set]
import gc

# The code visible after this point only goes through the loaders and the
# in-memory unlabeled list, so drop the dataset names and run a collection.
del valid_set
del unlabeled_set
del test_set
gc.collect()
0
2. Model
WARNING – You Must Know
You are free to modify the model architecture here for further improvement.
However, if you want to use some well-known architectures such as ResNet50, please make sure NOT to load the pre-trained weights.
Using such pre-trained models is considered cheating and therefore you will be punished.
Similarly, it is your responsibility to make sure no pre-trained weights are used if you use torch.hub to load any modules.
For example, if you use ResNet-18 as your model:
model = torchvision.models.resnet18(pretrained=False ) → This is fine.
model = torchvision.models.resnet18(pretrained=True ) → This is NOT allowed.
注意参数:
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
torch.nn.BatchNorm2d(channels)
torch.nn.MaxPool2d(kernel_size, stride, padding)
class Classifier(nn.Module):
    """Small CNN baseline: three conv stages followed by a three-layer MLP.

    The fully connected head is sized for inputs of shape [3, 128, 128].

    Args:
        num_classes: Width of the final linear layer. Defaults to 11.
    """

    def __init__(self, num_classes=11):
        super().__init__()

        # Each stage is Conv(3x3, stride 1, pad 1) -> BatchNorm -> ReLU -> MaxPool.
        # With a 128x128 input the spatial size goes 128 -> 64 -> 32 -> 8.
        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),
        )

        # 256 channels * 8 * 8 spatial positions after the last pooling layer.
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Map a batch [batch_size, 3, 128, 128] to logits [batch_size, num_classes]."""
        x = self.cnn_layers(x)
        # Keep the batch dimension and flatten everything else for the MLP.
        x = x.flatten(1)
        return self.fc_layers(x)
3. Training
其中的 get_pseudo_labels 函数 用于 semi-supervised learning,可选
Prof. Lee’s slides
def get_pseudo_labels(dataset, model, threshold=0.65):
    """Move confidently predicted unlabeled images into the training set.

    Every image in the module-level ``unlabeled_set_list`` is classified with
    ``model``. When the highest softmax probability exceeds ``threshold`` the
    image is paired with the predicted class index, appended to the
    module-level ``train_set`` and removed from ``unlabeled_set_list``.

    Args:
        dataset: Not used; kept so the signature stays unchanged. The images
            are always read from the global ``unlabeled_set_list``.
        model: Classifier returning one row of logits per input image.
        threshold: Minimum softmax confidence for accepting a pseudo label.

    Returns:
        None. ``train_set`` is rebound and ``unlabeled_set_list`` is updated
        in place; ``model`` is left in training mode.
    """
    global unlabeled_set_list, train_set

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    softmax = nn.Softmax(dim=-1)

    accepted = []   # (image, predicted class index) pairs
    remaining = []  # images that stay unlabeled
    with torch.no_grad():
        for img in tqdm(unlabeled_set_list):
            logits = model(torch.unsqueeze(img, 0).to(device))
            confidence, predicted = softmax(logits).max(dim=-1)
            if confidence.item() > threshold:
                # The label is the argmax class, not the probability itself.
                accepted.append((img, predicted.item()))
            else:
                remaining.append(img)

    # Concatenate once per call so the dataset is not nested one level per image.
    if accepted:
        train_set = ConcatDataset([train_set, accepted])

    # Slice assignment keeps the same list object for any other holder of it.
    unlabeled_set_list[:] = remaining

    print(f"[{len(train_set)-3080}/6787] images have been labeled.")
    model.train()
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ResNet-18 trained from scratch (no pre-trained weights) with an 11-way head.
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(512, 11)
# Follow the detected device so the script also runs on CPU-only machines.
model.to(device)
model.device = device

critersion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

n_epochs = 200
best_acc = 0
valid_acc_last = 0          # validation accuracy of the previous epoch
valid_acc_threshold = 0.7   # accuracy that must be beaten before pseudo-labeling
do_semi = False             # set to True to enable semi-supervised learning

for epoch in range(n_epochs):
    # NOTE(review): the nesting below is reconstructed from a collapsed line;
    # it assumes pseudo-labeling runs only when validation accuracy has just
    # beaten the threshold - confirm against the original notebook.
    if do_semi:
        if valid_acc_last > valid_acc_threshold:
            valid_acc_threshold = valid_acc_last
            # 9866 is presumably labeled + unlabeled image count - TODO confirm.
            if len(train_set) != 9866:
                get_pseudo_labels(unlabeled_set_list, model)
                # train_set was rebound by get_pseudo_labels; rebuild the loader.
                train_loader = DataLoader(
                    train_set, batch_size=batch_size, shuffle=True, pin_memory=True
                )

    # ---------- Training ----------
    model.train()
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):
        imgs, labels = batch
        labels = labels.to(device)

        logits = model(imgs.to(device))
        loss = critersion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to keep updates stable.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()

        acc = (logits.argmax(dim=-1) == labels).float().mean()

        # Store plain Python floats so no device tensors are kept alive.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    train_loss = sum(train_loss) / len(train_loss)
    train_accs = sum(train_accs) / len(train_accs)

    print(f"[ Train | {epoch+1:03d}/{n_epochs:03d}] loss = {train_loss:.5f}, acc = {train_accs:.5f}")

    # ---------- Validation ----------
    model.eval()
    valid_loss = []
    valid_accs = []

    for batch in tqdm(valid_loader):
        imgs, labels = batch
        labels = labels.to(device)

        with torch.no_grad():
            logits = model(imgs.to(device))

        loss = critersion(logits, labels)
        acc = (logits.argmax(dim=-1) == labels).float().mean()

        valid_loss.append(loss.item())
        valid_accs.append(acc.item())

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_accs = sum(valid_accs) / len(valid_accs)

    print(f"[ Valid | {epoch+1:03d}/{n_epochs:03d}] loss = {valid_loss:.5f}, acc = {valid_accs:.5f}")

    # Remember this epoch's result: it gates pseudo-labeling in the next epoch.
    valid_acc_last = valid_accs
    best_acc = max(best_acc, valid_accs)
0%| | 0/97 [00:00<?, ?it/s]
[ Train | 001/200] loss = 2.36045, acc = 0.14530
0%| | 0/2 [00:00<?, ?it/s]
[ Valid | 001/200] loss = 2.19217, acc = 0.24406
....
0%| | 0/97 [00:00<?, ?it/s]
[ Train | 200/200] loss = 1.00010, acc = 0.66495
0%| | 0/2 [00:00<?, ?it/s]
[ Valid | 200/200] loss = 1.34337, acc = 0.62323
4. Testing
# Inference on the test split: no gradients, one predicted class per image.
model.eval()

predictions = []
with torch.no_grad():
    # The loader still yields a label slot per batch; it is ignored here.
    for imgs, _ in tqdm(test_loader):
        batch_logits = model(imgs.to(device))
        predictions += batch_logits.argmax(dim=-1).cpu().numpy().tolist()
0%| | 0/7 [00:00<?, ?it/s]
# Write the submission file: a header row, then one "index,class" row per
# prediction, with no padding inside the rows.
with open("predict.csv", "w") as f:
    f.write("Id,Category\n")
    f.writelines(f"{i},{pred}\n" for i, pred in enumerate(predictions))