HW3: Convolutional Neural Network

In this homework, you are required to build a convolutional neural network for image classification, possibly with some advanced training tips.

There are three levels here:

Easy: Build a simple convolutional neural network as the baseline. (2 pts)

Medium: Design a better architecture or adopt different data augmentations to improve the performance. (2 pts)

Hard: Utilize provided unlabeled data to obtain better results. (2 pts)

https://www.kaggle.com/competitions/ml2021spring-hw3

0. Prepare

Get the data: Food-11 (11 food classes)

# Download the dataset
# You may choose where to download the data.

# Google Drive
!gdown --id '1awF7pZ9Dz7X1jn1_QAiKN-_v56veCEKy' --output food-11.zip

# Dropbox
# !wget https://www.dropbox.com/s/m9q6273jl3djall/food-11.zip -O food-11.zip

# MEGA
# !sudo apt install megatools
# !megadl "https://mega.nz/#!zt1TTIhK!ZuMbg5ZjGWzWX1I6nEUbfjMZgCmAgeqJlwDkqdIryfg"

# Unzip the dataset.
# This may take some time.
!unzip -q food-11.zip
'gdown' is not recognized as an internal or external command, operable program or batch file.
unzip:  cannot find either food-11.zip or food-11.zip.zip.
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models  # for well-known model architectures

from PIL import Image

from torch.utils.data import DataLoader, ConcatDataset, Subset
from torchvision.datasets import DatasetFolder

from tqdm.auto import tqdm

1. Dataset, DataLoader, Transforms

torchvision is used for:

  1. Image preprocessing
  2. Data wrapping (Dataset/DataLoader)
  3. Data augmentation: this matters a lot, but think carefully about which augmentations actually make sense for food images (train_tfm below is one possible combination)

Transform

train_tfm = transforms.Compose([
    # transforms.Resize((128, 128)),  # fixed shape
    # TODO
    transforms.RandomResizedCrop((128, 128)),
    transforms.RandomChoice([
        transforms.AutoAugment(),
        transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
        transforms.AutoAugment(transforms.AutoAugmentPolicy.SVHN)
    ]),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.5),
    transforms.RandomRotation(5),

    transforms.ToTensor(),  # last step
])


# No augmentation on the test and validation sets,
# but we still need a fixed shape and conversion to tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])
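An optional, quick sanity check of train_tfm on a single image: apply it a few times and confirm each call yields a differently augmented [3, 128, 128] tensor. The file path here is only illustrative; substitute any image from the labeled training folder.

# Optional: preview the random augmentations on one image.
# The path below is just an example, not a guaranteed filename.
sample = Image.open("food-11/training/labeled/00/0_0.jpg").convert("RGB")
for _ in range(3):
    aug = train_tfm(sample)   # each call draws new random augmentations
    print(aug.shape)          # torch.Size([3, 128, 128])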

Dataset and DataLoader

# Tune the batch size.
# A larger batch size gives more stable gradients,
# but it is limited by GPU memory, so adjust it accordingly.
batch_size = 32
test_batch_size = 512

# Dataset
train_set = DatasetFolder("food-11/training/labeled", loader=lambda x: Image.open(x),
                          extensions="jpg", transform=train_tfm)
valid_set = DatasetFolder("food-11/validation", loader=lambda x: Image.open(x),
                          extensions="jpg", transform=test_tfm)
unlabeled_set = DatasetFolder("food-11/training/unlabeled", loader=lambda x: Image.open(x),
                              extensions="jpg", transform=train_tfm)
test_set = DatasetFolder("food-11/testing", loader=lambda x: Image.open(x),
                         extensions="jpg", transform=test_tfm)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=test_batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=test_batch_size, shuffle=False)
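A small, optional check that the loaders yield the expected shapes before training starts:

# Peek at one batch to confirm the shapes.
imgs, labels = next(iter(train_loader))
print(imgs.shape, labels.shape)   # torch.Size([32, 3, 128, 128]) torch.Size([32])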
# Materialize the unlabeled images once so get_pseudo_labels can index and remove them.
unlabeled_set_list = []

for img, _ in unlabeled_set:
    unlabeled_set_list.append(img)

import gc

# Free the dataset objects we no longer need (the DataLoaders keep their own references).
del valid_set, unlabeled_set, test_set
gc.collect()
0

2. Model

WARNING – You Must Know

You are free to modify the model architecture here for further improvement.
However, if you want to use some well-known architectures such as ResNet50, please make sure NOT to load the pre-trained weights.
Using such pre-trained models is considered cheating and therefore you will be punished.
Similarly, it is your responsibility to make sure no pre-trained weights are used if you use torch.hub to load any modules.

For example, if you use ResNet-18 as your model:

model = torchvision.models.resnet18(pretrained=False) → This is fine.

model = torchvision.models.resnet18(pretrained=True) → This is NOT allowed.

Pay attention to the constructor arguments:

torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)

torch.nn.BatchNorm2d(channels)

torch.nn.MaxPool2d(kernel_size, stride, padding)

class Classifier(nn.Module):
    def __init__(self):
        '''
        input image size = [3, 128, 128]
        '''
        super(Classifier, self).__init__()

        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(64, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),

            nn.Conv2d(128, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0)
        )

        # Shape check:
        # channels: 256 (the out_channels of the last Conv2d)
        # width   : 8 (the 3x3 / stride-1 / padding-1 Conv2d layers keep the width;
        #              the downsampling MaxPool2d layers shrink it: 128 / 2 / 2 / 4 = 8)
        # height  : 8 (same reasoning)

        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 11)
        )

    def forward(self, x):
        '''
        input:  [batch_size, 3, 128, 128]
        output: [batch_size, 11]
        '''
        # convolution, flatten, fully connected
        x = self.cnn_layers(x)
        x = x.flatten(1)
        x = self.fc_layers(x)

        return x
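If you want to double-check the 256 * 8 * 8 figure fed into the first Linear layer, a dummy forward pass through cnn_layers does it:

# Run a dummy batch through the convolutional part to verify the flattened size.
dummy = torch.zeros(1, 3, 128, 128)
print(Classifier().cnn_layers(dummy).shape)   # torch.Size([1, 256, 8, 8]) -> 256 * 8 * 8 after flatten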

3. Training

The get_pseudo_labels function below is used for semi-supervised learning; it is optional.

Prof. Lee’s slides

def get_pseudo_labels(model, threshold=0.65):
    '''
    Use the current model to generate pseudo labels for the unlabeled images.
    '''
    global unlabeled_set_list, train_set

    remove_index, index = [], 0

    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    softmax = nn.Softmax(dim=-1)

    for img in tqdm(unlabeled_set_list):

        with torch.no_grad():
            logits = model(torch.unsqueeze(img, 0).cuda())

        probs = softmax(logits)

        # TODO: filter the data and build the new training set
        # dataset = ...
        if torch.max(probs).item() > threshold:
            # Append an (image, predicted label) pair as a one-sample dataset.
            train_set = ConcatDataset([train_set, [(img, torch.argmax(probs).item())]])
            remove_index.append(index)
        index += 1

    # Remove the newly labeled images from the unlabeled pool (back to front).
    remove_index.reverse()
    for i in remove_index:
        del unlabeled_set_list[i]

    print(f"[{len(train_set) - 3080}/6786] images have been labeled.")

    model.train()
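The per-image loop above is simple but slow. Below is a batched sketch of the same filtering step, along the lines of the commented-out DataLoader idea; it assumes unlabeled_set_list already holds transformed tensors (which is the case here). The training loop below still calls get_pseudo_labels.

def get_pseudo_labels_batched(model, threshold=0.65, batch_size=128):
    # Same idea as get_pseudo_labels, but runs the model on whole batches.
    global unlabeled_set_list, train_set
    model.eval()
    softmax = nn.Softmax(dim=-1)
    keep, new_samples = [], []
    with torch.no_grad():
        for start in range(0, len(unlabeled_set_list), batch_size):
            chunk = unlabeled_set_list[start:start + batch_size]
            probs = softmax(model(torch.stack(chunk).cuda()))
            conf, labels = probs.max(dim=-1)
            for i, img in enumerate(chunk):
                if conf[i].item() > threshold:
                    new_samples.append((img, labels[i].item()))
                else:
                    keep.append(img)
    unlabeled_set_list = keep
    if new_samples:
        train_set = ConcatDataset([train_set, new_samples])
    model.train()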

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ----- original model -----
# model = Classifier().to(device)

# ----- current model -----
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(512, 11)
model.cuda()


model.device = device
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

n_epochs = 200
best_acc = 0

valid_acc_last = 0
valid_acc_threshold = 0.7

do_semi = False

for epoch in range(n_epochs):
    # TODO: in each epoch, semi-supervised learning can first generate pseudo labels,
    # mix them with the original labeled data, and then train on the combined set.
    if do_semi:
        if valid_acc_last > valid_acc_threshold:
            valid_acc_threshold = valid_acc_last

            if len(train_set) != 9866:  # 3080 + 6786
                get_pseudo_labels(model)

                train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)


    # Train
    model.train()

    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):
        imgs, labels = batch
        logits = model(imgs.to(device))
        loss = criterion(logits, labels.to(device))
        optimizer.zero_grad()
        loss.backward()
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()

        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        train_loss.append(loss.item())
        train_accs.append(acc)

    train_loss = sum(train_loss) / len(train_loss)
    train_accs = sum(train_accs) / len(train_accs)

    print(f"[ Train | {epoch+1:03d}/{n_epochs:03d}] loss = {train_loss:.5f}, acc = {train_accs:.5f}")

    # Validation
    model.eval()
    valid_loss = []
    valid_accs = []
    for batch in tqdm(valid_loader):
        imgs, labels = batch
        with torch.no_grad():
            logits = model(imgs.to(device))

        loss = criterion(logits, labels.to(device))
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        valid_loss.append(loss.item())
        valid_accs.append(acc)

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_accs = sum(valid_accs) / len(valid_accs)

    print(f"[ Valid | {epoch+1:03d}/{n_epochs:03d}] loss = {valid_loss:.5f}, acc = {valid_accs:.5f}")

    # Record the validation accuracy so the semi-supervised gate above can use it.
    valid_acc_last = valid_accs

  0%|          | 0/97 [00:00<?, ?it/s]
[ Train | 001/200] loss = 2.36045, acc = 0.14530
  0%|          | 0/2 [00:00<?, ?it/s]
[ Valid | 001/200] loss = 2.19217, acc = 0.24406

....

  0%|          | 0/97 [00:00<?, ?it/s]
[ Train | 200/200] loss = 1.00010, acc = 0.66495
  0%|          | 0/2 [00:00<?, ?it/s]
[ Valid | 200/200] loss = 1.34337, acc = 0.62323
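Note that best_acc is declared but never used in the loop above. If you want to keep the best weights, here is a minimal checkpointing sketch that could sit at the end of each epoch (the filename best_model.ckpt is just an example):

# Save the weights whenever the validation accuracy improves.
if valid_accs > best_acc:
    best_acc = valid_accs
    torch.save(model.state_dict(), "best_model.ckpt")
    print(f"Saved model at epoch {epoch + 1} with valid acc = {best_acc:.5f}")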

4. Testing

model.eval()

predictions = []

for batch in tqdm(test_loader):
    imgs, labels = batch
    with torch.no_grad():
        logits = model(imgs.to(device))
    predictions.extend(logits.argmax(dim=-1).cpu().numpy().tolist())
  0%|          | 0/7 [00:00<?, ?it/s]
with open("predict.csv", "w") as f:
f.write("Id,Category\n")
for i, pred in enumerate(predictions):
f.write(f"{i},{pred}\n")
