HW2: Phoneme Classification

Platform : Kaggle

Sample Code : Google Colab

0. Prepare

  • TIMIT
  • This homework is a multi-class classification task: we train a deep neural network classifier to predict the phoneme of each frame in the TIMIT speech corpus (a short sketch of the assumed feature layout follows this list).
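Each frame is represented by a 429-dimensional feature vector. As far as I can tell from the file names below (train_11.npy), this is 11 consecutive frames concatenated together, with 39 acoustic features per frame (11 × 39 = 429). The following minimal sketch only illustrates that assumed layout:

import numpy as np

# hypothetical single sample: 11 context frames x 39 features, flattened to 429 dims
x = np.zeros(429, dtype=np.float32)
frames = x.reshape(11, 39)   # assumed layout; the center row would be the labeled frame
print(frames.shape)          # (11, 39)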

Download in Google Colab (downloading from the official source also works):

!gdown --id '1HPkcmQmFGu-3OknddKIa5dNDsR05lIQR' --output data.zip
!unzip data.zip
!ls
(Note: these commands assume a Colab environment; if you run them locally, install gdown first, e.g. pip install gdown.)

Prepare Data

import numpy as np

print("Loading data...")

data_root = './timit_11/'
train = np.load(data_root + 'train_11.npy')
train_label = np.load(data_root + 'train_label_11.npy')
test = np.load(data_root + 'test_11.npy')

print('Size of training data: {}'.format(train.shape))
print('Size of testing data: {}'.format(test.shape))

train_label
Loading data...
Size of training data: (1229932, 429)
Size of testing data: (451552, 429)
array(['36', '36', '36', ..., '35', '35', '35'], dtype='<U2')
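The labels are stored as strings ('0' through '38', matching the 39 phoneme classes the model predicts below). An optional quick check of the class distribution, assuming train_label is loaded as above:

labels = train_label.astype(np.int32)
classes, counts = np.unique(labels, return_counts=True)
print('number of classes:', len(classes))   # expected: 39
print('most common class:', classes[counts.argmax()], counts.max())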

1. Dataset

import torch
from torch.utils.data import Dataset

class TIMITDataset(Dataset):
    '''
    Speech dataset
    '''
    def __init__(self, X, y=None):
        self.data = torch.from_numpy(X).float()
        if y is not None:
            y = y.astype(np.int32)
            self.label = torch.LongTensor(y)
        else:
            self.label = None

    def __getitem__(self, idx):
        if self.label is not None:
            return self.data[idx], self.label[idx]
        else:
            return self.data[idx]

    def __len__(self):
        return len(self.data)

# Split into training and validation sets
VAL_RATIO = 0.2

percent = int(train.shape[0] * (1 - VAL_RATIO))
train_x, train_y = train[:percent], train_label[:percent]
val_x, val_y = train[percent:], train_label[percent:]
print("Size of training set: {}".format(train_x.shape))
print("Size of validation set: {}".format(val_x.shape))
Size of training set: (983945, 429)
Size of validation set: (245987, 429)
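Note that this split takes the last 20% of rows as validation data. Because frames from the same utterance are stored consecutively, a randomized split can give a less biased validation estimate; a minimal sketch (the shuffled indices and the seed are my own additions, not part of the original code):

rng = np.random.default_rng(0)                 # fixed seed for reproducibility
indices = rng.permutation(train.shape[0])      # shuffle row indices
cut = int(train.shape[0] * (1 - VAL_RATIO))
train_x, train_y = train[indices[:cut]], train_label[indices[:cut]]
val_x, val_y = train[indices[cut:]], train_label[indices[cut:]]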
BATCH_SIZE = 64

from torch.utils.data import DataLoader

train_set = TIMITDataset(train_x, train_y)
val_set = TIMITDataset(val_x, val_y)

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
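With roughly a million training frames, data loading can become the bottleneck. If that happens, DataLoader also accepts num_workers and pin_memory; the values below are illustrative, not from the original code:

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=2, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=2, pin_memory=True)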

Since the dataset is large, we can free some memory at this point:

import gc

del train, train_label, train_x, train_y, val_x, val_y
gc.collect()
0

2. Model

import torch
import torch.nn as nn

class Classifier(nn.Module):
    '''
    Classifier model
    '''
    def __init__(self):
        super(Classifier, self).__init__()
        self.layer0 = nn.Linear(429, 2048)
        self.layer1 = nn.Linear(2048, 1024)
        self.layer2 = nn.Linear(1024, 512)
        self.layer3 = nn.Linear(512, 128)
        self.out = nn.Linear(128, 39)

        self.act_fn = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.batchnorm0 = nn.BatchNorm1d(2048)
        self.batchnorm1 = nn.BatchNorm1d(1024)
        self.batchnorm2 = nn.BatchNorm1d(512)
        self.batchnorm3 = nn.BatchNorm1d(128)

    def forward(self, x):
        x = self.layer0(x)
        x = self.batchnorm0(x)
        x = self.act_fn(x)
        x = self.dropout(x)

        x = self.layer1(x)
        x = self.batchnorm1(x)
        x = self.act_fn(x)
        x = self.dropout(x)

        x = self.layer2(x)
        x = self.batchnorm2(x)
        x = self.act_fn(x)
        x = self.dropout(x)

        x = self.layer3(x)
        x = self.batchnorm3(x)
        x = self.act_fn(x)

        x = self.out(x)

        return x
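A quick sanity check that the layer sizes line up is to run a forward pass on a dummy batch (the batch of 4 random vectors is made up purely for this check):

m = Classifier()
dummy = torch.randn(4, 429)    # 4 fake frames, 429 features each
print(m(dummy).shape)          # expected: torch.Size([4, 39])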

3. Train

def get_device():
    ''' Use GPU if available, otherwise CPU '''
    return 'cuda' if torch.cuda.is_available() else 'cpu'

def same_seeds(seed):
    ''' Fix random seeds for reproducibility '''
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
# Configuration
same_seeds(0)

device = get_device()
print(f'Device: {device}')

num_epoch = 50
learning_rate = 0.0001

model_path = './model.ckpt'

# Define the model, loss function, and optimizer
model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Device: cuda
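Plain Adam with a fixed learning rate is the baseline used here. If you want to experiment, two common tweaks are weight decay and a learning-rate schedule; a hedged sketch (AdamW, the weight_decay value, and the cosine schedule are illustrative choices, not part of the original recipe):

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epoch)
# call scheduler.step() once per epoch, after the optimizer updates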
# Start training
best_acc = 0.0
for epoch in range(num_epoch):
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # Training pass for this epoch
    model.train()
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward, backward, and update
        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()
        optimizer.step()

        # Take the class with the highest score; accumulate accuracy and loss
        _, train_pred = torch.max(outputs, 1)
        train_acc += (train_pred.cpu() == labels.cpu()).sum().item()
        train_loss += batch_loss.item()

    # Validation pass for this epoch
    if len(val_set) > 0:
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(val_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass only
                outputs = model(inputs)
                batch_loss = criterion(outputs, labels)

                # Accumulate accuracy and loss
                _, val_pred = torch.max(outputs, 1)
                val_acc += (val_pred.cpu() == labels.cpu()).sum().item()
                val_loss += batch_loss.item()

            print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} Loss: {:3.6f}'.format(
                epoch + 1, num_epoch,
                train_acc / len(train_set), train_loss / len(train_loader),
                val_acc / len(val_set), val_loss / len(val_loader)
            ))

            # Save the model if validation accuracy improves
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), model_path)
                print("Saving model with acc {:3.6f}".format(val_acc / len(val_set)))

    # No validation set
    else:
        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
            epoch + 1, num_epoch,
            train_acc / len(train_set), train_loss / len(train_loader)
        ))

# If there is no validation set, save the model from the last epoch
if len(val_set) == 0:
    torch.save(model.state_dict(), model_path)
    print("Saving model at last epoch.")
[001/050] Train Acc: 0.609924 Loss: 1.286747 | Val Acc: 0.694931 Loss: 0.956475
Saving model with acc 0.694931
[002/050] Train Acc: 0.670930 Loss: 1.028066 | Val Acc: 0.715131 Loss: 0.876589
Saving model with acc 0.715131
[003/050] Train Acc: 0.693027 Loss: 0.948449 | Val Acc: 0.722457 Loss: 0.844810
Saving model with acc 0.722457
[004/050] Train Acc: 0.707268 Loss: 0.895078 | Val Acc: 0.730852 Loss: 0.815643
Saving model with acc 0.730852
[005/050] Train Acc: 0.717857 Loss: 0.856392 | Val Acc: 0.734486 Loss: 0.800215
Saving model with acc 0.734486
[006/050] Train Acc: 0.728068 Loss: 0.821858 | Val Acc: 0.737852 Loss: 0.786928
Saving model with acc 0.737852
[007/050] Train Acc: 0.735884 Loss: 0.792803 | Val Acc: 0.740482 Loss: 0.781770
Saving model with acc 0.740482
[008/050] Train Acc: 0.743622 Loss: 0.767214 | Val Acc: 0.742462 Loss: 0.776334
Saving model with acc 0.742462
[009/050] Train Acc: 0.750388 Loss: 0.743795 | Val Acc: 0.743291 Loss: 0.772254
Saving model with acc 0.743291
[010/050] Train Acc: 0.756309 Loss: 0.723846 | Val Acc: 0.743966 Loss: 0.777039
Saving model with acc 0.743966
[011/050] Train Acc: 0.761473 Loss: 0.705245 | Val Acc: 0.746893 Loss: 0.768033
Saving model with acc 0.746893
[012/050] Train Acc: 0.766553 Loss: 0.688432 | Val Acc: 0.744808 Loss: 0.774924
[013/050] Train Acc: 0.771151 Loss: 0.671526 | Val Acc: 0.747405 Loss: 0.768126
Saving model with acc 0.747405
[014/050] Train Acc: 0.776192 Loss: 0.655516 | Val Acc: 0.746763 Loss: 0.770916
[015/050] Train Acc: 0.779843 Loss: 0.642709 | Val Acc: 0.744210 Loss: 0.783095
[016/050] Train Acc: 0.784258 Loss: 0.629430 | Val Acc: 0.743808 Loss: 0.782051
[017/050] Train Acc: 0.787317 Loss: 0.617749 | Val Acc: 0.745161 Loss: 0.787476
[018/050] Train Acc: 0.791100 Loss: 0.605589 | Val Acc: 0.746808 Loss: 0.787505
[019/050] Train Acc: 0.794240 Loss: 0.596160 | Val Acc: 0.747308 Loss: 0.782463
[020/050] Train Acc: 0.797185 Loss: 0.585868 | Val Acc: 0.748263 Loss: 0.787579
Saving model with acc 0.748263
[021/050] Train Acc: 0.800573 Loss: 0.575418 | Val Acc: 0.747287 Loss: 0.790574
[022/050] Train Acc: 0.803376 Loss: 0.567265 | Val Acc: 0.747857 Loss: 0.791368
[023/050] Train Acc: 0.806060 Loss: 0.558286 | Val Acc: 0.745966 Loss: 0.798865
[024/050] Train Acc: 0.807887 Loss: 0.551136 | Val Acc: 0.746117 Loss: 0.801162
[025/050] Train Acc: 0.810238 Loss: 0.544797 | Val Acc: 0.744637 Loss: 0.806984
[026/050] Train Acc: 0.811948 Loss: 0.537816 | Val Acc: 0.745942 Loss: 0.805529
[027/050] Train Acc: 0.814784 Loss: 0.529741 | Val Acc: 0.744210 Loss: 0.819476
[028/050] Train Acc: 0.816640 Loss: 0.524325 | Val Acc: 0.744889 Loss: 0.814478
[029/050] Train Acc: 0.818946 Loss: 0.517234 | Val Acc: 0.746007 Loss: 0.817322
[030/050] Train Acc: 0.819842 Loss: 0.513504 | Val Acc: 0.745564 Loss: 0.814147
[031/050] Train Acc: 0.821976 Loss: 0.506308 | Val Acc: 0.744812 Loss: 0.832211
[032/050] Train Acc: 0.824199 Loss: 0.501401 | Val Acc: 0.744917 Loss: 0.815683
[033/050] Train Acc: 0.825285 Loss: 0.497150 | Val Acc: 0.746401 Loss: 0.827134
[034/050] Train Acc: 0.827462 Loss: 0.491655 | Val Acc: 0.745206 Loss: 0.826960
[035/050] Train Acc: 0.828403 Loss: 0.487580 | Val Acc: 0.744064 Loss: 0.830157
[036/050] Train Acc: 0.830072 Loss: 0.482296 | Val Acc: 0.744421 Loss: 0.836688
[037/050] Train Acc: 0.830875 Loss: 0.479000 | Val Acc: 0.743942 Loss: 0.837147
[038/050] Train Acc: 0.832914 Loss: 0.474777 | Val Acc: 0.744279 Loss: 0.845676
[039/050] Train Acc: 0.833925 Loss: 0.471150 | Val Acc: 0.744710 Loss: 0.846064
[040/050] Train Acc: 0.834895 Loss: 0.467962 | Val Acc: 0.744137 Loss: 0.847515
[041/050] Train Acc: 0.836528 Loss: 0.463762 | Val Acc: 0.744560 Loss: 0.840268
[042/050] Train Acc: 0.838013 Loss: 0.458918 | Val Acc: 0.745165 Loss: 0.842505
[043/050] Train Acc: 0.838658 Loss: 0.456205 | Val Acc: 0.743905 Loss: 0.855258
[044/050] Train Acc: 0.839970 Loss: 0.453910 | Val Acc: 0.743604 Loss: 0.863975
[045/050] Train Acc: 0.840819 Loss: 0.450363 | Val Acc: 0.742645 Loss: 0.861839
[046/050] Train Acc: 0.841898 Loss: 0.446966 | Val Acc: 0.743682 Loss: 0.862985
[047/050] Train Acc: 0.843353 Loss: 0.443084 | Val Acc: 0.743125 Loss: 0.867474
[048/050] Train Acc: 0.844095 Loss: 0.441196 | Val Acc: 0.742706 Loss: 0.869525
[049/050] Train Acc: 0.844868 Loss: 0.437980 | Val Acc: 0.743064 Loss: 0.869354
[050/050] Train Acc: 0.846024 Loss: 0.434302 | Val Acc: 0.742873 Loss: 0.865394
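Validation accuracy peaks around epoch 20 (~0.748) and then drifts down while training accuracy keeps rising, so the later epochs mostly overfit; the best checkpoint is already kept by the save-on-improvement logic above, but you could also stop training early. A minimal sketch of a patience check (the function and its default patience are my own additions):

def should_stop(val_acc_history, patience=5):
    '''Return True if validation accuracy has not improved in the last `patience` epochs.'''
    if len(val_acc_history) <= patience:
        return False
    best_before = max(val_acc_history[:-patience])
    return max(val_acc_history[-patience:]) <= best_before

# usage: append val_acc/len(val_set) each epoch and break the loop when should_stop(...) is True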

4. Test

del model
test_set = TIMITDataset(test, None)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

model = Classifier().to(device)
model.load_state_dict(torch.load(model_path))
<All keys matched successfully>
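The checkpoint loads cleanly here because it is saved and reloaded on the same device. If you later reload a GPU-trained checkpoint on a CPU-only machine, pass map_location to torch.load to avoid a device error:

model.load_state_dict(torch.load(model_path, map_location=device))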
predict = []
model.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader):
        inputs = data
        inputs = inputs.to(device)

        outputs = model(inputs)

        _, test_pred = torch.max(outputs, 1)

        for y in test_pred.cpu().numpy():
            predict.append(y)
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    for i, y in enumerate(predict):
        f.write('{},{}\n'.format(i, y))
print("Saving to file: prediction.csv")
Saving to file: prediction.csv