HW2: Phoneme Classification
Platform: Kaggle
Sample Code: Google Colab
0. Prepare
TIMIT
This homework is a multiclass classification task: we train a deep neural network classifier to predict the phoneme of each frame in the TIMIT speech corpus.
Download in Google Colab (or from the official site):
!gdown --id '1HPkcmQmFGu-3OknddKIa5dNDsR05lIQR' --output data.zip
!unzip data.zip
!ls
zsh:1: command not found: gdown
unzip: cannot find or open data.zip, data.zip.zip or data.zip.ZIP.
HW02.pdf  SHARE_MLSpring2021_HW2_1.ipynb  HW2.ipynb

(Run locally, gdown is not installed, so the download fails; install it with pip install gdown, or download the data manually. The ls output just lists the files already in the working directory.)
Prepare Data
import numpy as np

print("Loading data...")
data_root = './timit_11/'
train = np.load(data_root + 'train_11.npy')
train_label = np.load(data_root + 'train_label_11.npy')
test = np.load(data_root + 'test_11.npy')

print('Size of training data: {}'.format(train.shape))
print('Size of testing data: {}'.format(test.shape))
train_label
Loading data...
Size of training data: (1229932, 429)
Size of testing data: (451552, 429)
array(['36', '36', '36', ..., '35', '35', '35'], dtype='<U2')
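A note on the shapes: per the assignment handout, each 429-dimensional row is a window of 11 consecutive frames, each represented by 39 MFCC-based features, and the label (stored as a string such as '36') belongs to the center frame. A quick sanity check of that layout (a sketch, not part of the original notebook):

# Assumption from the handout: 429 = 11 frames x 39 feature dims per frame.
sample = train[0].reshape(11, 39)
print(sample.shape)   # (11, 39); the label applies to the center frame (index 5)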
1. Dataset
import torch
from torch.utils.data import Dataset

class TIMITDataset(Dataset):
    '''Speech dataset'''
    def __init__(self, X, y=None):
        self.data = torch.from_numpy(X).float()
        if y is not None:
            y = y.astype(np.int32)           # labels are stored as strings, e.g. '36'
            self.label = torch.LongTensor(y)
        else:
            self.label = None

    def __getitem__(self, idx):
        if self.label is not None:
            return self.data[idx], self.label[idx]
        else:
            return self.data[idx]

    def __len__(self):
        return len(self.data)
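A quick usage check of the wrapper (a sketch, not in the original notebook): feature rows come back as float tensors and labels as LongTensors, which is what nn.CrossEntropyLoss expects later.

# Wrap a few rows and fetch one item.
ds = TIMITDataset(train[:4], train_label[:4])
x, y = ds[0]
print(x.shape, y)   # torch.Size([429]) tensor(36)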
VAL_RATIO = 0.2

percent = int(train.shape[0] * (1 - VAL_RATIO))
train_x, train_y = train[:percent], train_label[:percent]
val_x, val_y = train[percent:], train_label[percent:]
print("Size of training set: {}".format(train_x.shape))
print("Size of validation set: {}".format(val_x.shape))
Size of training set: (983945, 429)
Size of validation set: (245987, 429)
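Note that this is a plain contiguous cut, not a shuffled split. Consecutive rows come from consecutive frames with overlapping 11-frame windows, so a shuffled split would leak nearly identical windows into the validation set and inflate the validation score. For comparison only, a shuffled variant would look like the sketch below (the shuf_* names are hypothetical, not from the notebook):

# Hypothetical shuffled split -- beware of leakage between overlapping windows.
rng = np.random.default_rng(0)
idx = rng.permutation(train.shape[0])
split = int(train.shape[0] * (1 - VAL_RATIO))
shuf_train_x, shuf_train_y = train[idx[:split]], train_label[idx[:split]]
shuf_val_x, shuf_val_y = train[idx[split:]], train_label[idx[split:]]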
BATCH_SIZE = 64

from torch.utils.data import DataLoader

train_set = TIMITDataset(train_x, train_y)
val_set = TIMITDataset(val_x, val_y)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
Since the arrays are large, we can free some memory at this point (gc.collect() returns the number of unreachable objects found, so a small number here is normal).
import gc

del train, train_label, train_x, train_y, val_x, val_y
gc.collect()
0
2. Model
import torch
import torch.nn as nn

class Classifier(nn.Module):
    '''Classifier model'''
    def __init__(self):
        super(Classifier, self).__init__()
        self.layer0 = nn.Linear(429, 2048)
        self.layer1 = nn.Linear(2048, 1024)
        self.layer2 = nn.Linear(1024, 512)
        self.layer3 = nn.Linear(512, 128)
        self.out = nn.Linear(128, 39)        # 39 phoneme classes

        self.act_fn = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.batchnorm0 = nn.BatchNorm1d(2048)
        self.batchnorm1 = nn.BatchNorm1d(1024)
        self.batchnorm2 = nn.BatchNorm1d(512)
        self.batchnorm3 = nn.BatchNorm1d(128)

    def forward(self, x):
        x = self.layer0(x)
        x = self.batchnorm0(x)
        x = self.act_fn(x)
        x = self.dropout(x)

        x = self.layer1(x)
        x = self.batchnorm1(x)
        x = self.act_fn(x)
        x = self.dropout(x)

        x = self.layer2(x)
        x = self.batchnorm2(x)
        x = self.act_fn(x)
        x = self.dropout(x)

        x = self.layer3(x)
        x = self.batchnorm3(x)
        x = self.act_fn(x)

        x = self.out(x)
        return x
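Every hidden stage repeats the same Linear -> BatchNorm -> ReLU -> Dropout pattern (the last 128-unit stage skips the dropout), so the same architecture can be written more compactly. A sketch of an equivalent model, for illustration only:

import torch.nn as nn

def block(in_dim, out_dim, p=0.2):
    # One Linear -> BatchNorm -> ReLU -> Dropout stage, as in Classifier.
    return nn.Sequential(nn.Linear(in_dim, out_dim), nn.BatchNorm1d(out_dim),
                         nn.ReLU(), nn.Dropout(p))

# Equivalent to Classifier above; note no dropout after the 128-unit stage.
compact_model = nn.Sequential(
    block(429, 2048),
    block(2048, 1024),
    block(1024, 512),
    nn.Linear(512, 128), nn.BatchNorm1d(128), nn.ReLU(),
    nn.Linear(128, 39),
)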
3. Train
def get_device():
    '''Return GPU if available, otherwise CPU'''
    return 'cuda' if torch.cuda.is_available() else 'cpu'

def same_seeds(seed):
    '''Fix random seeds for reproducibility'''
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
same_seeds(0)

device = get_device()
print(f'Device: {device}')

num_epoch = 50
learning_rate = 0.0001

model_path = './model.ckpt'

model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Device: cuda
best_acc = 0.0
for epoch in range(num_epoch):
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # training
    model.train()
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()
        optimizer.step()

        _, train_pred = torch.max(outputs, 1)   # index of the class with the highest logit
        train_acc += (train_pred.cpu() == labels.cpu()).sum().item()
        train_loss += batch_loss.item()

    # validation
    if len(val_set) > 0:
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(val_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                batch_loss = criterion(outputs, labels)
                _, val_pred = torch.max(outputs, 1)

                val_acc += (val_pred.cpu() == labels.cpu()).sum().item()
                val_loss += batch_loss.item()

            print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} Loss: {:3.6f}'.format(
                epoch + 1, num_epoch,
                train_acc / len(train_set), train_loss / len(train_loader),
                val_acc / len(val_set), val_loss / len(val_loader)
            ))

            # keep the checkpoint with the best validation accuracy
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), model_path)
                print('Saving model with acc {:3.6f}'.format(val_acc / len(val_set)))
    else:
        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
            epoch + 1, num_epoch,
            train_acc / len(train_set), train_loss / len(train_loader)
        ))

# if there is no validation set, save the model at the last epoch
if len(val_set) == 0:
    torch.save(model.state_dict(), model_path)
    print('Saving model at last epoch.')
[001/050] Train Acc: 0.609924 Loss: 1.286747 | Val Acc: 0.694931 Loss: 0.956475
Saving model with acc 0.694931
[002/050] Train Acc: 0.670930 Loss: 1.028066 | Val Acc: 0.715131 Loss: 0.876589
Saving model with acc 0.715131
[003/050] Train Acc: 0.693027 Loss: 0.948449 | Val Acc: 0.722457 Loss: 0.844810
Saving model with acc 0.722457
[004/050] Train Acc: 0.707268 Loss: 0.895078 | Val Acc: 0.730852 Loss: 0.815643
Saving model with acc 0.730852
[005/050] Train Acc: 0.717857 Loss: 0.856392 | Val Acc: 0.734486 Loss: 0.800215
Saving model with acc 0.734486
[006/050] Train Acc: 0.728068 Loss: 0.821858 | Val Acc: 0.737852 Loss: 0.786928
Saving model with acc 0.737852
[007/050] Train Acc: 0.735884 Loss: 0.792803 | Val Acc: 0.740482 Loss: 0.781770
Saving model with acc 0.740482
[008/050] Train Acc: 0.743622 Loss: 0.767214 | Val Acc: 0.742462 Loss: 0.776334
Saving model with acc 0.742462
[009/050] Train Acc: 0.750388 Loss: 0.743795 | Val Acc: 0.743291 Loss: 0.772254
Saving model with acc 0.743291
[010/050] Train Acc: 0.756309 Loss: 0.723846 | Val Acc: 0.743966 Loss: 0.777039
Saving model with acc 0.743966
[011/050] Train Acc: 0.761473 Loss: 0.705245 | Val Acc: 0.746893 Loss: 0.768033
Saving model with acc 0.746893
[012/050] Train Acc: 0.766553 Loss: 0.688432 | Val Acc: 0.744808 Loss: 0.774924
[013/050] Train Acc: 0.771151 Loss: 0.671526 | Val Acc: 0.747405 Loss: 0.768126
Saving model with acc 0.747405
[014/050] Train Acc: 0.776192 Loss: 0.655516 | Val Acc: 0.746763 Loss: 0.770916
[015/050] Train Acc: 0.779843 Loss: 0.642709 | Val Acc: 0.744210 Loss: 0.783095
[016/050] Train Acc: 0.784258 Loss: 0.629430 | Val Acc: 0.743808 Loss: 0.782051
[017/050] Train Acc: 0.787317 Loss: 0.617749 | Val Acc: 0.745161 Loss: 0.787476
[018/050] Train Acc: 0.791100 Loss: 0.605589 | Val Acc: 0.746808 Loss: 0.787505
[019/050] Train Acc: 0.794240 Loss: 0.596160 | Val Acc: 0.747308 Loss: 0.782463
[020/050] Train Acc: 0.797185 Loss: 0.585868 | Val Acc: 0.748263 Loss: 0.787579
Saving model with acc 0.748263
[021/050] Train Acc: 0.800573 Loss: 0.575418 | Val Acc: 0.747287 Loss: 0.790574
[022/050] Train Acc: 0.803376 Loss: 0.567265 | Val Acc: 0.747857 Loss: 0.791368
[023/050] Train Acc: 0.806060 Loss: 0.558286 | Val Acc: 0.745966 Loss: 0.798865
[024/050] Train Acc: 0.807887 Loss: 0.551136 | Val Acc: 0.746117 Loss: 0.801162
[025/050] Train Acc: 0.810238 Loss: 0.544797 | Val Acc: 0.744637 Loss: 0.806984
[026/050] Train Acc: 0.811948 Loss: 0.537816 | Val Acc: 0.745942 Loss: 0.805529
[027/050] Train Acc: 0.814784 Loss: 0.529741 | Val Acc: 0.744210 Loss: 0.819476
[028/050] Train Acc: 0.816640 Loss: 0.524325 | Val Acc: 0.744889 Loss: 0.814478
[029/050] Train Acc: 0.818946 Loss: 0.517234 | Val Acc: 0.746007 Loss: 0.817322
[030/050] Train Acc: 0.819842 Loss: 0.513504 | Val Acc: 0.745564 Loss: 0.814147
[031/050] Train Acc: 0.821976 Loss: 0.506308 | Val Acc: 0.744812 Loss: 0.832211
[032/050] Train Acc: 0.824199 Loss: 0.501401 | Val Acc: 0.744917 Loss: 0.815683
[033/050] Train Acc: 0.825285 Loss: 0.497150 | Val Acc: 0.746401 Loss: 0.827134
[034/050] Train Acc: 0.827462 Loss: 0.491655 | Val Acc: 0.745206 Loss: 0.826960
[035/050] Train Acc: 0.828403 Loss: 0.487580 | Val Acc: 0.744064 Loss: 0.830157
[036/050] Train Acc: 0.830072 Loss: 0.482296 | Val Acc: 0.744421 Loss: 0.836688
[037/050] Train Acc: 0.830875 Loss: 0.479000 | Val Acc: 0.743942 Loss: 0.837147
[038/050] Train Acc: 0.832914 Loss: 0.474777 | Val Acc: 0.744279 Loss: 0.845676
[039/050] Train Acc: 0.833925 Loss: 0.471150 | Val Acc: 0.744710 Loss: 0.846064
[040/050] Train Acc: 0.834895 Loss: 0.467962 | Val Acc: 0.744137 Loss: 0.847515
[041/050] Train Acc: 0.836528 Loss: 0.463762 | Val Acc: 0.744560 Loss: 0.840268
[042/050] Train Acc: 0.838013 Loss: 0.458918 | Val Acc: 0.745165 Loss: 0.842505
[043/050] Train Acc: 0.838658 Loss: 0.456205 | Val Acc: 0.743905 Loss: 0.855258
[044/050] Train Acc: 0.839970 Loss: 0.453910 | Val Acc: 0.743604 Loss: 0.863975
[045/050] Train Acc: 0.840819 Loss: 0.450363 | Val Acc: 0.742645 Loss: 0.861839
[046/050] Train Acc: 0.841898 Loss: 0.446966 | Val Acc: 0.743682 Loss: 0.862985
[047/050] Train Acc: 0.843353 Loss: 0.443084 | Val Acc: 0.743125 Loss: 0.867474
[048/050] Train Acc: 0.844095 Loss: 0.441196 | Val Acc: 0.742706 Loss: 0.869525
[049/050] Train Acc: 0.844868 Loss: 0.437980 | Val Acc: 0.743064 Loss: 0.869354
[050/050] Train Acc: 0.846024 Loss: 0.434302 | Val Acc: 0.742873 Loss: 0.865394
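The log shows classic overfitting: validation accuracy peaks at 0.748263 around epoch 20 while training accuracy keeps climbing to 0.846, so the best-checkpoint logic above is what saves the run. A patience-based early stop would also cut the ~30 wasted epochs after the peak; a minimal sketch, where run_one_epoch and evaluate are hypothetical stand-ins for the training and validation passes shown above:

patience, bad_epochs, best_acc = 5, 0, 0.0   # patience value is a guess
for epoch in range(num_epoch):
    run_one_epoch(model, train_loader, criterion, optimizer)   # training pass (as above)
    val_acc = evaluate(model, val_loader)                      # validation accuracy (as above)
    if val_acc > best_acc:
        best_acc, bad_epochs = val_acc, 0
        torch.save(model.state_dict(), model_path)
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print('Early stopping at epoch {:03d}'.format(epoch + 1))
            break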
4. Test
test_set = TIMITDataset(test, None)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

model = Classifier().to(device)
model.load_state_dict(torch.load(model_path))
<All keys matched successfully>
predict = []
model.eval()
with torch.no_grad():
    for i, data in enumerate(test_loader):
        inputs = data
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, test_pred = torch.max(outputs, 1)   # predicted class index per frame

        for y in test_pred.cpu().numpy():
            predict.append(y)
with open('prediction.csv', 'w') as f:
    f.write('Id,Class\n')
    for i, y in enumerate(predict):
        f.write('{},{}\n'.format(i, y))
print("Saving to file: prediction.csv")
Saving to file: prediction.csv
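A quick sanity check of the submission file before uploading (optional sketch; assumes pandas is available):

import pandas as pd

df = pd.read_csv('prediction.csv')
print(df.shape)    # expect (451552, 2): one prediction per test frame
print(df.head())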