定义训练函数:
In [17]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Run a standard train/val loop and return the model with the best-val-accuracy weights.

    Args:
        model: the network to train (already moved to `device` by the caller).
        criterion: loss function, e.g. nn.CrossEntropyLoss.
        optimizer: optimizer over model parameters.
        scheduler: LR scheduler, stepped once per epoch after the train phase.
        num_epochs: number of epochs to run.

    Returns:
        The model with the state_dict of the epoch that achieved the best
        validation accuracy loaded back in.

    NOTE: reads module-level globals `dataloaders`, `dataset_sizes`, `device`.
    """
    since = time.time()
    # Snapshot the initial weights; overwritten whenever val accuracy improves.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}: ', end='')
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward; track gradient history only in the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics: loss.item() is the batch mean, so re-weight by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            # Train metrics stay on the same line; val metrics end the line.
            # (restored '\n' here — the escape was lost in a previous export)
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} ',
                  end='' if phase == 'train' else '\n')
            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
In [14]:
# Build the architecture only: `weights=None` is the documented way to ask for
# random initialization (the `weights` parameter expects a WeightsEnum or None,
# not a bool — `weights=False` only works by accident on some versions).
model_ft = models.resnet18(weights=None)
# Load locally downloaded pretrained weights instead of fetching them over the
# network (auto-download can be very slow).
model_ft.load_state_dict(torch.load('./model/resnet18-f37072fd.pth'))
Out[14]:
<All keys matched successfully>
weights为True时,将会从网络中自动加载预训练好的权重,但由于网络原因,自动加载会很慢,所以设置为False,然后通过load_state_dict方法加载本地下载好的权重。
In [15]:
# model_ft = models.resnet18(pretrained=True) # 提取出预训练模型中,最后的全连接层 num_ftrs = model_ft.fc.in_features # 定义一个二分类的全连接层,并替换原来的全连接层 model_ft.fc = nn.Linear(num_ftrs, 2) model_ft = model_ft.to(device) criterion = nn.CrossEntropyLoss() # 所有参数都要进行更新 optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9) exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
In [18]:
# Fine-tune the pretrained ResNet-18 and keep the weights from the best validation epoch.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10)
Epoch 1/10: train Loss: 0.2739 Acc: 0.8852 val Loss: 0.2041 Acc: 0.9216 Epoch 2/10: train Loss: 0.2986 Acc: 0.8770 val Loss: 0.1896 Acc: 0.9281 Epoch 3/10: train Loss: 0.3275 Acc: 0.8607 val Loss: 0.1806 Acc: 0.9150 Epoch 4/10: train Loss: 0.4008 Acc: 0.8074 val Loss: 0.2174 Acc: 0.9085 Epoch 5/10: train Loss: 0.3790 Acc: 0.8156 val Loss: 0.2254 Acc: 0.9085 Epoch 6/10: train Loss: 0.2306 Acc: 0.9016 val Loss: 0.1927 Acc: 0.9150 Epoch 7/10: train Loss: 0.2859 Acc: 0.8689 val Loss: 0.1935 Acc: 0.9281 Epoch 8/10: train Loss: 0.2926 Acc: 0.9016 val Loss: 0.1830 Acc: 0.9216 Epoch 9/10: train Loss: 0.3015 Acc: 0.8607 val Loss: 0.1743 Acc: 0.9216 Epoch 10/10: train Loss: 0.2726 Acc: 0.8566 val Loss: 0.2281 Acc: 0.9085 Training complete in 0m 20s Best val Acc: 0.928105
可见,通过迁移学习,模型经过短短几次迭代,就已经获得了非常不错的准确率。
