Python Training Camp Check-in: Day 37
Knowledge points review:
- Judging overfitting: print training-set and test-set metrics together during training
- Saving and loading models (see the sketch after this list)
  - saving the weights only
  - saving the weights together with the model structure
  - saving a full checkpoint that also includes the training state
- Early stopping
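For reference, here is a minimal sketch of the three saving approaches listed above, using a placeholder nn.Linear model; the file names are illustrative, and the assignment code below uses the weights-only form.

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 1)                     # placeholder model
optimizer = optim.Adam(model.parameters())   # placeholder optimizer

# 1. Weights only: save/load the state_dict
torch.save(model.state_dict(), 'weights.pt')
model.load_state_dict(torch.load('weights.pt'))

# 2. Weights + model structure: pickles the whole nn.Module,
#    so the class definition must be importable when loading
torch.save(model, 'model_full.pt')
model = torch.load('model_full.pt', weights_only=False)

# 3. Full checkpoint that also carries training state (epoch, optimizer, ...)
torch.save({'epoch': 10,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()},
           'checkpoint.pt')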
Assignment: train on the credit dataset and save the weights, then load the weights and continue training for 50 epochs, applying an early-stopping strategy.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Load the credit dataset
def load_credit_data():
    # Fetch the German credit dataset via sklearn's fetch_openml
    credit = fetch_openml(name='credit-g', version=1, as_frame=True)
    X = credit.data
    y = credit.target

    # One-hot encode the categorical features
    X = pd.get_dummies(X)

    # Convert the target to 0/1 (1 = 'good' credit)
    y = (y == 'good').astype(int)

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train.values)
    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test.values)

    return X_train, y_train, X_test, y_test

# Define the credit risk model
class CreditRiskModel(nn.Module):
    def __init__(self, input_dim):
        super(CreditRiskModel, self).__init__()
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.sigmoid(self.layer3(x))
        return x

# Early stopping implementation
class EarlyStopping:
    def __init__(self, patience=5, verbose=False, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # No sufficient improvement: increase the patience counter
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: update the best score, save a checkpoint, reset the counter
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
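As a quick standalone check of how the EarlyStopping class above behaves, the snippet below feeds it a hand-written loss sequence; the dummy model, the loss values, and the 'dummy_checkpoint.pt' file name are made up for illustration.

# Standalone check of the EarlyStopping logic (illustrative values only)
dummy_model = nn.Linear(4, 1)
stopper = EarlyStopping(patience=2, verbose=True, path='dummy_checkpoint.pt')
for val_loss in [0.70, 0.65, 0.66, 0.67, 0.68]:   # improvement stalls after the second value
    stopper(val_loss, dummy_model)
    if stopper.early_stop:
        print("Would stop training here")
        break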

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs, device, early_stopping=None):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)

            # Backward pass and optimization step
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * inputs.size(0)

        # Average training loss
        train_loss = train_loss / len(train_loader.dataset)
        train_losses.append(train_loss)

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs).squeeze()
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)

                # Accuracy
                predicted = (outputs > 0.5).float()
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Average validation loss and accuracy
        val_loss = val_loss / len(val_loader.dataset)
        val_losses.append(val_loss)
        accuracy = 100 * correct / total

        print(f'Epoch {epoch+1}/{epochs}:')
        print(f'Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {accuracy:.2f}%')

        # Early-stopping check
        if early_stopping:
            early_stopping(val_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break

    return train_losses, val_losses

# Main function
def main():
    # Load the data
    X_train, y_train, X_test, y_test = load_credit_data()

    # Carve a validation set out of the training data
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

    # Data loaders
    batch_size = 64
    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataset = TensorDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_dataset = TensorDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model
    input_dim = X_train.shape[1]
    model = CreditRiskModel(input_dim).to(device)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Phase 1: train and save the weights
    print("Phase 1 training:")
    early_stopping = EarlyStopping(patience=5, verbose=True, path='credit_model_weights.pt')
    train_losses1, val_losses1 = train_model(model, train_loader, val_loader, criterion, optimizer,
                                             epochs=100, device=device, early_stopping=early_stopping)

    # Load the saved weights
    print("\nLoading saved weights and continuing training:")
    model = CreditRiskModel(input_dim).to(device)
    model.load_state_dict(torch.load('credit_model_weights.pt', map_location=device))

    # Phase 2: continue training for 50 epochs with early stopping
    optimizer = optim.Adam(model.parameters(), lr=0.0001)  # lower learning rate
    early_stopping = EarlyStopping(patience=3, verbose=True, path='credit_model_weights_continued.pt')
    train_losses2, val_losses2 = train_model(model, train_loader, val_loader, criterion, optimizer,
                                             epochs=50, device=device, early_stopping=early_stopping)

    # Evaluate the final model
    model.load_state_dict(torch.load('credit_model_weights_continued.pt', map_location=device))
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).squeeze()
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss = test_loss / len(test_loader.dataset)
    accuracy = 100 * correct / total
    print(f'\nFinal test results: Loss: {test_loss:.4f} | Acc: {accuracy:.2f}%')

    # Plot the loss curves
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(train_losses1, label='Train Loss')
    plt.plot(val_losses1, label='Validation Loss')
    plt.title('Phase 1 training loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    plt.subplot(1, 2, 2)
    plt.plot(train_losses2, label='Train Loss')
    plt.plot(val_losses2, label='Validation Loss')
    plt.title('Phase 2 training loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.savefig('training_losses.png')
    plt.show()


if __name__ == "__main__":
    main()

Phase 1 training:
Epoch 1/100:
Train Loss: 0.6892 | Val Loss: 0.6877 | Val Acc: 72.50%
Epoch 2/100:
Train Loss: 0.6760 | Val Loss: 0.6750 | Val Acc: 72.50%
...
Epoch 23/100:
Train Loss: 0.5202 | Val Loss: 0.5191 | Val Acc: 78.75%
EarlyStopping counter: 1 out of 5
Epoch 24/100:
Train Loss: 0.5173 | Val Loss: 0.5215 | Val Acc: 78.75%
EarlyStopping counter: 2 out of 5
...
Validation loss decreased (0.519123 --> 0.516870). Saving model ...
Epoch 30/100:
Train Loss: 0.5050 | Val Loss: 0.5170 | Val Acc: 78.75%
EarlyStopping counter: 1 out of 5
...
Early stopping
Loading saved weights and continuing training:
Epoch 1/50:
Train Loss: 0.5050 | Val Loss: 0.5170 | Val Acc: 78.75%
Epoch 2/50:
Train Loss: 0.5038 | Val Loss: 0.5159 | Val Acc: 78.75%
Validation loss decreased (0.516870 --> 0.515936). Saving model ...
...
Epoch 12/50:
Train Loss: 0.4945 | Val Loss: 0.5158 | Val Acc: 78.75%
Validation loss decreased (0.515936 --> 0.515824). Saving model ...
Epoch 13/50:
Train Loss: 0.4938 | Val Loss: 0.5160 | Val Acc: 78.75%
EarlyStopping counter: 1 out of 3
...
Early stopping
Final test results: Loss: 0.5158 | Acc: 78.75%
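The run above continues training from a weights-only file and builds a fresh optimizer for phase 2. As a variation tied to the "full checkpoint" knowledge point, here is a minimal sketch of saving and resuming with the optimizer state and epoch counter included; the helper names and the 'credit_checkpoint.pt' path are illustrative and not part of the code above.

# Illustrative helpers for resuming from a full checkpoint (not used in the run above)
def save_full_checkpoint(model, optimizer, epoch, val_loss, path='credit_checkpoint.pt'):
    torch.save({'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss}, path)

def resume_from_checkpoint(model, optimizer, path='credit_checkpoint.pt', device='cpu'):
    ckpt = torch.load(path, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    optimizer.load_state_dict(ckpt['optimizer_state_dict'])
    return ckpt['epoch'] + 1, ckpt['val_loss']   # epoch to resume from, best val loss so far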