
DAY 52 Neural Network Hyperparameter Tuning Guide

Knowledge points recap:

  1. Random seeds
  2. Initialization of model parameters (weights)
  3. Neural network hyperparameter tuning guide
    1. How hyperparameters are categorized
    2. The order in which to tune them
    3. Practical tips for each group of parameters (see the sketch after this list)
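
As a rough illustration of points 3.1 and 3.2, the hyperparameters exposed by the script below can be grouped and ordered as in the following sketch. The grouping and the suggested order are common conventions rather than rules taken from the original guide, and the names in the dictionary are just labels for this illustration.

# Rough illustration: grouping the hyperparameters used in the script below,
# plus one common order in which to tune them.
hyperparams = {
    "structure":      {"conv_channels": (32, 64), "fc_hidden": 128, "dropout": 0.5},
    "optimization":   {"optimizer": "Adam", "learning_rate": 0.01,
                       "batch_size": 128, "epochs": 15},
    "regularization": {"weight_decay": 1e-5, "data_augmentation": "RandomRotation(10)"},
}

# A typical order: get the optimization parameters roughly right first
# (learning rate, then batch size and epochs), then adjust regularization,
# and only change the network structure if that is still not enough.
tuning_order = ["learning_rate", "batch_size", "epochs",
                "weight_decay", "dropout", "conv_channels", "fc_hidden"]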

Assignment: take the simple CNN from Day 41 and see whether the tuning guide can help push its accuracy further (a small grid-search sketch follows the script below).

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import tqdm

# Set random seeds so that results are reproducible
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # Make cuDNN deterministic (may slow training slightly)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Define the CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        # First conv layer: 1 input channel, 32 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Second conv layer: 32 input channels, 64 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Conv block 1
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        # Conv block 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        # Flatten
        x = x.view(-1, 64 * 7 * 7)
        # Fully connected layers
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Weight initialization
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        nn.init.constant_(m.bias, 0)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    acc = 100. * correct / total
    return train_loss / len(train_loader), acc

# Test function
def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    acc = 100. * correct / total
    return test_loss / len(test_loader), acc

# Main function
def main():
    # Set random seed
    set_seed(42)

    # Select device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Data preprocessing
    transform_train = transforms.Compose([
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Load datasets
    train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)

    # Create data loaders
    batch_size = 128  # tuning parameter 1
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

    # Initialize the model
    model = SimpleCNN().to(device)
    model.apply(init_weights)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.01  # tuning parameter 2
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)  # tuning parameter 3
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

    # Train the model
    epochs = 15  # tuning parameter 4
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []
    best_acc = 0

    for epoch in range(epochs):
        print(f"\nEpoch: {epoch+1}/{epochs}")
        train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test(model, test_loader, criterion, device)

        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%")

        # Adjust learning rate
        scheduler.step(test_loss)

        # Save the best model
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"Model saved with accuracy: {best_acc:.2f}%")

    # Load the best model
    model.load_state_dict(torch.load('best_model.pth'))
    _, final_acc = test(model, test_loader, criterion, device)
    print(f"\nFinal Test Accuracy: {final_acc:.2f}%")

    # Plot training curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Loss Curves')

    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(test_accs, label='Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.title('Accuracy Curves')

    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.show()

if __name__ == '__main__':
    main()
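
For the assignment, one simple way to apply the tuning guide is to sweep the two cheapest "tuning parameters" marked in the script, the learning rate and the batch size, before touching anything else. The sketch below is a minimal example, not a definitive recipe: the function name small_grid_search, the candidate values, and the shortened epoch budget are all illustrative choices; it reuses set_seed, SimpleCNN, init_weights, train, and test from above and expects the MNIST datasets and device to be built the same way as in main().

# Minimal sketch of a small grid search over learning rate and batch size.
# Candidate values and the 5-epoch budget are examples chosen to keep the
# search cheap; the best pair can then be plugged back into main() for a
# full 15-epoch run.
def small_grid_search(train_dataset, test_dataset, device, epochs=5):
    results = {}
    for lr in [0.001, 0.01]:                # candidate learning rates (examples)
        for batch_size in [64, 128, 256]:   # candidate batch sizes (examples)
            set_seed(42)                    # same seed so runs are comparable
            train_loader = torch.utils.data.DataLoader(
                train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
            test_loader = torch.utils.data.DataLoader(
                test_dataset, batch_size=100, shuffle=False, num_workers=2)
            model = SimpleCNN().to(device)
            model.apply(init_weights)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
            best_acc = 0
            for _ in range(epochs):
                train(model, train_loader, criterion, optimizer, device)
                _, acc = test(model, test_loader, criterion, device)
                best_acc = max(best_acc, acc)
            results[(lr, batch_size)] = best_acc
            print(f"lr={lr}, batch_size={batch_size} -> best test acc {best_acc:.2f}%")
    return results

Fixing the seed before each configuration keeps the comparison fair, since every run starts from the same initialization and data order.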

