深度学习篇---InceptionNet网络结构
在 PyTorch 中实现 InceptionNet(以经典的 Inception v1,即 GoogLeNet 为例),核心是实现它的 "多分支特征融合" 机制 —— 通过 1×1、3×3、5×5 卷积和池化的并行组合,捕捉不同尺度的特征。我们从基础模块开始,一步步搭建,确保你能理解每个分支的作用。
一、先明确 Inception v1 的核心结构
Inception v1(GoogLeNet)的结构可以概括为:
输入(224×224彩色图) →
初始卷积层 → 池化层 →
多个Inception模块(分3个阶段) →
每个阶段后穿插池化层 →
两个辅助分类器(训练时用) →
全局平均池化 → 全连接层(输出1000类)
其中,Inception 模块(多分支并行卷积)和辅助分类器(解决深层梯度消失)是核心组件。
二、PyTorch 实现 Inception v1 的步骤
步骤 1:导入必要的库
import torch # 核心库
import torch.nn as nn # 神经网络层
import torch.optim as optim # 优化器
from torch.utils.data import DataLoader # 数据加载器
from torchvision import datasets, transforms # 图像数据处理
步骤 2:实现核心组件 ——Inception 模块
Inception 模块包含 4 个并行分支,分别用不同尺寸的卷积核提取特征,最后通过通道拼接融合:
class InceptionModule(nn.Module):
    """Inception v1 block: four parallel branches fused by channel concatenation.

    Every branch preserves the spatial size of the input, so the outputs can
    be stacked along the channel axis; the total output channel count is
    n1x1 + n3x3 + n5x5 + pool_proj.
    """

    def __init__(self, in_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super(InceptionModule, self).__init__()

        def conv_bn_relu(c_in, c_out, k, pad):
            # Conv -> BatchNorm -> ReLU triple; conv bias is omitted because
            # the following BatchNorm supplies its own learnable shift.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=k, stride=1, padding=pad, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]

        # Branch 1: 1x1 convolution (fine-grained features).
        self.branch1 = nn.Sequential(*conv_bn_relu(in_channels, n1x1, 1, 0))
        # Branch 2: 1x1 reduction -> 3x3 convolution (local features);
        # padding=1 keeps the spatial size unchanged.
        self.branch2 = nn.Sequential(
            *conv_bn_relu(in_channels, n3x3_reduce, 1, 0),
            *conv_bn_relu(n3x3_reduce, n3x3, 3, 1),
        )
        # Branch 3: 1x1 reduction -> 5x5 convolution (wider context);
        # padding=2 keeps the spatial size unchanged.
        self.branch3 = nn.Sequential(
            *conv_bn_relu(in_channels, n5x5_reduce, 1, 0),
            *conv_bn_relu(n5x5_reduce, n5x5, 5, 2),
        )
        # Branch 4: 3x3 max-pool (stride 1, padded -> same size) then a
        # 1x1 projection that supplies complementary pooled features.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            *conv_bn_relu(in_channels, pool_proj, 1, 0),
        )

    def forward(self, x):
        # Run all four branches on the same input, then fuse the
        # multi-scale features along the channel dimension.
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)
参数解释:
- in_channels:输入特征图的通道数;
- n1x1:分支 1 的 1×1 卷积输出通道数;
- n3x3_reduce / n3x3:分支 2 中 1×1 降维通道数和 3×3 卷积输出通道数;
- n5x5_reduce / n5x5:分支 3 中 1×1 降维通道数和 5×5 卷积输出通道数;
- pool_proj:分支 4 的 1×1 卷积输出通道数。
举例:当输入通道 = 192 时,一个典型配置是:
n1x1=64, n3x3_reduce=96, n3x3=128, n5x5_reduce=16, n5x5=32, pool_proj=32
输出总通道 = 64+128+32+32=256。
步骤 3:实现辅助分类器(解决深层梯度消失)
Inception v1 在网络中间加入 2 个辅助分类器,训练时提供额外梯度:
class AuxiliaryClassifier(nn.Module):
    """Side classifier attached to an intermediate 14x14 feature map.

    While training, its logits give the middle of the network an extra
    gradient signal (weighted 0.3 in the total loss); it is skipped at
    evaluation time.
    """

    def __init__(self, in_channels, num_classes):
        super(AuxiliaryClassifier, self).__init__()
        # Compress 14x14 -> 4x4 spatially, then reduce to 128 channels.
        self.features = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            nn.Conv2d(in_channels, 128, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        )
        # Small MLP head; the heavy dropout (p=0.7) curbs overfitting.
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),  # 4x4 is the post-pooling size
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.7),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)  # flatten to (batch, 128*4*4)
        return self.classifier(flat)
作用:
- 训练时,辅助分类器的损失会被加到总损失中(权重 0.3),让中间层也能获得有效梯度;
- 测试时,辅助分类器不工作,只使用主分类器的输出。
步骤 4:搭建 Inception v1 完整网络
按照 Inception v1 的经典结构,串联所有模块
结构解释:
- 网络总层数 22 层,通过 3 组 Inception 模块逐步提升特征抽象程度;
- 尺寸变化:224×224 → 112×112 → 56×56 → 28×28 → 14×14 → 7×7;
- 通道变化:3 → 64 → 192 → 256 → 480 → 512 → 832 → 1024。
步骤 5:准备数据(用 CIFAR-10 演示)
Inception v1 适合高精度分类,我们用 CIFAR-10(10 类)演示,输入尺寸调整为 224×224:
# 数据预处理:缩放+裁剪+翻转+标准化
transform = transforms.Compose([transforms.Resize(256), # 缩放为256×256transforms.RandomCrop(224), # 随机裁剪成224×224transforms.RandomHorizontalFlip(), # 数据增强transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # ImageNet标准化
])# 加载CIFAR-10数据集
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform
)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform
)# 批量加载数据(Inception计算量大,batch_size适当减小)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)
步骤 6:初始化模型、损失函数和优化器
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Build Inception v1 with 10 output classes (CIFAR-10) and the
# auxiliary classifiers enabled for training.
model = InceptionV1(num_classes=10, aux_logits=True).to(device)
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
# Optimizer: SGD with momentum, learning rate 0.01, plus weight decay.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
步骤 7:训练和测试函数(含辅助分类器损失)
训练时需要融合主分类器和辅助分类器的损失:
def train(model, train_loader, criterion, optimizer, epoch):
    """Run one training epoch, combining the main and both auxiliary losses.

    NOTE(review): relies on a module-level `device` being defined.
    """
    model.train()
    total_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # reset accumulated gradients
        # In train mode the model returns (main, aux1, aux2) logits.
        main_out, aux1, aux2 = model(data)
        # Paper-recommended weighting: main loss + 0.3 x each auxiliary loss.
        loss = (criterion(main_out, target)
                + 0.3 * criterion(aux1, target)
                + 0.3 * criterion(aux2, target))
        loss.backward()   # backpropagate
        optimizer.step()  # update parameters
        total_loss += loss.item()
        # Progress report every 50 batches.
        if batch_idx % 50 == 0:
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item():.6f}')


def test(model, test_loader):
    """Report top-1 accuracy of the main classifier on the test set."""
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            logits = model(images)  # eval mode returns only the main output
            _, guesses = torch.max(logits.data, 1)
            seen += labels.size(0)
            hits += (guesses == labels).sum().item()
    print(f'Test Accuracy: {100 * hits / seen:.2f}%')
步骤 8:开始训练和测试
Inception v1 训练较慢,建议训练 30-50 轮:
# Inception v1 trains slowly; 30 epochs is a reasonable starting budget.
for epoch in range(1, 31):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader)
在 CIFAR-10 上,Inception v1 训练充分后准确率能达到 93% 以上,体现了多尺度特征融合的优势。
三、完整代码总结
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms# 1. 实现Inception模块(多分支特征融合)
class InceptionModule(nn.Module):
    """GoogLeNet (Inception v1) building block.

    Four parallel branches — 1x1 conv, 1x1->3x3, 1x1->5x5, and
    3x3 max-pool -> 1x1 conv — all keep the spatial size, so their
    outputs are concatenated along the channel dimension.
    """

    def __init__(self, in_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super(InceptionModule, self).__init__()
        # Branch 1: plain 1x1 convolution (fine-grained features).
        self.branch1 = nn.Sequential(
            nn.Conv2d(in_channels, n1x1, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(inplace=True),
        )
        # Branch 2: 1x1 reduction followed by a 3x3 convolution (local features).
        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, n3x3_reduce, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(n3x3_reduce),
            nn.ReLU(inplace=True),
            nn.Conv2d(n3x3_reduce, n3x3, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(inplace=True),
        )
        # Branch 3: 1x1 reduction followed by a 5x5 convolution (wider context).
        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels, n5x5_reduce, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(n5x5_reduce),
            nn.ReLU(inplace=True),
            nn.Conv2d(n5x5_reduce, n5x5, kernel_size=5, stride=1, padding=2, bias=False),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(inplace=True),
        )
        # Branch 4: 3x3 max-pool (same size) then a 1x1 projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(pool_proj),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        # Channel-wise concatenation fuses the multi-scale features.
        return torch.cat(
            [self.branch1(x), self.branch2(x), self.branch3(x), self.branch4(x)],
            dim=1,
        )
# 2. Auxiliary classifier (counters vanishing gradients in deep layers)
class AuxiliaryClassifier(nn.Module):
    """Intermediate-layer classifier used only during training.

    Its loss (weighted 0.3) injects gradient into the middle of the
    network to fight vanishing gradients.
    """

    def __init__(self, in_channels, num_classes):
        super(AuxiliaryClassifier, self).__init__()
        # Spatial compression (14x14 -> 4x4) and 1x1 channel reduction to 128.
        self.features = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            nn.Conv2d(in_channels, 128, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        )
        # Two-layer head with strong dropout against overfitting.
        self.classifier = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.7),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        pooled = self.features(x)
        flattened = pooled.view(pooled.size(0), -1)
        return self.classifier(flattened)
# 3. Build the full Inception v1 network
class InceptionV1(nn.Module):
    """Inception v1 (GoogLeNet) with BatchNorm and optional auxiliary classifiers.

    Args:
        num_classes: size of the final classification layer.
        aux_logits: when True, forward() returns (main_out, aux1, aux2)
            in training mode; in eval mode only main_out is returned.
    """

    def __init__(self, num_classes=1000, aux_logits=True):
        super(InceptionV1, self).__init__()
        self.aux_logits = aux_logits
        # Stem: 224x224x3 input down to 28x28x192 before the first Inception block.
        self.initial_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(64, 64, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        # Stage 3 Inception blocks (28x28 feature maps).
        self.inception3a = InceptionModule(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = InceptionModule(256, 128, 128, 192, 32, 96, 64)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Stage 4 Inception blocks (14x14 feature maps).
        self.inception4a = InceptionModule(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = InceptionModule(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = InceptionModule(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = InceptionModule(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = InceptionModule(528, 256, 160, 320, 32, 128, 128)
        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Stage 5 Inception blocks (7x7 feature maps).
        self.inception5a = InceptionModule(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = InceptionModule(832, 384, 192, 384, 48, 128, 128)
        # Main head: GLOBAL average pooling to 1x1 followed by a single
        # 1024 -> num_classes linear layer, matching the GoogLeNet design
        # this article describes ("global average pool -> fully connected").
        # FIX: the previous AdaptiveAvgPool2d((7, 7)) + Linear(1024*7*7, ...)
        # head contradicted that description and added ~51M parameters.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(1024, num_classes)
        # Auxiliary heads tap inception4a (512 ch) and inception4d (528 ch).
        if self.aux_logits:
            self.aux1 = AuxiliaryClassifier(512, num_classes)
            self.aux2 = AuxiliaryClassifier(528, num_classes)

    def forward(self, x):
        x = self.initial_layers(x)
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.pool3(x)
        x = self.inception4a(x)
        # Auxiliary outputs are computed only while training.
        aux1 = self.aux1(x) if self.training and self.aux_logits else None
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        aux2 = self.aux2(x) if self.training and self.aux_logits else None
        x = self.inception4e(x)
        x = self.pool4(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 1024)
        x = self.dropout(x)
        main_out = self.fc(x)
        if self.training and self.aux_logits:
            return main_out, aux1, aux2
        else:
            return main_out
# 4. Prepare the CIFAR-10 data
# Preprocessing: resize, random 224x224 crop, flip, then ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# CIFAR-10 datasets (downloaded on first run).
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform
)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform
)
# Small batches keep Inception's memory footprint manageable.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)
# 5. Initialize the model, loss function and optimizer
# Use the GPU when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Inception v1 with 10 classes (CIFAR-10); auxiliary classifiers enabled.
model = InceptionV1(num_classes=10, aux_logits=True).to(device)
criterion = nn.CrossEntropyLoss()
# SGD with momentum and weight decay, learning rate 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
# 6. Training function (includes the auxiliary-classifier losses)
def train(model, train_loader, criterion, optimizer, epoch):
    """Run one training epoch, mixing main and auxiliary losses (0.3 weight each).

    NOTE(review): relies on a module-level `device` being defined.
    """
    model.train()
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        # Training mode yields (main, aux1, aux2) logits.
        main_out, aux1, aux2 = model(inputs)
        # Blend the main loss with both auxiliary losses.
        loss = (criterion(main_out, labels)
                + 0.3 * criterion(aux1, labels)
                + 0.3 * criterion(aux2, labels))
        loss.backward()
        optimizer.step()
        # Periodic progress report.
        if batch_idx % 50 == 0:
            print(f'Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item():.6f}')
# 7. Test function
def test(model, test_loader):
    """Report top-1 accuracy of the main classifier on the test set.

    NOTE(review): relies on a module-level `device` being defined.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():  # evaluation needs no gradients
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            logits = model(images)  # eval mode returns only the main output
            _, guesses = torch.max(logits.data, 1)
            seen += labels.size(0)
            hits += (guesses == labels).sum().item()
    print(f'Test Accuracy: {100 * hits / seen:.2f}%')
# 8. Start training and testing
# Train for 30 epochs, evaluating after each one.
for epoch in range(1, 31):
    train(model, train_loader, criterion, optimizer, epoch)
    test(model, test_loader)
四、关键知识点回顾
- 核心机制:Inception 模块通过 4 个并行分支(1×1、3×3、5×5 卷积 + 池化)捕捉多尺度特征,再用 torch.cat 拼接融合,让模型同时关注细节和全局;
- 1×1 卷积作用:在 3×3 和 5×5 卷积前降维,大幅减少计算量(如 5×5 卷积前降维可减少约 70% 计算量);
- 辅助分类器:训练时从中间层提取特征计算损失(权重 0.3),解决深层网络梯度消失问题,测试时不启用;
- 结构特点:
- 总层数 22 层,通过 3 组 Inception 模块逐步提升特征抽象程度;
- 尺寸从 224×224 逐步缩减到 7×7,通道数从 3 增长到 1024;
- 优缺点:精度高、特征提取能力强,但结构较复杂,计算量和内存占用较大。
通过这段代码,你能亲手实现这个 "多尺度特征融合专家",感受 InceptionNet 如何通过创新的分支设计突破传统 CNN 的性能瓶颈!