稠密连接网络(DenseNet)
简介
稠密连接网络(DenseNet)是由康奈尔大学的 Gao Huang 等人在 2017 年提出的深度卷积神经网络架构。它在 ResNet 的残差连接思想基础上进一步创新,通过密集跨层连接(Dense Connectivity)显著提升了特征复用效率,减少了参数量。
DenseNet 的核心思想
每一层的输入不仅来自前一层的输出,还来自前面所有层的输出。
与ResNet的主要区别在于,DenseNet里模块的输出不是像ResNet那样和模块的输出相加,而是在通道维上连结。这样模块的输出可以直接传入模块后面的层。在这个设计里,模块直接跟模块后面的所有层连接在了一起。这也是它被称为“稠密连接”的原因。
DenseNet的主要构建模块是稠密块(dense block)和过渡层(transition layer)。前者定义了输入和输出是如何连结的,后者则用来控制通道数,使之不过大。
DenseNet 的关键组件
稠密块
- 由多个密集连接的层组成,层间通过拼接(而非相加)传递特征。
- 每层的输出通道数固定(如 growth_rate=32),但整体通道数线性增长。
过渡层(Transition Layer)
- 用于连接不同 Dense Block,降低特征图尺寸并压缩通道数:
  - 1x1 卷积:减少通道数(通常压缩为一半)。
  - 2x2 平均池化:下采样。
代码:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import time
import sys
from torch import nn, optim
import torch.nn.functional as F# 定义全局平均池化层
class GlobalAvgPool2d(nn.Module):
    """Global average pooling layer.

    Implemented by setting the pooling window to the full spatial size
    (H, W) of the input, so the output shape is (N, C, 1, 1).
    """

    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        # x.size()[2:] is (H, W): one pooling window covering the whole plane.
        return F.avg_pool2d(x, kernel_size=x.size()[2:])
def conv_block(in_channels, out_channels):
    """BN -> ReLU -> 3x3 conv, the basic unit of a dense block.

    Uses the pre-activation ordering (BN/ReLU before conv) from the
    DenseNet paper; padding=1 keeps the spatial size unchanged.

    Parameters:
        in_channels - number of input channels (int)
        out_channels - number of output channels (int)
    Returns:
        an nn.Sequential implementing the conv block
    """
    blk = nn.Sequential(
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
    return blk
class DenseBlock(nn.Module):
    """Dense block: a stack of conv_blocks with dense (concatenative) links.

    Each conv_block emits out_channels (the growth rate) channels, and its
    input is the concatenation of the block input with the outputs of all
    preceding conv_blocks, so the channel count grows linearly.
    """

    def __init__(self, num_convs, in_channels, out_channels):
        super(DenseBlock, self).__init__()
        net = []
        for i in range(num_convs):
            # Input channels grow by out_channels per preceding layer.
            in_c = in_channels + i * out_channels
            net.append(conv_block(in_c, out_channels))
        self.net = nn.ModuleList(net)
        # Channel count of the final concatenated output; callers read this
        # to wire up the following transition layer.
        self.out_channels = in_channels + num_convs * out_channels

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            # Concatenate input and output along the channel dimension.
            X = torch.cat((X, Y), dim=1)
        return X
def transition_block(in_channels, out_channels):
    """Transition layer between dense blocks.

    BN -> ReLU -> 1x1 conv (reduces the channel count) -> 2x2 average
    pooling with stride 2 (halves the spatial size).

    Parameters:
        in_channels - number of input channels (int)
        out_channels - number of output channels (int)
    Returns:
        an nn.Sequential implementing the transition layer
    """
    blk = nn.Sequential(
        nn.BatchNorm2d(in_channels),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2))
    return blk
# Build the DenseNet: a stem (conv/BN/ReLU/max-pool), four dense blocks
# separated by channel-halving transition layers, then BN/ReLU, global
# average pooling, and a 10-way linear classifier.
net = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
num_channels, growth_rate = 64, 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    DB = DenseBlock(num_convs, num_channels, growth_rate)
    net.add_module('DenseBlock_%d' % i, DB)
    # Output channel count of the dense block just added.
    num_channels = DB.out_channels
    # Insert a channel-halving transition layer between dense blocks
    # (but not after the last one).
    if i != len(num_convs_in_dense_blocks) - 1:
        net.add_module('transition_block_%d' % i,
                       transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2
net.add_module('BN', nn.BatchNorm2d(num_channels))
net.add_module('relu', nn.ReLU())
# GlobalAvgPool2d output: (Batch, num_channels, 1, 1)
net.add_module('global_avg_pool', GlobalAvgPool2d())
net.add_module('fc', nn.Sequential(nn.Flatten(),
                                   nn.Linear(num_channels, 10)))
def load_data_fashion_mnist(batch_size, resize=None):
    """Load Fashion-MNIST and split it into mini-batch iterators.

    Parameters:
        batch_size - mini-batch size (int)
        resize - if given, resize each image to this size before ToTensor
    Returns:
        train_iter - DataLoader over the training split
        test_iter - DataLoader over the test split
    Modify:
        2020-11-26
        2020-12-10 added image resizing
    """
    # Image preprocessing pipeline.
    trans = []
    if resize:
        trans.append(transforms.Resize(size=resize))
    trans.append(transforms.ToTensor())
    transform = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root='data/FashionMNIST',
                                                    train=True,
                                                    download=True,
                                                    transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root='data/FashionMNIST',
                                                   train=False,
                                                   download=True,
                                                   transform=transform)
    if sys.platform.startswith('win'):
        # 0 means no extra worker processes for data loading on Windows.
        num_workers = 0
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            num_workers=num_workers)
    return train_iter, test_iter


def evaluate_accuracy(data_iter, net, device=None):
    """Compute classification accuracy of a multi-class model.

    Parameters:
        data_iter - iterable of (X, y) mini-batches
        net - the model (nn.Module or a plain callable)
        device - where to run the computation (GPU or CPU)
    Returns:
        accuracy over all samples in data_iter (float)
    Modify:
        2020-11-30
        2020-12-03 distinguish training vs. inference mode
        2020-12-10 allow specifying the compute device
    """
    if device is None and isinstance(net, torch.nn.Module):
        # Default to the device the model's parameters live on.
        device = next(net.parameters()).device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                # Evaluation mode: disables dropout, etc.
                net.eval()
                # .cpu() so the scalar can be accumulated numerically.
                acc_sum += (net(X.to(device)).argmax(dim=1) ==
                            y.to(device)).float().sum().cpu().item()
                # Switch back to training mode.
                net.train()
            # Custom (non-Module) models; GPU not considered here.
            else:
                if ('is_training' in net.__code__.co_varnames):
                    # Force is_training=False for evaluation.
                    acc_sum += (net(X, is_training=False).argmax(dim=1) ==
                                y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n


def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs):
    """Train an image-classification model and report progress per epoch.

    Parameters:
        net - the model to train
        train_iter - mini-batch iterator over the training set
        test_iter - mini-batch iterator over the test set
        batch_size - mini-batch size (unused directly; kept for API parity)
        optimizer - the optimizer
        device - where to run the computation (GPU or CPU)
        num_epochs - number of training epochs
    Modify:
        2020-12-10
    """
    # Move the model to the requested device.
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        start = time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            # Clear gradients before the backward pass.
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net, device=device)
        # NOTE: the collapsed original contained '\time', which a normal
        # string parses as a tab + 'ime'; the intended text is 'time'.
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec' % (epoch + 1, train_l_sum / batch_count,
                                 train_acc_sum / n, test_acc,
                                 time.time() - start))


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Training entry point: hyper-parameters, data, optimizer, train, save.
batch_size = 256
# Resize the 28x28 Fashion-MNIST images to 96x96 so the four-stage network
# still has a non-trivial spatial size at the final pooling layer.
train_iter, test_iter = load_data_fashion_mnist(batch_size,
                                                resize=96)
lr, num_epochs = 0.001, 2
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer,
          device, num_epochs)
# Persist the trained weights.
torch.save(net.state_dict(), 'DensoNet.params')