# To view the training curves in TensorBoard, run: tensorboard --logdir=logs
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import os
from datetime import datetime
import matplotlib.pyplot as plt
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # work around the duplicate OpenMP library error triggered by matplotlib
Define a convolutional network class. kernel_size is the size of each convolution kernel, stride is the step by which the convolution window moves, and padding adds extra pixels around the border of the image, used here to compensate for the shrinkage a convolution would otherwise cause (a quick shape check follows the class definition below).
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))  # 1x28x28 -> 32x28x28 -> 32x14x14
        x = self.pool(torch.relu(self.conv2(x)))  # 32x14x14 -> 64x14x14 -> 64x7x7
        x = x.view(-1, 64 * 7 * 7)                # flatten for the fully connected layers
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
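As a quick sanity check of the 64 * 7 * 7 input size of fc1 (each 2x2 max-pool halves the spatial size, so a 28x28 MNIST image becomes 14x14 and then 7x7), you can push a dummy batch through the network. A minimal snippet, not part of the training script:

# Sanity check: a fake 1x1x28x28 MNIST batch should yield a (1, 10) output
dummy = torch.randn(1, 1, 28, 28)   # (batch, channels, height, width)
print(CNN()(dummy).shape)           # expected: torch.Size([1, 10])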
def train():
    # Define the data transform; mapping the pixels to [-1, 1] helps the network converge faster
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    # Load the training and test sets; they are downloaded automatically if not present
    train_set = torchvision.datasets.MNIST(root='./', train=True, download=True, transform=transform)
    test_set = torchvision.datasets.MNIST(root='./', train=False, download=True, transform=transform)
    # Define the data loaders for the training and test sets (strictly speaking the "test" set is used as a validation set here)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)
    model = CNN().to(device)  # move the model to the target device, e.g. cuda
    criterion = nn.CrossEntropyLoss()  # use the cross-entropy loss
    optimizer = optim.AdamW(model.parameters(), lr=0.0001)  # use AdamW as the optimizer
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)  # move the data to the target device
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)  # compute the cross-entropy loss
            loss.backward()  # backpropagation
            optimizer.step()  # update the parameters
            running_loss += loss.item()
        print('[%d] loss: %.3f' % (epoch, running_loss / len(train_loader)))
        writer.add_scalar('training loss', running_loss / len(train_loader), epoch)  # log the average training loss to TensorBoard
        running_loss = 0.0
        model.eval()  # switch the model to evaluation mode
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
                if total == len(test_loader.dataset):  # last batch of the test set
                    # Build a figure showing the predictions
                    images = images.cpu().numpy()  # make sure the data is on the CPU
                    # Take a few images to display
                    images_to_show = images[:8]
                    predicted_classes = predicted[:8]
                    fig, axes = plt.subplots(2, 4, figsize=(10, 5))
                    for i, (image, predicted_class) in enumerate(zip(images_to_show, predicted_classes)):
                        row = i // 4
                        col = i % 4
                        ax = axes[row][col]
                        ax.imshow(image.squeeze(), cmap='gray')
                        ax.set_title('Predicted: {}'.format(predicted_class.item()))
                        ax.axis('off')
                    # Add the figure to TensorBoard
                    writer.add_figure('Predictions', fig, epoch)
        accuracy = 100. * correct / total
        print('Accuracy of the network on the test images: %.2f %%' % accuracy)
        writer.add_scalar('test accuracy', accuracy, epoch)  # log the test accuracy to TensorBoard
    torch.save(model, 'mnist_cnn.pth')  # save the .pth file (pickles the whole model object)
    # Save the ONNX file (a quick onnxruntime check is sketched after the script below)
    torch.onnx.export(model.to('cpu'), torch.randn(1, 1, 28, 28), 'mnist_cnn.onnx',  # make sure the model is moved back to the CPU
                      input_names=['input'], output_names=['output'],  # names of the input and output nodes
                      do_constant_folding=True,  # apply constant folding
                      export_params=True,  # include the weights
                      verbose=True,  # print the conversion details
                      dynamic_axes={'input': {0: 'batch_size'},
                                    'output': {0: 'batch_size'}})
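Since torch.save above pickles the whole model object rather than just a state_dict, the CNN class must be in scope when the file is loaded back. A minimal sketch of reloading mnist_cnn.pth and classifying one test image, to be run after training has produced the file (the weights_only remark is an assumption about newer PyTorch defaults):

# Reload the pickled model; recent PyTorch versions may require weights_only=False
# in torch.load because the file stores a full model object, not just tensors.
reloaded = torch.load('mnist_cnn.pth', map_location='cpu')
reloaded.eval()
sample_transform = transforms.Compose([transforms.ToTensor(),
                                       transforms.Normalize((0.5,), (0.5,))])
sample, label = torchvision.datasets.MNIST(root='./', train=False,
                                           download=True, transform=sample_transform)[0]
with torch.no_grad():
    prediction = reloaded(sample.unsqueeze(0)).argmax(1).item()
print('label:', label, 'prediction:', prediction)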
Main program:
# Create a TensorBoard summary writer, using the current system time as the log directory name
writer = SummaryWriter(log_dir="./logs/" + datetime.now().strftime("%Y%m%d%H%M%S"))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # choose the training device
print(device)
batch_size = 500
epochs = 50
train()
writer.close()
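To double-check the exported mnist_cnn.onnx independently of PyTorch, you can run it with the onnxruntime package (an extra dependency, not used above). A minimal sketch, assuming onnxruntime is installed:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('mnist_cnn.onnx')
dummy = np.random.randn(1, 1, 28, 28).astype(np.float32)   # same input shape as the export call
logits = session.run(['output'], {'input': dummy})[0]       # names match input_names/output_names above
print(logits.shape)                                         # expected: (1, 10)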