# Contents
# NOTE: the effect of this script has not been verified by testing yet.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor# 定义上述的大型全连接层模型
class LargeFullyConnectedModel(nn.Module):
    """Large fully connected network: four linear layers with ReLU in between.

    The default widths reproduce the original hard-coded architecture
    (10000 -> 20000 -> 15000 -> 12000 -> 5000). They are exposed as
    parameters so the same architecture can be built at a smaller scale,
    e.g. for quick experiments.

    Args:
        input_size: Number of input features.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        hidden_size3: Width of the third hidden layer.
        output_size: Number of output features.
    """

    def __init__(
        self,
        input_size: int = 10000,
        hidden_size1: int = 20000,
        hidden_size2: int = 15000,
        hidden_size3: int = 12000,
        output_size: int = 5000,
    ) -> None:
        super().__init__()
        # Attribute names are kept as fc1..fc4 / relu1..relu3 so that
        # state_dict keys stay compatible with existing checkpoints.
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_size3, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the four linear layers; the last one has no activation.

        Args:
            x: Input tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``output_size``.
        """
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.relu3(self.fc3(x))
        return self.fc4(x)


# Initialise the model and prepare the multi-GPU environment.
devices = [0, 1]  # indices of the GPUs to use

# Resolve the target device once so that the model and the tensors below
# always land on the same device. Previously the model fell back to CPU when
# CUDA was unavailable while the tensors were still sent to "cuda:N".
device = torch.device(f"cuda:{devices[0]}" if torch.cuda.is_available() else "cpu")

model = LargeFullyConnectedModel()
if torch.cuda.device_count() > 1 and len(devices) > 1:
    print(f"使用 {len(devices)} 个 GPU 进行推理")
    # DataParallel splits each input batch across the listed devices.
    model = nn.DataParallel(model, device_ids=devices)
else:
    # NOTE(review): this message is also printed when running on CPU.
    print("仅使用单个 GPU 进行推理")
model.to(device)

# Simulated data loading (example only; adapt this to your real data).
batch_size = 32
input_size = 10000
data = torch.randn(batch_size, input_size).to(device)
targets = torch.randint(0, 5000, (batch_size,)).to(device)

# Define the inference function.
def inference(net=None, inputs=None):
    """Run one forward pass in evaluation mode with gradients disabled.

    Args:
        net: Module to evaluate. When omitted, the module-level ``model``
            is used.
        inputs: Batch passed to ``net``. When omitted, the module-level
            ``data`` is used.

    Returns:
        The raw output of ``net(inputs)``.
    """
    if net is None:
        net = model
    if inputs is None:
        inputs = data

    net.eval()  # put the module into evaluation mode
    with torch.no_grad():
        outputs = net(inputs)
    # Further processing (e.g. computing loss or accuracy) can be added here.
    return outputs


if __name__ == "__main__":
    inference()