PyTorch 深度学习实践-循环神经网络(高级篇)

视频指路
参考博客笔记
参考笔记二

文章目录

  • 上课笔记
  • 总代码
  • 练习


上课笔记

个人能力有限,重看几遍吧,第一遍基本看不懂

在这里插入图片描述

名字的每个字母都是一个特征x1,x2,x3…,一个名字是一个序列

rnn用GRU

在这里插入图片描述

用ASCII表作为词典,长度为128,每一个值对应一个独热向量,比如77对应128维向量中第77个位置为1其他位置为0,但是对于embed层只要告诉它哪个是1就行,这些序列长短不一,需要padding到统一长度

在这里插入图片描述

把国家的名字变成索引标签,做成下图所示的词典

在这里插入图片描述
数据集构建

class NameDataset(Dataset):def __init__(self, is_train=True):# 文件读取filename = './dataset/names_train.csv.gz' if is_train else './dataset/names_test.csv.gz'with gzip.open(filename, 'rt') as f:  # rt表示以只读模式打开文件,并将文件内容解析为文本形式reader = csv.reader(f)rows =list(reader)  # 每个元素由一个名字和国家组成# 提取属性self.names = [row[0] for row in rows]self.len = len(self.names)self.countries = [row[1] for row in rows]# 编码处理self.country_list = list(sorted(set(self.countries)))  # 列表,按字母表顺序排序,去重后有18个国家名self.country_dict = self.get_countries_dict()  # 字典,国家名对应序号标签self.country_num = len(self.country_list)def __getitem__(self, item):# 索引获取return self.names[item], self.country_dict[self.countries[item]]  # 根据国家去字典查找索引def __len__(self):# 获取个数return self.lendef get_countries_dict(self):# 根据国家名对应序号country_dict = dict()for idx, country_name in enumerate(self.country_list):country_dict[country_name] = idxreturn country_dictdef idx2country(self, index):# 根据索引返回国家名字return self.country_list[index]def get_countries_num(self):# 返回国家名个数(分类的总个数)return self.country_num

双向RNN,从左往右走一遍,把得到的值和逆向计算得到的hN拼到一起,比如最后一个 [ h 0 b , h n f ] [h^b_0, h^f_n] [h0b,hnf]

		self.n_directions = 2 if bidirectional else 1self.gru = torch.nn.GRU(hidden_size, hidden_size, num_layers=n_layers, bidirectional=bidirectional)#bidirectional双向神经网络

在这里插入图片描述

h i d d e n = [ h N f , h N b ] hidden = [h{^f_N},h{^b_N}] hidden=[hNf,hNb]

# 进行打包(不考虑0元素,提高运行速度)首先需要将嵌入数据按长度排好
gru_input = pack_padded_sequence(embedding, seq_lengths)

pack_padded_sequence:这是 PyTorch 提供的一个函数,用于将填充后的序列打包。其主要目的是跳过填充值,并且在 RNN 中只处理实际的序列数据。它会将填充后的嵌入和实际序列长度作为输入,并返回一个打包后的序列,便于 RNN 处理。可以只把非零序列提取出来放到一块,也就是把为0的填充量都丢掉,这样将来fru就可以处理长短不一的输入序列

在这里插入图片描述

首先要根据序列长度进行排序,然后再经过嵌入层

在这里插入图片描述

如下图所示:这样用gru的时候效率就会更高,因为可以方便去掉好多padding的数据

在这里插入图片描述

双向RNN要拼接起来

if self.n_directions == 2:hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)  # hidden[-1]的形状是(1,256,100),hidden[-2]的形状是(1,256,100),拼接后的形状是(1,256,200)

总代码

import csv
import time
import matplotlib.pyplot as plt
import numpy as np
import math
import gzip  # 用于读取压缩文件
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence# 1.超参数设置
HIDDEN_SIZE = 100
BATCH_SIZE = 256
N_LAYER = 2  # RNN的层数
N_EPOCHS = 100
N_CHARS = 128  # ASCII码的个数
USE_GPU = False# 工具类函数
# 把名字转换成ASCII码,    b  返回ASCII码值列表和名字的长度
def name2list(name):arr = [ord(c) for c in name]return arr, len(arr)# 是否把数据放到GPU上
def create_tensor(tensor):if USE_GPU:device = torch.device('cuda:0')tensor = tensor.to(device)return tensordef timesince(since):now = time.time()s = now - sincem = math.floor(s / 60)  # math.floor()向下取整s -= m * 60return '%dmin %ds' % (m, s)  # 多少分钟多少秒# 2.构建数据集
class NameDataset(Dataset):def __init__(self, is_train=True):# 文件读取filename = './dataset/names_train.csv.gz' if is_train else './dataset/names_test.csv.gz'with gzip.open(filename, 'rt') as f:  # rt表示以只读模式打开文件,并将文件内容解析为文本形式reader = csv.reader(f)rows =list(reader)  # 每个元素由一个名字和国家组成# 提取属性self.names = [row[0] for row in rows]self.len = len(self.names)self.countries = [row[1] for row in rows]# 编码处理self.country_list = list(sorted(set(self.countries)))  # 列表,按字母表顺序排序,去重后有18个国家名self.country_dict = self.get_countries_dict()  # 字典,国家名对应序号标签self.country_num = len(self.country_list)def __getitem__(self, item):# 索引获取return self.names[item], self.country_dict[self.countries[item]]  # 根据国家去字典查找索引def __len__(self):# 获取个数return self.lendef get_countries_dict(self):# 根据国家名对应序号country_dict = dict()for idx, country_name in enumerate(self.country_list):country_dict[country_name] = idxreturn country_dictdef idx2country(self, index):# 根据索引返回国家名字return self.country_list[index]def get_countries_num(self):# 返回国家名个数(分类的总个数)return self.country_num# 3.实例化数据集
train_set = NameDataset(is_train=True)
train_loader = DataLoader(train_set, shuffle=True, batch_size=BATCH_SIZE, num_workers=2)
test_set = NameDataset(is_train=False)
test_loder = DataLoader(test_set, shuffle=False, batch_size=BATCH_SIZE, num_workers=2)
N_COUNTRY = train_set.get_countries_num()  # 18个国家名,即18个类别# 4.模型构建
class GRUClassifier(torch.nn.Module):def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):super(GRUClassifier, self).__init__()self.hidden_size = hidden_sizeself.n_layers = n_layersself.n_directions = 2 if bidirectional else 1# 词嵌入层,将词语映射到hidden维度self.embedding = torch.nn.Embedding(input_size, hidden_size)# GRU层(输入为特征数,这里是embedding_size,其大小等于hidden_size))self.gru = torch.nn.GRU(hidden_size, hidden_size, num_layers=n_layers, bidirectional=bidirectional)#bidirectional双向神经网络# 线性层self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)def _init_hidden(self, bath_size):# 初始化权重,(n_layers * num_directions 双向, batch_size, hidden_size)hidden = torch.zeros(self.n_layers * self.n_directions, bath_size, self.hidden_size)return create_tensor(hidden)def forward(self, input, seq_lengths):# 转置 B X S -> S X Binput = input.t()  # 此时的维度为seq_len, batch_sizebatch_size = input.size(1)hidden = self._init_hidden(batch_size)# 嵌入层处理 input:(seq_len,batch_size) -> embedding:(seq_len,batch_size,embedding_size)embedding = self.embedding(input)# 进行打包(不考虑0元素,提高运行速度)需要将嵌入数据按长度排好gru_input = pack_padded_sequence(embedding, seq_lengths)# output:(*, hidden_size * num_directions),*表示输入的形状(seq_len,batch_size)# hidden:(num_layers * num_directions, batch, hidden_size)output, hidden = self.gru(gru_input, hidden)if self.n_directions == 2:hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)  # hidden[-1]的形状是(1,256,100),hidden[-2]的形状是(1,256,100),拼接后的形状是(1,256,200)else:hidden_cat = hidden[-1]  # (1,256,100)fc_output = self.fc(hidden_cat)return fc_output# 3.数据处理(姓名->数字)
def make_tensors(names, countries):# 获取嵌入长度从大到小排序的seq_tensor(嵌入向量)、seq_lengths(对应长度)、countries(对应顺序的国家序号)-> 便于pack_padded_sequence处理name_len_list = [name2list(name) for name in names]  # 每个名字对应的1列表name_seq = [sl[0] for sl in name_len_list]  # 姓名列表seq_lengths = torch.LongTensor([sl[1] for sl in name_len_list])  # 名字对应的字符个数countries = countries.long()   # PyTorch 中,张量的默认数据类型是浮点型 (float),这里转换成整型,可以避免浮点数比较时的精度误差,从而提高模型的训练效果# 创建全零张量,再依次进行填充# 创建了一个 len(name_seq) * seq_length.max()维的张量seq_tensor = torch.zeros(len(name_seq), seq_lengths.max()).long()for idx, (seq, seq_len) in enumerate(zip(name_seq, seq_lengths)):seq_tensor[idx, :seq_len] = torch.LongTensor(seq)# 为了使用pack_padded_sequence,需要按照长度排序# perm_idx是排序后的数据在原数据中的索引,seq_tensor是排序后的数据,seq_lengths是排序后的数据的长度,countries是排序后的国家seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)  # descending=True 表示按降序进行排序,即从最长的序列到最短的序列。seq_tensor = seq_tensor[perm_idx]countries = countries[perm_idx]return create_tensor(seq_tensor), create_tensor(seq_lengths), create_tensor(countries)# 训练循环
def train(epoch, start):total_loss = 0for i, (names, countries) in enumerate(train_loader, 1):inputs, seq_lengths, target = make_tensors(names, countries)  # 输入、每个序列长度、输出output = model(inputs, seq_lengths)loss = criterion(output, target)optimizer.zero_grad()loss.backward()optimizer.step()total_loss += loss.item()if i % 10 == 0:print(f'[{timesince(start)}] Epoch {epoch} ', end='')print(f'[{i * len(inputs)}/{len(train_set)}] ', end='')print(f'loss={total_loss / (i * len(inputs))}')  # 打印每个样本的平均损失return total_loss# 测试循环
def test():correct = 0total = len(test_set)print('evaluating trained model ...')with torch.no_grad():for i, (names, countries) in enumerate(test_loder, 1):inputs, seq_lengths, target = make_tensors(names, countries)output = model(inputs, seq_lengths)pred = output.max(dim=1, keepdim=True)[1]  # 返回每一行中最大值的那个元素的索引,且keepdim=True,表示保持输出的二维特性correct += pred.eq(target.view_as(pred)).sum().item()  # 计算正确的个数percent = '%.2f' % (100 * correct / total)print(f'Test set: Accuracy {correct}/{total} {percent}%')return correct / total  # 返回的是准确率,0.几几的格式,用来画图if __name__ == '__main__':model = GRUClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)criterion = torch.nn.CrossEntropyLoss()optimizer = optim.Adam(model.parameters(), lr=0.001)device = 'cuda:0' if USE_GPU else 'cpu'model.to(device)start = time.time()print('Training for %d epochs...' % N_EPOCHS)acc_list = []# 在每个epoch中,训练完一次就测试一次for epoch in range(1, N_EPOCHS + 1):# Train cycletrain(epoch, start)acc = test()acc_list.append(acc)# 绘制在测试集上的准确率epoch = np.arange(1, len(acc_list) + 1)acc_list = np.array(acc_list)plt.plot(epoch, acc_list)plt.xlabel('Epoch')plt.ylabel('Accuracy')plt.grid()plt.show()

在准确率最高点save模型

保存整个模型:

torch.save(model,'save.pt')

只保存训练好的权重:

torch.save(model.state_dict(), 'save.pt')

练习

数据集地址,判断句子是哪类(0-negative,1-somewhat negative,2-neutral,3-somewhat positive,4-positive)情感分析

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.nn.utils.rnn import pad_sequence
from torch.nn.utils.rnn import pack_padded_sequence
import zipfile# 超参数设置
BATCH_SIZE = 64
HIDDEN_SIZE = 100
N_LAYERS = 2
N_EPOCHS = 10
LEARNING_RATE = 0.001# 数据集路径
TRAIN_ZIP_PATH = './dataset/train.tsv.zip'
TEST_ZIP_PATH = './dataset/test.tsv.zip'# 解压缩文件
def unzip_file(zip_path, extract_to='.'):with zipfile.ZipFile(zip_path, 'r') as zip_ref:zip_ref.extractall(extract_to)unzip_file(TRAIN_ZIP_PATH)
unzip_file(TEST_ZIP_PATH)# 数据集路径
TRAIN_PATH = './train.tsv'
TEST_PATH = './test.tsv'# 自定义数据集类
class SentimentDataset(Dataset):def __init__(self, phrases, sentiments=None):self.phrases = phrasesself.sentiments = sentimentsdef __len__(self):return len(self.phrases)def __getitem__(self, idx):phrase = self.phrases[idx]if self.sentiments is not None:sentiment = self.sentiments[idx]return phrase, sentimentreturn phrase# 加载数据
def load_data():train_df = pd.read_csv(TRAIN_PATH, sep='\t')test_df = pd.read_csv(TEST_PATH, sep='\t')return train_df, test_dftrain_df, test_df = load_data()# 数据预处理
def preprocess_data(train_df, test_df):le = LabelEncoder()train_df['Sentiment'] = le.fit_transform(train_df['Sentiment'])train_phrases = train_df['Phrase'].tolist()train_sentiments = train_df['Sentiment'].tolist()test_phrases = test_df['Phrase'].tolist()return train_phrases, train_sentiments, test_phrases, letrain_phrases, train_sentiments, test_phrases, le = preprocess_data(train_df, test_df)# 构建词汇表
def build_vocab(phrases):vocab = set()for phrase in phrases:for word in phrase.split():vocab.add(word)word2idx = {word: idx for idx, word in enumerate(vocab, start=1)}word2idx['<PAD>'] = 0return word2idxword2idx = build_vocab(train_phrases + test_phrases)# 将短语转换为索引
def phrase_to_indices(phrase, word2idx):return [word2idx[word] for word in phrase.split() if word in word2idx]train_indices = [phrase_to_indices(phrase, word2idx) for phrase in train_phrases]
test_indices = [phrase_to_indices(phrase, word2idx) for phrase in test_phrases]# 移除长度为0的样本
train_indices = [x for x in train_indices if len(x) > 0]
train_sentiments = [y for x, y in zip(train_indices, train_sentiments) if len(x) > 0]
test_indices = [x for x in test_indices if len(x) > 0]# 数据加载器
def collate_fn(batch):phrases, sentiments = zip(*batch)lengths = torch.tensor([len(x) for x in phrases])phrases = [torch.tensor(x) for x in phrases]phrases_padded = pad_sequence(phrases, batch_first=True, padding_value=0)sentiments = torch.tensor(sentiments)return phrases_padded, sentiments, lengthstrain_dataset = SentimentDataset(train_indices, train_sentiments)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)test_dataset = SentimentDataset(test_indices)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=lambda x: pad_sequence([torch.tensor(phrase) for phrase in x], batch_first=True, padding_value=0))# 模型定义
class SentimentRNN(nn.Module):def __init__(self, vocab_size, embed_size, hidden_size, output_size, n_layers):super(SentimentRNN, self).__init__()self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=0)self.lstm = nn.LSTM(embed_size, hidden_size, n_layers, batch_first=True, bidirectional=True)self.fc = nn.Linear(hidden_size * 2, output_size)def forward(self, x, lengths):x = self.embedding(x)x = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)_, (hidden, _) = self.lstm(x)hidden = torch.cat((hidden[-2], hidden[-1]), dim=1)out = self.fc(hidden)return outvocab_size = len(word2idx)
embed_size = 128
output_size = len(le.classes_)model = SentimentRNN(vocab_size, embed_size, HIDDEN_SIZE, output_size, N_LAYERS)criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)# 训练和测试循环
def train(model, train_loader, criterion, optimizer, n_epochs):model.train()for epoch in range(n_epochs):total_loss = 0for phrases, sentiments, lengths in train_loader:optimizer.zero_grad()output = model(phrases, lengths)loss = criterion(output, sentiments)loss.backward()optimizer.step()total_loss += loss.item()print(f'Epoch: {epoch+1}, Loss: {total_loss/len(train_loader)}')def generate_test_results(model, test_loader, test_ids):model.eval()results = []with torch.no_grad():for phrases in test_loader:lengths = torch.tensor([len(x) for x in phrases])output = model(phrases, lengths)preds = torch.argmax(output, dim=1)results.extend(preds.cpu().numpy())return resultstrain(model, train_loader, criterion, optimizer, N_EPOCHS)test_ids = test_df['PhraseId'].tolist()
preds = generate_test_results(model, test_loader, test_ids)# 保存结果
output_df = pd.DataFrame({'PhraseId': test_ids, 'Sentiment': preds})
output_df.to_csv('sentiment_predictions.csv', index=False)

引入随机性:重要性采样,对分类的样本按照它的分布来进行随机采样

gpt优化后代码:停词表自己在网上找个博客复制粘贴成stopwords.txt文件就行
主要改进点:

  1. 增加了嵌入层维度和隐藏层大小,LSTM 层数。
  2. 使用 GloVe 预训练词向量。
  3. 使用了 dropout 防止过拟合。
  4. 使用 AdamW 优化器。
  5. 移除停用词。
  6. 增加了验证集并使用学习率调度器用于模型性能评估。
  7. 保存和加载最优模型。

import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.nn.utils.rnn import pad_sequence
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from sklearn.model_selection import train_test_split
import zipfile
import os
from torchtext.vocab import GloVe# 超参数设置
BATCH_SIZE = 64
HIDDEN_SIZE = 256
N_LAYERS = 3
N_EPOCHS = 20
LEARNING_RATE = 0.01
EMBED_SIZE = 300
DROPOUT = 0.5# 数据集路径
TRAIN_ZIP_PATH = './dataset/train.tsv.zip'
TEST_ZIP_PATH = './dataset/test.tsv.zip'# 解压缩文件
def unzip_file(zip_path, extract_to='.'):with zipfile.ZipFile(zip_path, 'r') as zip_ref:zip_ref.extractall(extract_to)unzip_file(TRAIN_ZIP_PATH)
unzip_file(TEST_ZIP_PATH)# 数据集路径
TRAIN_PATH = './train.tsv'
TEST_PATH = './test.tsv'# 自定义数据集类
class SentimentDataset(Dataset):def __init__(self, phrases, sentiments=None):self.phrases = phrasesself.sentiments = sentimentsdef __len__(self):return len(self.phrases)def __getitem__(self, idx):phrase = self.phrases[idx]if self.sentiments is not None:sentiment = self.sentiments[idx]return phrase, sentimentreturn phrase# 加载数据
def load_data():train_df = pd.read_csv(TRAIN_PATH, sep='\t')test_df = pd.read_csv(TEST_PATH, sep='\t')return train_df, test_dftrain_df, test_df = load_data()# 数据预处理
def preprocess_data(train_df, test_df):le = LabelEncoder()train_df['Sentiment'] = le.fit_transform(train_df['Sentiment'])train_phrases = train_df['Phrase'].tolist()train_sentiments = train_df['Sentiment'].tolist()test_phrases = test_df['Phrase'].tolist()test_ids = test_df['PhraseId'].tolist()return train_phrases, train_sentiments, test_phrases, test_ids, letrain_phrases, train_sentiments, test_phrases, test_ids, le = preprocess_data(train_df, test_df)# 移除停用词
def remove_stopwords(phrases):stopwords = set(open('./dataset/stopwords.txt').read().split())return [' '.join([word for word in phrase.split() if word not in stopwords]) for phrase in phrases]train_phrases = remove_stopwords(train_phrases)
test_phrases = remove_stopwords(test_phrases)# 构建词汇表
def build_vocab(phrases):vocab = set()for phrase in phrases:for word in phrase.split():vocab.add(word)word2idx = {word: idx for idx, word in enumerate(vocab, start=1)}word2idx['<PAD>'] = 0return word2idxword2idx = build_vocab(train_phrases + test_phrases)# 加载预训练的词向量
glove = GloVe(name='6B', dim=EMBED_SIZE)def create_embedding_matrix(word2idx, glove):vocab_size = len(word2idx)embedding_matrix = torch.zeros((vocab_size, EMBED_SIZE))for word, idx in word2idx.items():if word in glove.stoi:embedding_matrix[idx] = glove[word]else:embedding_matrix[idx] = torch.randn(EMBED_SIZE)return embedding_matrixembedding_matrix = create_embedding_matrix(word2idx, glove)# 将短语转换为索引
def phrase_to_indices(phrase, word2idx):return [word2idx[word] for word in phrase.split() if word in word2idx]train_indices = [phrase_to_indices(phrase, word2idx) for phrase in train_phrases]
test_indices = [phrase_to_indices(phrase, word2idx) for phrase in test_phrases]# 移除长度为0的样本
train_indices, train_sentiments = zip(*[(x, y) for x, y in zip(train_indices, train_sentiments) if len(x) > 0])
# 注意:这里不移除 test_indices 中的空样本,因为我们需要保持 test_ids 的完整性
test_indices_with_default = [phrase if len(phrase) > 0 else [0] for phrase in test_indices]# 划分训练集和验证集
train_indices, val_indices, train_sentiments, val_sentiments = train_test_split(train_indices, train_sentiments, test_size=0.2, random_state=42)# 数据加载器
def collate_fn(batch):phrases, sentiments = zip(*batch)lengths = torch.tensor([len(x) for x in phrases])phrases = [torch.tensor(x) for x in phrases]phrases_padded = pad_sequence(phrases, batch_first=True, padding_value=0)sentiments = torch.tensor(sentiments)return phrases_padded, sentiments, lengthstrain_dataset = SentimentDataset(train_indices, train_sentiments)
val_dataset = SentimentDataset(val_indices, val_sentiments)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)test_dataset = SentimentDataset(test_indices_with_default)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,collate_fn=lambda x: pad_sequence([torch.tensor(phrase) for phrase in x], batch_first=True,padding_value=0))# 模型定义
class SentimentRNN(nn.Module):def __init__(self, vocab_size, embed_size, hidden_size, output_size, n_layers, dropout, embedding_matrix):super(SentimentRNN, self).__init__()self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)self.lstm = nn.LSTM(embed_size, hidden_size, n_layers, batch_first=True, bidirectional=True, dropout=dropout)self.fc = nn.Linear(hidden_size * 2, output_size)self.dropout = nn.Dropout(dropout)def forward(self, x, lengths):x = self.embedding(x)x = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)packed_output, (hidden, _) = self.lstm(x)hidden = torch.cat((hidden[-2], hidden[-1]), dim=1)hidden = self.dropout(hidden)out = self.fc(hidden)return outvocab_size = len(word2idx)
output_size = len(le.classes_)model = SentimentRNN(vocab_size, EMBED_SIZE, HIDDEN_SIZE, output_size, N_LAYERS, DROPOUT, embedding_matrix)criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)# 学习率调度器
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)# 训练和测试循环
def train(model, train_loader, val_loader, criterion, optimizer, scheduler, n_epochs):model.train()best_val_accuracy = 0for epoch in range(n_epochs):total_loss = 0model.train()for phrases, sentiments, lengths in train_loader:optimizer.zero_grad()output = model(phrases, lengths)loss = criterion(output, sentiments)loss.backward()optimizer.step()total_loss += loss.item()print(f'Epoch: {epoch + 1}, Loss: {total_loss / len(train_loader)}')val_accuracy = evaluate(model, val_loader)scheduler.step(total_loss / len(train_loader))if val_accuracy > best_val_accuracy:best_val_accuracy = val_accuracytorch.save(model.state_dict(), 'best_model.pt')print(f'Epoch: {epoch + 1}, Val Accuracy: {val_accuracy}')def evaluate(model, val_loader):model.eval()correct, total = 0, 0with torch.no_grad():for phrases, sentiments, lengths in val_loader:output = model(phrases, lengths)preds = torch.argmax(output, dim=1)correct += (preds == sentiments).sum().item()total += sentiments.size(0)return correct / totaldef generate_test_results(model, test_loader):model.eval()results = []with torch.no_grad():for phrases in test_loader:lengths = torch.tensor([len(x) for x in phrases])output = model(phrases, lengths)preds = torch.argmax(output, dim=1)results.extend(preds.cpu().numpy())return resultstrain(model, train_loader, val_loader, criterion, optimizer, scheduler, N_EPOCHS)# 加载最优模型
model.load_state_dict(torch.load('best_model.pt'))preds = generate_test_results(model, test_loader)# 确保 test_ids 和 preds 长度一致
assert len(test_ids) == len(preds), f"Lengths do not match: {len(test_ids)} vs {len(preds)}"# 保存结果
output_df = pd.DataFrame({'PhraseId': test_ids, 'Sentiment': preds})
output_df.to_csv('sentiment_predictions2.csv', index=False)

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/diannao/47885.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

业务系统核心模块资料访问性能优化实战

随着业务系统的云化转型不断推进&#xff0c;业务量呈现显著增长&#xff0c;对业务系统的性能和资源管理提出了更高要求。在这样的背景下&#xff0c;实现系统资源使用与性能指标的均衡成为保障生产系统高效稳定运行的核心任务。 在性能优化的范畴内&#xff0c;核心业务系统对…

苏州金龙海格汽车入选2024中国汽车行业可持续发展实践案例

2024年7月11日-13日&#xff0c;由中国汽车工业协会主办的第14届中国汽车论坛在上海嘉定举办。本届论坛隆重发布了“2024中国汽车行业可持续发展实践案例”&#xff0c;苏州金龙因在坚持绿色可持续发展方面做出的努力和贡献获评2024中国汽车行业可持续发展实践案例“绿色发展”…

【P2P_BMA_P2MP_NBMA】

基本概念介绍 1. BMA&#xff08;Broadcast&#xff09; 广播型多路访问技术&#xff0c;在一个MA&#xff08;多路访问&#xff0c;在一个网段内的节点数量不限制。&#xff09;网络中同时存在广播机制。 特点&#xff1a; 允许将数据包广播到网络上的所有主机。路由器之间…

【AI大模型Agent探索】深入探索实践 Qwen-Agent 的 Function Calling

系列篇章&#x1f4a5; No.文章1【Qwen部署实战】探索Qwen-7B-Chat&#xff1a;阿里云大型语言模型的对话实践2【Qwen2部署实战】Qwen2初体验&#xff1a;用Transformers打造智能聊天机器人3【Qwen2部署实战】探索Qwen2-7B&#xff1a;通过FastApi框架实现API的部署与调用4【Q…

PyTorch 深度学习实践-卷积神经网络基础篇

视频指路 参考博客笔记 参考笔记二 文章目录 上课笔记代码实现作业实现 上课笔记 如果一个网络全都是由线性层串联起来&#xff08;torch.nn.Linear(xx, yy)&#xff09;&#xff0c;就叫他全连接的网络(左边节点到右边节点任意两个都存在权重) 先看一下吴恩达或者李宏毅老师…

QT通用配置文件库(QPreferences)

QT通用配置文件库(QPreferences) QPreferences项目是基于nlohmann/json的qt可视化配置文件库&#xff0c;将配置保存成json格式&#xff0c;并提供UI查看与修改&#xff0c;可通过cmake可快速添加进项目。默认支持基本类型、stl常用容器、基本类型与stl容器组成的结构体&#…

【Git标签管理】理解标签 | 创建标签 | 查看标签 | 删除标签 | 推送标签

目录 1.理解标签 2.创建标签 3.查看标签 4.删除本地仓库的标签 5.推送标签 6.删除远程仓库的标签 1.理解标签 Git提供一个打标签的功能tag&#xff0c;对某一次事务/提交的表示&#xff08;作用/意义&#xff09;。标签 tag &#xff0c;可以简单的理解为是对某次 comm…

Java(二十二)---队列

文章目录 前言1.队列(Queue)的概念2.Queue的使用3.队列的模拟实现4.循环队列5.双端队列6.面试题[1. 用队列实现栈](https://leetcode.cn/problems/implement-stack-using-queues/description/)[2. 用栈实现队列](https://leetcode.cn/problems/implement-queue-using-stacks/de…

人工智能导论-机器学习

机器学习概述 概述 本章主要介绍的机器学习的概念、发展历程、发展趋势、相关应用&#xff0c;着重拓展机监督学习和无监督学习的相关知识。 重点&#xff1a;机器学习的定义和应用&#xff1b; 难点&#xff1a;机器学习算法及分类。 机器学习 - 重要性 MachineLeaning出…

<数据集>钢板缺陷检测数据集<目标检测>

数据集格式&#xff1a;VOCYOLO格式 图片数量&#xff1a;1986张 标注数量(xml文件个数)&#xff1a;1986 标注数量(txt文件个数)&#xff1a;1986 标注类别数&#xff1a;7 标注类别名称&#xff1a;[crescent gap, silk spot, water spot, weld line, oil spot, punchin…

工业圆点定位激光器主要应用场景有哪些?

在现代工业生产的各个领域&#xff0c;精确定位和高效操作已成为提升生产效率和产品质量的关键。其中&#xff0c;工业圆点定位激光器以其高精度、高效率的特性&#xff0c;成为了众多工业应用中的核心工具。接下来我们就跟着鑫优威一起来深入了解一下关于工业圆点定位激光器的…

抖音/快手/小红书私信卡片在线制作

W外链平台&#xff0c;作为现代网络营销领域的一颗璀璨明星&#xff0c;其强大的功能和独特的优势已经吸引了无数企业和个人的目光。在如今这个信息爆炸的时代&#xff0c;如何有效地将自己的网站、产品、服务推广出去&#xff0c;成为了每个营销人员都在思考的问题。而W外链平…

CentOS 7报错:yum命令报错 “ Cannot find a valid baseurl for repo: base/7/x86_6 ”

参考连接&#xff1a; 【linux】CentOS 7报错&#xff1a;yum命令报错 “ Cannot find a valid baseurl for repo: base/7/x86_6 ”_centos linux yum search ifconfig cannot find a val-CSDN博客 Centos7出现问题Cannot find a valid baseurl for repo: base/7/x86_64&…

uniapp 微信默认地图选点功能实现

效果图 配置项 微信公众平台-小程序配置 uniapp配置 上代码 <template><view class"content"><button click"toMap">请选择位置</button></view> </template><script setup lang"ts">function toMa…

Flink HA

目录 Flink HA集群规划 环境变量配置 masters配置 flink-conf.yaml配置 测试 Flink HA集群规划 FLink HA集群规划如下&#xff1a; IP地址主机名称Flink角色ZooKeeper角色192.168.128.111bigdata111masterQuorumPeerMain192.168.128.112bigdata112worker、masterQuorumPee…

Leetcode1929. 数组串联

问题描述&#xff1a; 给你一个长度为 n 的整数数组 nums 。请你构建一个长度为 2n 的答案数组 ans &#xff0c;数组下标 从 0 开始计数 &#xff0c;对于所有 0 < i < n 的 i &#xff0c;满足下述所有要求&#xff1a; ans[i] nums[i]ans[i n] nums[i] 具体而言…

Unity 导入MRTK,使用URP 升级材质,MRTK的材质还是洋红色

控制台显示信息 ToggleBackground material was not upgraded. There’s no upgrader to convert Mixed Reality Toolkit/Standard shader to selected pipeline UnityEditor.Rendering.Universal.UniversalRenderPipelineMaterialUpgrader:UpgradeProjectMaterials() (at 点击…

独立游戏《星尘异变》UE5 C++程序开发日志6——实现存档和基础设置系统

目录 一、存档类 1.创建一个SaveGame类 2.存储关卡内数据 3.加载关卡数据 4.关于定时器 5.存储全局数据 6.加载全局数据 二、存档栏 1.存档栏的数据结构 2.创建新存档 3.覆盖已有存档 4.删除存档 三、游戏的基础设置 1.存储游戏设置的数据结构 2.初始化设置 3.…

在国产芯片上实现YOLOv5/v8图像AI识别-【1.3】YOLOv5的介绍及使用(训练、导出)更多内容见视频

本专栏主要是提供一种国产化图像识别的解决方案&#xff0c;专栏中实现了YOLOv5/v8在国产化芯片上的使用部署&#xff0c;并可以实现网页端实时查看。根据自己的具体需求可以直接产品化部署使用。 B站配套视频&#xff1a;https://www.bilibili.com/video/BV1or421T74f 数据…

5.5 软件工程-系统测试

系统测试 - 意义和目的 系统测试 - 原则 系统测试 - 测试过程 系统测试 - 测试策略 系统测试 - 测试方法 真题 系统测试 - 测试用例设计 黑盒测试 白盒测试 真题 系统测试 - 调试 系统测试 - 软件度量 真题