from tqdm import tqdm
import numpy as np
import random
from tensorflow.python.keras.preprocessing.sequence import pad_sequences


def gen_data_set(data, negsample=0):
    """Build shuffled train/test sample lists from an interaction log.

    For every user, the interactions are walked in timestamp order. Each
    item from the second one onward becomes a positive sample whose history
    is the items that came before it, most recent first. The user's last
    item goes to the test set; all earlier ones go to the training set.

    Args:
        data: DataFrame with ``user_id``, ``item_id``, ``rating`` and
            ``timestamp`` columns. It is sorted by ``timestamp`` IN PLACE.
        negsample: Number of negative samples to draw for each positive
            training sample. ``0`` disables negative sampling.

    Returns:
        ``(train_set, test_set)``, both shuffled lists of tuples.
        Positive samples are
        ``(user_id, history, item_id, 1, len(history), rating)``;
        negative samples (training only) are
        ``(user_id, history, item_id, 0, len(history))`` with no rating.
    """
    # Sort chronologically so each user's group below is in time order.
    data.sort_values("timestamp", inplace=True)
    # Distinct items form the pool negatives are drawn from. The set is
    # built once here instead of once per user.
    item_ids = data['item_id'].unique()
    all_items = set(item_ids)

    train_set = []
    test_set = []
    for reviewrID, user_rows in tqdm(data.groupby('user_id')):
        pos_list = user_rows['item_id'].tolist()
        rating_list = user_rows['rating'].tolist()

        neg_list = None
        if negsample > 0:
            # Negatives must be items this user never interacted with.
            candidate_set = list(all_items - set(pos_list))
            # np.random.choice raises on an empty pool, so a user who has
            # seen every item simply gets no negatives.
            if candidate_set:
                neg_list = np.random.choice(
                    candidate_set,
                    size=len(pos_list) * negsample,
                    replace=True,
                )

        last_index = len(pos_list) - 1
        for i in range(1, len(pos_list)):
            # History is only what the user did BEFORE the target item,
            # most recent first. Using the whole per-user frame here would
            # leak the target (and everything after it) into the features.
            hist = pos_list[:i][::-1]
            hist_len = len(hist)

            if i != last_index:
                train_set.append(
                    (reviewrID, hist, pos_list[i], 1, hist_len, rating_list[i])
                )
                if neg_list is not None:
                    for negi in range(negsample):
                        train_set.append(
                            (reviewrID, hist, neg_list[i * negsample + negi], 0, hist_len)
                        )
            else:
                # The final interaction of each user is held out for testing.
                test_set.append(
                    (reviewrID, hist, pos_list[i], 1, hist_len, rating_list[i])
                )

    random.shuffle(train_set)
    random.shuffle(test_set)
    return train_set, test_set


def gen_model_input(train_set, user_profile, seq_max_len):
    """Turn a list of sample tuples into model feature arrays and labels.

    Args:
        train_set: Sample tuples as produced by ``gen_data_set``; positions
            0-4 are user id, history sequence, item id, label and history
            length.
        user_profile: Table providing ``gender``, ``age`` and ``city``
            columns; it is looked up with ``.loc`` using the user ids, so it
            is presumably indexed by user id -- confirm against the caller.
        seq_max_len: Length every history sequence is padded/truncated to.

    Returns:
        ``(train_model_input, train_label)`` where ``train_model_input`` is
        a dict of arrays keyed by feature name and ``train_label`` is the
        array of 0/1 labels.
    """
    train_uid = np.array([line[0] for line in train_set])
    train_seq = [line[1] for line in train_set]
    train_iid = np.array([line[2] for line in train_set])
    train_label = np.array([line[3] for line in train_set])
    # NOTE: this is the untruncated history length; it can exceed
    # seq_max_len even though the padded sequence below cannot.
    train_hist_len = np.array([line[4] for line in train_set])

    # Pad with 0 / cut at the tail so every history has seq_max_len entries.
    train_seq_pad = pad_sequences(
        train_seq,
        maxlen=seq_max_len,
        padding='post',
        truncating='post',
        value=0,
    )

    train_model_input = {
        "user_id": train_uid,
        "item_id": train_iid,
        "hist_item_id": train_seq_pad,
        "hist_len": train_hist_len,
    }

    # A tuple (not a set) keeps the feature insertion order deterministic.
    for key in ("gender", "age", "city"):
        train_model_input[key] = user_profile.loc[train_model_input['user_id']][key].values

    return train_model_input, train_label
代码解释:
**gen_data_set()** 的主要作用是接收数据集(data)和一个负采样(negsample)参数,返回一个训练集列表(train_set)和一个测试集列表(test_set)。具体流程是先通过timestamp列对数据进行排序,并对item_id去重得到全部物品集合;然后根据user_id分组形成正负样本(正样本为购买过的,负样本为没有购买过的),当negsample大于0时,就要进行负采样,也就是随机选择一些没有购买过的商品作为负样本,然后将它们保存到训练集中;最后,将正负样本数据以及其他信息(如历史交互序列、用户 ID 和历史交互序列的长度)保存到训练集列表和测试集列表中。
当计算机系统中mfc140.dll文件丢失时,可能会引发一系列运行问题,影响到系统的正常功能及应用程序的稳定执行。具体来说,由于mfc140.dll是Microsoft Visual C++ Redistributable Package的重要组成部分,它的缺失会导致依赖于该动态链…