# Homogeneous graph convolution (同构图卷积)
from time import time
import numpy as np
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
class TwoLayerModel(nn.Module):
    """Two-layer homogeneous GraphConv encoder with an MLP scoring head.

    forward() consumes a pair of DGL message-flow blocks (as produced by a
    2-layer neighbour sampler), integer node features and 0/1 labels, and
    returns the summed BCE loss over the batch.
    """

    def __init__(self, num_feat_values=2, in_dim=256, hid_dim=128):
        super().__init__()
        # NOTE(review): the original forward referenced an undefined
        # `self.feat_mapping`; an embedding table over the integer feature
        # values is the most plausible intent -- confirm `num_feat_values`
        # against the real feature vocabulary before relying on the default.
        self.feat_mapping = nn.Embedding(num_feat_values, in_dim)
        self.conv1 = dgl.nn.GraphConv(in_dim, hid_dim)
        self.conv2 = dgl.nn.GraphConv(hid_dim, hid_dim)
        # Scoring MLP: hid_dim -> 64 -> 32 -> 1, ReLU between hidden layers,
        # Sigmoid on the last layer so outputs are probabilities for BCELoss.
        layer_size = [hid_dim, 64, 32, 1]
        self.predict = nn.Sequential()
        for j in range(len(layer_size) - 1):
            self.predict.add_module("Linear_layer_%d" % j,
                                    nn.Linear(layer_size[j], layer_size[j + 1]))
            if j == len(layer_size) - 2:
                self.predict.add_module("Sigmoid_layer_%d" % j, nn.Sigmoid())
            else:
                self.predict.add_module("Relu_layer_%d" % j, nn.ReLU())
        self.lossfunc = nn.BCELoss(reduction='sum')
        # Registered as a buffer (was a loose tensor) so it follows the
        # module across .to(device) / state_dict round-trips.
        self.register_buffer("epsilon", torch.FloatTensor([1e-12]))

    def forward(self, blocks, x, label):
        """Return the summed BCE loss for one sampled mini-batch.

        Args:
            blocks: two DGL message-flow blocks (outer, inner).
            x: LongTensor (N, F) of raw integer node features.
            label: 0/1 labels for the output nodes of `blocks[1]`.
        """
        # Embed each integer feature field and sum-pool to one in_dim vector.
        x = self.feat_mapping(x).sum(dim=1)
        x = F.relu(self.conv1(blocks[0], x))
        x = F.relu(self.conv2(blocks[1], x))
        label = label.reshape(-1, 1).float()
        # L2-normalise embeddings; epsilon guards against a zero norm.
        norm = torch.max(torch.norm(x, dim=1, keepdim=True), self.epsilon)
        prob = self.predict(x / norm)
        return self.lossfunc(prob, label)
def loadData(path):
    """Load the train/test bidirected user graphs and label lookup tables.

    Reads `uu_{tr,te}.csv` (edge lists) and `uf_{tr,te}.csv` (node features)
    from `path`, plus the pickled positive/negative user-id splits.

    Returns:
        bg_l: [train_graph, test_graph] -- self-looped bidirected DGLGraphs,
              each carrying a LongTensor 'feat' on its nodes.
        X_train_p, X_train_n, X_test_p, X_test_n: lists of user ids.
        train_label_map, test_label_map: dense 0/1 label vectors indexable
              by user id.
    """
    # Function-scope imports: `pd`, `pickle` (and the old `th` alias) were
    # never imported at the top of this script.
    import pickle
    import pandas as pd

    uu_fn = ["uu_tr.csv", "uu_te.csv"]
    uf_fn = ["uf_tr.csv", "uf_te.csv"]
    bg_l = []
    for i in range(2):
        uu = pd.read_csv(path + uu_fn[i], header=0)
        uf = pd.read_csv(path + uf_fn[i], header=0)
        g = dgl.graph((torch.tensor(uu['uid'].values),
                       torch.tensor(uu['fid'].values)),
                      num_nodes=uf.shape[0])
        bg = dgl.to_bidirected(g)
        bg = dgl.add_self_loop(bg)
        # Drop the id column (column 0); keep the raw integer features.
        bg.ndata['feat'] = torch.LongTensor(uf.iloc[:, 1:].values)
        print(bg)
        bg_l.append(bg)
    with open(path + "u_train_test.pickle", "rb") as fp:
        # NOTE(review): pickle.load on a local data file -- trusted input
        # assumed; do not point this at untrusted data.
        X_train_p, X_train_n, X_test_p, X_test_n = pickle.load(fp)
    # Dense label lookups: 1 for positive ids, 0 otherwise.
    train_label_map = torch.zeros(max(X_train_p + X_train_n) + 1)
    train_label_map[torch.LongTensor(X_train_p)] = 1
    test_label_map = torch.zeros(max(X_test_p + X_test_n) + 1)
    test_label_map[torch.LongTensor(X_test_p)] = 1
    return (bg_l, X_train_p, X_train_n, X_test_p, X_test_n,
            train_label_map, test_label_map)
if __name__ == "__main__":
    # Bug fix: the original referenced an undefined `args`; expose the
    # data-percentage suffix through argparse with a plain default.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataPercesent", type=str, default="100")
    args = parser.parse_args()

    model = TwoLayerModel()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
    (bg_l, X_train_p, X_train_n, X_test_p, X_test_n,
     train_label_map, test_label_map) = loadData("../data_" + args.dataPercesent + "/")

    # 2-layer full-neighbour sampling; one loader per split.
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
    dataloader = dgl.dataloading.NodeDataLoader(
        dgl.add_self_loop(bg_l[0]), X_train_p + X_train_n, sampler,
        batch_size=1024 * 8, shuffle=True, drop_last=False)
    # Test loader delivers the whole test split in one batch.
    dataloader2 = dgl.dataloading.NodeDataLoader(
        dgl.add_self_loop(bg_l[1]), X_test_p + X_test_n, sampler,
        batch_size=len(X_test_p + X_test_n), shuffle=False, drop_last=False)

    for epoch in range(200):
        t0 = time()
        model.train()
        train_loss = 0.
        for input_nodes, output_nodes, blocks in dataloader:
            optimizer.zero_grad()
            input_features = blocks[0].srcdata['feat']
            # Bug fix: the original assigned `loss` but called
            # `loss_.backward()` / `loss_.item()` (NameError at runtime).
            loss = model(blocks, input_features,
                         train_label_map[torch.LongTensor(output_nodes)])
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        t1 = time()
        model.eval()
        with torch.no_grad():
            for input_nodes, output_nodes, blocks in dataloader2:
                input_features = blocks[0].srcdata['feat']
                test_loss = model(blocks, input_features,
                                  test_label_map[torch.LongTensor(output_nodes)])
        t2 = time()
        # Bug fix: `log` was undefined; print matches the sibling script.
        print('epoch[%d],TrainLoss[%.2f],TestLoss[%.2f],time[%.1f + %.1f]'
              % (epoch, train_loss, test_loss.item(), t1 - t0, t2 - t1))
# Heterogeneous graph convolution (异构图卷积)
from time import time
import torch
import torch.nn as nn
import random
from torch.optim import lr_scheduler
import dgl
import dgl.function as fn
import numpy as np
import pandas as pd
class HeteroGNN(nn.Module):
    """Heterogeneous GNN: one HeteroRGCNLayer pass, then an MLP user scorer."""

    def __init__(self, G):
        super(HeteroGNN, self).__init__()
        self.G = G
        self.hgl = HeteroRGCNLayer()
        # Learnable fallback embeddings for group/ip nodes.
        # NOTE(review): forward() overwrites these from 'feat' tensors, so
        # they are effectively dead unless 'feat' is absent -- confirm intent.
        self.G.nodes['group'].data['emb'] = nn.Parameter(torch.randn(3320, 64))
        self.G.nodes['ip'].data['emb'] = nn.Parameter(torch.randn(1242, 64))
        # Per-node-type feature projections into a shared 64-d space.
        self.uf_mapping = nn.Linear(88, 64)
        self.gf_mapping = nn.Linear(33, 64)
        self.pf_mapping = nn.Linear(690, 64)
        # NOTE(review): `self.weight` is never used in forward() -- kept for
        # state-dict compatibility; confirm whether it can be removed.
        self.weight = nn.ModuleDict({
            'user': nn.Sequential(nn.Linear(64, 1), nn.Sigmoid()),
            'group': nn.Sequential(nn.Linear(64, 1), nn.Sigmoid()),
        })
        # Scoring MLP over the 128-d concatenated user representation
        # produced by HeteroRGCNLayer (4 x 32-d parts).
        layer_size = [128, 64, 32, 1]
        self.predict = nn.Sequential()
        for j in range(len(layer_size) - 1):
            self.predict.add_module("Linear_layer_%d" % j,
                                    nn.Linear(layer_size[j], layer_size[j + 1]))
            if j == len(layer_size) - 2:
                self.predict.add_module("Sigmoid_layer_%d" % j, nn.Sigmoid())
            else:
                self.predict.add_module("Relu_layer_%d" % j, nn.ReLU())
        self.lossfunc = nn.BCELoss()

    def forward(self, up, un, flag):
        """Score positive (`up`) and negative (`un`) user ids; return BCE loss.

        `flag` is unused in the visible code -- kept for caller compatibility.
        """
        # Refresh node embeddings from raw features, then message-pass.
        self.G.nodes['user'].data['emb'] = self.uf_mapping(self.G.nodes['user'].data['feat'])
        self.G.nodes['group'].data['emb'] = self.gf_mapping(self.G.nodes['group'].data['feat'])
        self.G.nodes['ip'].data['emb'] = self.pf_mapping(self.G.nodes['ip'].data['feat'])
        self.hgl(self.G)
        user = torch.LongTensor(np.concatenate([np.array(up), np.array(un)], 0))
        label = torch.LongTensor(
            np.concatenate([np.ones(len(up)), np.zeros(len(un))], 0)
        ).reshape(-1, 1).float()
        # Bug fix: the original called undefined `self.predict1`/`self.predict2`
        # (AttributeError); the single `self.predict` MLP scores the 128-d
        # user representation written by HeteroRGCNLayer.
        prob = self.predict(self.G.nodes['user'].data['h'][user])
        return self.lossfunc(prob, label)


class HeteroRGCNLayer(nn.Module):
    """One round of relation-wise max-aggregation message passing.

    Writes a 128-d 'h' onto user nodes, the concatenation of:
    own projection 'Wh' | 2-hop user signal 'uh' | group signal 'gh' |
    ip signal 'ph' (each 32-d).
    """

    def __init__(self):
        super(HeteroRGCNLayer, self).__init__()
        # One 64 -> 32 projection per node type.
        self.weight = nn.ModuleDict({
            'user': nn.Sequential(nn.Linear(64, 32)),
            'group': nn.Sequential(nn.Linear(64, 32)),
            'ip': nn.Sequential(nn.Linear(64, 32)),
        })

    def forward(self, G):
        # Pass 1: user -> group ('guh') and user -> ip ('puh').
        funcs = {}
        G.nodes['user'].data['Wh'] = self.weight['user'](G.nodes['user'].data['emb'])
        funcs['belong'] = (fn.copy_u('Wh', 'm'), fn.max('m', 'guh'))
        funcs['belong2'] = (fn.copy_u('Wh', 'm'), fn.max('m', 'puh'))
        G.multi_update_all(funcs, 'max')
        # Pass 2: group -> user ('gh') and ip -> user ('ph').
        funcs = {}
        G.nodes['group'].data['Wh'] = self.weight['group'](G.nodes['group'].data['emb'])
        funcs['have'] = (fn.copy_u('Wh', 'm'), fn.max('m', 'gh'))
        G.nodes['ip'].data['Wh'] = self.weight['ip'](G.nodes['ip'].data['emb'])
        funcs['have2'] = (fn.copy_u('Wh', 'm'), fn.max('m', 'ph'))
        G.multi_update_all(funcs, 'max')
        # Pass 3: relay the 2-hop user signal back to users via both
        # group ('guh') and ip ('puh') intermediaries into a shared 'uh'.
        funcs = {}
        funcs['have'] = (fn.copy_u('guh', 'm'), fn.max('m', 'uh'))
        funcs['have2'] = (fn.copy_u('puh', 'm'), fn.max('m', 'uh'))
        G.multi_update_all(funcs, 'max')
        # Final 128-d user representation.
        G.nodes['user'].data['h'] = torch.cat(
            [G.nodes['user'].data['Wh'], G.nodes['user'].data['uh'],
             G.nodes['user'].data['gh'], G.nodes['user'].data['ph']], 1)
def loadData():
    """Build the user/group heterograph and load the train/test user splits.

    Returns:
        hg: DGLHeteroGraph with 'belong' (user->group) and 'have'
            (group->user) relations plus per-type float 'feat' tensors.
        up_train, up_test, un_train, un_test: positive/negative user-id lists.
    """
    # Function-scope imports: `pd` is imported at the top of this script,
    # but `pickle` and the old `th` alias were never imported.
    import pickle
    import pandas as pd

    ug = pd.read_csv("ug.csv")
    uf = pd.read_csv("user_feat.csv", sep=",", header=None)
    gf = pd.read_csv("group_feat.csv", sep=",", header=None)
    user_group = (torch.tensor(ug['u'].values), torch.tensor(ug['g'].values))
    group_user = (torch.tensor(ug['g'].values), torch.tensor(ug['u'].values))
    with open("data/u_train_test.pickle", "rb") as fp:
        # NOTE(review): pickle.load on a local data file -- trusted input
        # assumed; do not point this at untrusted data.
        up_train, up_test, un_train, un_test = pickle.load(fp)
    # NOTE(review): HeteroRGCNLayer also consumes 'ip' nodes with
    # 'belong2'/'have2' relations and an ip 'feat' tensor; this loader builds
    # only the user/group relations -- confirm the ip part is added elsewhere.
    hg = dgl.heterograph({
        ('user', 'belong', 'group'): user_group,
        ('group', 'have', 'user'): group_user,
    })
    hg.nodes['user'].data['feat'] = torch.tensor(uf.values, dtype=torch.float)
    hg.nodes['group'].data['feat'] = torch.tensor(gf.values, dtype=torch.float)
    return hg, up_train, up_test, un_train, un_test
if __name__ == "__main__":
    G, up_train, up_test, un_train, un_test = loadData()
    model = HeteroGNN(G)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

    for epoch in range(200):
        t0 = time()
        model.train()
        optimizer.zero_grad()
        # Fresh 1:5 positive/negative sampling each epoch.
        neg = random.sample(un_train, k=5 * len(up_train))
        # Bug fix: forward() returns a single loss tensor; the original
        # unpacked three values (loss, auc, f1) and raised at runtime.
        train_loss = model(up_train, neg, 0)
        train_loss.backward()
        optimizer.step()
        t1 = time()
        model.eval()
        with torch.no_grad():
            test_loss = model(up_test, un_test, 0)
        t2 = time()
        print('epoch[%d],TrainLoss[%.2f],TestLoss[%.2f],time[%.1f + %.1f]'
              % (epoch, train_loss.item(), test_loss.item(), t1 - t0, t2 - t1))