lstm_crf

文章目录


code

import torch
#一些参数
embedding_dim=300
hidden_dim=256
vocab_size=4833
dic_label={'<BEG>': 0, 'B-ORG': 1, 'B-LOC': 2, 'B-PER': 3, 'I-PER': 4, 'I-ORG': 5, 'I-LOC': 6, 'O': 7, '<END>': 8}
tagset_size=len(dic_label)
x=torch.tensor([4445, 1021, 1759, 825, 8, 481, 3763, 2985, 976, 3416, 1894, 843, 1478, 2044, 3033, 3802, 1756, 3080, 2240, 1459, 2285, 1220, 4090, 1478, 3246, 348, 1756, 3520, 2430, 2453, 2490, 4301, 3839, 2004, 2985, 2826, 3256, 406, 3764, 1756, 3220, 405, 3197, 924, 3256, 646, 2522, 4445, 4427, 1065])
y=torch.tensor([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7])
torch.manual_seed(1)
<torch._C.Generator at 0x7f38583be750>

1.这里和LSTM一样

import torch.nn as nn
def init_hidden(num_layers=2, num_directions=2, batch_size=1, hidden_size=None):
    """Return a zero-filled initial ``(h_0, c_0)`` state pair for the LSTM.

    There is no hidden state before the first step, so one is created here.
    Both tensors have shape
    ``(num_layers * num_directions, batch_size, hidden_size)``; see the
    PyTorch ``nn.LSTM`` documentation for why the state is laid out this way.

    Args:
        num_layers: Number of stacked LSTM layers (default 2).
        num_directions: 2 for a bidirectional LSTM, 1 otherwise (default 2).
        batch_size: Number of sequences processed at once (default 1).
        hidden_size: Per-direction hidden size. When ``None`` it falls back to
            the module-level ``hidden_dim // 2``.

    Returns:
        A tuple ``(h_0, c_0)`` of two distinct zero tensors.
    """
    if hidden_size is None:
        # The module-level hidden_dim is split evenly between the directions.
        hidden_size = hidden_dim // 2
    shape = (num_layers * num_directions, batch_size, hidden_size)
    return (torch.zeros(shape), torch.zeros(shape))
# Embedding table: one embedding_dim-sized vector per word index.
word_embeddings = nn.Embedding(vocab_size, embedding_dim)
# Two-layer bidirectional LSTM with hidden_dim//2 units per direction.
lstm = nn.LSTM(embedding_dim, hidden_dim//2,num_layers=2,dropout=0.5,bidirectional=True)
# Linear projection from the hidden_dim-wide LSTM output to one score per tag.
hidden2tag = nn.Linear(hidden_dim, tagset_size)
hidden=init_hidden()
embed=word_embeddings(x)
# Reshape the embeddings to (len(x), 1, -1) before feeding the LSTM.
lstm_out,hidden=lstm(embed.view(len(x),1,-1),hidden)
# Flatten to (len(x), -1) and project: one row of tag scores per token.
lstm_feats=hidden2tag(lstm_out.view(len(x), -1))
print(lstm_feats)
tensor([[ 3.8183e-02,  1.5836e-02,  3.7862e-02,  7.0418e-02,  2.2438e-02,4.1844e-02,  6.3254e-02, -6.2891e-02, -2.4113e-02],[ 1.7222e-02,  2.5335e-02,  4.9524e-02,  2.8871e-02,  1.0654e-02,3.5806e-02,  5.0367e-02, -8.1280e-03, -1.3964e-02],[ 1.3208e-02,  4.9184e-02,  3.3503e-02,  4.9016e-02,  1.7465e-02,1.4404e-02,  2.6906e-02, -8.3263e-03,  1.3732e-02],[ 3.4434e-02,  6.7054e-02, -3.4955e-03,  3.3051e-02,  5.5812e-02,1.8879e-02, -2.3393e-02, -2.8981e-02, -2.1825e-03],[ 3.9222e-02,  5.2818e-02, -8.8901e-03,  6.4605e-02,  9.6511e-03,4.4643e-02, -4.2248e-02, -3.2466e-02, -3.0473e-02],[ 1.7703e-02,  5.7643e-02, -8.6769e-04,  3.7427e-02,  4.2789e-02,-3.0154e-03, -4.8294e-03, -6.7323e-02, -3.1352e-02],[ 7.6252e-03,  4.4999e-02,  2.2395e-02,  8.7719e-02,  5.0058e-02,-2.8418e-02, -1.7231e-02, -9.2161e-02,  2.4956e-02],[ 1.0476e-02,  5.4486e-02,  5.4586e-02,  1.3326e-01,  3.2672e-02,-5.3690e-02, -2.1257e-02, -1.1390e-01,  3.1813e-02],[ 5.0284e-02,  8.0629e-02,  7.8598e-02,  9.5313e-02,  6.1923e-02,-1.5688e-02, -4.8931e-02, -6.9627e-02,  2.5013e-03],[ 4.7074e-02,  3.6267e-02,  1.2090e-01,  1.5534e-01,  1.1135e-01,-4.7217e-02,  2.2335e-02, -5.7714e-02, -3.9817e-03],[ 7.1382e-02,  1.6969e-02,  1.1157e-01,  1.7124e-01,  8.8421e-02,-8.4020e-03,  7.9469e-02, -5.2841e-02,  2.3255e-02],[ 2.6767e-02,  8.4420e-03,  9.5907e-02,  1.7361e-01,  8.9725e-02,-2.3339e-02,  4.5094e-02, -1.0153e-01,  1.3048e-03],[ 4.7910e-03,  1.6833e-02,  7.6657e-02,  1.0251e-01,  8.0264e-02,-8.3014e-03,  4.4257e-02, -7.3046e-02,  1.4148e-02],[-6.8932e-03,  1.4081e-02,  8.9506e-03,  1.2938e-01,  2.6720e-02,-5.6110e-03, -9.9000e-03, -9.3737e-02, -9.7825e-03],[ 1.4149e-02,  1.1808e-02,  3.3058e-02,  6.6578e-02,  2.2146e-02,-5.0391e-02,  3.6378e-02, -4.2137e-02, -8.5403e-03],[ 1.7245e-02, -2.5705e-03,  4.0062e-02,  3.1326e-02,  2.9868e-02,-1.4757e-02,  1.9054e-02, -6.2589e-02, -5.0268e-02],[ 6.3002e-02,  1.1055e-02,  7.9968e-02,  3.7050e-03,  5.0645e-02,-2.2947e-02,  5.4943e-03, -6.3421e-02, -5.3466e-02],[ 
1.9044e-02,  4.9453e-02,  5.6323e-02,  4.7003e-02,  3.0991e-02,-1.3294e-02,  1.6698e-02, -6.4374e-02,  2.8829e-02],[-3.2599e-02,  3.2138e-02,  9.9127e-02,  5.4674e-02,  1.7155e-05,-4.1551e-02, -3.7157e-04, -7.8810e-02, -8.2693e-03],[-2.6327e-02,  4.8945e-03,  2.5955e-02,  4.5786e-02,  2.5402e-02,-3.7257e-02, -8.9031e-03, -7.9090e-02,  1.3885e-02],[-1.1430e-02, -1.5008e-02,  7.0533e-02,  1.9002e-02,  7.2340e-02,-5.5851e-02,  5.1460e-02, -3.9105e-02, -1.2614e-02],[-3.7780e-02, -7.6263e-04,  6.7537e-02,  4.5665e-02,  7.4211e-02,-5.4651e-02,  8.8694e-02, -3.1336e-02, -2.2369e-02],[-4.8749e-02,  5.0722e-03,  3.4647e-02,  8.2097e-02,  7.9723e-02,-9.1170e-02,  1.0117e-01, -2.8669e-02, -1.0937e-02],[ 7.2655e-03,  1.7515e-02,  4.3003e-02,  8.7258e-02,  7.3814e-02,-1.3298e-02,  7.8785e-02,  9.9551e-03, -9.1298e-03],[-3.5876e-02,  3.6467e-03,  4.5647e-02,  1.2269e-01,  8.9789e-02,-2.7733e-02,  2.2605e-02, -2.6139e-02, -3.1497e-02],[ 7.9253e-03,  5.0785e-02,  6.3591e-02,  1.0623e-01,  9.4893e-02,-7.0320e-02,  4.7295e-03, -6.9957e-02, -2.6867e-02],[ 4.0818e-02,  6.3439e-02,  6.3788e-02,  5.9888e-02,  8.5492e-02,-5.3867e-02,  4.7728e-02,  1.9959e-02, -2.2501e-02],[ 1.7481e-02,  4.3535e-02,  1.1841e-01,  9.6153e-02,  1.0129e-01,-8.3599e-02,  5.0522e-03, -2.6142e-03, -4.8033e-02],[ 5.4343e-02, -1.3246e-02,  7.0181e-02,  6.6330e-02,  4.9628e-02,-8.1740e-02,  3.2154e-02, -3.0047e-03, -7.4529e-02],[-2.1330e-02, -1.2100e-02,  8.6243e-02,  1.0439e-01,  4.5414e-02,-5.5776e-02,  5.7161e-02, -3.2444e-02, -5.6948e-02],[ 3.9036e-03, -2.4023e-03,  1.1202e-01,  1.3427e-01,  5.9969e-02,-2.4287e-02,  4.6610e-02, -8.8279e-02, -4.2456e-02],[-1.5046e-02,  2.5931e-02,  4.3004e-02,  8.1989e-02,  3.6611e-02,3.5708e-02,  4.1205e-02, -4.7704e-02, -3.2832e-02],[-4.1585e-03,  1.6047e-02,  4.4101e-02,  5.1798e-02,  1.7038e-02,1.9604e-02,  5.2224e-02, -3.2164e-02, -9.1968e-03],[-1.0024e-02,  1.5265e-02,  6.6991e-02,  6.1134e-02,  4.9887e-02,2.6940e-02,  1.0078e-02, -7.3716e-02, -2.9959e-02],[ 2.9316e-02,  
1.8044e-02,  5.4106e-02,  9.5514e-02,  4.2175e-02,4.0722e-02, -2.6533e-02, -9.7807e-02, -3.0428e-02],[ 3.5950e-02,  2.1914e-02,  2.2790e-02,  1.1776e-01,  8.5006e-02,3.3220e-03, -5.4721e-02, -1.0637e-01, -1.4969e-02],[ 2.8662e-04,  5.7606e-03, -5.1026e-02,  9.8858e-02,  4.9239e-02,6.3886e-03, -9.1599e-02, -1.1911e-01, -9.8223e-03],[-7.4165e-03, -9.9517e-03,  2.9346e-03,  1.0618e-01,  4.2462e-02,-1.5746e-02, -9.9808e-02, -9.0239e-02, -2.4547e-02],[ 2.3751e-02,  2.7337e-02,  5.2005e-02,  9.1845e-02,  3.9141e-02,-5.1640e-02, -6.5797e-02, -7.0130e-02, -3.7505e-02],[ 6.5887e-02,  3.8870e-02,  6.4941e-02,  5.1979e-02, -1.3211e-03,-4.3425e-02, -1.5248e-02, -1.6603e-04, -2.2244e-02],[ 1.0810e-01,  1.3062e-02,  6.5309e-02,  5.6089e-02,  2.5537e-02,-6.5674e-02,  1.7194e-02, -4.3231e-02,  6.0457e-02],[ 8.2087e-02,  2.0487e-02,  1.4045e-02,  4.9639e-02,  4.8768e-02,-3.1174e-02,  5.2010e-02,  2.8771e-03,  7.0507e-02],[ 1.8444e-02,  1.1632e-02,  4.5379e-02,  6.8511e-02,  5.6063e-02,1.2600e-04,  7.5738e-02, -2.0299e-02,  3.9262e-02],[-2.4086e-02, -2.7776e-02,  6.4662e-02,  9.4318e-02,  4.0032e-02,-2.0971e-02,  8.4450e-02, -5.6561e-02,  8.8655e-02],[-1.3338e-02, -4.5115e-02, -5.8770e-04,  1.0142e-01,  1.0268e-02,-1.2930e-02, -2.0746e-02, -7.1666e-02,  6.0637e-02],[-1.5192e-02, -4.2696e-02, -6.9170e-04,  6.6469e-02,  2.4795e-03,-8.2365e-02, -1.4042e-02, -4.1713e-02,  3.0981e-02],[ 2.4108e-02,  1.0243e-02,  2.7134e-02,  5.2960e-02,  1.6884e-03,-6.3587e-02,  2.4258e-02,  1.1909e-02, -2.1302e-02],[-4.6295e-03,  4.9394e-02,  4.3290e-02,  6.0374e-02,  3.6289e-02,-5.6213e-02,  5.7276e-02, -4.6284e-02, -4.6490e-02],[-2.3926e-02,  6.8226e-02,  1.1496e-02,  5.5457e-02,  2.7757e-02,-5.2626e-02,  1.2686e-02, -1.0490e-01,  8.4274e-03],[ 4.0195e-02, -6.3513e-03,  1.9505e-02,  4.9050e-02,  2.1933e-02,-7.4236e-02, -3.8088e-02, -7.7404e-02, -5.0512e-03]],grad_fn=<AddmmBackward>)

2.维特比

lstm_feats:是状态矩阵(只是这么看),形状为(序列长,tagset_size),即每个时刻对每个标签的发射得分

transition:是转移矩阵

$$\delta(t)_j=\max_i\big(\delta(t-1)_i\,a_{i,j}\big)\cdot b(j)$$

print(lstm_feats.shape)
torch.Size([50, 9])
# Transition scores: the row index is the tag at step t, the column index is
# the tag at step t-1.
A=nn.Parameter(torch.randn(tagset_size,tagset_size))
# A is a leaf tensor that requires grad, so the constraint values are written
# under no_grad: autograd rejects tracked in-place writes into such a tensor.
with torch.no_grad():
    A[0,:]=-1000  # never transition *to* tag 0 (the start tag)
    A[:,tagset_size-1]=-1000  # never transition *from* the last tag (the end tag)
sentence_len=lstm_feats.shape[0]
# Initial Viterbi scores: all mass on tag 0, every other tag effectively ruled out.
delta=torch.full((1,tagset_size),-1000.)
delta[0][0]=0
forward=[]
forward.append(delta)
# Walk through a single step (i = 0) by hand.
i=0
gamma_r_l=forward[i]
# (1, tagset_size) broadcasts over the rows of A; the max over dim 1 picks the
# best previous tag for every current tag.
delta0,indice=torch.max(gamma_r_l+A,axis=1)
# Same computation with the previous scores stacked explicitly instead of broadcast.
g=torch.stack([forward[i]]*tagset_size)
print(torch.max(torch.squeeze(g)+A,axis=1))
torch.return_types.max(
values=tensor([-1.0000e+03, -2.0581e-01, -5.9801e-01, -1.6440e-01,  2.3199e-01,1.0266e+00,  8.5233e-01,  1.1755e+00, -6.3664e-01],grad_fn=<MaxBackward0>),
indices=tensor([0, 0, 0, 0, 0, 0, 0, 0, 0]))
t_r1_k=torch.unsqueeze(lstm_feats[i],0)
print(t_r1_k)
tensor([[ 0.0382,  0.0158,  0.0379,  0.0704,  0.0224,  0.0418,  0.0633, -0.0629,-0.0241]], grad_fn=<UnsqueezeBackward0>)
delta=torch.unsqueeze(delta0,0)+t_r1_k
print(delta)
tensor([[-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01]],grad_fn=<AddBackward0>)
print(delta0+lstm_feats[i])
tensor([-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01],grad_fn=<AddBackward0>)
print(lstm_feats[i])
print(delta0)
tensor([ 0.0382,  0.0158,  0.0379,  0.0704,  0.0224,  0.0418,  0.0633, -0.0629,-0.0241], grad_fn=<SelectBackward>)
tensor([-1.0000e+03, -2.0581e-01, -5.9801e-01, -1.6440e-01,  2.3199e-01,1.0266e+00,  8.5233e-01,  1.1755e+00, -6.3664e-01],grad_fn=<MaxBackward0>)
indices=[]
forward.append(delta)
indices.append(indice.tolist())
print(forward)
[tensor([[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.]]), tensor([[-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01]],grad_fn=<AddBackward0>), tensor([[-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01]],grad_fn=<AddBackward0>), tensor([[-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01]],grad_fn=<AddBackward0>), tensor([[-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01]],grad_fn=<AddBackward0>)]
def Viterbi_M(features):
    """Decode the highest-scoring tag sequence with the Viterbi algorithm.

    Reads the module-level transition matrix ``A`` and ``tagset_size``.

    Args:
        features: Per-token tag scores, one row per token.

    Returns:
        ``(bestpath, best_score)``: the list of best tag ids (one per token)
        and the score of that path, including the final transition that is
        read from the last row of ``A``.
    """
    num_steps = features.shape[0]
    # All of the initial score sits on tag 0.
    start_scores = torch.full((1, tagset_size), -1000.)
    start_scores[0][0] = 0
    score_history = [start_scores]
    backpointers = []
    for step in range(num_steps):
        previous = score_history[step]
        # For every current tag, find the best previous tag and its score.
        best_prev_score, best_prev_tag = torch.max(previous + A, dim=1)
        current = features[step] + best_prev_score
        score_history.append(current.reshape(1, tagset_size))
        backpointers.append(best_prev_tag.tolist())
    final_scores = score_history[-1] + A[tagset_size - 1]
    last_tag = torch.argmax(final_scores).tolist()
    best_score = final_scores[0][last_tag]
    # Follow the back pointers from the last token to the first.
    bestpath = [last_tag]
    for pointers in reversed(backpointers):
        last_tag = pointers[last_tag]
        bestpath.append(last_tag)
    # The final entry is the predecessor of the first token, not a token tag.
    bestpath.pop()
    bestpath.reverse()
    return bestpath, best_score


bestpath,best_score=Viterbi_M(lstm_feats)
print(bestpath)
print(best_score)
[7, 5, 7, 5, 7, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 2, 3, 6, 7, 5]
tensor(60.5767, grad_fn=<SelectBackward>)

3.neg-log-loss

$$
\begin{aligned}
\hat{\theta}&=\arg\max_{\theta}\prod_{i=1}^N p(y^{(i)}|x^{(i)})\\
\hat{\lambda},\hat{\eta}&=\arg\max_{\lambda,\eta}\prod_{i=1}^N p(y^{(i)}|x^{(i)})\\
L&=\sum_{i=1}^N\log p(y^{(i)}|x^{(i)})=\sum_{i=1}^N\Big(-\log Z+\sum_{t=1}^T\big(\lambda^Tf(y_{t-1},y_t,x)+\eta^Tg(y_t,x)\big)\Big)\\
\frac{\partial L}{\partial \lambda}&=\frac{\partial}{\partial \lambda}\sum_{i=1}^N\log p(y^{(i)}|x^{(i)})=\sum_{i=1}^N\Big(-\frac{\partial }{\partial \lambda}\log Z+\sum_{t=1}^Tf(y_{t-1},y_t,x)\Big)
\end{aligned}
$$

log-partition function:

$$
\begin{aligned}
\frac{\partial }{\partial \lambda}\log Z
&=E\Big(\sum_{t=1}^Tf(y_{t-1},y_t,x^{(i)})\Big)\quad\text{(积分就是期望)}\\
&=\sum_y P(y|x^{(i)})\sum_{t=1}^T f(y_{t-1},y_t,x^{(i)})\\
&=\sum_{t=1}^T\sum_y P(y|x^{(i)})\, f(y_{t-1},y_t,x^{(i)})\\
&=\sum_{t=1}^T\sum_{y_1,y_2,\dots,y_{t-2}}\sum_{y_{t-1}}\sum_{y_t}\sum_{y_{t+1},y_{t+2},\dots,y_T} P(y|x^{(i)})\, f(y_{t-1},y_t,x^{(i)})\\
&=\sum_{t=1}^T\sum_{y_{t-1}}\sum_{y_t}\Big(\sum_{y_1,y_2,\dots,y_{t-2}}\sum_{y_{t+1},y_{t+2},\dots,y_T}P(y|x^{(i)})\, f(y_{t-1},y_t,x^{(i)})\Big)\\
&=\sum_{t=1}^T\sum_{y_{t-1}}\sum_{y_t}P(y_{t-1},y_t|x^{(i)})\, f(y_{t-1},y_t,x^{(i)})
\end{aligned}
$$

$$p(y_{i-1},y_i|x)=\frac{\alpha_{i-1}^T(y_{i-1}|x)\,M_i(y_{i-1},y_i|x)\,\beta_i(y_i|x)}{Z(x)}$$

一次只对一个句子算就可以了
$$negL=\sum_{i=1}^N\Big(+\log Z-\sum_{t=1}^T\big(\lambda^Tf(y_{t-1},y_t,x)+\eta^Tg(y_t,x)\big)\Big)=-L$$

#这是crf中的
# y=[0,1,1]
# delta=torch.sum(self.f[:,len(y),[0]+y,y+[9]],axis=(1))-torch.sum(self.f* self.p_y12_x_condition_alpha_beta(alpha, beta),axis=(1,2,3))

3.1求logZ(前向算法)

$$
\begin{aligned}
\alpha_0(y_i)&=(0,-1000,-1000,\dots,-1000)\\
\alpha_1(y_{i+1})&=\log\Big(\sum_{y_i}\exp\big(\alpha_0(y_i)+\lambda^Tf(y_i,y_{i+1})+\mu^Tg(y_{i+1},x_d)\big)\Big)\\
\mu^Tg(y_{i+1},x_d)&=\text{lstm\_feats}\\
\lambda^Tf(y_i,y_{i+1})&=A
\end{aligned}
$$

def _forward_alg( feats):# Do the forward algorithm to compute the partition functioninit_alphas = torch.full([tagset_size], -10000.)# START_TAG has all of the score.init_alphas[0] = 0.# Wrap in a variable so that we will get automatic backprop# Iterate through the sentenceforward_var_list=[]forward_var_list.append(init_alphas)for feat_index in range(feats.shape[0]):        gamar_r_l = torch.stack([forward_var_list[feat_index]] * feats.shape[1])
#         print(gamar_r_l)t_r1_k = torch.unsqueeze(feats[feat_index],0).transpose(0,1)aa = gamar_r_l + t_r1_k + Aforward_var_list.append(torch.logsumexp(aa,dim=1))terminal_var = forward_var_list[-1] + A[tagset_size-1]terminal_var = torch.unsqueeze(terminal_var,0)alpha = torch.logsumexp(terminal_var, dim=1)[0]return alpha
_forward_alg(lstm_feats)
tensor(114.9550, grad_fn=<SelectBackward>)
def alpha_alg(feats):
    """Compute log Z with the forward algorithm, using broadcasting.

    Reads the module-level transition matrix ``A`` and ``tagset_size``.

    Args:
        feats: Per-token tag scores, one row per token.

    Returns:
        A 0-dim tensor holding the log partition function.
    """
    current = torch.full([tagset_size], -1000.)
    current[0] = 0.  # all of the initial score sits on tag 0
    history = [current]
    for step in range(feats.shape[0]):
        # A column vector, so the emission of the *current* tag is added to
        # every entry of its row.
        emission = feats[step].reshape(tagset_size, 1)
        scores = history[step] + A + emission
        history.append(torch.logsumexp(scores, dim=1))
    end_scores = history[-1] + A[tagset_size - 1]
    return torch.logsumexp(end_scores, dim=0)


alpha_alg(lstm_feats)
tensor([-888.8337,  110.8548,  111.7476,  112.9284,  110.7119,  114.4487,113.1612,  111.3721, -887.2665], grad_fn=<AddBackward0>)
tensor(114.9550, grad_fn=<LogsumexpBackward>)
init_alpha=torch.full([tagset_size],-1000.)
init_alpha[0]=0.
print(init_alpha)
tensor([    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.])
alpha=[]
alpha.append(init_alpha)
print(alpha)
[tensor([    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.])]
i=0
gammar=torch.stack([alpha[i]]*tagset_size)
print(gammar)
tensor([[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.],[    0., -1000., -1000., -1000., -1000., -1000., -1000., -1000., -1000.]])
print(lstm_feats[i])
print(torch.unsqueeze(lstm_feats[i],0))
print(torch.unsqueeze(lstm_feats[i],0).transpose(0,1))
tensor([ 0.0382,  0.0158,  0.0379,  0.0704,  0.0224,  0.0418,  0.0633, -0.0629,-0.0241], grad_fn=<SelectBackward>)
tensor([[ 0.0382,  0.0158,  0.0379,  0.0704,  0.0224,  0.0418,  0.0633, -0.0629,-0.0241]], grad_fn=<UnsqueezeBackward0>)
tensor([[ 0.0382],[ 0.0158],[ 0.0379],[ 0.0704],[ 0.0224],[ 0.0418],[ 0.0633],[-0.0629],[-0.0241]], grad_fn=<TransposeBackward0>)
t=lstm_feats[i].reshape(tagset_size,1)
print(t)
tensor([[ 0.0382],[ 0.0158],[ 0.0379],[ 0.0704],[ 0.0224],[ 0.0418],[ 0.0633],[-0.0629],[-0.0241]], grad_fn=<AsStridedBackward>)
aa=gammar+t+A
print(aa)
tensor([[-9.9996e+02, -2.0000e+03, -2.0000e+03, -2.0000e+03, -2.0000e+03,-2.0000e+03, -2.0000e+03, -2.0000e+03, -2.0000e+03],[-1.8997e-01, -1.0012e+03, -9.9866e+02, -9.9967e+02, -1.0013e+03,-1.0007e+03, -1.0006e+03, -1.0009e+03, -2.0000e+03],[-5.6015e-01, -1.0014e+03, -9.9917e+02, -9.9963e+02, -9.9776e+02,-9.9906e+02, -9.9860e+02, -1.0004e+03, -2.0000e+03],[-9.3982e-02, -9.9949e+02, -9.9901e+02, -9.9943e+02, -1.0008e+03,-9.9923e+02, -1.0001e+03, -1.0012e+03, -1.9999e+03],[ 2.5443e-01, -1.0014e+03, -1.0001e+03, -1.0005e+03, -9.9904e+02,-1.0002e+03, -1.0009e+03, -1.0018e+03, -2.0000e+03],[ 1.0684e+00, -9.9965e+02, -1.0008e+03, -1.0014e+03, -1.0022e+03,-1.0009e+03, -9.9986e+02, -9.9887e+02, -2.0000e+03],[ 9.1558e-01, -1.0010e+03, -1.0002e+03, -9.9890e+02, -9.9977e+02,-1.0006e+03, -1.0010e+03, -9.9912e+02, -1.9999e+03],[ 1.1126e+00, -1.0009e+03, -1.0013e+03, -9.9953e+02, -1.0025e+03,-9.9881e+02, -9.9909e+02, -9.9945e+02, -2.0001e+03],[-6.6076e-01, -1.0009e+03, -1.0006e+03, -9.9899e+02, -1.0004e+03,-9.9686e+02, -9.9844e+02, -1.0003e+03, -2.0000e+03]],grad_fn=<AddBackward0>)

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-HrvP3XSv-1581078252705)(attachment:image.png)]

torch.logsumexp(aa,dim=1)#一行求一个值(t+1时刻的)
tensor([-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01],grad_fn=<LogsumexpBackward>)
torch.logsumexp(aa[0],axis=0)
tensor(-999.9618, grad_fn=<LogsumexpBackward>)
torch.logsumexp(aa[:,0],axis=0)
tensor([-9.9996e+02, -1.8997e-01, -5.6015e-01, -9.3982e-02,  2.5443e-01,1.0684e+00,  9.1558e-01,  1.1126e+00, -6.6076e-01],grad_fn=<SelectBackward>)
torch.log(torch.sum(torch.exp(aa),axis=1))
tensor([   -inf, -0.1900, -0.5601, -0.0940,  0.2544,  1.0684,  0.9156,  1.1126,-0.6608], grad_fn=<LogBackward>)
alpha.append(torch.logsumexp(aa,dim=1))

3.2 求 $\sum_{t=1}^T\big(\lambda^Tf(y_{t-1},y_t,x)+\eta^Tg(y_t,x)\big)$(标注序列的得分)

print(y)
tensor([4301, 2826,  375, 3802, 3197, 2874, 3016, 2453, 1389, 2284, 2490, 4301,3992, 3726,  981, 2985, 2557, 2218, 2264,  471, 1756,  397, 2874, 4154,535, 1244, 2406,  545, 2411, 2985,  348, 3489, 4586, 3551,  473, 3462,2401])
y=torch.tensor([7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7])
def gold_score(feats, y, transitions=None):
    """Score one given tag sequence under the CRF.

    The score is the sum of the transition from tag 0 (the start tag) into
    ``y[0]``, every emission ``feats[t, y[t]]``, every transition
    ``y[t-1] -> y[t]``, and the final transition from ``y[-1]`` into the last
    tag (the end tag). The final transition is required so the score matches
    the paths summed by the forward algorithm, which also ends with the last
    row of the transition matrix.

    Args:
        feats: Per-token tag scores, one row per token.
        y: Tag ids of the sequence to score, one per token.
        transitions: Square transition-score matrix where entry ``[i, j]`` is
            the score of moving *to* tag ``i`` *from* tag ``j``. When ``None``
            the module-level matrix ``A`` is used.

    Returns:
        A 0-dim tensor with the total score of the sequence.
    """
    if transitions is None:
        transitions = A
    stop_tag = transitions.shape[0] - 1
    score = transitions[y[0], 0] + feats[0, y[0]]
    for prev_tag, tag, feat in zip(y[:-1], y[1:], feats[1:]):
        # Out-of-place addition keeps the autograd graph free of in-place edits.
        score = score + transitions[tag, prev_tag] + feat[tag]
    score = score + transitions[stop_tag, y[-1]]
    return score
print(sum)
tensor(28.4129, grad_fn=<AddBackward0>)

4.整体

import torch
import torch.nn as nn
import torch.optim as optim
from processData import *
from tqdm import tqdm

torch.manual_seed(1)


def prepare_sequence(seq, to_ix):
    """Map every item of ``seq`` through ``to_ix`` and return a long tensor."""
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


class BiLSTM_CRF(nn.Module):
    """Bidirectional LSTM emission model followed by a linear-chain CRF.

    The methods read the module-level names ``START_TAG`` and ``STOP_TAG``,
    which must be keys of ``tag_to_ix``.
    """

    def __init__(self, vocab_size, tag_to_ix, embedding_dim, hidden_dim):
        """Build the embedding, LSTM, projection and transition parameters.

        Args:
            vocab_size: Number of rows of the embedding table.
            tag_to_ix: Mapping from tag name to tag index.
            embedding_dim: Size of each word embedding.
            hidden_dim: Total LSTM output size (split over both directions).
        """
        super(BiLSTM_CRF, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag_to_ix = tag_to_ix
        self.tagset_size = len(tag_to_ix)

        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2,
                            num_layers=1, bidirectional=True)

        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Linear(hidden_dim, self.tagset_size)

        # Matrix of transition parameters.  Entry i,j is the score of
        # transitioning *to* i *from* j.
        self.transitions = nn.Parameter(
            torch.randn(self.tagset_size, self.tagset_size))

        # These two statements enforce the constraint that we never transfer
        # to the start tag and we never transfer from the stop tag.
        self.transitions.data[tag_to_ix[START_TAG], :] = -10000
        self.transitions.data[:, tag_to_ix[STOP_TAG]] = -10000

        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a random ``(h_0, c_0)`` pair of shape (2, 1, hidden_dim // 2)."""
        return (torch.randn(2, 1, self.hidden_dim // 2),
                torch.randn(2, 1, self.hidden_dim // 2))

    def _forward_alg(self, feats):
        """Run the forward algorithm and return the log partition function."""
        init_alphas = torch.full([self.tagset_size], -10000.)
        # START_TAG has all of the score.
        init_alphas[self.tag_to_ix[START_TAG]] = 0.

        # Iterate through the sentence.
        forward_var_list = []
        forward_var_list.append(init_alphas)
        for feat_index in range(feats.shape[0]):
            # Every row is a copy of the previous alphas (one row per next tag).
            gamar_r_l = torch.stack([forward_var_list[feat_index]] * feats.shape[1])
            # Column vector: the emission score of the next tag, per row.
            t_r1_k = torch.unsqueeze(feats[feat_index], 0).transpose(0, 1)
            aa = gamar_r_l + t_r1_k + self.transitions
            forward_var_list.append(torch.logsumexp(aa, dim=1))
        terminal_var = forward_var_list[-1] + self.transitions[self.tag_to_ix[STOP_TAG]]
        terminal_var = torch.unsqueeze(terminal_var, 0)
        alpha = torch.logsumexp(terminal_var, dim=1)[0]
        return alpha

    def _get_lstm_features(self, sentence):
        """Return per-token tag scores of shape (len(sentence), tagset_size)."""
        self.hidden = self.init_hidden()
        embeds = self.word_embeds(sentence).view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(embeds, self.hidden)
        lstm_out = lstm_out.view(len(sentence), self.hidden_dim)
        # tanh squashes the projected scores into (-1, 1).
        lstm_feats = torch.tanh(self.hidden2tag(lstm_out))
        return lstm_feats

    def _score_sentence(self, feats, tags):
        """Return the score of the provided tag sequence as a 1-element tensor."""
        score = torch.zeros(1)
        # Prepend START_TAG so tags[i] is the predecessor of tags[i + 1].
        tags = torch.cat([torch.tensor([self.tag_to_ix[START_TAG]], dtype=torch.long), tags])
        for i, feat in enumerate(feats):
            score = score + \
                self.transitions[tags[i + 1], tags[i]] + feat[tags[i + 1]]
        score = score + self.transitions[self.tag_to_ix[STOP_TAG], tags[-1]]
        return score

    def _viterbi_decode(self, feats):
        """Return ``(path_score, best_path)`` for the best tag sequence."""
        backpointers = []

        # Initialize the viterbi variables in log space.
        init_vvars = torch.full((1, self.tagset_size), -10000.)
        init_vvars[0][self.tag_to_ix[START_TAG]] = 0

        # forward_var at step i holds the viterbi variables for step i-1.
        forward_var_list = []
        forward_var_list.append(init_vvars)

        for feat_index in range(feats.shape[0]):
            gamar_r_l = torch.stack([forward_var_list[feat_index]] * feats.shape[1])
            gamar_r_l = torch.squeeze(gamar_r_l)
            next_tag_var = gamar_r_l + self.transitions
            # Best previous tag (and its score) for every next tag.
            viterbivars_t, bptrs_t = torch.max(next_tag_var, dim=1)

            t_r1_k = torch.unsqueeze(feats[feat_index], 0)
            forward_var_new = torch.unsqueeze(viterbivars_t, 0) + t_r1_k

            forward_var_list.append(forward_var_new)
            backpointers.append(bptrs_t.tolist())

        # Transition to STOP_TAG.
        terminal_var = forward_var_list[-1] + self.transitions[self.tag_to_ix[STOP_TAG]]
        best_tag_id = torch.argmax(terminal_var).tolist()
        path_score = terminal_var[0][best_tag_id]

        # Follow the back pointers to decode the best path.
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we dont want to return that to the caller).
        start = best_path.pop()
        assert start == self.tag_to_ix[START_TAG]  # Sanity check
        best_path.reverse()
        return path_score, best_path

    def neg_log_likelihood(self, sentence, tags):
        """Return ``log Z - score(tags)`` for one sentence (the training loss)."""
        feats = self._get_lstm_features(sentence)
        forward_score = self._forward_alg(feats)
        gold_score = self._score_sentence(feats, tags)
        return forward_score - gold_score

    def forward(self, sentence):  # dont confuse this with _forward_alg above.
        """Return ``(score, tag_seq)`` of the best tag sequence for ``sentence``."""
        # Get the emission scores from the BiLSTM.
        lstm_feats = self._get_lstm_features(sentence)

        # Find the best path, given the features.
        score, tag_seq = self._viterbi_decode(lstm_feats)
        return score, tag_seq


def _safe_divide(numerator, denominator):
    """Element-wise ``numerator / denominator``, defined as 0 where the denominator is 0."""
    nonzero = denominator != 0
    safe_denominator = torch.where(nonzero, denominator, torch.ones_like(denominator))
    return torch.where(nonzero, numerator / safe_denominator, torch.zeros_like(denominator))


def measure(predict, y, num_classes=7):
    """Print and return accuracy, micro-F1 and macro-F1 of a flat prediction.

    Tag id ``k`` is counted in slot ``k - 1`` (so tag id 0 would wrap around
    onto the last slot). Ratios with a zero denominator are treated as 0
    instead of NaN, and macro-F1 is averaged over the classes that occur in
    ``y`` or ``predict``; both only differ from a plain division when some
    class has no true positives.

    Args:
        predict: 1-D tensor of predicted tag ids.
        y: 1-D tensor of gold tag ids, same length as ``predict``.
        num_classes: Number of count slots (default 7).

    Returns:
        ``(acc, micro_F1, macro_F1)`` as 0-dim tensors.
    """
    acc = (torch.sum(torch.eq(predict, y))).type(torch.FloatTensor) / float(len(y))
    TP = torch.zeros(num_classes, dtype=float)
    FP = torch.zeros(num_classes, dtype=float)
    FN = torch.zeros(num_classes, dtype=float)
    for gold, pred in zip(y.tolist(), predict.tolist()):
        if gold == pred:
            TP[gold - 1] += 1
        else:
            FP[pred - 1] += 1
            FN[gold - 1] += 1

    # micro: pool the counts of all classes.
    print(TP)
    micro_precision = _safe_divide(torch.sum(TP), torch.sum(TP) + torch.sum(FP))
    micro_recall = _safe_divide(torch.sum(TP), torch.sum(TP) + torch.sum(FN))
    micro_F1 = _safe_divide(2 * (micro_precision * micro_recall),
                            micro_precision + micro_recall)

    # macro: compute F1 per class, then average.
    macro_precision = _safe_divide(TP, TP + FP)
    macro_recall = _safe_divide(TP, TP + FN)
    macro_F1 = _safe_divide(2 * (macro_recall * macro_precision),
                            macro_recall + macro_precision)
    print(macro_F1)
    seen = (TP + FP + FN) > 0
    if bool(seen.any()):
        macro_F1 = torch.mean(macro_F1[seen])
    else:
        macro_F1 = torch.zeros((), dtype=float)
    print(acc, micro_F1, macro_F1)
    return acc, micro_F1, macro_F1
if __name__ == '__main__':
    START_TAG = "<BEG>"
    STOP_TAG = "<END>"
    EMBEDDING_DIM = 300
    HIDDEN_DIM = 256
    # The last HELD_OUT sentences are kept out of training and used for evaluation.
    HELD_OUT = 2000
    training_data, dic_word_list, dic_label_list, word_to_ix, tag_to_ix = getAllTrain()
    # training_data[0] is used as the list of sentences (word-index lists) and
    # training_data[1] as the parallel list of tag-index lists.

    # Make up some training data
    # training_data = [(
    #     "the wall street journal reported today that apple corporation made money".split(),
    #     "B I I I O O O B I O O".split()
    # ), (
    #     "georgia tech is a university in georgia".split(),
    #     "B I O O O O B".split()
    # )]
    #
    # word_to_ix = {}
    # for sentence, tags in training_data:
    #     for word in sentence:
    #         if word not in word_to_ix:
    #             word_to_ix[word] = len(word_to_ix)
    #
    # tag_to_ix = {"B": 0, "I": 1, "O": 2, START_TAG: 3, STOP_TAG: 4}
    #
    model = BiLSTM_CRF(len(dic_word_list), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM)
    optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

    # Check predictions before training
    with torch.no_grad():
        precheck_sent = torch.tensor(training_data[0][0])
        print(model(precheck_sent))

    train_sentences = training_data[0][:-HELD_OUT]
    train_tags = training_data[1][:-HELD_OUT]
    for epoch in range(1):
        for sentence, tags in tqdm(zip(train_sentences, train_tags),
                                   total=len(train_sentences)):
            # Step 1. Remember that Pytorch accumulates gradients.
            # We need to clear them out before each instance
            model.zero_grad()

            # Step 2. Get our inputs ready for the network, that is,
            # turn them into Tensors of word indices.
            sentence_in = torch.tensor(sentence)
            targets = torch.tensor(tags)

            # Step 3. Run our forward pass. (No Viterbi decoding here: its
            # result is not needed for the loss.)
            loss = model.neg_log_likelihood(sentence_in, targets)

            # Step 4. Compute the loss, gradients, and update the parameters by
            # calling optimizer.step()
            loss.backward()
            optimizer.step()

    # Check predictions after training, on exactly the held-out sentences
    # (each one once, none of them seen during training).
    with torch.no_grad():
        all_predictions = []
        all_gold = []
        for sentence, tags in zip(training_data[0][-HELD_OUT:],
                                  training_data[1][-HELD_OUT:]):
            # Prepare the network input: a tensor of word indices.
            sentence_in = torch.tensor(sentence)
            tag_scores, predict1 = model(sentence_in)
            all_predictions.extend(predict1)
            all_gold.extend(tags)
            x0 = [dic_word_list[s] for s in sentence]
            y0 = [dic_label_list[t] for t in tags]
            predict0 = [dic_label_list[t] for t in predict1]
            print(x0)
            print(y0)
            print(predict0)
        measure(torch.tensor(all_predictions), torch.tensor(all_gold))

问题:倾向于全标注O

  • lstm后使用tanh层

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/481548.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

生成式AI无敌了: 大神微调Stable Diffusion,打造神奇宝贝新世界

来源：大数据文摘授权转载自AI科技评论作者：李梅、施方圆编辑：陈彩娴作为一个强大、公开且足够简单的模型，最近大火的 Stable Diffusion 在文本生成图像之外，给了大家无限的创作可能性。最近，来自 Lambda La…

论文学习21-Globally Normalized Transition-Based Neural Networks(2016,标签偏差问题

文章目录abstract1.introduction2.Model2.1 Transition System2.2 全局和局部归一化3.训练3.2标签偏差问题abstract 介绍了一种基于全局规范化转换的神经网络模型，该模型实现了最先进的词性标注、依存分析和句子压缩结果。我们的模型是一个简单的前馈神经网络…

推翻单一干细胞理论:哺乳动物大脑中发现了第二种干细胞

来源&#xff1a;生物通在成年哺乳动物的大脑中&#xff0c;神经干细胞保证了新的神经细胞&#xff0c;即神经元的不断形成。这个过程被称为成年神经发生&#xff0c;帮助鼠维持它们的嗅觉。一个研究小组最近在鼠大脑中发现了第二种干细胞群&#xff0c;它主要参与成年鼠嗅球中…

论文阅读课1-Attention Guided Graph Convolutional Networks for Relation Extraction(关系抽取,图卷积,ACL2019,n元)

文章目录abstract1.introduction1.1 dense connectionGCN1.2 效果突出1.3 contribution2.Attention Guided GCNs2.1 GCNs2.2 Attention Guided Layer2.3 Densely Connected Layer2.4 线性层2.5 AGGCN for RE3.实验3.1 数据集3.2 设置3.3 n-ary3.4 句子级4.ablation Study4.相关…

Nat. Rev. Genet. | 通过可解释人工智能从深度学习中获得遗传学见解

编译 | 沈祥振审稿 | 夏忻焱今天为大家介绍的是来自Maxwell W. Libbrecht&#xff0c;Wyeth W. Wasserman和Sara Mostafavi的一篇关于人工智能对于基因组学的可解释性的研究的综述。基于深度学习的人工智能&#xff08;AI&#xff09;模型现在代表了基因组学研究中进行功能预测…

复杂系统的逆向工程——通过时间序列重构复杂网络和动力学

导语蛋白质相互作用网络、生态群落、全球气候系统……很多复杂系统都可以抽象为一个相互作用的网络和其上的动力学。传统的研究主要关注在如何构建网络动力学模型&#xff0c;从而产生和实验观测数据具有相似统计特征的结果。所谓的复杂系统逆向工程&#xff0c;就是反其道而行…

关系提取论文总结

文章目录1.模型总结1.1 基于序列的方法1.2 dependency-based（基于依赖的）(有图）1.2.2 句间关系抽取1.5 自动学习特征的方法1.4 联合抽取模型1.6 RNN/CNN/GCN用于关系提取1.7 远程监督1.8句子级关系提取1.9MCR（阅读理解）…

邬贺铨:“物超人”具有里程碑意义,五方面仍需发力

来源&#xff1a;人民邮电报作者&#xff1a;邬贺铨我国正式迈入“物超人”时代。据工业和信息化部最新数据显示&#xff0c;截至8月末&#xff0c;我国三家基础电信企业发展移动物联网终端用户16.98亿户&#xff0c;成为全球主要经济体中率先实现“物超人”的国家。“物超人”…

深度:计算机的本质到底是什么?

来源&#xff1a;图灵人工智能来源&#xff1a;www.cnblogs.com/jackyfei/p/13862607.html作者&#xff1a;张飞洪 01 抽象模型庄子说过吾生有崖&#xff0c;知无涯。以有限的生命去学习无尽的知识是很愚蠢的。所以&#xff0c;学习的终极目标一定不是知识本身&#xff0c;因为…

中科大郭光灿院士团队发PRL,量子力学基础研究取得重要进展

来源&#xff1a;FUTURE | 远见选编&#xff1a;FUTURE | 远见 闵青云 中国科学技术大学郭光灿院士团队在量子力学基础研究方面取得重要进展。该团队李传锋、黄运锋等人与西班牙理论物理学家合作&#xff0c;实验验证了基于局域操作和共享随机性&#xff08;LOSR, Local operat…

论文阅读课2-Inter-sentence Relation Extraction with Document-level (GCNN,句间关系抽取,ACL2019

文章目录abstract1.introduction2.model2.1输入层2.2构造图2.3 GCNN层2.4MIL-based Relation Classification3.实验设置3.1 数据集3.2 数据预处理3.3 基线模型3.4 训练3.5结果4.相关工作4.1 句子间关系抽取4.2 GCNN5. 结论相关博客Sahu, S. K., et al. (2019). Inter-sentence …

量子并不总意味着小尺度,量子物理学家用它探索系外行星生命

来源&#xff1a;机器之心除了量子计算&#xff0c;量子物理学的应用范畴还很广。近日&#xff0c;美国东北大学物理学教授 Gregory Fiete 探讨了量子研究的广泛应用。量子物理学家研究的世界与普通人每天生活的世界是同一个&#xff0c;唯一的区别是它被科学家「缩放」到了无法…

论文阅读课3-GraphRel: Modeling Text as Relational Graphs for(实体关系联合抽取,重叠关系,关系之间的关系,自动提取特征)

文章目录abstract1.Introduction2.相关工作3.回顾GCN4.方法4.1第一阶段4.1.1 Bi-LSTM4.1.2 Bi_GCN4.1.3 实体关系抽取4.2 第二阶段4.2.1 构建关系权图4.3训练4.4 inference5.实验5.1 settings5.1.1数据集5.2 baseline and evaluation metrics5.3 Quantitative Results5.4 细节分…

大脑是如何编码外界各种信息的?

来源&#xff1a;知乎链接&#xff1a;https://www.zhihu.com/question/532956044/answer/2494267009大脑将外部信息编码成心智模型。编码方式分为三种神经链接、语言逻辑和数学。心智模型理论是成型于上世纪九十年代的认知科学理论&#xff0c;代表人物就是著名学者史蒂芬平克…

论文阅读课4-Long-tail Relation Extraction via Knowledge Graph Embeddings(GCN,关系抽取,2019,远程监督,少样本不平衡,2注意

文章目录abstract1.introduction2.相关工作2.1 关系提取2.2 KG embedding2.3 GCNN3. 方法3.1符号3.2框架3.2.1 Instance Encoder3.4 Relational Knowledge Learning through KG Embeddings and GCNs.3.5 knowledge-aware attention4.实验4.1 数据集4.3 result4.4 长尾关系的处理…

用机器学习建立的数字「鼻子」表明,我们的嗅觉既反映了芳香分子的结构,也反映了产生它们的代谢过程...

来源&#xff1a;ScienceAI编辑&#xff1a;萝卜皮Alex Wiltschko 十几岁时就开始收集香水。他的第一瓶是 Azzaro Pour Homme&#xff0c;这是他在 T.J. Maxx百货的货架上发现的一款永恒的古龙水。他从《Perfumes: The Guide》中认出了这个名字&#xff0c;这本书对香气的诗意描…

论文阅读课5-DocRED: A Large-Scale Document-Level Relation Extraction Dataset(大规模文档集关系提取数据集

文章目录abstract1.Introduction2.数据收集3.数据分析4.基线设置5.实验Yao, Y., et al. (2019). DocRED A Large-Scale Document-Level Relation Extraction Dataset. Proceedings of the 57th Annual Meeting ofthe Association for Computational Linguistics.基线docRED数据…

74位图灵奖得主背景显示:大多数没有主修计算机专业,也并非高引用计算机科学家...

来源&#xff1a;中小学信息学竞赛计算机科学是世界上发展最快的学科之一&#xff0c;计算机科学的发展直接影响着人们的生活&#xff0c;并有可能从根本上改变传统的生活方式。图灵奖作为计算机领域的最高奖项&#xff0c;一直被誉为“计算机界的诺贝尔奖”&#xff0c;截止20…

Meta最新款VR头显体验者亲述:Quest Pro更漂亮、更有趣,但戴久了,脑袋疼!

来源&#xff1a;AI前线作者&#xff1a;Rachel Metz编译&#xff1a;核子可乐、冬梅价值一万多块的 VR 头显设备&#xff0c;你会买吗&#xff1f;声明&#xff1a;本文为 InfoQ 翻译&#xff0c;未经许可禁止转载。当地时间 10 月 11 日&#xff0c;元宇宙公司 Meta 召开了一…

文献阅读6-Entity-Relation Extraction as Multi-turn Question Answering(实体关系联合抽取,层次标签依赖关系,multi-turn QA)

文章目录abstract1.Introduction3.相关工作2.2MRC(机器阅读理解&#xff09;2.3 非QA->QA3.数据集和任务3.1别人的数据集3.2我们建立的数据集RESUME4.Model4.1概述4.2生成问题4.3通过MRC来获取答案范围4.4 强化学习5.实验5.1RESUME结果5.2 其他结果6 Ablation Studies6.2问题…