经过几天的研究,我想出了以下方法来解决提取问题。
1. 识别介词,然后识别月份并进行提取。
2. 识别“-”,然后识别月份并进行提取。
部分代码如下所示(节选,依赖上下文中定义的其他变量):
new_w = new_s.split()
# Scan the whitespace-separated tokens in new_w for date expressions and print
# each match prefixed with label_class[i].
#
# Two families of patterns are handled:
#   1. a preposition followed by a number or a month token;
#   2. a "-" separator followed (within two tokens) by a month token.
#
# NOTE(review): prepositions, months, label_class, i and released_date are
# defined outside this excerpt; months is presumably a collection of
# lower-case month names -- confirm against the surrounding code.
#
# Every look-ahead / look-behind index is bounds-checked so that a preposition
# or "-" near either end of the sentence neither raises IndexError nor wraps
# around to the other end of the list through negative indexing.
n_words = len(new_w)
for j, tok in enumerate(new_w):
    if (tok in prepositions
            and j + 1 < n_words
            and (new_w[j + 1].isdecimal() or new_w[j + 1].lower() in months)):
        # Process case like "Starting from Mar27, 2016 to Dec31, 2016"
        if j + 7 < n_words and new_w[j + 4] in prepositions:
            if new_w[j + 5].isdecimal() or new_w[j + 5].lower() in months:
                u = ' '.join(new_w[j:j + 8])
                print(label_class[i] + ': ' + u)
                break
            # No date after the second preposition: nothing is printed here
            # and control falls through to the "-" check below.
        # Process case like "Ticket must be issued on/before 29FEB, 2016"
        elif j > 0 and new_w[j - 1] in prepositions:
            u = ' '.join(new_w[j - 1:j + 4])
            print(label_class[i] + ': ' + u)
            break
        # Process case like "Ticketing valid until 18FEB16"
        else:
            u = ' '.join(new_w[j:j + 4])
            print(label_class[i] + ': ' + u)
            break
    # Process case like "TICKETING PERIOD: NOW - FEB 02, 2016"
    # Process case like "TRAVELING DATES: NOW - FEB 10,2016 FEB 22,2016 - MAY 12,2016"
    if tok == '-' and (
            (j + 1 < n_words and new_w[j + 1].lower() in months)
            or (j + 2 < n_words and new_w[j + 2].lower() in months)):
        if j > 0 and new_w[j - 1].lower() == 'now':
            # "NOW" is replaced by the release date of the document.
            u = released_date + ' - ' + ' '.join(new_w[j + 1:j + 4])
            print(label_class[i] + ': ' + u)
        elif ((j >= 3 and new_w[j - 3].lower() in months)
                or (j >= 2 and new_w[j - 2].lower() in months)):
            # Clamp the start so a range that begins the sentence is not cut
            # off by a negative slice index.
            u = ' '.join(new_w[max(j - 3, 0):j + 4])
            print(label_class[i] + ': ' + u)