利用xpath来提取所有标签里面的内容,即使标签头不同
#-*-coding:utf8-*-
import re
import os
from lxml import etree
html = '''
测试-常规用法- 我
是
谁- who
- am
- i!
- 你是
- 谁!
- who
- you
- are!
'''
selector = etree.HTML(html)
for k in range(1,3):
chinese = selector.xpath('//div[@id="content"][%s]/ul[@id="useful"]//text()'%k)
data = "".join([each for each in chinese])
english = selector.xpath('//div[@id="content"][%s]/ul[@id="useless"]//text()'%k)
Data = "".join([each for each in english])
print data
print Data
结果: