import scrapy
from scrapy.spiders import Spider


def parse_response(response, ip_in):
    # Append the request URL, the raw <title> element and the IP read from
    # dns.txt to quotes.txt, and echo the same line to stdout.
    title = response.xpath("/html/head/title").extract()[0]
    filename = 'quotes.txt'
    with open(filename, 'a+') as f:
        f.write(response.request.url + ' ' + title + ' ' + ip_in + '\n')
    print(response.request.url + ' ' + title + ' ' + ip_in + '\n')


class dmozSpider(Spider):
    name = 'dmoz'

    def start_requests(self):
        # Truncate quotes.txt so every crawl starts with an empty result file.
        filename = 'quotes.txt'
        with open(filename, 'w') as d:
            d.write("")
        # dns.txt holds one whitespace-separated "<subdomain-prefix> <ip>" pair
        # per line; each prefix is expanded into a *.baidu.com hostname.
        with open("dns.txt") as f:
            for line in f.readlines():
                line = line.strip('\n')
                url = line.split()[0] + '.baidu.com'
                ip = line.split()[1]
                print(url + ' ' + ip)
                # The default argument ip_in=ip binds the current ip for each
                # request's callback and avoids late binding in the lambda.
                yield scrapy.Request(url='http://' + url,
                                     callback=lambda response, ip_in=ip: parse_response(response, ip_in))
                yield scrapy.Request(url='https://' + url,
                                     callback=lambda response, ip_in=ip: parse_response(response, ip_in))
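If you are on Scrapy 1.7 or newer, cb_kwargs gives a cleaner way to hand the IP to the callback than the lambda default-argument trick. A minimal sketch, reusing the module-level parse_response defined above (the rest of the spider stays unchanged):

    # Inside the for loop of start_requests(), instead of the lambda callbacks:
    yield scrapy.Request(
        url='http://' + url,
        callback=parse_response,      # Scrapy calls parse_response(response, **cb_kwargs)
        cb_kwargs={'ip_in': ip},      # forwarded as the ip_in keyword argument
    )

The spider can be run as a standalone file with Scrapy's runspider command, e.g. scrapy runspider dns_spider.py (the filename is arbitrary); the results accumulate in quotes.txt.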