# 1. 创建一个爬虫程序 (create the crawler module: blob_spider.py)
import requests

# cnblogs homepage URLs, one per page (pages 1..50).
# BUG FIX: the original template lacked the f-prefix, so every entry was
# the identical literal string containing "{page}" instead of the number.
urls = [
    f'https://www.cnblogs.com/#p{page}'
    for page in range(1, 50 + 1)
]


def craw(url):
    """Fetch *url* with requests and print the URL and response body length."""
    r = requests.get(url)
    print(url, len(r.text))


if __name__ == "__main__":
    # Demo fetch of the first page only when run as a script, so that
    # importing this module no longer triggers a network request.
    craw(urls[0])
# 2. 定义单线程和多线程爬取 (single-threaded vs multi-threaded crawling
#    — the original header said "进程"/process, but the code uses threads)
import blob_spider
import threading
import time


def single_thread():
    """Crawl every URL sequentially in the calling thread."""
    # BUG FIX: original message was misspelled "singe_thread begin".
    print("single_thread begin")
    for url in blob_spider.urls:
        blob_spider.craw(url)
    # BUG FIX: original message was misspelled "singe_thread end".
    print("single_thread end")


def multi_thread():
    """Crawl every URL concurrently, one thread per URL."""
    print("multi_thread begin")
    threads = [
        threading.Thread(target=blob_spider.craw, args=(url,))
        for url in blob_spider.urls
    ]
    for thread in threads:
        thread.start()
    # Block until every worker finishes before reporting completion,
    # so the timing below measures the full crawl.
    for thread in threads:
        thread.join()
    print("multi_thread end")


if __name__ == "__main__":
    # Time the sequential version, then the threaded version, to show
    # the speed-up threads give on I/O-bound work.
    start = time.time()
    single_thread()
    end = time.time()
    print("single_thread用时:{}".format(end - start))

    start = time.time()
    multi_thread()
    end = time.time()
    print("multi_thread用时:{}".format(end - start))