多进程适合于计算密集型任务和需要大量计算资源的场景(每个进程拥有独立的解释器,不受 GIL 限制,可以真正并行利用多核),而多线程适合于 I/O 密集型任务和需要快速上下文切换的场景(线程在等待 I/O 时会释放 GIL,且创建与切换的开销比进程小)。
多线程版本
import time
import concurrent.futures
from tqdm import tqdm

# Demo workload: the same three file names repeated ten times (30 tasks).
file_list = ['file1.txt', 'file2.txt', 'file3.txt'] * 10


def process_file(file, i):
    """Process a single file and return its name.

    Args:
        file: Name of the file to process.
        i: Index of the task in submission order; only used for printing.

    Returns:
        The ``file`` argument, unchanged.
    """
    print(file, i)
    # Put the real file-processing code here; the sleep stands in for it.
    time.sleep(2)
    return file


with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Submit every task up front; at most 10 run at the same time.
    futures = [
        executor.submit(process_file, file, i)
        for i, file in enumerate(file_list)
    ]
    # as_completed yields each future as it finishes, so the progress bar
    # advances once per completed task.
    for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
        # result() re-raises any exception raised inside the worker.
        file = future.result()
多进程版本
import time
import concurrent.futures
from tqdm import tqdm
import os

# Demo workload: the same three file names repeated ten times (30 tasks).
file_list = ['file1.txt', 'file2.txt', 'file3.txt'] * 10


def process_file(file, i):
    """Process a single file and return its name.

    Defined at module top level so it can be pickled and sent to the
    worker processes.

    Args:
        file: Name of the file to process.
        i: Index of the task in submission order; only used for printing.

    Returns:
        The ``file`` argument, unchanged.
    """
    print(f"Processing {file} - {i}")
    # Put the real file-processing code here; the sleep stands in for it.
    time.sleep(2)
    return file


# Worker processes do not share module globals with the parent, so everything
# a task needs is passed to process_file as an argument.
# Limit the number of tasks so that not too many are submitted.
# NOTE(review): this drops every entry past the first os.cpu_count() ones, so
# those files are never processed -- confirm that is intended. If
# os.cpu_count() returns None, the slice keeps the whole list.
file_list = file_list[:os.cpu_count()]

# The guard is required for process pools: with the "spawn" start method the
# workers re-import this module, and without the guard each of them would try
# to start a pool of its own.
if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(process_file, file, i)
            for i, file in enumerate(file_list)
        ]
        # as_completed yields each future as it finishes, so the progress bar
        # advances once per completed task.
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            # result() re-raises any exception raised inside the worker.
            file = future.result()