支持xlsx,xls文件,相互对比字段列
输出两个表格文件相同字段,置底色为绿色
存在差异的不同字段,输出两个新的表格文件,差异字段,置底色为红色
注意点:openpyxl 仅支持 xlsx 格式,xls 文件会先被转换为 xlsx 再进行着色;对比前请删除表头行中的空列及多余的字段值,确保对比的准确性
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
import os


def convert_xls_to_xlsx(file: str) -> str:
    """Return the path of an ``.xlsx`` equivalent of *file*.

    A path ending in ``.xls`` is read with pandas (``xlrd`` engine) and
    written back out, without the index column, to a sibling file whose
    extension is ``.xlsx``. Any other path is returned unchanged.

    Args:
        file: Path to a spreadsheet file.

    Returns:
        The path of the converted ``.xlsx`` file, or *file* itself when no
        conversion was needed.
    """
    if not file.endswith('.xls'):
        return file

    df = pd.read_excel(file, engine='xlrd')
    # Swap only the trailing extension. str.replace('.xls', '.xlsx') would
    # also rewrite any earlier ".xls" inside a directory or file name.
    xlsx_file = os.path.splitext(file)[0] + '.xlsx'
    df.to_excel(xlsx_file, index=False)
    return xlsx_file


def read_excel_file(file: str):
    """Load *file* into a pandas DataFrame.

    Args:
        file: Path ending in ``.xlsx`` or ``.xls``.

    Returns:
        The DataFrame produced by ``pd.read_excel``.

    Raises:
        ValueError: If the path has neither of the supported extensions.
    """
    if file.endswith(('.xlsx', '.xls')):
        return pd.read_excel(file)
    raise ValueError("Unsupported file format: {}".format(file))


def load_workbook_file(file: str):
    """Open *file* as an openpyxl workbook.

    Args:
        file: Path ending in ``.xlsx``.

    Returns:
        The workbook returned by ``load_workbook``.

    Raises:
        ValueError: If the path ends in ``.xls`` (must be converted first)
            or has any other unsupported extension.
    """
    if file.endswith('.xlsx'):
        return load_workbook(file)
    if file.endswith('.xls'):
        raise ValueError("openpyxl does not support .xls files directly. Convert to .xlsx first.")
    raise ValueError("Unsupported file format: {}".format(file))


def _fill_header_row(ws, headers, shared, match_fill, diff_fill):
    """Fill row 1 of *ws*: *match_fill* if the header is in *shared*, else *diff_fill*."""
    for col, header in enumerate(headers, start=1):
        ws.cell(row=1, column=col).fill = match_fill if header in shared else diff_fill


def compare_headers(file1: str, file2: str,
                    output_file1: str = 'output1.xlsx',
                    output_file2: str = 'output2.xlsx') -> None:
    """Compare the column headers of two spreadsheets and colour-code them.

    Column names are read with pandas. In copies of both workbooks, each
    header cell in row 1 is filled green when the name occurs in both files
    and red when it occurs in only one. The copies are saved and a summary
    of shared and differing names is printed. Every failure (missing file,
    read/convert error, load error, save error) is reported with ``print``
    and ends the function early; nothing is raised to the caller.

    Args:
        file1: Path to the first ``.xlsx`` / ``.xls`` file.
        file2: Path to the second ``.xlsx`` / ``.xls`` file.
        output_file1: Where the coloured copy of *file1* is saved.
        output_file2: Where the coloured copy of *file2* is saved.

    Returns:
        None.
    """
    # Make sure both inputs exist before doing any work.
    for path in (file1, file2):
        if not os.path.exists(path):
            print(f"文件 {path} 不存在")
            return

    try:
        # .xls inputs are converted to .xlsx first because openpyxl cannot
        # open them. The conversion reads the file too, so it sits inside
        # the try: a failure is reported like any other read error instead
        # of escaping as a traceback.
        file1 = convert_xls_to_xlsx(file1)
        file2 = convert_xls_to_xlsx(file2)
        df1 = read_excel_file(file1)
        df2 = read_excel_file(file2)
    except Exception as e:
        print(f"读取文件时出错: {e}")
        return

    # Column names of each file, in sheet order.
    headers1 = df1.columns.tolist()
    headers2 = df2.columns.tolist()

    # Sets give O(1) membership tests; the result lists keep sheet order.
    names1 = set(headers1)
    names2 = set(headers2)
    same_headers = [h for h in headers1 if h in names2]
    diff_headers1 = [h for h in headers1 if h not in names2]
    diff_headers2 = [h for h in headers2 if h not in names1]
    shared = names1 & names2

    # Fill colours: green for shared headers, red for differing ones.
    green_fill = PatternFill(fill_type='solid', fgColor='00FF00')
    red_fill = PatternFill(fill_type='solid', fgColor='FF0000')

    try:
        wb1 = load_workbook_file(file1)
        wb2 = load_workbook_file(file2)
    except Exception as e:
        print(f"加载工作簿时出错: {e}")
        return

    # Use the first worksheet, the same sheet pandas read the headers from.
    # wb.active is the last-active tab and may be a different sheet.
    ws1 = wb1.worksheets[0]
    ws2 = wb2.worksheets[0]

    _fill_header_row(ws1, headers1, shared, green_fill, red_fill)
    _fill_header_row(ws2, headers2, shared, green_fill, red_fill)

    try:
        wb1.save(output_file1)
        wb2.save(output_file2)
    except Exception as e:
        print(f"保存文件时出错: {e}")
        return

    # Print the comparison summary.
    print('两表格文件对比后相同字段:', same_headers)
    print('表格文件1,存在差异字段:', diff_headers1)
    print('表格文件2,存在差异字段:', diff_headers2)


# Compare the header rows of two xlsx / xls files.
compare_headers('file1.xls', 'file2.xls')