安装bibtexparser
pip install bibtexparser
代码
import bibtexparser
from difflib import SequenceMatcher


def parse_bib_file(filename):
    """Load a BibTeX file and return its entries.

    Args:
        filename: Path to the ``.bib`` file; it is read as UTF-8.

    Returns:
        The ``entries`` attribute of the database returned by
        ``bibtexparser.load``.
    """
    # NOTE(review): ``bibtexparser.load`` appears to be the 1.x API; confirm
    # the installed version before upgrading the dependency.
    with open(filename, 'r', encoding='utf-8') as bibfile:
        bib_database = bibtexparser.load(bibfile)
    return bib_database.entries


def find_duplicates(entries, threshold=0.9):
    """Report duplicate IDs and collect entries with near-identical titles.

    Entries whose ``ID`` was already seen are printed immediately and are
    excluded from the title comparison. Entries without a (non-empty)
    ``title`` field are also excluded from the title comparison, since two
    empty titles would otherwise compare as a perfect match.

    Args:
        entries: Iterable of entry dicts, each with an ``'ID'`` key and
            usually a ``'title'`` key.
        threshold: A pair of titles is reported when its similarity ratio
            is strictly greater than this value.

    Returns:
        A tuple ``(duplicates, no_id_duplicate)``. ``duplicates`` maps a
        title to ``[matched_title, ratio]``; ``no_id_duplicate`` is ``True``
        when no ``ID`` occurred more than once.
    """
    seen_ids = set()
    titles = []
    no_id_duplicate = True
    for entry in entries:
        entry_id = entry['ID']
        title = entry.get('title', '')
        if entry_id in seen_ids:
            print('发现重复: ', f"{entry_id, title}")  # duplicate ID
            no_id_duplicate = False
            continue
        seen_ids.add(entry_id)
        if title:
            titles.append(title)

    duplicates = {}
    for title in titles:
        # title_in_values works on its own copy, so the shared list is safe
        # to pass on every iteration.
        duplicates.update(title_in_values(title, titles, threshold))
    return duplicates, no_id_duplicate


def title_in_values(title: str, values: list, threshold: float = 0.9):
    """Find the titles in ``values`` that closely resemble ``title``.

    One occurrence of ``title`` itself is left out of the comparison (so a
    title is not matched against its own entry); any further identical
    copies are still compared and therefore reported. ``values`` is not
    modified.

    Args:
        title: The title to compare against.
        values: Candidate titles; may or may not contain ``title``.
        threshold: Only ratios strictly greater than this are kept.

    Returns:
        A dict mapping each matching candidate to ``[title, ratio]``,
        ordered by descending ratio.
    """
    others = list(values)  # copy: never mutate the caller's list
    try:
        others.remove(title)
    except ValueError:
        # ``title`` is not among the candidates; compare against all of them.
        pass

    # SequenceMatcher caches its analysis of the second sequence, so the
    # constant ``title`` is set once and only the first sequence is swapped.
    matcher = SequenceMatcher(None, b=title)
    matches = {}
    for other in others:
        matcher.set_seq1(other)
        matches[other] = [title, matcher.ratio()]

    ranked = sorted(matches.items(), key=lambda item: item[1][1], reverse=True)
    return {other: match for other, match in ranked if match[1] > threshold}


def main(bib_filename='./file/MAGNet.bib'):
    """Check a ``.bib`` file for duplicates and print the findings.

    Args:
        bib_filename: Path of the ``.bib`` file to check.
    """
    entries = parse_bib_file(bib_filename)
    duplicates, no_id_duplicate = find_duplicates(entries)
    if not duplicates and no_id_duplicate:
        print('未发现重复!')
        return
    for title in duplicates:
        print('发现疑似重复:', title)


if __name__ == "__main__":
    main()
效果