🚀 作者 :“码上有前”
🚀 文章简介 :Python
🚀 欢迎小伙伴们 点赞👍、收藏⭐、留言💬
Python 练习题
- 抽取关键词
抽取关键词
import os
import json
import pandas as pd

# Input locations. `folder_path` is the argument of the (currently
# commented-out) extract_keywords call further down; `categories_path` is
# passed to the extract_keywords_from_json call at the end of the script.
folder_path = './Cosmetic_data/Brand_Classification/brand&details_analysis'
categories_path = './Cosmetic_data/Makeup_Classification/pcommit&details_analysis'
keyword_column = '关键词'  # Replace with the actual keyword column name.


def extract_keywords(folder_path, keyword_column):
    """Collect the values of one column from every CSV file in a folder.

    Args:
        folder_path: Directory whose ``*.csv`` files are read (non-recursive).
        keyword_column: Name of the column holding the keywords. Files that
            do not have this column are skipped.

    Returns:
        A list of keyword strings in file-name order, then row order.
        Newlines and tabs are removed from every value; missing cells and
        values that end up empty are left out.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be read.
    """
    keyword_list = []

    # os.listdir() returns entries in arbitrary order; sort so the result is
    # reproducible from run to run.
    csv_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.csv')
    )

    for csv_file in csv_files:
        file_path = os.path.join(folder_path, csv_file)
        df = pd.read_csv(file_path)

        if keyword_column not in df.columns:
            continue

        # Drop missing cells BEFORE casting to str: astype(str) would turn
        # NaN into the literal text "nan", which the empty-string filter
        # below cannot recognise.
        keywords = (
            df[keyword_column]
            .dropna()
            .astype(str)
            .str.replace(r'\n|\t', '', regex=True)
            .tolist()
        )

        # Discard values that were nothing but newlines/tabs.
        keyword_list.extend(keyword for keyword in keywords if keyword)

    return keyword_list
# result_keywords = extract_keywords(folder_path, keyword_column)
# 打印结果
# print("提取的关键词列表:")
# print(result_keywords)


def extract_keywords_from_json(categories_path, keyword_key):
    """Collect keyword strings stored under one key in a folder of JSON files.

    Each ``*.json`` file is expected to hold a list of objects; a file whose
    top level is a single object is treated as a one-record list. The value
    under ``keyword_key`` may be a string or a list of strings.

    Args:
        categories_path: Directory whose ``*.json`` files are read
            (non-recursive, UTF-8).
        keyword_key: Key to look up in every record.

    Returns:
        A list of keywords in file-name order, then record order. Newlines
        and tabs are removed, surrounding whitespace is stripped, and empty
        results are dropped. Records that are not objects, and values or
        list entries that are not strings, are skipped.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        json.JSONDecodeError: If a ``*.json`` file is not valid JSON.
    """
    keyword_list = []

    # os.listdir() returns entries in arbitrary order; sort so the result is
    # reproducible from run to run.
    json_files = sorted(
        name for name in os.listdir(categories_path) if name.endswith('.json')
    )

    for json_file in json_files:
        file_path = os.path.join(categories_path, json_file)
        with open(file_path, 'r', encoding="utf-8") as f:
            data = json.load(f)

        # Iterating a dict yields its keys, which would turn the membership
        # test below into a substring check; wrap a lone object instead.
        records = [data] if isinstance(data, dict) else data
        if not isinstance(records, list):
            continue

        for item in records:
            if not isinstance(item, dict) or keyword_key not in item:
                continue

            keywords = item[keyword_key]
            if isinstance(keywords, str):
                keywords = [keywords]
            elif not isinstance(keywords, list):
                continue

            for keyword in keywords:
                if not isinstance(keyword, str):
                    continue
                cleaned = keyword.replace('\n', '').replace('\t', '').strip()
                if cleaned:
                    keyword_list.append(cleaned)

    return keyword_list
# Gather the keywords from the JSON files under `categories_path`, using the
# value of `keyword_column` as the JSON key to look up in each record.
categories_keywords = extract_keywords_from_json(categories_path, keyword_column)