两个 PDF 分别是两家电商公司的财报之类的文件。我们想分析这两家公司的盈利情况:利用 LlamaIndex 的 RAG 检索财报中与盈利相关的部分,再进行对比。
大模型使用 Tongyi(通义);如果不指定,默认使用的是 OpenAI。
# Load the e-commerce financial report data
from llama_index.core import SimpleDirectoryReader

# Each quarterly-results PDF is read on its own so the two companies'
# documents stay separate (A first, then B).
A_docs, B_docs = (
    SimpleDirectoryReader(input_files=[pdf_path]).load_data()
    for pdf_path in (
        "电商A-Third Quarter 2023 Results.pdf",
        "电商B-Third Quarter 2023 Results.pdf",
    )
)

# Load the local embedding model
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Register the embedding model on the global Settings object; the model is
# referenced by a relative filesystem path rather than a hub identifier.
Settings.embed_model = HuggingFaceEmbedding(model_name="../../bge-small-zh-v1.5")

# Configure the LLM
import os

from dotenv import load_dotenv
from langchain_community.llms import Tongyi

# Load the variables defined in the given env file.
load_dotenv("../../key.env")

# Read the API key once and fail with an actionable message when it is absent
# (still a KeyError, as the original os.environ[...] lookup raised).
DASHSCOPE_API_KEY = os.getenv("DASHSCOPE_API_KEY")
if DASHSCOPE_API_KEY is None:
    raise KeyError(
        "DASHSCOPE_API_KEY is not set; define it in ../../key.env or in the environment"
    )
# Alias kept so any code relying on the original `key` name keeps working.
key = DASHSCOPE_API_KEY

# NOTE(review): the key is not passed to Tongyi explicitly, so it is presumably
# picked up from the environment by the client - confirm.
Settings.llm = Tongyi(temperature=1)

# Build the indexes from the documents
from llama_index.core import VectorStoreIndex

# One vector index per company, built in order (A, then B) from the
# documents loaded earlier.
A_index, B_index = (
    VectorStoreIndex.from_documents(company_docs)
    for company_docs in (A_docs, B_docs)
)

# Persist the indexes (save them locally)
from llama_index.core import StorageContext

# Write each index's storage context to its own directory under ./storage.
for _index, _persist_dir in (
    (A_index, "./storage/A"),
    (B_index, "./storage/B"),
):
    _index.storage_context.persist(persist_dir=_persist_dir)

# Load the indexes back from local storage
from llama_index.core import load_index_from_storage

# Reload both indexes from the directories they were persisted to.
# `index_loaded` records whether BOTH reloads succeeded.
try:
    storage_context = StorageContext.from_defaults(persist_dir="./storage/A")
    A_index = load_index_from_storage(storage_context)
    storage_context = StorageContext.from_defaults(persist_dir="./storage/B")
    B_index = load_index_from_storage(storage_context)
except Exception as exc:
    # Best effort: a failed reload leaves any index that was not yet rebound
    # pointing at its previous in-memory value, so the script can continue.
    # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
    # propagate, and the reason is reported instead of silently dropped.
    print(f"Could not reload persisted indexes, keeping the in-memory ones: {exc!r}")
    index_loaded = False
else:
    index_loaded = True

# Create the query engines
# One query engine per index; each is created with similarity_top_k=3.
A_engine, B_engine = (
    company_index.as_query_engine(similarity_top_k=3)
    for company_index in (A_index, B_index)
)

# Configure the query tools
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import ToolMetadata

# Wrap each company's query engine as a named tool. The description is the
# text attached to the tool's metadata, so it must name the company the
# engine actually covers (B_Finance previously claimed to describe company A).
query_engine_tools = [
    QueryEngineTool(
        query_engine=A_engine,
        metadata=ToolMetadata(
            name="A_Finance",
            description="用于提供A公司的财务信息 ",
        ),
    ),
    QueryEngineTool(
        query_engine=B_engine,
        metadata=ToolMetadata(
            name="B_Finance",
            description="用于提供B公司的财务信息 ",
        ),
    ),
]

# Create the ReAct agent
from llama_index.core.agent import ReActAgent

# Build the agent over the two finance query tools, with verbose output on.
agent = ReActAgent.from_tools(
    query_engine_tools,
    verbose=True,
)

# Have the agent carry out the task: compare the two companies' sales and
# answer in Chinese (the prompt text is sent as-is).
agent.chat(
    "比较一下电商A,电商B个公司的销售额,请使用中文回答"
)