官网: Ragas
Ragas(Retrieval-Augmented Generation, RAG)是一个基于简单手写提示的评估框架,通过这些提示全自动地衡量答案的准确性、 相关性和上下文相关性。这种评估方法不需要访问人工注释的数据集或参考答案,使得评估过程更为简便和高效。
跟着官方的入门案例来了解该框架
安装
pip install ragas
加载数据
从Hugging face中加载数据集
from datasets import load_dataset
dataset = load_dataset("explodinggradients/amnesty_qa","english_v3",trust_remote_code=True
)# 若无法使用代理也可以直接加载
dataset = {'question': 'Which private companies in the Americas are the largest GHG emitters according to the Carbon Majors database?','ground_truths': ['The largest private companies in the Americas that are the largest GHG emitters according to the Carbon Majors database are ExxonMobil, Chevron, and Peabody.'],'answer': 'According to the Carbon Majors database, the largest private companies in the Americas that are the largest GHG emitters are:\n\n1. Chevron Corporation (United States)\n2. ExxonMobil Corporation (United States)\n3. ConocoPhillips Company (United States)\n4. BP plc (United Kingdom, but with significant operations in the Americas)\n5. Royal Dutch Shell plc (Netherlands, but with significant operations in the Americas)\n6. Peabody Energy Corporation (United States)\n7. Duke Energy Corporation (United States)\n8. TotalEnergies SE (France, but with significant operations in the Americas)\n9. BHP Group Limited (Australia, but with significant operations in the Americas)\n10. Rio Tinto Group (United Kingdom/Australia, but with significant operations in the Americas)\n\nPlease note that the rankings may change over time as new data becomes available.','contexts': ['The private companies responsible for the most emissions during this period, according to the database, are from the United States: ExxonMobil, Chevron and Peabody.\nThe largest emitter amongst state-owned companies in the Americas is Mexican company Pemex, followed by Venezuelan company Petróleos de Venezuela, S.A.']}
将数据集加载到 Ragas EvaluationDataset 对象中。
from ragas import EvaluationDataseteval_dataset = EvaluationDataset.from_hf_dataset(dataset["eval"])# 若无法使用代理也可以直接加载
eval_dataset = Dataset.from_dict(data_samples)
选择对应的指标并运行(以OpenAI为例)
metrics = [LLMContextRecall(llm=evaluator_llm), #对应着上下文召回率FactualCorrectness(llm=evaluator_llm), #对应着事实正确性Faithfulness(llm=evaluator_llm), #对应着忠诚度SemanticSimilarity(embeddings=evaluator_embeddings) #对应着语义相似度
]
results = evaluate(dataset=eval_dataset, metrics=metrics)
导出和分析结果
df = results.to_pandas()
print(df.head())
效果展示:
代码总览
from datasets import load_dataset
from ragas import EvaluationDataset
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddingsdataset = load_dataset("explodinggradients/amnesty_qa","english_v3",trust_remote_code=True
)
eval_dataset = EvaluationDataset.from_hf_dataset(dataset["eval"])evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-3.5-turbo", api_key='sk-L9Vf0rGL7kkNTGXCvDG1vHBj5wQj1jAwX1K8wp0qbIO9XCFx', base_url='https://api.openai-proxy.org/v1'))
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())metrics = [LLMContextRecall(llm=evaluator_llm),FactualCorrectness(llm=evaluator_llm),Faithfulness(llm=evaluator_llm),SemanticSimilarity(embeddings=evaluator_embeddings)
]
results = evaluate(dataset=eval_dataset, metrics=metrics)df = results.to_pandas()print(df.head())