☆ Evaluate
https://huggingface.co/docs/evaluate/main/en/installation
★ 解决方案
常用代码
# Common usage of the Hugging Face `evaluate` library.
import evaluate

# List the supported evaluation modules (community-contributed ones included).
evaluate.list_evaluation_modules(include_community=True)

# Load an evaluation function.
accuracy = evaluate.load("accuracy")

# Inspect the metric's description and its expected-inputs documentation.
print(accuracy.description)
print(accuracy.inputs_description)

# In Jupyter, evaluating the bare object renders its documentation.
accuracy

# Compute the metric in one call.
results = accuracy.compute(references=[...], predictions=[...])

# Iterative (streaming) accumulation, then a final compute.
for ref, pred in zip([...], [...]):
    accuracy.add(references=ref, predictions=pred)
accuracy.compute()

# Combine several metrics and compute them together.
clf_metrics = evaluate.combine(['accuracy', 'f1', 'recall', 'precision'])
clf_metrics.compute(predictions=[...], references=[...])
# The metric computation can also be wrapped in a function that merges the
# result dicts (e.g. for use as a Trainer `compute_metrics` callback).
import evaluate

acc_metric = evaluate.load('accuracy')
f1_metric = evaluate.load('f1')


def eval_metrics(eval_predict):
    """Compute accuracy and F1 from an (logits, labels) pair.

    Args:
        eval_predict: a 2-tuple ``(predictions, labels)`` where
            ``predictions`` are raw logits with the class axis last.
            (Assumes a numpy-like array with ``.argmax`` — TODO confirm
            against the caller.)

    Returns:
        dict: the accuracy result dict updated with the F1 result dict.
    """
    predictions, labels = eval_predict
    predictions = predictions.argmax(axis=-1)
    acc = acc_metric.compute(predictions=predictions, references=labels)
    f1 = f1_metric.compute(predictions=predictions, references=labels)
    acc.update(f1)  # merge the F1 entries into the accuracy dict
    return acc
# Visualization for comparing evaluation results (currently only radar plots
# are supported). Useful both for comparing several models on one dataset and
# for comparing one model across several datasets.
from evaluate.visualization import radar_plot

data = [...]         # one dict of metric results per model
model_names = [...]  # one display name per entry in `data`
plot = radar_plot(data=data, model_names=model_names)