论文辅助笔记：LLM-Mob metric测量

0 导入库

import os
import pandas as pd
from sklearn.metrics import f1_score
import ast
import numpy as np

1 基本的metric计算方式

1.1 get_acc1_f1

def get_acc1_f1(df):
    """Compute top-1 accuracy and weighted F1 score for a prediction table.

    Args:
        df: table (e.g. a pandas DataFrame) with a 'prediction' column and a
            'ground_truth' column holding one label per row.

    Returns:
        Tuple ``(acc1, f1)``: the fraction of rows whose prediction equals the
        ground truth, and the F1 score returned by ``f1_score`` with
        ``average='weighted'``.
    """
    hits = df['prediction'] == df['ground_truth']
    acc1 = hits.sum() / len(df)
    # 'weighted' averages the per-label F1 using each label's support
    # (its number of true instances) as the weight.
    f1 = f1_score(df['ground_truth'], df['prediction'], average='weighted')
    return acc1, f1

1.2 get_is_correct

def get_is_correct(row):
    """Flag whether the ground truth appears among the row's predictions.

    Args:
        row: mapping-like record (e.g. one DataFrame row) with a 'prediction'
            entry holding the candidate list and a 'ground_truth' entry.

    Returns:
        The same ``row`` object, with a boolean 'is_correct' entry added.
    """
    row['is_correct'] = row['ground_truth'] in row['prediction']
    return row

1.3 get_is_correct10

def get_is_correct10(row):
    """Flag whether the ground truth is hit by the top-10, top-5 and top-1 predictions.

    Args:
        row: mapping-like record (e.g. one DataFrame row) with 'top10' and
            'top5' entries holding candidate lists, a 'top1' entry holding a
            single candidate, and a 'ground_truth' entry.

    Returns:
        The same ``row`` object, with boolean 'is_correct10', 'is_correct5'
        and 'is_correct1' entries added.
    """
    truth = row['ground_truth']
    row['is_correct10'] = truth in row['top10']
    row['is_correct5'] = truth in row['top5']
    # top1 is a single value rather than a list, so compare for equality.
    row['is_correct1'] = bool(row['top1'] == truth)
    return row

1.4 first_nonzero

def first_nonzero(arr, axis, invalid_val=-1):
    """Return the index of the first non-zero element along ``axis``.

    Args:
        arr: array-like of numbers or booleans.
        axis: axis along which to search.
        invalid_val: value reported for slices that contain no non-zero element.

    Returns:
        Array with ``axis`` removed, holding for each slice the index of its
        first non-zero element, or ``invalid_val`` if there is none.
    """
    mask = np.asarray(arr) != 0
    # argmax gives the first True of each slice but also 0 for all-False
    # slices, so those are replaced by invalid_val.
    return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val)

1.5 get_ndcg

# Normalised discounted cumulative gain (NDCG): a ranking-quality measure
# commonly used for recommender systems and information retrieval.
def get_ndcg(prediction, targets, k=10):
    """Calculate the mean NDCG@k of ranked predictions against single targets.

    Each sample contributes ``1 / log2(rank + 1)``, where ``rank`` is the
    1-based position of the first prediction equal to the target. Samples
    whose target is not among the first ``k`` predictions contribute 0.

    Args:
        prediction (Nxk): list of lists of predicted place ids, best first.
            Rows shorter than ``k`` are padded and longer rows are cut to
            ``k``; the caller's lists are not modified.
        targets (N): array-like of actual target place ids, one per row.
        k: number of leading predictions taken into account.

    Returns:
        The summed per-sample score divided by N, or 0.0 when N is 0.

    Raises:
        ValueError: if the number of targets differs from the number of rows.
    """
    n_sample = len(prediction)
    if n_sample == 0:
        return 0.0

    # Filler for short rows; assumed never to be a real place id.
    pad_value = -5
    padded = []
    for xi in prediction:
        # Work on a truncated copy so every row has exactly k entries and the
        # input lists stay untouched.
        top_k = list(xi[:k])
        top_k.extend([pad_value] * (k - len(top_k)))
        padded.append(top_k)
    padded = np.array(padded)

    targets = np.asarray(targets).reshape(-1, 1)
    if targets.shape[0] != n_sample:
        raise ValueError(
            "targets must hold one entry per prediction row: "
            f"got {targets.shape[0]} targets for {n_sample} rows"
        )

    # Element-wise comparison; the (N, 1) targets broadcast across each row.
    matches = padded == targets
    has_hit = matches.any(axis=1)
    # argmax is the index of the first match; +1 turns it into a 1-based rank.
    ranks = matches.argmax(axis=1)[has_hit] + 1
    ndcg = 1 / np.log2(ranks + 1)
    return np.sum(ndcg) / n_sample

2 Top10预测指标衡量

2.1 文件列表获取

# Directory that holds the top-10 prediction CSV files.
output_dir = 'results/geolife/top10_wot'
# os.listdir returns names in arbitrary order; sorting keeps the file order
# (and therefore the previewed first file) identical on every run.
file_list = sorted(file for file in os.listdir(output_dir) if file.endswith('.csv'))
file_list

# Full path of every CSV file, in the same order as file_list.
file_path_list = [os.path.join(output_dir, file) for file in file_list]
file_path_list

# Preview the first result file.
iter_df = pd.read_csv(file_path_list[0])
iter_df

2.2 创建结果dataframe

# Empty frame that fixes the leading result columns before any file is read.
df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None,
}, index=[])
df

# Read every result file, derive the top-k columns, then merge all rows with a
# single concat instead of re-copying the growing frame on every iteration.
frames = []
for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    # Any directory whose name does not end in '1' is treated as holding
    # top-k predictions stored as Python literals in the 'prediction' column.
    if output_dir[-1] != '1':
        pred_series = iter_df['prediction'].apply(ast.literal_eval)  # A pandas series
        # A list prediction (top-k output) keeps its first k elements; a single
        # value (only the most likely location) is repeated k times.
        iter_df['top10'] = pred_series.apply(lambda x: x[:10] if isinstance(x, list) else [x] * 10)
        iter_df['top5'] = pred_series.apply(lambda x: x[:5] if isinstance(x, list) else [x] * 5)
        iter_df['top1'] = pred_series.apply(lambda x: x[0] if isinstance(x, list) else x)
    frames.append(iter_df)
df = pd.concat([df, *frames], ignore_index=True)
df

2.3 调用get_is_correct10

# Row-wise pass: get_is_correct10 receives each row and returns it with the
# is_correct10 / is_correct5 / is_correct1 flags added.
df = df.apply(get_is_correct10, axis='columns')
df

2.4 结果计算

# Share of rows whose ground truth is within the top-1 / top-5 / top-10 predictions.
acc1 = df['is_correct1'].sum() / len(df)
acc5 = df['is_correct5'].sum() / len(df)
acc10 = df['is_correct10'].sum() / len(df)
# Weighted F1 is computed on the single best prediction only.
f1 = f1_score(df['ground_truth'], df['top1'], average='weighted')
# Copy each top-10 list so that any in-place padding done while scoring cannot
# alter the lists stored in df.
preds = [list(top10) for top10 in df['top10']]
targets = np.array(df['ground_truth'].tolist())
ndcg = get_ndcg(prediction=preds, targets=targets, k=10)
print("Acc@1: ", acc1)
print("Acc@5: ", acc5)
print("Acc@10: ", acc10)
print("Weighted F1: ", f1)
print("NDCG@10: ", ndcg)
# Output recorded by the author for this run:
'''
Acc@1:  0.3295750216825672
Acc@5:  0.8291413703382481
Acc@10:  0.8736629083550159
Weighted F1:  0.21629743615527502
NDCG@10:  0.6276420364672752
'''

3 Top1

3.1 读取文件+创建df

# Directory that holds the top-1 prediction CSV files.
output_dir = 'results/geolife/top1'
# os.listdir returns names in arbitrary order; sorting keeps the file order
# (and therefore the previewed first file) identical on every run.
file_list = sorted(file for file in os.listdir(output_dir) if file.endswith('.csv'))
file_path_list = [os.path.join(output_dir, file) for file in file_list]
# Empty frame that fixes the leading result columns before any file is read.
df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None,
}, index=[])
# Preview the first result file.
pd.read_csv(file_path_list[0])

3.2 读取prediction 结果


# Read every result file, then merge all rows with a single concat instead of
# re-copying the growing frame on every iteration.
frames = []
for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    frames.append(iter_df)
df = pd.concat([df, *frames], ignore_index=True)
# Convert both label columns to int so predictions and ground truth are
# compared as the same type.
df['prediction'] = df['prediction'].apply(int)
df['ground_truth'] = df['ground_truth'].apply(int)
df

3.3 计算metric

# Top-1 accuracy and weighted F1 over the merged top-1 predictions.
acc1, f1 = get_acc1_f1(df)
for label, value in (("Acc@1: ", acc1), ("F1: ", f1)):
    print(label, value)
# Output recorded by the author for this run:
'''
Acc@1:  0.4512864989881469
F1:  0.403742729579556
'''

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.mzph.cn/news/828684.shtml

如若内容造成侵权/违法违规/事实不符，请联系多彩编程网进行投诉反馈email:809451989@qq.com，一经查实，立即删除！

论文辅助笔记：LLM-Mob metric测量

0 导入库

1 基本的metric计算方式

1.1 get_acc1_f1

1.2 get_is_correct

1.3 get_is_correct10

1.4 first_nonzero

1.5 get_ndcg

2 Top10预测指标衡量

2.1 文件列表获取

2.2 创建结果dataframe

2.3 调用get_is_correct10

2.4 结果计算

3 Top1

3.1 读取文件+创建df

3.2 读取prediction 结果

3.3 计算metric

相关文章

开源数据集分享———猫脸码客

Exploiting CXL-based Memory for Distributed Deep Learning——论文泛读

Git for Windows 下载与安装

建造者模式（装修公司装修套餐）

代码随想录算法训练营day9 | 28. 实现 strStr()、459.重复的子字符串

使用 Flask 和 WTForms 构建一个用户注册表单

好用的在线客服系统PHP源码(开源代码+终身使用+安装教程) 制作第一步

分布式技术在文本摘要生成中的应用

基于springboot+vue+Mysql的广场舞团管理系统

猫头虎分享已解决Bug || TypeError: Cannot read property 'map' of undefined

智慧养猪场视频AI智能监控与可视化管理方案

Android11适配

(C++) shared_ptr 之循环引用

Vue3+QuaggaJS实现web页面识别条形码

LORA详解

初识reactor响应式编程

DiT论文精读Scalable Diffusion Models with Transformers CVPR2023

小程序AI智能名片S2B2C商城系统：四大主流商业模式深度解析与实战案例分享

【文章转载】Lance Martin的关于RAG的笔记

C# GetField 方法应用实例