# 0 Import libraries (导入库)
import os
import pandas as pd
from sklearn.metrics import f1_score
import ast
import numpy as np
# 1 Basic metric computations (基本的metric计算方式)
# 1.1 get_acc1_f1
def get_acc1_f1(df):
    """Compute top-1 accuracy and weighted F1 for a prediction table.

    Args:
        df: DataFrame with a ``prediction`` column and a ``ground_truth``
            column holding comparable labels.

    Returns:
        Tuple ``(acc1, f1)``: the fraction of rows whose prediction equals
        the ground truth, and the F1 score averaged over labels weighted by
        support (the number of true instances of each label).
    """
    acc1 = (df['prediction'] == df['ground_truth']).sum() / len(df)
    f1 = f1_score(df['ground_truth'], df['prediction'], average='weighted')
    return acc1, f1
# 1.2 get_is_correct
def get_is_correct(row):
    """Flag whether the ground truth appears among a row's predictions.

    Args:
        row: Mapping-like record (for example a DataFrame row) with a
            ``prediction`` container and a ``ground_truth`` value.

    Returns:
        The same ``row`` object with a boolean ``is_correct`` entry set.
    """
    row['is_correct'] = row['ground_truth'] in row['prediction']
    return row
# 1.3 get_is_correct10
def get_is_correct10(row):
    """Flag whether the ground truth is within the top-10, top-5 and top-1.

    Args:
        row: Mapping-like record (for example a DataFrame row) with
            ``ground_truth``, the containers ``top10`` and ``top5``, and the
            single value ``top1``.

    Returns:
        The same ``row`` object with boolean ``is_correct10``,
        ``is_correct5`` and ``is_correct1`` entries set.
    """
    truth = row['ground_truth']
    row['is_correct10'] = truth in row['top10']
    row['is_correct5'] = truth in row['top5']
    # bool() keeps the stored flag a plain True/False even when the
    # comparison yields a NumPy boolean.
    row['is_correct1'] = bool(row['top1'] == truth)
    return row
# 1.4 first_nonzero
def first_nonzero(arr, axis, invalid_val=-1):
    """Return the index of the first non-zero element along ``axis``.

    Args:
        arr: Array-like to search; boolean input treats ``True`` as non-zero.
        axis: Axis along which to look for the first non-zero element.
        invalid_val: Value reported where no non-zero element exists.

    Returns:
        Array of indices with ``axis`` reduced away; positions without any
        non-zero element hold ``invalid_val``.
    """
    mask = np.asarray(arr) != 0
    # argmax on a boolean mask yields the first True, but also returns 0 for
    # an all-False slice, so those slices are replaced by invalid_val.
    return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val)
# 1.5 get_ndcg
# Normalized Discounted Cumulative Gain (NDCG): a ranking-quality measure
# commonly used for recommender systems and information retrieval.
def get_ndcg(prediction, targets, k=10):
    """Compute the mean NDCG@k of ranked predictions against single targets.

    Every sample contributes ``1 / log2(rank + 1)`` when its target is found
    at 1-based position ``rank`` within the first ``k`` predictions, and 0
    otherwise.

    Args:
        prediction: Sequence of N ranked prediction sequences. Rows may be
            shorter or longer than ``k``; only the first ``k`` entries of
            each row are considered. The input is not modified.
        targets: Array-like holding the N true labels (a single value is
            broadcast to every row).
        k: Cut-off rank.

    Returns:
        The summed per-sample score divided by N, or ``0.0`` when there are
        no samples.

    Raises:
        ValueError: If ``targets`` cannot be broadcast to one label per row.
    """
    n_sample = len(prediction)
    if n_sample == 0:
        return 0.0
    targets = np.broadcast_to(np.asarray(targets).reshape(-1), (n_sample,))

    # Scan each row directly instead of padding it with a sentinel value:
    # this leaves the caller's lists untouched, really truncates rows longer
    # than k, and cannot produce a false hit when a target equals the pad.
    ranks = []
    for ranked, target in zip(prediction, targets):
        for rank, candidate in enumerate(ranked[:k], start=1):
            if candidate == target:
                ranks.append(rank)
                break

    ranks = np.asarray(ranks, dtype=float)
    return np.sum(1.0 / np.log2(ranks + 1.0)) / n_sample
# 2 Top-10 prediction metrics (Top10预测指标衡量)
# 2.1 Collect the result file list (文件列表获取)
# Directory holding the CSV result files of the top-10 run.
output_dir = 'results/geolife/top10_wot'

# Names of the CSV files found directly inside output_dir.
file_list = [name for name in os.listdir(output_dir) if name.endswith('.csv')]
file_list

# The same files as paths that include output_dir.
file_path_list = [os.path.join(output_dir, name) for name in file_list]
file_path_list

# Load the first file on its own to inspect its contents.
iter_df = pd.read_csv(file_path_list[0])
iter_df
# 2.2 Create the result DataFrame (创建结果dataframe)
# Empty frame that fixes the expected result columns before any file is read.
df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None,
}, index=[])
df

# Read every result file and derive the top-k columns. The frames are
# collected and concatenated once after the loop, because concatenating
# inside the loop re-copies all previously loaded rows on every iteration.
frames = []
for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    # NOTE(review): presumably directories ending in '1' hold top-1 results
    # that need no top-k columns — confirm against the result layout.
    if output_dir[-1] != '1':
        # The CSV stores each prediction as text; parse it back to a value.
        pred_series = iter_df['prediction'].apply(ast.literal_eval)
        # A list prediction (top-k output) keeps its first k elements; a
        # single predicted location is repeated k times instead.
        iter_df['top10'] = pred_series.apply(
            lambda x: x[:10] if isinstance(x, list) else [x] * 10)
        iter_df['top5'] = pred_series.apply(
            lambda x: x[:5] if isinstance(x, list) else [x] * 5)
        iter_df['top1'] = pred_series.apply(
            lambda x: x[0] if isinstance(x, list) else x)
    frames.append(iter_df)
df = pd.concat([df, *frames], ignore_index=True)
df
# 2.3 Apply get_is_correct10 (调用get_is_correct10)
# Run get_is_correct10 on every row to add the is_correct10 / is_correct5 /
# is_correct1 flags.
df = df.apply(get_is_correct10, axis='columns')
df
# 2.4 Compute the results (结果计算)
# Accuracy@k: share of rows whose ground truth lies within the top-k.
acc1 = (df['is_correct1']).sum() / len(df)
acc5 = (df['is_correct5']).sum() / len(df)
acc10 = (df['is_correct10']).sum() / len(df)

# Support-weighted F1 of the top-1 prediction against the ground truth.
f1 = f1_score(df['ground_truth'], df['top1'], average='weighted')

# NDCG@10 over the top-10 lists.
preds = df['top10'].tolist()
targets = np.array(df['ground_truth'].tolist())
ndcg = get_ndcg(prediction=preds, targets=targets, k=10)

print("Acc@1: ", acc1)
print("Acc@5: ", acc5)
print("Acc@10: ", acc10)
print("Weighted F1: ", f1)
print("NDCG@10: ", ndcg)
# The string below appears to be the output recorded from an earlier run.
'''
Acc@1: 0.3295750216825672
Acc@5: 0.8291413703382481
Acc@10: 0.8736629083550159
Weighted F1: 0.21629743615527502
NDCG@10: 0.6276420364672752
'''
# 3 Top1
# 3.1 Read the files and create df (读取文件+创建df)
# Directory holding the CSV result files of the top-1 run.
output_dir = 'results/geolife/top1'
file_list = [file for file in os.listdir(output_dir) if file.endswith('.csv')]
file_path_list = [os.path.join(output_dir, file) for file in file_list]

# Empty frame that fixes the expected result columns before any file is read.
df = pd.DataFrame({
    'user_id': None,
    'ground_truth': None,
    'prediction': None,
    'reason': None,
}, index=[])

# Load the first file on its own to inspect its contents.
pd.read_csv(file_path_list[0])
# 3.2 Load the prediction results (读取prediction 结果)
# Read every result file and concatenate once after the loop; concatenating
# inside the loop re-copies all previously loaded rows on every iteration.
frames = []
for file_path in file_path_list:
    iter_df = pd.read_csv(file_path)
    frames.append(iter_df)
df = pd.concat([df, *frames], ignore_index=True)

# Convert both label columns to Python ints so they compare as integers.
df['prediction'] = df['prediction'].apply(int)
df['ground_truth'] = df['ground_truth'].apply(int)
df
# 3.3 Compute the metrics (计算metric)
# Top-1 accuracy and support-weighted F1 over the concatenated predictions.
acc1, f1 = get_acc1_f1(df)
print("Acc@1: ", acc1)
print("F1: ", f1)
# The string below appears to be the output recorded from an earlier run.
'''
Acc@1: 0.4512864989881469
F1: 0.403742729579556
'''