import bisect

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import precision_recall_curve

# Create a separate Metrics.py file to hold these helpers.
def calc_auc(y_true, y_pred):
    """Return the ROC AUC of the scores ``y_pred`` against labels ``y_true``.

    ``y_pred`` may be any real-valued score in [0, 1]; no thresholding is
    needed beforehand.
    """
    return metrics.roc_auc_score(y_true, y_pred)


def calc_ks(y_true, y_pred):
    """Return the KS statistic: the largest gap between TPR and FPR.

    ``y_pred`` may be any real-valued score in [0, 1].
    """
    fpr, tpr, _thresholds = metrics.roc_curve(y_true, y_pred)
    # Use the array's own reduction instead of iterating with builtin max().
    return (tpr - fpr).max()


def calc_acc(y_true, y_pred):
    """Return the accuracy of hard predictions.

    ``y_pred`` must already be converted to {0, 1} with a threshold.
    """
    return metrics.accuracy_score(y_true, y_pred)


def calc_f1(y_true, y_pred):
    """Return ``(recall, precision, f1)`` for hard predictions.

    ``y_pred`` must already be converted to {0, 1} with a threshold.
    """
    return (
        metrics.recall_score(y_true, y_pred),
        metrics.precision_score(y_true, y_pred),
        metrics.f1_score(y_true, y_pred),
    )


# Precision when recall is greater than 0.9
def precision_at_r9(y_true, y_pred):
    """Return the operating point with the highest threshold whose recall > 0.9.

    Args:
        y_true: Binary ground-truth labels.
        y_pred: Real-valued scores; larger means more likely positive.

    Returns:
        A ``(recall, precision, threshold)`` tuple taken from the
        precision-recall curve.
    """
    p, r, thresholds = precision_recall_curve(y_true, y_pred)
    # precision_recall_curve returns recall in DECREASING order, while bisect
    # requires an ascending sequence: search the reversed view and map the
    # position back to an index into the original arrays.
    pos = bisect.bisect(r[::-1], 0.9)
    # Last original index whose recall is still strictly above 0.9; clamp to 0
    # in case nothing compares greater than 0.9 (e.g. NaN recall values).
    ind = max(len(r) - 1 - pos, 0)
    if ind >= len(p) - 1:
        # The final curve point has no threshold of its own; fall back to the
        # last real operating point.
        return r[-2], p[-2], thresholds[-1]
    return r[ind], p[ind], thresholds[ind]


# Recall when precision is greater than 0.9
def recall_at_p9(y_true, y_pred):
    """Return the operating point with the lowest threshold whose precision > 0.9.

    Recall never increases along the curve, so the first point whose precision
    exceeds 0.9 is also the one with the highest recall at that precision.

    Args:
        y_true: Binary ground-truth labels.
        y_pred: Real-valued scores; larger means more likely positive.

    Returns:
        A ``(recall, precision, threshold)`` tuple taken from the
        precision-recall curve.
    """
    p, r, thresholds = precision_recall_curve(y_true, y_pred)
    # Precision is not monotonic in the threshold, so a binary search is not
    # valid here; scan for the first point strictly above 0.9 instead.
    above = p > 0.9
    ind = int(above.argmax()) if above.any() else len(p) - 1
    if ind >= len(p) - 1:
        # Only the final curve point (which has no threshold) qualifies; fall
        # back to the last real operating point.
        return r[-2], p[-2], thresholds[-1]
    return r[ind], p[ind], thresholds[ind]
# NDCG@K
def get_dcg(y_pred, y_true, k):
    """Return DCG@k of ``y_true`` ranked by descending ``y_pred``.

    ``y_pred`` and ``y_true`` must correspond element by element, and a larger
    ``y_pred`` must mean "closer to label 1" (more relevant).

    Args:
        y_pred: Scores used to rank the items.
        y_true: Relevance labels of the items.
        k: Number of top-ranked items to keep (slice semantics, so ``None``
            keeps every item).

    Returns:
        The sum of ``(2 ** label - 1) / log2(position + 1)`` over the kept
        items, with positions counted from 1.
    """
    scores = np.asarray(y_pred, dtype=float)
    labels = np.asarray(y_true, dtype=float)
    # A stable sort keeps tied scores in input order, so the result is
    # deterministic; plain arrays also avoid pandas index alignment surprises.
    top = np.argsort(-scores, kind="stable")[0:k]
    gains = 2.0 ** labels[top] - 1.0
    discounts = np.log2(np.arange(1, len(top) + 1) + 1)  # positions start at 1
    return np.sum(gains / discounts)


def calc_ndcg(y_true, y_pred, k):
    """Return NDCG@k: DCG@k of the prediction divided by the ideal DCG@k.

    Returns 0.0 when the ideal DCG is zero (for example when no label is
    positive), instead of dividing by zero and producing NaN.
    """
    dcg = get_dcg(y_pred, y_true, k)
    # The ideal ranking orders the items by their true labels.
    idcg = get_dcg(y_true, y_true, k)
    if idcg == 0:
        return 0.0
    return dcg / idcg
# Recall@K, Precision@K
def calc_f1(y_true, y_pred, k=None):
    """Return ``(recall, precision, f1)`` over the top-``k`` scored items.

    Items are ranked by descending ``y_pred`` (a larger score means closer to
    label 1), the first ``k`` are kept, and their scores are rounded to hard
    {0, 1} predictions before scoring.

    Args:
        y_true: Binary ground-truth labels.
        y_pred: Scores in [0, 1], or predictions already in {0, 1}.
        k: Number of top-ranked items to evaluate. Defaults to ``None``, which
            evaluates every item so the two-argument call form keeps working.

    Returns:
        A ``(recall, precision, f1)`` tuple.
    """
    scores = np.asarray(y_pred, dtype=float)
    labels = np.asarray(y_true)
    # Stable sort: tied scores keep their input order, so top-k is deterministic.
    top = np.argsort(-scores, kind="stable")[0:k]
    top_true = labels[top]
    top_pred = np.round(scores[top])  # threshold the scores into {0, 1}
    # Go through the imported `metrics` module: the bare names recall_score,
    # precision_score and f1_score are never imported in this file.
    return (
        metrics.recall_score(top_true, top_pred),
        metrics.precision_score(top_true, top_pred),
        metrics.f1_score(top_true, top_pred),
    )