Python计算回归拟合各项指标

0、各项回归指标简介

Relative Root Mean Squared Error（RRMSE）：The RRMSE normalizes the Root Mean Squared Error (RMSE) by the mean of observations. It goes from 0 to infinity. The lower the better the prediction performance.
The NRMSE（Normalized Root Mean Square Error） is calculated as the RMSE divided by the range of the observed values, expressed as a percentage. The range of the observed values is the difference between the maximum and minimum values of the observed data.
Best possible score is 0.0, smaller value is better. Range = [0, +inf)
MAE (Mean Absolute Error) is the average of the absolute differences between the original and predicted values over the data set.
MSE (Mean Squared Error) is the average of the squared differences
between the original and predicted values over the data set.
RMSE (Root Mean Squared Error) is the square root of MSE; it expresses
the error in the same units as the original values.
R-squared (coefficient of determination) measures how well the
predicted values fit the original values. It typically ranges from
0 to 1 and can be interpreted as a percentage. The higher the value,
the better the model.

1、Python计算回归拟合各项指标：包括RMSE # RRMSE # RSE # NSE # MAE # R # R2 # MAPE # ρ
在这里插入图片描述
图片来源：https://github.com/alifrmf/Evaluation-Metrics-for-Linear-Regression/blob/main/README.md

代码：

# RMSE
def rmse(y_true, y_pred):
    """Return the Root Mean Squared Error (RMSE) between two arrays.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable against
            ``y_true``).

    Returns:
        float: ``sqrt(mean((y_true - y_pred) ** 2))``.
    """
    # Cast to float so plain lists are accepted and unsigned / small integer
    # dtypes cannot wrap around or overflow in the subtraction and squaring.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    squared_diff = (y_true - y_pred) ** 2
    return np.sqrt(np.mean(squared_diff))

RRMSE计算方式一：RMSE除以真实值的均值

# RRMSE（Relative Root Mean Squared Error ）
def rrmse(y_true, y_pred):
    """Return the Relative RMSE: the RMSE divided by the mean of ``y_true``.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable against
            ``y_true``).

    Returns:
        float: ``sqrt(mean((y_true - y_pred) ** 2)) / mean(y_true)``,
        returned as a plain ratio (not multiplied by 100).
    """
    # Cast to float so plain lists are accepted and integer dtypes cannot
    # wrap around or overflow in the subtraction and squaring.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Root of the mean squared error between predictions and observations.
    root_mean_squared_error = np.sqrt(np.mean((y_true - y_pred) ** 2))

    # Normalise by the mean of the true values. A zero mean gives inf/nan
    # under NumPy's floating-point division rather than raising.
    return root_mean_squared_error / np.mean(y_true)

RRMSE计算方式二：RMSE除以（真实值最大值 − 真实值最小值），再乘以100以百分比表示（即上文所述的NRMSE）

def rrmse(s, o):
    """Relative Root Mean Squared Error, normalised by the observed range.

    Note the argument order: simulated values first, observed values second.

    input:
        s: simulated
        o: observed
    output:
        relative root mean squared error, in percent:
        100 * sqrt(mean((s - o) ** 2)) / (max(o) - min(o))
    """
    # Cast to float ndarrays so plain lists are accepted (lists have no
    # .max()/.min()) and integer dtypes cannot wrap around or overflow.
    s = np.asarray(s, dtype=float)
    o = np.asarray(o, dtype=float)
    # A constant observed series has zero range, which gives inf/nan under
    # NumPy's floating-point division rather than raising.
    return 100 * np.sqrt(np.mean((s - o) ** 2)) / (o.max() - o.min())

# RSE
def root_squared_error(y_true, y_pred):
    """Calculate the Root Squared Error between two arrays.

    As implemented, this is the square root of the *mean* squared error, so
    the result is numerically identical to the RMSE.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable against
            ``y_true``).

    Returns:
        float: ``sqrt(mean((y_true - y_pred) ** 2))``.
    """
    # Cast to float so plain lists are accepted and integer dtypes cannot
    # wrap around or overflow in the subtraction and squaring.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    error = y_true - y_pred
    # Local names deliberately differ from the function name so the
    # function is not shadowed inside its own body.
    mean_sq_error = np.mean(np.square(error))
    return np.sqrt(mean_sq_error)

# NSE
def nash_sutcliffe_efficiency(y_true, y_pred):
    """Calculate the Nash-Sutcliffe Efficiency (NSE) between two arrays.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable against
            ``y_true``).

    Returns:
        float: ``1 - sum((y_true - y_pred) ** 2)
        / sum((y_true - mean(y_true)) ** 2)``. A value of 1 means a perfect
        match; 0 means the prediction is no better than the mean of
        ``y_true``.
    """
    # Cast to float so plain lists are accepted and integer dtypes cannot
    # wrap around or overflow in the subtraction and squaring.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    numerator = np.sum(np.square(y_true - y_pred))
    # Zero when y_true is constant; the division then gives inf/nan under
    # NumPy's floating-point rules rather than raising.
    denominator = np.sum(np.square(y_true - np.mean(y_true)))
    return 1 - (numerator / denominator)

# MAE
def mean_absolute_error(y_true, y_pred):
    """Calculate the Mean Absolute Error (MAE) between two arrays.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable against
            ``y_true``).

    Returns:
        float: ``mean(abs(y_true - y_pred))``.
    """
    # Cast to float so plain lists are accepted and unsigned integer dtypes
    # cannot wrap around in the subtraction (which would inflate the error).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))

# R
def pearson_correlation_coefficient(y_true, y_pred):
    """Calculate the Pearson Correlation Coefficient (R) between two arrays.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like).

    Returns:
        float: The off-diagonal entry ``[0, 1]`` of
        ``np.corrcoef(y_true, y_pred)``.
    """
    correlation_matrix = np.corrcoef(y_true, y_pred)
    # For two 1-D inputs the matrix is 2x2; [0, 1] is corr(y_true, y_pred).
    return correlation_matrix[0, 1]

# R2
def r_squared(y_true, y_pred):
    """Calculate the R squared value between two arrays.

    This implementation returns the square of the Pearson correlation
    coefficient; it does not compute ``1 - SS_res / SS_tot``.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like).

    Returns:
        float: ``np.corrcoef(y_true, y_pred)[0, 1] ** 2``.
    """
    correlation_matrix = np.corrcoef(y_true, y_pred)
    correlation_xy = correlation_matrix[0, 1]
    # Returned directly rather than bound to a local named like the function,
    # which would shadow the function inside its own body.
    return correlation_xy ** 2

# ρ (RRMSE / (1 + R))
def relative_rmse(y_true, y_pred):
    """Return the RMSE divided by the range of ``y_true``.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like, broadcastable against
            ``y_true``).

    Returns:
        float: ``sqrt(mean((y_true - y_pred) ** 2))
        / (max(y_true) - min(y_true))``, as a plain ratio.
    """
    # Cast to float so plain lists are accepted and integer dtypes cannot
    # wrap around or overflow in the subtraction and squaring.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Mean squared error computed with NumPy directly, so the snippet does not
    # depend on an un-imported ``metrics`` module.
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    # A constant y_true has zero range; the division then gives inf/nan
    # under NumPy's floating-point rules rather than raising.
    return rmse / (np.max(y_true) - np.min(y_true))


def pearson_correlation_coefficient(y_true, y_pred):
    """Return the Pearson correlation coefficient (R) of the two arrays.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like).

    Returns:
        float: The off-diagonal entry ``[0, 1]`` of
        ``np.corrcoef(y_true, y_pred)``.
    """
    correlation_matrix = np.corrcoef(y_true, y_pred)
    return correlation_matrix[0, 1]


def rho(y_true, y_pred):
    """Return the performance index rho = RRMSE / (1 + R).

    Combines the two helpers above exactly as the section header formula
    states: ``relative_rmse(...) / (1 + pearson_correlation_coefficient(...))``.

    Args:
        y_true: Actual values (array-like).
        y_pred: Predicted values (array-like).

    Returns:
        float: The rho index. When R equals -1 the denominator is zero and
        the result is inf/nan under NumPy's floating-point rules.
    """
    rrmse_value = relative_rmse(y_true, y_pred)
    r = pearson_correlation_coefficient(y_true, y_pred)
    return rrmse_value / (1 + r)

代码来源：https://github.com/alifrmf/Evaluation-Metrics-for-Linear-Regression/blob/main/Regression%20Metrics%20for%20Machine%20Learning.py

2、Python计算bias、rbias、mae、rmse等指标
代码来源：https://github.com/dsi-llc/scripts/blob/d4445ef02a971754fdaef901250b42b8394539fa/EEstatslib.py#L80

import numpy as np# ------------------------------------------------------------------------------
# statistic functions
# ------------------------------------------------------------------------------


def _as_float_pair(s, o):
    """Return ``s`` and ``o`` as float ndarrays.

    Casting up front lets the statistics accept lists and pandas Series
    alike, and prevents unsigned / small integer dtypes from wrapping around
    or overflowing in ``s - o`` and ``(s - o) ** 2``.
    """
    return np.asarray(s, dtype=float), np.asarray(o, dtype=float)


def drop_nan(df):
    """Return ``df`` with every row that contains a NaN removed.

    The upstream docstring says the dataframe comes from the
    ``dffromdatfile`` function in ``dataFrameFromdatfiles.py``; that helper
    is not part of this file.
    """
    return df.dropna()


def data_paired(df):
    """Return the number of paired data rows (``df.shape[0]``).

    Intended to be called on a dataframe whose NaN rows were already dropped.
    """
    return df.shape[0]


def bias(s, o):
    """Bias (mean error).

    input:
        s: simulated
        o: observed
    output:
        bias: mean(s - o)
    """
    s, o = _as_float_pair(s, o)
    return np.mean(s - o)


def rbias(s, o):
    """Relative Bias.

    input:
        s: simulated
        o: observed
    output:
        relative bias, in percent: 100 * sum(s - o) / sum(o)
    """
    s, o = _as_float_pair(s, o)
    return 100 * np.sum(s - o) / np.sum(o)


def mae(s, o):
    """Mean (Average) Absolute Error.

    input:
        s: simulated
        o: observed
    output:
        mean absolute error: mean(|s - o|)
    """
    s, o = _as_float_pair(s, o)
    return np.mean(np.abs(s - o))


def rmse(s, o):
    """Root Mean Squared Error.

    input:
        s: simulated
        o: observed
    output:
        root mean squared error: sqrt(mean((s - o) ** 2))
    """
    s, o = _as_float_pair(s, o)
    return np.sqrt(np.mean((s - o) ** 2))


def rrmse(s, o):
    """Relative Root Mean Squared Error.

    input:
        s: simulated
        o: observed
    output:
        relative root mean squared error, in percent of the observed range:
        100 * sqrt(mean((s - o) ** 2)) / (max(o) - min(o))
    """
    s, o = _as_float_pair(s, o)
    return 100 * np.sqrt(np.mean((s - o) ** 2)) / (o.max() - o.min())


def correlation(s, o):
    """Correlation Coefficient.

    input:
        s: simulated
        o: observed
    output:
        correlation coefficient: np.corrcoef(o, s)[0, 1]
    """
    return np.corrcoef(o, s)[0, 1]


def r_sqr(s, o):
    """R Squared (Square of Correlation Coefficient).

    input:
        s: simulated
        o: observed
    output:
        R Squared: correlation(s, o) ** 2
    """
    return correlation(s, o) ** 2


def nsi(s, o):
    """Nash-Sutcliffe Index of Efficiency.

    input:
        s: simulated
        o: observed
    output:
        nash-sutcliffe index of efficiency:
        1 - sum((s - o) ** 2) / sum((o - mean(o)) ** 2)
    """
    s, o = _as_float_pair(s, o)
    return 1 - np.sum((s - o) ** 2) / np.sum((o - np.mean(o)) ** 2)


def coe(s, o):
    """Coefficient of Efficiency.

    input:
        s: simulated
        o: observed
    output:
        coefficient of efficiency:
        1 - sum(|s - o|) / sum(|o - mean(o)|)
    """
    s, o = _as_float_pair(s, o)
    return 1 - np.sum(np.abs(s - o)) / np.sum(np.abs(o - np.mean(o)))


def ioa(s, o):
    """Index of Agreement.

    input:
        s: simulated
        o: observed
    output:
        index of agreement:
        1 - sum((o - s) ** 2) / sum((|s - mean(o)| + |o - mean(o)|) ** 2)
    """
    s, o = _as_float_pair(s, o)
    o_mean = np.mean(o)
    potential_error = np.sum((np.abs(s - o_mean) + np.abs(o - o_mean)) ** 2)
    return 1 - np.sum((o - s) ** 2) / potential_error


def kge(s, o):
    """Kling-Gupta Efficiency.

    input:
        s: simulated
        o: observed
    output:
        kgef: kling-gupta efficiency
        cc: correlation
        alpha: ratio of the standard deviation
        beta: ratio of the mean
    """
    s, o = _as_float_pair(s, o)
    cc = correlation(s, o)
    alpha = np.std(s) / np.std(o)
    # Both series have the same length, so the ratio of sums equals the
    # ratio of means.
    beta = np.sum(s) / np.sum(o)
    kgef = 1 - np.sqrt((cc - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)
    return kgef, cc, alpha, beta


def stats_summary(df, sim_column_idx=0, obs_column_idx=1, decimals=3):
    """Statistics Summary, output all statistics number in dictionary.

    input:
        df: dataframe from EE.dat file (default just two columns, model and
            data)
        sim_column_idx: column index for simulated values (default 0)
        obs_column_idx: column index for observed values (default 1)
        decimals: round all statistics to the given number of decimals
            (default 3)
    output:
        statsummary: dictionary with all statistics number
    """
    # Rows with a NaN in any column are dropped first, so every statistic is
    # computed on the same set of paired values.
    df_drop = drop_nan(df)
    simulated = df_drop.iloc[:, sim_column_idx]
    observed = df_drop.iloc[:, obs_column_idx]

    statsummary = {
        'Data Paired': data_paired(df_drop),
        'Bias': np.round(bias(simulated, observed), decimals),
        'Percent Bias': np.round(rbias(simulated, observed), decimals),
        'Mean Absolute Error': np.round(mae(simulated, observed), decimals),
        'RMSE': np.round(rmse(simulated, observed), decimals),
        'RRMSE': np.round(rrmse(simulated, observed), decimals),
        'R': np.round(correlation(simulated, observed), decimals),
        'R-Sqr': np.round(r_sqr(simulated, observed), decimals),
        'Nash-Sutcliffe Efficiency': np.round(nsi(simulated, observed),
                                              decimals),
        'Coefficient of Efficiency': np.round(coe(simulated, observed),
                                              decimals),
        'Index of Agreement': np.round(ioa(simulated, observed), decimals),
        # kge() returns (kgef, cc, alpha, beta); only kgef is reported.
        'Kling-Gupta Efficiency': np.round(kge(simulated, observed)[0],
                                           decimals),
    }
    return statsummary