# A simple example of using a genetic algorithm to search for optimal
# XGBoost hyper-parameters, with a custom (focal) loss objective.
import pandas as pd
import numpy as np
import xgboost as xgb
from sko. GA import GA
from sklearn. model_selection import train_test_split
from sklearn. linear_model import LogisticRegression
from sklearn. metrics import classification_report
from sklearn import metrics
from log_color import log, LogLevel
from tqdm import tqdm
# Load the competition data.
train_df = pd.read_csv('./train_v2.csv')
test_df = pd.read_csv('./test_v2.csv')

# Features are every column except the two identifiers and the target.
x = train_df.drop(['user_id', 'merchant_id', 'label'], axis=1)
y = train_df['label']

# Hold out 20% of the rows for validation; fixed seed for reproducibility.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Focal-loss focusing parameter; gamma = 0 reduces the custom objective to
# alpha-weighted cross-entropy.
gamma = 0
train_Y = y_train
# Class-balance weight: fraction of negative samples in the training labels,
# used as the alpha term of the focal loss.
alpha = (train_Y == 0).sum() / train_Y.size


def logistic_obj(p, dtrain):
    """Custom XGBoost objective: alpha-balanced focal loss.

    Parameters
    ----------
    p : ndarray
        Raw margin predictions from the booster (pre-sigmoid).
    dtrain : xgb.DMatrix
        Training matrix; only its labels are read.

    Returns
    -------
    (grad, hess) : tuple of ndarray
        First and second derivatives of the loss w.r.t. the margin, as
        required by ``xgb.train(obj=...)``.

    Notes
    -----
    Reads the module-level ``alpha`` and ``gamma`` at call time.
    """
    y = dtrain.get_label()
    p = 1.0 / (1.0 + np.exp(-p))
    # Robustness fix: clip the sigmoid output away from exactly 0/1 so the
    # log(p), log(1-p) and 1/p, 1/(1-p) terms below stay finite. Without
    # this, saturated margins produce NaN/inf gradients.
    eps = 1e-9
    p = np.clip(p, eps, 1.0 - eps)

    # First derivative of the loss w.r.t. p. This expression was repeated
    # three times verbatim in the original; hoisted into `dldp` once.
    dldp = (
        alpha * gamma * y * (1 - p) ** gamma * np.log(p) / (1 - p)
        - alpha * y * (1 - p) ** gamma / p
        - gamma * p ** gamma * (1 - alpha) * (1 - y) * np.log(1 - p) / p
        + p ** gamma * (1 - alpha) * (1 - y) / (1 - p)
    )
    # Second derivative of the loss w.r.t. p.
    d2ldp2 = (
        -alpha * gamma ** 2 * y * (1 - p) ** gamma * np.log(p) / (1 - p) ** 2
        + alpha * gamma * y * (1 - p) ** gamma * np.log(p) / (1 - p) ** 2
        + 2 * alpha * gamma * y * (1 - p) ** gamma / (p * (1 - p))
        + alpha * y * (1 - p) ** gamma / p ** 2
        - gamma ** 2 * p ** gamma * (1 - alpha) * (1 - y) * np.log(1 - p) / p ** 2
        + 2 * gamma * p ** gamma * (1 - alpha) * (1 - y) / (p * (1 - p))
        + gamma * p ** gamma * (1 - alpha) * (1 - y) * np.log(1 - p) / p ** 2
        + p ** gamma * (1 - alpha) * (1 - y) / (1 - p) ** 2
    )

    # Chain rule through the sigmoid: dp/dmargin = p * (1 - p).
    grad = p * (1 - p) * dldp
    hess = p * (1 - p) * (p * (1 - p) * d2ldp2 - p * dldp + (1 - p) * dldp)
    return grad, hess
def XGBoostAUC(p):
    """GA fitness function: train XGBoost with the hyper-parameters encoded
    in the chromosome *p* and return the NEGATED validation AUC (the GA
    minimizes, so lower is better).

    Parameters
    ----------
    p : sequence of 17 floats
        Chromosome; decoded positionally into w1..w17 below.

    Returns
    -------
    float
        ``-AUC`` on the held-out validation split.
    """
    etas = [0.0001, 0.001, 0.01, 0.1]
    sampling_methods = ["uniform", "gradient_based"]
    (w1, w2, w3, w4, w5, w6, w7, w8, w9,
     w10, w11, w12, w13, w14, w15, w16, w17) = p
    params = {
        "learning_rate": w1,
        # NOTE: n_estimators is a sklearn-wrapper parameter; xgb.train
        # ignores it (the round count is fixed by num_boost_round below).
        "n_estimators": int(w2),
        "max_depth": int(w3),
        "min_child_weight": w4,
        "gamma": w5,
        # BUG(fixed): the original dict literal listed "subsample" twice
        # (w6, then w11); Python keeps the last key, so w6 was silently
        # dead. Kept w11 to preserve behaviour — w6 is an unused dimension.
        "subsample": w11,
        "nthread": 5,
        "scale_pos_weight": (train_Y == 0).sum() / (train_Y == 1).sum(),
        "lambda": w7,
        "eta": etas[int(w8)],
        "verbosity": 1,
        "eval_metric": "auc",
        "seed": int(w9),
        "max_delta_step": w10,
        "sampling_method": sampling_methods[int(w12)],
        "colsample_bytree": w13,
        "colsample_bylevel": w14,
        "colsample_bynode": w15,
        "gpu_id": 0,
        "tree_method": "gpu_hist",
        "max_leaves": int(w16),
        "num_parallel_tree": int(w17),
    }

    dtrain = xgb.DMatrix(x_train, label=y_train)
    clf = xgb.train(params=params, dtrain=dtrain, num_boost_round=100,
                    evals=[(dtrain, "train")], verbose_eval=False,
                    obj=logistic_obj)

    dtest = xgb.DMatrix(x_val, label=y_val)
    lr_proba = clf.predict(dtest)
    # BUG(fixed): np.nan_to_num(lr_proba, 0) passed 0 to the positional
    # `copy` parameter, not the NaN fill value. Pass nan= explicitly.
    lr_proba = np.nan_to_num(lr_proba, nan=0.0)

    fpr, tpr, threshold = metrics.roc_curve(y_val, lr_proba)
    roc_auc = metrics.auc(fpr, tpr)

    # Release the large (GPU-backed) objects between GA evaluations.
    del dtrain, clf, dtest, lr_proba, fpr, tpr, threshold

    log(f"本次迭代AUC分数为:[{roc_auc}],本次X值为:[{p}]", LogLevel.PASS)
    return -roc_auc


# Small GA over the 17 hyper-parameters; bounds/precision are positional and
# must stay aligned with the w1..w17 decoding inside XGBoostAUC.
ga = GA(func=XGBoostAUC, n_dim=17, size_pop=10, max_iter=5, prob_mut=0.01,
        lb=[0.1, 5, 1, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1],
        ub=[1, 20, 20, 100, 1, 1, 100, 3, 100, 10, 1, 1, 1, 1, 1, 10, 10],
        precision=[0.1, 1, 1, 0.1, 0.1, 0.1, 0.1, 1, 1, 0.1,
                   0.1, 1, 0.1, 0.1, 0.1, 1, 1])
# Run the search and report the best chromosome / fitness found.
best_x, best_y = ga.run()
print('best_x:', best_x, '\n', 'best_y:', best_y)

# Persist the raw optimisation result as a one-row table.
opt_x_log = pd.DataFrame({"best_x": [best_x], "best_y": [best_y]})
print(f"优化结果表: {opt_x_log} ")
opt_x_log.to_csv("best_x2.csv")
# Decode the best chromosome back into a concrete XGBoost parameter dict.
# This decoding must mirror the one inside XGBoostAUC exactly.
(w1, w2, w3, w4, w5, w6, w7, w8, w9,
 w10, w11, w12, w13, w14, w15, w16, w17) = best_x
etas = [0.0001, 0.001, 0.01, 0.1]
sampling_methods = ["uniform", "gradient_based"]
params = {
    "learning_rate": w1,
    "n_estimators": int(w2),
    "max_depth": int(w3),
    "min_child_weight": w4,
    "gamma": w5,
    # BUG(fixed): "subsample" appeared twice in the original literal (w6,
    # then w11); the last key wins, so only w11 was ever used. w6 is a
    # dead search dimension and is intentionally not written out.
    "subsample": w11,
    "nthread": 5,
    "scale_pos_weight": (train_Y == 0).sum() / (train_Y == 1).sum(),
    "lambda": w7,
    "eta": etas[int(w8)],
    "verbosity": 1,
    "eval_metric": "auc",
    "seed": int(w9),
    "max_delta_step": w10,
    "sampling_method": sampling_methods[int(w12)],
    "colsample_bytree": w13,
    "colsample_bylevel": w14,
    "colsample_bynode": w15,
    "gpu_id": 0,
    "tree_method": "gpu_hist",
    "max_leaves": int(w16),
    "num_parallel_tree": int(w17),
}
# Attach the achieved (negated) AUC and dump everything as one CSV row.
params.update({"best_auc": best_y})
best_params_table = pd.DataFrame({k: [v] for k, v in params.items()})
best_params_table.to_csv("best_params_table.csv")