模型和代码
数据格式如下:前21列作为模型输入X,最后5列作为模型输出Y。
训练集:
测试集:
代码:
from numpy import genfromtxt
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
from joblib import dump, load  # save / restore the fitted model
from pathlib import Path

# File the fitted pipeline is saved to and reloaded from.
MODEL_PATH = 'regression_model.joblib'

# Read the data.
# NOTE(review): genfromtxt is called without skip_header, so this presumably
# expects header-less, purely numeric CSV files -- confirm against the data.
data = genfromtxt('data2_calnsft.csv', delimiter=',')
data_test = genfromtxt('data_test_cal.csv', delimiter=',')

# Define X and Y: the first 21 columns are the model inputs, the remaining
# columns are the model outputs.
X = data[:, 0:21]
Y = data[:, 21:]
X_test = data_test[:, 0:21]
Y_test = data_test[:, 21:]  # assumes the test set also contains the true Y values

# Feature standardization followed by degree-2 polynomial feature expansion,
# then one GradientBoostingRegressor per output column via MultiOutputRegressor.
poly = PolynomialFeatures(degree=2, include_bias=False)
regression_model = make_pipeline(
    StandardScaler(),
    poly,
    MultiOutputRegressor(
        GradientBoostingRegressor(n_estimators=300, max_depth=5, learning_rate=0.05)
    ),
)
# Alternative higher-capacity configuration:
# MultiOutputRegressor(GradientBoostingRegressor(n_estimators=500, max_depth=15, learning_rate=0.05))

# Predictions below always come from the model stored in MODEL_PATH.
# Previously the script loaded that file without ever writing it (the dump
# call was commented out), so a first run failed and every run discarded the
# model it had just trained. Now: reuse the saved model when it exists,
# otherwise train one and save it.
if not Path(MODEL_PATH).is_file():
    # Train the model
    regression_model.fit(X, Y)
    # Save the model
    dump(regression_model, MODEL_PATH)

# Load the model
regression_model = load(MODEL_PATH)

# Predict with the loaded model
predictions = regression_model.predict(X_test)

# Print the predictions, one sample per line, two decimals per output
print("预测结果:")
# print(predictions)
for pred in predictions:
    formatted_pred = [f"{x:.2f}" for x in pred]
    print(formatted_pred)
运行结果:
用 PyQt5 做了一个简单的 demo,代码如下:
import sys
from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QFileDialog, QTextEdit, QCheckBox
from PyQt5.QtCore import Qt # 导入Qt用于复选框状态的检测
from joblib import load
import numpy as np
import pandas as pd


class PredictorApp(QWidget):
    """Window that loads a joblib model and a CSV file and runs predictions.

    The user picks a model file and a data file; once both are loaded the
    predict button is enabled. Predictions are rounded to three decimals,
    written to ``result.csv`` and optionally shown in the window.
    """

    def __init__(self):
        super().__init__()
        self.model = None  # object returned by joblib.load (set in load_model)
        self.data = None  # DataFrame returned by pd.read_csv (set in load_data)
        self.show_predictions = False  # whether predictions are shown in the window
        self.initUI()

    def initUI(self):
        """Create the widgets, wire their signals and lay them out vertically."""
        layout = QVBoxLayout()

        # Button: load model
        self.btn_load_model = QPushButton('Load model file')
        self.btn_load_model.clicked.connect(self.load_model)

        # Button: load test data
        self.btn_load_data = QPushButton('Load test data file')
        self.btn_load_data.clicked.connect(self.load_data)

        # Button: run prediction; disabled until both model and data are loaded
        self.btn_predict = QPushButton('Predict and save results')
        self.btn_predict.clicked.connect(self.predict)
        self.btn_predict.setEnabled(False)

        # Checkbox controlling whether predictions are displayed
        self.chk_show_predictions = QCheckBox('Show predictions in the app')
        self.chk_show_predictions.stateChanged.connect(self.toggle_show_predictions)

        # Read-only box for the predictions; hidden initially
        self.predictions_text = QTextEdit()
        self.predictions_text.setReadOnly(True)
        self.predictions_text.setVisible(False)

        # Read-only status / message log
        self.status_text = QTextEdit()
        self.status_text.setReadOnly(True)

        # Add the widgets to the layout
        for widget in (
            self.btn_load_model,
            self.btn_load_data,
            self.btn_predict,
            self.chk_show_predictions,
            self.status_text,
            self.predictions_text,
        ):
            layout.addWidget(widget)

        # Window layout, title and geometry
        self.setLayout(layout)
        self.setWindowTitle('Regression Model Predictor')
        self.setGeometry(300, 300, 300, 300)

    def _update_predict_button(self):
        """Enable the predict button once both a model and data are loaded."""
        self.btn_predict.setEnabled(self.model is not None and self.data is not None)

    def toggle_show_predictions(self, state):
        """Show or hide the predictions box according to the checkbox state."""
        self.show_predictions = state == Qt.Checked
        self.predictions_text.setVisible(self.show_predictions)
        if not self.show_predictions:
            self.predictions_text.clear()  # drop the text when it is hidden

    def load_model(self):
        """Ask for a model file and load it with joblib.

        Load failures are written to the status box and leave any previously
        loaded model in place.
        """
        fname, _ = QFileDialog.getOpenFileName(
            self, 'Open model file', '', 'Model files (*.joblib);; All files (*.*)')
        if not fname:
            return
        try:
            # SECURITY: joblib.load is pickle-based and can execute arbitrary
            # code from the file; only open model files from a trusted source.
            model = load(fname)
        except Exception as e:
            # Report in the window, like predict() does, instead of letting
            # the exception escape the Qt slot.
            self.status_text.append(f'Error: failed to load model: {e}')
            return
        self.model = model
        self.status_text.append('Model loaded successfully.')
        self._update_predict_button()

    def load_data(self):
        """Ask for a CSV file and load it into a DataFrame.

        Load failures are written to the status box and leave any previously
        loaded data in place.
        """
        fname, _ = QFileDialog.getOpenFileName(
            self, 'Open test data file', '', 'CSV files (*.csv);; All files (*.*)')
        if not fname:
            return
        try:
            # NOTE(review): read_csv is called with its defaults, so the first
            # line of the file is taken as the column header -- confirm the
            # test files actually have a header row.
            data = pd.read_csv(fname)
        except Exception as e:
            self.status_text.append(f'Error: failed to load test data: {e}')
            return
        self.data = data
        self.status_text.append('Test data loaded successfully.')
        self._update_predict_button()

    def predict(self):
        """Run the model on the loaded data, save and optionally show the result.

        Predictions are rounded to three decimals and written to
        ``result.csv`` without index or header. Any exception raised while
        predicting or saving is reported in the status box.
        """
        # Explicit None check: the truthiness of a model object depends on
        # whether its class defines __len__/__bool__.
        if self.model is None or not isinstance(self.data, pd.DataFrame):
            self.status_text.append('Error: Model or data not loaded.')
            return
        try:
            # NOTE(review): every column of the CSV is passed to the model, so
            # the file must contain only the model's input features -- confirm.
            predictions = self.model.predict(self.data.values)
            # Round the predictions to three decimals
            predictions_rounded = np.round(predictions, decimals=3)
            num_predictions = len(predictions_rounded)

            # Display layout: this many prediction rows per displayed line,
            # tab-separated, each displayed line ending with a newline.
            columns_per_row = 3
            predictions_text = "".join(
                "\t".join(map(str, predictions_rounded[start:start + columns_per_row])) + "\n"
                for start in range(0, num_predictions, columns_per_row)
            )

            # Save the results to a CSV file
            pd.DataFrame(predictions_rounded).to_csv('result.csv', index=False, header=False)
            self.status_text.append('Predictions saved to "result.csv" with 3 decimal places.')

            # Show the formatted predictions only if the checkbox is ticked
            if self.show_predictions:
                self.predictions_text.setPlainText(predictions_text)
        except Exception as e:
            self.status_text.append(str(e))


if __name__ == '__main__':
    app = QApplication(sys.argv)
    predictor_app = PredictorApp()
    predictor_app.show()
    sys.exit(app.exec_())
运行结果界面如下: