2024五一数学建模竞赛(五一赛)C题保姆级分析完整思路+代码+数据教学
C题 煤矿深部开采冲击地压危险预测
第一问 导入数据
以下仅展示部分代码,完整版请见文末的文章。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
def _fill_with_row_value(series, row_pos):
    """Replace NaNs in *series* with the value found at position *row_pos*.

    The position is clamped to the last row so short inputs do not raise.
    If the series is empty or the chosen value is itself NaN, the series is
    returned unchanged.
    """
    if series.empty:
        return series
    anchor = series.iloc[min(row_pos, len(series) - 1)]
    if pd.isna(anchor):
        return series
    return series.fillna(anchor)


def preprocess_dataframe(df):
    """Append per-sample features derived from the AE signal and timestamp.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a '时间 (time)' column (datetime, or values that
            ``pd.to_datetime`` can parse) and a numeric '声波强度 (AE)' column.

    Returns:
        The same DataFrame with these columns appended, in this order:
        FFT_mean, FFT_std, FFT_max, hour, minute, weekday, is_weekend,
        rolling_mean, rolling_std, diff_1, diff_2.
    """
    time_col = '时间 (time)'
    ae_col = '声波强度 (AE)'

    # Make sure the `.dt` accessor below works even if the column was read
    # as strings; this is a no-op for columns that are already datetime.
    df[time_col] = pd.to_datetime(df[time_col])
    df[ae_col] = df[ae_col].astype(float)

    # NOTE(review): the FFT is taken over each sample on its own. The FFT of a
    # length-1 signal is the sample itself, so the three "FFT" statistics
    # reduce to |x|, 0 and |x|. They are computed directly here (same values,
    # no per-row Python call); a windowed FFT is probably what was intended.
    magnitude = df[ae_col].abs()
    df['FFT_mean'] = magnitude
    df['FFT_std'] = magnitude * 0.0  # 0 for finite samples; NaN/inf give NaN
    df['FFT_max'] = magnitude

    # Calendar features.
    timestamps = df[time_col].dt
    df['hour'] = timestamps.hour
    df['minute'] = timestamps.minute
    df['weekday'] = timestamps.weekday
    df['is_weekend'] = (df['weekday'] >= 5).astype('int64')

    # Rolling statistics over a window of 10 samples. Missing entries (the
    # first window-1 rows) are filled with the value at row `rolling_window`.
    # Assigning the filled result back avoids chained in-place `fillna`, which
    # does not update the frame under pandas Copy-on-Write.
    rolling_window = 10
    rolling = df[ae_col].rolling(window=rolling_window)
    df['rolling_mean'] = _fill_with_row_value(rolling.mean(), rolling_window)
    df['rolling_std'] = _fill_with_row_value(rolling.std(), rolling_window)

    # Differences against the previous sample and the sample two steps back
    # (`diff(2)` is a lag-2 difference, not a second-order difference).
    df['diff_1'] = df[ae_col].diff(1).fillna(0)
    df['diff_2'] = df[ae_col].diff(2).fillna(0)

    return df


train_df = pd.read_excel("./附件1 (Attachment 1).xlsx", sheet_name='AE')
train_df  # notebook-style preview; has no effect when run as a script
processed_df = preprocess_dataframe(train_df)
# Integer-encode the class labels; `class_categories` holds the code -> label mapping.
encoded_classes, class_categories = pd.factorize(processed_df['类别 (class)'])
processed_df['类别 (class)_encoded'] = encoded_classes

# Flag every sample whose AE intensity is above the 80th percentile.
ae_values = processed_df['声波强度 (AE)']
q = np.percentile(ae_values, 80)
data_modified = np.where(ae_values > q, 1, 0).tolist()
processed_df['干扰信号'] = data_modified
processed_df  # notebook-style preview; has no effect when run as a script

# Separate features from the target. The target is the '干扰信号' flag built
# above; the timestamp and both class columns are excluded from the features.
# NOTE(review): the target is a threshold on '声波强度 (AE)', and that column
# remains in X, so the label is recoverable from a single feature — confirm
# this is intended before trusting the reported scores.
non_feature_columns = ['时间 (time)', '类别 (class)', '干扰信号', '类别 (class)_encoded']
X = processed_df.drop(columns=non_feature_columns)
y = processed_df['干扰信号']

# Hold out 30% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
X_train.head()
集成随机森林代码:
# Random-forest baseline with 100 trees and a fixed seed.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)

# Print the accuracy.
print("Accuracy:", accuracy_score(y_test, y_pred))

# Print the classification report.
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion matrix. The label order is pinned to every class present in `y`
# so the matrix rows/columns always line up with the tick labels drawn below,
# even if a class happens to be missing from the test split.
class_labels = np.unique(y)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
print("Confusion Matrix:\n", conf_matrix)

# Visualize the confusion matrix.
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_labels))
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
第一问结果表:(csv文件)
第二问xgboost混淆矩阵:
from xgboost import XGBClassifier

# Gradient-boosted trees: 100 rounds, depth-3 trees, learning rate 0.1.
xgb_model = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
xgb_model.fit(X_train, y_train)

# Score the held-out split.
y_pred_xgb = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
print("XGBoost Accuracy:", xgb_accuracy)

# Draw the confusion matrix as an annotated heatmap with integer counts.
conf_matrix = confusion_matrix(y_test, y_pred_xgb)
heatmap_ax = sns.heatmap(conf_matrix, annot=True, fmt='d')
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()
第三问部分代码:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler# 转换为PyTorch张量
# Convert the four pandas splits to float32 tensors in one pass.
# NOTE(review): StandardScaler is imported but not applied in the visible
# code, so the network receives unscaled features — confirm this is intended.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split.values, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

# Wrap the tensors as datasets; only the training set gets a loader here.
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# Define the neural network architecture.
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier: input -> 64 -> 32 -> 1 with sigmoid.

    Args:
        input_dim: Number of input features. When omitted, the column count
            of the module-level ``X_train`` is used, so ``NeuralNetwork()``
            keeps working exactly as before.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Fall back to the training frame defined earlier in the script.
            input_dim = X_train.shape[1]
        self.layer1 = nn.Linear(input_dim, 64)
        self.layer2 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, 1) probabilities."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        # Sigmoid turns the single output unit into a probability.
        return torch.sigmoid(self.output_layer(x))


# Instantiate the model and define the loss function and optimizer.
model = NeuralNetwork()
criterion = nn.BCELoss()  # binary cross-entropy on the sigmoid probabilities
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model.
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    samples_seen = 0
    for inputs, labels in train_loader:
        outputs = model(inputs)
        # Squeeze only the trailing unit axis. A bare squeeze() would also
        # drop the batch axis when the final batch holds a single sample,
        # and BCELoss then rejects the input/target size mismatch.
        loss = criterion(outputs.squeeze(1), labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the sample-weighted mean loss of the epoch rather than the loss
    # of whichever batch came last; max(..., 1) keeps an empty loader safe.
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
其中更详细的思路,各题目思路、代码、讲解视频、成品论文及其他相关内容,可以点击下方群名片哦!