1、加载函数和数据集
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
cancer = load_breast_cancer()
cancer_data = cancer['data']
cancer_target = cancer['target']
cancer_names = cancer['feature_names']
加载自带数据集,并提取X和y。
2、将数据划分为训练集测试集
cancer_data_train,cancer_data_test, \
cancer_target_train,cancer_target_test = \
train_test_split(cancer_data,cancer_target,test_size = 0.2,random_state = 22)
训练集为80%,测试集为20%,设置有随机化状态,使得每次运行时提取的数据是相同的。
3、数据标准化
stdScaler = StandardScaler().fit(cancer_data_train)
cancer_trainStd = stdScaler.transform(cancer_data_train)
cancer_testStd = stdScaler.transform(cancer_data_test)
从训练集中提取数据标准化规则,并应用于训练集和测试集。
4、建立分类模型
svm = SVC().fit(cancer_trainStd,cancer_target_train)
调用了SVC模型,并使用了默认参数。
5、进行预测
cancer_target_pred = svm.predict(cancer_testStd)
6、输出预测结果混淆矩阵
from sklearn.metrics import confusion_matrix
import seaborn as snscm = confusion_matrix(cancer_target_test, cancer_target_pred)
heatmap = sns.heatmap(cm, annot=True, fmt='d')
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right')
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right')
plt.ylabel("true label")
plt.xlabel("predict label")
plt.show()
预测对的结果数目为: 40+71=111 预测错的结果数目为: 0+3=3
7、输出评价指标值
from sklearn.metrics import accuracy_score,precision_score, \
recall_score,f1_score,cohen_kappa_score
print('使用SVM预测breast_cancer数据的准确率为:',accuracy_score(cancer_target_test,cancer_target_pred))
print('使用SVM预测breast_cancer数据的精确率为:',precision_score(cancer_target_test,cancer_target_pred))
print('使用SVM预测breast_cancer数据的召回率为:',recall_score(cancer_target_test,cancer_target_pred))
print('使用SVM预测breast_cancer数据的F1值为:',f1_score(cancer_target_test,cancer_target_pred))
print('使用SVM预测breast_cancer数据的Cohen’s Kappa系数为:',cohen_kappa_score(cancer_target_test,cancer_target_pred))
8、输出评价报告
from sklearn.metrics import classification_report
print('使用SVM预测breast_cancer数据的分类报告为:','\n',classification_report(cancer_target_test,cancer_target_pred))
9、输出ROC曲线
from sklearn.metrics import roc_curve
## 求出ROC曲线的x轴和y轴
fpr, tpr, thresholds = \
roc_curve(cancer_target_test,cancer_target_pred)
plt.figure(figsize=(10,6))
plt.xlim(0,1) ##设定x轴的范围
plt.ylim(0.0,1.1) ## 设定y轴的范围
plt.xlabel('False Postive Rate')
plt.ylabel('True Postive Rate')
plt.plot(fpr,tpr,linewidth=2, linestyle="-",color='red', label='ROC curve')
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.legend(loc="lower right")
plt.show()