题目
以下采用 K-NN 算法来解决水仙花的分类问题。每个样本有两个特征:第一个为水仙花的花萼长度,第二个为水仙花的花萼宽度,具体数据见表。
1)设置 k=3,采用欧氏距离,分析分类精度为多少?
2)使用网格搜索方式找到最佳参数,并预测
3)可视化
我的数据集合就是这个
excel数据展示
代码
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap


def model_selection(x_train, y_train):
    """Grid-search a K-NN classifier and return the best fitted estimator.

    The search covers ``n_neighbors`` in {3, 5, 7, 8, 10} and the Minkowski
    power ``p`` in {1, 2} (Manhattan and Euclidean distance respectively),
    scored with 5-fold cross-validation.

    Args:
        x_train: Feature matrix, one row per sample.
        y_train: Class labels aligned with ``x_train``.

    Returns:
        The estimator refitted by ``GridSearchCV`` with the best parameters.
    """
    params = {'n_neighbors': [3, 5, 7, 8, 10], 'p': [1, 2]}
    model = KNeighborsClassifier()
    gs = GridSearchCV(model, params, verbose=2, cv=5)
    gs.fit(x_train, y_train)
    # best_score_ is the mean cross-validated score of the best candidate.
    print("Best Model:", gs.best_params_, "Accuracy:", gs.best_score_)
    return gs.best_estimator_


def read(filename=r"data/shuixianhua.xlsx"):
    """Load the two-feature samples and their labels from the Excel sheet.

    The sheet is read without a header; its first row is skipped. Columns
    0-1 and 3-4 each hold a pair of features, columns 2 and 5 hold the
    matching labels. The two column groups are stacked into one data set.

    Args:
        filename: Path of the Excel workbook to read.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float array with two columns and
        ``y`` is an int array of labels.
    """
    data = pd.read_excel(filename, header=None)
    x1 = data.iloc[1:, [0, 1]].values
    x2 = data.iloc[1:, [3, 4]].values
    y1 = data.iloc[1:, 2].values
    y2 = data.iloc[1:, 5].values
    x = np.vstack((x1, x2))  # stack the two feature groups row-wise
    y = np.hstack((y1, y2))  # concatenate the two label columns
    # Reading with header=None presumably leaves the columns as object dtype
    # (the skipped first row looks like a text header), so cast explicitly to
    # hand numeric arrays to the classifier and to the plotting code.
    x = x.astype(float)
    y = y.astype(int)
    return x, y


def plot_decision_boundary(x, y, model, h=0.02):
    """Plot the decision regions of ``model`` together with the samples.

    Args:
        x: Feature matrix with two columns (used as the plot's two axes).
        y: Class labels used to colour the scatter points.
        model: Fitted classifier exposing ``predict``.
        h: Step size of the mesh on which the model is evaluated.
    """
    # Two-colour maps: light shades for the regions, bold for the samples.
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00'])

    # Pad the plotted area by one unit around the data on every side.
    x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Classify every mesh point, then restore the grid shape for plotting.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("KNN Decision Boundaries")
    plt.show()


if __name__ == '__main__':
    x, y = read()
    best_model = model_selection(x, y)
    plot_decision_boundary(x, y, best_model)