import numpy as np
from os import listdir
from sklearn. neighbors import KNeighborsClassifier as kNN
def img2vector(filename):
    """Convert a 32x32 text image of digit characters into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines are parsed with
    ``int`` and stored row by row, so pixel ``(i, j)`` lands at column
    ``32 * i + j``.

    :param filename: path of the text file to convert.
    :return: ``numpy`` array of shape ``(1, 1024)``, or ``None`` when the file
        is missing or cannot be parsed (an error message is printed).
    """
    side = 32
    try:
        vect = np.zeros((1, side * side))
        with open(filename) as fr:
            for row in range(side):
                line = fr.readline()
                # A short or non-numeric line raises ValueError here (from
                # int() or from the shape mismatch) and is reported below.
                vect[0, side * row:side * (row + 1)] = [
                    int(ch) for ch in line[:side]
                ]
        return vect
    except FileNotFoundError:
        print(f"错误:文件 {filename} 未找到。")
        return None
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None
        # rather than propagating, so callers can carry on with other files.
        print(f"错误:处理文件 {filename} 时发生未知错误: {e} ")
        return None
def load_training_data(data_dir='trainingDigits'):
    """Load the training images and their labels from a directory.

    Every entry of ``data_dir`` is expected to be named ``<label>_<suffix>``;
    the integer before the first underscore is used as the class label and the
    file content is converted with ``img2vector``.

    Files that ``img2vector`` cannot read (it returns ``None`` for them) are
    left out, so matrix rows and labels always stay aligned.

    :param data_dir: directory holding the training files.
    :return: ``(matrix, labels)`` where ``matrix`` has one 1024-column row per
        loaded file and ``labels`` is the matching list of ints, or
        ``(None, None)`` if the directory is missing or another error occurs.
    """
    hwLabels = []
    vectors = []
    try:
        for fileNameStr in listdir(data_dir):
            classNumber = int(fileNameStr.split('_')[0])
            # No padding around the file name: stray spaces in the path would
            # make every lookup fail.
            vector = img2vector(f'{data_dir}/{fileNameStr}')
            if vector is None:
                # Unreadable file: skip it entirely instead of keeping its
                # label next to an all-zero row.
                continue
            vectors.append(vector)
            hwLabels.append(classNumber)
        trainingMat = np.vstack(vectors) if vectors else np.zeros((0, 1024))
        return trainingMat, hwLabels
    except FileNotFoundError:
        print("错误:训练数据文件夹未找到。")
        return None, None
    except Exception as e:
        print(f"错误:加载训练数据时发生未知错误: {e} ")
        return None, None
def load_test_data(data_dir='testDigits'):
    """Load the test images and their labels from a directory.

    Every entry of ``data_dir`` is expected to be named ``<label>_<suffix>``;
    the integer before the first underscore is used as the true label and the
    file content is converted with ``img2vector``.

    Files that ``img2vector`` cannot read (it returns ``None`` for them) are
    left out, so matrix rows and labels always stay aligned.

    :param data_dir: directory holding the test files.
    :return: ``(matrix, labels)`` where ``matrix`` has one 1024-column row per
        loaded file and ``labels`` is the matching list of ints, or
        ``(None, None)`` if the directory is missing or another error occurs.
    """
    testLabels = []
    vectors = []
    try:
        for fileNameStr in listdir(data_dir):
            classNumber = int(fileNameStr.split('_')[0])
            # No padding around the file name: stray spaces in the path would
            # make every lookup fail.
            vector = img2vector(f'{data_dir}/{fileNameStr}')
            if vector is None:
                # Unreadable file: skip it entirely instead of scoring an
                # all-zero row against its label.
                continue
            vectors.append(vector)
            testLabels.append(classNumber)
        testMat = np.vstack(vectors) if vectors else np.zeros((0, 1024))
        return testMat, testLabels
    except FileNotFoundError:
        print("错误:测试数据文件夹未找到。")
        return None, None
    except Exception as e:
        print(f"错误:加载测试数据时发生未知错误: {e} ")
        return None, None
def handwritingClassTest():
    """Train a 3-nearest-neighbour classifier on the digits and report its error.

    Loads the training set, fits the classifier, loads the test set, then
    prints the predicted and true label for every test sample followed by the
    number of misclassified samples and the error rate in percent.

    Returns ``None``. Returns early (after a message) when either data set
    cannot be loaded or contains no samples.
    """
    trainingMat, hwLabels = load_training_data()
    if trainingMat is None or hwLabels is None:
        return
    if len(hwLabels) == 0:
        # Fitting on zero samples would raise inside scikit-learn.
        print("错误:训练数据为空。")
        return

    neigh = kNN(n_neighbors=3, algorithm='auto')
    neigh.fit(trainingMat, hwLabels)

    testMat, testLabels = load_test_data()
    if testMat is None or testLabels is None:
        return
    mTest = len(testLabels)
    if mTest == 0:
        # Avoid dividing by zero when computing the error rate.
        print("错误:测试数据为空。")
        return

    # One batched call; each row is classified independently, so the results
    # match predicting row by row.
    predictions = neigh.predict(testMat)

    errorCount = 0
    for predicted, actual in zip(predictions, testLabels):
        print(f"分类返回结果为 {predicted} \t真实结果为 {actual} ")
        if predicted != actual:
            errorCount += 1

    # The format spec must be exactly ".2f"; surrounding spaces make it invalid.
    print(f"总共错了 {errorCount} 个数据\n错误率为 {errorCount / mTest * 100:.2f} %")
# Run the handwritten-digit evaluation only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    handwritingClassTest()
from sklearn. datasets import load_iris
from sklearn. neighbors import KNeighborsClassifier
import matplotlib. pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# --- Iris data set: k-nearest-neighbours baseline and grid search over k ---

iris = load_iris()

# Notebook-style inspection expressions: they are evaluated but display
# nothing when the file is run as a script.
iris.keys()
iris.feature_names
iris.target

x, y = iris.data, iris.target
x.shape

# Hold out part of the data for validation (default split, no fixed seed, so
# the scores below vary from run to run).
x_train, x_test, y_train, y_test = train_test_split(x, y)
x_train.shape

knn_clf = KNeighborsClassifier(n_neighbors=6)
knn_clf.fit(x_train, y_train)
print('训练集准确率:%.2f' % knn_clf.score(x_train, y_train))
print('验证集准确率:%.2f' % knn_clf.score(x_test, y_test))

# 5-fold cross-validated search for the best k in 1..10 on the full data set.
n_neighbors = tuple(range(1, 11))
cv = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid={'n_neighbors': n_neighbors},
    cv=5,
)
cv.fit(x, y)
# Print the result explicitly: a bare expression is only shown in a notebook.
print(cv.best_params_)
# Sample output from a previous run:
#   训练集准确率:0.96
#   验证集准确率:0.95
#   {'n_neighbors': 6}