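# inX is the sample to classify, dataSet is the training sample set, labels holds the labels of the training samples, and k is the number of nearest neighbours.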
from numpy import *
import operator


def knn(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean. Neighbours at exactly the same distance are
    taken in training-set order, and when several labels share the highest
    vote count the label whose first vote came from the closest neighbour
    is returned.

    Args:
        inX: Feature vector of the sample to classify (array-like with one
            value per column of ``dataSet``).
        dataSet: Training samples, one sample per row (2-D array-like).
        labels: Labels of the training samples; ``labels[i]`` belongs to
            row ``i`` of ``dataSet``. Labels must be hashable.
        k: Number of nearest neighbours that vote.

    Returns:
        The label that occurs most often among the k nearest neighbours.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of training samples.
    """
    # Accept plain (nested) lists as well as ndarrays.
    dataSet = asarray(dataSet)
    inX = asarray(inX)

    m = dataSet.shape[0]  # number of training samples
    if not 1 <= k <= m:
        raise ValueError(
            "k must be between 1 and the number of training samples "
            "(%d), got %r" % (m, k)
        )

    # Broadcasting subtracts inX from every row, so no tiled copy is needed.
    diffMat = inX - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)  # row-wise sum of squares
    distances = sqDistances ** 0.5

    # A stable sort keeps equidistant samples in training-set order, which
    # makes the result deterministic.
    sortedDistIndicies = distances.argsort(kind="stable")

    # Count the votes of the k closest samples. The dict keeps insertion
    # order, so labels are recorded nearest-first.
    classCount = {}
    for index in sortedDistIndicies[:k]:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Sort by vote count, descending; sorted() is stable, so tied labels
    # stay in nearest-first order.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]