1、Kmeans++的原理如下:
(1)首先选取样本中任一数据点作为第一个聚类中心;
(2)计算样本每一个数据点至现所有聚类中心的最近距离,并记录下来;
(3)逐一挑选所有数据点最近距离之中的最大值,即最远距离,最大值对应的数据点为待求聚类中心;
(4)剔除已选为聚类中心的样本点,重新计算(2)、(3)步骤,得到指定的最终的聚类中心点数。
2、实现结果如下:
注:当噪点太多时,初始K个聚类中心的计算会出现偏差,从而导致整个聚类结果出现偏差。
3、原始数据读入格式如下:
点号-X坐标-Y坐标(非此格式的数据无法正常读入,程序会报错)
4、根据Kmeans++选择k个初始聚类中心的Kmeans聚类算法整体代码如下:
//Kmeans.cpp文件
#include "Kmeans.h"Kmeans::Kmeans(QWidget *parent): QWidget(parent)
{start = false;//dd = blank;dd = to2K;ui.setupUi(this);connect(ui.pushButton, SIGNAL(clicked()), this, SLOT(onBtReadData()));connect(ui.pushButton_2, SIGNAL(clicked()), this, SLOT(onBtCalKmeans()));connect(ui.pushButton_3, SIGNAL(clicked()), this, SLOT(onBtReadK()));
}void Kmeans::onBtReadData()
{K = ui.lineEdit->text().toInt();p.clear();//打开文件对话框QString fileName = QFileDialog::getOpenFileName(this, tr("打开"));QFile file(fileName);bool isOpen = 1;if (!file.open(QIODevice::ReadOnly | QIODevice::Text)){isOpen = 0;QMessageBox::StandardButton btnValue = QMessageBox::information(this, tr("提示"), tr("打开失败!"));}//逐行读取文本文件QTextStream stream(&file);while (!stream.atEnd()){Pointp pt;QString str = stream.readLine();QStringList list = str.split(",");pt.no = list.at(0);pt.x = list.at(1).toDouble();pt.y = list.at(2).toDouble();p.push_back(pt);}file.close();//判断是否读取完毕if (stream.atEnd()&&isOpen){QMessageBox box;box.setText("数据读取完毕");box.exec();}
}void Kmeans::onBtReadK()
{QString fileName = QFileDialog::getOpenFileName(this, tr("打开"));QFile file(fileName);bool isOpen = 1;if (!file.open(QIODevice::ReadOnly | QIODevice::Text)){isOpen = 0;QMessageBox::StandardButton btnValue = QMessageBox::information(this, tr("提示"), tr("打开失败!"));}QTextStream stream(&file);while (!stream.atEnd()){QString str = stream.readLine();QStringList list = str.split(",");Pointp k1;k1.no = list.at(0);k1.x = list.at(1).toDouble();k1.y = list.at(2).toDouble();k.push_back(k1);}//判断是否读取完毕if (stream.atEnd() && isOpen){QMessageBox box;box.setText("数据读取完毕");box.exec();}dd = readK;
}void Kmeans::toK()
{//随机选取k个初始聚类中心for (int i = 0; i < K; i++){Pointp k1;k1.no = i + 1;k1.x = p.at(i).x;k1.y = p.at(i).y;k.push_back(k1);}
}int Kmeans::onBtCalKmeans()
{K = ui.lineEdit->text().toInt();if (S.size()&&p.size()==S.size()){QMessageBox box;box.setText("已经计算完成");box.exec();return 0;}//if (dd == to2K)//{// toK();//}CalK();CalDis();//SCalcentroid();//用到S,得dis//CKmeans();//用到dis,得new k.int iCount = 0;while (iCount < K){if (dis.size()){for (int i = 0; i < k.size(); i++){for (int j = 0; j < dis.size(); j++){if (k.at(i).no == dis.at(j).noK){//qDebug() <<"k:" <<k.at(i).no<< k.at(i).x << k.at(i).y;//qDebug() <<"dis:" <<dis.at(i).noK.toInt()<< dis.at(j).sx << dis.at(j).sy<<endl;double detaX = k.at(i).x - dis.at(j).sx;double detaY = k.at(i).y - dis.at(j).sy;double sk = sqrt(detaX * detaX + detaY * detaY);//qDebug() << sk;if (sk == 0){iCount++;}else{CKmeans();}}}}}dis.clear();S.clear();CalDis();Calcentroid();}start = true;qDebug() << "S" << S.size();drawPoint();//drawK();QMessageBox box;box.setText("计算完成");box.exec();qDebug() << "k" << k.size();//for (int i = 0; i < S.size(); i++)//{// qDebug() << "S:" <<S.at(i).no<< S.at(i).noK;//}return 1;
}Kmeans::~Kmeans()
{}//计算质心
void Kmeans::Calcentroid()
{centroid s;for (int i = 0; i < k.size(); i++){s.sx = 0; s.sy = 0; int iCt = 0;for (int j = 0; j < S.size(); j++){if (k.at(i).no == S.at(j).noK){s.sx = s.sx + S.at(j).x;s.sy = s.sy + S.at(j).y;iCt++;}}s.noK = k.at(i).no;s.sx = s.sx / iCt;s.sy = s.sy / iCt;dis.push_back(s);}
}//计算每个对象至聚类中心的距离
void Kmeans::CalDis()
{for (int i = 0; i < p.size(); i++){double s0 = 0; QString no; Dis ss; int t = 0;double x1 = p.at(i).x;double y1 = p.at(i).y;//double x = k.at(0).x;//double y = k.at(0).y;//s0 = sqrt((x1 - x) * (x1 - x) + (y1 - y) * (y1 - y));for (int j = 0; j < k.size(); j++){double x2 = k.at(j).x;double y2 = k.at(j).y;double s1 = sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2));t++;if (t == 1){s0 = s1;no = k.at(j).no;}if (s1 < s0){s0 = s1;no = k.at(j).no;}}ss.s = s0;ss.no = p.at(i).no;ss.x = p.at(i).x;ss.y = p.at(i).y;ss.noK = no;S.push_back(ss);}}//将新的质心坐标赋值给k
void Kmeans::CKmeans()
{for (int i = 0; i < k.size(); i++){for (int j = 0; j < dis.size(); j++){if (k.at(i).no == dis.at(j).noK){k.at(i).x = dis.at(j).sx;k.at(i).y = dis.at(j).sy;}}}
}//绘图函数
void Kmeans::drawPoint()
{QPicture pp;pp.setBoundingRect(ui.label_2->rect());QPainter painterP(&pp);QPen pen;painterP.setRenderHint(QPainter::Antialiasing, true);Pointp p1;p1.no = p.at(0).no;p1.x = p.at(0).x;p1.y = p.at(0).y;for (int i = 1; i < p.size(); i++){if (p1.x > p.at(i).x){p1.x = p.at(i).x;}if (p1.y > p.at(i).y){p1.y = p.at(i).y;}}double xmin = p1.x;double ymin = p1.y;for (int i = 1; i < p.size(); i++){if (p1.x < p.at(i).x){p1.x = p.at(i).x;}if (p1.y < p.at(i).y){p1.y = p.at(i).y;}}double xmax = p1.x;double ymax = p1.y;int w=ui.label_2->width();int h=ui.label_2->height();double a = w/(xmax -xmin);double b1 = h/(ymax -ymin);for (int i = 0; i < k.size(); i++){int r = qrand() % 256;int g = qrand() % 256;int b = qrand() % 256;QColor color = QColor(r, g, b);for (int j = 0; j < S.size(); j++){if (k.at(i).no == S.at(j).noK){pen.setColor(color);painterP.setPen(pen);int radius = 5;double x = S.at(j).x;double y = S.at(j).y;x = (x - xmin)*a;y = (y - ymin)*b1;painterP.drawEllipse(x - radius, y - radius, radius * 2, radius * 2);}}}ui.label_2->setPicture(pp);
}void Kmeans::CalK()
{k.push_back(p.at(0));CalDistance();while (k.size() != K){qDebug() << k.size() << k.at(k.size() - 1).no;S.clear();CalDistance();for (auto& val : k){qDebug() << "CalK.k1" << val.no << val.x << val.y;}std::vector<Pointp> vk; int t3 = k.size();while (vk.size() != t3){Pointp p9 = k.at(0); int t2 = 0;for (int i = 1; i < k.size(); i++){Pointp p2 = k.at(i);if (p9.no.toInt() < k.at(i).no.toInt()){p9 = k.at(i);t2 = i;}}k.erase(k.begin() + t2);//删除下标为t2的元素;vk.push_back(p9);}for (int i = vk.size() - 1; i >= 0; i--){k.push_back(vk.at(i));}for (auto& val : k){qDebug() <<"CalK.k" << val.no << val.x << val.y;}int cv = 1;for (auto& val : k){S.erase(S.begin() + (val.no.toInt() - cv));//删除下标为val.number的元素;cv++;}double s0 = 0;Pointp kk;kk = { 0,0,0 };for (auto& valS : S){if (s0 <= valS.s){s0 = valS.s;kk.no = valS.no;kk.x = valS.x;kk.y = valS.y;}}k.push_back(kk);}int count = 1;for (auto& val : k){val.no = count;count++;//qDebug() << val.no << val.x << val.y;}
}void Kmeans::drawK()
{QPicture pp;pp.setBoundingRect(ui.label_2->rect());QPainter painterP(&pp);QPen pen;painterP.setRenderHint(QPainter::Antialiasing, true);Pointp p1;p1.no = p.at(0).no;p1.x = p.at(0).x;p1.y = p.at(0).y;for (int i = 1; i < p.size(); i++){if (p1.x > p.at(i).x){p1.x = p.at(i).x;}if (p1.y > p.at(i).y){p1.y = p.at(i).y;}}double xmin = p1.x;double ymin = p1.y;for (int i = 1; i < p.size(); i++){if (p1.x < p.at(i).x){p1.x = p.at(i).x;}if (p1.y < p.at(i).y){p1.y = p.at(i).y;}}double xmax = p1.x;double ymax = p1.y;int w = ui.label_2->width();int h = ui.label_2->height();double a = w / (xmax - xmin);double b1 = h / (ymax - ymin);QColor color = QColor(123,223,46);for (int i = 0; i < k.size(); i++){pen.setColor(color);pen.setWidth(3);painterP.setPen(pen);int radius = 10;double x = k.at(i).x;double y = k.at(i).y;x = (x - xmin) * a;y = (y - ymin) * b1;painterP.drawEllipse(x - radius, y - radius, radius * 2, radius * 2);}ui.label_2->setPicture(pp);
}void Kmeans::CalDistance()
{Dis ss;for (auto& valP : p){double s0 = 0; int c = 1;double x1 = valP.x;double y1 = valP.y;for (auto& valK : k){double x2 = valK.x;double y2 = valK.y;x2 = x2 - x1;y2 = y2 - y1;double s = sqrt(x2 * x2 + y2 * y2);if (c == 1){s0 = s;ss.no = valP.no;ss.noK = valK.no;ss.x = valP.x;ss.y = valP.y;ss.s = s;c++;}if (s0 == 0){ss.no = valP.no;ss.noK = valK.no;ss.x = valP.x;ss.y = valP.y;ss.s = s;break;}if (s < s0){s0 = s;ss.no = valP.no;ss.noK = valK.no;ss.x = valP.x;ss.y = valP.y;ss.s = s;}}S.push_back(ss);}
}
#pragma once#include <QtWidgets/QWidget>
#include "ui_Kmeans.h"
#include<QFileDialog>
#include<QFile>
#include<QMessageBox>
#include<QTextStream>
#include<vector>
#pragma execution_character_set("UTF-8")
#include<qDebug>
#include<QPainter>
#include<QColor>
#include<QColorDialog>
#include<QPicture>
#include <algorithm>struct Pointp
{double x;double y;QString no;
};struct Dis
{double x;double y;QString no;QString noK;double s;
};struct centroid
{QString noK;double sx;double sy;
};enum Pd
{readK,to2K,blank
};class Kmeans : public QWidget
{Q_OBJECTpublic:Kmeans(QWidget *parent = nullptr);~Kmeans();public slots:void onBtReadData();int onBtCalKmeans();void onBtReadK();public:std::vector<Pointp> p;//原始数据点std::vector<Pointp> k;//各簇质心坐标int K;std::vector<Dis> S;std::vector<centroid> dis;bool start;Pd dd;public:void Calcentroid();void CKmeans();void CalDis();void drawPoint();void CalK();void drawK();void toK();void CalDistance();private:Ui::KmeansClass ui;
};