k-中心点(k-medoids)聚类数据挖掘算法的 C# 实现
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.Collections;
- using System.Data.Odbc;
/// <summary>
/// Groups records into clusters with a randomized k-medoids (k-center-point) search.
/// Every cluster is represented by one record of the data set (its center), stored
/// as an index into the data list.
/// </summary>
/// <remarks>
/// <c>IDataWorker</c> is declared outside this file. Records are cast to
/// <see cref="ArrayList"/> because that is what <c>PowerUtil.countPower</c> accepts.
/// </remarks>
class ClusterGenerator : IDataWorker
{
    #region IDataWorker Members

    /// <summary>
    /// Entry point required by <c>IDataWorker</c>. Not implemented yet.
    /// </summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public void work()
    {
        throw new NotImplementedException("The method or operation is not implemented.");
    }

    #endregion

    private int k = 10;

    /// <summary>
    /// Requested number of clusters (defaults to 10).
    /// </summary>
    public int K
    {
        get { return k; }
        set { k = value; }
    }

    // Index (into the data list) of the record acting as center of each cluster.
    private ArrayList centerList = new ArrayList();

    // One ArrayList per cluster, holding the indices of the records assigned to it.
    private ArrayList pointClusterList = new ArrayList();

    /// <summary>
    /// Runs the clustering over <paramref name="alldata"/> and leaves the result in
    /// <c>centerList</c> (center indices) and <c>pointClusterList</c> (member indices).
    /// </summary>
    /// <param name="alldata">
    /// Records to cluster; each element must be an <see cref="ArrayList"/> that
    /// <c>PowerUtil.countPower</c> can compare.
    /// </param>
    /// <remarks>
    /// Each pass assigns every record to its nearest center, then tries one random
    /// replacement center per cluster. The search repeats while a replacement lowers
    /// the total cost and stops after the first pass in which none does.
    /// When there are fewer records than <see cref="K"/>, one cluster per record is built.
    /// </remarks>
    private void genCluster(ArrayList alldata)
    {
        // Start from a clean state so repeated calls do not accumulate old clusters.
        centerList.Clear();
        pointClusterList.Clear();

        if (alldata == null || alldata.Count == 0 || k <= 0)
        {
            return;
        }

        Random random = new Random();

        // Pick distinct initial centers; duplicates would produce clusters that can never receive members.
        int clusterCount = Math.Min(k, alldata.Count);
        while (centerList.Count < clusterCount)
        {
            int candidate = random.Next(alldata.Count);
            if (!centerList.Contains(candidate))
            {
                centerList.Add(candidate);
                pointClusterList.Add(new ArrayList());
            }
        }

        bool isChanged;
        do
        {
            isChanged = false;

            // Memberships are rebuilt from scratch on every pass.
            foreach (ArrayList members in pointClusterList)
            {
                members.Clear();
            }

            // Assign every record to the cluster whose center is nearest.
            for (int i = 0; i < alldata.Count; i++)
            {
                int minCenter = 0;
                double minPower = double.MaxValue; // reset per record, otherwise no center ever wins
                for (int j = 0; j < centerList.Count; j++)
                {
                    double tempPower = PowerUtil.countPower(
                        (ArrayList)alldata[i],
                        (ArrayList)alldata[(int)centerList[j]]);
                    if (tempPower < minPower)
                    {
                        minCenter = j;
                        minPower = tempPower;
                    }
                }
                ((ArrayList)pointClusterList[minCenter]).Add(i);
            }

            // For each cluster, test one random record as replacement center.
            for (int i = 0; i < pointClusterList.Count; i++)
            {
                int oRandom = random.Next(alldata.Count);
                if (centerList.Contains(oRandom))
                {
                    // Already a center: swapping it in cannot lower the cost.
                    continue;
                }

                if (PowerUtil.countEM(alldata, centerList, pointClusterList, oRandom, i))
                {
                    centerList[i] = oRandom;
                    isChanged = true;
                    // Memberships are stale after a swap; reassign before testing further swaps.
                    break;
                }
            }
        } while (isChanged);
    }
}
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.Collections;
/// <summary>
/// Distance and cost helpers used by the k-medoids clustering.
/// </summary>
class PowerUtil
{
    /// <summary>
    /// Computes a dissimilarity between two records of string values.
    /// </summary>
    /// <param name="a">First record; elements are strings, empty strings mark missing values.</param>
    /// <param name="b">Second record, same conventions as <paramref name="a"/>.</param>
    /// <returns>
    /// The average of two fractions: the share of non-empty values of <paramref name="a"/>
    /// that do not occur in <paramref name="b"/>, and the same with the roles swapped.
    /// The result lies between 0 (every value is shared) and 1 (nothing is shared).
    /// A record without any non-empty value contributes 1 for its side.
    /// </returns>
    /// <remarks>
    /// NOTE(review): the nested loops of the previous version are read here as a
    /// membership test (value of one record found anywhere in the other) - confirm
    /// that a positional, column-by-column comparison was not intended.
    /// </remarks>
    public static double countPower(ArrayList a, ArrayList b)
    {
        double powerA = unmatchedFraction(a, b);
        double powerB = unmatchedFraction(b, a);
        return (powerA + powerB) / 2;
    }

    /// <summary>
    /// Decides whether replacing one center by another record lowers the total clustering cost.
    /// </summary>
    /// <param name="alldata">All records; each element is an <see cref="ArrayList"/> of strings.</param>
    /// <param name="centerList">Current center indices into <paramref name="alldata"/>; not modified.</param>
    /// <param name="pointClusterList">Current clusters: one list of record indices per center.</param>
    /// <param name="oRandom">Index of the record proposed as new center.</param>
    /// <param name="position">Index in <paramref name="centerList"/> of the center to replace.</param>
    /// <returns>
    /// <c>true</c> when the summed record-to-center distance after the replacement is
    /// strictly smaller than the current one; otherwise <c>false</c>.
    /// </returns>
    public static bool countEM(ArrayList alldata, ArrayList centerList, ArrayList pointClusterList, int oRandom, int position)
    {
        // Work on a copy so the caller's centers stay untouched until it accepts the swap.
        ArrayList newCenterList = (ArrayList)centerList.Clone();
        newCenterList[position] = oRandom;

        ArrayList newPointClusterList = assignToNearest(alldata, newCenterList);

        // Both totals are accumulated cluster by cluster so that identical
        // configurations produce bit-identical sums.
        double sumPower = totalCost(alldata, centerList, pointClusterList);
        double randPower = totalCost(alldata, newCenterList, newPointClusterList);

        return randPower - sumPower < 0;
    }

    // Share of non-empty values in 'source' that do not occur in 'other' (1.0 when 'source' has none).
    private static double unmatchedFraction(ArrayList source, ArrayList other)
    {
        int nonEmpty = 0;
        int matched = 0;
        for (int i = 0; i < source.Count; i++)
        {
            string value = (string)source[i];
            if (string.IsNullOrEmpty(value))
            {
                continue; // missing values neither count nor match
            }

            nonEmpty++;
            if (other.Contains(value))
            {
                matched++;
            }
        }

        if (nonEmpty == 0)
        {
            return 1.0;
        }

        // Cast first: int / int would truncate the ratio to 0.
        return 1.0 - (double)matched / nonEmpty;
    }

    // Builds one member list per center and puts every record index into the list of its nearest center.
    private static ArrayList assignToNearest(ArrayList alldata, ArrayList centers)
    {
        ArrayList clusters = new ArrayList();
        for (int i = 0; i < centers.Count; i++)
        {
            clusters.Add(new ArrayList());
        }

        for (int i = 0; i < alldata.Count; i++)
        {
            int minCenter = 0;
            double minPower = double.MaxValue; // reset per record so the first center can win
            for (int j = 0; j < centers.Count; j++)
            {
                double tempPower = countPower(
                    (ArrayList)alldata[i],
                    (ArrayList)alldata[(int)centers[j]]);
                if (tempPower < minPower)
                {
                    minCenter = j;
                    minPower = tempPower;
                }
            }
            ((ArrayList)clusters[minCenter]).Add(i);
        }

        return clusters;
    }

    // Sums the distance of every clustered record to the center of its own cluster.
    private static double totalCost(ArrayList alldata, ArrayList centers, ArrayList clusters)
    {
        double total = 0;
        for (int i = 0; i < clusters.Count; i++)
        {
            ArrayList cluster = (ArrayList)clusters[i];
            ArrayList center = (ArrayList)alldata[(int)centers[i]];
            for (int j = 0; j < cluster.Count; j++)
            {
                total += countPower((ArrayList)alldata[(int)cluster[j]], center);
            }
        }
        return total;
    }
}
复制代码
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,系统将无法在第一时间通知您!
发布评论