# 基于层次聚类的工业数据分析研究

1. 数据聚类分析

2. 层次聚类分析

3.层次聚类算法流程

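|    | P1   | P2   | P3   | P4   | P5   |
|----|------|------|------|------|------|
| P1 | 0    | 0.81 | 1.32 | 1.94 | 1.82 |
| P2 | 0.81 | 0    | 1.56 | 2.16 | 1.77 |
| P3 | 1.32 | 1.56 | 0    | 0.63 | 0.71 |
| P4 | 1.94 | 2.16 | 0.63 | 0    | 0.71 |
| P5 | 1.82 | 1.77 | 0.71 | 0.71 | 0    |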

MIN.distance({P3, P4}, P1) = 1.32;

MIN.distance({P3, P4}, P2) = 1.56;

MIN.distance({P3, P4}, P5) = 0.71;

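|          | P1   | P2   | {P3, P4} | P5   |
|----------|------|------|----------|------|
| P1       | 0    | 0.81 | 1.32     | 1.82 |
| P2       | 0.81 | 0    | 1.56     | 1.77 |
| {P3, P4} | 1.32 | 1.56 | 0        | 0.71 |
| P5       | 1.82 | 1.77 | 0.71     | 0    |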

MIN.distance(P1, {P3, P4, P5}) = 1.32;

MIN.distance(P2, {P3, P4, P5}) = 1.56;

|              | P1   | P2   | {P3, P4, P5} |
|--------------|------|------|--------------|
| P1           | 0    | 0.81 | 1.32         |
| P2           | 0.81 | 0    | 1.56         |
| {P3, P4, P5} | 1.32 | 1.56 | 0            |

MIN.distance({P1,P2}, {P3, P4, P5}) = 1.32

|              | {P1, P2} | {P3, P4, P5} |
|--------------|----------|--------------|
| {P1, P2}     | 0        | 1.32         |
| {P3, P4, P5} | 1.32     | 0            |

MAX 算法与组平均算法的流程同理，只是在更新矩阵时，将上述簇间距离的计算方式分别改为簇间两点的最大欧氏距离和簇间所有点对的平均欧氏距离即可。

4.层次聚类算法实现

```java
import java.io.BufferedReader;

import java.io.IOException;

import java.io.PrintStream;

import java.text.DecimalFormat;

import java.util.ArrayList;

/**
 * Agglomerative hierarchical clustering over comma-separated numeric rows.
 *
 * <p>Usage: {@link #setInput(String)} reads the data points and builds the
 * pairwise Euclidean distance matrix; {@link #printOutput(String)} then
 * repeatedly merges the closest pair and writes every intermediate matrix and
 * every merge to a report file.
 *
 * <p>Only the upper triangle of the matrix is populated. A cell value of 0 is
 * used as the sentinel for "no distance left to consider", so two distinct
 * input rows that are exactly identical (distance 0) are never reported as a
 * merge.
 */
public class Hierarchical {

    /** Upper-triangular distance matrix; 0 marks a cleared or unused cell. */
    private double[][] matrix;

    /** Number of attributes (columns) per data point. */
    private int dimension;

    /** One data point: the parsed numeric attributes of a single input row. */
    private static final class Node {

        final double[] attributes;

        Node(double[] attributes) {
            this.attributes = attributes;
        }
    }

    /** Data points in input order; index i corresponds to row/column i of the matrix. */
    private ArrayList<Node> arraylist;

    /** A matrix cell: row index, column index and the distance stored there. */
    private static final class Model {

        int x = 0;

        int y = 0;

        double value = 0;
    }

    /**
     * Computes the Euclidean distance between two points over the first
     * {@code dimension} attributes.
     */
    private double getDistance(Node one, Node two) {
        double val = 0;
        for (int i = 0; i < dimension; ++i) {
            double diff = one.attributes[i] - two.attributes[i];
            val += diff * diff;
        }
        return Math.sqrt(val);
    }

    /** Fills the upper triangle of the matrix with the pairwise point distances. */
    private void loadMatrix() {
        for (int i = 0; i < matrix.length; ++i) {
            for (int j = i + 1; j < matrix.length; ++j) {
                matrix[i][j] = getDistance(arraylist.get(i), arraylist.get(j));
            }
        }
    }

    /**
     * Finds the smallest non-zero cell in the upper triangle.
     *
     * @return the cell's position and value; {@code value == 0} when every
     *         upper-triangle cell is 0, i.e. nothing is left to merge
     */
    private Model findMinValueOfMatrix(double[][] matrix) {
        Model model = new Model();
        // Start from infinity so that arbitrarily large distances can still be selected.
        double min = Double.POSITIVE_INFINITY;
        for (int i = 0; i < matrix.length; ++i) {
            for (int j = i + 1; j < matrix.length; ++j) {
                if (min > matrix[i][j] && matrix[i][j] != 0) {
                    min = matrix[i][j];
                    model.x = i;
                    model.y = j;
                    model.value = matrix[i][j];
                }
            }
        }
        return model;
    }

    /** Writes the current matrix, one row per line, cells formatted with "#.00". */
    private void printMatrix(PrintStream out, DecimalFormat cellFormat) {
        for (int i = 0; i < matrix.length; ++i) {
            for (int j = 0; j < matrix.length - 1; ++j) {
                out.print(cellFormat.format(matrix[i][j]) + " ");
            }
            out.println(cellFormat.format(matrix[i][matrix.length - 1]));
        }
    }

    /**
     * Runs the merge loop and writes the report to {@code path}.
     *
     * <p>Each iteration prints the matrix, picks the closest pair, reports it
     * and clears cells so the merged pair keeps a single distance to the rest.
     * The loop ends when no non-zero cell remains.
     */
    private void processHierarchical(String path) {
        if (matrix == null) {
            System.err.println("No input loaded; call setInput before printOutput.");
            return;
        }

        PrintStream out;
        try {
            out = new PrintStream(path);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        try {
            DecimalFormat cellFormat = new DecimalFormat("#.00");
            while (true) {
                out.println("Matrix update as below: ");
                printMatrix(out, cellFormat);
                out.println();

                Model closest = findMinValueOfMatrix(matrix);
                if (closest.value == 0) {
                    // No non-zero distance left: clustering is finished.
                    break;
                }
                int x = closest.x;
                int y = closest.y;

                out.println("Combine " + (x + 1) + " " + (y + 1));
                out.println("The distance is: " + closest.value);

                matrix[x][y] = 0;
                // For the merged pair keep, per other index i, only one of the two
                // column cells and one of the two row cells: the one holding the
                // smaller value survives, the other is cleared.
                // NOTE(review): a cell that is already 0 counts as "smaller", so a
                // live distance can be cleared in favour of an already-cleared one.
                // This matches the worked example but may not yield the true
                // minimum link for every data set - confirm before relying on it.
                for (int i = 0; i < matrix.length; ++i) {
                    if (matrix[i][x] <= matrix[i][y]) {
                        matrix[i][y] = 0;
                    } else {
                        matrix[i][x] = 0;
                    }
                    if (matrix[x][i] <= matrix[y][i]) {
                        matrix[y][i] = 0;
                    } else {
                        matrix[x][i] = 0;
                    }
                }
            }
        } finally {
            // Always release the file handle, even if formatting fails midway.
            out.close();
        }
        System.out.println("Please check results in: " + path);
    }

    /**
     * Reads the data points from a text file and builds the distance matrix.
     *
     * <p>Each non-blank line is one point whose attributes are separated by
     * commas. Blank lines are skipped.
     *
     * @param path file to read
     * @throws NumberFormatException if a field is not a valid number
     * @throws IllegalArgumentException if rows have differing column counts
     */
    public void setInput(String path) {
        ArrayList<Node> nodes = new ArrayList<Node>();
        int columns = 0;
        try {
            BufferedReader br = new BufferedReader(new java.io.FileReader(path));
            try {
                String str;
                while ((str = br.readLine()) != null) {
                    if (str.trim().length() == 0) {
                        continue;
                    }
                    String[] strArray = str.split(",");
                    if (!nodes.isEmpty() && strArray.length != columns) {
                        throw new IllegalArgumentException("Expected " + columns
                                + " columns but found " + strArray.length + " in line: " + str);
                    }
                    columns = strArray.length;
                    double[] attributes = new double[columns];
                    for (int i = 0; i < columns; ++i) {
                        attributes[i] = Double.parseDouble(strArray[i]);
                    }
                    nodes.add(new Node(attributes));
                }
            } finally {
                br.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        arraylist = nodes;
        dimension = columns;
        matrix = new double[arraylist.size()][arraylist.size()];
        loadMatrix();
    }

    /**
     * Runs the clustering on the previously loaded input and writes the report.
     *
     * @param path file the report is written to
     */
    public void printOutput(String path) {
        processHierarchical(path);
    }

    /** Clusters c:/hierarchical.txt and writes c:/hierarchical_results.txt. */
    public static void main(String[] args) {
        Hierarchical hi = new Hierarchical();
        hi.setInput("c:/hierarchical.txt");
        hi.printOutput("c:/hierarchical_results.txt");
    }
}
```

5.测试数据

```
0.7,1.2

0.8,2

2,1

2.6,0.8

2.5,1.5
```

```
Matrix update as below:

.00 .81 1.32 1.94 1.82

.00 .00 1.56 2.16 1.77

.00 .00 .00 .63 .71

.00 .00 .00 .00 .71

.00 .00 .00 .00 .00

Combine 3 4

The distance is: 0.6324555320336759

Matrix update as below:

.00 .81 1.32 .00 1.82

.00 .00 1.56 .00 1.77

.00 .00 .00 .00 .00

.00 .00 .00 .00 .71

.00 .00 .00 .00 .00

Combine 4 5

The distance is: 0.7071067811865475

Matrix update as below:

.00 .81 1.32 .00 .00

.00 .00 1.56 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

Combine 1 2

The distance is: 0.806225774829855

Matrix update as below:

.00 .00 1.32 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

Combine 1 3

The distance is: 1.3152946437965907

Matrix update as below:

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00

.00 .00 .00 .00 .00
```

