在做数据分析的时候,生成词云图是很常见的需求。通常情况下大多是通过 Python 采集数据并生成词云,功能十分强大;本文来说说如何用 Java 生成词云!
在 GitHub 上找轮子的时候,发现了这么一个项目:Kumo(项目地址:https://github.com/kennycason/kumo),
虽然功能没有 Python 的轮子那么好用,但好歹也能满足基本需求。我们先来看看该项目简介中给出的效果图。
本文以技术书籍名称加随机权重为例来生成词云,看看生成的几个效果图。
这里使用的是目前官方最新版本 1.17,
第一个依赖是核心包,第二个是语言支持:
<!-- Kumo core package (described by the article as the core library). -->
<dependency>
<groupId>com.kennycason</groupId>
<artifactId>kumo-core</artifactId>
<version>1.17</version>
</dependency>
<!-- Kumo tokenizers (described by the article as language support); keep the version in sync with kumo-core. -->
<dependency>
<groupId>com.kennycason</groupId>
<artifactId>kumo-tokenizers</artifactId>
<version>1.17</version>
</dependency>
// Builds a circular word cloud from a fixed list of technical book titles, giving each
// title a random weight, and writes the rendered image to d://3.png.

// Analyzer settings. In this snippet the analyzer is only needed by the file-based
// alternative that is shown commented out below.
final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
frequencyAnalyzer.setWordFrequenciesToReturn(600);
frequencyAnalyzer.setMinWordLength(2);
frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
// Alternative: load the word frequencies directly from a file.
//final List<WordFrequency> wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_language.txt"));
final List<WordFrequency> wordFrequencies = new ArrayList<>();
// Back-end technical book titles used as the words of the cloud.
// NOTE(review): a few titles appear more than once and some carry a leading space;
// each array element becomes its own WordFrequency entry below.
String [] books = {"Spring实战","Spring源码深度解析","SpringBoot实战",
"SpringBoot2精髓","一步一步学SpringBoot2","Spring微服务实战",
"Head First Java","Java并发编程实战","深入理解Java 虚拟机",
"Head First Design","effective java","J2EE development without EJB",
"TCP/IP卷一"," 计算机网络:自顶向下","图解HTTP和图解TCP/IP",
"计算机网络","深入理解计算机系统","现代操作系统",
"Linux内核设计与实现","Unix网络编程","数据结构与算法",
"算法导论","数据结构与算法(Java版)","算法图解,啊哈算法",
"剑指offer","LeetCode"," Java编程思想",
"Java核心技术卷一","深入理解JVM虚拟机","Java并发编程实战",
" Java并发编程艺术","Java性能调优指南","Netty权威指南",
"深入JavaWeb技术内幕","How Tomcat Works","Tomcat架构解析",
"Spring实战","Spring源码深度解析","Spring MVC学习指南",
"Maven实战","sql必知必会","深入浅出MySQL",
"Spring cloud微服务实战","SpringBoot与Docker微服务实战","深入理解SpringBoot与微服务架构"
};
// Assign each title a random weight in [0, books.length), so every run produces a
// different image. A single Random is shared instead of creating one per iteration.
final Random random = new Random();
for (String book : books) {
    wordFrequencies.add(new WordFrequency(book, random.nextInt(books.length)));
}
// Per the author, Chinese text is rendered garbled unless this font is set.
// Font.ITALIC replaces the original magic number 2 (same value).
java.awt.Font font = new java.awt.Font("STSong-Light", java.awt.Font.ITALIC, 18);
final Dimension dimension = new Dimension(900, 900);
final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
wordCloud.setPadding(2);
wordCloud.setFontScalar(new SqrtFontScalar(12, 42));
// Three palette colors; per the author, colors listed earlier are used for
// higher-frequency words.
wordCloud.setColorPalette(new LinearGradientColorPalette(Color.RED, Color.BLUE, Color.GREEN, 30, 30));
wordCloud.setKumoFont(new KumoFont(font));
wordCloud.setBackgroundColor(new Color(255, 255, 255));
// The cloud is drawn as a circle; the argument is the circle's radius.
// (The original set this same background twice; one call is enough.)
wordCloud.setBackground(new CircleBackground(255));
wordCloud.build(wordFrequencies);
wordCloud.writeToFile("d://3.png");
// Excerpt repeated from the listing above: frequencies can also be loaded directly from a file.
//final List<WordFrequency> wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_language.txt"));
// Excerpt repeated from the listing above: use the Chinese tokenizer on the analyzer.
frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
// Per the author, Chinese text is rendered garbled unless this font is set.
java.awt.Font font = new java.awt.Font("STSong-Light", 2, 18);
https://github.com/pengziliu/kumo