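// Problem: ten million SMS messages, some of them duplicates, are stored in a text file with one message per line; find the 10 messages that are repeated most often.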
package com.week.ms;
import java.io.*;
import java.util.*;
/**
 * Finds the most frequently repeated messages in a text file that stores one message per line.
 *
 * <p>The file is streamed line by line; each distinct line is tallied in a hash map, and the
 * distinct messages are sorted by occurrence count once, after the whole file has been read.
 */
public class Message {

    /** File that {@link #main} reads when no path is passed on the command line. */
    private static final String DEFAULT_FILE = "data.txt";

    /** Maximum number of entries printed by {@link #main}. */
    private static final int TOP_N = 10;

    /** Key under which each result map stores the message text. */
    private static final String KEY_MSG = "msg";

    /** Key under which each result map stores the occurrence count. */
    private static final String KEY_CNT = "cnt";

    /**
     * Prints the {@value #TOP_N} most frequent messages, one result map per line.
     *
     * @param args optional; {@code args[0]} is the path of the message file, otherwise
     *             {@code data.txt} in the working directory is used
     */
    public static void main(String[] args) {
        String kFile = args.length > 0 ? args[0] : DEFAULT_FILE;
        List<?> ranked = solution(kFile);
        int limit = Math.min(TOP_N, ranked.size());
        for (int i = 0; i < limit; i++) {
            System.out.println(ranked.get(i));
        }
    }

    /**
     * Counts how often each line occurs in the given UTF-8 text file and returns every distinct
     * line ranked from most to least frequent.
     *
     * <p>Each element of the returned list is a {@code Map} with two entries: {@code "msg"}
     * (the line, a {@code String}) and {@code "cnt"} (its number of occurrences, an
     * {@code Integer}). Lines with equal counts keep the order in which they first appeared
     * in the file. Empty lines are counted like any other line.
     *
     * <p>If the file cannot be opened or a read error occurs, the stack trace is printed and
     * whatever was counted up to that point (possibly nothing) is returned.
     *
     * @param kFile path of the file to read, one message per line
     * @return all distinct lines with their counts, sorted by count in descending order;
     *         never {@code null}
     */
    // The raw return type is kept on purpose so that existing callers compile unchanged.
    @SuppressWarnings("rawtypes")
    public static ArrayList solution(String kFile) {
        // Insertion-ordered so that ties are ranked deterministically by first appearance.
        Map<String, Integer> counts = new LinkedHashMap<>();

        try (FileInputStream inputStream = new FileInputStream(kFile);
             Scanner sc = new Scanner(inputStream, "UTF-8")) {
            // Scanner hands out one line at a time, so the file itself is never held in
            // memory; only the distinct lines and their counters are retained.
            while (sc.hasNextLine()) {
                String line = sc.nextLine();
                Integer seen = counts.get(line);
                counts.put(line, seen == null ? 1 : seen + 1);
            }
            // Scanner swallows read failures and just reports "no more lines"; surface them.
            IOException readError = sc.ioException();
            if (readError != null) {
                throw readError;
            }
        } catch (IOException e) {
            // Best effort, as before: report the problem and return what has been counted.
            e.printStackTrace();
        }

        // Sort once, after counting, instead of re-sorting the whole list for every line.
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                // Descending by count. Returns 0 for equal counts, which the Comparator
                // contract requires and which lets the stable sort preserve file order.
                return Integer.compare(b.getValue(), a.getValue());
            }
        });

        ArrayList<Map<String, Object>> ranked = new ArrayList<>(entries.size());
        for (Map.Entry<String, Integer> entry : entries) {
            Map<String, Object> row = new HashMap<>();
            row.put(KEY_MSG, entry.getKey());
            row.put(KEY_CNT, entry.getValue());
            ranked.add(row);
        }
        return ranked;
    }
}