前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >Java实现大量文件中读取关键字

Java实现大量文件中读取关键字

作者头像
用户2436820
发布2018-10-10 11:59:24
1.4K0
发布2018-10-10 11:59:24
举报
代码语言:java
复制
package searchWorld;

// Finds the keywords (words) that occur more than 100 times across a large set of files.

import java.io.File;
import java.io.IOException;
import java.util.Scanner;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;

/**
 * Counts word occurrences across every file below a directory and prints the
 * words that occur more than {@link #MIN_OCCURRENCES} times.
 *
 * <p>One enumerator thread walks the directory tree and feeds files into a
 * bounded {@link BlockingQueue}; {@link #SEARCH_THREADS} searcher threads take
 * files from that queue and tally words into a shared
 * {@link ConcurrentHashMap}. The {@link #DUMMY} sentinel marks the end of the
 * queue: a searcher that takes it puts it back (so the remaining searchers see
 * it too) and stops.
 */
public class BlockingQueueTest {
    /** Capacity of the bounded file queue shared by the enumerator and the searchers. */
    private static final int FILE_QUEUE_SIZE = 10;
    /** Number of searcher threads started by {@link #main(String[])}. */
    private static final int SEARCH_THREADS = 1000;
    /** A word is reported only when its count is strictly greater than this value. */
    private static final long MIN_OCCURRENCES = 100L;
    /** End-of-input sentinel; compared by identity, never by value. */
    private static final File DUMMY = new File("");
    private static BlockingQueue<File> queue = new ArrayBlockingQueue<>(FILE_QUEUE_SIZE);
    private static ConcurrentHashMap<String,LongAdder> hashMap = new ConcurrentHashMap<>();

    /**
     * Reads a directory name from standard input, counts the words of every
     * file below it, waits for all searcher threads to finish and then prints
     * the frequent words once.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try (Scanner in = new Scanner(System.in)) {
            System.out.println("请输入一个目录");
            String directory = in.nextLine();

            new Thread(() -> produceFiles(new File(directory))).start();

            Thread[] searchers = new Thread[SEARCH_THREADS];
            for (int i = 0; i < searchers.length; i++) {
                searchers[i] = new Thread(BlockingQueueTest::consumeFiles);
                searchers[i].start();
            }

            // The totals are only complete once every searcher has stopped,
            // so join them instead of polling the active thread count.
            try {
                for (Thread searcher : searchers) {
                    searcher.join();
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            getMoreHundredWord();
        }
    }

    /** Enumerates {@code root} into the queue and always finishes by publishing the sentinel. */
    private static void produceFiles(File root) {
        try {
            enumrate(root);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } finally {
            // Without the sentinel every searcher would block in take() forever,
            // so publish it even when enumeration failed.
            try {
                queue.put(DUMMY);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Takes files from the queue and searches them until the sentinel is seen. */
    private static void consumeFiles() {
        try {
            while (true) {
                File file = queue.take();
                if (file == DUMMY) {
                    // Hand the sentinel on so the other searchers terminate as well.
                    queue.put(file);
                    return;
                }
                try {
                    search(file);
                } catch (IOException e) {
                    // One unreadable file must not stop the whole scan.
                    System.err.println("Skipping " + file + ": " + e);
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Recursively puts every regular file below {@code directory} into the queue.
     * Blocks while the queue is full.
     *
     * @param directory the directory to walk; a path that is not a readable
     *                  directory is reported on standard error and skipped
     * @throws InterruptedException if the thread is interrupted while waiting
     *                              for free space in the queue
     */
    public static void enumrate(File directory) throws InterruptedException{
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Not a readable directory: " + directory);
            return;
        }
        for (File file : files) {
            if (file.isDirectory()) {
                enumrate(file);
            } else {
                queue.put(file);
            }
        }
    }

    /**
     * Reads {@code file} as UTF-8 text and adds each of its words to the shared tally.
     *
     * <p>Every character other than an ASCII letter, whitespace or {@code '+'}
     * is deleted from a line; the remainder is split on runs of whitespace and
     * {@code '+'}. Empty tokens are ignored.
     *
     * @param file the file to scan
     * @throws IOException if the file cannot be opened or reading it fails
     */
    public static void search(File file) throws IOException{
        try (Scanner in = new Scanner(file, "UTF-8")) {
            while (in.hasNextLine()) {
                String line = in.nextLine().replaceAll("[^a-zA-Z\\s+]", "");
                for (String word : line.split("[\\s+]+")) {
                    if (word.isEmpty()) {
                        // A line that starts with a separator yields a leading empty token.
                        continue;
                    }
                    // Atomic create-or-get, then a contention-friendly increment.
                    hashMap.computeIfAbsent(word, key -> new LongAdder()).increment();
                }
            }
            // Scanner swallows read errors and just ends the iteration; surface them.
            IOException failure = in.ioException();
            if (failure != null) {
                throw failure;
            }
        }
    }

    /**
     * Prints, as {@code word -> count}, every word whose count is strictly
     * greater than {@link #MIN_OCCURRENCES}. The output order is unspecified.
     */
    public static void getMoreHundredWord(){
        hashMap.forEach((word, count) -> {
            if (count.sum() > MIN_OCCURRENCES) {
                System.out.println(word + " -> " + count);
            }
        });
    }
}
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2018.09.27 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档