前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >Lucene:Suggest联想词

Lucene:Suggest联想词

原创
作者头像
HLee
修改2021-01-25 11:43:36
1.4K0
修改2021-01-25 11:43:36
举报
文章被收录于专栏:房东的猫房东的猫

简介

Lucene 的联想词(Suggest)功能位于 org.apache.lucene.search.suggest 包下,提供自动补全和联想提示的支持。使用前需要先引入 lucene-suggest 依赖:

代码语言:xml
复制
<!-- Search suggestions: Lucene module that contains the org.apache.lucene.search.suggest package -->
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-suggest</artifactId>
	<version>7.2.1</version>
</dependency>

Suggest用例

Controller层

代码语言:java
复制
@RestController
@RequestMapping(value = "/suggest")
public class SuggestController {

    @Resource
    private SuggestService suggestService;

    /**
     * Searches suggestion entries for a keyword.
     *
     * <p>Mapped to {@code GET /suggest/searchSuggest}; the work is delegated
     * entirely to the injected {@code SuggestService}.
     *
     * @param keyword the text to find suggestions for
     * @return the entries produced by the service for {@code keyword}
     */
    @GetMapping(value = "/searchSuggest")
    public List<DictionaryVO> searchSuggest(String keyword) {
        final List<DictionaryVO> suggestions = suggestService.searchSuggest(keyword);
        return suggestions;
    }
}

访问地址:
localhost:2000/spring-master/suggest/searchSuggest?keyword=胃造

Service层

代码语言:java
复制
@Slf4j
@Service
public class SuggestServiceImpl implements SuggestService {

    private AnalyzingInfixSuggester suggester;

    /**
     * 内存存储:优点速度快,缺点程序退出数据就没了
     */
    protected RAMDirectory directory;

    /**
     * 索引分词
     */
    protected StandardAnalyzer indexAnalyzer;

    /**
     * 查询分词
     */
    protected StandardAnalyzer queryAnalyzer;


    @Override
    public List<DictionaryVO> searchSuggest(String keyword) {

        List dictionaryList = new ArrayList();
        HashSet<BytesRef> contexts = new HashSet<BytesRef>();
        // 先根据region域进行suggest,再根据name域进行suggest
//        contexts.add(new BytesRef(region.getBytes("UTF8")));

        // num决定了返回几条数据,参数四表明是否所有TermQuery是否都需要满足,参数五表明是否需要高亮显示
        int num = 10;
        try {
            List<Lookup.LookupResult> results = suggester.lookup(keyword, num, true, false);

            for (Lookup.LookupResult result : results) {
                // result.key中存储的是根据用户输入内部算法进行匹配后返回的suggest内容
                log.info("result_key: " + result.key);
                // 从载荷(payload)中反序列化出Product对象(实际生产中出于降低内存占用考虑一般不会在载荷中存储这么多内容)
                BytesRef bytesRef = result.payload;
                ObjectInputStream objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytesRef.bytes));
                try {
                    DictionaryVO dictionaryVO = (DictionaryVO) objectInputStream.readObject();

                    dictionaryList.add(dictionaryVO);
                } catch (ClassNotFoundException cnfe) {
                    log.error(cnfe.getMessage());
                }
            }
        } catch (Exception e) {
            log.error(e.getMessage());
        }
        return dictionaryList;
    }

    /**
     * 初始化词典
     * @return
     */
    @PostConstruct
    protected void initSuggest() {

        directory = new RAMDirectory();
        indexAnalyzer = new StandardAnalyzer();
        queryAnalyzer = new StandardAnalyzer();

        try {
            suggester = new AnalyzingInfixSuggester(directory, indexAnalyzer, queryAnalyzer,
                    AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS, false);

            long start = System.currentTimeMillis();
            // 读DictionaryVO数据
            List diseases = FileUtils.readCsv(SuggestConstants.disease);
            List facultys = FileUtils.readCsv(SuggestConstants.faculty);
            List hospitals = FileUtils.readCsv(SuggestConstants.hospital);
            List drugcatalogues = FileUtils.readCsv(SuggestConstants.drugcatalogue);
            List doctors = FileUtils.readCsv(SuggestConstants.doctor);

            List allTerms = new ArrayList();
            allTerms.addAll(facultys);
            allTerms.addAll(hospitals);
            allTerms.addAll(diseases);
            allTerms.addAll(drugcatalogues);
            allTerms.addAll(doctors);

            // 创建索引,根据InputIterator的具体实现决定数据源以及创建索引的规则
            suggester.build(new DictionaryIterator(allTerms.iterator()));
            suggester.commit();

            long end = System.currentTimeMillis();
            log.info("It takes time to initialize the dictionary:" + (end - start));

            this.initAfter();
        } catch (IOException io) {
            log.error(io.getMessage());
        }
    }

    protected void initAfter() {

    }

    /**
     * 销毁词典
     */
    @PreDestroy
    protected void destroy(){

        try {
            if(suggester != null) {
                suggester.close();
            }

            if(directory != null) {
                directory.close();
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }

        if(indexAnalyzer != null) {
            indexAnalyzer.close();
        }

        if(queryAnalyzer != null) {
            queryAnalyzer.close();
        }
        this.destroyAfter();
    }

    protected void destroyAfter(){

    }
}

Util

代码语言:java
复制
@Slf4j
public class FileUtils {

    /**
     * 读取词典csv文件
     * @param fileNamePath
     * @return
     */
    public static List<DictionaryVO> readCsv(String fileNamePath) {

        List<DictionaryVO> dictionarys = new ArrayList<>();
        try {
            // 换成你的文件名
            BufferedReader reader = new BufferedReader(new FileReader(fileNamePath));
            String line;
            while ((line = reader.readLine()) != null) {
                // CSV格式文件为逗号分隔符文件,这里根据逗号切分
                String[] item = line.split(",");
                dictionarys.add(new DictionaryVO(item[0], item[1], Long.parseLong(item[2]), Long.parseLong(item[3])));
            }
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e.getMessage());
        }
        return dictionarys;
    }
}

Core

代码语言:java
复制
package com.spring.master.lucene.suggest.core;

import com.spring.master.lucene.suggest.vo.DictionaryVO;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRef;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Set;

/**
 * @author Huan Lee
 * @version 1.0
 * @date 2020-09-10 14:55
 * @describtion 核心类:决定了你的索引是如何创建的,决定了最终返回的提示关键词列表数据及其排序
 *
 */
public class DictionaryIterator implements InputIterator {

    /** Source of the dictionary entries to index. */
    private final Iterator<DictionaryVO> dictionaryIterator;

    /** Entry most recently returned by {@link #next()}; {@code null} before the first call. */
    private DictionaryVO currentDictionary;

    /**
     * Creates an iterator that feeds the given entries to the suggester.
     *
     * @param dictionaryIterator the entries to index
     */
    public DictionaryIterator(Iterator<DictionaryVO> dictionaryIterator) {
        this.dictionaryIterator = dictionaryIterator;
    }

    /**
     * Returns the weight of the current entry.
     *
     * @return always 1
     */
    @Override
    public long weight() {
        // TODO a per-entry weight could be used here: return currentDictionary.getWeight();
        return 1;
    }

    /**
     * Serializes the current {@code DictionaryVO} so it is stored as the payload.
     *
     * @return the Java-serialized form of the current entry
     * @throws RuntimeException if serialization fails; the original exception is kept as cause
     */
    @Override
    public BytesRef payload() {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        // Closing the ObjectOutputStream flushes it, so the byte array is complete afterwards.
        try (ObjectOutputStream out = new ObjectOutputStream(bos)) {
            out.writeObject(currentDictionary);
        } catch (IOException e) {
            throw new RuntimeException("Well that's unfortunate.", e);
        }
        return new BytesRef(bos.toByteArray());
    }

    /**
     * @return {@code true}: every entry provides a payload
     */
    @Override
    public boolean hasPayloads() {
        return true;
    }

    /**
     * Tells whether the contexts field is enabled.
     *
     * @return {@code false}: contexts are not used
     */
    @Override
    public boolean hasContexts() {
        return false;
    }

    /**
     * Returns the contexts of the current term, which can be used to filter suggestions.
     *
     * <p>Contexts are disabled (see {@link #hasContexts()}), so this returns {@code null}.
     * To filter by source type, return a set holding the UTF-8 bytes of
     * {@code currentDictionary.getSourceType()} and make {@link #hasContexts()} return
     * {@code true}.
     *
     * @return always {@code null}
     */
    @Override
    public Set<BytesRef> contexts() {
        return null;
    }

    /**
     * Advances to the next entry and returns its suggestion key.
     *
     * @return the UTF-8 bytes of the entry's word, or {@code null} when there are no more entries
     */
    @Override
    public BytesRef next() throws IOException {
        if (!dictionaryIterator.hasNext()) {
            return null;
        }
        currentDictionary = dictionaryIterator.next();
        // The word of the current DictionaryVO is the key; BytesRef(CharSequence)
        // encodes it as UTF-8, so no checked encoding exception has to be handled.
        return new BytesRef(currentDictionary.getWord());
    }
}

VO

代码语言:java
复制
/**
 * One dictionary entry of the suggestion index.
 */
@Data
public class DictionaryVO implements Serializable {

    /** The dictionary word. */
    private String word;

    /** Id of the source record. */
    private Long sourceId;

    /** Source: Doctor, Disease, Hospital, Faculty, Drugcatalogue. */
    private String sourceType;

    /** Weight. */
    private Long weight;

    /** Creates an entry with every field unset. */
    public DictionaryVO() {
    }

    /**
     * Creates a fully populated entry.
     *
     * @param word       the dictionary word
     * @param sourceType the source the word comes from
     * @param sourceId   id of the source record
     * @param weight     weight of the entry
     */
    public DictionaryVO(String word, String sourceType, Long sourceId, Long weight) {
        this.word = word;
        this.sourceType = sourceType;
        this.sourceId = sourceId;
        this.weight = weight;
    }
}

Constant

代码语言:java
复制
/**
 * Locations of the CSV dictionary files, one constant per source.
 *
 * NOTE(review): these are absolute paths under one user's home directory, so the
 * files will only be found on a machine with that exact layout; replace them with
 * your own paths.
 */
public class SuggestConstants {

    // One CSV file per source of dictionary words.
    public static final String faculty = "/Users/lihuan/Documents/projects/git/me/faculty.csv";
    public static final String hospital = "/Users/lihuan/Documents/projects/git/me/hospital.csv";
    public static final String disease = "/Users/lihuan/Documents/projects/git/me/disease.csv";
    public static final String drugcatalogue = "/Users/lihuan/Documents/projects/git/me/drugcatalogue.csv";
    public static final String doctor = "/Users/lihuan/Documents/projects/git/me/doctor.csv";
}

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 简介
  • Suggest用例
    • Controller层
      • Service层
        • Util
          • Core
            • VO
              • Constant
              领券
              问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档