前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 > 专栏 > lucene对日期(date)和整型(int)处理

lucene对日期(date)和整型(int)处理

作者头像
Hongten
发布2018-09-13 15:36:46
1.1K0
发布2018-09-13 15:36:46
举报
文章被收录于专栏:HongtenHongten

项目结构:

运行效果:

==========================================

代码部分:

==========================================

/lucene_0400_dateAndInt/src/com/b510/lucene/util/LuceneUtil.java

代码语言:java
复制
  1 /**
  2  * 
  3  */
  4 package com.b510.lucene.util;
  5 
  6 import java.io.File;
  7 import java.io.IOException;
  8 import java.text.ParseException;
  9 import java.text.SimpleDateFormat;
 10 import java.util.Date;
 11 import java.util.HashMap;
 12 import java.util.Map;
 13 
 14 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 15 import org.apache.lucene.document.Document;
 16 import org.apache.lucene.document.Field;
 17 import org.apache.lucene.document.NumericField;
 18 import org.apache.lucene.index.CorruptIndexException;
 19 import org.apache.lucene.index.IndexReader;
 20 import org.apache.lucene.index.IndexWriter;
 21 import org.apache.lucene.index.IndexWriterConfig;
 22 import org.apache.lucene.index.Term;
 23 import org.apache.lucene.search.IndexSearcher;
 24 import org.apache.lucene.search.ScoreDoc;
 25 import org.apache.lucene.search.TermQuery;
 26 import org.apache.lucene.search.TopDocs;
 27 import org.apache.lucene.store.Directory;
 28 import org.apache.lucene.store.FSDirectory;
 29 import org.apache.lucene.store.LockObtainFailedException;
 30 import org.apache.lucene.util.Version;
 31 
 32 /**
 33  * @author Hongten <br />
 34  * @date 2013-1-31
 35  */
 36 public class LuceneUtil {
 37 
 38     /**
 39      * 邮件id
 40      */
 41     private String[] ids = { "1", "2", "3", "4", "5", "6" };
 42     /**
 43      * 邮箱
 44      */
 45     private String[] emails = { "aa@sina.com", "bb@foxmail.com", "cc@qq.com",
 46             "dd@163.com", "ee@gmail.com", "ff@sina.com" };
 47     /**
 48      * 邮件内容
 49      */
 50     private String[] contents = { "hello,aa,hi,hell world!!", 
 51                                   "hello,bb,i'm a boy", 
 52                                   "hello,cc",
 53                                   "hello,dd,welcome to my zone,this is a test hello", 
 54                                   "hello,ee,haha,xixi,hello world!!", 
 55                                   "hello,ff" };
 56     /**
 57      * 附件数
 58      */
 59     private int[] attachs = {1,5,3,2,1,6};
 60     /**
 61      * 日期
 62      */
 63     private Date[] dates = null;
 64     /**
 65      * 收件人的名称
 66      */
 67     private String[] names = { "hongten", "hanyuan", "Devide", "Tom", "Steven",
 68             "Shala" };
 69 
 70     private Directory directory = null;
 71     /**
 72      * 评分
 73      */
 74     private Map<String, Float> scores = new HashMap<String, Float>();
 75     
 76     public LuceneUtil() {
 77         try {
 78             setDates();
 79             scores.put("sina.com", 1.0f);
 80             scores.put("foxmail.com", 1.1f);
 81             directory = FSDirectory.open(new File(
 82                     "D:/WordPlace/lucene/lucene_0400_dateAndInt/lucene/index"));
 83         } catch (IOException e) {
 84             e.printStackTrace();
 85         }
 86     }
 87 
 88     /**
 89      * 创建日期
 90      */
 91     public void setDates(){
 92         SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
 93         try {
 94             dates = new Date[ids.length];
 95             dates[0] = sdf.parse("2012-11-18");
 96             dates[1] = sdf.parse("2010-01-28");
 97             dates[2] = sdf.parse("2011-11-21");
 98             dates[3] = sdf.parse("2012-12-12");
 99             dates[4] = sdf.parse("2011-06-23");
100             dates[5] = sdf.parse("2012-03-15");
101         } catch (ParseException e) {
102             e.printStackTrace();
103         }
104     }
105     
106     /**
107      * 创建索引
108      */
109     public void index() {
110         IndexWriter writer = null;
111         try {
112             writer = new IndexWriter(directory, new IndexWriterConfig(
113                     Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
114             //删除之前所建立的全部索引
115             writer.deleteAll();
116             // 创建文档
117             Document document = null;
118             for (int i = 0; i < ids.length; i++) {
119                 // Field.Store.YES:将会存储域值,原始字符串的值会保存在索引,以此可以进行相应的回复操作,对于主键,标题可以是这种方式存储
120                 // Field.Store.NO:不会存储域值,通常与Index.ANAYLIZED和起来使用,索引一些如文章正文等不需要恢复的文档
121                 // ==============================
122                 // Field.Index.ANALYZED:进行分词和索引,适用于标题,内容等
123                 // Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号码,姓名,ID等,适用于精确搜索
124                 // Field.Index.ANALYZED_NOT_NORMS:进行分词,但是不进行存储norms信息,这个norms中包括了创建索引的时间和权值等信息
125                 // Field.Index.NOT_ANALYZED_NOT_NORMS:不进行分词也不进行存储norms信息(不推荐)
126                 // Field.Index.NO:不进行分词
127                 document = new Document();
128                 document.add(new Field("id", ids[i], Field.Store.YES,
129                         Field.Index.NOT_ANALYZED_NO_NORMS));
130                 document.add(new Field("email", emails[i], Field.Store.YES,
131                         Field.Index.NOT_ANALYZED));
132                 document.add(new Field("content", contents[i], Field.Store.YES,
133                         Field.Index.ANALYZED));
134                 document.add(new Field("name", names[i], Field.Store.YES,
135                         Field.Index.NOT_ANALYZED_NO_NORMS));
136                 document.add(new NumericField("attach", Field.Store.YES,true).setIntValue(attachs[i]));
137                 document.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
138                 
139                 //这里进行加权处理
140                 String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
141                 System.out.println(et);
142                 if(scores.containsKey(et)){
143                     document.setBoost(scores.get(et));
144                 }else{
145                     document.setBoost(0.6f);
146                 }
147                 writer.addDocument(document);
148             }
149         } catch (CorruptIndexException e) {
150             e.printStackTrace();
151         } catch (LockObtainFailedException e) {
152             e.printStackTrace();
153         } catch (IOException e) {
154             e.printStackTrace();
155         } finally {
156             if (writer != null) {
157                 try {
158                     writer.close();
159                 } catch (CorruptIndexException e) {
160                     e.printStackTrace();
161                 } catch (IOException e) {
162                     e.printStackTrace();
163                 }
164             }
165         }
166     }
167     
168     /**
169      * 搜索
170      */
171     public void search(){
172         try {
173             IndexReader reader = IndexReader.open(directory);
174             IndexSearcher searcher = new IndexSearcher(reader);
175             TermQuery query = new TermQuery(new Term("content","hello"));
176             TopDocs tds =searcher.search(query, 10);
177             for(ScoreDoc sd : tds.scoreDocs){
178                 Document doc = searcher.doc(sd.doc);
179                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
180                 Date date = null;
181                 /*try {
182                     date = sdf.parse(doc.get("date"));
183                 } catch (ParseException e) {
184                     e.printStackTrace();
185                 }*/
186                 System.out.println("文档序号:["+sd.doc+"] 得分:["+sd.score+"] 邮件名称:["+doc.get("email")+"] 邮件人:["+doc.get("name")+"] 附件数:["+doc.get("attach")+"] 日期:["+doc.get("date")+"] 内容 : ["+doc.get("content")+"]");
187             }
188         } catch (CorruptIndexException e) {
189             e.printStackTrace();
190         } catch (IOException e) {
191             e.printStackTrace();
192         }
193     }
194 }

/lucene_0400_dateAndInt/src/com/b510/lucene/test/IndexTest.java

代码语言:java
复制
 1 /**
 2  * 
 3  */
 4 package com.b510.lucene.test;
 5 
 6 import org.junit.Test;
 7 
 8 import com.b510.lucene.util.LuceneUtil;
 9 
10 /**
11  * @author Hongten <br />
12  * @date 2013-1-31
13  */
14 public class IndexTest {
15 
16     @Test
17     public void testIndex(){
18         LuceneUtil util = new LuceneUtil();
19         util.index();
20     }
21     
22     @Test
23     public void testSearch(){
24         LuceneUtil util = new LuceneUtil();
25         util.search();
26     }
27     
28 }
本文参与 腾讯云自媒体分享计划,分享自作者个人站点/博客。
原始发表:2013-02-02 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体分享计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档