项目结构:
运行效果:
==========================================
代码部分:
==========================================
/lucene_0400_dateAndInt/src/com/b510/lucene/util/LuceneUtil.java
1 /**
2 *
3 */
4 package com.b510.lucene.util;
5
6 import java.io.File;
7 import java.io.IOException;
8 import java.text.ParseException;
9 import java.text.SimpleDateFormat;
10 import java.util.Date;
11 import java.util.HashMap;
12 import java.util.Map;
13
14 import org.apache.lucene.analysis.standard.StandardAnalyzer;
15 import org.apache.lucene.document.Document;
16 import org.apache.lucene.document.Field;
17 import org.apache.lucene.document.NumericField;
18 import org.apache.lucene.index.CorruptIndexException;
19 import org.apache.lucene.index.IndexReader;
20 import org.apache.lucene.index.IndexWriter;
21 import org.apache.lucene.index.IndexWriterConfig;
22 import org.apache.lucene.index.Term;
23 import org.apache.lucene.search.IndexSearcher;
24 import org.apache.lucene.search.ScoreDoc;
25 import org.apache.lucene.search.TermQuery;
26 import org.apache.lucene.search.TopDocs;
27 import org.apache.lucene.store.Directory;
28 import org.apache.lucene.store.FSDirectory;
29 import org.apache.lucene.store.LockObtainFailedException;
30 import org.apache.lucene.util.Version;
31
32 /**
33 * @author Hongten <br />
34 * @date 2013-1-31
35 */
36 public class LuceneUtil {
37
38 /**
39 * 邮件id
40 */
41 private String[] ids = { "1", "2", "3", "4", "5", "6" };
42 /**
43 * 邮箱
44 */
45 private String[] emails = { "aa@sina.com", "bb@foxmail.com", "cc@qq.com",
46 "dd@163.com", "ee@gmail.com", "ff@sina.com" };
47 /**
48 * 邮件内容
49 */
50 private String[] contents = { "hello,aa,hi,hell world!!",
51 "hello,bb,i'm a boy",
52 "hello,cc",
53 "hello,dd,welcome to my zone,this is a test hello",
54 "hello,ee,haha,xixi,hello world!!",
55 "hello,ff" };
56 /**
57 * 附件数
58 */
59 private int[] attachs = {1,5,3,2,1,6};
60 /**
61 * 日期
62 */
63 private Date[] dates = null;
64 /**
65 * 收件人的名称
66 */
67 private String[] names = { "hongten", "hanyuan", "Devide", "Tom", "Steven",
68 "Shala" };
69
70 private Directory directory = null;
71 /**
72 * 评分
73 */
74 private Map<String, Float> scores = new HashMap<String, Float>();
75
76 public LuceneUtil() {
77 try {
78 setDates();
79 scores.put("sina.com", 1.0f);
80 scores.put("foxmail.com", 1.1f);
81 directory = FSDirectory.open(new File(
82 "D:/WordPlace/lucene/lucene_0400_dateAndInt/lucene/index"));
83 } catch (IOException e) {
84 e.printStackTrace();
85 }
86 }
87
88 /**
89 * 创建日期
90 */
91 public void setDates(){
92 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
93 try {
94 dates = new Date[ids.length];
95 dates[0] = sdf.parse("2012-11-18");
96 dates[1] = sdf.parse("2010-01-28");
97 dates[2] = sdf.parse("2011-11-21");
98 dates[3] = sdf.parse("2012-12-12");
99 dates[4] = sdf.parse("2011-06-23");
100 dates[5] = sdf.parse("2012-03-15");
101 } catch (ParseException e) {
102 e.printStackTrace();
103 }
104 }
105
106 /**
107 * 创建索引
108 */
109 public void index() {
110 IndexWriter writer = null;
111 try {
112 writer = new IndexWriter(directory, new IndexWriterConfig(
113 Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
114 //删除之前所建立的全部索引
115 writer.deleteAll();
116 // 创建文档
117 Document document = null;
118 for (int i = 0; i < ids.length; i++) {
119 // Field.Store.YES:将会存储域值,原始字符串的值会保存在索引,以此可以进行相应的回复操作,对于主键,标题可以是这种方式存储
120 // Field.Store.NO:不会存储域值,通常与Index.ANAYLIZED和起来使用,索引一些如文章正文等不需要恢复的文档
121 // ==============================
122 // Field.Index.ANALYZED:进行分词和索引,适用于标题,内容等
123 // Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号码,姓名,ID等,适用于精确搜索
124 // Field.Index.ANALYZED_NOT_NORMS:进行分词,但是不进行存储norms信息,这个norms中包括了创建索引的时间和权值等信息
125 // Field.Index.NOT_ANALYZED_NOT_NORMS:不进行分词也不进行存储norms信息(不推荐)
126 // Field.Index.NO:不进行分词
127 document = new Document();
128 document.add(new Field("id", ids[i], Field.Store.YES,
129 Field.Index.NOT_ANALYZED_NO_NORMS));
130 document.add(new Field("email", emails[i], Field.Store.YES,
131 Field.Index.NOT_ANALYZED));
132 document.add(new Field("content", contents[i], Field.Store.YES,
133 Field.Index.ANALYZED));
134 document.add(new Field("name", names[i], Field.Store.YES,
135 Field.Index.NOT_ANALYZED_NO_NORMS));
136 document.add(new NumericField("attach", Field.Store.YES,true).setIntValue(attachs[i]));
137 document.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
138
139 //这里进行加权处理
140 String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
141 System.out.println(et);
142 if(scores.containsKey(et)){
143 document.setBoost(scores.get(et));
144 }else{
145 document.setBoost(0.6f);
146 }
147 writer.addDocument(document);
148 }
149 } catch (CorruptIndexException e) {
150 e.printStackTrace();
151 } catch (LockObtainFailedException e) {
152 e.printStackTrace();
153 } catch (IOException e) {
154 e.printStackTrace();
155 } finally {
156 if (writer != null) {
157 try {
158 writer.close();
159 } catch (CorruptIndexException e) {
160 e.printStackTrace();
161 } catch (IOException e) {
162 e.printStackTrace();
163 }
164 }
165 }
166 }
167
168 /**
169 * 搜索
170 */
171 public void search(){
172 try {
173 IndexReader reader = IndexReader.open(directory);
174 IndexSearcher searcher = new IndexSearcher(reader);
175 TermQuery query = new TermQuery(new Term("content","hello"));
176 TopDocs tds =searcher.search(query, 10);
177 for(ScoreDoc sd : tds.scoreDocs){
178 Document doc = searcher.doc(sd.doc);
179 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
180 Date date = null;
181 /*try {
182 date = sdf.parse(doc.get("date"));
183 } catch (ParseException e) {
184 e.printStackTrace();
185 }*/
186 System.out.println("文档序号:["+sd.doc+"] 得分:["+sd.score+"] 邮件名称:["+doc.get("email")+"] 邮件人:["+doc.get("name")+"] 附件数:["+doc.get("attach")+"] 日期:["+doc.get("date")+"] 内容 : ["+doc.get("content")+"]");
187 }
188 } catch (CorruptIndexException e) {
189 e.printStackTrace();
190 } catch (IOException e) {
191 e.printStackTrace();
192 }
193 }
194 }
/lucene_0400_dateAndInt/src/com/b510/lucene/test/IndexTest.java
1 /**
2 *
3 */
4 package com.b510.lucene.test;
5
6 import org.junit.Test;
7
8 import com.b510.lucene.util.LuceneUtil;
9
10 /**
11 * @author Hongten <br />
12 * @date 2013-1-31
13 */
14 public class IndexTest {
15
16 @Test
17 public void testIndex(){
18 LuceneUtil util = new LuceneUtil();
19 util.index();
20 }
21
22 @Test
23 public void testSearch(){
24 LuceneUtil util = new LuceneUtil();
25 util.search();
26 }
27
28 }