我们最近升级了一个内容管理系统,不得不从Lucene.net V2.3.1.301升级到V2.9.4.1
我们在最初的解决方案中使用了CustomScoreQuery,它执行内置查询无法实现的各种过滤。(地理位置、多日期范围等)
自从把Lucene从旧版本迁移到新版本之后,该查询开始返回一些文档,而我们检查结果时发现这些文档的分数为0,甚至是负数。
以下是重新整理过的代码示例,用于演示该问题:
/// <summary>
/// Reproduction driver: opens the site-search index, runs the event-date query
/// for 2015-11-23 .. 2015-11-25, prints how many documents matched and waits
/// for a key press.
/// </summary>
/// <remarks>
/// The index location is hard-coded. The searcher and the directory are closed
/// in <c>finally</c> blocks so the index files are released even when the
/// search throws.
/// </remarks>
public LuceneTest()
{
    Lucene.Net.Store.Directory luceneIndexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\inetpub\wwwroot\Project\build\Data\indexes\all_site_search_en"));
    try
    {
        // Read-only searcher over the directory opened above.
        IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory, true);
        try
        {
            Query dateQuery = ComposeEventDateQuery(new DateTime(2015, 11, 23), new DateTime(2015, 11, 25), searcher);

            // Static, process-wide setting; kept because it applies to every BooleanQuery.
            BooleanQuery.SetMaxClauseCount(10000);

            // Ask for at most the top 1000 hits.
            TopDocs hitsFound = searcher.Search(dateQuery, 1000);
            System.Console.WriteLine(String.Format("Found {0} matches with the date filters", hitsFound.TotalHits));
            System.Console.ReadKey();
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        luceneIndexDirectory.Close();
    }
}
/// <summary>
/// Builds the event-date query: an <see cref="EventDateQuery1"/> wrapped around
/// a term query on <c>_language</c> = <c>en</c>, added as the single required
/// clause of a new <c>BooleanQuery</c>.
/// </summary>
/// <param name="fromDate">Start of the date range handed to <see cref="EventDateQuery1"/>.</param>
/// <param name="ToDate">End of the date range handed to <see cref="EventDateQuery1"/>.</param>
/// <param name="MySearcher">Searcher handed through to <see cref="EventDateQuery1"/>.</param>
/// <returns>The boolean query containing the custom-scored clause.</returns>
public static Query ComposeEventDateQuery(DateTime fromDate, DateTime ToDate, IndexSearcher MySearcher)
{
    Query languageTerm = new TermQuery(new Lucene.Net.Index.Term("_language", "en"));

    BooleanQuery wrapper = new BooleanQuery();
    // shouldMatchNonEvents is always false for this query.
    wrapper.Add(
        new EventDateQuery1(languageTerm, MySearcher, fromDate, ToDate, false),
        BooleanClause.Occur.MUST);
    return wrapper;
}
/// <summary>
/// A <c>CustomScoreQuery</c> whose per-document score is produced by
/// <c>EventDateQueryCustomScoreProvider</c> for a given date range.
/// </summary>
public class EventDateQuery1 : CustomScoreQuery
{
    // Stored by the constructors; not read anywhere inside this class.
    private readonly Searcher _searcher;
    private readonly DateTime _fromDT;
    private readonly DateTime _toDT;
    private readonly string _dateFormat = "yyyyMMdd";
    private readonly bool _shouldMatchNonEvents;

    /// <summary>
    /// Creates a query covering <paramref name="dateRange"/> days starting at <paramref name="fromDT"/>.
    /// </summary>
    /// <param name="subQuery">Query passed to the base <c>CustomScoreQuery</c>.</param>
    /// <param name="searcher">Searcher to keep alongside the query.</param>
    /// <param name="fromDT">Range start; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
    /// <param name="dateRange">Number of days added to <paramref name="fromDT"/> to obtain the range end.</param>
    public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, bool shouldMatchNonEvents, int dateRange = 14)
        : this(subQuery, searcher, fromDT, fromDT.AddDays(dateRange), shouldMatchNonEvents)
    {
    }

    /// <summary>
    /// Creates a query covering the explicit range <paramref name="fromDT"/> .. <paramref name="toDT"/>.
    /// </summary>
    /// <param name="subQuery">Query passed to the base <c>CustomScoreQuery</c>.</param>
    /// <param name="searcher">Searcher to keep alongside the query.</param>
    /// <param name="fromDT">Range start; only the date part is kept.</param>
    /// <param name="toDT">Range end; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
    public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
        : base(subQuery)
    {
        _searcher = searcher;
        _fromDT = fromDT.Date;
        _toDT = toDT.Date;
        _shouldMatchNonEvents = shouldMatchNonEvents;
    }

    /// <summary>Returns the key produced by <see cref="GenerateUniqueKey"/>.</summary>
    public override string ToString()
    {
        return GenerateUniqueKey();
    }

    /// <summary>Returns the key produced by <see cref="GenerateUniqueKey"/>; <paramref name="field"/> is ignored.</summary>
    public override string ToString(string field)
    {
        return GenerateUniqueKey();
    }

    /// <summary>Returns the key produced by <see cref="GenerateUniqueKey"/>.</summary>
    public override string Name()
    {
        return GenerateUniqueKey();
    }

    /// <summary>
    /// Builds a key of the form <c>EventDateQuery_yyyyMMdd_yyyyMMdd_True|False</c>
    /// from the date range and the non-events flag.
    /// </summary>
    /// <remarks>
    /// Dates are formatted with the invariant culture so the key does not change
    /// with the thread culture (for example under a culture whose default
    /// calendar is not Gregorian).
    /// </remarks>
    public string GenerateUniqueKey()
    {
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        return String.Format(
            invariant,
            "EventDateQuery_{0}_{1}_{2}",
            _fromDT.ToString(_dateFormat, invariant),
            _toDT.ToString(_dateFormat, invariant),
            _shouldMatchNonEvents.ToString());
    }

    /// <summary>
    /// Two instances are equal only when the base comparison succeeds and the
    /// date range and non-events flag also match. The base class cannot know
    /// about the fields declared here, so they are compared explicitly.
    /// </summary>
    public override bool Equals(object o)
    {
        if (!base.Equals(o))
        {
            return false;
        }

        EventDateQuery1 other = o as EventDateQuery1;
        return other != null
            && _fromDT == other._fromDT
            && _toDT == other._toDT
            && _shouldMatchNonEvents == other._shouldMatchNonEvents;
    }

    /// <summary>Hash code consistent with <see cref="Equals(object)"/>.</summary>
    public override int GetHashCode()
    {
        unchecked
        {
            int hash = base.GetHashCode();
            hash = (hash * 397) ^ _fromDT.GetHashCode();
            hash = (hash * 397) ^ _toDT.GetHashCode();
            hash = (hash * 397) ^ _shouldMatchNonEvents.GetHashCode();
            return hash;
        }
    }

    /// <summary>
    /// Supplies the score provider for <paramref name="reader"/>, configured with
    /// this query's date range and non-events flag.
    /// </summary>
    protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
    {
        return new EventDateQueryCustomScoreProvider(reader, _fromDT, _toDT, _shouldMatchNonEvents);
    }
}
/// <summary>
/// Score provider used by <c>EventDateQuery1</c>. The scoring here is a
/// placeholder: it depends only on the document number, not on the stored
/// date range or flag.
/// </summary>
public class EventDateQueryCustomScoreProvider : CustomScoreProvider
{
    // Score values handed back by myScore.
    private readonly float NoMatchFloat = 0f;
    private readonly float MatchFloat = 1f;

    // Captured from the constructor; the placeholder scoring below does not read them.
    private readonly string _dateFormat = "yyyyMMdd";
    private DateTime _fromDT;
    private DateTime _toDT;
    private bool _shouldMatchNonEvents = true;

    /// <summary>
    /// Stores the date part of the range bounds and the non-events flag.
    /// </summary>
    /// <param name="reader">Index reader passed to the base provider.</param>
    /// <param name="fromDT">Range start; only the date part is kept.</param>
    /// <param name="toDT">Range end; only the date part is kept.</param>
    /// <param name="shouldMatchNonEvents">Flag stored for later use.</param>
    public EventDateQueryCustomScoreProvider(IndexReader reader, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
        : base(reader)
    {
        _shouldMatchNonEvents = shouldMatchNonEvents;
        _toDT = toDT.Date;
        _fromDT = fromDT.Date;
    }

    /// <summary>Ignores both incoming scores and returns <see cref="myScore"/> for <paramref name="doc"/>.</summary>
    public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
    {
        return myScore(doc);
    }

    /// <summary>Ignores both incoming scores and returns <see cref="myScore"/> for <paramref name="doc"/>.</summary>
    public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
    {
        return myScore(doc);
    }

    /// <summary>
    /// Placeholder scoring used only to demonstrate the run: 1 for document
    /// numbers below 10, 0 for every other document.
    /// </summary>
    /// <param name="doc">Document number being scored.</param>
    /// <returns>1 when <paramref name="doc"/> is less than 10, otherwise 0.</returns>
    public float myScore(int doc)
    {
        return doc < 10 ? MatchFloat : NoMatchFloat;
    }
}
任何关于如何让Lucene不返回这些文档的建议都会很棒。提前谢谢。
发布于 2015-11-05 01:58:00
您可以编写一个自定义Collector,只收集分数大于0的文档,然后将该收集器的一个实例传递给Search()方法。这里(here)有一个这样的Collector的实现。
但是,如果您并不需要全部结果,文档(documentation)建议不要使用这种解决方案。您的情况很可能正是如此,因为您只取了前1000个文档。
https://stackoverflow.com/questions/33360962
复制相似问题