导读:本段代码是在Lucene.Net 3.0.3版本和PanGu.Lucene.Analyzer 2.4.1版本上进行的一个简单封装,这里只是一个参考,复杂用法请读者根据自己的需求进行改进。传入的泛型T需要至少包括一个int类型的Id属性、一个String类型的Title属性和一个String类型的Contents属性。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Reflection;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Messages;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Analysis.Standard;
using PanGu;
using PanGu.HighLight;
namespace HoldCode.Search
{
/// <summary>
/// Thin wrapper over Lucene.Net 3.0.3 and PanGu.Lucene.Analyzer 2.4.1.
/// <typeparamref name="T"/> is expected to expose at least an int Id,
/// a string Title and a string Contents property (per the file header notes).
/// </summary>
public class LuceneBase<T> where T : class, new()
{
    // PanGu analyzer used for indexing; created once per instance.
    private PanGuAnalyzer analyzer = null;

    /// <summary>Directory on disk where the index is stored.</summary>
    public string indexPath { get; set; }

    public LuceneBase()
    {
        analyzer = new PanGuAnalyzer();
    }

    private IndexWriter indexWriter = null;
    private Directory directory = null;

    /// <summary>
    /// Merge factor controlling segment merging.
    /// Returns 0 when the writer has not been created yet.
    /// </summary>
    public int MaxMergeFactor
    {
        get { return indexWriter != null ? indexWriter.MergeFactor : 0; }
        set
        {
            if (indexWriter != null)
            {
                indexWriter.MergeFactor = value;
            }
        }
    }

    /// <summary>
    /// Maximum number of documents a segment may contain.
    /// Returns 0 when the writer has not been created yet.
    /// </summary>
    public int MaxMergeDocs
    {
        get { return indexWriter != null ? indexWriter.MaxMergeDocs : 0; }
        set
        {
            if (indexWriter != null)
            {
                indexWriter.MaxMergeDocs = value;
            }
        }
    }

    /// <summary>
    /// Number of documents buffered in memory before a flush.
    /// Returns 0 when the writer has not been created yet.
    /// </summary>
    public int MinMergeDocs
    {
        get { return indexWriter != null ? indexWriter.GetMaxBufferedDocs() : 0; }
        set
        {
            if (indexWriter != null)
            {
                indexWriter.SetMaxBufferedDocs(value);
            }
        }
    }

    /// <summary>
    /// Segments the search phrase with PanGu and joins the words with spaces,
    /// boosting each word by 3^rank using QueryParser's "word^boost" syntax.
    /// </summary>
    /// <param name="keywords">Raw search phrase.</param>
    /// <param name="tokenizer">PanGu tokenizer used for segmentation.</param>
    /// <returns>Space-separated, boosted query string.</returns>
    protected string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer tokenizer)
    {
        StringBuilder result = new StringBuilder();
        ICollection<WordInfo> words = tokenizer.SegmentToWordInfos(keywords);
        foreach (var item in words)
        {
            if (item == null)
            {
                continue;
            }
            result.AppendFormat("{0}^{1}.0 ", item.Word, (int)Math.Pow(3, item.Rank));
        }
        return result.ToString().Trim();
    }

    /// <summary>Opens (or creates) the index directory at <see cref="indexPath"/>.</summary>
    protected void CreateDirectory()
    {
        directory = FSDirectory.Open(indexPath);
    }

    /// <summary>
    /// Creates the IndexWriter. NOTE(review): create=true rebuilds the index
    /// from scratch on every call — confirm this is the intended behavior.
    /// </summary>
    protected void CreateIndexWriter()
    {
        indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /// <summary>
    /// Maps every public property of <paramref name="data"/> to a field and
    /// adds the resulting document to the index. The "Id" property is stored
    /// un-analyzed, and any existing document with the same id is deleted first.
    /// </summary>
    /// <param name="data">Entity to index.</param>
    private void AddDocument(T data) // FIX: was hard-coded to Archive, defeating the generic parameter.
    {
        Document document = new Document();
        PropertyInfo[] propertys = data.GetType().GetProperties();
        foreach (var item in propertys)
        {
            string name = item.Name;
            object raw = item.GetValue(data, null);
            // FIX: GetValue may return null; avoid NullReferenceException.
            string value = raw != null ? raw.ToString() : string.Empty;
            IFieldable field;
            // FIX: ToLower().Contains("id") also matched names like "Video" or
            // "Guid"; compare the name exactly (case-insensitive) instead.
            if (string.Equals(name, "Id", StringComparison.OrdinalIgnoreCase))
            {
                field = new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
                // Remove any previously indexed document with the same id.
                indexWriter.DeleteDocuments(new Term(name, value));
            }
            else
            {
                field = new Field(name, value, Field.Store.YES, Field.Index.ANALYZED);
            }
            document.Add(field);
        }
        indexWriter.AddDocument(document);
    }

    /// <summary>
    /// Indexes the given entities and closes the writer.
    /// </summary>
    /// <param name="list">Entities to index.</param>
    /// <param name="optimize">Whether to merge segments after indexing.</param>
    /// <returns>Total number of documents in the index.</returns>
    public int CreateIndex(List<T> list, bool optimize) // FIX: was List<Archive>; generic now.
    {
        int result = 0;
        try
        {
            foreach (var item in list)
            {
                AddDocument(item);
            }
            result = indexWriter.MaxDoc();
            if (optimize)
            {
                indexWriter.Optimize();
            }
        }
        finally
        {
            // FIX: dispose exactly once on both paths; the original
            // "throw new Exception(ex.Message)" destroyed the exception
            // type and stack trace.
            indexWriter.Dispose();
        }
        return result;
    }

    /// <summary>
    /// Searches the index with paging and highlights matches in the
    /// Title/Contents fields using &lt;em&gt; tags.
    /// </summary>
    /// <param name="keywords">Search phrase (segmented internally with PanGu).</param>
    /// <param name="pageIndex">1-based page number.</param>
    /// <param name="pageSize">Number of results per page.</param>
    /// <param name="totalRecord">Total number of hits for the query.</param>
    /// <returns>One page of results mapped onto <typeparamref name="T"/>.</returns>
    public List<T> SearchIndex(string keywords, int pageIndex, int pageSize, out int totalRecord)
    {
        int startRows = (pageIndex - 1) * pageSize;
        // Turn the phrase into a space-separated, boosted query string.
        keywords = GetKeyWordsSplitBySpace(keywords, new PanGuTokenizer());
        IndexReader indexReader = IndexReader.Open(directory, true);
        IndexSearcher searcher = new IndexSearcher(indexReader);
        try
        {
            QueryParser queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Contents", new PanGuAnalyzer(true));
            Query query = queryParser.Parse(keywords);
            // Collect the best 1000 hits, then slice out the requested page.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
            searcher.Search(query, null, collector);
            TopDocs topDocs = collector.TopDocs(startRows, pageSize);
            totalRecord = topDocs.TotalHits;
            // FIX: build the formatter/highlighter once instead of once per
            // property per hit. <em>/</em> wrap each highlighted word.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<em>", "</em>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new Segment());
            highlighter.FragmentSize = 150; // characters per summary fragment
            List<T> list = new List<T>();
            PropertyInfo[] propertyInfos = typeof(T).GetProperties();
            foreach (var item in topDocs.ScoreDocs)
            {
                T model = new T();
                Document document = searcher.Doc(item.Doc);
                foreach (var pi in propertyInfos)
                {
                    string value = document.Get(pi.Name);
                    if (value == null || !pi.CanWrite)
                    {
                        // Field not stored in the index, or read-only property.
                        continue;
                    }
                    string lowerName = pi.Name.ToLower();
                    if (lowerName == "title" || lowerName == "contents")
                    {
                        // Replace the raw text with the best highlighted fragment, if any.
                        string highlightStr = highlighter.GetBestFragment(keywords, value);
                        if (!string.IsNullOrEmpty(highlightStr))
                        {
                            value = highlightStr;
                        }
                        pi.SetValue(model, value, null);
                    }
                    else
                    {
                        // FIX: non-string properties (e.g. the int Id) would throw
                        // when assigned a raw string; convert to the target type.
                        pi.SetValue(model, Convert.ChangeType(value, pi.PropertyType), null);
                    }
                }
                list.Add(model);
            }
            return list;
        }
        finally
        {
            // FIX: also dispose the reader we opened — a searcher constructed
            // over an external IndexReader does not own (or close) it.
            searcher.Dispose();
            indexReader.Dispose();
        }
    }
}
}
使用方法举例:创建索引
public class CreateIndexModels : LuceneBase<Archive>
{
    /// <summary>
    /// Usage example: builds the full-text index for recent articles.
    /// </summary>
    /// <param name="indexPath">Directory that will hold the index files.</param>
    public void CreateArchiveIndex(string indexPath)
    {
        // Tell the base class where the index lives on disk.
        base.indexPath = indexPath;

        // Load up to 100 published articles from the database.
        ArticleBusinessLogic articleLogic = new ArticleBusinessLogic();
        string fields = "Id,Title,Summary,CreateTime";
        List<Archive> articles = articleLogic.GetList(100, fields, "Status=1", "UpdateTime", false);

        // Open the index directory, create the writer, then build the index
        // (true = optimize segments afterwards). Returns the document count.
        base.CreateDirectory();
        base.CreateIndexWriter();
        int indexedCount = base.CreateIndex(articles, true);
    }
}