PHP群:95885625 Hbuilder+MUI群:81989597 站长QQ:634381967
    您现在的位置: 首页 > 开发编程 > ASP.NET教程 > 正文

    基于Lucene.Net 3.0.3和PanGu实现搜索引擎的泛型基类封装

    作者:admin 来源:网络 浏览: 时间:2020-09-30 00:07:50 我要评论
    导读:本段代码是在Lucene.Net 3.0.3版本和PanGu.Lucene.Analyzer 2.4.1版本上进行的一个简单封装,这里只是一个参考,复杂用法请读者根据自己...
    本段代码是在Lucene.Net 3.0.3版本和PanGu.Lucene.Analyzer 2.4.1版本上进行的一个简单封装,这里只是一个参考,复杂用法请读者根据自己的需求进行改进。传入的泛型T至少需要包含三个属性:一个int类型的Id属性、一个String类型的Title属性和一个String类型的Contents属性。
     
    复制代码 代码如下:
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Reflection;
    using Lucene.Net.Analysis;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Messages;
    using Lucene.Net.QueryParsers;
    using Lucene.Net.Search;
    using Lucene.Net.Store;
    using Lucene.Net.Support;
    using Lucene.Net.Util;
    using Lucene.Net.Analysis.PanGu;
    using Lucene.Net.Analysis.Standard;
    using PanGu;
    using PanGu.HighLight;

    namespace HoldCode.Search
    {
        public class LuceneBase<T> where T : class, new()
        {
            private PanGuAnalyzer analyzer = null;
            public string indexPath { get; set; }

            public LuceneBase()
            {
                // 初始化
                analyzer = new PanGuAnalyzer();
            }

            private IndexWriter indexWriter = null;
            private Directory directory = null;

            /// <summary>
            /// 合并因子,子索引(Segment)合并
            /// </summary>
            public int MaxMergeFactor
            {
                get
                {
                    if (indexWriter != null)
                    {
                        return indexWriter.MergeFactor;
                    }
                    else
                    {
                        return 0;
                    }
                }
                set
                {
                    if (indexWriter != null)
                    {
                        indexWriter.MergeFactor = value;
                    }
                }
            }

            /// <summary>
            /// 子索引(Segment)文件包含的Document数量
            /// </summary>
            public int MaxMergeDocs
            {
                get
                {
                    if (indexWriter != null)
                    {
                        return indexWriter.MaxMergeDocs;
                    }
                    else
                    {
                        return 0;
                    }
                }
                set
                {
                    if (indexWriter != null)
                    {
                        indexWriter.MaxMergeDocs = value;
                    }
                }
            }

            public int MinMergeDocs
            {
                get
                {
                    if (indexWriter != null)
                    {
                        return indexWriter.GetMaxBufferedDocs();
                    }
                    else
                    {
                        return 0;
                    }
                }
                set
                {
                    if (indexWriter != null)
                    {
                        indexWriter.SetMaxBufferedDocs(value);
                    }
                }
            }

            /// <summary>
            /// 对要搜索的词分词
            /// </summary>
            /// <param name="keywords">关键词</param>
            /// <param name="tokenizer"></param>
            /// <returns></returns>
            protected string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer tokenizer)
            {
                StringBuilder result = new StringBuilder();
                ICollection<WordInfo> words = tokenizer.SegmentToWordInfos(keywords);
                foreach (var item in words)
                {
                    if (item == null)
                    {
                        continue;
                    }
                    result.AppendFormat("{0}^{1}.0 ", item.Word, (int)Math.Pow(3, item.Rank));
                }
                return result.ToString().Trim();
            }

            protected void CreateDirectory()
            {
                directory = FSDirectory.Open(indexPath);
            }

            protected void CreateIndexWriter()
            {
                indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            }

            /// <summary>
            /// 添加Document
            /// </summary>
            /// <param name="indexWriter"></param>
            /// <param name="data"></param>
            private void AddDocument(Archive data)
            {
                Document document = new Document();

                Type type = data.GetType();

                PropertyInfo[] propertys = type.GetProperties();

                IFieldable field = null;

                // 循环所有属性
                foreach (var item in propertys)
                {
                    string name = item.Name;
                    string value = item.GetValue(data, null).ToString();

                    if (name.ToLower().Contains("id"))
                    {
                        field = new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
                        // 删除重复
                        Term term = new Term(name, value);
                        indexWriter.DeleteDocuments(term);
                    }
                    else
                    {
                        field = new Field(name, value, Field.Store.YES, Field.Index.ANALYZED);
                    }
                    // 添加至Document
                    document.Add(field);
                }

                indexWriter.AddDocument(document);
            }

            /// <summary>
            /// 创建索引
            /// </summary>
            /// <param name="list"></param>
            /// <param name="optimize">子索引优化</param>
            public int CreateIndex(List<Archive> list, bool optimize)
            {
                int result = 0;
                try
                {
                    foreach (var item in list)
                    {
                        AddDocument(item);
                    }
                    result = indexWriter.MaxDoc();
                    if (optimize)
                    {
                        indexWriter.Optimize();
                    }

                    // 关闭indexWriter
                    indexWriter.Dispose();
                }
                catch (Exception ex)
                {
                    indexWriter.Dispose();
                    throw new Exception(ex.Message);
                }
                return result;
            }

            /// <summary>
            /// 搜索
            /// </summary>
            /// <param name="keyword">关键词</param>
            /// <param name="pageIndex">页数</param>
            /// <param name="pageSize">每页条数</param>
            /// <param name="totalRecord">总数据量</param>
            /// <returns>返回数据</returns>
            public List<T> SearchIndex(string keywords, int pageIndex, int pageSize, out int totalRecord)
            {
                int startRows = (pageIndex - 1) * pageSize;

                string kw = keywords;

                //对关键词进行分词
                keywords = GetKeyWordsSplitBySpace(keywords, new PanGuTokenizer());

                IndexReader indexReader = IndexReader.Open(directory, true);
                IndexSearcher searcher = new IndexSearcher(indexReader);
                //搜索内容
                QueryParser queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Contents", new PanGuAnalyzer(true));

                Query query = queryParser.Parse(keywords);

                // 获取搜索结果中前1000条
                TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);

                searcher.Search(query, null, collector);

                //筛选搜索结果,从第startRows条开始,选出pageSize条结果
                TopDocs topDocs = collector.TopDocs(startRows, pageSize);

                //搜索结果的总数量
                totalRecord = topDocs.TotalHits;

                ScoreDoc[] scoreDocs = topDocs.ScoreDocs;

                List<T> list = new List<T>();

                string highlightStr = string.Empty;
                foreach (var item in scoreDocs)
                {
                    T model = new T();
                    Document document = searcher.Doc(item.Doc);

                    Type type = model.GetType();

                    PropertyInfo[] propertyInfos = type.GetProperties();

                    foreach (var pi in propertyInfos)
                    {
                        string name = pi.Name;
                        string value = document.Get(name);
                        if (pi.Name.ToLower() == "title" || pi.Name.ToLower() == "contents")
                        {
                            // 创建HTMLFormatter,参数为高亮单词的前后缀
                            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<em>", "</em>");
                            // 创建 Highlighter ,输入HTMLFormatter 和盘古分词对象Semgent
                            Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new Segment());
                            // 设置每个摘要段的字符数
                            highlighter.FragmentSize = 150;
                            // 获取最匹配的摘要段
                            highlightStr = highlighter.GetBestFragment(keywords, value);
                            if (!string.IsNullOrEmpty(highlightStr))
                            {
                                value = highlightStr;
                            }
                        }
                        pi.SetValue(model, value);
                    }
                    list.Add(model);
                }
                searcher.Dispose();
                return list;
            }
        }
    }
    使用方法举例:创建索引
     
    复制代码 代码如下:
    /// <summary>
    /// Usage example: builds the article index on top of LuceneBase.
    /// </summary>
    public class CreateIndexModels : LuceneBase<Archive>
    {
        /// <summary>
        /// Loads the article list and writes it to an index under the given path.
        /// </summary>
        /// <param name="indexPath">Directory in which the index is generated.</param>
        public void CreateArchiveIndex(string indexPath)
        {
            // Tell the base class where the index is generated.
            this.indexPath = indexPath;

            // Query the article list from the database.
            ArticleBusinessLogic articleLogic = new ArticleBusinessLogic();
            const string selectedFields = "Id,Title,Summary,CreateTime";
            List<Archive> articles = articleLogic.GetList(100, selectedFields, "Status=1", "UpdateTime", false);

            // Open the index directory, then create the writer on top of it.
            CreateDirectory();
            CreateIndexWriter();

            // Build the index; the return value is the number of indexed rows.
            int indexedRows = CreateIndex(articles, true);
        }
    }

    转载请注明(B5教程网)原文链接:https://b5.mxunkeji.com/content-11-851-1.html
    相关热词搜索: 搜索引擎 Lucene.Net PanGu