首页 新闻 会员 周边

使用Lucene.Net进行站内搜索时,为什么路径中多了一个write.lock,就会报错了呢?

0
悬赏园豆:5 [已解决问题] 解决于 2013-11-22 17:29

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using Lucene.Net.Store;
using System.IO;
using Lucene.Net.Index;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Documents;
using log4net;
using System.Net;
using Lucene.Net.Search;
using System.Text;
using mshtml;
using PanGu;
using System.Xml.Linq;
using System.Text.RegularExpressions;
 

namespace RPSearch.Test
{
    /// <summary>
    /// Test page that builds a Lucene.Net full-text index from forum topic pages
    /// (Button1) and runs a phrase search against that index (Button2).
    /// </summary>
    public partial class indexText : System.Web.UI.Page
    {
        /// <summary>Directory on disk that holds the Lucene index files.</summary>
        private const string IndexPath = @"F:\如鹏项目\索引";

        private static readonly ILog logger = LogManager.GetLogger(typeof(indexText));

        protected void Page_Load(object sender, EventArgs e)
        {

        }

        /// <summary>
        /// Downloads every topic page from id 1000 up to the newest topic id and
        /// (re)indexes it.
        /// </summary>
        /// <remarks>
        /// The writer and the directory are closed in <c>finally</c> blocks. If they are
        /// left open after an exception, the write lock on the index directory is never
        /// released and the next attempt to open an <see cref="IndexWriter"/> fails.
        /// </remarks>
        protected void Button1_Click(object sender, EventArgs e)
        {
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
            try
            {
                // An existing index is appended to; otherwise a new one is created.
                bool isUpdate = IndexReader.IndexExists(directory);

                // If the index is still locked (e.g. a previous run crashed while indexing),
                // release the lock first.
                // NOTE(review): force-unlocking is only safe when no other writer is
                // active on this directory - confirm this page is the only writer.
                if (isUpdate && IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }

                // The PanGu analyzer tokenizes the text before it is stored in the index.
                IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    using (WebClient wc = new WebClient())
                    {
                        wc.Encoding = Encoding.UTF8; // decode downloaded pages as UTF-8

                        int maxId = GetMaxId();
                        // Inclusive upper bound: maxId is the id of the newest topic and
                        // must be indexed as well.
                        for (int i = 1000; i <= maxId; i++)
                        {
                            IndexTopic(writer, wc, i);
                        }
                    }
                }
                finally
                {
                    // Always close the writer so pending changes are committed and the
                    // write lock is released, even when indexing fails part-way.
                    writer.Close();
                }
            }
            finally
            {
                directory.Close();
            }
            logger.Debug( "全部下载完毕");
        }

        /// <summary>
        /// Downloads one topic page, extracts its title and visible text, and replaces
        /// the topic's document in the index.
        /// </summary>
        /// <param name="writer">Open index writer that receives the document.</param>
        /// <param name="wc">Web client used to download the page.</param>
        /// <param name="topicId">Topic id; used in the URL and as the "number" field.</param>
        private void IndexTopic(IndexWriter writer, WebClient wc, int topicId)
        {
            string url = "http://localhost:32768/showtopic-" + topicId + ".aspx";
            string html;
            try
            {
                html = wc.DownloadString(url);
            }
            catch (WebException ex)
            {
                // A single unreachable topic (e.g. a gap in the id sequence) must not
                // abort the whole indexing run.
                logger.Warn("Skipping topic " + topicId + ": download failed", ex);
                return;
            }

            // Parse the page with mshtml to obtain the title and the tag-free text.
            HTMLDocumentClass doc = new HTMLDocumentClass();
            doc.designMode = "on"; // keep the parser from trying to run JavaScript
            doc.IHTMLDocument2_write(html);
            doc.close();

            string title = doc.title ?? string.Empty;
            string body = doc.body != null ? (doc.body.innerText ?? string.Empty) : string.Empty;

            // Delete any existing document for this topic first; otherwise re-indexing
            // would add duplicates on every run.
            writer.DeleteDocuments(new Term("number", topicId.ToString()));

            // Only content that needs full-text search is ANALYZED.
            Document document = new Document();
            document.Add(new Field("number", topicId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            // The title is stored so the search page can display it.
            document.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
            // Index the extracted text rather than the raw HTML, so markup is neither
            // searchable nor shown in previews.
            document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(document);

            logger.Debug("索引"+topicId+"下载完毕");
            Console.WriteLine("索引" + topicId + "完毕");
        }

        /// <summary>
        /// Searches the "body" field for the space-separated words typed into TextBox1
        /// and binds the hits (with highlighted previews) to Repeater1.
        /// </summary>
        protected void Button2_Click(object sender, EventArgs e)
        {
            string kw = TextBox1.Text;
            // The user separates words with spaces, e.g. "计算机 专业". Empty entries
            // from repeated spaces are dropped so no empty term enters the query.
            string[] words = kw.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            List<SearchResult> listResult = new List<SearchResult>();
            if (words.Length > 0)
            {
                FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NoLockFactory());
                try
                {
                    IndexReader reader = IndexReader.Open(directory, true); // read-only
                    try
                    {
                        IndexSearcher searcher = new IndexSearcher(reader);
                        try
                        {
                            PhraseQuery query = new PhraseQuery();
                            foreach (string word in words)
                            {
                                query.Add(new Term("body", word));
                            }
                            query.SetSlop(100);

                            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                            searcher.Search(query, null, collector);
                            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

                            for (int i = 0; i < docs.Length; i++)
                            {
                                // A hit carries only the Lucene-assigned document id; the
                                // stored fields are loaded on demand to keep memory use low.
                                int docId = docs[i].doc;
                                Document doc = searcher.Doc(docId);

                                SearchResult result = new SearchResult();
                                result.Number = doc.Get("number");
                                result.Title = doc.Get("title");
                                result.BodyPreview = Preview(doc.Get("body"), kw);
                                listResult.Add(result);
                            }
                        }
                        finally
                        {
                            searcher.Close();
                        }
                    }
                    finally
                    {
                        reader.Close();
                    }
                }
                finally
                {
                    directory.Close();
                }
            }

            Repeater1.DataSource = listResult;
            Repeater1.DataBind();
        }

        /// <summary>
        /// Builds a preview of <paramref name="body"/> with the keyword highlighted
        /// in red.
        /// </summary>
        /// <param name="body">Stored document text; may be null or empty.</param>
        /// <param name="keyword">Search text entered by the user.</param>
        /// <returns>
        /// The fragment returned by the PanGu highlighter, or an empty string when
        /// there is no body text.
        /// </returns>
        private static string Preview(string body,string keyword)
        {
            if (string.IsNullOrEmpty(body))
            {
                return string.Empty;
            }

            // Formatter arguments are the prefix and suffix wrapped around each highlighted word.
            PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">","</font>");
            // The highlighter takes the formatter and a PanGu Segment instance.
            PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
            // Number of characters per preview fragment.
            highlighter.FragmentSize = 100;
            // Pick the best-matching fragment.
            return highlighter.GetBestFragment(keyword, body);
        }

        /// <summary>
        /// Reads the forum RSS feed and returns the topic id found in the link of its
        /// first item.
        /// </summary>
        /// <returns>The topic id parsed from the first item's link.</returns>
        /// <exception cref="FormatException">
        /// The first item's link does not contain a <c>showtopic-{id}.aspx</c> segment.
        /// </exception>
        private int GetMaxId()
        {
            XDocument xdoc = XDocument.Load("http://localhost:32768/tools/rss.aspx");
            XElement channel = xdoc.Root.Element("channel");
            // NOTE(review): presumably the feed lists the newest topic first - confirm.
            XElement firstItem = channel.Elements("item").First();
            XElement link = firstItem.Element("link");

            Match match = Regex.Match(link.Value,@"showtopic-(\d+)\.aspx");
            if (!match.Success)
            {
                throw new FormatException("RSS item link does not contain a topic id: " + link.Value);
            }
            return Convert.ToInt32(match.Groups[1].Value);
        }
    }
}

noert的主页 noert | 初学一级 | 园豆:34
提问于:2012-11-26 10:49
< >
分享
最佳答案
0

这个错误应该是死锁引起的:每次创建 IndexWriter 都会在索引目录下创建相应的 write.lock,同一时间只允许存在一个 IndexWriter。

收获园豆:3
today4king | 老鸟四级 |园豆:3499 | 2012-11-26 11:45
清除回答草稿
   您需要登录以后才能回答,未注册用户请先注册