使用Lucene.Net进行站内搜索时，为什么路径中多了一个write.lock，就会报错了呢？

悬赏园豆：5 [已解决问题] 解决于 2013-11-22 17:29

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using Lucene.Net.Store;
using System.IO;
using Lucene.Net.Index;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Documents;
using log4net;
using System.Net;
using Lucene.Net.Search;
using System.Text;
using mshtml;
using PanGu;
using System.Xml.Linq;
using System.Text.RegularExpressions;

namespace RPSearch.Test
{
    /// <summary>
    /// Test page for site search: Button1 (re)builds a Lucene.Net index from forum
    /// topic pages, Button2 runs a phrase search against that index and binds the
    /// hits to <c>Repeater1</c>.
    /// </summary>
    public partial class indexText : System.Web.UI.Page
    {
        // Directory that holds the Lucene index; shared by indexing and searching.
        private const string IndexPath = @"F:\如鹏项目\索引";

        // First topic id that is crawled when building the index.
        private const int FirstTopicId = 1000;

        private static readonly ILog logger = LogManager.GetLogger(typeof(indexText));

        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Downloads every topic page from <see cref="FirstTopicId"/> up to the newest
        /// topic id and writes it into the index, replacing any earlier entry with the
        /// same number.
        /// </summary>
        protected void Button1_Click(object sender, EventArgs e)
        {
            // Resolve the id range before opening the writer, so a failing RSS
            // request does not happen while the index write lock is held.
            int maxId = GetMaxId();

            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
            try
            {
                bool isUpdate = IndexReader.IndexExists(directory);

                // If a previous run left the index locked (e.g. it terminated abnormally
                // while indexing), release the lock first.
                // NOTE(review): this forcibly unlocks even if another request is still
                // writing; confirm that only one indexing run can be active at a time.
                if (isUpdate && IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }

                // Create a new index when none exists yet, otherwise append to it.
                IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    using (WebClient wc = new WebClient())
                    {
                        wc.Encoding = Encoding.UTF8;

                        // Inclusive upper bound: maxId is itself the id of the newest topic.
                        for (int i = FirstTopicId; i <= maxId; i++)
                        {
                            IndexTopic(writer, wc, i);
                        }
                    }
                }
                finally
                {
                    // Always close the writer, even when indexing fails half-way;
                    // otherwise the write lock stays held and the next IndexWriter
                    // on this directory cannot be created.
                    writer.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            logger.Debug("全部下载完毕");
        }

        /// <summary>
        /// Downloads one topic page, extracts its title and plain text and (re)indexes it.
        /// A topic whose page cannot be downloaded is logged and skipped.
        /// </summary>
        /// <param name="writer">Open index writer the document is added to.</param>
        /// <param name="wc">Web client used for the download.</param>
        /// <param name="topicId">Id of the topic; stored in the "number" field.</param>
        private void IndexTopic(IndexWriter writer, WebClient wc, int topicId)
        {
            string url = "http://localhost:32768/showtopic-" + topicId + ".aspx";
            string html;
            try
            {
                html = wc.DownloadString(url);
            }
            catch (WebException ex)
            {
                // One unreachable topic must not abort the whole indexing run.
                logger.Warn("索引" + topicId + "下载失败，已跳过", ex);
                return;
            }

            // Parse the page with mshtml to get at its title and text content.
            HTMLDocumentClass doc = new HTMLDocumentClass();
            doc.designMode = "on"; // keep the parser from trying to run the page's JavaScript
            doc.IHTMLDocument2_write(html);
            doc.close();

            // Field values must not be null, so fall back to empty strings.
            string title = doc.title ?? string.Empty;
            string body = (doc.body != null ? doc.body.innerText : null) ?? string.Empty;

            // Delete any existing document with this number first, so re-running the
            // indexer replaces entries instead of duplicating them.
            writer.DeleteDocuments(new Term("number", topicId.ToString()));

            // Only content that needs full-text search is ANALYZED.
            Document document = new Document();
            document.Add(new Field("number", topicId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            // The search page reads the "title" field, so it has to be stored here.
            document.Add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
            // Index the extracted text rather than the raw HTML, so markup is neither
            // searchable nor shown in result previews.
            document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
            writer.AddDocument(document);

            logger.Debug("索引" + topicId + "下载完毕");
            Console.WriteLine("索引" + topicId + "完毕");
        }

        /// <summary>
        /// Searches the "body" field for the space-separated words typed into
        /// <c>TextBox1</c> and binds the results to <c>Repeater1</c>.
        /// </summary>
        protected void Button2_Click(object sender, EventArgs e)
        {
            string kw = TextBox1.Text;

            // The user separates the words with spaces, e.g. "计算机 专业".
            // Empty entries (repeated or leading/trailing spaces) are dropped because
            // an empty term can never match and would make the whole phrase fail.
            string[] words = kw.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            List<SearchResult> listResult = words.Length > 0
                ? Search(words, kw)
                : new List<SearchResult>();

            Repeater1.DataSource = listResult;
            Repeater1.DataBind();
        }

        /// <summary>
        /// Runs a sloppy phrase query for <paramref name="words"/> against the index and
        /// converts the hits into <c>SearchResult</c> objects.
        /// </summary>
        /// <param name="words">Non-empty list of search terms.</param>
        /// <param name="keyword">Raw keyword text, used to build the highlighted preview.</param>
        /// <returns>One entry per hit, in the order returned by the collector.</returns>
        private static List<SearchResult> Search(string[] words, string keyword)
        {
            List<SearchResult> listResult = new List<SearchResult>();

            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NoLockFactory());
            try
            {
                IndexReader reader = IndexReader.Open(directory, true); // read-only reader
                try
                {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    try
                    {
                        PhraseQuery query = new PhraseQuery();
                        foreach (string word in words)
                        {
                            query.Add(new Term("body", word));
                        }
                        query.SetSlop(100);

                        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                        searcher.Search(query, null, collector);
                        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

                        for (int i = 0; i < docs.Length; i++)
                        {
                            // A hit only carries the Lucene-assigned document id; the
                            // document itself is loaded on demand to keep memory use low.
                            int docId = docs[i].doc;
                            Document doc = searcher.Doc(docId);

                            SearchResult result = new SearchResult();
                            result.Number = doc.Get("number");
                            result.Title = doc.Get("title");
                            result.BodyPreview = Preview(doc.Get("body"), keyword);
                            listResult.Add(result);
                        }
                    }
                    finally
                    {
                        searcher.Close();
                    }
                }
                finally
                {
                    // The searcher was built on an externally opened reader, so the
                    // reader is closed explicitly as well.
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            return listResult;
        }

        /// <summary>
        /// Builds a short preview of <paramref name="body"/> around the best match for
        /// <paramref name="keyword"/> using the PanGu highlighter.
        /// </summary>
        /// <param name="body">Full text to extract the fragment from.</param>
        /// <param name="keyword">Keyword text to look for.</param>
        /// <returns>The fragment returned by <c>Highlighter.GetBestFragment</c>.</returns>
        private static string Preview(string body, string keyword)
        {
            // The formatter takes the prefix and suffix placed around each highlighted word.
            // NOTE(review): both are empty here, so matches get no visible markup; confirm
            // whether HTML tags were intended.
            PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                new PanGu.HighLight.SimpleHTMLFormatter("", "");

            // The highlighter needs the formatter and a PanGu Segment instance.
            PanGu.HighLight.Highlighter highlighter =
                new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());

            // Number of characters per preview fragment.
            highlighter.FragmentSize = 100;

            // Pick the best matching fragment.
            return highlighter.GetBestFragment(keyword, body);
        }

        /// <summary>
        /// Reads the forum RSS feed and returns the topic id found in the link of its
        /// first item.
        /// </summary>
        /// <returns>The numeric id parsed from a "showtopic-{id}.aspx" link.</returns>
        /// <exception cref="InvalidOperationException">
        /// The link of the first item does not contain a topic id.
        /// </exception>
        private int GetMaxId()
        {
            XDocument xdoc = XDocument.Load("http://localhost:32768/tools/rss.aspx");
            XElement channel = xdoc.Root.Element("channel");
            XElement firstItem = channel.Elements("item").First();
            XElement link = firstItem.Element("link");

            Match match = Regex.Match(link.Value, @"showtopic-(\d+)\.aspx");
            if (!match.Success)
            {
                // Fail with a descriptive error instead of a FormatException from
                // converting an empty capture group.
                throw new InvalidOperationException("RSS item link does not contain a topic id: " + link.Value);
            }

            return Convert.ToInt32(match.Groups[1].Value);
        }
    }
}

ASP.NET .NET技术

noert | 初学一级 | 园豆：34
提问于：2012-11-26 10:49

< >

最佳答案

这个错误应该是死锁引起的：每次创建 writer 都会创建相应的 write.lock，同一时间同一个索引目录只允许存在一个 writer。

收获园豆：3

today4king | 老鸟四级 |园豆：3499 | 2012-11-26 11:45

清除回答草稿

您需要登录以后才能回答，未注册用户请先注册。

欢迎，请先 登录 或者 注册 。

使用Lucene.Net进行站内搜索时，为什么路径中多了一个write.lock，就会报错了呢？

欢迎，请先登录或者注册。