使用dotlucene多条件检索数据库

时间：2009-12-21 11:47 来源：未知 作者：admin 点击： 次

分享到：

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.IO;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Text.RegularExpressions;
using Lucene.Net.Search;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Search.Highlight;
using Lucene.Net.Documents;
using System.Data.SqlClient;
using Lucene.Net.Analysis.Cn;
/// <summary>
/// Code-behind for Search.aspx. Runs a multi-term Lucene query against the
/// index stored in the "index" folder, loads the matching rows from the
/// ywg_zhaoshang table, and exposes the results, a summary line and pager
/// links for data binding.
/// </summary>
public partial class Search : System.Web.UI.Page
{
    // NOTE(review): the credentials are hard-coded in source. They should be moved
    // to the connectionStrings section of web.config and a least-privilege login
    // used instead of "sa".
    private const string ConnectionString = "server=.;uid=sa;pwd=liugehao;database=yaowanggu;";

    /// <summary>
    /// Number of page links shown before and after the current page in the pager.
    /// </summary>
    private const int PagerWindow = 10;

    /// <summary>
    /// Rows produced by the current search (columns: path, sample, title).
    /// </summary>
    protected DataTable Results = new DataTable();
    /// <summary>
    /// Total number of hits returned by the index search.
    /// </summary>
    private int total;
    /// <summary>
    /// Number of records displayed per page.
    /// </summary>
    private readonly int maxResults = 10;
    /// <summary>
    /// Time spent on the current search.
    /// </summary>
    private TimeSpan duration;
    /// <summary>
    /// Zero-based offset of the first record of the current page; paging is derived from it.
    /// </summary>
    private int startAt;
    /// <summary>
    /// One-based number of the first record shown on this page (always startAt + 1).
    /// </summary>
    private int fromItem;
    /// <summary>
    /// One-based number of the last record shown on this page.
    /// </summary>
    private int toItem;

    /// <summary>
    /// On the initial (non-postback) request, runs the search and binds the page,
    /// or redirects to the start page when no query was supplied.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        if (IsPostBack)
        {
            return;
        }

        if (this.Query == null)
        {
            // No query string was passed in: go back to the start page.
            Response.Redirect("./");
            return;
        }

        search();
        DataBind();
    }

    /// <summary>
    /// Executes the search: every space-separated term of <see cref="Query"/> is
    /// added to a BooleanQuery as a required clause, the hits belonging to the
    /// current page are collected, and the corresponding database rows are loaded
    /// into <see cref="Results"/>. Also fills total, startAt, fromItem, toItem
    /// and duration.
    /// </summary>
    protected void search()
    {
        DateTime start = DateTime.UtcNow;

        // Columns that the page binds to.
        this.Results.Columns.Add("path", typeof(string));
        this.Results.Columns.Add("sample", typeof(string));
        this.Results.Columns.Add("title", typeof(string));

        // Drop empty entries so that repeated spaces do not produce empty terms
        // that would be handed to the query parser.
        string[] terms = (this.Query ?? string.Empty).Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        if (terms.Length > 0)
        {
            ChineseAnalyzer analyzer = new ChineseAnalyzer();

            // Parse each term against the "text" field and add it to the combined query
            // with the same (true, false) flags the original code used.
            BooleanQuery booleanQuery = new BooleanQuery();
            foreach (string term in terms)
            {
                Query termQuery = QueryParser.Parse(term, "text", analyzer);
                booleanQuery.Add(termQuery, true, false);
            }

            // The index is expected in the "index" folder next to this page.
            IndexSearcher searcher = new IndexSearcher(Server.MapPath("index"));
            try
            {
                Hits hits = searcher.Search(booleanQuery);
                this.total = hits.Length();
                this.startAt = initStartAt();

                // Exclusive upper bound of the current page, counted from the first hit.
                int resultsCount = smallOf(this.total, this.startAt + this.maxResults);

                // The page is taken from the END of the hit list (walking backwards),
                // as in the original implementation; the final display order is
                // decided by the SQL "order by z_id desc" below.
                ArrayList ids = new ArrayList();
                for (int i = this.total - this.startAt - 1; i >= this.total - resultsCount; i--)
                {
                    Document doc = hits.Doc(i);
                    string path = doc.Get("path");
                    if (!string.IsNullOrEmpty(path))
                    {
                        ids.Add(path);
                    }
                }

                if (ids.Count > 0)
                {
                    // Matched terms are wrapped in bold tags; with empty markers the
                    // highlighting would be invisible.
                    QueryHighlightExtractor highlighter =
                        new QueryHighlightExtractor(booleanQuery, analyzer, "<b>", "</b>");
                    loadResults(ids, highlighter);
                }
            }
            finally
            {
                // Release the index even when searching or the database access fails.
                searcher.Close();
            }
        }

        this.duration = DateTime.UtcNow - start;
        this.fromItem = this.startAt + 1;
        this.toItem = smallOf(this.total, this.startAt + this.maxResults);
    }

    /// <summary>
    /// Loads the rows whose z_id is in <paramref name="ids"/> and appends one row
    /// per record to <see cref="Results"/>.
    /// </summary>
    /// <param name="ids">Values of the "path" field of the hits on the current page.</param>
    /// <param name="highlighter">Extractor used to build the highlighted sample text.</param>
    private void loadResults(IList ids, QueryHighlightExtractor highlighter)
    {
        using (SqlConnection conn = new SqlConnection(ConnectionString))
        using (SqlCommand cmd = conn.CreateCommand())
        {
            // One parameter per id instead of concatenating the values into the SQL text.
            string[] placeholders = new string[ids.Count];
            for (int i = 0; i < ids.Count; i++)
            {
                placeholders[i] = "@id" + i;
                cmd.Parameters.AddWithValue(placeholders[i], (string)ids[i]);
            }

            cmd.CommandText =
                "select z_id,z_name,z_lx,z_pzwh,z_sccs,z_jx,z_gg,z_yfyl,z_zsqy,z_dltj,z_tgzc,z_dwmc,z_lxdh,z_lxr from ywg_zhaoshang where z_id in ("
                + string.Join(",", placeholders)
                + ") order by z_id desc";

            conn.Open();
            using (SqlDataReader reader = cmd.ExecuteReader())
            {
                while (reader.Read())
                {
                    // The sample is extracted from the concatenation of columns 1..13.
                    System.Text.StringBuilder text = new System.Text.StringBuilder();
                    for (int column = 1; column <= 13; column++)
                    {
                        text.Append(reader[column].ToString());
                    }

                    DataRow row = this.Results.NewRow();
                    row["title"] = reader[1].ToString();
                    row["path"] = @"documents/" + reader[0].ToString();
                    row["sample"] = highlighter.GetBestFragment(text.ToString(), 100);
                    this.Results.Rows.Add(row);
                }
            }
        }
    }

    /// <summary>
    /// Strips HTML tags from <paramref name="html"/> and replaces non-breaking
    /// spaces (both the entity and the U+00A0 character) with ordinary spaces.
    /// </summary>
    private string ParseHtml(string html)
    {
        string temp = Regex.Replace(html, "<[^>]*>", "");
        return temp.Replace("&nbsp;", " ").Replace("\u00A0", " ");
    }

    /// <summary>
    /// Pager rows for data binding: one "html" column holding links to up to
    /// <see cref="PagerWindow"/> previous pages, the current page number (not a
    /// link), links to up to <see cref="PagerWindow"/> following pages and, when a
    /// later page exists, a "next page" link.
    /// </summary>
    protected DataTable Paging
    {
        get
        {
            // Zero-based number of the current page (rounded up when startAt is not
            // a multiple of the page size).
            int pageNumber = (this.startAt + this.maxResults - 1) / this.maxResults;

            DataTable dt = new DataTable();
            dt.Columns.Add("html");

            // Previous pages, in ascending order.
            int firstPage = Math.Max(0, pageNumber - PagerWindow);
            for (int i = firstPage; i < pageNumber; i++)
            {
                DataRow r = dt.NewRow();
                r["html"] = pagingNumberHtml(i * this.maxResults, i + 1, true);
                dt.Rows.Add(r);
            }

            // Current page; displayed page numbers are one-based.
            DataRow current = dt.NewRow();
            current["html"] = pagingNumberHtml(startAt, pageNumber + 1, false);
            dt.Rows.Add(current);

            // Following pages.
            int lastPage = Math.Min(this.pageCount, pageNumber + PagerWindow);
            for (int i = pageNumber + 1; i <= lastPage; i++)
            {
                DataRow r = dt.NewRow();
                r["html"] = pagingNumberHtml(i * this.maxResults, i + 1, true);
                dt.Rows.Add(r);
            }

            // "Next page" link, only when there actually is a next page.
            if (pageNumber < this.pageCount)
            {
                DataRow lastRow = dt.NewRow();
                lastRow["html"] = "<a href='Search.aspx?q=" + HttpUtility.UrlEncode(this.Query) + "&start=" + (pageNumber + 1) * this.maxResults + "'>下一页</a>";
                dt.Rows.Add(lastRow);
            }

            return dt;
        }
    }

    /// <summary>
    /// Builds the HTML for one page number in the pager.
    /// </summary>
    /// <param name="start">Zero-based offset of the first record of that page.</param>
    /// <param name="number">Page number to display.</param>
    /// <param name="active">True to render a hyperlink; false to render the number in bold without a link.</param>
    /// <returns>The HTML fragment.</returns>
    private string pagingNumberHtml(int start, int number, bool active)
    {
        if (active)
        {
            return "<a href='Search.aspx?q=" + HttpUtility.UrlEncode(this.Query) + "&start=" + start + "'>" + number + "</a>";
        }

        return "<b>" + number + "</b>";
    }

    /// <summary>
    /// Returns the smaller of the two numbers.
    /// </summary>
    private int smallOf(int first, int second)
    {
        return first < second ? first : second;
    }

    /// <summary>
    /// Reads the "start" request parameter and clamps it: missing, non-numeric or
    /// negative values give 0; values at or beyond total - 1 give the first record
    /// of the last page.
    /// </summary>
    private int initStartAt()
    {
        int requested;
        if (!int.TryParse(this.Request.Params["start"], out requested) || requested < 0)
        {
            return 0;
        }

        if (requested >= this.total - 1)
        {
            return lastPageStartAt;
        }

        return requested;
    }

    /// <summary>
    /// Zero-based number of the last page.
    /// </summary>
    private int pageCount
    {
        get
        {
            return (this.total - 1) / this.maxResults;
        }
    }

    /// <summary>
    /// Zero-based offset of the first record of the last page.
    /// </summary>
    private int lastPageStartAt
    {
        get
        {
            return pageCount * this.maxResults;
        }
    }

    /// <summary>
    /// The search text from the "q" request parameter, or null when it is missing
    /// or blank. The value is not URL-decoded a second time: an extra decode would
    /// turn '+' into spaces and corrupt '%' sequences typed by the user.
    /// </summary>
    protected string Query
    {
        get
        {
            string query = this.Request.Params["q"];
            if (query == null || query.Trim().Length == 0)
            {
                return null;
            }

            return query;
        }
    }

    /// <summary>
    /// One-line summary of the search: hit count, displayed range and elapsed
    /// seconds, or a "nothing found" message when there are no hits.
    /// </summary>
    protected string Summary
    {
        get
        {
            if (this.total > 0)
            {
                return "共有结果" + this.total + ",当前从第" + this.fromItem + "条到第" + this.toItem + "条,本次搜索耗时" + this.duration.TotalSeconds + "秒";
            }

            return "对不起，本次搜索没有找到任何结果";
        }
    }

    /// <summary>
    /// Search button handler: redirects to this page with the text box content as
    /// the URL-encoded "q" parameter.
    /// </summary>
    protected void buttonSearch_Click(object sender, EventArgs e)
    {
        this.Response.Redirect("Search.aspx?q=" + HttpUtility.UrlEncode(this.textboxQuery.Text));
    }
}

编辑推荐DotLucene搜索引擎文章列表：
全文搜索解决方案：DotLucene搜索引擎之创建索引
http://www.xueit.com/html/2009-02/21_606_00.html
DotLucene搜索引擎之搜索索引Demo
http://www.xueit.com/html/2009-02/21_607_00.html
全文搜索技术：dotLucene中文分词的highlight显示
http://www.xueit.com/html/2009-02/21_608_00.html
Lucene.NET增加中文分词
http://www.xueit.com/html/2009-02/21_609_00.html
全文搜索之Lucene增加中文分词功能方法
http://www.xueit.com/html/2009-02/21_610_00.html
简介下基于.NET的全文索引引擎Lucene.NET
http://www.xueit.com/html/2009-02/21_611_00.html
使用dotlucene为数据库建立全文索引
http://www.xueit.com/html/2009-02/21_612_00.html
使用dotlucene多条件检索数据库
http://www.xueit.com/html/2009-02/21_613_00.html
Lucene中文分词实现方法：基于StopWord分割分词
http://www.xueit.com/html/2009-02/21_614_00.html
dotLucene实现增量索引源代码
http://www.xueit.com/html/2009-02/21_615_00.html

上一篇：使用dotlucene为数据库建立全文索引
下一篇：Lucene中文分词实现方法：基于StopWord分割分词

分享到： QQ空间 新浪微博 人人网 开心网 更多

精彩图集

精彩文章

热点文章

使用dotlucene多条件检索数据库

热门标签

赞助商链接