java教程

java lucene-core-3.4.0 IKAnalyzer3.2.5建立索引并查询高亮显示

位置:首页 > java教程 > java技巧,2018-02-28 19:04
java lucene-core-3.4.0 IKAnalyzer3.2.5建立索引并查询高亮显示的代码工具封装类

java lucene-core-3.4.0 IKAnalyzer3.2.5建立索引并查询高亮显示的代码工具封装类

建立索引的代码如下:

// Indexing: append one product document to the on-disk Lucene index.

// Resolve the index directory relative to the deployed web application.
String rootlucene = ServletActionContext.getRequest().getRealPath("/luceneproduct/");
Directory directory = FSDirectory.open(new File(rootlucene));
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, new IKAnalyzer());
// Create the index on first use, append to it on later runs.
config.setOpenMode(OpenMode.CREATE_OR_APPEND);
IndexWriter writerlucene = new IndexWriter(directory, config);

try {
	Document doc1 = new Document();

	// "id" is stored and indexed as a single un-tokenized term.
	doc1.add(new Field("id", id.toString(), Field.Store.YES,
			Field.Index.NOT_ANALYZED));
	// "title" is the only analyzed field; positions and offsets are kept in the term vector.
	doc1.add(new Field("title", title, Field.Store.YES,
			Field.Index.ANALYZED,
			Field.TermVector.WITH_POSITIONS_OFFSETS));
	// The remaining fields are stored for display only and are not indexed.
	doc1.add(new Field("url", url, Field.Store.YES,
			Field.Index.NO, Field.TermVector.NO));
	doc1.add(new Field("titlesm", titlesm, Field.Store.YES,
			Field.Index.NO, Field.TermVector.NO));
	doc1.add(new Field("guige", guige, Field.Store.YES,
			Field.Index.NO, Field.TermVector.NO));
	doc1.add(new Field("price", price.toString(), Field.Store.YES,
			Field.Index.NO, Field.TermVector.NO));

	writerlucene.addDocument(doc1);
	// NOTE(review): optimize() runs after every single document; consider
	// optimizing once per batch instead if indexing throughput matters.
	writerlucene.optimize();
	// Make the change durable inside the try so that a failed write is still
	// handled by the rollback branch below.
	writerlucene.commit();
} catch (IOException e) {
	writerlucene.rollback();
	System.out.println("创建索引失败:" + e.getMessage());
} finally {
	// Single close point for every path; the writer is no longer closed a
	// second time at the end of the try block.
	try {
		writerlucene.close();
	} catch (Exception e) {
		throw new RuntimeException(e);
	}
}
//索引
查询工具类

Luceneben.java

package huan.lin.help;
import java.util.List;

import com.product.bean.Product;
/**
 * Result holder for one page of search hits plus the paging figures that go with it.
 *
 * <p>Every property is a plain read/write bean property; nothing is computed here.
 */
public class Luceneben {

	/** Hits on the current page; {@code Product} is a bean carrying e.g. price, name, image url. */
	private List<Product> list;

	/** Total number of hits. */
	private Integer totalcount;

	/** Total number of pages. */
	private Integer totalpage;

	/** First page number of the pagination window. */
	private Integer beginindex;

	/** Last page number of the pagination window. */
	private Integer endindex;

	/** Creates an empty result; all properties start out as {@code null}. */
	public Luceneben() {
	}

	/** @return the hits on the current page */
	public List<Product> getList() {
		return this.list;
	}

	/** @param list the hits on the current page */
	public void setList(List<Product> list) {
		this.list = list;
	}

	/** @return the total number of hits */
	public Integer getTotalcount() {
		return this.totalcount;
	}

	/** @param totalcount the total number of hits */
	public void setTotalcount(Integer totalcount) {
		this.totalcount = totalcount;
	}

	/** @return the total number of pages */
	public Integer getTotalpage() {
		return this.totalpage;
	}

	/** @param totalpage the total number of pages */
	public void setTotalpage(Integer totalpage) {
		this.totalpage = totalpage;
	}

	/** @return the first page number of the pagination window */
	public Integer getBeginindex() {
		return this.beginindex;
	}

	/** @param beginindex the first page number of the pagination window */
	public void setBeginindex(Integer beginindex) {
		this.beginindex = beginindex;
	}

	/** @return the last page number of the pagination window */
	public Integer getEndindex() {
		return this.endindex;
	}

	/** @param endindex the last page number of the pagination window */
	public void setEndindex(Integer endindex) {
		this.endindex = endindex;
	}
}
Lucenepage.java

package huan.lin.help;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.product.bean.Product;
/**
 * Runs paged keyword searches against a Lucene index and, on request, wraps the
 * matched terms of each hit's title in highlighting markup.
 */
public class Lucenepage {

	/** Name of the indexed field that every query term is matched against. */
	private static final String TITLE_FIELD = "title";

	/**
	 * Searches the index stored at {@code lucenepath} and returns one page of hits.
	 *
	 * <p>The query string is split into terms with the IK analyzer; a document matches
	 * when its title contains at least one of the terms.
	 *
	 * @param lucenepath  file-system path of the index directory
	 * @param stringQuery raw text typed by the user
	 * @param color       {@code true} to wrap matched terms in {@code <font color=red>} markup
	 * @param currentPage 1-based number of the page to return
	 * @param pageSize    number of hits per page, must be positive
	 * @param curtotal    how many page numbers the pagination window should span
	 * @return the page of hits together with the paging figures, or {@code null} when the
	 *         index directory does not exist or the query yields no terms
	 * @throws IllegalArgumentException if {@code currentPage < 1} or {@code pageSize <= 0}
	 *         (only checked once the index directory is known to exist)
	 * @throws RuntimeException wrapping any failure raised while searching or closing the index
	 */
	public Luceneben queryByPage(String lucenepath,String stringQuery,boolean color,int currentPage,int pageSize,int curtotal){
		File lf = new File(lucenepath);
		if (!lf.exists()) {
			return null;
		}
		// Fail with a clear message instead of a wrapped division-by-zero or a
		// Lucene complaint about a non-positive hit count further down.
		if (pageSize <= 0 || currentPage < 1) {
			throw new IllegalArgumentException("currentPage must be >= 1 and pageSize must be > 0, got currentPage="
					+ currentPage + ", pageSize=" + pageSize);
		}

		IKAnalyzer analyzer = new IKAnalyzer();
		// Split the search text into terms.
		String[] terms = getWords(stringQuery, analyzer);
		if (terms.length == 0) {
			analyzer.close();
			return null;
		}

		// One SHOULD clause on the title field per term.
		Occur[] occurs = new Occur[terms.length];
		String[] fields = new String[terms.length];
		for (int at = 0; at < terms.length; at++) {
			occurs[at] = Occur.SHOULD;
			fields[at] = TITLE_FIELD;
		}

		Luceneben lu = new Luceneben();
		List<Product> goodsList = new ArrayList<Product>();
		IndexSearcher indexSearcher = null;
		IndexReader reader = null;
		int totalcount = 0;
		int totalPage = 0;
		try {
			reader = IndexReader.open(FSDirectory.open(lf));
			indexSearcher = new IndexSearcher(reader);

			Query titlequery = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, terms, fields, occurs, analyzer);
			BooleanQuery booleanq = new BooleanQuery();
			booleanq.add(titlequery, BooleanClause.Occur.MUST);

			// Fetch everything up to the end of the requested page; earlier pages are skipped below.
			int firstHit = (currentPage - 1) * pageSize;
			TopDocs topDocs = indexSearcher.search(booleanq, firstHit + pageSize);

			totalcount = topDocs.totalHits;
			totalPage = totalcount / pageSize;
			if (totalcount % pageSize != 0) {
				totalPage++;
			}

			// scoreDocs holds document numbers, not the documents themselves.
			ScoreDoc[] scoreDocs = topDocs.scoreDocs;

			Highlighter highlighter = null;
			if (color) {
				SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=red>","</font>");
				QueryScorer scorer = new QueryScorer(titlequery);
				Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
				highlighter = new Highlighter(simpleHTMLFormatter, scorer);
				highlighter.setTextFragmenter(fragmenter);
			}

			for (int i = firstHit; i < scoreDocs.length; i++) {
				// Second lookup: load the stored document behind the document number.
				Document document = indexSearcher.doc(scoreDocs[i].doc);
				String title = document.get(TITLE_FIELD);

				Product book = new Product();
				if (!color) {
					book.setTitle(title);
				} else if (title != null) {
					book.setTitle(highlightTitle(highlighter, analyzer, title));
				}
				book.setId(Integer.parseInt(document.get("id")));
				book.setUrl(document.get("url"));
				book.setGuige(document.get("guige"));
				book.setTitlesm(document.get("titlesm"));
				book.setPrice(document.get("price"));
				goodsList.add(book);
			}
		} catch (Exception e) {
			System.out.println(e.getMessage());
			throw new RuntimeException(e);
		} finally {
			try {
				// Either may still be null when opening the index failed; guarding here
				// keeps a NullPointerException from masking the original failure.
				if (indexSearcher != null) {
					indexSearcher.close();
				}
				analyzer.close();
				if (reader != null) {
					reader.close();
				}
			} catch (IOException e) {
				throw new RuntimeException(e);
			}
		}

		int[] window = computePageWindow(currentPage, curtotal, totalPage);

		lu.setList(goodsList);
		lu.setBeginindex(window[0]);
		lu.setEndindex(window[1]);
		lu.setTotalpage(totalPage);
		lu.setTotalcount(totalcount);
		return lu;
	}

	/**
	 * Returns {@code title} with the query matches wrapped in highlight markup, falling back
	 * to the unmodified title when the highlighter produces no fragment or rejects the offsets.
	 */
	private static String highlightTitle(Highlighter highlighter, IKAnalyzer analyzer, String title) throws IOException {
		TokenStream tokenStream = analyzer.tokenStream(TITLE_FIELD, new StringReader(title));
		try {
			String fragment = highlighter.getBestFragment(tokenStream, title);
			return fragment != null ? fragment : title;
		} catch (InvalidTokenOffsetsException e) {
			e.printStackTrace();
			return title;
		} finally {
			tokenStream.close();
		}
	}

	/**
	 * Computes the first and last page number of a pagination window of {@code curtotal}
	 * pages placed around {@code currentPage} and clamped to {@code 1..totalPage}.
	 *
	 * @return a two-element array {@code {beginindex, endindex}}
	 */
	private static int[] computePageWindow(int currentPage, int curtotal, int totalPage) {
		int beginindex;
		int endindex;
		int remainder = curtotal % 2;
		if (remainder == 1) {
			// Odd width: the current page sits exactly in the middle.
			beginindex = currentPage - (curtotal - 1) / 2;
			endindex = currentPage + (curtotal - 1) / 2;
		} else if (remainder == 0) {
			// Even width: one more page after the current page than before it.
			beginindex = currentPage - (curtotal / 2 - 1);
			endindex = currentPage + curtotal / 2;
		} else {
			// Negative odd width (remainder -1): no window, both bounds stay 0.
			return new int[] {0, 0};
		}

		// Window runs off the front: pin it to page 1.
		if (beginindex < 1) {
			beginindex = 1;
			endindex = Math.min(totalPage, curtotal);
		}
		// Window runs off the back: pin it to the last page.
		if (endindex > totalPage) {
			beginindex = Math.max(1, totalPage - curtotal + 1);
			endindex = totalPage;
		}
		return new int[] {beginindex, endindex};
	}

	/**
	 * Splits {@code str} into the terms produced by {@code analyzer}.
	 *
	 * <p>An {@link IOException} raised while tokenizing is printed and the terms collected
	 * up to that point are returned.
	 *
	 * @param str      text to tokenize; {@code null} yields an empty array
	 * @param analyzer analyzer used to produce the token stream
	 * @return the terms in stream order; empty (never {@code null}) when there are none
	 */
	public static String[] getWords(String str,IKAnalyzer analyzer){
		List<String> words = new ArrayList<String>();
		if (str == null) {
			return new String[0];
		}
		TokenStream stream = null;
		try {
			stream = analyzer.tokenStream("content", new StringReader(str));
			CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
			stream.reset();
			while (stream.incrementToken()) {
				words.add(attr.toString());
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (stream != null) {
				try {
					stream.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return words.toArray(new String[words.size()]);
	}

}
分页查询使用如下

// Run a highlighted search for page `cur` with a 7-page pagination window.
Lucenepage lucene = new Lucenepage();
Luceneben luceneben = lucene.queryByPage(
		ServletActionContext.getRequest().getRealPath("/luceneproduct/"),
		title,
		true,
		cur,
		pagesize,
		7);
// queryByPage returns null when the index directory is missing or the query has no terms.
if (luceneben != null) {
	list = luceneben.getList();
	totalcount = luceneben.getTotalcount();
	beginindex = luceneben.getBeginindex();
	endindex = luceneben.getEndindex();
	totalpage = luceneben.getTotalpage();
}

TAGS:lucene-core-3.4.0, IKAnalyzer3.2.5

猜你喜欢

本月热门的内容

最近更新的内容

NewHot