Lucene查询并高亮显示

2021-06-30 00:04

阅读:726

标签:lucene查询并高亮显示

1.导入jar包

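(原文此处为所需 jar 包的截图。根据下文代码中的 import 语句推断,需要:Lucene 4.9 的 lucene-core、lucene-analyzers-common、lucene-queryparser、lucene-highlighter,以及 IKAnalyzer、jericho-html 和 junit;具体版本请以实际项目为准。)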


2.创建实体Bean

package com.zhishang.lucene;

/**
 * Mutable value holder with three string properties: {@code title},
 * {@code content} and {@code url}.
 *
 * <p>Every property is {@code null} until its setter is called; no
 * validation is performed on assigned values.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBean {

    /** Title text; {@code null} until set. */
    private String title;

    /** Body text; {@code null} until set. */
    private String content;

    /** Location string; {@code null} until set. */
    private String url;

    /** @return the current title, possibly {@code null} */
    public String getTitle() {
        return this.title;
    }

    /** @param title new title value (stored as-is) */
    public void setTitle(final String title) {
        this.title = title;
    }

    /** @return the current content, possibly {@code null} */
    public String getContent() {
        return this.content;
    }

    /** @param content new content value (stored as-is) */
    public void setContent(final String content) {
        this.content = content;
    }

    /** @return the current url, possibly {@code null} */
    public String getUrl() {
        return this.url;
    }

    /** @param url new url value (stored as-is) */
    public void setUrl(final String url) {
        this.url = url;
    }
}


3.创建工具Bean

package com.zhishang.lucene;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

/**
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBeanUtil {


    public static HtmlBean parseHtml(File file){
        try {
            Source sc = new Source(file);
            Element element = sc.getFirstElement(HTMLElementName.TITLE);
            if (element == null || element.getTextExtractor() == null){
                return null;
            }

            HtmlBean htmlBean = new HtmlBean();
            htmlBean.setTitle(element.getTextExtractor().toString());
            htmlBean.setContent(sc.getTextExtractor().toString());
            htmlBean.setUrl(file.getAbsolutePath());

            return htmlBean;
        } catch (IOException e) {
            e.printStackTrace();
        }

        return null;
    }
}


4.创建操作Bean

package com.zhishang.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Created by Administrator on 2017/7/7.
 */
public class SearchIndex {

    public List search(String keyword){
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(CreateIndex.indexDir));
            IndexReader reader = DirectoryReader.open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new IKAnalyzer();
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(Version.LUCENE_4_9,new String[]{"title","content"},analyzer);
            Query query = multiFieldQueryParser.parse(keyword);
            TopDocs search = searcher.search(query,10);
            ScoreDoc[] scoreDocs = search.scoreDocs;
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("","");
            QueryScorer queryScorer = new QueryScorer(query,"title");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,queryScorer);
            List htmlBeanList = new ArrayList();
            for (ScoreDoc scoreDoc:scoreDocs){
                Document document = reader.document(scoreDoc.doc);
                String title = highlighter.getBestFragment(new IKAnalyzer(),"title",document.get("title"));
                String content = highlighter.getBestFragments(new IKAnalyzer().tokenStream("content",document.get("content")),document.get("content"),3,"...");
                String url = document.get("url");
                HtmlBean htmlBean = new HtmlBean();
                htmlBean.setTitle(title);
                htmlBean.setContent(content);
                htmlBean.setUrl(url);
                htmlBeanList.add(htmlBean);
            }

            return htmlBeanList;
//            System.out.println(search.totalHits);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }

        return null;
    }
}


5.创建测试Bean

package com.zhishang.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.File;
import java.util.List;

/**
 * Created by Administrator on 2017/7/8.
 */
public class LuceneBean {

    @Test
    public void search(){
        SearchIndex searchIndex = new SearchIndex();
        List htmlBeanList = searchIndex.search("java");
        for (HtmlBean bean:htmlBeanList){
            System.out.println(bean.getTitle());
            System.out.println(bean.getContent());
            System.out.println(bean.getUrl());
            System.out.println("-----------------------------------------------------");
        }
    }

    /*
    创建索引
     */
    @Test
    public void createIndex(){
        File file = new File(CreateIndex.indexDir);
        if (file.exists()){
            file.delete();
            file.mkdirs();
        }
        CreateIndex createIndex = new CreateIndex();
        createIndex.createIndex();
    }
}


本文出自 “素颜” 博客,请务必保留此出处http://suyanzhu.blog.51cto.com/8050189/1945606

Lucene查询并高亮显示

标签:lucene查询并高亮显示

原文地址:http://suyanzhu.blog.51cto.com/8050189/1945606


评论


亲,登录后才可以留言!