lucene创建索引
2021-06-30 18:04
标签:lucene创建索引
本文出自 “素颜” 博客,请务必保留此出处 http://suyanzhu.blog.51cto.com/8050189/1945466
原文地址:http://suyanzhu.blog.51cto.com/8050189/1945466
1.导入jar包
2.创建实体Bean
package com.zhishang.lucene;
/**
 * Simple mutable value holder describing one HTML page: its title, its
 * textual content and the location it was loaded from.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBean {

    /** Title of the page. */
    private String title;

    /** Text content of the page. */
    private String content;

    /** Location of the page. */
    private String url;

    /**
     * @return the stored title, or {@code null} if none has been set
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @param title the title to store
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the stored content, or {@code null} if none has been set
     */
    public String getContent() {
        return this.content;
    }

    /**
     * @param content the content to store
     */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * @return the stored url, or {@code null} if none has been set
     */
    public String getUrl() {
        return this.url;
    }

    /**
     * @param url the url to store
     */
    public void setUrl(String url) {
        this.url = url;
    }
}
3.创建工具Bean
package com.zhishang.lucene;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
/**
 * Helper for turning an HTML file on disk into an {@link HtmlBean}.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBeanUtil {

    /**
     * Parses the given HTML file and copies its title, extracted text and
     * absolute path into a new {@link HtmlBean}.
     *
     * @param file the HTML file to parse
     * @return the populated bean, or {@code null} when the document has no
     *         TITLE element, when the title yields no text extractor, or when
     *         reading the file raises an {@link IOException} (the stack trace
     *         is printed in that case)
     */
    public static HtmlBean parseHtml(File file) {
        try {
            Source source = new Source(file);
            Element titleElement = source.getFirstElement(HTMLElementName.TITLE);

            // A page without a usable title is not converted.
            if (titleElement == null) {
                return null;
            }
            if (titleElement.getTextExtractor() == null) {
                return null;
            }

            HtmlBean parsed = new HtmlBean();
            String titleText = titleElement.getTextExtractor().toString();
            parsed.setTitle(titleText);
            String pageText = source.getTextExtractor().toString();
            parsed.setContent(pageText);
            String location = file.getAbsolutePath();
            parsed.setUrl(location);
            return parsed;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }
}
4.创建操作Bean
package com.zhishang.lucene;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
/**
* Created by Administrator on 2017/7/7.
*/
public class CreateIndex {
public static final String indexDir = "G:/index";
public static final String dataDir = "G:/data";
public void createIndex(){
try {
Directory dir = FSDirectory.open(new File(indexDir));
//分词器
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9,analyzer);
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
IndexWriter writer = new IndexWriter(dir,config);
File file = new File(dataDir);
RAMDirectory ramdir = new RAMDirectory();
Analyzer analyzer1 = new IKAnalyzer();
IndexWriterConfig config1 = new IndexWriterConfig(Version.LUCENE_4_9,analyzer1);
IndexWriter ramWriter = new IndexWriter(ramdir,config1);
Collection
5.创建测试Bean
package com.zhishang.lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import java.io.File;
/**
 * JUnit driver that clears the index directory and then builds the index
 * through {@link CreateIndex}.
 *
 * Created by Administrator on 2017/7/8.
 */
public class LuceneBean {

    /**
     * Creates the index.
     *
     * Any existing index directory is removed together with its contents
     * before {@link CreateIndex#createIndex()} runs. A plain
     * {@link File#delete()} only removes empty directories, so the previous
     * index files would otherwise be left in place.
     *
     * @throws IllegalStateException if the old index cannot be deleted or the
     *         index directory cannot be created
     */
    @Test
    public void createIndex(){
        File indexDirectory = new File(CreateIndex.indexDir);
        if (indexDirectory.exists()) {
            deleteRecursively(indexDirectory);
        }
        // mkdirs() returns false when the directory already exists, hence the exists() guard.
        if (!indexDirectory.exists() && !indexDirectory.mkdirs()) {
            throw new IllegalStateException(
                    "Unable to create index directory " + indexDirectory.getAbsolutePath());
        }

        CreateIndex createIndex = new CreateIndex();
        createIndex.createIndex();
    }

    /**
     * Deletes the given file, or the given directory and everything below it.
     *
     * @param target the file or directory to remove
     * @throws IllegalStateException if an entry still exists after the delete attempt
     */
    private static void deleteRecursively(File target) {
        // listFiles() yields null for a regular file (or on an I/O error); then only target itself is deleted.
        File[] children = target.listFiles();
        if (children != null) {
            for (File child : children) {
                deleteRecursively(child);
            }
        }
        if (!target.delete() && target.exists()) {
            throw new IllegalStateException("Unable to delete " + target.getAbsolutePath());
        }
    }
}
6.查看生成的索引文件
上一篇:jericho解析html
下一篇:reset.css