Lucene主要分为三大块:
1、创建索引
2、分词
3、读取并查询索引
前提:由于本人目前看的是3.5的视频教程,所以里面有些写法可能还是3.5的写法,但我能保证demo能跑通、能运行,意思也能看明白。
另外发现:在lucene 3.5里,主要的类都在core这一个jar包里;但从4.0开始好像被拆分成了多个jar,需要根据项目需要自己逐个添加jar包。
所用jar: lucene 4.10.2
教学视频:lucene 3.5
依赖的jar包(根据下面代码的import,至少需要):lucene-core、lucene-analyzers-common、lucene-queryparser,版本均为4.10.2。
入门代码1:
package com.test;import java.io.File;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class HelloLucene2 { @SuppressWarnings("deprecation") public void index() throws Exception { //创建directory,主要为了存储索引存放的路径 //Directory directory = new RAMDirectory();// 建立在内存中 Directory fsddirectory = FSDirectory.open(new File( "D:/lucene-file/index01"));//建立在硬盘上 //indexwriter的配置信息 IndexWriterConfig IndexWriterConfig = new IndexWriterConfig( Version.LATEST, new StandardAnalyzer(Version.LATEST)); //indexwriter主要是用来写索引的,类似于file流 IndexWriter writer = new IndexWriter(fsddirectory, IndexWriterConfig); //document类似于一条数据,存放数据信息 Document document = null; //field类似于字段,用于存放数据的每个分类信息 File f = new File("D:/lucene-file/exampletxt"); for (File file : f.listFiles()) { document = new Document(); document.add(new Field("content", new FileReader(file))); document.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED)); document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(document); } if (writer != null) { writer.close(); } } public void search() throws Exception { //主要流程: //1:创建directory 从哪读取信息与资料 Directory fsddirectory = FSDirectory.open(new File( "D:/lucene-file/index01"));//放在硬盘上的信息源 //2:创建indexReader //读取Index索引 IndexReader reader = 
IndexReader.open(fsddirectory); //3:根据indexReader创建IndexSearcher IndexSearcher searcher = new IndexSearcher(reader); //4:创建搜索的Query //需要创建parser来确定要搜索的文件的内容,第二个参数表示搜索的域 QueryParser parser = new QueryParser(Version.LATEST, "content",new StandardAnalyzer(Version.LATEST)); //表示域中包含徐这个字的文档 Query query = parser.parse("java"); //5:根据 searcher搜索并且返回TopDocs,类似于数据库的结果集 //10代表搜索10条, TopDocs tds = searcher.search(query, 10); //6:根据TOPDocs获取ScoreDoc对象 ScoreDoc[] sds =tds.scoreDocs; //7:根据searcher和ScordDoc对象获取具体的Document对象 for (ScoreDoc scoreDoc : sds) { Document d = searcher.doc(scoreDoc.doc); System.out.println(d.get("filename")); System.out.println(d.get("path")); System.out.println("---------"); } System.out.println("length"+sds.length); //8:根据Document对象获取需要的值 //9:关闭reader reader.close(); }}
test类:
package com.junittest;

import static org.junit.Assert.*;

import org.junit.Test;

import com.test.HelloLucene2;

/**
 * JUnit driver for the {@link HelloLucene2} demo. The test makes no assertions;
 * it passes when indexing and searching complete without throwing.
 */
public class TestLucene {

    /*
     * Index-only test, currently disabled:
     *
     * @Test
     * public void testindex() throws Exception {
     *     HelloLucene2 hl = new HelloLucene2();
     *     hl.index();
     * }
     */

    /**
     * Builds the index and then runs the demo search against it.
     *
     * @throws Exception if indexing or searching fails
     */
    @Test
    public void testsearch() throws Exception {
        final HelloLucene2 demo = new HelloLucene2();
        // The index has to be written before it can be searched.
        demo.index();
        demo.search();
    }
}