Lucene自带示例精简

endual

浏览: 3506882 次
性别:
来自: 杭州

最近访客更多访客>>

wrgjwrrjurhj

sindyqiu

kristy_yy

whzresponse

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene

Lucene自带示例精简，只留下了主要代码，以备查看。
对文件夹生成索引

Java代码

package zhch.illq.lucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index over the plain files found directly inside a document folder.
 *
 * <p>The documents are read from {@code d:\temp\neirong} and the index files are written to
 * {@link #INDEX_DIR}. Sub-directories of the document folder are skipped.
 */
public class LuceneIndex {
    /** Folder that receives the generated index files. */
    static final File INDEX_DIR = new File("d:\\temp\\index");

    /**
     * Indexes every regular file in the document folder and stores the index in
     * {@link #INDEX_DIR}.
     *
     * <p>I/O failures are reported on standard output rather than propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File docDir = new File("d:\\temp\\neirong");

        // File.list() returns null when the path is not a directory or cannot be read;
        // check before touching the index so a bad path does not end in a NullPointerException.
        String[] files = docDir.list();
        if (files == null) {
            System.out.println("Document directory '" + docDir + "' does not exist or is not readable");
            return;
        }

        try {
            // Only ONE writer may be open on an index directory at a time. The Paoding analyzer
            // is used because it segments Chinese text; for non-Chinese content pass
            // new StandardAnalyzer(Version.LUCENE_CURRENT) here instead.
            // The 'true' argument asks for a new index to be created in INDEX_DIR.
            IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new PaodingAnalyzer(), true,
                    IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (String fileStr : files) {
                    File file = new File(docDir, fileStr);
                    if (!file.isDirectory()) {
                        writer.addDocument(document(file));
                    }
                }
                writer.optimize();
            } finally {
                // Always close the writer, even when indexing fails part-way through.
                writer.close();
            }
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Creates the Lucene document describing a single file.
     *
     * <p>The document has three fields: {@code path} and {@code modified} are stored and indexed
     * as single un-analyzed terms; {@code contents} is fed from a reader over the file.
     *
     * @param f the file to describe
     * @return a new document for {@code f}
     * @throws java.io.FileNotFoundException if {@code f} cannot be opened for reading
     */
    public static Document document(File f) throws java.io.FileNotFoundException {
        Document doc = new Document();
        // Path: searchable as a whole value, not split into tokens.
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Last-modified time, rounded to minute resolution.
        doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Contents: supplied through a Reader, so the text is analyzed but not stored.
        // NOTE: FileReader decodes the file with the platform default charset.
        doc.add(new Field("contents", new FileReader(f)));
        return doc;
    }
}

对索引进行查询

Java代码

package zhch.illq.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Simple command-line search demo: reads queries from standard input and prints the
 * best-scoring documents from the index stored in {@code d:\temp\index}.
 */
public class LuceneSearch {
    /**
     * Runs the interactive query loop until end of input or an empty line is entered.
     *
     * @param args ignored
     * @throws Exception if the index cannot be read or a query cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String index = "d:\\temp\\index";
        String field = "contents";
        boolean raw = false;
        boolean interactive = true;
        // Maximum number of hits to display per query.
        int hitsPerPage = 10;

        // Only searching, so the reader is opened read-only.
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);
        try {
            Searcher searcher = new IndexSearcher(reader);
            // The Paoding analyzer segments Chinese text; for non-Chinese content use
            // new StandardAnalyzer(Version.LUCENE_CURRENT) instead.
            Analyzer analyzer = new PaodingAnalyzer();
            BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
            QueryParser parser = new QueryParser(field, analyzer);
            while (true) {
                System.out.println("Enter query: ");
                String line = in.readLine();
                if (line == null) {
                    break; // end of input
                }
                line = line.trim();
                if (line.length() == 0) {
                    break; // blank line ends the session
                }
                Query query = parser.parse(line);
                System.out.println("Searching for: " + query.toString(field));
                doPagingSearch(in, searcher, query, hitsPerPage, raw, interactive);
            }
        } finally {
            // Release the index even when parsing or searching throws.
            reader.close();
        }
    }

    /**
     * Executes {@code query} and prints the total hit count followed by the path and
     * modification stamp of up to {@code hitsPerPage} top-scoring documents.
     *
     * @param in          console reader; not used by this simplified version
     * @param searcher    searcher to run the query against
     * @param query       the parsed query
     * @param hitsPerPage maximum number of hits to collect and print
     * @param raw         not used by this simplified version
     * @param interactive not used by this simplified version
     * @throws IOException if the search or a document lookup fails
     */
    public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage, boolean raw,
            boolean interactive) throws IOException {
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, false);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int numTotalHits = collector.getTotalHits();
        System.out.println(numTotalHits + " total matching documents");

        int end = Math.min(hits.length, hitsPerPage);
        for (int i = 0; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                System.out.println(" modified: " + doc.get("modified"));
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }
    }
}

分享到：

下载资料的连接 | 几个好发点的论文期刊

2012-01-09 08:35
浏览 1657
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene自带示例精简

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene自带示例精简

评论

发表评论

相关推荐

Lucene 简介

lucene建立索引的API导读

最近访客更多访客>>