`
dreamoftch
  • 浏览: 485626 次
  • 性别: Icon_minigender_1
  • 来自: 上海
社区版块
存档分类
最新评论

Lucene 入门

阅读更多

 

Maven dependencies (Lucene 4.3.0):

		<!-- Lucene core: the index, search, store, document and util packages
		     imported by the example class. -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>4.3.0</version>
		</dependency>
		<!-- Common analyzers: supplies the StandardAnalyzer used for both
		     indexing and query parsing in the example. -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-common</artifactId>
			<version>4.3.0</version>
		</dependency>
		<!-- Query parser module: supplies the classic QueryParser and its
		     ParseException. -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>4.3.0</version>
		</dependency>

 

 

 

 

package com.tch.test.lucene.ram;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 4.3 walkthrough: indexes every regular file of a source
 * directory (either into an on-disk index or into an in-memory index) and then
 * runs a fixed full-text query against the result, printing each hit.
 *
 * <p>Failures while indexing or searching are reported with
 * {@link Throwable#printStackTrace()} rather than propagated, so the public
 * methods keep their original exception signatures.
 */
public final class TestLucene {

	/** Lucene compatibility version shared by the analyzer, writer config and query parser. */
	private static final Version LUCENE_VERSION = Version.LUCENE_43;

	/** Single analyzer used for both indexing and query parsing so tokenization matches. */
	private static final Analyzer ANALYZER = new StandardAnalyzer(LUCENE_VERSION);

	/** Directory whose files are indexed by the no-argument demo entry points. */
	private static final String DEFAULT_SOURCE_DIR = "D:\\lucene-test\\source";

	/** Location of the on-disk index written by {@link #searchOnDisk()}. */
	private static final String DEFAULT_INDEX_DIR = "D:\\lucene-test\\index";

	/** Charset used to decode the source files. */
	private static final String SOURCE_CHARSET = "GBK";

	/** Name of the stored, analyzed field holding the file text. */
	private static final String CONTENT_FIELD = "content";

	/** Name of the stored field holding the absolute path of the indexed file. */
	private static final String FILE_NAME_FIELD = "fileName";

	/** Query executed by {@link #search(Directory)}. */
	private static final String DEFAULT_QUERY = "如何进行主题抓取";

	/** Upper bound on the number of hits fetched per search. */
	private static final int MAX_HITS = 1000;

	/**
	 * Runs the on-disk demo followed by the in-memory demo.
	 *
	 * @param args ignored
	 * @throws Exception if closing one of the index directories fails
	 */
	public static void main(String[] args) throws Exception {
		searchOnDisk();
		searchOnMemory();
	}

	/**
	 * Builds the on-disk index from the default source directory and searches it.
	 *
	 * @throws IOException if closing the index directory fails
	 */
	public static void searchOnDisk() throws IOException {
		Directory directory = indexOnDisk(DEFAULT_SOURCE_DIR, DEFAULT_INDEX_DIR);
		if (directory == null) {
			// indexOnDisk could not open the index directory and has already
			// reported why; there is nothing to search or close.
			return;
		}
		try {
			search(directory);
		} finally {
			directory.close();
		}
	}

	/**
	 * Builds an in-memory index from the default source directory and searches it.
	 *
	 * @throws IOException if closing the index directory fails
	 */
	public static void searchOnMemory() throws IOException {
		Directory directory = indexOnMemory(DEFAULT_SOURCE_DIR);
		try {
			search(directory);
		} finally {
			directory.close();
		}
	}

	/**
	 * Runs the built-in demo query against {@code directory} and prints the
	 * file name and content of every hit.
	 *
	 * @param directory index to search
	 */
	public static void search(Directory directory) {
		search(directory, DEFAULT_QUERY);
	}

	/**
	 * Parses {@code queryText} against the content field, searches
	 * {@code directory} and prints the file name and content of every hit
	 * (at most {@value #MAX_HITS}). I/O and query-syntax errors are printed,
	 * not thrown.
	 *
	 * @param directory index to search
	 * @param queryText query in the classic query parser syntax
	 */
	public static void search(Directory directory, String queryText) {
		try {
			DirectoryReader reader = DirectoryReader.open(directory);
			try {
				IndexSearcher searcher = new IndexSearcher(reader);
				QueryParser parser = new QueryParser(LUCENE_VERSION,
						CONTENT_FIELD, ANALYZER);
				Query query = parser.parse(queryText);
				ScoreDoc[] hits = searcher.search(query, null, MAX_HITS).scoreDocs;
				for (ScoreDoc hit : hits) {
					Document hitDoc = searcher.doc(hit.doc);
					System.out.println(hitDoc.get(FILE_NAME_FIELD));
					System.out.println(hitDoc.get(CONTENT_FIELD));
				}
			} finally {
				// Release the reader even when parsing or searching fails.
				reader.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Indexes every regular file directly inside {@code sourceDir} into an
	 * on-disk index at {@code indexDir}, replacing any index already there.
	 *
	 * @param sourceDir directory containing the files to index
	 * @param indexDir directory in which the index is stored
	 * @return the opened index directory (the caller must close it), or
	 *         {@code null} if the index directory itself could not be opened;
	 *         if indexing failed after that, the failure is printed and the
	 *         directory is returned without the new documents
	 */
	public static Directory indexOnDisk(String sourceDir, String indexDir) {
		Directory directory = null;
		try {
			directory = FSDirectory.open(new File(indexDir));
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION,
					ANALYZER);
			config.setOpenMode(OpenMode.CREATE);
			writeIndex(directory, config, sourceDir);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return directory;
	}

	/**
	 * Indexes every regular file directly inside {@code sourceDir} into a
	 * fresh in-memory index.
	 *
	 * @param sourceDir directory containing the files to index
	 * @return the in-memory index directory (never {@code null}; the caller
	 *         must close it); if indexing failed, the failure is printed and
	 *         the directory is returned without the new documents
	 */
	public static Directory indexOnMemory(String sourceDir) {
		Directory directory = new RAMDirectory();
		try {
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION,
					ANALYZER);
			writeIndex(directory, config, sourceDir);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return directory;
	}

	/**
	 * Reads the whole file into a string, preserving its line breaks.
	 *
	 * @param FileName path of the file to read
	 * @param charset name of the charset used to decode the file
	 * @return the decoded file content
	 * @throws IOException if the file cannot be opened or read, or the
	 *         charset is not supported
	 */
	public static String readFileContent(String FileName, String charset)
			throws IOException {
		FileInputStream in = new FileInputStream(FileName);
		try {
			BufferedReader reader = new BufferedReader(new InputStreamReader(
					in, charset));
			StringBuilder content = new StringBuilder();
			// Copy raw characters instead of readLine(): dropping the line
			// terminators would glue the last word of one line to the first
			// word of the next before the text reaches the analyzer.
			char[] buffer = new char[8192];
			int read;
			while ((read = reader.read(buffer)) != -1) {
				content.append(buffer, 0, read);
			}
			return content.toString();
		} finally {
			// Closing the underlying stream also covers the case where the
			// charset lookup fails before the reader exists.
			in.close();
		}
	}

	/**
	 * Adds one document per regular file in {@code sourceDir} to the index in
	 * {@code directory}, committing only if every file was indexed.
	 */
	private static void writeIndex(Directory directory,
			IndexWriterConfig config, String sourceDir) throws IOException {
		// List the sources before opening the writer so that a bad source
		// path cannot touch an existing index.
		File[] sourceFiles = new File(sourceDir).listFiles();
		if (sourceFiles == null) {
			// listFiles() signals "not a directory" or an I/O error with null.
			throw new IOException("Cannot list source directory: " + sourceDir);
		}

		IndexWriter writer = new IndexWriter(directory, config);
		boolean closed = false;
		try {
			for (File sourceFile : sourceFiles) {
				if (!sourceFile.isFile()) {
					// Sub-directories cannot be read as text; skip them
					// instead of aborting the whole run.
					continue;
				}
				String path = sourceFile.getAbsolutePath();
				System.out.println(String.format("开始在文件 %s 上创建索引", path));
				writer.addDocument(toDocument(path));
			}
			writer.close();
			closed = true;
		} finally {
			if (!closed) {
				// Discard the partial work and release the write lock.
				writer.rollback();
			}
		}
	}

	/** Builds the Lucene document (content + file name) for the file at {@code path}. */
	private static Document toDocument(String path) throws IOException {
		Document doc = new Document();
		doc.add(new Field(CONTENT_FIELD, readFileContent(path, SOURCE_CHARSET),
				TextField.TYPE_STORED));
		doc.add(new Field(FILE_NAME_FIELD, path, TextField.TYPE_STORED));
		return doc;
	}

}

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics