Friday, July 6, 2012

Lucene 3.0 example - Indexing and searching database tables


Apache Lucene(TM) is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform.
Apache Lucene is an open source project available for free download. To know more about Lucene, click here.

About the example:

Here is a simple Java program that creates index files from data fetched from a database, then searches those index files and displays the results. We are using Lucene 3.0 and MySQL. The basic idea is very simple: we fetch data from the database using JDBC (you could use Hibernate or similar instead) and create index files from it.

You can store any database column(s) in the index files, depending on your requirements. If you want to search only one or two columns, there is no need to add all columns to the index. You can store just that particular column and the primary key, search on that column, retrieve the primary key, and use it accordingly.

Creating an index is simple and straightforward: just add the fields and field values to a Document. Searching can be done in many ways as required: if you want to search on one field you can use QueryParser; for searching multiple fields you can use MultiFieldQueryParser. In a query you can use wildcards (e.g. DATA*), logical operators (e.g. DATA1 OR DATA2), etc.

To run the example below, please add lucene-core-3.0.2.jar (for Lucene) and mysql-connector-java-5.1.5.jar (for JDBC - MySQL) to your application's classpath.
To download lucene-core-3.0.2.jar, click here.
To learn how to install MySQL, click here.



Please see the self-explanatory example below.

/* LuceneExample.java */


import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;


public class LuceneExample {

public static final File INDEX_DIRECTORY = new File("IndexDirectory");

public void createIndex() {

System.out.println("-- Indexing --");

try {
//JDBC Section
Class.forName("com.mysql.jdbc.Driver").newInstance();
//Assuming database bookstore exists
Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/bookstore", "root", "password");
Statement stmt = conn.createStatement();
String sql = "select book_id,book_title,book_details from books";
ResultSet rs = stmt.executeQuery(sql);

//Lucene Section
Directory directory = new SimpleFSDirectory(INDEX_DIRECTORY);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
IndexWriter iWriter = new IndexWriter(directory, analyzer, true,MaxFieldLength.UNLIMITED);

//Looping through resultset and adding to index file
int count = 0;
while(rs.next()) {
Document doc = new Document();

doc.add(new Field("book_id", rs.getString("book_id"), Field.Store.YES, Field.Index.ANALYZED ));
doc.add(new Field("book_title", rs.getString("book_title"), Field.Store.YES, Field.Index.ANALYZED ));
doc.add(new Field("book_details", rs.getString("book_details"), Field.Store.YES, Field.Index.ANALYZED ));

//Adding doc to iWriter
iWriter.addDocument(doc);
count++;
}

System.out.println(count+" record indexed");

//Closing iWriter
iWriter.optimize(); 
iWriter.commit();
iWriter.close();

//Closing JDBC connection
rs.close();
stmt.close();
conn.close();

} catch (Exception e) {
e.printStackTrace();
}

}

public void search(String keyword) {

System.out.println("-- Seaching --");

try {

//Searching
IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIRECTORY), true);
IndexSearcher searcher = new IndexSearcher(reader);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
//MultiFieldQueryParser is used to search multiple fields
String[] filesToSearch = {"book_title","book_details"};
MultiFieldQueryParser mqp = new MultiFieldQueryParser(Version.LUCENE_30, filesToSearch , analyzer);

Query query = mqp.parse(keyword);//search the given keyword

System.out.println("query >> " + query);

TopDocs hits = searcher.search(query, 100); // run the query

System.out.println("Results found >> " + hits.totalHits);

for (int i = 0; i < hits.totalHits; i++) {
Document doc = searcher.doc(hits.scoreDocs[i].doc);//get the next  document
System.out.println(doc.get("book_id")+" "+doc.get("book_title")+" "+doc.get("book_details"));
}

} catch (Exception e) {
e.printStackTrace();
}

}


public static void main(String[] args) {


LuceneExample obj = new LuceneExample();

//creating index
obj.createIndex();

//searching keyword
obj.search("data");


//using wild card serach
obj.search("data*");


//using logical operator
obj.search("data1 OR data2");
obj.search("data1 AND data2");

}
}