Mercurial Hosting > luan
view lucene/src/luan/modules/lucene/LuceneIndex.java @ 545:ddcd4296107a
clean up lucene search
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 14 Jun 2015 01:34:42 -0600 |
parents | c5a93767cc5c |
children | eaef1005ab87 |
line wrap: on
line source
package luan.modules.lucene; import java.io.Closeable; import java.io.File; import java.io.FileOutputStream; import java.io.FileInputStream; import java.io.IOException; import java.util.Iterator; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.zip.ZipOutputStream; import java.util.zip.ZipEntry; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.SnapshotDeletionPolicy; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; import sane.lucene.queryparser.SaneQueryParser; import sane.lucene.queryparser.FieldParser; import sane.lucene.queryparser.MultiFieldParser; import sane.lucene.queryparser.StringFieldParser; import sane.lucene.queryparser.NumberFieldParser; import sane.lucene.queryparser.ParseException; import luan.modules.Utils; import luan.Luan; import luan.LuanState; import luan.LuanTable; import luan.LuanFunction; import luan.LuanJavaFunction; import luan.LuanException; import luan.LuanMeta; import luan.LuanRuntimeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public final class LuceneIndex implements Closeable { private static final Logger logger = LoggerFactory.getLogger(LuceneIndex.class); private static final String FLD_NEXT_ID = "nextId"; private static final Analyzer analyzer = new KeywordAnalyzer(); public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); final LuanTable myTable; final Lock writeLock = new ReentrantLock(); private final File indexDir; final SnapshotDeletionPolicy snapshotDeletionPolicy; final IndexWriter writer; private DirectoryReader reader; private IndexSearcher searcher; private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); private boolean isClosed = false; private final MultiFieldParser mfp = new MultiFieldParser(); public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException { this.myTable = myTable; mfp.fields.put( "type", STRING_FIELD_PARSER ); mfp.fields.put( "id", NumberFieldParser.LONG ); File indexDir = new File(indexDirStr); this.indexDir = indexDir; Directory dir = FSDirectory.open(indexDir); Version version = Version.LUCENE_4_9; IndexWriterConfig conf = new IndexWriterConfig(version,analyzer); snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); conf.setIndexDeletionPolicy(snapshotDeletionPolicy); writer = new IndexWriter(dir,conf); writer.commit(); // commit index creation reader = DirectoryReader.open(dir); luan.onClose(this); searcher = new IndexSearcher(reader); initId(luan); } Document toLucene(LuanState luan,LuanTable table) throws LuanException { return LuceneDocument.toLucene(luan,table,mfp.fields.keySet()); } public LuceneWriter openWriter() { return new LuceneWriter(this); } private synchronized IndexSearcher openSearcher() throws IOException { DirectoryReader newReader = DirectoryReader.openIfChanged(reader); if( newReader != null ) { reader.decRef(); reader = newReader; searcher = new IndexSearcher(reader); } reader.incRef(); return searcher; } // call in finally block private static void close(IndexSearcher searcher) throws IOException { searcher.getIndexReader().decRef(); } LuceneSnapshot openSnapshot() throws IOException { return new LuceneSnapshot(this); } public void delete_all() throws IOException { writeLock.lock(); try { writer.deleteAll(); writer.commit(); id = idLim = 0; } finally { writeLock.unlock(); } } private long id = 0; private long idLim = 0; private final int idBatch = 10; private void initId(LuanState luan) throws LuanException, IOException { TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); switch(td.totalHits) { case 0: break; // do nothing case 1: idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); id = idLim; break; default: throw new RuntimeException(); } } synchronized long nextId(LuanState luan) throws LuanException, IOException { if( ++id > idLim ) { idLim += idBatch; LuanTable doc = new LuanTable(); doc.rawPut( "type", "next_id" ); doc.rawPut( FLD_NEXT_ID, idLim ); writer.updateDocument(new Term("type","next_id"),toLucene(luan,doc)); } return id; } public void backup(LuanState luan,String zipFile) throws LuanException, IOException { if( !zipFile.endsWith(".zip") ) throw luan.exception("file "+zipFile+" doesn't end with '.zip'"); LuceneSnapshot snapshot = openSnapshot(); try { ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); for( String fileName : snapshot.getFileNames() ) { out.putNextEntry(new ZipEntry(fileName)); FileInputStream in = new FileInputStream(new File(indexDir,fileName)); Utils.copyAll(in,out); in.close(); out.closeEntry(); } out.close(); } finally { snapshot.close(); } } // luan public String to_string() { return writer.getDirectory().toString(); } public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { LuceneWriter writer = openWriter(); try { luan.call( fn, new Object[]{writer.table()} ); writer.commit(); } finally { writer.close(); } } public void close() throws IOException { if( !isClosed ) { writer.close(); reader.close(); isClosed = true; } } protected void finalize() throws Throwable { if( !isClosed ) { logger.error("not closed"); close(); } super.finalize(); } private static class DocFn extends LuanFunction { final IndexSearcher searcher; int docID; DocFn(IndexSearcher searcher) { this.searcher = searcher; } @Override public Object call(LuanState luan,Object[] args) throws LuanException { try { return LuceneDocument.toTable(luan,searcher.doc(docID)); } catch(IOException e) { throw luan.exception(e); } } } private static abstract class MyCollector extends Collector { int docBase; int i = 0; @Override public void setScorer(Scorer scorer) {} @Override public void setNextReader(AtomicReaderContext context) { this.docBase = context.docBase; } @Override public boolean acceptsDocsOutOfOrder() { return true; } } public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { Utils.checkNotNull(luan,queryStr); Query query = parseQuery(queryStr); IndexSearcher searcher = threadLocalSearcher.get(); boolean inTransaction = searcher != null; if( !inTransaction ) searcher = openSearcher(); try { if( fn!=null && n==null ) { if( sortStr != null ) throw luan.exception("sort must be nil when n is nil"); final DocFn docFn = new DocFn(searcher); MyCollector col = new MyCollector() { @Override public void collect(int doc) { try { docFn.docID = doc; luan.call(fn,new Object[]{++i,docFn}); } catch(LuanException e) { throw new LuanRuntimeException(e); } } }; try { searcher.search(query,col); } catch(LuanRuntimeException e) { throw (LuanException)e.getCause(); } return col.i; } if( fn==null || n==0 ) { TotalHitCountCollector thcc = new TotalHitCountCollector(); searcher.search(query,thcc); return thcc.getTotalHits(); } Sort sort = sortStr==null ? null : parseSort(sortStr); TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); final ScoreDoc[] scoreDocs = td.scoreDocs; DocFn docFn = new DocFn(searcher); for( int i=0; i<scoreDocs.length; i++ ) { docFn.docID = scoreDocs[i].doc; luan.call(fn,new Object[]{i+1,docFn}); } return td.totalHits; } finally { if( !inTransaction ) close(searcher); } } public Object search_in_transaction(LuanState luan,LuanFunction fn) throws LuanException, IOException { if( threadLocalSearcher.get() != null ) throw luan.exception("can't nest search_in_transaction calls"); IndexSearcher searcher = openSearcher(); threadLocalSearcher.set(searcher); try { return luan.call(fn); } finally { threadLocalSearcher.set(null); close(searcher); } } public final LuanMeta indexedFieldsMeta = new LuanMeta() { @Override public boolean canNewindex() { return true; } @Override public Object __index(LuanState luan,LuanTable tbl,Object key) { return mfp.fields.get(key); } @Override public void __new_index(LuanState luan,LuanTable tbl,Object key,Object value) throws LuanException { if( !(key instanceof String) ) throw luan.exception("key must be string"); String field = (String)key; if( value==null ) { // delete mfp.fields.remove(field); return; } if( !(value instanceof FieldParser) ) throw luan.exception("value must be FieldParser like the values of Lucene.type"); FieldParser parser = (FieldParser)value; mfp.fields.put( field, parser ); } @Override public final Iterator keys(LuanTable tbl) { return mfp.fields.keySet().iterator(); } @Override protected String type(LuanTable tbl) { return "lucene-indexed-fields"; } }; public Query parseQuery(String s) throws ParseException { return SaneQueryParser.parseQuery(mfp,s); } public Sort parseSort(String s) throws ParseException { return SaneQueryParser.parseSort(mfp,s); } }