Mercurial Hosting > luan
diff lucene/src/luan/modules/lucene/LuceneIndex.java @ 546:eaef1005ab87
general lucene cleanup
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 14 Jun 2015 22:17:58 -0600 |
parents | ddcd4296107a |
children | 0be287ab0309 |
line wrap: on
line diff
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java Sun Jun 14 01:34:42 2015 -0600 +++ b/lucene/src/luan/modules/lucene/LuceneIndex.java Sun Jun 14 22:17:58 2015 -0600 @@ -6,6 +6,10 @@ import java.io.FileInputStream; import java.io.IOException; import java.util.Iterator; +import java.util.Map; +import java.util.List; +import java.util.ArrayList; +import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.zip.ZipOutputStream; @@ -13,15 +17,25 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -58,19 +72,17 @@ private static final Analyzer analyzer = new KeywordAnalyzer(); public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); - final LuanTable myTable; - final Lock writeLock = new ReentrantLock(); + private final ReentrantLock writeLock = new ReentrantLock(); private final File indexDir; final SnapshotDeletionPolicy snapshotDeletionPolicy; - final IndexWriter writer; + private final IndexWriter writer; private DirectoryReader reader; private IndexSearcher searcher; private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); private boolean isClosed = false; private final MultiFieldParser mfp = new MultiFieldParser(); - public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException { - this.myTable = myTable; + public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException { mfp.fields.put( "type", STRING_FIELD_PARSER ); mfp.fields.put( "id", NumberFieldParser.LONG ); File indexDir = new File(indexDirStr); @@ -88,40 +100,93 @@ initId(luan); } - Document toLucene(LuanState luan,LuanTable table) throws LuanException { - return LuceneDocument.toLucene(luan,table,mfp.fields.keySet()); - } - public LuceneWriter openWriter() { - return new LuceneWriter(this); - } - - private synchronized IndexSearcher openSearcher() throws IOException { - DirectoryReader newReader = DirectoryReader.openIfChanged(reader); - if( newReader != null ) { - reader.decRef(); - reader = newReader; - searcher = new IndexSearcher(reader); - } - reader.incRef(); - return searcher; - } - - // call in finally block - private static void close(IndexSearcher searcher) throws IOException { - searcher.getIndexReader().decRef(); - } - - LuceneSnapshot openSnapshot() throws IOException { - return new LuceneSnapshot(this); - } public void delete_all() throws IOException { + boolean commit = !writeLock.isHeldByCurrentThread(); writeLock.lock(); try { writer.deleteAll(); - writer.commit(); id = idLim = 0; + if(commit) writer.commit(); + } finally { + writeLock.unlock(); + } + } + + private static Term term(String key,int value) { + BytesRef br = new BytesRef(); + NumericUtils.intToPrefixCoded(value,0,br); + return new Term(key,br); + } + + private static Term term(String key,long value) { + BytesRef br = new BytesRef(); + NumericUtils.longToPrefixCoded(value,0,br); + return new Term(key,br); + } + + private static Term term(LuanState luan,String key,Object value) throws LuanException { + if( value instanceof String ) + return new Term( key, (String)value ); + if( value instanceof Integer ) + return term( key, (Integer)value ); + if( value instanceof Long ) + return term( key, (Long)value ); + if( value instanceof Float ) + return term( key, NumericUtils.floatToSortableInt((Float)value) ); + if( value instanceof Double ) + return term( key, NumericUtils.doubleToSortableLong((Double)value) ); + throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'"); + } + + public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException { + List<Term> list = new ArrayList<Term>(); + for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) { + Object key = entry.getKey(); + Object value = entry.getValue(); + if( !(key instanceof String) ) + throw luan.exception("key must be a string but got "+key.getClass().getSimpleName()); + list.add( term( luan, (String)key, value ) ); + } + + boolean commit = !writeLock.isHeldByCurrentThread(); + writeLock.lock(); + try { + writer.deleteDocuments(list.toArray(new Term[list.size()])); + if(commit) writer.commit(); + } finally { + writeLock.unlock(); + } + } + + public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException { + if( doc.get(luan,"type")==null ) + throw luan.exception("missing 'type' field"); + Long id = (Long)doc.get(luan,"id"); + + boolean commit = !writeLock.isHeldByCurrentThread(); + writeLock.lock(); + try { + if( id == null ) { + id = nextId(luan); + doc.put(luan,"id",id); + writer.addDocument(toLucene(luan,doc)); + } else { + writer.updateDocument( term("id",id), toLucene(luan,doc) ); + } + if(commit) writer.commit(); + } finally { + writeLock.unlock(); + } + } + + public void update_in_transaction(LuanState luan,LuanFunction fn) throws IOException, LuanException { + boolean commit = !writeLock.isHeldByCurrentThread(); + writeLock.lock(); + try { + luan.call(fn); + if(commit) writer.commit(); } finally { writeLock.unlock(); } @@ -146,7 +211,7 @@ } } - synchronized long nextId(LuanState luan) throws LuanException, IOException { + private synchronized long nextId(LuanState luan) throws LuanException, IOException { if( ++id > idLim ) { idLim += idBatch; LuanTable doc = new LuanTable(); @@ -161,10 +226,10 @@ public void backup(LuanState luan,String zipFile) throws LuanException, IOException { if( !zipFile.endsWith(".zip") ) throw luan.exception("file "+zipFile+" doesn't end with '.zip'"); - LuceneSnapshot snapshot = openSnapshot(); + IndexCommit ic = snapshotDeletionPolicy.snapshot(); try { ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); - for( String fileName : snapshot.getFileNames() ) { + for( String fileName : ic.getFileNames() ) { out.putNextEntry(new ZipEntry(fileName)); FileInputStream in = new FileInputStream(new File(indexDir,fileName)); Utils.copyAll(in,out); @@ -173,28 +238,16 @@ } out.close(); } finally { - snapshot.close(); + snapshotDeletionPolicy.release(ic); } } - // luan - public String to_string() { return writer.getDirectory().toString(); } - public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { - LuceneWriter writer = openWriter(); - try { - luan.call( fn, new Object[]{writer.table()} ); - writer.commit(); - } finally { - writer.close(); - } - } - public void close() throws IOException { if( !isClosed ) { writer.close(); @@ -223,7 +276,7 @@ @Override public Object call(LuanState luan,Object[] args) throws LuanException { try { - return LuceneDocument.toTable(luan,searcher.doc(docID)); + return toTable(luan,searcher.doc(docID)); } catch(IOException e) { throw luan.exception(e); } @@ -243,9 +296,25 @@ } } + private synchronized IndexSearcher openSearcher() throws IOException { + DirectoryReader newReader = DirectoryReader.openIfChanged(reader); + if( newReader != null ) { + reader.decRef(); + reader = newReader; + searcher = new IndexSearcher(reader); + } + reader.incRef(); + return searcher; + } + + // call in finally block + private static void close(IndexSearcher searcher) throws IOException { + searcher.getIndexReader().decRef(); + } + public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { Utils.checkNotNull(luan,queryStr); - Query query = parseQuery(queryStr); + Query query = SaneQueryParser.parseQuery(mfp,queryStr); IndexSearcher searcher = threadLocalSearcher.get(); boolean inTransaction = searcher != null; if( !inTransaction ) @@ -277,7 +346,7 @@ searcher.search(query,thcc); return thcc.getTotalHits(); } - Sort sort = sortStr==null ? null : parseSort(sortStr); + Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr); TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); final ScoreDoc[] scoreDocs = td.scoreDocs; DocFn docFn = new DocFn(searcher); @@ -341,12 +410,79 @@ }; - public Query parseQuery(String s) throws ParseException { - return SaneQueryParser.parseQuery(mfp,s); + + + + private Document toLucene(LuanState luan,LuanTable table) throws LuanException { + Set<String> indexed = mfp.fields.keySet(); + Document doc = new Document(); + for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { + Object key = entry.getKey(); + if( !(key instanceof String) ) + throw luan.exception("key must be string"); + String name = (String)key; + Object value = entry.getValue(); + if( value instanceof String ) { + String s = (String)value; + if( indexed.contains(name) ) { + doc.add(new StringField(name, s, Field.Store.YES)); + } else { + doc.add(new StoredField(name, s)); + } + } else if( value instanceof Integer ) { + int i = (Integer)value; + if( indexed.contains(name) ) { + doc.add(new IntField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof Long ) { + long i = (Long)value; + if( indexed.contains(name) ) { + doc.add(new LongField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof Double ) { + double i = (Double)value; + if( indexed.contains(name) ) { + doc.add(new DoubleField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof byte[] ) { + byte[] b = (byte[])value; + doc.add(new StoredField(name, b)); + } else + throw luan.exception("invalid value type "+value.getClass()+"' for '"+name+"'"); + } + return doc; } - public Sort parseSort(String s) throws ParseException { - return SaneQueryParser.parseSort(mfp,s); + private static LuanTable toTable(LuanState luan,Document doc) throws LuanException { + if( doc==null ) + return null; + LuanTable table = new LuanTable(); + for( IndexableField ifld : doc ) { + String name = ifld.name(); + BytesRef br = ifld.binaryValue(); + if( br != null ) { + table.rawPut(name,br.bytes); + continue; + } + Number n = ifld.numericValue(); + if( n != null ) { + table.rawPut(name,n); + continue; + } + String s = ifld.stringValue(); + if( s != null ) { + table.rawPut(name,s); + continue; + } + throw luan.exception("invalid field type for "+ifld); + } + return table; } }