diff lucene/src/luan/modules/lucene/LuceneIndex.java @ 546:eaef1005ab87

general lucene cleanup
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 14 Jun 2015 22:17:58 -0600
parents ddcd4296107a
children 0be287ab0309
line wrap: on
line diff
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java	Sun Jun 14 01:34:42 2015 -0600
+++ b/lucene/src/luan/modules/lucene/LuceneIndex.java	Sun Jun 14 22:17:58 2015 -0600
@@ -6,6 +6,10 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.Map;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Set;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.zip.ZipOutputStream;
@@ -13,15 +17,25 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.SnapshotDeletionPolicy;
+import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -58,19 +72,17 @@
 	private static final Analyzer analyzer = new KeywordAnalyzer();
 	public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer);
 
-	final LuanTable myTable;
-	final Lock writeLock = new ReentrantLock();
+	private final ReentrantLock writeLock = new ReentrantLock();
 	private final File indexDir;
 	final SnapshotDeletionPolicy snapshotDeletionPolicy;
-	final IndexWriter writer;
+	private final IndexWriter writer;
 	private DirectoryReader reader;
 	private IndexSearcher searcher;
 	private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
 	private boolean isClosed = false;
 	private final MultiFieldParser mfp = new MultiFieldParser();
 
-	public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException {
-		this.myTable = myTable;
+	public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException {
 		mfp.fields.put( "type", STRING_FIELD_PARSER );
 		mfp.fields.put( "id", NumberFieldParser.LONG );
 		File indexDir = new File(indexDirStr);
@@ -88,40 +100,93 @@
 		initId(luan);
 	}
 
-	Document toLucene(LuanState luan,LuanTable table) throws LuanException {
-		return LuceneDocument.toLucene(luan,table,mfp.fields.keySet());
-	}
 
-	public LuceneWriter openWriter() {
-		return new LuceneWriter(this);
-	}
-
-	private synchronized IndexSearcher openSearcher() throws IOException {
-		DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
-		if( newReader != null ) {
-			reader.decRef();
-			reader = newReader;
-			searcher = new IndexSearcher(reader);
-		}
-		reader.incRef();
-		return searcher;
-	}
-
-	// call in finally block
-	private static void close(IndexSearcher searcher) throws IOException {
-		searcher.getIndexReader().decRef();
-	}
-
-	LuceneSnapshot openSnapshot() throws IOException {
-		return new LuceneSnapshot(this);
-	}
 
 	public void delete_all() throws IOException {
+		boolean commit = !writeLock.isHeldByCurrentThread();  // lock already held by this thread => nested call; the outer holder (e.g. update_in_transaction) commits
 		writeLock.lock();
 		try {
 			writer.deleteAll();
-			writer.commit();
 			id = idLim = 0;
+			if(commit) writer.commit();  // commit now happens after the id counters are reset, and only for a non-nested call
+		} finally {
+			writeLock.unlock();
+		}
+	}
+
+	private static Term term(String key,int value) {  // exact-match Term for an int-valued field
+		final BytesRef encoded = new BytesRef();
+		NumericUtils.intToPrefixCoded( value, 0, encoded );  // shift 0: no low bits stripped, so the term encodes the exact value
+		return new Term( key, encoded );
+	}
+
+	private static Term term(String key,long value) {  // exact-match Term for a long-valued field
+		final BytesRef encoded = new BytesRef();
+		NumericUtils.longToPrefixCoded( value, 0, encoded );  // shift 0: no low bits stripped, so the term encodes the exact value
+		return new Term( key, encoded );
+	}
+
+	private static Term term(LuanState luan,String key,Object value) throws LuanException {  // builds a Term for key, dispatching on the Java type of value
+		if( value instanceof String )
+			return new Term( key, (String)value );
+		else if( value instanceof Integer )
+			return term( key, ((Integer)value).intValue() );
+		else if( value instanceof Long )
+			return term( key, ((Long)value).longValue() );
+		else if( value instanceof Float )
+			return term( key, NumericUtils.floatToSortableInt( ((Float)value).floatValue() ) );  // floats are encoded through their sortable int form
+		else if( value instanceof Double )
+			return term( key, NumericUtils.doubleToSortableLong( ((Double)value).doubleValue() ) );  // doubles are encoded through their sortable long form
+		throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'");
+	}
+
+	public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException {  // turns each key/value pair of tblTerms into a Term and passes them all to writer.deleteDocuments
+		final List<Term> terms = new ArrayList<Term>();
+		for( Map.Entry<Object,Object> e : tblTerms.iterable(luan) ) {
+			final Object field = e.getKey();
+			final Object fieldValue = e.getValue();
+			if( !(field instanceof String) )
+				throw luan.exception("key must be a string but got "+field.getClass().getSimpleName());
+			terms.add( term( luan, (String)field, fieldValue ) );
+		}
+		final Term[] termArray = terms.toArray(new Term[terms.size()]);  // every term is validated and built before the lock is taken
+		final boolean outermost = !writeLock.isHeldByCurrentThread();  // a nested call leaves the commit to the thread's outer lock holder
+		writeLock.lock();
+		try {
+			writer.deleteDocuments(termArray);
+			if( outermost )  writer.commit();
+		} finally {
+			writeLock.unlock();
+		}
+	}
+
+	public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException {  // adds doc under a newly assigned id, or replaces the stored document whose id term matches
+		if( doc.get(luan,"type")==null )  throw luan.exception("missing 'type' field");
+		Object idObj = doc.get(luan,"id");
+		if( idObj != null && !(idObj instanceof Long) )  throw luan.exception("'id' must be a Long but got "+idObj.getClass().getSimpleName());  // previously an unchecked cast that failed with a bare ClassCastException
+		Long id = (Long)idObj;
+		boolean commit = !writeLock.isHeldByCurrentThread();  // a nested call leaves the commit to the thread's outer lock holder
+		writeLock.lock();
+		try {
+			if( id == null ) {
+				id = nextId(luan);
+				doc.put(luan,"id",id);  // the generated id is written back into the caller's table
+				writer.addDocument(toLucene(luan,doc));
+			} else {
+				writer.updateDocument( term("id",id), toLucene(luan,doc) );
+			}
+			if(commit) writer.commit();
+		} finally {
+			writeLock.unlock();
+		}
+	}
+
+	public void update_in_transaction(LuanState luan,LuanFunction fn) throws IOException, LuanException {
+		boolean commit = !writeLock.isHeldByCurrentThread();
+		writeLock.lock();
+		try {
+			luan.call(fn);
+			if(commit) writer.commit();
 		} finally {
 			writeLock.unlock();
 		}
@@ -146,7 +211,7 @@
 		}
 	}
 
-	synchronized long nextId(LuanState luan) throws LuanException, IOException {
+	private synchronized long nextId(LuanState luan) throws LuanException, IOException {
 		if( ++id > idLim ) {
 			idLim += idBatch;
 			LuanTable doc = new LuanTable();
@@ -161,10 +226,10 @@
 	public void backup(LuanState luan,String zipFile) throws LuanException, IOException {
 		if( !zipFile.endsWith(".zip") )
 			throw luan.exception("file "+zipFile+" doesn't end with '.zip'");
-		LuceneSnapshot snapshot = openSnapshot();
+		IndexCommit ic = snapshotDeletionPolicy.snapshot();
 		try {
 			ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile));
-			for( String fileName : snapshot.getFileNames() ) {
+			for( String fileName : ic.getFileNames() ) {
 				out.putNextEntry(new ZipEntry(fileName));
 				FileInputStream in = new FileInputStream(new File(indexDir,fileName));
 				Utils.copyAll(in,out);
@@ -173,28 +238,16 @@
 			}
 			out.close();
 		} finally {
-			snapshot.close();
+			snapshotDeletionPolicy.release(ic);
 		}
 	}
 
 
 
-	// luan
-
 	public String to_string() {
 		return writer.getDirectory().toString();
 	}
 
-	public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
-		LuceneWriter writer = openWriter();
-		try {
-			luan.call( fn, new Object[]{writer.table()} );
-			writer.commit();
-		} finally {
-			writer.close();
-		}
-	}
-
 	public void close() throws IOException {
 		if( !isClosed ) {
 			writer.close();
@@ -223,7 +276,7 @@
 
 		@Override public Object call(LuanState luan,Object[] args) throws LuanException {
 			try {
-				return LuceneDocument.toTable(luan,searcher.doc(docID));
+				return toTable(luan,searcher.doc(docID));
 			} catch(IOException e) {
 				throw luan.exception(e);
 			}
@@ -243,9 +296,25 @@
 		}
 	}
 
+	private synchronized IndexSearcher openSearcher() throws IOException {  // returns the shared searcher, refreshed if the index changed, with one reader reference taken for the caller
+		final DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
+		if( refreshed != null ) {
+			reader.decRef();  // drop this object's own reference to the reader being replaced
+			reader = refreshed;
+			searcher = new IndexSearcher(refreshed);
+		}
+		reader.incRef();  // balanced by close(IndexSearcher)
+		return searcher;
+	}
+
+	// Releases the reader reference that openSearcher() took for this searcher; call in a finally block.
+	private static void close(IndexSearcher searcher) throws IOException {
+		searcher.getIndexReader().decRef();
+	}
+
 	public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException {
 		Utils.checkNotNull(luan,queryStr);
-		Query query = parseQuery(queryStr);
+		Query query = SaneQueryParser.parseQuery(mfp,queryStr);
 		IndexSearcher searcher = threadLocalSearcher.get();
 		boolean inTransaction = searcher != null;
 		if( !inTransaction )
@@ -277,7 +346,7 @@
 				searcher.search(query,thcc);
 				return thcc.getTotalHits();
 			}
-			Sort sort = sortStr==null ? null : parseSort(sortStr);
+			Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr);
 			TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort);
 			final ScoreDoc[] scoreDocs = td.scoreDocs;
 			DocFn docFn = new DocFn(searcher);
@@ -341,12 +410,79 @@
 
 	};
 
-	public Query parseQuery(String s) throws ParseException {
-		return SaneQueryParser.parseQuery(mfp,s);
+
+
+
+	private Document toLucene(LuanState luan,LuanTable table) throws LuanException {  // converts a Luan table into a Lucene Document, one field per entry
+		Set<String> indexed = mfp.fields.keySet();  // names with a registered field parser get an indexed field type; all other names get a StoredField
+		Document doc = new Document();
+		for( Map.Entry<Object,Object> entry : table.iterable(luan) ) {
+			Object key = entry.getKey();
+			if( !(key instanceof String) )
+				throw luan.exception("key must be string");
+			String name = (String)key;
+			Object value = entry.getValue();
+			if( value instanceof String ) {
+				String s = (String)value;
+				if( indexed.contains(name) ) {
+					doc.add(new StringField(name, s, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, s));
+				}
+			} else if( value instanceof Integer ) {
+				int i = (Integer)value;
+				if( indexed.contains(name) ) {
+					doc.add(new IntField(name, i, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, i));
+				}
+			} else if( value instanceof Long ) {
+				long i = (Long)value;
+				if( indexed.contains(name) ) {
+					doc.add(new LongField(name, i, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, i));
+				}
+			} else if( value instanceof Double ) {
+				double i = (Double)value;
+				if( indexed.contains(name) ) {
+					doc.add(new DoubleField(name, i, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, i));
+				}
+			} else if( value instanceof byte[] ) {  // binary values always go into a StoredField, whether or not the name is in 'indexed'
+				byte[] b = (byte[])value;
+				doc.add(new StoredField(name, b));
+			} else
+				throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for '"+name+"'");  // fixed the unbalanced quote; simple class name as in term()
+		}
+		return doc;
 	}
 
-	public Sort parseSort(String s) throws ParseException {
-		return SaneQueryParser.parseSort(mfp,s);
+	private static LuanTable toTable(LuanState luan,Document doc) throws LuanException {  // converts a Lucene Document into a Luan table; a null doc yields null
+		if( doc==null )
+			return null;
+		LuanTable table = new LuanTable();
+		for( IndexableField ifld : doc ) {  // each field is tried as binary, then numeric, then string
+			String name = ifld.name();
+			BytesRef br = ifld.binaryValue();
+			if( br != null ) {
+				table.rawPut(name, java.util.Arrays.copyOfRange(br.bytes, br.offset, br.offset+br.length));  // a BytesRef is a slice: copy exactly [offset, offset+length) instead of exposing the backing array
+				continue;
+			}
+			Number n = ifld.numericValue();
+			if( n != null ) {
+				table.rawPut(name,n);
+				continue;
+			}
+			String s = ifld.stringValue();
+			if( s != null ) {
+				table.rawPut(name,s);
+				continue;
+			}
+			throw luan.exception("invalid field type for "+ifld);
+		}
+		return table;
 	}
 
 }