view lucene/src/luan/modules/lucene/LuceneIndex.java @ 544:c5a93767cc5c

lucene overhaul, untested
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 12 Jun 2015 19:11:44 -0600
parents ef0336efe33c
children ddcd4296107a
line wrap: on
line source

package luan.modules.lucene;

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.zip.ZipOutputStream;
import java.util.zip.ZipEntry;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.IndexSearcher;
import sane.lucene.queryparser.SaneQueryParser;
import sane.lucene.queryparser.FieldParser;
import sane.lucene.queryparser.MultiFieldParser;
import sane.lucene.queryparser.StringFieldParser;
import sane.lucene.queryparser.NumberFieldParser;
import sane.lucene.queryparser.ParseException;
import luan.modules.Utils;
import luan.Luan;
import luan.LuanState;
import luan.LuanTable;
import luan.LuanFunction;
import luan.LuanJavaFunction;
import luan.LuanException;
import luan.LuanMeta;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * A Lucene index kept in a file-system directory and exposed to Luan code.
 *
 * <p>One instance owns the {@link IndexWriter}, a shared {@link DirectoryReader}
 * that is refreshed on demand, the allocation of numeric document ids, and the
 * {@link MultiFieldParser} that describes which fields are indexed and how
 * queries and sorts over them are parsed.
 *
 * <p>Mutable state is guarded in two ways: {@link #writeLock} is taken around
 * bulk writes (see {@link #delete_all()}), while the current reader/searcher and
 * the id counters are guarded by this object's monitor.
 */
public final class LuceneIndex implements Closeable {
	private static final Logger logger = LoggerFactory.getLogger(LuceneIndex.class);

	// Field of the "next_id" bookkeeping document that holds the persisted id limit.
	private static final String FLD_NEXT_ID = "nextId";
	private static final Analyzer analyzer = new KeywordAnalyzer();
	public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer);

	// Package-private members below are shared with the other classes of this package.
	final Lock writeLock = new ReentrantLock();
	private final File indexDir;
	final SnapshotDeletionPolicy snapshotDeletionPolicy;
	final IndexWriter writer;
	private DirectoryReader reader;  // current reader; replaced by openSearcher() when the index changed
	private LuceneSearcher searcher;  // searcher built on top of the current reader
	private boolean isClosed = false;
	private final MultiFieldParser mfp = new MultiFieldParser();

	/**
	 * Opens (creating it if necessary) the index stored in {@code indexDirStr}.
	 *
	 * @param luan  the Luan state; this index is registered with it through {@code luan.onClose(this)}
	 * @param indexDirStr  path of the directory that holds the index files
	 * @throws LuanException  declared by the id initialisation
	 * @throws IOException  if the directory, writer or reader cannot be opened
	 */
	public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException {
		// "type" and "id" are always indexed; further fields are added through indexedFieldsMeta
		mfp.fields.put( "type", STRING_FIELD_PARSER );
		mfp.fields.put( "id", NumberFieldParser.LONG );
		File indexDir = new File(indexDirStr);
		this.indexDir = indexDir;
		Directory dir = FSDirectory.open(indexDir);
		Version version = Version.LUCENE_4_9;
		IndexWriterConfig conf = new IndexWriterConfig(version,analyzer);
		// wrap the configured deletion policy so that snapshots of a commit can be held
		snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
		conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
		writer = new IndexWriter(dir,conf);
		writer.commit();  // commit index creation
		reader = DirectoryReader.open(dir);
		luan.onClose(this);
		searcher = new LuceneSearcher(this,reader);
		initId(luan);
	}

	/**
	 * Converts a Luan table into a Lucene document, passing the names of the
	 * currently indexed fields to {@code LuceneDocument.toLucene}.
	 */
	Document toLucene(LuanState luan,LuanTable table) throws LuanException {
		return LuceneDocument.toLucene(luan,table,mfp.fields.keySet());
	}

	/** Creates a new {@link LuceneWriter} bound to this index. */
	public LuceneWriter openWriter() {
		return new LuceneWriter(this);
	}

	/**
	 * Returns a searcher over the latest state of the index.
	 *
	 * <p>If the index changed since the current reader was opened, the reader and
	 * the cached searcher are replaced and this index's own reference to the old
	 * reader is dropped.  The reference count of the returned searcher's reader is
	 * incremented on behalf of the caller (presumably released when the caller
	 * closes the searcher -- see {@code LuceneSearcher.close()}).
	 *
	 * @throws IOException  if refreshing the reader fails
	 */
	synchronized LuceneSearcher openSearcher() throws IOException {
		DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
		if( newReader != null ) {
			reader.decRef();
			reader = newReader;
			searcher = new LuceneSearcher(this,reader);
		}
		reader.incRef();
		return searcher;
	}

	/** Creates a new {@link LuceneSnapshot} of this index. */
	LuceneSnapshot openSnapshot() throws IOException {
		return new LuceneSnapshot(this);
	}

	/**
	 * Removes every document from the index, commits, and restarts id allocation at zero.
	 *
	 * @throws IOException  if deleting or committing fails
	 */
	public void delete_all() throws IOException {
		writeLock.lock();
		try {
			writer.deleteAll();
			writer.commit();
			// the id counters are guarded by this object's monitor (see nextId), not by writeLock
			synchronized(this) {
				id = idLim = 0;
			}
		} finally {
			writeLock.unlock();
		}
	}


	private long id = 0;  // last id handed out
	private long idLim = 0;  // highest id that may be handed out before the limit must be persisted again
	private final int idBatch = 10;  // number of ids reserved per persisted update

	/**
	 * Restores the id counters from the "next_id" bookkeeping document, if the index has one.
	 * Both counters are set to the persisted limit, so ids left over from the last reserved
	 * batch are not reused.
	 *
	 * @throws RuntimeException  if more than one "next_id" document exists
	 */
	private void initId(LuanState luan) throws LuanException, IOException {
		IndexSearcher indexSearcher = this.searcher.searcher;
		TopDocs hits = indexSearcher.search(new TermQuery(new Term("type","next_id")),1);
		switch(hits.totalHits) {
		case 0:
			break;  // fresh index, keep the counters at zero
		case 1:
			idLim = indexSearcher.doc(hits.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue().longValue();
			id = idLim;
			break;
		default:
			throw new RuntimeException("expected at most one 'next_id' document but found "+hits.totalHits);
		}
	}

	/**
	 * Returns the next unused document id.
	 *
	 * <p>Ids are reserved in batches of {@code idBatch}: whenever the current batch is used up,
	 * the new limit is written to the "next_id" document through the index writer.  The update
	 * is not committed here.
	 */
	synchronized long nextId(LuanState luan) throws LuanException, IOException {
		if( ++id > idLim ) {
			idLim += idBatch;
			LuanTable doc = new LuanTable();
			doc.rawPut( "type", "next_id" );
			doc.rawPut( FLD_NEXT_ID, idLim );
			writer.updateDocument(new Term("type","next_id"),toLucene(luan,doc));
		}
		return id;
	}


	/**
	 * Writes the files of a snapshot of this index into a zip archive.
	 *
	 * @param luan  used to create the exception for an invalid file name
	 * @param zipFile  path of the archive to create; must end with ".zip"
	 * @throws LuanException  if {@code zipFile} does not end with ".zip"
	 * @throws IOException  if reading the index files or writing the archive fails
	 */
	public void backup(LuanState luan,String zipFile) throws LuanException, IOException {
		if( !zipFile.endsWith(".zip") )
			throw luan.exception("file "+zipFile+" doesn't end with '.zip'");
		LuceneSnapshot snapshot = openSnapshot();
		try {
			// try-with-resources so that neither stream leaks when copying fails half way
			try( ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)) ) {
				for( String fileName : snapshot.getFileNames() ) {
					out.putNextEntry(new ZipEntry(fileName));
					try( FileInputStream in = new FileInputStream(new File(indexDir,fileName)) ) {
						Utils.copyAll(in,out);
					}
					out.closeEntry();
				}
			}
		} finally {
			snapshot.close();
		}
	}



	// luan

	/** Returns the string form of the directory this index writes to. */
	public String to_string() {
		return writer.getDirectory().toString();
	}

	/**
	 * Calls {@code fn} with the table of a fresh writer and commits afterwards.
	 * The writer is closed whether or not {@code fn} or the commit succeeds.
	 */
	public void Writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
		LuceneWriter luceneWriter = openWriter();
		try {
			luan.call( fn, new Object[]{luceneWriter.table()} );
			luceneWriter.commit();
		} finally {
			luceneWriter.close();
		}
	}

	/**
	 * Calls {@code fn} with the table of an up-to-date searcher and returns its result.
	 * The searcher is closed whether or not {@code fn} succeeds.
	 */
	public Object Searcher(LuanState luan,LuanFunction fn) throws LuanException, IOException {
		LuceneSearcher luceneSearcher = openSearcher();
		try {
			return luan.call( fn, new Object[]{luceneSearcher.table()} );
		} finally {
			luceneSearcher.close();
		}
	}

	/**
	 * Closes the writer and the current reader.  Calling it again after a successful
	 * close does nothing.  The index is only marked as closed when both closes
	 * succeeded, so a failed close can be retried.
	 *
	 * @throws IOException  if closing the writer or the reader fails
	 */
	public void close() throws IOException {
		if( isClosed )
			return;
		// The reader is released even when closing the writer throws; in that case the
		// writer's exception stays the primary one.
		try( DirectoryReader currentReader = reader ) {
			writer.close();
		}
		isClosed = true;
	}

	/** Safety net: logs an error and closes the index if it was never closed explicitly. */
	@Override protected void finalize() throws Throwable {
		try {
			if( !isClosed ) {
				logger.error("not closed");
				close();
			}
		} finally {
			super.finalize();
		}
	}



	/**
	 * Meta-table behaviour that exposes the indexed fields: reading a key returns its
	 * {@link FieldParser}, assigning a {@link FieldParser} registers the field, and
	 * assigning {@code null} removes it.
	 */
	public final LuanMeta indexedFieldsMeta = new LuanMeta() {

		@Override public boolean canNewindex() {
			return true;
		}

		@Override public Object __index(LuanState luan,LuanTable tbl,Object key) {
			return mfp.fields.get(key);
		}

		@Override public void __new_index(LuanState luan,LuanTable tbl,Object key,Object value) throws LuanException {
			if( !(key instanceof String) )
				throw luan.exception("key must be string");
			String field = (String)key;
			if( value==null ) {  // delete
				mfp.fields.remove(field);
				return;
			}
			if( !(value instanceof FieldParser) )
				throw luan.exception("value must be FieldParser like the values of Lucene.type");
			FieldParser parser = (FieldParser)value;
			mfp.fields.put( field, parser );
		}

		@Override public final Iterator keys(LuanTable tbl) {
			return mfp.fields.keySet().iterator();
		}

		@Override protected String type(LuanTable tbl) {
			return "lucene-indexed-fields";
		}

	};

	/**
	 * Parses a query string using the indexed fields of this index.
	 *
	 * @throws ParseException  if {@code s} cannot be parsed
	 */
	public Query parseQuery(String s) throws ParseException {
		return SaneQueryParser.parseQuery(mfp,s);
	}

	/**
	 * Parses a sort specification using the indexed fields of this index.
	 *
	 * @throws ParseException  if {@code s} cannot be parsed
	 */
	public Sort parseSort(String s) throws ParseException {
		return SaneQueryParser.parseSort(mfp,s);
	}

}