changeset 1461:e5d48b85351c

start lucene.logging
author Franklin Schmidt <fschmidt@gmail.com>
date Sat, 28 Mar 2020 21:30:42 -0600
parents 3ab0d043370f
children bfbf9c9c1586
files src/goodjava/lucene/api/LuceneUtils.java src/goodjava/lucene/logging/LogFile.java src/goodjava/lucene/logging/LoggingIndexWriter.java
diffstat 3 files changed, 449 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
diff -r 3ab0d043370f -r e5d48b85351c src/goodjava/lucene/api/LuceneUtils.java
--- a/src/goodjava/lucene/api/LuceneUtils.java	Mon Mar 23 00:04:42 2020 -0600
+++ b/src/goodjava/lucene/api/LuceneUtils.java	Sat Mar 28 21:30:42 2020 -0600
@@ -59,6 +59,10 @@
 			BytesRef br = new BytesRef();
 			NumericUtils.longToPrefixCoded((Long)value,0,br);
 			return new Term(name,br);
+		} else if( value instanceof Integer ) {
+			BytesRef br = new BytesRef();
+			NumericUtils.intToPrefixCoded((Integer)value,0,br);
+			return new Term(name,br);
 		} else
 			throw new RuntimeException("invalid value type "+value.getClass()+"' for term '"+name+"'");
 	}
diff -r 3ab0d043370f -r e5d48b85351c src/goodjava/lucene/logging/LogFile.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/logging/LogFile.java	Sat Mar 28 21:30:42 2020 -0600
@@ -0,0 +1,351 @@
+package goodjava.lucene.logging;
+
+import java.io.File;
+import java.io.RandomAccessFile;
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.LinkedHashMap;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.util.BytesRef;
+
+
+public final class LogFile extends RandomAccessFile {
+	private long end;  // offset just past the last committed byte; also stored as a long in the 8-byte header at file offset 0
+
+	public LogFile(File file,String mode) throws IOException {  // file and mode are handed unchanged to RandomAccessFile
+		super(file,mode);
+		init();  // writes the header of an empty file, or loads it and seeks to the committed end
+	}
+
+	public LogFile(String file,String mode) throws IOException {  // same as the File constructor, but takes a path name
+		super(file,mode);
+		init();  // writes the header of an empty file, or loads it and seeks to the committed end
+	}
+
+	private void init() throws IOException {  // establishes 'end' and the initial file position
+		if( length() != 0 ) {  // existing file: the leading long is the committed end offset
+			seek(0L);
+			end = readLong();
+			seek(end);  // continue after the committed data
+		} else {  // empty file: only the header exists, so the data area starts and ends at offset 8
+			end = 8;
+			writeLong(end);
+		}
+	}
+
+	public void gotoStart() throws IOException {  // moves to the first byte after the 8-byte header
+		seek(8L);
+	}
+
+	public void gotoEnd() throws IOException {  // moves to the committed end offset held in 'end'
+		seek(end);
+	}
+
+	public void commit() throws IOException {  // makes everything before the current position part of the committed log
+		end = getFilePointer();
+		seek(0L);
+		writeLong(end);  // store the new end offset in the header
+		gotoEnd();  // return to where writing left off
+	}
+
+	public boolean hasMore() throws IOException {  // true while the file position is before the committed end
+		return getFilePointer() < end;
+	}
+
+	private static final int TYPE_NULL = 0;  // TYPE_* values are the one-byte tags writeObject emits and readObject switches on
+	private static final int TYPE_STRING = 1;
+	private static final int TYPE_INT = 2;
+	private static final int TYPE_LONG = 3;
+	private static final int TYPE_FLOAT = 4;
+	private static final int TYPE_DOUBLE = 5;
+	private static final int TYPE_BYTES = 6;
+	private static final int TYPE_LIST = 7;
+	private static final int TYPE_QUERY_MATCH_ALL_DOCS = 8;  // tags from here on identify Lucene query classes
+	private static final int TYPE_QUERY_TERM = 9;
+	private static final int TYPE_QUERY_PREFIX = 10;
+	private static final int TYPE_QUERY_WILDCARD = 11;
+	private static final int TYPE_QUERY_TERM_RANGE = 12;
+	private static final int TYPE_QUERY_PHRASE = 13;
+	private static final int TYPE_QUERY_NUMERIC_RANGE = 14;
+	private static final int TYPE_QUERY_BOOLEAN = 15;
+
+	public void writeObject(Object obj) throws IOException {  // writes a one-byte TYPE_* tag, then the payload readObject expects for that tag
+		if( obj==null ) {
+			writeByte(TYPE_NULL);  // null has no payload
+			return;
+		}
+		if( obj instanceof String ) {
+			writeByte(TYPE_STRING);
+			writeUTF((String)obj);
+			return;
+		}
+		if( obj instanceof Integer ) {
+			writeByte(TYPE_INT);
+			writeInt((Integer)obj);
+			return;
+		}
+		if( obj instanceof Long ) {
+			writeByte(TYPE_LONG);
+			writeLong((Long)obj);
+			return;
+		}
+		if( obj instanceof Float ) {
+			writeByte(TYPE_FLOAT);
+			writeFloat((Float)obj);
+			return;
+		}
+		if( obj instanceof Double ) {
+			writeByte(TYPE_DOUBLE);
+			writeDouble((Double)obj);
+			return;
+		}
+		if( obj instanceof byte[] ) {
+			writeByte(TYPE_BYTES);
+			writeByteArray((byte[])obj);
+			return;
+		}
+		if( obj instanceof List ) {
+			writeByte(TYPE_LIST);
+			writeList((List)obj);  // elements are written recursively through writeObject
+			return;
+		}
+		if( obj instanceof MatchAllDocsQuery ) {
+			writeByte(TYPE_QUERY_MATCH_ALL_DOCS);  // the tag alone is the whole record
+			return;
+		}
+		if( obj instanceof TermQuery ) {
+			writeByte(TYPE_QUERY_TERM);
+			TermQuery query = (TermQuery)obj;
+			writeTerm( query.getTerm() );
+			return;
+		}
+		if( obj instanceof PrefixQuery ) {
+			writeByte(TYPE_QUERY_PREFIX);
+			PrefixQuery query = (PrefixQuery)obj;
+			writeTerm( query.getPrefix() );
+			return;
+		}
+		if( obj instanceof WildcardQuery ) {
+			writeByte(TYPE_QUERY_WILDCARD);  // must be the wildcard tag: readObject would decode TYPE_QUERY_TERM_RANGE as a term range and misread the stream
+			WildcardQuery query = (WildcardQuery)obj;
+			writeTerm( query.getTerm() );
+			return;
+		}
+		if( obj instanceof TermRangeQuery ) {
+			writeByte(TYPE_QUERY_TERM_RANGE);
+			TermRangeQuery query = (TermRangeQuery)obj;
+			writeUTF( query.getField() );
+			writeBytesRef( query.getLowerTerm() );  // NOTE(review): an open-ended range presumably yields null here, which writeBytesRef would dereference - confirm
+			writeBytesRef( query.getUpperTerm() );
+			writeBoolean( query.includesLower() );
+			writeBoolean( query.includesUpper() );
+			return;
+		}
+		if( obj instanceof PhraseQuery ) {
+			writeByte(TYPE_QUERY_PHRASE);
+			PhraseQuery query = (PhraseQuery)obj;
+			Term[] terms = query.getTerms();
+			int[] positions = query.getPositions();
+			if( terms.length != positions.length )  // sanity check: each term is paired with a position below
+				throw new RuntimeException();
+			writeInt( terms.length );
+			for( int i=0; i<terms.length; i++ ) {
+				writeTerm( terms[i] );
+				writeInt( positions[i] );
+			}
+			return;
+		}
+		if( obj instanceof NumericRangeQuery ) {
+			writeByte(TYPE_QUERY_NUMERIC_RANGE);
+			NumericRangeQuery query = (NumericRangeQuery)obj;
+			writeUTF( query.getField() );
+			writeObject( query.getMin() );  // bounds go through writeObject, so each carries its own type tag and may be null
+			writeObject( query.getMax() );
+			writeBoolean( query.includesMin() );
+			writeBoolean( query.includesMax() );
+			return;
+		}
+		if( obj instanceof BooleanQuery ) {
+			writeByte(TYPE_QUERY_BOOLEAN);
+			BooleanQuery query = (BooleanQuery)obj;
+			BooleanClause[] a = query.getClauses();
+			writeInt(a.length);
+			for( BooleanClause bc : a ) {
+				writeQuery( bc.getQuery() );  // sub-queries are written recursively
+				writeUTF( bc.getOccur().name() );  // stored by enum name; readObject uses Occur.valueOf
+			}
+			return;
+		}
+		throw new IllegalArgumentException("invalid type for "+obj);  // none of the supported types matched
+	}
+
+	public Object readObject() throws IOException {  // reads a one-byte type tag and decodes the payload that follows it
+		int type = readByte();
+		switch(type) {
+		case TYPE_NULL:
+			return null;
+		case TYPE_STRING:
+			return readUTF();
+		case TYPE_INT:
+			return readInt();
+		case TYPE_LONG:
+			return readLong();
+		case TYPE_FLOAT:
+			return readFloat();
+		case TYPE_DOUBLE:
+			return readDouble();
+		case TYPE_BYTES:
+			return readByteArray();
+		case TYPE_LIST:
+			return readList();
+		case TYPE_QUERY_MATCH_ALL_DOCS:
+			return new MatchAllDocsQuery();  // no payload follows this tag
+		case TYPE_QUERY_TERM:
+			return new TermQuery( readTerm() );
+		case TYPE_QUERY_PREFIX:
+			return new PrefixQuery( readTerm() );
+		case TYPE_QUERY_WILDCARD:
+			return new WildcardQuery( readTerm() );
+		case TYPE_QUERY_TERM_RANGE:
+			{
+				String field = readUTF();  // field, lower, upper, then the two inclusion flags, in that order
+				BytesRef lowerTerm = readBytesRef();
+				BytesRef upperTerm = readBytesRef();
+				boolean includeLower = readBoolean();
+				boolean includeUpper = readBoolean();
+				return new TermRangeQuery(field,lowerTerm,upperTerm,includeLower,includeUpper);
+			}
+		case TYPE_QUERY_PHRASE:
+			{
+				PhraseQuery query = new PhraseQuery();
+				int n = readInt();  // number of (term, position) pairs that follow
+				for( int i=0; i<n; i++ ) {
+					Term term = readTerm();
+					int position = readInt();
+					query.add(term,position);
+				}
+				return query;
+			}
+		case TYPE_QUERY_NUMERIC_RANGE:
+			{
+				String field = readUTF();
+				Number min = (Number)readObject();  // each bound is a tagged object and may be null
+				Number max = (Number)readObject();
+				boolean minInclusive = readBoolean();
+				boolean maxInclusive = readBoolean();
+				Number n = min!=null ? min : max;  // the first non-null bound selects the factory; if both are null the throw below is reached
+				if( n instanceof Integer )
+					return NumericRangeQuery.newIntRange(field,(Integer)min,(Integer)max,minInclusive,maxInclusive);
+				if( n instanceof Long )
+					return NumericRangeQuery.newLongRange(field,(Long)min,(Long)max,minInclusive,maxInclusive);
+				if( n instanceof Float )
+					return NumericRangeQuery.newFloatRange(field,(Float)min,(Float)max,minInclusive,maxInclusive);
+				if( n instanceof Double )
+					return NumericRangeQuery.newDoubleRange(field,(Double)min,(Double)max,minInclusive,maxInclusive);
+				throw new RuntimeException("bad numeric type for "+n);
+			}
+		case TYPE_QUERY_BOOLEAN:
+			{
+				BooleanQuery query = new BooleanQuery();
+				int n = readInt();  // number of clauses that follow
+				for( int i=0; i<n; i++ ) {
+					Query subquery = readQuery();
+					BooleanClause.Occur occur = BooleanClause.Occur.valueOf( readUTF() );  // occur was stored by enum name
+					query.add(subquery,occur);
+				}
+				return query;
+			}
+		default:
+			throw new RuntimeException("invalid type "+type);  // tag is not one of the TYPE_* constants
+		}
+	}
+
+	public void writeByteArray(byte[] bytes) throws IOException {  // layout: int length, then the raw bytes
+		writeInt(bytes.length);
+		write(bytes);
+	}
+
+	public byte[] readByteArray() throws IOException {  // inverse of writeByteArray: int length, then that many bytes
+		final int size = readInt();
+		final byte[] data = new byte[size];
+		readFully(data,0,size);  // fills the whole array or fails
+		return data;
+	}
+
+	public void writeList(List list) throws IOException {  // layout: int element count, then each element via writeObject
+		final int count = list.size();
+		writeInt(count);
+		for( Object element : list )
+			writeObject(element);
+	}
+
+	public List readList() throws IOException {
+		final int size = readInt();
+		List list = new ArrayList(size);
+		for( int i=0; i<size; i++ ) {
+			list.add( readObject() );
+		}
+		return list;
+	}
+
+	public void writeMap(Map map) throws IOException {  // layout: int entry count, then key and value of each entry via writeObject
+		final int count = map.size();
+		writeInt(count);
+		for( Object item : map.entrySet() ) {
+			writeObject( ((Map.Entry)item).getKey() );
+			writeObject( ((Map.Entry)item).getValue() );
+		}
+	}
+
+	public Map readMap() throws IOException {
+		final int size = readInt();
+		Map map = new LinkedHashMap();
+		for( int i=0; i<size; i++ ) {
+			Object key = readObject();
+			Object value = readObject();
+			map.put(key,value);
+		}
+		return map;
+	}
+
+	public void writeQuery(Query query) throws IOException {  // typed front for writeObject; unsupported query classes end in its IllegalArgumentException
+		writeObject(query);
+	}
+
+	public Query readQuery() throws IOException {  // reads one tagged object and casts it to Query
+		return (Query)readObject();
+	}
+
+	public void writeBytesRef(BytesRef br) throws IOException {  // layout: int length, then the valid bytes; readBytesRef reads it back
+		writeInt(br.length);
+		write(br.bytes,br.offset,br.length);  // the valid bytes start at br.offset, which is not necessarily 0
+	}
+
+	public BytesRef readBytesRef() throws IOException {  // reads a length-prefixed byte array and wraps it in a new BytesRef
+		return new BytesRef( readByteArray() );
+	}
+
+	public void writeTerm(Term term) throws IOException {  // layout: field name via writeUTF, then the term bytes via writeBytesRef
+		writeUTF(term.field());
+		writeBytesRef( term.bytes() );
+	}
+
+	public Term readTerm() throws IOException {  // inverse of writeTerm: field name first, then the term bytes
+		final String field = readUTF();
+		final BytesRef bytes = readBytesRef();
+		return new Term(field,bytes);
+	}
+
+}
diff -r 3ab0d043370f -r e5d48b85351c src/goodjava/lucene/logging/LoggingIndexWriter.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/logging/LoggingIndexWriter.java	Sat Mar 28 21:30:42 2020 -0600
@@ -0,0 +1,94 @@
+package goodjava.lucene.logging;
+
+import java.io.IOException;
+import java.util.Map;
+import org.apache.lucene.search.Query;
+import goodjava.lucene.api.GoodIndexWriter;
+
+
+public class LoggingIndexWriter implements GoodIndexWriter {
+	private static final int OP_DELETE_ALL = 1;  // OP_* values are the one-byte record tags written to the log and dispatched on by playOp
+	private static final int OP_DELETE_DOCUMENTS = 2;
+	private static final int OP_ADD_DOCUMENT = 3;
+	private static final int OP_UPDATE_DOCUMENT = 4;
+
+	public final GoodIndexWriter indexWriter;  // the writer every operation is forwarded to
+	private final LogFile logFile;  // opened by the constructor; receives one record per logged operation
+
+	public LoggingIndexWriter(GoodIndexWriter indexWriter) throws IOException {  // wraps indexWriter and opens the log file
+		this.indexWriter = indexWriter;
+		logFile = new LogFile("lucene.log","rw");  // fixed relative file name
+		logFile.gotoStart();  // for now - NOTE(review): records are then written from the start of the data area, over whatever an existing log held
+	}
+
+	public void close() throws IOException {  // closes the index, then commits the log and releases its file handle
+		indexWriter.close();
+		try { logFile.commit(); } finally { logFile.close(); }  // the log file was previously never closed; close it even if the final commit fails
+	}
+
+	public void commit() throws IOException {  // commits the index first, then advances the log's committed end to match
+		indexWriter.commit();
+		logFile.commit();
+	}
+
+	public void rollback() throws IOException {  // rolls back the index and abandons log records written since the last commit
+		indexWriter.rollback();
+		logFile.gotoEnd();  // back to the committed end, so later writes overwrite the abandoned records
+	}
+
+	public void deleteAll() throws IOException {  // forwards to the index, then logs the operation
+		indexWriter.deleteAll();
+		logFile.writeByte(OP_DELETE_ALL);  // the op code is the whole record
+	}
+
+	public void deleteDocuments(Query query) throws IOException {  // forwards to the index, then logs the op code and the query
+		indexWriter.deleteDocuments(query);
+		logFile.writeByte(OP_DELETE_DOCUMENTS);
+		logFile.writeQuery(query);  // throws IllegalArgumentException for query classes LogFile cannot serialize; the index call has already run by then
+	}
+
+	public void addDocument(Map<String,Object> storedFields) throws IOException {  // forwards to the index, then logs the op code and the fields
+		indexWriter.addDocument(storedFields);
+		logFile.writeByte(OP_ADD_DOCUMENT);
+		logFile.writeMap(storedFields);  // values must be types LogFile.writeObject supports
+	}
+
+	public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException {  // forwards to the index, then logs op code, key field and fields
+		indexWriter.updateDocument(keyFieldName,storedFields);
+		logFile.writeByte(OP_UPDATE_DOCUMENT);
+		logFile.writeUTF(keyFieldName);  // playOp reads the key field name before the map
+		logFile.writeMap(storedFields);
+	}
+
+	public void reindexDocuments(String keyFieldName,Query query) throws IOException {  // forwarded only; nothing is written to the log - NOTE(review): presumably intentional, confirm
+		indexWriter.reindexDocuments(keyFieldName,query);
+	}
+
+	private void playOp() throws IOException {  // reads one record at the log's current position and applies it to indexWriter
+		int op = logFile.readByte();
+		switch(op) {
+		case OP_DELETE_ALL:
+			indexWriter.deleteAll();
+			return;
+		case OP_DELETE_DOCUMENTS:
+			indexWriter.deleteDocuments( logFile.readQuery() );
+			return;
+		case OP_ADD_DOCUMENT:
+			indexWriter.addDocument( logFile.readMap() );
+			return;
+		case OP_UPDATE_DOCUMENT:
+			indexWriter.updateDocument( logFile.readUTF(), logFile.readMap() );  // arguments are evaluated left to right: key field name, then the map
+			return;
+		default:
+			throw new RuntimeException("invalid op "+op);  // not one of the OP_* codes
+		}
+	}
+
+	public void playLog() throws IOException {  // replays every committed record into indexWriter, then commits the index
+		logFile.gotoStart();
+		while( logFile.hasMore() ) {  // stops at the committed end
+			playOp();  // applies to indexWriter directly, so replayed operations are not logged again
+		}
+		indexWriter.commit();
+	}
+}