comparison lucene/src/luan/modules/lucene/LuceneIndex.java @ 618:5e495e4e560b

add lucene indexed_only_fields
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 01 Jan 2016 01:24:10 -0700
parents e54c1646eed0
children 89eb02f9827f
comparison
equal deleted inserted replaced
617:e54c1646eed0 618:5e495e4e560b
8 import java.util.Iterator; 8 import java.util.Iterator;
9 import java.util.Map; 9 import java.util.Map;
10 import java.util.List; 10 import java.util.List;
11 import java.util.ArrayList; 11 import java.util.ArrayList;
12 import java.util.Set; 12 import java.util.Set;
13 import java.util.HashSet;
14 import java.util.Collections;
13 import java.util.concurrent.locks.Lock; 15 import java.util.concurrent.locks.Lock;
14 import java.util.concurrent.locks.ReentrantLock; 16 import java.util.concurrent.locks.ReentrantLock;
15 import java.util.zip.ZipOutputStream; 17 import java.util.zip.ZipOutputStream;
16 import java.util.zip.ZipEntry; 18 import java.util.zip.ZipEntry;
17 import org.apache.lucene.analysis.Analyzer; 19 import org.apache.lucene.analysis.Analyzer;
79 private DirectoryReader reader; 81 private DirectoryReader reader;
80 private IndexSearcher searcher; 82 private IndexSearcher searcher;
81 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); 83 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
82 private boolean isClosed = false; 84 private boolean isClosed = false;
83 private final MultiFieldParser mfp; 85 private final MultiFieldParser mfp;
86 public final LuanTable indexed_only_fields = new LuanTable();
84 87
85 public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException { 88 public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException {
86 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); 89 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields);
87 mfp.fields.put( "type", STRING_FIELD_PARSER ); 90 mfp.fields.put( "type", STRING_FIELD_PARSER );
88 mfp.fields.put( "id", NumberFieldParser.LONG ); 91 mfp.fields.put( "id", NumberFieldParser.LONG );
138 writeLock.unlock(); 141 writeLock.unlock();
139 } 142 }
140 } 143 }
141 144
142 public void save(LuanState luan,LuanTable doc) throws LuanException, IOException { 145 public void save(LuanState luan,LuanTable doc) throws LuanException, IOException {
143 if( doc.get(luan,"type")==null ) 146 Set indexedOnlySet = new HashSet();
147 Object typeObj = doc.get(luan,"type");
148 if( typeObj==null )
144 throw new LuanException(luan,"missing 'type' field"); 149 throw new LuanException(luan,"missing 'type' field");
150 if( !(typeObj instanceof String) )
151 throw new LuanException(luan,"type must be string");
152 String type = (String)typeObj;
153 Object indexedOnlyObj = indexed_only_fields.get(luan,type);
154 if( indexedOnlyObj != null ) {
155 if( !(indexedOnlyObj instanceof LuanTable) )
156 throw new LuanException(luan,"indexed_only_fields elements must be tables");
157 LuanTable indexedOnly = (LuanTable)indexedOnlyObj;
158 for( Map.Entry<Object,Object> entry : indexedOnly.iterable(luan) ) {
159 Object key = entry.getKey();
160 if( !(key instanceof String) )
161 throw new LuanException(luan,"indexed_only_fields."+type+" entries must be strings");
162 String name = (String)key;
163 Object value = entry.getValue();
164 if( !(value instanceof LuanFunction) )
165 throw new LuanException(luan,"indexed_only_fields."+type+" values must be functions");
166 LuanFunction fn = (LuanFunction)value;
167 value = Luan.first(fn.call(luan,new Object[]{doc}));
168 doc.put(luan, name, value );
169 indexedOnlySet.add(name);
170 }
171 }
145 Object obj = doc.get(luan,"id"); 172 Object obj = doc.get(luan,"id");
146 Long id; 173 Long id;
147 try { 174 try {
148 id = (Long)obj; 175 id = (Long)obj;
149 } catch(ClassCastException e) { 176 } catch(ClassCastException e) {
154 writeLock.lock(); 181 writeLock.lock();
155 try { 182 try {
156 if( id == null ) { 183 if( id == null ) {
157 id = nextId(luan); 184 id = nextId(luan);
158 doc.put(luan,"id",id); 185 doc.put(luan,"id",id);
159 writer.addDocument(toLucene(luan,doc)); 186 writer.addDocument(toLucene(luan,doc,indexedOnlySet));
160 } else { 187 } else {
161 writer.updateDocument( term("id",id), toLucene(luan,doc) ); 188 writer.updateDocument( term("id",id), toLucene(luan,doc,indexedOnlySet) );
162 } 189 }
163 if(commit) writer.commit(); 190 if(commit) writer.commit();
164 } finally { 191 } finally {
165 writeLock.unlock(); 192 writeLock.unlock();
166 } 193 }
182 private long idLim = 0; 209 private long idLim = 0;
183 private final int idBatch = 10; 210 private final int idBatch = 10;
184 211
185 private void initId(LuanState luan) throws LuanException, IOException { 212 private void initId(LuanState luan) throws LuanException, IOException {
186 TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); 213 TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1);
187 214 /*
188 // tmp hack 215 // tmp hack
189 if( td.totalHits == 0 ) { 216 if( td.totalHits == 0 ) {
190 td = searcher.search(new TermQuery(new Term("type index","next_id")),1); 217 td = searcher.search(new TermQuery(new Term("type index","next_id")),1);
191 if( td.totalHits == 1 ) { 218 if( td.totalHits == 1 ) {
192 long idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); 219 long idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue();
195 doc.rawPut( FLD_NEXT_ID, idLim ); 222 doc.rawPut( FLD_NEXT_ID, idLim );
196 writer.addDocument(toLucene(luan,doc)); 223 writer.addDocument(toLucene(luan,doc));
197 writer.commit(); 224 writer.commit();
198 } 225 }
199 } 226 }
200 227 */
201 switch(td.totalHits) { 228 switch(td.totalHits) {
202 case 0: 229 case 0:
203 break; // do nothing 230 break; // do nothing
204 case 1: 231 case 1:
205 idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); 232 idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue();
214 if( ++id > idLim ) { 241 if( ++id > idLim ) {
215 idLim += idBatch; 242 idLim += idBatch;
216 LuanTable doc = new LuanTable(); 243 LuanTable doc = new LuanTable();
217 doc.rawPut( "type", "next_id" ); 244 doc.rawPut( "type", "next_id" );
218 doc.rawPut( FLD_NEXT_ID, idLim ); 245 doc.rawPut( FLD_NEXT_ID, idLim );
219 writer.updateDocument(new Term("type","next_id"),toLucene(luan,doc)); 246 writer.updateDocument(new Term("type","next_id"),toLucene(luan,doc,Collections.EMPTY_SET));
220 } 247 }
221 return id; 248 return id;
222 } 249 }
223 250
224 251
414 }; 441 };
415 442
416 443
417 444
418 445
419 private Document toLucene(LuanState luan,LuanTable table) throws LuanException { 446 private Document toLucene(LuanState luan,LuanTable table,Set indexOnly) throws LuanException {
420 Set<String> indexed = mfp.fields.keySet(); 447 Set<String> indexed = mfp.fields.keySet();
421 Document doc = new Document(); 448 Document doc = new Document();
422 for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { 449 for( Map.Entry<Object,Object> entry : table.iterable(luan) ) {
423 Object key = entry.getKey(); 450 Object key = entry.getKey();
424 if( !(key instanceof String) ) 451 if( !(key instanceof String) )
425 throw new LuanException(luan,"key must be string"); 452 throw new LuanException(luan,"key must be string");
426 String name = (String)key; 453 String name = (String)key;
427 Object value = entry.getValue(); 454 Object value = entry.getValue();
455 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES;
428 if( value instanceof String ) { 456 if( value instanceof String ) {
429 String s = (String)value; 457 String s = (String)value;
430 FieldParser fp = mfp.fields.get(name); 458 FieldParser fp = mfp.fields.get(name);
431 if( fp != null ) { 459 if( fp != null ) {
432 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { 460 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) {
433 doc.add(new TextField(name, s, Field.Store.YES)); 461 doc.add(new TextField(name, s, store));
434 } else { 462 } else {
435 doc.add(new StringField(name, s, Field.Store.YES)); 463 doc.add(new StringField(name, s, store));
436 } 464 }
437 } else { 465 } else {
438 doc.add(new StoredField(name, s)); 466 doc.add(new StoredField(name, s));
439 } 467 }
440 } else if( value instanceof Integer ) { 468 } else if( value instanceof Integer ) {
441 int i = (Integer)value; 469 int i = (Integer)value;
442 if( indexed.contains(name) ) { 470 if( indexed.contains(name) ) {
443 doc.add(new IntField(name, i, Field.Store.YES)); 471 doc.add(new IntField(name, i, store));
444 } else { 472 } else {
445 doc.add(new StoredField(name, i)); 473 doc.add(new StoredField(name, i));
446 } 474 }
447 } else if( value instanceof Long ) { 475 } else if( value instanceof Long ) {
448 long i = (Long)value; 476 long i = (Long)value;
449 if( indexed.contains(name) ) { 477 if( indexed.contains(name) ) {
450 doc.add(new LongField(name, i, Field.Store.YES)); 478 doc.add(new LongField(name, i, store));
451 } else { 479 } else {
452 doc.add(new StoredField(name, i)); 480 doc.add(new StoredField(name, i));
453 } 481 }
454 } else if( value instanceof Double ) { 482 } else if( value instanceof Double ) {
455 double i = (Double)value; 483 double i = (Double)value;
456 if( indexed.contains(name) ) { 484 if( indexed.contains(name) ) {
457 doc.add(new DoubleField(name, i, Field.Store.YES)); 485 doc.add(new DoubleField(name, i, store));
458 } else { 486 } else {
459 doc.add(new StoredField(name, i)); 487 doc.add(new StoredField(name, i));
460 } 488 }
461 } else if( value instanceof byte[] ) { 489 } else if( value instanceof byte[] ) {
462 byte[] b = (byte[])value; 490 byte[] b = (byte[])value;