Mercurial Hosting > luan
comparison lucene/src/luan/modules/lucene/LuceneIndex.java @ 618:5e495e4e560b
add lucene indexed_only_fields
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 01 Jan 2016 01:24:10 -0700 |
parents | e54c1646eed0 |
children | 89eb02f9827f |
comparison
equal
deleted
inserted
replaced
617:e54c1646eed0 | 618:5e495e4e560b |
---|---|
8 import java.util.Iterator; | 8 import java.util.Iterator; |
9 import java.util.Map; | 9 import java.util.Map; |
10 import java.util.List; | 10 import java.util.List; |
11 import java.util.ArrayList; | 11 import java.util.ArrayList; |
12 import java.util.Set; | 12 import java.util.Set; |
13 import java.util.HashSet; | |
14 import java.util.Collections; | |
13 import java.util.concurrent.locks.Lock; | 15 import java.util.concurrent.locks.Lock; |
14 import java.util.concurrent.locks.ReentrantLock; | 16 import java.util.concurrent.locks.ReentrantLock; |
15 import java.util.zip.ZipOutputStream; | 17 import java.util.zip.ZipOutputStream; |
16 import java.util.zip.ZipEntry; | 18 import java.util.zip.ZipEntry; |
17 import org.apache.lucene.analysis.Analyzer; | 19 import org.apache.lucene.analysis.Analyzer; |
79 private DirectoryReader reader; | 81 private DirectoryReader reader; |
80 private IndexSearcher searcher; | 82 private IndexSearcher searcher; |
81 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); | 83 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); |
82 private boolean isClosed = false; | 84 private boolean isClosed = false; |
83 private final MultiFieldParser mfp; | 85 private final MultiFieldParser mfp; |
86 public final LuanTable indexed_only_fields = new LuanTable(); | |
84 | 87 |
85 public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException { | 88 public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException { |
86 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); | 89 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); |
87 mfp.fields.put( "type", STRING_FIELD_PARSER ); | 90 mfp.fields.put( "type", STRING_FIELD_PARSER ); |
88 mfp.fields.put( "id", NumberFieldParser.LONG ); | 91 mfp.fields.put( "id", NumberFieldParser.LONG ); |
138 writeLock.unlock(); | 141 writeLock.unlock(); |
139 } | 142 } |
140 } | 143 } |
141 | 144 |
142 public void save(LuanState luan,LuanTable doc) throws LuanException, IOException { | 145 public void save(LuanState luan,LuanTable doc) throws LuanException, IOException { |
143 if( doc.get(luan,"type")==null ) | 146 Set indexedOnlySet = new HashSet(); |
147 Object typeObj = doc.get(luan,"type"); | |
148 if( typeObj==null ) | |
144 throw new LuanException(luan,"missing 'type' field"); | 149 throw new LuanException(luan,"missing 'type' field"); |
150 if( !(typeObj instanceof String) ) | |
151 throw new LuanException(luan,"type must be string"); | |
152 String type = (String)typeObj; | |
153 Object indexedOnlyObj = indexed_only_fields.get(luan,type); | |
154 if( indexedOnlyObj != null ) { | |
155 if( !(indexedOnlyObj instanceof LuanTable) ) | |
156 throw new LuanException(luan,"indexed_only_fields elements must be tables"); | |
157 LuanTable indexedOnly = (LuanTable)indexedOnlyObj; | |
158 for( Map.Entry<Object,Object> entry : indexedOnly.iterable(luan) ) { | |
159 Object key = entry.getKey(); | |
160 if( !(key instanceof String) ) | |
161 throw new LuanException(luan,"indexed_only_fields."+type+" entries must be strings"); | |
162 String name = (String)key; | |
163 Object value = entry.getValue(); | |
164 if( !(value instanceof LuanFunction) ) | |
165 throw new LuanException(luan,"indexed_only_fields."+type+" values must be functions"); | |
166 LuanFunction fn = (LuanFunction)value; | |
167 value = Luan.first(fn.call(luan,new Object[]{doc})); | |
168 doc.put(luan, name, value ); | |
169 indexedOnlySet.add(name); | |
170 } | |
171 } | |
145 Object obj = doc.get(luan,"id"); | 172 Object obj = doc.get(luan,"id"); |
146 Long id; | 173 Long id; |
147 try { | 174 try { |
148 id = (Long)obj; | 175 id = (Long)obj; |
149 } catch(ClassCastException e) { | 176 } catch(ClassCastException e) { |
154 writeLock.lock(); | 181 writeLock.lock(); |
155 try { | 182 try { |
156 if( id == null ) { | 183 if( id == null ) { |
157 id = nextId(luan); | 184 id = nextId(luan); |
158 doc.put(luan,"id",id); | 185 doc.put(luan,"id",id); |
159 writer.addDocument(toLucene(luan,doc)); | 186 writer.addDocument(toLucene(luan,doc,indexedOnlySet)); |
160 } else { | 187 } else { |
161 writer.updateDocument( term("id",id), toLucene(luan,doc) ); | 188 writer.updateDocument( term("id",id), toLucene(luan,doc,indexedOnlySet) ); |
162 } | 189 } |
163 if(commit) writer.commit(); | 190 if(commit) writer.commit(); |
164 } finally { | 191 } finally { |
165 writeLock.unlock(); | 192 writeLock.unlock(); |
166 } | 193 } |
182 private long idLim = 0; | 209 private long idLim = 0; |
183 private final int idBatch = 10; | 210 private final int idBatch = 10; |
184 | 211 |
185 private void initId(LuanState luan) throws LuanException, IOException { | 212 private void initId(LuanState luan) throws LuanException, IOException { |
186 TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); | 213 TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); |
187 | 214 /* |
188 // tmp hack | 215 // tmp hack |
189 if( td.totalHits == 0 ) { | 216 if( td.totalHits == 0 ) { |
190 td = searcher.search(new TermQuery(new Term("type index","next_id")),1); | 217 td = searcher.search(new TermQuery(new Term("type index","next_id")),1); |
191 if( td.totalHits == 1 ) { | 218 if( td.totalHits == 1 ) { |
192 long idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); | 219 long idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); |
195 doc.rawPut( FLD_NEXT_ID, idLim ); | 222 doc.rawPut( FLD_NEXT_ID, idLim ); |
196 writer.addDocument(toLucene(luan,doc)); | 223 writer.addDocument(toLucene(luan,doc)); |
197 writer.commit(); | 224 writer.commit(); |
198 } | 225 } |
199 } | 226 } |
200 | 227 */ |
201 switch(td.totalHits) { | 228 switch(td.totalHits) { |
202 case 0: | 229 case 0: |
203 break; // do nothing | 230 break; // do nothing |
204 case 1: | 231 case 1: |
205 idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); | 232 idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); |
214 if( ++id > idLim ) { | 241 if( ++id > idLim ) { |
215 idLim += idBatch; | 242 idLim += idBatch; |
216 LuanTable doc = new LuanTable(); | 243 LuanTable doc = new LuanTable(); |
217 doc.rawPut( "type", "next_id" ); | 244 doc.rawPut( "type", "next_id" ); |
218 doc.rawPut( FLD_NEXT_ID, idLim ); | 245 doc.rawPut( FLD_NEXT_ID, idLim ); |
219 writer.updateDocument(new Term("type","next_id"),toLucene(luan,doc)); | 246 writer.updateDocument(new Term("type","next_id"),toLucene(luan,doc,Collections.EMPTY_SET)); |
220 } | 247 } |
221 return id; | 248 return id; |
222 } | 249 } |
223 | 250 |
224 | 251 |
414 }; | 441 }; |
415 | 442 |
416 | 443 |
417 | 444 |
418 | 445 |
419 private Document toLucene(LuanState luan,LuanTable table) throws LuanException { | 446 private Document toLucene(LuanState luan,LuanTable table,Set indexOnly) throws LuanException { |
420 Set<String> indexed = mfp.fields.keySet(); | 447 Set<String> indexed = mfp.fields.keySet(); |
421 Document doc = new Document(); | 448 Document doc = new Document(); |
422 for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { | 449 for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { |
423 Object key = entry.getKey(); | 450 Object key = entry.getKey(); |
424 if( !(key instanceof String) ) | 451 if( !(key instanceof String) ) |
425 throw new LuanException(luan,"key must be string"); | 452 throw new LuanException(luan,"key must be string"); |
426 String name = (String)key; | 453 String name = (String)key; |
427 Object value = entry.getValue(); | 454 Object value = entry.getValue(); |
455 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; | |
428 if( value instanceof String ) { | 456 if( value instanceof String ) { |
429 String s = (String)value; | 457 String s = (String)value; |
430 FieldParser fp = mfp.fields.get(name); | 458 FieldParser fp = mfp.fields.get(name); |
431 if( fp != null ) { | 459 if( fp != null ) { |
432 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { | 460 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { |
433 doc.add(new TextField(name, s, Field.Store.YES)); | 461 doc.add(new TextField(name, s, store)); |
434 } else { | 462 } else { |
435 doc.add(new StringField(name, s, Field.Store.YES)); | 463 doc.add(new StringField(name, s, store)); |
436 } | 464 } |
437 } else { | 465 } else { |
438 doc.add(new StoredField(name, s)); | 466 doc.add(new StoredField(name, s)); |
439 } | 467 } |
440 } else if( value instanceof Integer ) { | 468 } else if( value instanceof Integer ) { |
441 int i = (Integer)value; | 469 int i = (Integer)value; |
442 if( indexed.contains(name) ) { | 470 if( indexed.contains(name) ) { |
443 doc.add(new IntField(name, i, Field.Store.YES)); | 471 doc.add(new IntField(name, i, store)); |
444 } else { | 472 } else { |
445 doc.add(new StoredField(name, i)); | 473 doc.add(new StoredField(name, i)); |
446 } | 474 } |
447 } else if( value instanceof Long ) { | 475 } else if( value instanceof Long ) { |
448 long i = (Long)value; | 476 long i = (Long)value; |
449 if( indexed.contains(name) ) { | 477 if( indexed.contains(name) ) { |
450 doc.add(new LongField(name, i, Field.Store.YES)); | 478 doc.add(new LongField(name, i, store)); |
451 } else { | 479 } else { |
452 doc.add(new StoredField(name, i)); | 480 doc.add(new StoredField(name, i)); |
453 } | 481 } |
454 } else if( value instanceof Double ) { | 482 } else if( value instanceof Double ) { |
455 double i = (Double)value; | 483 double i = (Double)value; |
456 if( indexed.contains(name) ) { | 484 if( indexed.contains(name) ) { |
457 doc.add(new DoubleField(name, i, Field.Store.YES)); | 485 doc.add(new DoubleField(name, i, store)); |
458 } else { | 486 } else { |
459 doc.add(new StoredField(name, i)); | 487 doc.add(new StoredField(name, i)); |
460 } | 488 } |
461 } else if( value instanceof byte[] ) { | 489 } else if( value instanceof byte[] ) { |
462 byte[] b = (byte[])value; | 490 byte[] b = (byte[])value; |