Mercurial Hosting > luan
comparison src/luan/modules/lucene/LuceneIndex.java @ 1528:3bd4d7963456
use goodjava/lucene/api
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 26 Jul 2020 23:11:53 -0600 |
parents | efbc3720d3f3 |
children | e6d808f40bbc |
comparison
equal
deleted
inserted
replaced
1527:fa1e3adbebfb | 1528:3bd4d7963456 |
---|---|
10 import java.sql.SQLException; | 10 import java.sql.SQLException; |
11 import java.util.Arrays; | 11 import java.util.Arrays; |
12 import java.util.Iterator; | 12 import java.util.Iterator; |
13 import java.util.Map; | 13 import java.util.Map; |
14 import java.util.HashMap; | 14 import java.util.HashMap; |
15 import java.util.LinkedHashMap; | |
15 import java.util.List; | 16 import java.util.List; |
16 import java.util.ArrayList; | 17 import java.util.ArrayList; |
17 import java.util.Set; | 18 import java.util.Set; |
18 import java.util.HashSet; | 19 import java.util.HashSet; |
19 import java.util.Collections; | 20 import java.util.Collections; |
72 import goodjava.lucene.queryparser.GoodQueryParser; | 73 import goodjava.lucene.queryparser.GoodQueryParser; |
73 import goodjava.lucene.queryparser.FieldParser; | 74 import goodjava.lucene.queryparser.FieldParser; |
74 import goodjava.lucene.queryparser.MultiFieldParser; | 75 import goodjava.lucene.queryparser.MultiFieldParser; |
75 import goodjava.lucene.queryparser.StringFieldParser; | 76 import goodjava.lucene.queryparser.StringFieldParser; |
76 import goodjava.lucene.queryparser.NumberFieldParser; | 77 import goodjava.lucene.queryparser.NumberFieldParser; |
78 import goodjava.lucene.api.GoodIndexWriter; | |
79 import goodjava.lucene.api.LuceneIndexWriter; | |
80 import goodjava.lucene.api.GoodIndexWriterConfig; | |
81 import goodjava.lucene.api.LuceneUtils; | |
77 import goodjava.parser.ParseException; | 82 import goodjava.parser.ParseException; |
78 import luan.modules.Utils; | 83 import luan.modules.Utils; |
79 import luan.Luan; | 84 import luan.Luan; |
80 import luan.LuanTable; | 85 import luan.LuanTable; |
81 import luan.LuanFunction; | 86 import luan.LuanFunction; |
120 | 125 |
121 private final Object version; | 126 private final Object version; |
122 | 127 |
123 private final ReentrantLock writeLock = new ReentrantLock(); | 128 private final ReentrantLock writeLock = new ReentrantLock(); |
124 private final File indexDir; | 129 private final File indexDir; |
125 private SnapshotDeletionPolicy snapshotDeletionPolicy; | 130 private GoodIndexWriter writer; |
126 private IndexWriter writer; | |
127 private DirectoryReader reader; | 131 private DirectoryReader reader; |
128 private IndexSearcher searcher; | 132 private IndexSearcher searcher; |
129 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); | 133 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); |
130 private final MultiFieldParser mfp; | 134 private final MultiFieldParser mfp; |
131 private final Analyzer analyzer; | 135 private final Analyzer analyzer; // ??? |
132 | 136 |
133 private FSDirectory fsDir; | 137 private FSDirectory fsDir; |
134 private int writeCount; | 138 private int writeCount; |
135 private AtomicInteger writeCounter = new AtomicInteger(); | 139 private AtomicInteger writeCounter = new AtomicInteger(); |
136 | 140 private final GoodIndexWriterConfig config; |
137 private Set<String> indexOnly = new HashSet<String>(); | |
138 // private final FieldParser defaultFieldParser; | |
139 // private final String[] defaultFields; | |
140 | 141 |
141 private final PostgresBackup postgresBackup; | 142 private final PostgresBackup postgresBackup; |
142 private boolean wasCreated; | 143 private boolean wasCreated; |
143 | 144 |
144 private LuceneIndex(Luan luan,File indexDir,LuanTable options) | 145 private LuceneIndex(Luan luan,File indexDir,LuanTable options) |
148 this.version = options.remove("version"); | 149 this.version = options.remove("version"); |
149 FieldParser defaultFieldParser = (FieldParser)options.remove("default_type"); | 150 FieldParser defaultFieldParser = (FieldParser)options.remove("default_type"); |
150 LuanTable defaultFieldsTbl = Utils.removeTable(options,"default_fields"); | 151 LuanTable defaultFieldsTbl = Utils.removeTable(options,"default_fields"); |
151 String[] defaultFields = defaultFieldsTbl==null ? null : (String[])defaultFieldsTbl.asList().toArray(new String[0]); | 152 String[] defaultFields = defaultFieldsTbl==null ? null : (String[])defaultFieldsTbl.asList().toArray(new String[0]); |
152 LuanTable postgresSpec = Utils.removeTable(options,"postgres_spec"); | 153 LuanTable postgresSpec = Utils.removeTable(options,"postgres_spec"); |
154 LuanFunction supplementer = Utils.removeFunction(options,"supplementer"); | |
153 Utils.checkEmpty(options); | 155 Utils.checkEmpty(options); |
154 | 156 |
155 // this.defaultFieldParser = defaultFieldParser; | |
156 // this.defaultFields = defaultFields; | |
157 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); | 157 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); |
158 mfp.fields.put( "type", STRING_FIELD_PARSER ); | 158 mfp.fields.put( "type", STRING_FIELD_PARSER ); |
159 mfp.fields.put( "id", NumberFieldParser.LONG ); | 159 mfp.fields.put( "id", NumberFieldParser.LONG ); |
160 this.indexDir = indexDir; | 160 this.indexDir = indexDir; |
161 Analyzer analyzer = STRING_FIELD_PARSER.analyzer; | 161 Analyzer analyzer = STRING_FIELD_PARSER.analyzer; |
162 if( defaultFieldParser instanceof StringFieldParser ) { | 162 if( defaultFieldParser instanceof StringFieldParser ) { |
163 StringFieldParser sfp = (StringFieldParser)defaultFieldParser; | 163 StringFieldParser sfp = (StringFieldParser)defaultFieldParser; |
164 analyzer = sfp.analyzer; | 164 analyzer = sfp.analyzer; |
165 } | 165 } |
166 this.analyzer = analyzer; | 166 this.analyzer = analyzer; |
167 this.config = new SupplementingConfig(luceneVersion,mfp,supplementer); | |
167 wasCreated = reopen(); | 168 wasCreated = reopen(); |
168 if( postgresSpec == null ) { | 169 if( postgresSpec == null ) { |
169 postgresBackup = null; | 170 postgresBackup = null; |
170 } else { | 171 } else { |
171 postgresBackup = new PostgresBackup(luan,postgresSpec); | 172 postgresBackup = new PostgresBackup(luan,postgresSpec); |
180 } | 181 } |
181 } | 182 } |
182 } | 183 } |
183 | 184 |
184 public boolean reopen() throws IOException { | 185 public boolean reopen() throws IOException { |
185 IndexWriterConfig conf = new IndexWriterConfig(luceneVersion,analyzer); | |
186 snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); | |
187 conf.setIndexDeletionPolicy(snapshotDeletionPolicy); | |
188 fsDir = FSDirectory.open(indexDir); | 186 fsDir = FSDirectory.open(indexDir); |
189 boolean wasCreated = !fsDir.getDirectory().exists(); | 187 boolean wasCreated = !fsDir.getDirectory().exists(); |
190 writer = new IndexWriter(fsDir,conf); | 188 writer = new LuceneIndexWriter(fsDir,config); |
191 writer.commit(); // commit index creation | |
192 reader = DirectoryReader.open(fsDir); | 189 reader = DirectoryReader.open(fsDir); |
193 searcher = new IndexSearcher(reader); | 190 searcher = new IndexSearcher(reader); |
194 initId(); | 191 initId(); |
195 return wasCreated; | 192 return wasCreated; |
196 } | 193 } |
261 wrote(); | 258 wrote(); |
262 writeLock.unlock(); | 259 writeLock.unlock(); |
263 } | 260 } |
264 } | 261 } |
265 | 262 |
266 public void indexed_only_fields(List<String> fields) { | 263 public void save(LuanTable doc) |
267 indexOnly.addAll(fields); | |
268 } | |
269 | |
270 public void save(LuanFunction completer,LuanTable doc,LuanTable boosts) | |
271 throws LuanException, IOException, SQLException | 264 throws LuanException, IOException, SQLException |
272 { | 265 { |
273 if( boosts!=null && postgresBackup!=null ) | |
274 throw new LuanException("boosts are not saved to postgres backup"); | |
275 | |
276 Object obj = doc.get("id"); | 266 Object obj = doc.get("id"); |
277 Long id; | 267 Long id; |
278 try { | 268 try { |
279 id = (Long)obj; | 269 id = (Long)obj; |
280 } catch(ClassCastException e) { | 270 } catch(ClassCastException e) { |
287 if( id == null ) { | 277 if( id == null ) { |
288 id = nextId(); | 278 id = nextId(); |
289 doc.put("id",id); | 279 doc.put("id",id); |
290 if( postgresBackup != null ) | 280 if( postgresBackup != null ) |
291 postgresBackup.add(doc); | 281 postgresBackup.add(doc); |
292 writer.addDocument(toLucene(completer,doc,boosts)); | 282 writer.addDocument(toLucene(doc)); |
293 } else { | 283 } else { |
294 if( postgresBackup != null ) | 284 if( postgresBackup != null ) |
295 postgresBackup.update(doc); | 285 postgresBackup.update(doc); |
296 writer.updateDocument( term("id",id), toLucene(completer,doc,boosts) ); | 286 writer.updateDocument( "id", toLucene(doc) ); |
297 } | 287 } |
298 if(commit) writer.commit(); | 288 if(commit) writer.commit(); |
299 } finally { | 289 } finally { |
300 wrote(); | 290 wrote(); |
301 writeLock.unlock(); | 291 writeLock.unlock(); |
366 throw new RuntimeException(); | 356 throw new RuntimeException(); |
367 } | 357 } |
368 } | 358 } |
369 | 359 |
370 private void saveNextId(long nextId) throws LuanException, IOException { | 360 private void saveNextId(long nextId) throws LuanException, IOException { |
371 Map doc = new HashMap(); | 361 Map<String,Object> doc = new HashMap(); |
372 doc.put( "type", "next_id" ); | 362 doc.put( "type", "next_id" ); |
373 doc.put( FLD_NEXT_ID, idLim ); | 363 doc.put( FLD_NEXT_ID, idLim ); |
374 writer.updateDocument(new Term("type","next_id"),toLucene(doc.entrySet(),null)); | 364 writer.updateDocument("type",doc); |
375 } | 365 } |
376 | 366 |
377 public synchronized long nextId() throws LuanException, IOException { | 367 public synchronized long nextId() throws LuanException, IOException { |
378 if( ++id > idLim ) { | 368 if( ++id > idLim ) { |
379 idLim += idBatch; | 369 idLim += idBatch; |
402 snapshotDeletionPolicy.release(ic); | 392 snapshotDeletionPolicy.release(ic); |
403 } | 393 } |
404 } | 394 } |
405 */ | 395 */ |
406 public SnapshotDeletionPolicy snapshotDeletionPolicy() { | 396 public SnapshotDeletionPolicy snapshotDeletionPolicy() { |
407 return snapshotDeletionPolicy; | 397 return (SnapshotDeletionPolicy)writer.getLuceneConfig().getIndexDeletionPolicy(); |
408 } | 398 } |
409 | 399 |
410 public Object snapshot(LuanFunction fn) throws LuanException, IOException { | 400 public Object snapshot(LuanFunction fn) throws LuanException, IOException { |
401 SnapshotDeletionPolicy snapshotDeletionPolicy = snapshotDeletionPolicy(); | |
411 IndexCommit ic = snapshotDeletionPolicy.snapshot(); | 402 IndexCommit ic = snapshotDeletionPolicy.snapshot(); |
412 try { | 403 try { |
413 String dir = fsDir.getDirectory().toString(); | 404 String dir = fsDir.getDirectory().toString(); |
414 LuanTable fileNames = new LuanTable(fn.luan(),new ArrayList(ic.getFileNames())); | 405 LuanTable fileNames = new LuanTable(fn.luan(),new ArrayList(ic.getFileNames())); |
415 return fn.call(dir,fileNames); | 406 return fn.call(dir,fileNames); |
585 return; | 576 return; |
586 } | 577 } |
587 mfp.fields.put( field, fp ); | 578 mfp.fields.put( field, fp ); |
588 } | 579 } |
589 | 580 |
590 | 581 static Map<String,Object> toLucene(LuanTable table) throws LuanException { |
591 private IndexableField newField(String name,Object value,Set<String> indexed,Float boost) | 582 Map<String,Object> map = new LinkedHashMap<String,Object>(); |
592 throws LuanException | 583 for( Map.Entry<Object,Object> entry : table.iterable() ) { |
593 { | 584 String name = (String)entry.getKey(); |
594 boolean hasBoost = boost!=null; | 585 Object value = entry.getValue(); |
595 IndexableField fld = newField2(name,value,indexed,hasBoost); | 586 if( value instanceof LuanTable ) { |
596 if( hasBoost ) | |
597 ((Field)fld).setBoost(boost); | |
598 return fld; | |
599 } | |
600 | |
601 private IndexableField newField2(String name,Object value,Set<String> indexed,boolean hasBoost) | |
602 throws LuanException | |
603 { | |
604 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; | |
605 if( value instanceof String ) { | |
606 String s = (String)value; | |
607 FieldParser fp = mfp.fields.get(name); | |
608 if( fp != null ) { | |
609 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { | |
610 return new TextField(name, s, store); | |
611 } else if (hasBoost) { | |
612 // fuck you modern lucene developers | |
613 return new Field(name, s, store, Field.Index.NOT_ANALYZED); | |
614 } else { | |
615 return new StringField(name, s, store); | |
616 } | |
617 } else { | |
618 return new StoredField(name, s); | |
619 } | |
620 } else if( value instanceof Integer ) { | |
621 int i = (Integer)value; | |
622 if( indexed.contains(name) ) { | |
623 return new IntField(name, i, store); | |
624 } else { | |
625 return new StoredField(name, i); | |
626 } | |
627 } else if( value instanceof Long ) { | |
628 long i = (Long)value; | |
629 if( indexed.contains(name) ) { | |
630 return new LongField(name, i, store); | |
631 } else { | |
632 return new StoredField(name, i); | |
633 } | |
634 } else if( value instanceof Double ) { | |
635 double i = (Double)value; | |
636 if( indexed.contains(name) ) { | |
637 return new DoubleField(name, i, store); | |
638 } else { | |
639 return new StoredField(name, i); | |
640 } | |
641 } else if( value instanceof byte[] ) { | |
642 byte[] b = (byte[])value; | |
643 return new StoredField(name, b); | |
644 } else | |
645 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); | |
646 } | |
647 | |
648 private Document toLucene(LuanFunction completer,LuanTable table,LuanTable boosts) throws LuanException { | |
649 if( completer != null ) | |
650 table = (LuanTable)completer.call(table); | |
651 return toLucene(table.iterable(),boosts); | |
652 } | |
653 | |
654 private Document toLucene(Iterable<Map.Entry> iterable,LuanTable boosts) throws LuanException { | |
655 Set<String> indexed = mfp.fields.keySet(); | |
656 Document doc = new Document(); | |
657 for( Map.Entry<Object,Object> entry : iterable ) { | |
658 Object key = entry.getKey(); | |
659 if( !(key instanceof String) ) | |
660 throw new LuanException("key must be string"); | |
661 String name = (String)key; | |
662 Object value = entry.getValue(); | |
663 Float boost = null; | |
664 if( boosts != null ) { | |
665 Object obj = boosts.get(name); | |
666 if( obj != null ) { | |
667 if( !(obj instanceof Number) ) | |
668 throw new LuanException("boost '"+name+"' must be number"); | |
669 boost = ((Number)obj).floatValue(); | |
670 } | |
671 } | |
672 if( !(value instanceof LuanTable) ) { | |
673 doc.add(newField( name, value, indexed, boost )); | |
674 } else { // list | |
675 LuanTable list = (LuanTable)value; | 587 LuanTable list = (LuanTable)value; |
676 if( !list.isList() ) | 588 if( !list.isList() ) |
677 throw new LuanException("table value for '"+name+"' must be a list"); | 589 throw new LuanException("table value for '"+name+"' must be a list"); |
678 for( Object el : list.asList() ) { | 590 value = list.asList(); |
679 doc.add(newField( name, el, indexed, boost )); | 591 } |
680 } | 592 map.put(name,value); |
681 } | 593 } |
682 } | 594 return map; |
683 return doc; | |
684 } | |
685 | |
686 private static Object getValue(IndexableField ifld) throws LuanException { | |
687 BytesRef br = ifld.binaryValue(); | |
688 if( br != null ) | |
689 return br.bytes; | |
690 Number n = ifld.numericValue(); | |
691 if( n != null ) | |
692 return n; | |
693 String s = ifld.stringValue(); | |
694 if( s != null ) | |
695 return s; | |
696 throw new LuanException("invalid field type for "+ifld); | |
697 } | 595 } |
698 | 596 |
699 private static LuanTable toTable(Luan luan,Document doc) throws LuanException { | 597 private static LuanTable toTable(Luan luan,Document doc) throws LuanException { |
700 if( doc==null ) | 598 return doc==null ? null : toTable(luan,LuceneUtils.toMap(doc)); |
701 return null; | 599 } |
600 | |
601 static LuanTable toTable(Luan luan,Map map) throws LuanException { | |
702 LuanTable table = new LuanTable(luan); | 602 LuanTable table = new LuanTable(luan); |
703 for( IndexableField ifld : doc ) { | 603 for( Object obj : map.entrySet() ) { |
704 String name = ifld.name(); | 604 Map.Entry entry = (Map.Entry)obj; |
705 Object value = getValue(ifld); | 605 Object value = entry.getValue(); |
706 Object old = table.rawGet(name); | 606 if( value instanceof List ) |
707 if( old == null ) { | 607 value = new LuanTable(luan,(List)value); |
708 table.rawPut(name,value); | 608 table.rawPut( entry.getKey(), value ); |
709 } else { | |
710 LuanTable list; | |
711 if( old instanceof LuanTable ) { | |
712 list = (LuanTable)old; | |
713 } else { | |
714 list = new LuanTable(luan); | |
715 list.rawPut(1,old); | |
716 table.rawPut(name,list); | |
717 } | |
718 list.rawPut(list.rawLength()+1,value); | |
719 } | |
720 } | 609 } |
721 return table; | 610 return table; |
722 } | 611 } |
723 | 612 |
724 | 613 |
841 writeLock.unlock(); | 730 writeLock.unlock(); |
842 } | 731 } |
843 logger.info("end rebuild_postgres_backup"); | 732 logger.info("end rebuild_postgres_backup"); |
844 } | 733 } |
845 | 734 |
846 public void restore_from_postgres(LuanFunction completer) | 735 public void restore_from_postgres() |
847 throws IOException, LuanException, SQLException, ParseException | 736 throws IOException, LuanException, SQLException, ParseException |
848 { | 737 { |
849 if( postgresBackup!=null && wasCreated && !postgresBackup.wasCreated ) { | 738 if( postgresBackup!=null && wasCreated && !postgresBackup.wasCreated ) { |
850 logger.error("restoring from postgres"); | 739 logger.error("restoring from postgres"); |
851 force_restore_from_postgres(completer); | 740 force_restore_from_postgres(); |
852 } | 741 } |
853 } | 742 } |
854 | 743 |
855 public void force_restore_from_postgres(LuanFunction completer) | 744 public void force_restore_from_postgres() |
856 throws IOException, LuanException, SQLException, ParseException | 745 throws IOException, LuanException, SQLException, ParseException |
857 { | 746 { |
858 logger.warn("start restore_from_postgres"); | 747 logger.warn("start restore_from_postgres"); |
859 if( postgresBackup==null ) | 748 if( postgresBackup==null ) |
860 throw new NullPointerException(); | 749 throw new NullPointerException(); |
863 writeLock.lock(); | 752 writeLock.lock(); |
864 boolean ok = false; | 753 boolean ok = false; |
865 try { | 754 try { |
866 writer.deleteAll(); | 755 writer.deleteAll(); |
867 long nextId = postgresBackup.maxId() + 1; | 756 long nextId = postgresBackup.maxId() + 1; |
868 postgresBackup.restoreLucene(this,completer); | 757 postgresBackup.restoreLucene(this); |
869 id = idLim = nextId; | 758 id = idLim = nextId; |
870 saveNextId(nextId); | 759 saveNextId(nextId); |
871 ok = true; | 760 ok = true; |
872 writer.commit(); | 761 writer.commit(); |
873 wasCreated = false; | 762 wasCreated = false; |
880 writeLock.unlock(); | 769 writeLock.unlock(); |
881 } | 770 } |
882 logger.warn("end restore_from_postgres"); | 771 logger.warn("end restore_from_postgres"); |
883 } | 772 } |
884 | 773 |
885 void restore(LuanFunction completer,LuanTable doc) | 774 void restore(LuanTable doc) |
886 throws LuanException, IOException | 775 throws LuanException, IOException |
887 { | 776 { |
888 writer.addDocument(toLucene(completer,doc,null)); | 777 writer.addDocument(toLucene(doc)); |
889 } | 778 } |
890 | 779 |
891 public void check(Luan luan) throws IOException, SQLException, LuanException, ParseException { | 780 public void check(Luan luan) throws IOException, SQLException, LuanException, ParseException { |
892 boolean hasPostgres = postgresBackup != null; | 781 boolean hasPostgres = postgresBackup != null; |
893 String msg = "start check"; | 782 String msg = "start check"; |