Mercurial Hosting > luan
annotate src/luan/modules/lucene/LuceneIndex.java @ 1402:27efb1fcbcb5
move luan.lib to goodjava
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 17 Sep 2019 01:35:01 -0400 |
parents | 38a1c1b4279a |
children | 8187ddb0e827 |
rev | line source |
---|---|
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
1 package luan.modules.lucene; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
2 |
527 | 3 import java.io.Closeable; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
4 import java.io.File; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
5 import java.io.FileOutputStream; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
6 import java.io.FileInputStream; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
7 import java.io.IOException; |
1397 | 8 import java.lang.ref.Reference; |
9 import java.lang.ref.WeakReference; | |
1391 | 10 import java.sql.SQLException; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
11 import java.util.Arrays; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
12 import java.util.Iterator; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
13 import java.util.Map; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
14 import java.util.HashMap; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
15 import java.util.List; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
16 import java.util.ArrayList; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
17 import java.util.Set; |
618
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
18 import java.util.HashSet; |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
19 import java.util.Collections; |
704 | 20 import java.util.concurrent.atomic.AtomicInteger; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
21 import java.util.concurrent.locks.Lock; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
22 import java.util.concurrent.locks.ReentrantLock; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
23 import java.util.zip.ZipOutputStream; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
24 import java.util.zip.ZipEntry; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
25 import org.apache.lucene.analysis.Analyzer; |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
26 import org.apache.lucene.analysis.TokenStream; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
27 import org.apache.lucene.analysis.core.KeywordAnalyzer; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
28 import org.apache.lucene.analysis.en.EnglishAnalyzer; |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
29 import org.apache.lucene.document.Document; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
30 import org.apache.lucene.document.Field; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
31 import org.apache.lucene.document.StoredField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
32 import org.apache.lucene.document.StringField; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
33 import org.apache.lucene.document.TextField; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
34 import org.apache.lucene.document.IntField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
35 import org.apache.lucene.document.LongField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
36 import org.apache.lucene.document.DoubleField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
37 import org.apache.lucene.index.IndexableField; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
38 import org.apache.lucene.index.IndexWriter; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
39 import org.apache.lucene.index.IndexWriterConfig; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
40 import org.apache.lucene.index.DirectoryReader; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
41 import org.apache.lucene.index.Term; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
42 import org.apache.lucene.index.SnapshotDeletionPolicy; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
43 import org.apache.lucene.index.IndexCommit; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
44 import org.apache.lucene.index.AtomicReaderContext; |
1391 | 45 import org.apache.lucene.index.CheckIndex; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
46 import org.apache.lucene.store.Directory; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
47 import org.apache.lucene.store.FSDirectory; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
48 import org.apache.lucene.util.Version; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
49 import org.apache.lucene.util.BytesRef; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
50 import org.apache.lucene.util.NumericUtils; |
312
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
51 import org.apache.lucene.search.Query; |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
52 import org.apache.lucene.search.PrefixQuery; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
53 import org.apache.lucene.search.TermQuery; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
54 import org.apache.lucene.search.TopDocs; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
55 import org.apache.lucene.search.Sort; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
56 import org.apache.lucene.search.SortField; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
57 import org.apache.lucene.search.IndexSearcher; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
58 import org.apache.lucene.search.TotalHitCountCollector; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
59 import org.apache.lucene.search.ScoreDoc; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
60 import org.apache.lucene.search.Collector; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
61 import org.apache.lucene.search.Scorer; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
62 import org.apache.lucene.search.Explanation; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
63 import org.apache.lucene.search.highlight.Formatter; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
64 import org.apache.lucene.search.highlight.Highlighter; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
65 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
66 import org.apache.lucene.search.highlight.Fragmenter; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
67 import org.apache.lucene.search.highlight.NullFragmenter; |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
68 import org.apache.lucene.search.highlight.SimpleSpanFragmenter; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
69 import org.apache.lucene.search.highlight.QueryScorer; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
70 import org.apache.lucene.search.highlight.TokenGroup; |
1402
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
71 import goodjava.queryparser.SaneQueryParser; |
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
72 import goodjava.queryparser.FieldParser; |
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
73 import goodjava.queryparser.MultiFieldParser; |
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
74 import goodjava.queryparser.StringFieldParser; |
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
75 import goodjava.queryparser.NumberFieldParser; |
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
76 import goodjava.parser.ParseException; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
77 import luan.modules.Utils; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
78 import luan.Luan; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
79 import luan.LuanTable; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
80 import luan.LuanFunction; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
81 import luan.LuanException; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
82 import luan.LuanRuntimeException; |
1391 | 83 import luan.modules.parsers.LuanToString; |
1402
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
84 import goodjava.logging.Logger; |
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1399
diff
changeset
|
85 import goodjava.logging.LoggerFactory; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
86 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
87 |
1398 | 88 public final class LuceneIndex { |
1393 | 89 private static final Logger sysLogger = LoggerFactory.getLogger(LuceneIndex.class); |
521
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
90 |
1397 | 91 private static Map<String,Reference<LuceneIndex>> indexes = new HashMap<String,Reference<LuceneIndex>>(); |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
92 |
1397 | 93 public static LuceneIndex getLuceneIndex(Luan luan,File indexDir,LuanTable options) |
1392 | 94 throws LuanException, IOException, ClassNotFoundException, SQLException |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
95 { |
1369
709f7498a363
change Lucene.index() and add Lucene.recover()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1347
diff
changeset
|
96 String key = indexDir.getCanonicalPath(); |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
97 synchronized(indexes) { |
1397 | 98 Reference<LuceneIndex> ref = indexes.get(key); |
99 if( ref != null ) { | |
100 LuceneIndex li = ref.get(); | |
1398 | 101 if( li != null ) { |
102 Object version = options.get("version"); | |
103 if( version==null || version.equals(li.version) ) | |
104 return li; | |
1397 | 105 li.closeWriter(); |
1398 | 106 } |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
107 } |
1397 | 108 LuceneIndex li = new LuceneIndex(luan,indexDir,options); |
109 indexes.put(key, new WeakReference<LuceneIndex>(li)); | |
110 return li; | |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
111 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
112 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
113 |
1398 | 114 private static final Version luceneVersion = Version.LUCENE_4_9; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
115 private static final String FLD_NEXT_ID = "nextId"; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
116 public static final StringFieldParser STRING_FIELD_PARSER = new StringFieldParser(new KeywordAnalyzer()); |
1398 | 117 public static final StringFieldParser ENGLISH_FIELD_PARSER = new StringFieldParser(new EnglishAnalyzer(luceneVersion)); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
118 |
1393 | 119 private final Logger luanLogger; |
1398 | 120 private final Object version; |
1393 | 121 |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
122 private final ReentrantLock writeLock = new ReentrantLock(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
123 private final File indexDir; |
754 | 124 private SnapshotDeletionPolicy snapshotDeletionPolicy; |
125 private IndexWriter writer; | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
126 private DirectoryReader reader; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
127 private IndexSearcher searcher; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
128 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
129 private final MultiFieldParser mfp; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
130 private final Analyzer analyzer; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
131 |
1391 | 132 private FSDirectory fsDir; |
704 | 133 private int writeCount; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
134 private AtomicInteger writeCounter = new AtomicInteger(); |
704 | 135 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
136 private Set<String> indexOnly = new HashSet<String>(); |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
137 private final FieldParser defaultFieldParser; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
138 private final String[] defaultFields; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
139 |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
140 private final PostgresBackup postgresBackup; |
1397 | 141 private boolean wasCreated; |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
142 |
1397 | 143 private LuceneIndex(Luan luan,File indexDir,LuanTable options) |
1392 | 144 throws LuanException, IOException, ClassNotFoundException, SQLException |
1267 | 145 { |
1397 | 146 Map map = options.asMap(); |
1398 | 147 this.version = map.remove("version"); |
1397 | 148 FieldParser defaultFieldParser = (FieldParser)map.remove("default_type"); |
149 LuanTable defaultFieldsTbl = Utils.removeTable(map,"default_fields"); | |
150 String[] defaultFields = defaultFieldsTbl==null ? null : (String[])defaultFieldsTbl.asList().toArray(new String[0]); | |
151 LuanFunction completer = Utils.removeFunction(map,"completer"); | |
152 LuanTable postgresSpec = Utils.removeTable(map,"postgres_spec"); | |
153 Utils.checkEmpty(map); | |
154 | |
1393 | 155 this.luanLogger = luan.getLogger(LuceneIndex.class); |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
156 this.defaultFieldParser = defaultFieldParser; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
157 this.defaultFields = defaultFields; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
158 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
159 mfp.fields.put( "type", STRING_FIELD_PARSER ); |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
160 mfp.fields.put( "id", NumberFieldParser.LONG ); |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
161 this.indexDir = indexDir; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
162 Analyzer analyzer = STRING_FIELD_PARSER.analyzer; |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
163 if( defaultFieldParser instanceof StringFieldParser ) { |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
164 StringFieldParser sfp = (StringFieldParser)defaultFieldParser; |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
165 analyzer = sfp.analyzer; |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
166 } |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
167 this.analyzer = analyzer; |
1397 | 168 wasCreated = reopen(); |
1392 | 169 if( postgresSpec == null ) { |
170 postgresBackup = null; | |
171 } else { | |
1395
9dfff82dfc59
finish postgres work
Franklin Schmidt <fschmidt@gmail.com>
parents:
1393
diff
changeset
|
172 if( completer == null ) |
9dfff82dfc59
finish postgres work
Franklin Schmidt <fschmidt@gmail.com>
parents:
1393
diff
changeset
|
173 throw new LuanException("completer is required for postgres_spec"); |
1392 | 174 Map spec = postgresSpec.asMap(); |
1393 | 175 postgresBackup = new PostgresBackup(luan,spec); |
1397 | 176 if( !wasCreated && postgresBackup.wasCreated ) { |
177 luanLogger.error("rebuilding postgres backup"); | |
178 rebuild_postgres_backup(completer); | |
179 /* | |
180 } else if( wasCreated && !postgresBackup.wasCreated ) { | |
181 luanLogger.error("restoring from postgres"); | |
182 restore_from_postgres(); | |
183 */ | |
1390 | 184 } |
185 } | |
754 | 186 } |
187 | |
1390 | 188 public boolean reopen() throws IOException { |
1398 | 189 IndexWriterConfig conf = new IndexWriterConfig(luceneVersion,analyzer); |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
190 snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
191 conf.setIndexDeletionPolicy(snapshotDeletionPolicy); |
1391 | 192 fsDir = FSDirectory.open(indexDir); |
193 boolean wasCreated = !fsDir.getDirectory().exists(); | |
194 writer = new IndexWriter(fsDir,conf); | |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
195 writer.commit(); // commit index creation |
1391 | 196 reader = DirectoryReader.open(fsDir); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
197 searcher = new IndexSearcher(reader); |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
198 initId(); |
1390 | 199 return wasCreated; |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
200 } |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
201 |
704 | 202 private void wrote() { |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
203 writeCounter.incrementAndGet(); |
704 | 204 } |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
205 |
1392 | 206 public void delete_all() throws IOException, SQLException { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
207 boolean commit = !writeLock.isHeldByCurrentThread(); |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
208 writeLock.lock(); |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
209 try { |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
210 writer.deleteAll(); |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
211 id = idLim = 0; |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
212 if( postgresBackup != null ) |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
213 postgresBackup.deleteAll(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
214 if(commit) writer.commit(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
215 } finally { |
704 | 216 wrote(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
217 writeLock.unlock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
218 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
219 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
220 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
221 private static Term term(String key,long value) { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
222 BytesRef br = new BytesRef(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
223 NumericUtils.longToPrefixCoded(value,0,br); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
224 return new Term(key,br); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
225 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
226 |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
227 private void backupDelete(Query query) |
1392 | 228 throws IOException, SQLException, LuanException |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
229 { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
230 if( postgresBackup != null ) { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
231 final List<Long> ids = new ArrayList<Long>(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
232 IndexSearcher searcher = openSearcher(); |
1391 | 233 try { |
234 MyCollector col = new MyCollector() { | |
235 @Override public void collect(int iDoc) throws IOException { | |
236 Document doc = searcher.doc( docBase + iDoc ); | |
237 Long id = (Long)doc.getField("id").numericValue(); | |
238 ids.add(id); | |
239 } | |
240 }; | |
241 searcher.search(query,col); | |
242 } finally { | |
243 close(searcher); | |
244 } | |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
245 postgresBackup.begin(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
246 for( Long id : ids ) { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
247 postgresBackup.delete(id); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
248 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
249 postgresBackup.commit(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
250 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
251 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
252 |
1346 | 253 public void delete(String queryStr) |
1392 | 254 throws IOException, ParseException, SQLException, LuanException |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
255 { |
622
1a53333eb4d5
remove Lucene all_search_terms_must_match() since now query suffix "~a" handles this
Franklin Schmidt <fschmidt@gmail.com>
parents:
621
diff
changeset
|
256 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
257 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
258 boolean commit = !writeLock.isHeldByCurrentThread(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
259 writeLock.lock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
260 try { |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
261 backupDelete(query); |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
262 writer.deleteDocuments(query); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
263 if(commit) writer.commit(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
264 } finally { |
704 | 265 wrote(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
266 writeLock.unlock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
267 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
268 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
269 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
270 public void indexed_only_fields(List<String> fields) { |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
271 indexOnly.addAll(fields); |
1271
48f302bdc187
fix indexed_only_field
Franklin Schmidt <fschmidt@gmail.com>
parents:
1267
diff
changeset
|
272 } |
48f302bdc187
fix indexed_only_field
Franklin Schmidt <fschmidt@gmail.com>
parents:
1267
diff
changeset
|
273 |
1346 | 274 public void save(LuanTable doc,LuanTable boosts) |
1392 | 275 throws LuanException, IOException, SQLException |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
276 { |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
277 if( boosts!=null && postgresBackup!=null ) |
1392 | 278 throw new LuanException("boosts are not saved to postgres backup"); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
279 |
1267 | 280 Object obj = doc.get("id"); |
601 | 281 Long id; |
282 try { | |
283 id = (Long)obj; | |
284 } catch(ClassCastException e) { | |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
285 throw new LuanException("id should be Long but is "+obj.getClass().getSimpleName()); |
601 | 286 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
287 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
288 boolean commit = !writeLock.isHeldByCurrentThread(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
289 writeLock.lock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
290 try { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
291 if( id == null ) { |
1346 | 292 id = nextId(); |
1267 | 293 doc.put("id",id); |
1391 | 294 if( postgresBackup != null ) |
295 postgresBackup.add(doc); | |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
296 writer.addDocument(toLucene(doc,boosts)); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
297 } else { |
1391 | 298 if( postgresBackup != null ) |
299 postgresBackup.update(doc); | |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
300 writer.updateDocument( term("id",id), toLucene(doc,boosts) ); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
301 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
302 if(commit) writer.commit(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
303 } finally { |
704 | 304 wrote(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
305 writeLock.unlock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
306 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
307 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
308 |
1392 | 309 public Object run_in_transaction(LuanFunction fn) |
310 throws IOException, LuanException, SQLException | |
311 { | |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
312 boolean commit = !writeLock.isHeldByCurrentThread(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
313 writeLock.lock(); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
314 boolean ok = false; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
315 try { |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
316 if( commit && postgresBackup != null ) |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
317 postgresBackup.begin(); |
1379 | 318 Object rtn = fn.call(); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
319 ok = true; |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
320 if(commit) { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
321 if( postgresBackup != null ) |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
322 postgresBackup.commit(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
323 writer.commit(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
324 } |
1379 | 325 return rtn; |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
326 } finally { |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
327 if( !ok && commit ) { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
328 if( postgresBackup != null ) |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
329 postgresBackup.rollback(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
330 writer.rollback(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
331 reopen(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
332 } |
704 | 333 wrote(); |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
334 writeLock.unlock(); |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
335 } |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
336 } |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
337 |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
338 // ??? |
1379 | 339 public Object run_in_lock(LuanFunction fn) throws IOException, LuanException { |
756
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
340 if( writeLock.isHeldByCurrentThread() ) |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
341 throw new RuntimeException(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
342 writeLock.lock(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
343 try { |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
344 synchronized(this) { |
1379 | 345 return fn.call(); |
756
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
346 } |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
347 } finally { |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
348 wrote(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
349 writeLock.unlock(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
350 } |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
351 } |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
352 |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
353 |
// id: the value most recently returned by nextId()
private long id;
// idLim: the id limit that is written to / read from the "next_id" document
private long idLim;
// amount by which idLim is advanced whenever id passes it
private final int idBatch = 10;
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
357 |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
358 private void initId() throws IOException { |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
359 TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
360 switch(td.totalHits) { |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
361 case 0: |
754 | 362 id = 0; |
363 idLim = 0; | |
364 break; | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
365 case 1: |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
366 idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
367 id = idLim; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
368 break; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
369 default: |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
370 throw new RuntimeException(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
371 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
372 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
373 |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
374 private void saveNextId(long nextId) throws LuanException, IOException { |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
375 Map doc = new HashMap(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
376 doc.put( "type", "next_id" ); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
377 doc.put( FLD_NEXT_ID, idLim ); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
378 writer.updateDocument(new Term("type","next_id"),toLucene(doc.entrySet(),null)); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
379 } |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
380 |
1346 | 381 public synchronized long nextId() throws LuanException, IOException { |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
382 if( ++id > idLim ) { |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
383 idLim += idBatch; |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
384 saveNextId(idLim); |
704 | 385 wrote(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
386 } |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
387 return id; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
388 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
389 |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
390 /* |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
391 public void backup(String zipFile) throws LuanException, IOException { |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
392 if( !zipFile.endsWith(".zip") ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
393 throw new LuanException("file "+zipFile+" doesn't end with '.zip'"); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
394 IndexCommit ic = snapshotDeletionPolicy.snapshot(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
395 try { |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
396 ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
397 for( String fileName : ic.getFileNames() ) { |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
398 out.putNextEntry(new ZipEntry(fileName)); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
399 FileInputStream in = new FileInputStream(new File(indexDir,fileName)); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
400 Utils.copyAll(in,out); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
401 in.close(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
402 out.closeEntry(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
403 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
404 out.close(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
405 } finally { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
406 snapshotDeletionPolicy.release(ic); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
407 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
408 } |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
409 */ |
1129
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
410 public SnapshotDeletionPolicy snapshotDeletionPolicy() { |
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
411 return snapshotDeletionPolicy; |
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
412 } |
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
413 |
1346 | 414 public Object snapshot(LuanFunction fn) throws LuanException, IOException { |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
415 IndexCommit ic = snapshotDeletionPolicy.snapshot(); |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
416 try { |
1391 | 417 String dir = fsDir.getDirectory().toString(); |
1346 | 418 LuanTable fileNames = new LuanTable(fn.luan(),new ArrayList(ic.getFileNames())); |
1335 | 419 return fn.call(dir,fileNames); |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
420 } finally { |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
421 snapshotDeletionPolicy.release(ic); |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
422 } |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
423 } |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
424 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
425 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
426 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
427 public String to_string() { |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
428 return writer.getDirectory().toString(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
429 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
430 |
1398 | 431 protected void finalize() throws Throwable { |
432 close(); | |
433 super.finalize(); | |
521
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
434 } |
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
435 |
1398 | 436 public void close() throws IOException, SQLException { |
437 closeWriter(); | |
1392 | 438 reader.close(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
439 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
440 |
1398 | 441 private void closeWriter() throws IOException, SQLException { |
1397 | 442 writeLock.lock(); |
443 try { | |
444 writer.close(); | |
1398 | 445 if( postgresBackup != null ) |
446 postgresBackup.close(); | |
1397 | 447 } finally { |
448 writeLock.unlock(); | |
449 } | |
450 } | |
451 | |
312
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
452 |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
453 private static class DocFn extends LuanFunction { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
454 final IndexSearcher searcher; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
455 final Query query; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
456 int docID; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
457 |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
458 DocFn(Luan luan,IndexSearcher searcher,Query query) { |
1335 | 459 super(luan); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
460 this.searcher = searcher; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
461 this.query = query; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
462 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
463 |
1335 | 464 @Override public Object call(Object[] args) throws LuanException { |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
465 try { |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
466 LuanTable doc = toTable(luan(),searcher.doc(docID)); |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
467 if( args.length > 0 && "explain".equals(args[0]) ) { |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
468 Explanation explanation = searcher.explain(query,docID); |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
469 return new Object[]{doc,explanation}; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
470 } else { |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
471 return doc; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
472 } |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
473 } catch(IOException e) { |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
474 throw new LuanException(e); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
475 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
476 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
477 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
478 |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
479 private static abstract class MyCollector extends Collector { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
480 int docBase; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
481 int i = 0; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
482 |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
483 @Override public void setScorer(Scorer scorer) {} |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
484 @Override public void setNextReader(AtomicReaderContext context) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
485 this.docBase = context.docBase; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
486 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
487 @Override public boolean acceptsDocsOutOfOrder() { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
488 return true; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
489 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
490 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
491 |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
492 private synchronized IndexSearcher openSearcher() throws IOException { |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
493 int gwc = writeCounter.get(); |
704 | 494 if( writeCount != gwc ) { |
495 writeCount = gwc; | |
496 DirectoryReader newReader = DirectoryReader.openIfChanged(reader); | |
497 if( newReader != null ) { | |
498 reader.decRef(); | |
499 reader = newReader; | |
500 searcher = new IndexSearcher(reader); | |
501 } | |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
502 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
503 reader.incRef(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
504 return searcher; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
505 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
506 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
507 // call in finally block |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
508 private static void close(IndexSearcher searcher) throws IOException { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
509 searcher.getIndexReader().decRef(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
510 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
511 |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
512 public void ensure_open() throws IOException { |
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
513 close(openSearcher()); |
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
514 } |
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
515 |
1346 | 516 public int advanced_search( String queryStr, LuanFunction fn, Integer n, String sortStr ) |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
517 throws LuanException, IOException, ParseException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
518 { |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
519 Utils.checkNotNull(queryStr); |
622
1a53333eb4d5
remove Lucene all_search_terms_must_match() since now query suffix "~a" handles this
Franklin Schmidt <fschmidt@gmail.com>
parents:
621
diff
changeset
|
520 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
521 IndexSearcher searcher = threadLocalSearcher.get(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
522 boolean inTransaction = searcher != null; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
523 if( !inTransaction ) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
524 searcher = openSearcher(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
525 try { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
526 if( fn!=null && n==null ) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
527 if( sortStr != null ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
528 throw new LuanException("sort must be nil when n is nil"); |
1346 | 529 final DocFn docFn = new DocFn(fn.luan(),searcher,query); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
530 MyCollector col = new MyCollector() { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
531 @Override public void collect(int doc) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
532 try { |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
533 docFn.docID = docBase + doc; |
1335 | 534 fn.call(++i,docFn); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
535 } catch(LuanException e) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
536 throw new LuanRuntimeException(e); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
537 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
538 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
539 }; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
540 try { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
541 searcher.search(query,col); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
542 } catch(LuanRuntimeException e) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
543 throw (LuanException)e.getCause(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
544 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
545 return col.i; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
546 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
547 if( fn==null || n==0 ) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
548 TotalHitCountCollector thcc = new TotalHitCountCollector(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
549 searcher.search(query,thcc); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
550 return thcc.getTotalHits(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
551 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
552 Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
553 TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
554 final ScoreDoc[] scoreDocs = td.scoreDocs; |
1346 | 555 DocFn docFn = new DocFn(fn.luan(),searcher,query); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
556 for( int i=0; i<scoreDocs.length; i++ ) { |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
557 ScoreDoc scoreDoc = scoreDocs[i]; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
558 docFn.docID = scoreDoc.doc; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
559 fn.call(i+1,docFn,scoreDoc.score); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
560 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
561 return td.totalHits; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
562 } finally { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
563 if( !inTransaction ) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
564 close(searcher); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
565 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
566 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
567 |
1335 | 568 public Object search_in_transaction(LuanFunction fn) throws LuanException, IOException { |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
569 if( threadLocalSearcher.get() != null ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
570 throw new LuanException("can't nest search_in_transaction calls"); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
571 IndexSearcher searcher = openSearcher(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
572 threadLocalSearcher.set(searcher); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
573 try { |
1335 | 574 return fn.call(); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
575 } finally { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
576 threadLocalSearcher.set(null); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
577 close(searcher); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
578 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
579 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
580 |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
581 |
796 | 582 public FieldParser getIndexedFieldParser(String field) { |
583 return mfp.fields.get(field); | |
584 } | |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
585 |
796 | 586 public void setIndexedFieldParser(String field,FieldParser fp) { |
587 if( fp==null ) { // delete | |
588 mfp.fields.remove(field); | |
589 return; | |
590 } | |
591 mfp.fields.put( field, fp ); | |
592 } | |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
593 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
594 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
595 private IndexableField newField(String name,Object value,Set<String> indexed,Float boost) |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
596 throws LuanException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
597 { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
598 boolean hasBoost = boost!=null; |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
599 IndexableField fld = newField2(name,value,indexed,hasBoost); |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
600 if( hasBoost ) |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
601 ((Field)fld).setBoost(boost); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
602 return fld; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
603 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
604 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
605 private IndexableField newField2(String name,Object value,Set<String> indexed,boolean hasBoost) |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
606 throws LuanException |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
607 { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
608 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
609 if( value instanceof String ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
610 String s = (String)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
611 FieldParser fp = mfp.fields.get(name); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
612 if( fp != null ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
613 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
614 return new TextField(name, s, store); |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
615 } else if (hasBoost) { |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
616 // fuck you modern lucene developers |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
617 return new Field(name, s, store, Field.Index.NOT_ANALYZED); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
618 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
619 return new StringField(name, s, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
620 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
621 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
622 return new StoredField(name, s); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
623 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
624 } else if( value instanceof Integer ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
625 int i = (Integer)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
626 if( indexed.contains(name) ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
627 return new IntField(name, i, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
628 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
629 return new StoredField(name, i); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
630 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
631 } else if( value instanceof Long ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
632 long i = (Long)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
633 if( indexed.contains(name) ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
634 return new LongField(name, i, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
635 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
636 return new StoredField(name, i); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
637 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
638 } else if( value instanceof Double ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
639 double i = (Double)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
640 if( indexed.contains(name) ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
641 return new DoubleField(name, i, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
642 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
643 return new StoredField(name, i); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
644 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
645 } else if( value instanceof byte[] ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
646 byte[] b = (byte[])value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
647 return new StoredField(name, b); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
648 } else |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
649 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
650 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
651 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
652 private Document toLucene(LuanTable table,LuanTable boosts) throws LuanException { |
1347
643cf1c37723
move webserver to lib and bug fixes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1346
diff
changeset
|
653 return toLucene(table.iterable(),boosts); |
1346 | 654 } |
655 | |
656 private Document toLucene(Iterable<Map.Entry> iterable,LuanTable boosts) throws LuanException { | |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
657 Set<String> indexed = mfp.fields.keySet(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
658 Document doc = new Document(); |
1346 | 659 for( Map.Entry<Object,Object> entry : iterable ) { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
660 Object key = entry.getKey(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
661 if( !(key instanceof String) ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
662 throw new LuanException("key must be string"); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
663 String name = (String)key; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
664 Object value = entry.getValue(); |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
665 Float boost = null; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
666 if( boosts != null ) { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
667 Object obj = boosts.get(name); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
668 if( obj != null ) { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
669 if( !(obj instanceof Number) ) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
670 throw new LuanException("boost '"+name+"' must be number"); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
671 boost = ((Number)obj).floatValue(); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
672 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
673 } |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
674 if( !(value instanceof LuanTable) ) { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
675 doc.add(newField( name, value, indexed, boost )); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
676 } else { // list |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
677 LuanTable list = (LuanTable)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
678 for( Object el : list.asList() ) { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
679 doc.add(newField( name, el, indexed, boost )); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
680 } |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
681 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
682 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
683 return doc; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
684 } |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
685 |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
686 private static Object getValue(IndexableField ifld) throws LuanException { |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
687 BytesRef br = ifld.binaryValue(); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
688 if( br != null ) |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
689 return br.bytes; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
690 Number n = ifld.numericValue(); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
691 if( n != null ) |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
692 return n; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
693 String s = ifld.stringValue(); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
694 if( s != null ) |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
695 return s; |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
696 throw new LuanException("invalid field type for "+ifld); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
697 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
698 |
1333
25746915a241
merge Luan and LuanState
Franklin Schmidt <fschmidt@gmail.com>
parents:
1271
diff
changeset
|
699 private static LuanTable toTable(Luan luan,Document doc) throws LuanException { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
700 if( doc==null ) |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
701 return null; |
1267 | 702 LuanTable table = new LuanTable(luan); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
703 for( IndexableField ifld : doc ) { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
704 String name = ifld.name(); |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
705 Object value = getValue(ifld); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
706 Object old = table.rawGet(name); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
707 if( old == null ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
708 table.rawPut(name,value); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
709 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
710 LuanTable list; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
711 if( old instanceof LuanTable ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
712 list = (LuanTable)old; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
713 } else { |
1267 | 714 list = new LuanTable(luan); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
715 list.rawPut(1,old); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
716 table.rawPut(name,list); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
717 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
718 list.rawPut(list.rawLength()+1,value); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
719 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
720 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
721 return table; |
312
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
722 } |
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
723 |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
724 |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
725 private static final Formatter nullFormatter = new Formatter() { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
726 public String highlightTerm(String originalText,TokenGroup tokenGroup) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
727 return originalText; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
728 } |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
729 }; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
730 |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
731 public LuanFunction highlighter(String queryStr,final LuanFunction formatter,final Integer fragmentSize,String dotdotdot) |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
732 throws ParseException |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
733 { |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
734 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
735 Formatter fmt = new Formatter() { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
736 public String highlightTerm(String originalText,TokenGroup tokenGroup) { |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
737 if( tokenGroup.getTotalScore() <= 0 ) |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
738 return originalText; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
739 try { |
1335 | 740 return (String)Luan.first(formatter.call(originalText)); |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
741 } catch(LuanException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
742 throw new LuanRuntimeException(e); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
743 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
744 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
745 }; |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
746 QueryScorer queryScorer = new QueryScorer(query); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
747 final Highlighter chooser = fragmentSize==null ? null : new Highlighter(nullFormatter,queryScorer); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
748 if( chooser != null ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
749 chooser.setTextFragmenter( new SimpleSpanFragmenter(queryScorer,fragmentSize) ); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
750 final Highlighter hl = new Highlighter(fmt,queryScorer); |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
751 hl.setTextFragmenter( new NullFragmenter() ); |
1335 | 752 return new LuanFunction(false) { // ??? |
753 @Override public String call(Object[] args) throws LuanException { | |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
754 String text = (String)args[0]; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
755 try { |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
756 if( chooser != null ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
757 String s = chooser.getBestFragment(analyzer,null,text); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
758 if( s != null ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
759 if( dotdotdot != null ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
760 boolean atStart = text.startsWith(s); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
761 boolean atEnd = text.endsWith(s); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
762 if( !atStart ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
763 s = dotdotdot + s; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
764 if( !atEnd ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
765 s = s + dotdotdot; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
766 } |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
767 text = s; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
768 } else if( text.length() > fragmentSize ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
769 text = text.substring(0,fragmentSize); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
770 if( dotdotdot != null ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
771 text += "..."; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
772 } |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
773 } |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
774 String s = hl.getBestFragment(analyzer,null,text); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
775 return s!=null ? s : text; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
776 } catch(LuanRuntimeException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
777 throw (LuanException)e.getCause(); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
778 } catch(IOException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
779 throw new RuntimeException(e); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
780 } catch(InvalidTokenOffsetsException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
781 throw new RuntimeException(e); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
782 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
783 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
784 }; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
785 } |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
786 |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
787 public int count_tokens(String text) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
788 throws IOException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
789 { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
790 int n = 0; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
791 TokenStream ts = analyzer.tokenStream(null,text); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
792 ts.reset(); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
793 while( ts.incrementToken() ) { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
794 n++; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
795 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
796 ts.close(); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
797 return n; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
798 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
799 |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
800 |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
801 |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
802 public boolean hasPostgresBackup() { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
803 return postgresBackup != null; |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
804 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
805 |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
806 public void rebuild_postgres_backup(LuanFunction completer) |
1392 | 807 throws IOException, LuanException, SQLException |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
808 { |
1393 | 809 luanLogger.info("start rebuild_postgres_backup"); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
810 writeLock.lock(); |
1391 | 811 IndexSearcher searcher = openSearcher(); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
812 boolean ok = false; |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
813 try { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
814 postgresBackup.begin(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
815 postgresBackup.deleteAll(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
816 Query query = new PrefixQuery(new Term("id")); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
817 MyCollector col = new MyCollector() { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
818 @Override public void collect(int iDoc) throws IOException { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
819 try { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
820 Document doc = searcher.doc( docBase + iDoc ); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
821 LuanTable tbl = toTable(completer.luan(),doc); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
822 tbl = (LuanTable)completer.call(tbl); |
1391 | 823 postgresBackup.add(tbl); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
824 } catch(LuanException e) { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
825 throw new LuanRuntimeException(e); |
1392 | 826 } catch(SQLException e) { |
827 throw new RuntimeException(e); | |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
828 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
829 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
830 }; |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
831 try { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
832 searcher.search(query,col); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
833 } catch(LuanRuntimeException e) { |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
834 throw (LuanException)e.getCause(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
835 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
836 ok = true; |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
837 postgresBackup.commit(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
838 } finally { |
1391 | 839 close(searcher); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
840 if( !ok ) |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
841 postgresBackup.rollback(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
842 writeLock.unlock(); |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
843 } |
1393 | 844 luanLogger.info("end rebuild_postgres_backup"); |
1387
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
845 } |
bc40bc9aab3a
start postgres backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1379
diff
changeset
|
846 |
1393 | 847 public void restore_from_postgres() |
1392 | 848 throws IOException, LuanException, SQLException |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
849 { |
1397 | 850 if( postgresBackup!=null && wasCreated && !postgresBackup.wasCreated ) { |
851 luanLogger.error("restoring from postgres"); | |
852 force_restore_from_postgres(); | |
853 } | |
854 } | |
855 | |
856 public void force_restore_from_postgres() | |
857 throws IOException, LuanException, SQLException | |
858 { | |
1393 | 859 luanLogger.warn("start restore_from_postgres"); |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
860 if( postgresBackup==null ) |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
861 throw new NullPointerException(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
862 if( writeLock.isHeldByCurrentThread() ) |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
863 throw new RuntimeException(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
864 writeLock.lock(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
865 boolean ok = false; |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
866 try { |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
867 writer.deleteAll(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
868 long nextId = postgresBackup.maxId() + 1; |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
869 postgresBackup.restoreLucene(this); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
870 id = idLim = nextId; |
1395
9dfff82dfc59
finish postgres work
Franklin Schmidt <fschmidt@gmail.com>
parents:
1393
diff
changeset
|
871 saveNextId(nextId); |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
872 ok = true; |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
873 writer.commit(); |
1398 | 874 wasCreated = false; |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
875 } finally { |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
876 if( !ok ) { |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
877 writer.rollback(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
878 reopen(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
879 } |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
880 wrote(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
881 writeLock.unlock(); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
882 } |
1393 | 883 luanLogger.warn("end restore_from_postgres"); |
1388
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
884 } |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
885 |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
886 void restore(LuanTable doc) |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
887 throws LuanException, IOException |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
888 { |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
889 writer.addDocument(toLucene(doc,null)); |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
890 } |
2024d23ddd64
add restore_from_postgres
Franklin Schmidt <fschmidt@gmail.com>
parents:
1387
diff
changeset
|
891 |
1391 | 892 public void check(LuanFunction completer) throws IOException, SQLException, LuanException { |
1393 | 893 luanLogger.info("start check"); |
1391 | 894 CheckIndex.Status status = new CheckIndex(fsDir).checkIndex(); |
895 if( !status.clean ) | |
1393 | 896 luanLogger.error("index not clean"); |
1391 | 897 if( postgresBackup != null ) |
898 checkPostgres(completer); | |
1393 | 899 luanLogger.info("end check"); |
1391 | 900 } |
901 | |
902 private void checkPostgres(LuanFunction completer) throws IOException, SQLException, LuanException { | |
1395
9dfff82dfc59
finish postgres work
Franklin Schmidt <fschmidt@gmail.com>
parents:
1393
diff
changeset
|
903 luanLogger.info("start postgres check"); |
1391 | 904 final PostgresBackup.Checker postgresChecker; |
905 final IndexSearcher searcher; | |
906 writeLock.lock(); | |
907 try { | |
908 postgresChecker = postgresBackup.newChecker(); | |
909 searcher = openSearcher(); | |
910 } finally { | |
911 writeLock.unlock(); | |
912 } | |
913 try { | |
914 final List<Long> idsLucene = new ArrayList<Long>(); | |
915 Query query = new PrefixQuery(new Term("id")); | |
916 MyCollector col = new MyCollector() { | |
917 @Override public void collect(int iDoc) throws IOException { | |
918 Document doc = searcher.doc( docBase + iDoc ); | |
919 Long id = (Long)doc.getField("id").numericValue(); | |
920 idsLucene.add(id); | |
921 } | |
922 }; | |
923 searcher.search(query,col); | |
924 Collections.sort(idsLucene); | |
925 final List<Long> idsPostgres = postgresChecker.getIds(); | |
926 final int nLucene = idsLucene.size(); | |
927 final int nPostgres = idsPostgres.size(); | |
928 int iLucene = 0; | |
929 int iPostgres = 0; | |
930 LuanToString lts = new LuanToString(); | |
931 lts.strict = true; | |
932 lts.numberTypes = true; | |
933 while( iLucene < nLucene && iPostgres < nPostgres ) { | |
934 long idLucene = idsLucene.get(iLucene); | |
935 long idPostgres = idsPostgres.get(iPostgres); | |
936 if( idLucene < idPostgres ) { | |
937 iLucene++; | |
1393 | 938 luanLogger.error("id "+idLucene+" found in lucene but not postgres"); |
1391 | 939 } else if( idLucene > idPostgres ) { |
940 iPostgres++; | |
1393 | 941 luanLogger.error("id "+idPostgres+" found in postgres but not lucene"); |
1391 | 942 } else { // == |
943 LuanTable docPostgres = postgresChecker.getDoc(idPostgres); | |
944 TopDocs td = searcher.search(new TermQuery(term("id",idLucene)),1); | |
945 if( td.totalHits != 1 ) throw new RuntimeException(); | |
946 Document doc = searcher.doc( td.scoreDocs[0].doc ); | |
947 LuanTable docLucene = toTable(completer.luan(),doc); | |
948 docLucene = (LuanTable)completer.call(docLucene); | |
949 if( !equal(docPostgres,docLucene) ) { | |
1393 | 950 luanLogger.error("id "+idLucene+" not equal"); |
951 luanLogger.error("lucene = "+lts.toString(docLucene)); | |
952 luanLogger.error("postgres = "+lts.toString(docPostgres)); | |
1391 | 953 } |
954 iLucene++; | |
955 iPostgres++; | |
956 } | |
957 } | |
958 while( iLucene < nLucene ) { | |
959 long idLucene = idsLucene.get(iLucene++); | |
1393 | 960 luanLogger.error("id "+idLucene+" found in lucene but not postgres"); |
1391 | 961 } |
962 while( iPostgres < nPostgres ) { | |
963 long idPostgres = idsPostgres.get(iPostgres++); | |
1393 | 964 luanLogger.error("id "+idPostgres+" found in postgres but not lucene"); |
1391 | 965 } |
966 } finally { | |
967 close(searcher); | |
968 postgresChecker.close(); | |
969 } | |
970 } | |
971 | |
972 private boolean equal(LuanTable t1,LuanTable t2) throws LuanException { | |
1399 | 973 return t1!=null && t2!=null && t1.asMap().equals(t2.asMap()); |
1391 | 974 } |
975 | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
976 } |