Mercurial Hosting > luan
annotate src/luan/modules/lucene/LuceneIndex.java @ 1345:6f8988830098
unique LuceneIndex per dir
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Mon, 25 Feb 2019 11:00:10 -0700 |
parents | dc2af9d5463b |
children | efd1c6380f2c |
rev | line source |
---|---|
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
1 package luan.modules.lucene; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
2 |
527 | 3 import java.io.Closeable; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
4 import java.io.File; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
5 import java.io.FileOutputStream; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
6 import java.io.FileInputStream; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
7 import java.io.IOException; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
8 import java.util.Arrays; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
9 import java.util.Iterator; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
10 import java.util.Map; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
11 import java.util.HashMap; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
12 import java.util.List; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
13 import java.util.ArrayList; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
14 import java.util.Set; |
618
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
15 import java.util.HashSet; |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
16 import java.util.Collections; |
704 | 17 import java.util.concurrent.atomic.AtomicInteger; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
18 import java.util.concurrent.locks.Lock; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
19 import java.util.concurrent.locks.ReentrantLock; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
20 import java.util.zip.ZipOutputStream; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
21 import java.util.zip.ZipEntry; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
22 import org.apache.lucene.analysis.Analyzer; |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
23 import org.apache.lucene.analysis.TokenStream; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
24 import org.apache.lucene.analysis.core.KeywordAnalyzer; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
25 import org.apache.lucene.analysis.en.EnglishAnalyzer; |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
26 import org.apache.lucene.document.Document; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
27 import org.apache.lucene.document.Field; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
28 import org.apache.lucene.document.StoredField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
29 import org.apache.lucene.document.StringField; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
30 import org.apache.lucene.document.TextField; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
31 import org.apache.lucene.document.IntField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
32 import org.apache.lucene.document.LongField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
33 import org.apache.lucene.document.DoubleField; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
34 import org.apache.lucene.index.IndexableField; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
35 import org.apache.lucene.index.IndexWriter; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
36 import org.apache.lucene.index.IndexWriterConfig; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
37 import org.apache.lucene.index.DirectoryReader; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
38 import org.apache.lucene.index.Term; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
39 import org.apache.lucene.index.SnapshotDeletionPolicy; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
40 import org.apache.lucene.index.IndexCommit; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
41 import org.apache.lucene.index.AtomicReaderContext; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
42 import org.apache.lucene.store.Directory; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
43 import org.apache.lucene.store.FSDirectory; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
44 import org.apache.lucene.util.Version; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
45 import org.apache.lucene.util.BytesRef; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
46 import org.apache.lucene.util.NumericUtils; |
312
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
47 import org.apache.lucene.search.Query; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
48 import org.apache.lucene.search.TermQuery; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
49 import org.apache.lucene.search.TopDocs; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
50 import org.apache.lucene.search.Sort; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
51 import org.apache.lucene.search.SortField; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
52 import org.apache.lucene.search.IndexSearcher; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
53 import org.apache.lucene.search.TotalHitCountCollector; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
54 import org.apache.lucene.search.ScoreDoc; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
55 import org.apache.lucene.search.Collector; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
56 import org.apache.lucene.search.Scorer; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
57 import org.apache.lucene.search.Explanation; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
58 import org.apache.lucene.search.highlight.Formatter; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
59 import org.apache.lucene.search.highlight.Highlighter; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
60 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
61 import org.apache.lucene.search.highlight.Fragmenter; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
62 import org.apache.lucene.search.highlight.NullFragmenter; |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
63 import org.apache.lucene.search.highlight.SimpleSpanFragmenter; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
64 import org.apache.lucene.search.highlight.QueryScorer; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
65 import org.apache.lucene.search.highlight.TokenGroup; |
1344
dc2af9d5463b
move queryparser to lib
Franklin Schmidt <fschmidt@gmail.com>
parents:
1343
diff
changeset
|
66 import luan.lib.queryparser.SaneQueryParser; |
dc2af9d5463b
move queryparser to lib
Franklin Schmidt <fschmidt@gmail.com>
parents:
1343
diff
changeset
|
67 import luan.lib.queryparser.FieldParser; |
dc2af9d5463b
move queryparser to lib
Franklin Schmidt <fschmidt@gmail.com>
parents:
1343
diff
changeset
|
68 import luan.lib.queryparser.MultiFieldParser; |
dc2af9d5463b
move queryparser to lib
Franklin Schmidt <fschmidt@gmail.com>
parents:
1343
diff
changeset
|
69 import luan.lib.queryparser.StringFieldParser; |
dc2af9d5463b
move queryparser to lib
Franklin Schmidt <fschmidt@gmail.com>
parents:
1343
diff
changeset
|
70 import luan.lib.queryparser.NumberFieldParser; |
1111
88b5b81cad4a
move Parser to luan.lib.parser
Franklin Schmidt <fschmidt@gmail.com>
parents:
1110
diff
changeset
|
71 import luan.lib.parser.ParseException; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
72 import luan.modules.Utils; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
73 import luan.Luan; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
74 import luan.LuanTable; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
75 import luan.LuanFunction; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
76 import luan.LuanException; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
77 import luan.LuanRuntimeException; |
1337 | 78 import luan.lib.logging.Logger; |
79 import luan.lib.logging.LoggerFactory; | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
80 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
81 |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
82 public final class LuceneIndex { |
521
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
83 private static final Logger logger = LoggerFactory.getLogger(LuceneIndex.class); |
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
84 |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
85 private static final class Closer implements Closeable { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
86 final LuceneIndex li; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
87 boolean isClosed = false; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
88 private final Exception created = new Exception("created"); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
89 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
90 Closer(Luan luan,LuceneIndex li) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
91 this.li = li; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
92 luan.onClose(this); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
93 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
94 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
95 public void close() throws IOException { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
96 if( !isClosed ) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
97 li.close(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
98 isClosed = true; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
99 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
100 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
101 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
102 protected void finalize() throws Throwable { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
103 if( !isClosed ) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
104 logger.error("not closed",created); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
105 close(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
106 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
107 super.finalize(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
108 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
109 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
110 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
111 private static Map<String,LuceneIndex> indexes = new HashMap<String,LuceneIndex>(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
112 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
113 public static Object[] getLuceneIndex(Luan luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
114 throws LuanException, IOException |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
115 { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
116 String key = new File(indexDirStr).getCanonicalPath(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
117 synchronized(indexes) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
118 LuceneIndex li = indexes.get(key); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
119 if( li == null ) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
120 li = new LuceneIndex(indexDirStr,defaultFieldParser,defaultFields,key); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
121 li.openCount = 1; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
122 indexes.put(key,li); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
123 } else { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
124 if( defaultFieldParser != li.defaultFieldParser ) |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
125 throw new LuanException("default_type doesn't match previous use"); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
126 if( !Arrays.equals(defaultFields,li.defaultFields) ) |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
127 throw new LuanException("default_fields don't match previous use"); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
128 li.openCount++; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
129 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
130 return new Object[]{li,new Closer(luan,li)}; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
131 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
132 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
133 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
134 private static final Version version = Version.LUCENE_4_9; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
135 private static final String FLD_NEXT_ID = "nextId"; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
136 public static final StringFieldParser STRING_FIELD_PARSER = new StringFieldParser(new KeywordAnalyzer()); |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
137 public static final StringFieldParser ENGLISH_FIELD_PARSER = new StringFieldParser(new EnglishAnalyzer(version)); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
138 |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
139 private final ReentrantLock writeLock = new ReentrantLock(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
140 private final File indexDir; |
754 | 141 private SnapshotDeletionPolicy snapshotDeletionPolicy; |
142 private IndexWriter writer; | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
143 private DirectoryReader reader; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
144 private IndexSearcher searcher; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
145 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
146 private final MultiFieldParser mfp; |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
147 private final Analyzer analyzer; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
148 |
754 | 149 private File fileDir; |
704 | 150 private int writeCount; |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
151 private AtomicInteger writeCounter = new AtomicInteger(); |
704 | 152 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
153 private Set<String> indexOnly = new HashSet<String>(); |
1271
48f302bdc187
fix indexed_only_field
Franklin Schmidt <fschmidt@gmail.com>
parents:
1267
diff
changeset
|
154 |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
155 private int openCount; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
156 private final String key; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
157 private final FieldParser defaultFieldParser; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
158 private final String[] defaultFields; |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
159 |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
/**
 * Builds the index state for one directory and opens it via reopen().
 * Instances are created only through getLuceneIndex().
 *
 * @param indexDirStr path of the index directory
 * @param defaultFieldParser parser for the default fields, or null for none
 * @param defaultFields names of the default search fields
 * @param key registry key for this index (the canonical directory path)
 */
private LuceneIndex(String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields,String key)
	throws LuanException, IOException
{
	this.key = key;
	this.defaultFieldParser = defaultFieldParser;
	this.defaultFields = defaultFields;

	if( defaultFieldParser == null ) {
		this.mfp = new MultiFieldParser();
	} else {
		this.mfp = new MultiFieldParser(defaultFieldParser,defaultFields);
	}
	// "type" and "id" always get fixed parsers.
	mfp.fields.put( "type", STRING_FIELD_PARSER );
	mfp.fields.put( "id", NumberFieldParser.LONG );

	this.indexDir = new File(indexDirStr);

	// Use the default parser's analyzer when it is a string parser,
	// otherwise fall back to the keyword analyzer of STRING_FIELD_PARSER.
	this.analyzer = defaultFieldParser instanceof StringFieldParser
		? ((StringFieldParser)defaultFieldParser).analyzer
		: STRING_FIELD_PARSER.analyzer;

	reopen();
}
179 | |
180 public void reopen() throws LuanException, IOException { | |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
181 IndexWriterConfig conf = new IndexWriterConfig(version,analyzer); |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
182 snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
183 conf.setIndexDeletionPolicy(snapshotDeletionPolicy); |
754 | 184 FSDirectory dir = FSDirectory.open(indexDir); |
185 fileDir = dir.getDirectory(); | |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
186 writer = new IndexWriter(dir,conf); |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
187 writer.commit(); // commit index creation |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
188 reader = DirectoryReader.open(dir); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
189 searcher = new IndexSearcher(reader); |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
190 initId(); |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
191 } |
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
192 |
704 | 193 private void wrote() { |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
194 writeCounter.incrementAndGet(); |
704 | 195 } |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
196 |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
197 public void delete_all() throws IOException { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
198 boolean commit = !writeLock.isHeldByCurrentThread(); |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
199 writeLock.lock(); |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
200 try { |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
201 writer.deleteAll(); |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
202 id = idLim = 0; |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
203 if(commit) writer.commit(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
204 } finally { |
704 | 205 wrote(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
206 writeLock.unlock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
207 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
208 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
209 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
210 private static Term term(String key,long value) { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
211 BytesRef br = new BytesRef(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
212 NumericUtils.longToPrefixCoded(value,0,br); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
213 return new Term(key,br); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
214 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
215 |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
216 public void delete(Luan luan,String queryStr) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
217 throws LuanException, IOException, ParseException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
218 { |
622
1a53333eb4d5
remove Lucene all_search_terms_must_match() since now query suffix "~a" handles this
Franklin Schmidt <fschmidt@gmail.com>
parents:
621
diff
changeset
|
219 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
220 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
221 boolean commit = !writeLock.isHeldByCurrentThread(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
222 writeLock.lock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
223 try { |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
224 writer.deleteDocuments(query); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
225 if(commit) writer.commit(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
226 } finally { |
704 | 227 wrote(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
228 writeLock.unlock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
229 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
230 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
231 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
232 public void indexed_only_fields(List<String> fields) { |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
233 indexOnly.addAll(fields); |
1271
48f302bdc187
fix indexed_only_field
Franklin Schmidt <fschmidt@gmail.com>
parents:
1267
diff
changeset
|
234 } |
48f302bdc187
fix indexed_only_field
Franklin Schmidt <fschmidt@gmail.com>
parents:
1267
diff
changeset
|
235 |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
236 public void save(Luan luan,LuanTable doc,LuanTable boosts) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
237 throws LuanException, IOException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
238 { |
1267 | 239 Object obj = doc.get("id"); |
601 | 240 Long id; |
241 try { | |
242 id = (Long)obj; | |
243 } catch(ClassCastException e) { | |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
244 throw new LuanException("id should be Long but is "+obj.getClass().getSimpleName()); |
601 | 245 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
246 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
247 boolean commit = !writeLock.isHeldByCurrentThread(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
248 writeLock.lock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
249 try { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
250 if( id == null ) { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
251 id = nextId(luan); |
1267 | 252 doc.put("id",id); |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
253 writer.addDocument(toLucene(doc,boosts)); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
254 } else { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
255 writer.updateDocument( term("id",id), toLucene(doc,boosts) ); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
256 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
257 if(commit) writer.commit(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
258 } finally { |
704 | 259 wrote(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
260 writeLock.unlock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
261 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
262 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
263 |
1335 | 264 public void update_in_transaction(LuanFunction fn) throws IOException, LuanException { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
265 boolean commit = !writeLock.isHeldByCurrentThread(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
266 writeLock.lock(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
267 try { |
1335 | 268 fn.call(); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
269 if(commit) writer.commit(); |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
270 } finally { |
704 | 271 wrote(); |
252
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
272 writeLock.unlock(); |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
273 } |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
274 } |
3896138955b1
web testing...
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
275 |
1335 | 276 public void run_in_lock(LuanFunction fn) throws IOException, LuanException { |
756
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
277 if( writeLock.isHeldByCurrentThread() ) |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
278 throw new RuntimeException(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
279 writeLock.lock(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
280 try { |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
281 synchronized(this) { |
1335 | 282 fn.call(); |
756
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
283 } |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
284 } finally { |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
285 wrote(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
286 writeLock.unlock(); |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
287 } |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
288 } |
9092e52f94eb
better synchronization for lucene restore
Franklin Schmidt <fschmidt@gmail.com>
parents:
754
diff
changeset
|
289 |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
290 |
754 | 291 private long id; |
292 private long idLim; | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
293 private final int idBatch = 10; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
294 |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
295 private void initId() throws LuanException, IOException { |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
296 TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
297 switch(td.totalHits) { |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
298 case 0: |
754 | 299 id = 0; |
300 idLim = 0; | |
301 break; | |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
302 case 1: |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
303 idLim = (Long)searcher.doc(td.scoreDocs[0].doc).getField(FLD_NEXT_ID).numericValue(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
304 id = idLim; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
305 break; |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
306 default: |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
307 throw new RuntimeException(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
308 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
309 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
310 |
1333
25746915a241
merge Luan and LuanState
Franklin Schmidt <fschmidt@gmail.com>
parents:
1271
diff
changeset
|
311 public synchronized long nextId(Luan luan) throws LuanException, IOException { |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
312 if( ++id > idLim ) { |
233
ef39bc4d3f70
basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
232
diff
changeset
|
313 idLim += idBatch; |
1267 | 314 LuanTable doc = new LuanTable(luan); |
427
dae264ad6a7b
fix LuanTable.put() to use metatables
Franklin Schmidt <fschmidt@gmail.com>
parents:
426
diff
changeset
|
315 doc.rawPut( "type", "next_id" ); |
dae264ad6a7b
fix LuanTable.put() to use metatables
Franklin Schmidt <fschmidt@gmail.com>
parents:
426
diff
changeset
|
316 doc.rawPut( FLD_NEXT_ID, idLim ); |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
317 writer.updateDocument(new Term("type","next_id"),toLucene(doc,null)); |
704 | 318 wrote(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
319 } |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
320 return id; |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
321 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
322 |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
323 /* |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
324 public void backup(String zipFile) throws LuanException, IOException { |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
325 if( !zipFile.endsWith(".zip") ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
326 throw new LuanException("file "+zipFile+" doesn't end with '.zip'"); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
327 IndexCommit ic = snapshotDeletionPolicy.snapshot(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
328 try { |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
329 ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
330 for( String fileName : ic.getFileNames() ) { |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
331 out.putNextEntry(new ZipEntry(fileName)); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
332 FileInputStream in = new FileInputStream(new File(indexDir,fileName)); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
333 Utils.copyAll(in,out); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
334 in.close(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
335 out.closeEntry(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
336 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
337 out.close(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
338 } finally { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
339 snapshotDeletionPolicy.release(ic); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
340 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
341 } |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
342 */ |
1129
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
343 public SnapshotDeletionPolicy snapshotDeletionPolicy() { |
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
344 return snapshotDeletionPolicy; |
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
345 } |
3234a14bb1f8
minor lucene changes
Franklin Schmidt <fschmidt@gmail.com>
parents:
1111
diff
changeset
|
346 |
1333
25746915a241
merge Luan and LuanState
Franklin Schmidt <fschmidt@gmail.com>
parents:
1271
diff
changeset
|
347 public Object snapshot(Luan luan,LuanFunction fn) throws LuanException, IOException { |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
348 IndexCommit ic = snapshotDeletionPolicy.snapshot(); |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
349 try { |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
350 String dir = fileDir.toString(); |
1267 | 351 LuanTable fileNames = new LuanTable(luan,new ArrayList(ic.getFileNames())); |
1335 | 352 return fn.call(dir,fileNames); |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
353 } finally { |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
354 snapshotDeletionPolicy.release(ic); |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
355 } |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
704
diff
changeset
|
356 } |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
357 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
358 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
359 |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
360 public String to_string() { |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
361 return writer.getDirectory().toString(); |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
362 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
363 |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
364 private synchronized void close() throws IOException { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
365 if( openCount > 0 ) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
366 if( --openCount == 0 ) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
367 doClose(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
368 synchronized(indexes) { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
369 indexes.remove(key); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
370 } |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
371 } |
521
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
372 } |
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
373 } |
8a217fe5b4f3
cleaner LuanState.onClose()
Franklin Schmidt <fschmidt@gmail.com>
parents:
427
diff
changeset
|
374 |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
375 public void doClose() throws IOException { |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
376 writer.close(); |
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
377 reader.close(); |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
378 } |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
379 |
312
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
380 |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
381 private static class DocFn extends LuanFunction { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
382 final IndexSearcher searcher; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
383 final Query query; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
384 int docID; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
385 |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
386 DocFn(Luan luan,IndexSearcher searcher,Query query) { |
1335 | 387 super(luan); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
388 this.searcher = searcher; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
389 this.query = query; |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
390 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
391 |
1335 | 392 @Override public Object call(Object[] args) throws LuanException { |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
393 try { |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
394 LuanTable doc = toTable(luan(),searcher.doc(docID)); |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
395 if( args.length > 0 && "explain".equals(args[0]) ) { |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
396 Explanation explanation = searcher.explain(query,docID); |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
397 return new Object[]{doc,explanation}; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
398 } else { |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
399 return doc; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
400 } |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
401 } catch(IOException e) { |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
402 throw new LuanException(e); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
403 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
404 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
405 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
406 |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
407 private static abstract class MyCollector extends Collector { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
408 int docBase; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
409 int i = 0; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
410 |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
411 @Override public void setScorer(Scorer scorer) {} |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
412 @Override public void setNextReader(AtomicReaderContext context) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
413 this.docBase = context.docBase; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
414 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
415 @Override public boolean acceptsDocsOutOfOrder() { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
416 return true; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
417 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
418 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
419 |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
420 private synchronized IndexSearcher openSearcher() throws IOException { |
1345
6f8988830098
unique LuceneIndex per dir
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
421 int gwc = writeCounter.get(); |
704 | 422 if( writeCount != gwc ) { |
423 writeCount = gwc; | |
424 DirectoryReader newReader = DirectoryReader.openIfChanged(reader); | |
425 if( newReader != null ) { | |
426 reader.decRef(); | |
427 reader = newReader; | |
428 searcher = new IndexSearcher(reader); | |
429 } | |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
430 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
431 reader.incRef(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
432 return searcher; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
433 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
434 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
435 // call in finally block |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
436 private static void close(IndexSearcher searcher) throws IOException { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
437 searcher.getIndexReader().decRef(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
438 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
439 |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
440 public void ensure_open() throws IOException { |
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
441 close(openSearcher()); |
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
442 } |
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
578
diff
changeset
|
443 |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
444 public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
445 throws LuanException, IOException, ParseException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
446 { |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
447 Utils.checkNotNull(queryStr); |
622
1a53333eb4d5
remove Lucene all_search_terms_must_match() since now query suffix "~a" handles this
Franklin Schmidt <fschmidt@gmail.com>
parents:
621
diff
changeset
|
448 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
449 IndexSearcher searcher = threadLocalSearcher.get(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
450 boolean inTransaction = searcher != null; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
451 if( !inTransaction ) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
452 searcher = openSearcher(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
453 try { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
454 if( fn!=null && n==null ) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
455 if( sortStr != null ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
456 throw new LuanException("sort must be nil when n is nil"); |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
457 final DocFn docFn = new DocFn(luan,searcher,query); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
458 MyCollector col = new MyCollector() { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
459 @Override public void collect(int doc) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
460 try { |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
461 docFn.docID = docBase + doc; |
1335 | 462 fn.call(++i,docFn); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
463 } catch(LuanException e) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
464 throw new LuanRuntimeException(e); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
465 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
466 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
467 }; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
468 try { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
469 searcher.search(query,col); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
470 } catch(LuanRuntimeException e) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
471 throw (LuanException)e.getCause(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
472 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
473 return col.i; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
474 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
475 if( fn==null || n==0 ) { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
476 TotalHitCountCollector thcc = new TotalHitCountCollector(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
477 searcher.search(query,thcc); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
478 return thcc.getTotalHits(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
479 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
480 Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
481 TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
482 final ScoreDoc[] scoreDocs = td.scoreDocs; |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
483 DocFn docFn = new DocFn(luan,searcher,query); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
484 for( int i=0; i<scoreDocs.length; i++ ) { |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
485 ScoreDoc scoreDoc = scoreDocs[i]; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
486 docFn.docID = scoreDoc.doc; |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
487 fn.call(i+1,docFn,scoreDoc.score); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
488 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
489 return td.totalHits; |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
490 } finally { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
491 if( !inTransaction ) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
492 close(searcher); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
493 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
494 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
495 |
1335 | 496 public Object search_in_transaction(LuanFunction fn) throws LuanException, IOException { |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
497 if( threadLocalSearcher.get() != null ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
498 throw new LuanException("can't nest search_in_transaction calls"); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
499 IndexSearcher searcher = openSearcher(); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
500 threadLocalSearcher.set(searcher); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
501 try { |
1335 | 502 return fn.call(); |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
503 } finally { |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
504 threadLocalSearcher.set(null); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
505 close(searcher); |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
506 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
507 } |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
508 |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
509 |
796 | 510 public FieldParser getIndexedFieldParser(String field) { |
511 return mfp.fields.get(field); | |
512 } | |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
513 |
796 | 514 public void setIndexedFieldParser(String field,FieldParser fp) { |
515 if( fp==null ) { // delete | |
516 mfp.fields.remove(field); | |
517 return; | |
518 } | |
519 mfp.fields.put( field, fp ); | |
520 } | |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
521 |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
522 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
523 private IndexableField newField(String name,Object value,Set<String> indexed,Float boost) |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
524 throws LuanException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
525 { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
526 boolean hasBoost = boost!=null; |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
527 IndexableField fld = newField2(name,value,indexed,hasBoost); |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
528 if( hasBoost ) |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
529 ((Field)fld).setBoost(boost); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
530 return fld; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
531 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
532 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
533 private IndexableField newField2(String name,Object value,Set<String> indexed,boolean hasBoost) |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
534 throws LuanException |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
535 { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
536 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
537 if( value instanceof String ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
538 String s = (String)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
539 FieldParser fp = mfp.fields.get(name); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
540 if( fp != null ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
541 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
542 return new TextField(name, s, store); |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
543 } else if (hasBoost) { |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
544 // fuck you modern lucene developers |
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
545 return new Field(name, s, store, Field.Index.NOT_ANALYZED); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
546 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
547 return new StringField(name, s, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
548 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
549 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
550 return new StoredField(name, s); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
551 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
552 } else if( value instanceof Integer ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
553 int i = (Integer)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
554 if( indexed.contains(name) ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
555 return new IntField(name, i, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
556 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
557 return new StoredField(name, i); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
558 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
559 } else if( value instanceof Long ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
560 long i = (Long)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
561 if( indexed.contains(name) ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
562 return new LongField(name, i, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
563 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
564 return new StoredField(name, i); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
565 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
566 } else if( value instanceof Double ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
567 double i = (Double)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
568 if( indexed.contains(name) ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
569 return new DoubleField(name, i, store); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
570 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
571 return new StoredField(name, i); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
572 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
573 } else if( value instanceof byte[] ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
574 byte[] b = (byte[])value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
575 return new StoredField(name, b); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
576 } else |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
577 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
578 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
579 |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
580 private Document toLucene(LuanTable table,LuanTable boosts) throws LuanException { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
581 Set<String> indexed = mfp.fields.keySet(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
582 Document doc = new Document(); |
1267 | 583 for( Map.Entry<Object,Object> entry : table.iterable() ) { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
584 Object key = entry.getKey(); |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
585 if( !(key instanceof String) ) |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
586 throw new LuanException("key must be string"); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
587 String name = (String)key; |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
588 Object value = entry.getValue(); |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
589 Float boost = null; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
590 if( boosts != null ) { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
591 Object obj = boosts.get(name); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
592 if( obj != null ) { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
593 if( !(obj instanceof Number) ) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
594 throw new LuanException("boost '"+name+"' must be number"); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
595 boost = ((Number)obj).floatValue(); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
596 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
597 } |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
598 if( !(value instanceof LuanTable) ) { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
599 doc.add(newField( name, value, indexed, boost )); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
600 } else { // list |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
601 LuanTable list = (LuanTable)value; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
602 for( Object el : list.asList() ) { |
1343
7d9a1f8894b0
lucene change indexed_only_field() to indexed_only_fields()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1342
diff
changeset
|
603 doc.add(newField( name, el, indexed, boost )); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
604 } |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
605 } |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
606 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
607 return doc; |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
608 } |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
527
diff
changeset
|
609 |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
610 private static Object getValue(IndexableField ifld) throws LuanException { |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
611 BytesRef br = ifld.binaryValue(); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
612 if( br != null ) |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
613 return br.bytes; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
614 Number n = ifld.numericValue(); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
615 if( n != null ) |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
616 return n; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
617 String s = ifld.stringValue(); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
618 if( s != null ) |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
619 return s; |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
620 throw new LuanException("invalid field type for "+ifld); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
621 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
622 |
1333
25746915a241
merge Luan and LuanState
Franklin Schmidt <fschmidt@gmail.com>
parents:
1271
diff
changeset
|
623 private static LuanTable toTable(Luan luan,Document doc) throws LuanException { |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
624 if( doc==null ) |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
625 return null; |
1267 | 626 LuanTable table = new LuanTable(luan); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
627 for( IndexableField ifld : doc ) { |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
628 String name = ifld.name(); |
646
cdc70de628b5
simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
629 Object value = getValue(ifld); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
630 Object old = table.rawGet(name); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
631 if( old == null ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
632 table.rawPut(name,value); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
633 } else { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
634 LuanTable list; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
635 if( old instanceof LuanTable ) { |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
636 list = (LuanTable)old; |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
637 } else { |
1267 | 638 list = new LuanTable(luan); |
621
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
639 list.rawPut(1,old); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
640 table.rawPut(name,list); |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
641 } |
fd15da41afca
allow list of values to be stored in lucene
Franklin Schmidt <fschmidt@gmail.com>
parents:
620
diff
changeset
|
642 list.rawPut(list.rawLength()+1,value); |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
643 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
644 } |
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
645 return table; |
312
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
646 } |
d34be4588556
add lucene query parsing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
303
diff
changeset
|
647 |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
648 |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
649 private static final Formatter nullFormatter = new Formatter() { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
650 public String highlightTerm(String originalText,TokenGroup tokenGroup) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
651 return originalText; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
652 } |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
653 }; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
654 |
1342
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
655 public LuanFunction highlighter(String queryStr,final LuanFunction formatter,final Integer fragmentSize,String dotdotdot) |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
656 throws ParseException |
60599adc27b8
add lucene search options
Franklin Schmidt <fschmidt@gmail.com>
parents:
1341
diff
changeset
|
657 { |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
658 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
659 Formatter fmt = new Formatter() { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
660 public String highlightTerm(String originalText,TokenGroup tokenGroup) { |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
661 if( tokenGroup.getTotalScore() <= 0 ) |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
662 return originalText; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
663 try { |
1335 | 664 return (String)Luan.first(formatter.call(originalText)); |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
665 } catch(LuanException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
666 throw new LuanRuntimeException(e); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
667 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
668 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
669 }; |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
670 QueryScorer queryScorer = new QueryScorer(query); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
671 final Highlighter chooser = fragmentSize==null ? null : new Highlighter(nullFormatter,queryScorer); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
672 if( chooser != null ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
673 chooser.setTextFragmenter( new SimpleSpanFragmenter(queryScorer,fragmentSize) ); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
674 final Highlighter hl = new Highlighter(fmt,queryScorer); |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
675 hl.setTextFragmenter( new NullFragmenter() ); |
1335 | 676 return new LuanFunction(false) { // ??? |
677 @Override public String call(Object[] args) throws LuanException { | |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
678 String text = (String)args[0]; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
679 try { |
1248
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
680 if( chooser != null ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
681 String s = chooser.getBestFragment(analyzer,null,text); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
682 if( s != null ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
683 if( dotdotdot != null ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
684 boolean atStart = text.startsWith(s); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
685 boolean atEnd = text.endsWith(s); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
686 if( !atStart ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
687 s = dotdotdot + s; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
688 if( !atEnd ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
689 s = s + dotdotdot; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
690 } |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
691 text = s; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
692 } else if( text.length() > fragmentSize ) { |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
693 text = text.substring(0,fragmentSize); |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
694 if( dotdotdot != null ) |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
695 text += "..."; |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
696 } |
475905984870
improve lucene highlighter and allow bbcode_to_text quoter to be nil
Franklin Schmidt <fschmidt@gmail.com>
parents:
1227
diff
changeset
|
697 } |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
698 String s = hl.getBestFragment(analyzer,null,text); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
699 return s!=null ? s : text; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
700 } catch(LuanRuntimeException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
701 throw (LuanException)e.getCause(); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
702 } catch(IOException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
703 throw new RuntimeException(e); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
704 } catch(InvalidTokenOffsetsException e) { |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
705 throw new RuntimeException(e); |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
706 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
707 } |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
708 }; |
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
709 } |
1341
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
710 |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
711 public int count_tokens(String text) |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
712 throws IOException |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
713 { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
714 int n = 0; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
715 TokenStream ts = analyzer.tokenStream(null,text); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
716 ts.reset(); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
717 while( ts.incrementToken() ) { |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
718 n++; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
719 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
720 ts.close(); |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
721 return n; |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
722 } |
a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents:
1337
diff
changeset
|
723 |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
724 } |