Mercurial Hosting > luan
comparison lucene/src/luan/modules/lucene/LuceneIndex.java @ 546:eaef1005ab87
general lucene cleanup
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 14 Jun 2015 22:17:58 -0600 |
parents | ddcd4296107a |
children | 0be287ab0309 |
comparison
equal
deleted
inserted
replaced
545:ddcd4296107a | 546:eaef1005ab87 |
---|---|
4 import java.io.File; | 4 import java.io.File; |
5 import java.io.FileOutputStream; | 5 import java.io.FileOutputStream; |
6 import java.io.FileInputStream; | 6 import java.io.FileInputStream; |
7 import java.io.IOException; | 7 import java.io.IOException; |
8 import java.util.Iterator; | 8 import java.util.Iterator; |
9 import java.util.Map; | |
10 import java.util.List; | |
11 import java.util.ArrayList; | |
12 import java.util.Set; | |
9 import java.util.concurrent.locks.Lock; | 13 import java.util.concurrent.locks.Lock; |
10 import java.util.concurrent.locks.ReentrantLock; | 14 import java.util.concurrent.locks.ReentrantLock; |
11 import java.util.zip.ZipOutputStream; | 15 import java.util.zip.ZipOutputStream; |
12 import java.util.zip.ZipEntry; | 16 import java.util.zip.ZipEntry; |
13 import org.apache.lucene.analysis.Analyzer; | 17 import org.apache.lucene.analysis.Analyzer; |
14 import org.apache.lucene.analysis.core.KeywordAnalyzer; | 18 import org.apache.lucene.analysis.core.KeywordAnalyzer; |
15 import org.apache.lucene.document.Document; | 19 import org.apache.lucene.document.Document; |
20 import org.apache.lucene.document.Field; | |
21 import org.apache.lucene.document.StoredField; | |
22 import org.apache.lucene.document.StringField; | |
23 import org.apache.lucene.document.IntField; | |
24 import org.apache.lucene.document.LongField; | |
25 import org.apache.lucene.document.DoubleField; | |
26 import org.apache.lucene.index.IndexableField; | |
16 import org.apache.lucene.index.IndexWriter; | 27 import org.apache.lucene.index.IndexWriter; |
17 import org.apache.lucene.index.IndexWriterConfig; | 28 import org.apache.lucene.index.IndexWriterConfig; |
18 import org.apache.lucene.index.DirectoryReader; | 29 import org.apache.lucene.index.DirectoryReader; |
19 import org.apache.lucene.index.Term; | 30 import org.apache.lucene.index.Term; |
20 import org.apache.lucene.index.SnapshotDeletionPolicy; | 31 import org.apache.lucene.index.SnapshotDeletionPolicy; |
32 import org.apache.lucene.index.IndexCommit; | |
21 import org.apache.lucene.index.AtomicReaderContext; | 33 import org.apache.lucene.index.AtomicReaderContext; |
22 import org.apache.lucene.store.Directory; | 34 import org.apache.lucene.store.Directory; |
23 import org.apache.lucene.store.FSDirectory; | 35 import org.apache.lucene.store.FSDirectory; |
24 import org.apache.lucene.util.Version; | 36 import org.apache.lucene.util.Version; |
37 import org.apache.lucene.util.BytesRef; | |
38 import org.apache.lucene.util.NumericUtils; | |
25 import org.apache.lucene.search.Query; | 39 import org.apache.lucene.search.Query; |
26 import org.apache.lucene.search.TermQuery; | 40 import org.apache.lucene.search.TermQuery; |
27 import org.apache.lucene.search.TopDocs; | 41 import org.apache.lucene.search.TopDocs; |
28 import org.apache.lucene.search.Sort; | 42 import org.apache.lucene.search.Sort; |
29 import org.apache.lucene.search.SortField; | 43 import org.apache.lucene.search.SortField; |
56 | 70 |
57 private static final String FLD_NEXT_ID = "nextId"; | 71 private static final String FLD_NEXT_ID = "nextId"; |
58 private static final Analyzer analyzer = new KeywordAnalyzer(); | 72 private static final Analyzer analyzer = new KeywordAnalyzer(); |
59 public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); | 73 public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); |
60 | 74 |
61 final LuanTable myTable; | 75 private final ReentrantLock writeLock = new ReentrantLock(); |
62 final Lock writeLock = new ReentrantLock(); | |
63 private final File indexDir; | 76 private final File indexDir; |
64 final SnapshotDeletionPolicy snapshotDeletionPolicy; | 77 final SnapshotDeletionPolicy snapshotDeletionPolicy; |
65 final IndexWriter writer; | 78 private final IndexWriter writer; |
66 private DirectoryReader reader; | 79 private DirectoryReader reader; |
67 private IndexSearcher searcher; | 80 private IndexSearcher searcher; |
68 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); | 81 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); |
69 private boolean isClosed = false; | 82 private boolean isClosed = false; |
70 private final MultiFieldParser mfp = new MultiFieldParser(); | 83 private final MultiFieldParser mfp = new MultiFieldParser(); |
71 | 84 |
72 public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException { | 85 public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException { |
73 this.myTable = myTable; | |
74 mfp.fields.put( "type", STRING_FIELD_PARSER ); | 86 mfp.fields.put( "type", STRING_FIELD_PARSER ); |
75 mfp.fields.put( "id", NumberFieldParser.LONG ); | 87 mfp.fields.put( "id", NumberFieldParser.LONG ); |
76 File indexDir = new File(indexDirStr); | 88 File indexDir = new File(indexDirStr); |
77 this.indexDir = indexDir; | 89 this.indexDir = indexDir; |
78 Directory dir = FSDirectory.open(indexDir); | 90 Directory dir = FSDirectory.open(indexDir); |
86 luan.onClose(this); | 98 luan.onClose(this); |
87 searcher = new IndexSearcher(reader); | 99 searcher = new IndexSearcher(reader); |
88 initId(luan); | 100 initId(luan); |
89 } | 101 } |
90 | 102 |
91 Document toLucene(LuanState luan,LuanTable table) throws LuanException { | 103 |
92 return LuceneDocument.toLucene(luan,table,mfp.fields.keySet()); | |
93 } | |
94 | |
95 public LuceneWriter openWriter() { | |
96 return new LuceneWriter(this); | |
97 } | |
98 | |
99 private synchronized IndexSearcher openSearcher() throws IOException { | |
100 DirectoryReader newReader = DirectoryReader.openIfChanged(reader); | |
101 if( newReader != null ) { | |
102 reader.decRef(); | |
103 reader = newReader; | |
104 searcher = new IndexSearcher(reader); | |
105 } | |
106 reader.incRef(); | |
107 return searcher; | |
108 } | |
109 | |
110 // call in finally block | |
111 private static void close(IndexSearcher searcher) throws IOException { | |
112 searcher.getIndexReader().decRef(); | |
113 } | |
114 | |
115 LuceneSnapshot openSnapshot() throws IOException { | |
116 return new LuceneSnapshot(this); | |
117 } | |
118 | 104 |
119 public void delete_all() throws IOException { | 105 public void delete_all() throws IOException { |
106 boolean commit = !writeLock.isHeldByCurrentThread(); | |
120 writeLock.lock(); | 107 writeLock.lock(); |
121 try { | 108 try { |
122 writer.deleteAll(); | 109 writer.deleteAll(); |
123 writer.commit(); | |
124 id = idLim = 0; | 110 id = idLim = 0; |
111 if(commit) writer.commit(); | |
112 } finally { | |
113 writeLock.unlock(); | |
114 } | |
115 } | |
116 | |
117 private static Term term(String key,int value) { | |
118 BytesRef br = new BytesRef(); | |
119 NumericUtils.intToPrefixCoded(value,0,br); | |
120 return new Term(key,br); | |
121 } | |
122 | |
123 private static Term term(String key,long value) { | |
124 BytesRef br = new BytesRef(); | |
125 NumericUtils.longToPrefixCoded(value,0,br); | |
126 return new Term(key,br); | |
127 } | |
128 | |
129 private static Term term(LuanState luan,String key,Object value) throws LuanException { | |
130 if( value instanceof String ) | |
131 return new Term( key, (String)value ); | |
132 if( value instanceof Integer ) | |
133 return term( key, (Integer)value ); | |
134 if( value instanceof Long ) | |
135 return term( key, (Long)value ); | |
136 if( value instanceof Float ) | |
137 return term( key, NumericUtils.floatToSortableInt((Float)value) ); | |
138 if( value instanceof Double ) | |
139 return term( key, NumericUtils.doubleToSortableLong((Double)value) ); | |
140 throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'"); | |
141 } | |
142 | |
143 public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException { | |
144 List<Term> list = new ArrayList<Term>(); | |
145 for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) { | |
146 Object key = entry.getKey(); | |
147 Object value = entry.getValue(); | |
148 if( !(key instanceof String) ) | |
149 throw luan.exception("key must be a string but got "+key.getClass().getSimpleName()); | |
150 list.add( term( luan, (String)key, value ) ); | |
151 } | |
152 | |
153 boolean commit = !writeLock.isHeldByCurrentThread(); | |
154 writeLock.lock(); | |
155 try { | |
156 writer.deleteDocuments(list.toArray(new Term[list.size()])); | |
157 if(commit) writer.commit(); | |
158 } finally { | |
159 writeLock.unlock(); | |
160 } | |
161 } | |
162 | |
163 public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException { | |
164 if( doc.get(luan,"type")==null ) | |
165 throw luan.exception("missing 'type' field"); | |
166 Long id = (Long)doc.get(luan,"id"); | |
167 | |
168 boolean commit = !writeLock.isHeldByCurrentThread(); | |
169 writeLock.lock(); | |
170 try { | |
171 if( id == null ) { | |
172 id = nextId(luan); | |
173 doc.put(luan,"id",id); | |
174 writer.addDocument(toLucene(luan,doc)); | |
175 } else { | |
176 writer.updateDocument( term("id",id), toLucene(luan,doc) ); | |
177 } | |
178 if(commit) writer.commit(); | |
179 } finally { | |
180 writeLock.unlock(); | |
181 } | |
182 } | |
183 | |
184 public void update_in_transaction(LuanState luan,LuanFunction fn) throws IOException, LuanException { | |
185 boolean commit = !writeLock.isHeldByCurrentThread(); | |
186 writeLock.lock(); | |
187 try { | |
188 luan.call(fn); | |
189 if(commit) writer.commit(); | |
125 } finally { | 190 } finally { |
126 writeLock.unlock(); | 191 writeLock.unlock(); |
127 } | 192 } |
128 } | 193 } |
129 | 194 |
144 default: | 209 default: |
145 throw new RuntimeException(); | 210 throw new RuntimeException(); |
146 } | 211 } |
147 } | 212 } |
148 | 213 |
149 synchronized long nextId(LuanState luan) throws LuanException, IOException { | 214 private synchronized long nextId(LuanState luan) throws LuanException, IOException { |
150 if( ++id > idLim ) { | 215 if( ++id > idLim ) { |
151 idLim += idBatch; | 216 idLim += idBatch; |
152 LuanTable doc = new LuanTable(); | 217 LuanTable doc = new LuanTable(); |
153 doc.rawPut( "type", "next_id" ); | 218 doc.rawPut( "type", "next_id" ); |
154 doc.rawPut( FLD_NEXT_ID, idLim ); | 219 doc.rawPut( FLD_NEXT_ID, idLim ); |
159 | 224 |
160 | 225 |
161 public void backup(LuanState luan,String zipFile) throws LuanException, IOException { | 226 public void backup(LuanState luan,String zipFile) throws LuanException, IOException { |
162 if( !zipFile.endsWith(".zip") ) | 227 if( !zipFile.endsWith(".zip") ) |
163 throw luan.exception("file "+zipFile+" doesn't end with '.zip'"); | 228 throw luan.exception("file "+zipFile+" doesn't end with '.zip'"); |
164 LuceneSnapshot snapshot = openSnapshot(); | 229 IndexCommit ic = snapshotDeletionPolicy.snapshot(); |
165 try { | 230 try { |
166 ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); | 231 ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); |
167 for( String fileName : snapshot.getFileNames() ) { | 232 for( String fileName : ic.getFileNames() ) { |
168 out.putNextEntry(new ZipEntry(fileName)); | 233 out.putNextEntry(new ZipEntry(fileName)); |
169 FileInputStream in = new FileInputStream(new File(indexDir,fileName)); | 234 FileInputStream in = new FileInputStream(new File(indexDir,fileName)); |
170 Utils.copyAll(in,out); | 235 Utils.copyAll(in,out); |
171 in.close(); | 236 in.close(); |
172 out.closeEntry(); | 237 out.closeEntry(); |
173 } | 238 } |
174 out.close(); | 239 out.close(); |
175 } finally { | 240 } finally { |
176 snapshot.close(); | 241 snapshotDeletionPolicy.release(ic); |
177 } | 242 } |
178 } | 243 } |
179 | 244 |
180 | 245 |
181 | |
182 // luan | |
183 | 246 |
184 public String to_string() { | 247 public String to_string() { |
185 return writer.getDirectory().toString(); | 248 return writer.getDirectory().toString(); |
186 } | |
187 | |
188 public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { | |
189 LuceneWriter writer = openWriter(); | |
190 try { | |
191 luan.call( fn, new Object[]{writer.table()} ); | |
192 writer.commit(); | |
193 } finally { | |
194 writer.close(); | |
195 } | |
196 } | 249 } |
197 | 250 |
198 public void close() throws IOException { | 251 public void close() throws IOException { |
199 if( !isClosed ) { | 252 if( !isClosed ) { |
200 writer.close(); | 253 writer.close(); |
221 this.searcher = searcher; | 274 this.searcher = searcher; |
222 } | 275 } |
223 | 276 |
224 @Override public Object call(LuanState luan,Object[] args) throws LuanException { | 277 @Override public Object call(LuanState luan,Object[] args) throws LuanException { |
225 try { | 278 try { |
226 return LuceneDocument.toTable(luan,searcher.doc(docID)); | 279 return toTable(luan,searcher.doc(docID)); |
227 } catch(IOException e) { | 280 } catch(IOException e) { |
228 throw luan.exception(e); | 281 throw luan.exception(e); |
229 } | 282 } |
230 } | 283 } |
231 } | 284 } |
241 @Override public boolean acceptsDocsOutOfOrder() { | 294 @Override public boolean acceptsDocsOutOfOrder() { |
242 return true; | 295 return true; |
243 } | 296 } |
244 } | 297 } |
245 | 298 |
299 private synchronized IndexSearcher openSearcher() throws IOException { | |
300 DirectoryReader newReader = DirectoryReader.openIfChanged(reader); | |
301 if( newReader != null ) { | |
302 reader.decRef(); | |
303 reader = newReader; | |
304 searcher = new IndexSearcher(reader); | |
305 } | |
306 reader.incRef(); | |
307 return searcher; | |
308 } | |
309 | |
310 // call in finally block | |
311 private static void close(IndexSearcher searcher) throws IOException { | |
312 searcher.getIndexReader().decRef(); | |
313 } | |
314 | |
246 public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { | 315 public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { |
247 Utils.checkNotNull(luan,queryStr); | 316 Utils.checkNotNull(luan,queryStr); |
248 Query query = parseQuery(queryStr); | 317 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
249 IndexSearcher searcher = threadLocalSearcher.get(); | 318 IndexSearcher searcher = threadLocalSearcher.get(); |
250 boolean inTransaction = searcher != null; | 319 boolean inTransaction = searcher != null; |
251 if( !inTransaction ) | 320 if( !inTransaction ) |
252 searcher = openSearcher(); | 321 searcher = openSearcher(); |
253 try { | 322 try { |
275 if( fn==null || n==0 ) { | 344 if( fn==null || n==0 ) { |
276 TotalHitCountCollector thcc = new TotalHitCountCollector(); | 345 TotalHitCountCollector thcc = new TotalHitCountCollector(); |
277 searcher.search(query,thcc); | 346 searcher.search(query,thcc); |
278 return thcc.getTotalHits(); | 347 return thcc.getTotalHits(); |
279 } | 348 } |
280 Sort sort = sortStr==null ? null : parseSort(sortStr); | 349 Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr); |
281 TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); | 350 TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); |
282 final ScoreDoc[] scoreDocs = td.scoreDocs; | 351 final ScoreDoc[] scoreDocs = td.scoreDocs; |
283 DocFn docFn = new DocFn(searcher); | 352 DocFn docFn = new DocFn(searcher); |
284 for( int i=0; i<scoreDocs.length; i++ ) { | 353 for( int i=0; i<scoreDocs.length; i++ ) { |
285 docFn.docID = scoreDocs[i].doc; | 354 docFn.docID = scoreDocs[i].doc; |
339 return "lucene-indexed-fields"; | 408 return "lucene-indexed-fields"; |
340 } | 409 } |
341 | 410 |
342 }; | 411 }; |
343 | 412 |
344 public Query parseQuery(String s) throws ParseException { | 413 |
345 return SaneQueryParser.parseQuery(mfp,s); | 414 |
346 } | 415 |
347 | 416 private Document toLucene(LuanState luan,LuanTable table) throws LuanException { |
348 public Sort parseSort(String s) throws ParseException { | 417 Set<String> indexed = mfp.fields.keySet(); |
349 return SaneQueryParser.parseSort(mfp,s); | 418 Document doc = new Document(); |
419 for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { | |
420 Object key = entry.getKey(); | |
421 if( !(key instanceof String) ) | |
422 throw luan.exception("key must be string"); | |
423 String name = (String)key; | |
424 Object value = entry.getValue(); | |
425 if( value instanceof String ) { | |
426 String s = (String)value; | |
427 if( indexed.contains(name) ) { | |
428 doc.add(new StringField(name, s, Field.Store.YES)); | |
429 } else { | |
430 doc.add(new StoredField(name, s)); | |
431 } | |
432 } else if( value instanceof Integer ) { | |
433 int i = (Integer)value; | |
434 if( indexed.contains(name) ) { | |
435 doc.add(new IntField(name, i, Field.Store.YES)); | |
436 } else { | |
437 doc.add(new StoredField(name, i)); | |
438 } | |
439 } else if( value instanceof Long ) { | |
440 long i = (Long)value; | |
441 if( indexed.contains(name) ) { | |
442 doc.add(new LongField(name, i, Field.Store.YES)); | |
443 } else { | |
444 doc.add(new StoredField(name, i)); | |
445 } | |
446 } else if( value instanceof Double ) { | |
447 double i = (Double)value; | |
448 if( indexed.contains(name) ) { | |
449 doc.add(new DoubleField(name, i, Field.Store.YES)); | |
450 } else { | |
451 doc.add(new StoredField(name, i)); | |
452 } | |
453 } else if( value instanceof byte[] ) { | |
454 byte[] b = (byte[])value; | |
455 doc.add(new StoredField(name, b)); | |
456 } else | |
457 throw luan.exception("invalid value type "+value.getClass()+"' for '"+name+"'"); | |
458 } | |
459 return doc; | |
460 } | |
461 | |
462 private static LuanTable toTable(LuanState luan,Document doc) throws LuanException { | |
463 if( doc==null ) | |
464 return null; | |
465 LuanTable table = new LuanTable(); | |
466 for( IndexableField ifld : doc ) { | |
467 String name = ifld.name(); | |
468 BytesRef br = ifld.binaryValue(); | |
469 if( br != null ) { | |
470 table.rawPut(name,br.bytes); | |
471 continue; | |
472 } | |
473 Number n = ifld.numericValue(); | |
474 if( n != null ) { | |
475 table.rawPut(name,n); | |
476 continue; | |
477 } | |
478 String s = ifld.stringValue(); | |
479 if( s != null ) { | |
480 table.rawPut(name,s); | |
481 continue; | |
482 } | |
483 throw luan.exception("invalid field type for "+ifld); | |
484 } | |
485 return table; | |
350 } | 486 } |
351 | 487 |
352 } | 488 } |