annotate lucene/src/luan/modules/lucene/LuceneWriter.java @ 545:ddcd4296107a

clean up lucene search
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 14 Jun 2015 01:34:42 -0600
parents c5a93767cc5c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
1 package luan.modules.lucene;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
2
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
3 import java.io.IOException;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
4 import java.util.Map;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
5 import java.util.Set;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
6 import java.util.List;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
7 import java.util.ArrayList;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
8 import org.apache.lucene.index.IndexableField;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
9 import org.apache.lucene.index.Term;
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
10 import org.apache.lucene.util.BytesRef;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
11 import org.apache.lucene.util.NumericUtils;
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
12 import luan.Luan;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
13 import luan.LuanState;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
14 import luan.LuanTable;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
15 import luan.LuanJavaFunction;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
16 import luan.LuanException;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
17
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
18
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
19 public final class LuceneWriter {
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
20 private final LuceneIndex index;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
21
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
22 LuceneWriter(LuceneIndex index) {
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
23 index.writeLock.lock();
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
24 this.index = index;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
25 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
26
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
27 // call in finally block
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
28 void close() {
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
29 index.writeLock.unlock();
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
30 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
31
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
32 void commit() throws IOException {
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
33 index.writer.commit();
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
34 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
35
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
36 private Term term(String key,int value) {
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
37 BytesRef br = new BytesRef();
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
38 NumericUtils.intToPrefixCoded(value,0,br);
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
39 return new Term(key,br);
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
40 }
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
41
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
42 private Term term(String key,long value) {
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
43 BytesRef br = new BytesRef();
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
44 NumericUtils.longToPrefixCoded(value,0,br);
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
45 return new Term(key,br);
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
46 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
47
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
48 private Term term(LuanState luan,String key,Object value) throws LuanException {
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
49 if( value instanceof String )
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
50 return new Term( key, (String)value );
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
51 if( value instanceof Integer )
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
52 return term( key, (Integer)value );
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
53 if( value instanceof Long )
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
54 return term( key, (Long)value );
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
55 if( value instanceof Float )
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
56 return term( key, NumericUtils.floatToSortableInt((Float)value) );
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
57 if( value instanceof Double )
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
58 return term( key, NumericUtils.doubleToSortableLong((Double)value) );
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
59 throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'");
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
60 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
61
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
62 public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException {
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
63 List<Term> list = new ArrayList<Term>();
432
d9df6d6cb927 finish fixing LuanTable to use metatables
Franklin Schmidt <fschmidt@gmail.com>
parents: 427
diff changeset
64 for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) {
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
65 Object key = entry.getKey();
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
66 Object value = entry.getValue();
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
67 if( !(key instanceof String) )
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
68 throw luan.exception("key must be a string but got "+key.getClass().getSimpleName());
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
69 list.add( term( luan, (String)key, value ) );
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
70 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
71 index.writer.deleteDocuments(list.toArray(new Term[list.size()]));
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
72 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
73
233
ef39bc4d3f70 basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 230
diff changeset
74 public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException {
426
23a93c118042 fix LuanTable.get() to use metatables
Franklin Schmidt <fschmidt@gmail.com>
parents: 411
diff changeset
75 if( doc.get(luan,"type")==null )
323
cd2924a1052c improve testing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
76 throw luan.exception("missing 'type' field");
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
77 Long id = (Long)doc.get(luan,"id");
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
78 if( id == null ) {
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
79 id = index.nextId(luan);
427
dae264ad6a7b fix LuanTable.put() to use metatables
Franklin Schmidt <fschmidt@gmail.com>
parents: 426
diff changeset
80 doc.put(luan,"id",id);
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
81 index.writer.addDocument(index.toLucene(luan,doc));
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
82 } else {
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 432
diff changeset
83 index.writer.updateDocument( term("id",id), index.toLucene(luan,doc) );
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
84 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
85 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
86
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
87 // luan
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
88
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
89 private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException {
427
dae264ad6a7b fix LuanTable.put() to use metatables
Franklin Schmidt <fschmidt@gmail.com>
parents: 426
diff changeset
90 t.rawPut( method, new LuanJavaFunction(LuceneWriter.class.getMethod(method,parameterTypes),this) );
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
91 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
92
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
93 LuanTable table() {
411
23b99a5039b5 remove Luan.newTable()
Franklin Schmidt <fschmidt@gmail.com>
parents: 323
diff changeset
94 LuanTable tbl = new LuanTable();
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
95 try {
233
ef39bc4d3f70 basic lucene works
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 230
diff changeset
96 add( tbl, "save_document", LuanState.class, LuanTable.class );
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
97 add( tbl, "delete_documents", LuanState.class, LuanTable.class );
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
98 } catch(NoSuchMethodException e) {
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
99 throw new RuntimeException(e);
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
100 }
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
101 tbl.rawPut("index",index.myTable);
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
102 return tbl;
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
103 }
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
104
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
105 }