annotate lucene/src/luan/modules/lucene/Lucene.luan @ 618:5e495e4e560b

add lucene indexed_only_fields
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 01 Jan 2016 01:24:10 -0700
parents e54c1646eed0
children 89eb02f9827f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
321
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
1 java()
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
2 local Luan = require "luan:Luan"
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
3 local error = Luan.error
321
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
4 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex"
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
5 local NumberFieldParser = require "java:sane.lucene.queryparser.NumberFieldParser"
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
6 local StringFieldParser = require "java:sane.lucene.queryparser.StringFieldParser"
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
7 local SaneQueryParser = require "java:sane.lucene.queryparser.SaneQueryParser"
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
8 local Version = require "java:org.apache.lucene.util.Version"
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
9 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer"
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
10
320
fed1893821bf remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 312
diff changeset
11
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
-- Module table; it is what this file returns to whoever requires it.
local M = {}

-- Field parsers keyed by a short type name.  Each value is a Java parser
-- object taken from LuceneIndex / NumberFieldParser / StringFieldParser.
local field_types = {}
field_types.string = LuceneIndex.STRING_FIELD_PARSER
field_types.integer = NumberFieldParser.INT
field_types.long = NumberFieldParser.LONG
field_types.double = NumberFieldParser.DOUBLE

-- "english" is a string parser wrapped around Lucene's EnglishAnalyzer.
local english_analyzer = EnglishAnalyzer.new(Version.LUCENE_CURRENT)
field_types.english = StringFieldParser.new(english_analyzer)

M.type = field_types

-- Re-export of SaneQueryParser.literal under the module's own name.
M.literal = SaneQueryParser.literal
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
24
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
-- Opens a Lucene index and returns a plain table that wraps it.
--   index_dir, default_type, default_fields are handed unchanged to
--   LuceneIndex.new; index_dir is also kept on the result as index.dir.
-- The returned table exposes the Java index's functions plus the
-- convenience helpers search, get_document and count defined below.
function M.index(index_dir,default_type,default_fields)
	local java_index = LuceneIndex.new(index_dir,default_type,default_fields)

	local index = {}
	index.dir = index_dir
	index.indexed_fields = java_index.indexedFieldsMeta.newTable()

	-- index.indexed_only_fields[type][field] = fn(doc)
	index.indexed_only_fields = java_index.indexed_only_fields

	-- Functions forwarded as-is from the Java index (alphabetical order).
	index.advanced_search = java_index.advanced_search
	index.backup = java_index.backup
	index.close = java_index.close
	index.delete = java_index.delete
	index.delete_all = java_index.delete_all
	index.ensure_open = java_index.ensure_open
	index.next_id = java_index.nextId
	index.save = java_index.save
	index.search_in_transaction = java_index.search_in_transaction
	index.to_string = java_index.to_string
	index.update_in_transaction = java_index.update_in_transaction

	-- Runs query through index.advanced_search and collects documents.
	--   from : hits whose position is below this value are skipped
	--   to, sort : forwarded to advanced_search untouched
	-- Returns the list of collected documents and the value returned by
	-- advanced_search (used here as the total hit count).
	function index.search(query, from, to, sort)
		local docs = {}
		-- Called by advanced_search for each hit with its position and a
		-- function that loads the document; the document is only loaded
		-- when the hit is actually wanted.
		local function collect(position,load_doc)
			if position >= from then
				docs[#docs+1] = load_doc()
			end
		end
		local hit_count = index.advanced_search(query,collect,to,sort)
		return docs, hit_count
	end

	-- Fetches the one document matching query.
	-- Asks advanced_search for a single hit and raises an error when the
	-- reported hit count is greater than 1.  The result stays nil if the
	-- callback is never invoked (presumably when nothing matches).
	function index.get_document(query)
		local found
		local function remember(_,load_doc)
			found = load_doc()
		end
		local hit_count = index.advanced_search(query,remember,1)
		if hit_count > 1 then
			error( "found " .. hit_count .. " documents" )
		end
		return found
	end

	-- Returns whatever advanced_search yields for query alone; with no
	-- callback supplied no documents are collected.
	function index.count(query)
		return index.advanced_search(query)
	end

	return index
end
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
73
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
74 return M