annotate lucene/src/luan/modules/lucene/Lucene.luan @ 709:96a280ca32a2

add Lucene.instances
author Franklin Schmidt <fschmidt@gmail.com>
date Wed, 18 May 2016 19:55:48 -0600
parents 1ed9e55f0be8
children 01e68da6983b
Ignore whitespace changes (options): everywhere; within whitespace; at end of lines.
rev   line source
321
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
1 java()
693
ca169567ce07 module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents: 625
diff changeset
2 local Luan = require "luan:Luan.luan"
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
3 local error = Luan.error
625
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
4 local ipairs = Luan.ipairs or error()
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
5 local type = Luan.type or error()
693
ca169567ce07 module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents: 625
diff changeset
6 local Html = require "luan:Html.luan"
707
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
7 local Io = require "luan:Io.luan"
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
8 local String = require "luan:String.luan"
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
9 local matches = String.matches or error()
321
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
10 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex"
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
11 local NumberFieldParser = require "java:sane.lucene.queryparser.NumberFieldParser"
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
12 local StringFieldParser = require "java:sane.lucene.queryparser.StringFieldParser"
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
13 local SaneQueryParser = require "java:sane.lucene.queryparser.SaneQueryParser"
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
14 local Version = require "java:org.apache.lucene.util.Version"
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
15 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer"
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
16
320
fed1893821bf remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 312
diff changeset
17
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
18 local M = {}
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
19
709
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
20 M.instances = {}
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
21
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
22 M.type = {
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
23 string = LuceneIndex.STRING_FIELD_PARSER;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
24 integer = NumberFieldParser.INT;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
25 long = NumberFieldParser.LONG;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
26 double = NumberFieldParser.DOUBLE;
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
27
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
28 english = StringFieldParser.new(EnglishAnalyzer.new(Version.LUCENE_CURRENT))
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
29 }
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
30
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
31 M.literal = SaneQueryParser.literal
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
32
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
33 function M.index(index_dir,default_type,default_fields)
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
34 local index = {}
591
790d5de23042 add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents: 547
diff changeset
35 index.dir = index_dir
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
36 local java_index = LuceneIndex.new(index_dir,default_type,default_fields)
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
37 index.indexed_fields = java_index.indexedFieldsMeta.newTable()
618
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
38
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
39 -- index.indexed_only_fields[type][field] = fn(doc)
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
40 index.indexed_only_fields = java_index.indexed_only_fields
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
41
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
42 index.to_string = java_index.to_string
707
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
43 -- index.backup = java_index.backup
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
44 index.snapshot = java_index.snapshot
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
45 index.advanced_search = java_index.advanced_search
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
46 index.search_in_transaction = java_index.search_in_transaction
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
47 index.delete_all = java_index.delete_all
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
48 index.delete = java_index.delete
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
49 index.save = java_index.save
546
eaef1005ab87 general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents: 545
diff changeset
50 index.update_in_transaction = java_index.update_in_transaction
709
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
51 -- index.close = java_index.close
591
790d5de23042 add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents: 547
diff changeset
52 index.ensure_open = java_index.ensure_open
617
e54c1646eed0 add Lucene.next_id()
Franklin Schmidt <fschmidt@gmail.com>
parents: 599
diff changeset
53 index.next_id = java_index.nextId
624
8281a248c47e add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents: 622
diff changeset
54 index.highlighter = java_index.highlighter
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
55
709
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
56 M.instances[index] = true
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
57
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
58 function index.close()
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
59 M.instances[index] = nil
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
60 java_index.close()
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
61 end
96a280ca32a2 add Lucene.instances
Franklin Schmidt <fschmidt@gmail.com>
parents: 707
diff changeset
62
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
63 function index.search(query, from, to, sort)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
64 local results = {}
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
65 local function fn(i,doc_fn)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
66 if i >= from then
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
67 results[#results+1] = doc_fn()
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
68 end
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
69 end
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
70 local total_hits = index.advanced_search(query,fn,to,sort)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
71 return results, total_hits
257
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
72 end
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
73
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
74 function index.get_document(query)
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
75 local doc
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
76 local function fn(_,doc_fn)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
77 doc = doc_fn()
257
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
78 end
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
79 local total_hits = index.advanced_search(query,fn,1)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
80 total_hits <= 1 or error( "found " .. total_hits .. " documents" )
257
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
81 return doc
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
82 end
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
83
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
84 function index.count(query)
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
85 return index.advanced_search(query)
232
9ce18106f95a more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 230
diff changeset
86 end
9ce18106f95a more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 230
diff changeset
87
625
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
88 function index.html_highlighter(query,formatter,container_tags)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
89 local highlighter = index.highlighter(query,formatter)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
90 return function(html)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
91 local list = Html.parse(html,container_tags)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
92 local result = {}
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
93 for _, obj in ipairs(list) do
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
94 if type(obj) == "string" then
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
95 obj = highlighter(obj)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
96 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
97 result[#result+1] = obj
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
98 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
99 return Html.to_string(result)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
100 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
101 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
102
707
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
103 function index.zip(zip_file)
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
104 matches(zip_file,[[\.zip$]]) or error("file "..zip_file.." doesn't end with '.zip'")
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
105 index.snapshot( function(dir,file_names)
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
106 local t = {}
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
107 for _, file_name in ipairs(file_names) do
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
108 t[file_name] = "file:"..dir.."/"..file_name
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
109 end
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
110 Io.zip(zip_file,t)
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
111 end )
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
112 end
1ed9e55f0be8 replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents: 693
diff changeset
113
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
114 return index
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
115 end
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
116
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
117 return M