Mercurial Hosting > luan
annotate lucene/src/luan/modules/lucene/Lucene.luan @ 707:1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Wed, 18 May 2016 15:46:51 -0600 |
parents | ca169567ce07 |
children | 96a280ca32a2 |
rev | line source |
---|---|
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
1 java() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
2 local Luan = require "luan:Luan.luan" |
503 | 3 local error = Luan.error |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
4 local ipairs = Luan.ipairs or error() |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
5 local type = Luan.type or error() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
6 local Html = require "luan:Html.luan" |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
7 local Io = require "luan:Io.luan" |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
8 local String = require "luan:String.luan" |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
9 local matches = String.matches or error() |
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
10 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex" |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
11 local NumberFieldParser = require "java:sane.lucene.queryparser.NumberFieldParser" |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
12 local StringFieldParser = require "java:sane.lucene.queryparser.StringFieldParser" |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
13 local SaneQueryParser = require "java:sane.lucene.queryparser.SaneQueryParser" |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
14 local Version = require "java:org.apache.lucene.util.Version" |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
15 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer" |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
16 |
320
fed1893821bf
remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
312
diff
changeset
|
17 |
503 | 18 local M = {} |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
19 |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
20 M.type = { |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
21 string = LuceneIndex.STRING_FIELD_PARSER; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
22 integer = NumberFieldParser.INT; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
23 long = NumberFieldParser.LONG; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
24 double = NumberFieldParser.DOUBLE; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
25 |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
26 english = StringFieldParser.new(EnglishAnalyzer.new(Version.LUCENE_CURRENT)) |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
27 } |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
28 |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
29 M.literal = SaneQueryParser.literal |
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
30 |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
31 function M.index(index_dir,default_type,default_fields) |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
32 local index = {} |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
547
diff
changeset
|
33 index.dir = index_dir |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
34 local java_index = LuceneIndex.new(index_dir,default_type,default_fields) |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
35 index.indexed_fields = java_index.indexedFieldsMeta.newTable() |
618
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
36 |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
37 -- index.indexed_only_fields[type][field] = fn(doc) |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
38 index.indexed_only_fields = java_index.indexed_only_fields |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
39 |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
40 index.to_string = java_index.to_string |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
41 -- index.backup = java_index.backup |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
42 index.snapshot = java_index.snapshot |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
43 index.advanced_search = java_index.advanced_search |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
44 index.search_in_transaction = java_index.search_in_transaction |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
45 index.delete_all = java_index.delete_all |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
46 index.delete = java_index.delete |
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
47 index.save = java_index.save |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
48 index.update_in_transaction = java_index.update_in_transaction |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
49 index.close = java_index.close |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
547
diff
changeset
|
50 index.ensure_open = java_index.ensure_open |
617 | 51 index.next_id = java_index.nextId |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
52 index.highlighter = java_index.highlighter |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
53 |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
54 function index.search(query, from, to, sort) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
55 local results = {} |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
56 local function fn(i,doc_fn) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
57 if i >= from then |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
58 results[#results+1] = doc_fn() |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
59 end |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
60 end |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
61 local total_hits = index.advanced_search(query,fn,to,sort) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
62 return results, total_hits |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
63 end |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
64 |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
65 function index.get_document(query) |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
66 local doc |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
67 local function fn(_,doc_fn) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
68 doc = doc_fn() |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
69 end |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
70 local total_hits = index.advanced_search(query,fn,1) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
71 total_hits <= 1 or error( "found " .. total_hits .. " documents" ) |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
72 return doc |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
73 end |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
74 |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
75 function index.count(query) |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
76 return index.advanced_search(query) |
232
9ce18106f95a
more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
230
diff
changeset
|
77 end |
9ce18106f95a
more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
230
diff
changeset
|
78 |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
79 function index.html_highlighter(query,formatter,container_tags) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
80 local highlighter = index.highlighter(query,formatter) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
81 return function(html) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
82 local list = Html.parse(html,container_tags) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
83 local result = {} |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
84 for _, obj in ipairs(list) do |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
85 if type(obj) == "string" then |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
86 obj = highlighter(obj) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
87 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
88 result[#result+1] = obj |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
89 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
90 return Html.to_string(result) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
91 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
92 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
93 |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
94 function index.zip(zip_file) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
95 matches(zip_file,[[\.zip$]]) or error("file "..zip_file.." doesn't end with '.zip'") |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
96 index.snapshot( function(dir,file_names) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
97 local t = {} |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
98 for _, file_name in ipairs(file_names) do |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
99 t[file_name] = "file:"..dir.."/"..file_name |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
100 end |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
101 Io.zip(zip_file,t) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
102 end ) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
103 end |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
104 |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
105 return index |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
106 end |
503 | 107 |
108 return M |