Mercurial Hosting > luan
annotate lucene/src/luan/modules/lucene/Lucene.luan @ 746:293c397e8dee
improve zip
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 14 Jul 2016 19:36:02 -0600 |
parents | 01e68da6983b |
children | de2418d11786 |
rev | line source |
---|---|
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
1 java() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
2 local Luan = require "luan:Luan.luan" |
503 | 3 local error = Luan.error |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
4 local ipairs = Luan.ipairs or error() |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
5 local type = Luan.type or error() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
6 local Html = require "luan:Html.luan" |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
7 local Io = require "luan:Io.luan" |
746 | 8 local uri = Io.uri or error() |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
9 local String = require "luan:String.luan" |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
10 local matches = String.matches or error() |
746 | 11 local Table = require "luan:Table.luan" |
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
12 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex" |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
13 local NumberFieldParser = require "java:luan.modules.lucene.queryparser.NumberFieldParser" |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
14 local StringFieldParser = require "java:luan.modules.lucene.queryparser.StringFieldParser" |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
15 local SaneQueryParser = require "java:luan.modules.lucene.queryparser.SaneQueryParser" |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
16 local Version = require "java:org.apache.lucene.util.Version" |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
17 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer" |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
18 |
320
fed1893821bf
remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
312
diff
changeset
|
19 |
-- Module table; it is returned at the end of the file.
local M = {}

-- Registry of live index wrappers: M.index stores each wrapper it creates
-- here as a key (value true), and the wrapper's close() removes it again.
M.instances = {}

-- Field parsers addressable by name.
M.type = {}
M.type.string = LuceneIndex.STRING_FIELD_PARSER
M.type.integer = NumberFieldParser.INT
M.type.long = NumberFieldParser.LONG
M.type.double = NumberFieldParser.DOUBLE
-- string parser built on Lucene's EnglishAnalyzer
M.type.english = StringFieldParser.new(EnglishAnalyzer.new(Version.LUCENE_CURRENT))

-- Re-export of SaneQueryParser.literal.
M.literal = SaneQueryParser.literal
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
34 |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
-- Creates a Luan wrapper table around a Java LuceneIndex.
--   index_dir, default_type, default_fields: handed unchanged to LuceneIndex.new
-- Returns the wrapper.  The wrapper is registered in M.instances until its
-- close() is called.
function M.index(index_dir,default_type,default_fields)
	local java_index = LuceneIndex.new(index_dir,default_type,default_fields)

	local index = {}
	index.dir = index_dir
	index.indexed_fields = java_index.indexedFieldsMeta.newTable()

	-- index.indexed_only_fields[type][field] = fn(doc)
	index.indexed_only_fields = java_index.indexed_only_fields

	-- Members forwarded straight from the Java object.
	-- (close is not forwarded directly; it is wrapped below.)
	-- searching
	index.advanced_search = java_index.advanced_search
	index.search_in_transaction = java_index.search_in_transaction
	index.highlighter = java_index.highlighter
	-- writing
	index.save = java_index.save
	index.delete = java_index.delete
	index.delete_all = java_index.delete_all
	index.update_in_transaction = java_index.update_in_transaction
	index.next_id = java_index.nextId
	-- housekeeping
	index.to_string = java_index.to_string
	index.snapshot = java_index.snapshot
	index.ensure_open = java_index.ensure_open

	M.instances[index] = true

	-- Unregisters this wrapper from M.instances, then closes the Java index.
	function index.close()
		M.instances[index] = nil
		java_index.close()
	end

	-- Runs advanced_search with `to` and `sort`, keeping every document whose
	-- hit position i satisfies i >= from.
	-- Returns the collected documents and the total hit count reported by
	-- advanced_search.
	function index.search(query, from, to, sort)
		local docs = {}
		local function collect(i,doc_fn)
			if i < from then
				return
			end
			docs[#docs+1] = doc_fn()
		end
		local total_hits = index.advanced_search(query,collect,to,sort)
		return docs, total_hits
	end

	-- Returns the document matching query, or nil when the callback is never
	-- invoked.  Raises an error when advanced_search reports more than one hit.
	function index.get_document(query)
		local found
		local function capture(_,doc_fn)
			found = doc_fn()
		end
		local total_hits = index.advanced_search(query,capture,1)
		if total_hits > 1 then
			error( "found " .. total_hits .. " documents" )
		end
		return found
	end

	-- Returns whatever advanced_search returns when called with only a query.
	function index.count(query)
		return index.advanced_search(query)
	end

	-- Returns a function(html) that parses html with Html.parse, runs every
	-- string element through the highlighter built from query and formatter,
	-- leaves all other elements untouched, and returns Html.to_string of the
	-- resulting list.
	function index.html_highlighter(query,formatter,container_tags)
		local highlight = index.highlighter(query,formatter)
		local function highlight_html(html)
			local nodes = Html.parse(html,container_tags)
			local out = {}
			for _, node in ipairs(nodes) do
				if type(node) == "string" then
					out[#out+1] = highlight(node)
				else
					out[#out+1] = node
				end
			end
			return Html.to_string(out)
		end
		return highlight_html
	end

	-- Writes the index files to zip_file, which must end in ".zip".
	-- The work happens inside index.snapshot, which supplies the directory
	-- and the file names to include.
	function index.zip(zip_file)
		matches(zip_file,[[\.zip$]]) or error("file "..zip_file.." doesn't end with '.zip'")
		local function write_zip(dir,file_names)
			-- base passed to zip() is the parent of the index directory
			local base = uri("file:"..dir).parent().to_string()
			local paths = {}
			for _, file_name in ipairs(file_names) do
				paths[#paths+1] = dir.."/"..file_name
			end
			uri(zip_file).zip(base,Table.unpack(paths))
		end
		index.snapshot(write_zip)
	end

	return index
end
503 | 119 |
120 return M |