Mercurial Hosting > luan
annotate lucene/src/luan/modules/lucene/Lucene.luan @ 749:85f5444fb7d4
add rcp lucene backup
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 15 Jul 2016 01:18:01 -0600 |
parents | de2418d11786 |
children | 5e3970ccd86a |
rev | line source |
---|---|
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
1 java() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
2 local Luan = require "luan:Luan.luan" |
503 | 3 local error = Luan.error |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
4 local ipairs = Luan.ipairs or error() |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
5 local type = Luan.type or error() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
6 local Html = require "luan:Html.luan" |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
7 local Io = require "luan:Io.luan" |
746 | 8 local uri = Io.uri or error() |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
9 local String = require "luan:String.luan" |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
10 local matches = String.matches or error() |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
11 local Rpc = require "luan:Rpc.luan" |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
12 local Hosting = require "luan:host/Hosting.luan" |
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
13 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex" |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
14 local NumberFieldParser = require "java:luan.modules.lucene.queryparser.NumberFieldParser" |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
15 local StringFieldParser = require "java:luan.modules.lucene.queryparser.StringFieldParser" |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
16 local SaneQueryParser = require "java:luan.modules.lucene.queryparser.SaneQueryParser" |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
17 local Version = require "java:org.apache.lucene.util.Version" |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
18 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer" |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
19 |
320
fed1893821bf
remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
312
diff
changeset
|
20 |
503 | 21 local M = {} |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
22 |
709 | 23 M.instances = {} |
24 | |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
25 M.type = { |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
26 string = LuceneIndex.STRING_FIELD_PARSER; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
27 integer = NumberFieldParser.INT; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
28 long = NumberFieldParser.LONG; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
29 double = NumberFieldParser.DOUBLE; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
30 |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
31 english = StringFieldParser.new(EnglishAnalyzer.new(Version.LUCENE_CURRENT)) |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
32 } |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
33 |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
34 M.literal = SaneQueryParser.literal |
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
35 |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
36 function M.index(index_dir,default_type,default_fields) |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
37 local index = {} |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
547
diff
changeset
|
38 index.dir = index_dir |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
39 local java_index = LuceneIndex.new(index_dir,default_type,default_fields) |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
40 index.indexed_fields = java_index.indexedFieldsMeta.newTable() |
618
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
41 |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
42 -- index.indexed_only_fields[type][field] = fn(doc) |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
43 index.indexed_only_fields = java_index.indexed_only_fields |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
44 |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
45 index.to_string = java_index.to_string |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
46 -- index.backup = java_index.backup |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
47 index.snapshot = java_index.snapshot |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
48 index.advanced_search = java_index.advanced_search |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
49 index.search_in_transaction = java_index.search_in_transaction |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
50 index.delete_all = java_index.delete_all |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
51 index.delete = java_index.delete |
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
52 index.save = java_index.save |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
53 index.update_in_transaction = java_index.update_in_transaction |
709 | 54 -- index.close = java_index.close |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
547
diff
changeset
|
55 index.ensure_open = java_index.ensure_open |
617 | 56 index.next_id = java_index.nextId |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
57 index.highlighter = java_index.highlighter |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
58 |
709 | 59 M.instances[index] = true |
60 | |
61 function index.close() | |
62 M.instances[index] = nil | |
63 java_index.close() | |
64 end | |
65 | |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
66 function index.search(query, from, to, sort) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
67 local results = {} |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
68 local function fn(i,doc_fn) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
69 if i >= from then |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
70 results[#results+1] = doc_fn() |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
71 end |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
72 end |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
73 local total_hits = index.advanced_search(query,fn,to,sort) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
74 return results, total_hits |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
75 end |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
76 |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
77 function index.get_document(query) |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
78 local doc |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
79 local function fn(_,doc_fn) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
80 doc = doc_fn() |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
81 end |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
82 local total_hits = index.advanced_search(query,fn,1) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
83 total_hits <= 1 or error( "found " .. total_hits .. " documents" ) |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
84 return doc |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
85 end |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
86 |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
87 function index.count(query) |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
88 return index.advanced_search(query) |
232
9ce18106f95a
more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
230
diff
changeset
|
89 end |
9ce18106f95a
more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
230
diff
changeset
|
90 |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
91 function index.html_highlighter(query,formatter,container_tags) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
92 local highlighter = index.highlighter(query,formatter) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
93 return function(html) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
94 local list = Html.parse(html,container_tags) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
95 local result = {} |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
96 for _, obj in ipairs(list) do |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
97 if type(obj) == "string" then |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
98 obj = highlighter(obj) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
99 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
100 result[#result+1] = obj |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
101 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
102 return Html.to_string(result) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
103 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
104 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
105 |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
106 function index.zip(zip_file) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
107 index.snapshot( function(dir,file_names) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
108 local t = {} |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
109 for _, file_name in ipairs(file_names) do |
746 | 110 t[#t+1] = dir.."/"..file_name |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
111 end |
746 | 112 local base = uri("file:"..dir).parent().to_string() |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
113 zip_file.zip(base,t) |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
114 end ) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
115 end |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
116 |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
117 if Rpc.functions.backup == nil then |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
118 function Rpc.functions.lucene_backup(password) |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
119 if Hosting.password ~= nil and Hosting.password ~= password then |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
120 error "wrong password" |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
121 end |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
122 local zip_file = uri("file:"..index.dir).parent().child("backup.zip") |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
123 index.zip(zip_file) |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
124 return zip_file |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
125 end |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
126 else |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
127 function Rpc.functions.lucene_backup() |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
128 error "multiple lucene instance" |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
129 end |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
130 end |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
131 |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
132 return index |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
133 end |
503 | 134 |
135 return M |