Mercurial Hosting > luan
annotate lucene/src/luan/modules/lucene/Lucene.luan @ 754:1a101ac9ea46
add lucene restore
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 17 Jul 2016 19:21:52 -0600 |
parents | 5e3970ccd86a |
children | 9092e52f94eb |

rev | line source |
---|---|
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
1 java() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
2 local Luan = require "luan:Luan.luan" |
503 | 3 local error = Luan.error |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
4 local ipairs = Luan.ipairs or error() |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
5 local type = Luan.type or error() |
693
ca169567ce07
module URIs must now include ".luan"
Franklin Schmidt <fschmidt@gmail.com>
parents:
625
diff
changeset
|
6 local Html = require "luan:Html.luan" |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
7 local Io = require "luan:Io.luan" |
746 | 8 local uri = Io.uri or error() |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
9 local String = require "luan:String.luan" |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
10 local matches = String.matches or error() |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
11 local Rpc = require "luan:Rpc.luan" |
754 | 12 local Thread = require "luan:Thread.luan" |
13 local synchronized = Thread.synchronized or error() | |
321
7f7708e8fdd4
remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
320
diff
changeset
|
14 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex" |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
15 local NumberFieldParser = require "java:luan.modules.lucene.queryparser.NumberFieldParser" |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
16 local StringFieldParser = require "java:luan.modules.lucene.queryparser.StringFieldParser" |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
709
diff
changeset
|
17 local SaneQueryParser = require "java:luan.modules.lucene.queryparser.SaneQueryParser" |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
18 local Version = require "java:org.apache.lucene.util.Version" |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
19 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer" |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
20 |
320
fed1893821bf
remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
312
diff
changeset
|
21 |
503 | 22 local M = {} |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
23 |
709 | 24 M.instances = {} |
25 | |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
26 M.type = { |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
27 string = LuceneIndex.STRING_FIELD_PARSER; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
28 integer = NumberFieldParser.INT; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
29 long = NumberFieldParser.LONG; |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
30 double = NumberFieldParser.DOUBLE; |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
31 |
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
32 english = StringFieldParser.new(EnglishAnalyzer.new(Version.LUCENE_CURRENT)) |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
33 } |
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
34 |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
35 M.literal = SaneQueryParser.literal |
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
36 |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
37 function M.index(index_dir,default_type,default_fields) |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
38 local index = {} |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
547
diff
changeset
|
39 index.dir = index_dir |
599
50540f0813e2
support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents:
591
diff
changeset
|
40 local java_index = LuceneIndex.new(index_dir,default_type,default_fields) |
544
c5a93767cc5c
lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents:
542
diff
changeset
|
41 index.indexed_fields = java_index.indexedFieldsMeta.newTable() |
618
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
42 |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
43 -- index.indexed_only_fields[type][field] = fn(doc) |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
44 index.indexed_only_fields = java_index.indexed_only_fields |
5e495e4e560b
add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents:
617
diff
changeset
|
45 |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
46 index.to_string = java_index.to_string |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
47 -- index.backup = java_index.backup |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
48 index.snapshot = java_index.snapshot |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
49 index.advanced_search = java_index.advanced_search |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
50 index.search_in_transaction = java_index.search_in_transaction |
303
fdb4bd391c28
add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
300
diff
changeset
|
51 index.delete_all = java_index.delete_all |
547
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
52 index.delete = java_index.delete |
0be287ab0309
add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents:
546
diff
changeset
|
53 index.save = java_index.save |
546
eaef1005ab87
general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
545
diff
changeset
|
54 index.update_in_transaction = java_index.update_in_transaction |
709 | 55 -- index.close = java_index.close |
591
790d5de23042
add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents:
547
diff
changeset
|
56 index.ensure_open = java_index.ensure_open |
617 | 57 index.next_id = java_index.nextId |
624
8281a248c47e
add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents:
622
diff
changeset
|
58 index.highlighter = java_index.highlighter |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
59 |
709 | 60 M.instances[index] = true |
61 | |
62 function index.close() | |
63 M.instances[index] = nil | |
64 java_index.close() | |
65 end | |
66 | |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
67 function index.search(query, from, to, sort) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
68 local results = {} |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
69 local function fn(i,doc_fn) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
70 if i >= from then |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
71 results[#results+1] = doc_fn() |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
72 end |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
73 end |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
74 local total_hits = index.advanced_search(query,fn,to,sort) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
75 return results, total_hits |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
76 end |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
77 |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
78 function index.get_document(query) |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
79 local doc |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
80 local function fn(_,doc_fn) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
81 doc = doc_fn() |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
82 end |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
83 local total_hits = index.advanced_search(query,fn,1) |
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
84 total_hits <= 1 or error( "found " .. total_hits .. " documents" ) |
257
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
85 return doc |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
86 end |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
87 |
c5c60eca33dd
allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
233
diff
changeset
|
88 function index.count(query) |
545
ddcd4296107a
clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents:
544
diff
changeset
|
89 return index.advanced_search(query) |
232
9ce18106f95a
more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
230
diff
changeset
|
90 end |
9ce18106f95a
more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
230
diff
changeset
|
91 |
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
92 function index.html_highlighter(query,formatter,container_tags) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
93 local highlighter = index.highlighter(query,formatter) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
94 return function(html) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
95 local list = Html.parse(html,container_tags) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
96 local result = {} |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
97 for _, obj in ipairs(list) do |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
98 if type(obj) == "string" then |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
99 obj = highlighter(obj) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
100 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
101 result[#result+1] = obj |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
102 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
103 return Html.to_string(result) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
104 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
105 end |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
624
diff
changeset
|
106 |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
107 function index.zip(zip_file) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
108 index.snapshot( function(dir,file_names) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
109 local t = {} |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
110 for _, file_name in ipairs(file_names) do |
746 | 111 t[#t+1] = dir.."/"..file_name |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
112 end |
746 | 113 local base = uri("file:"..dir).parent().to_string() |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
114 zip_file.zip(base,t) |
707
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
115 end ) |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
116 end |
1ed9e55f0be8
replace lucene.backup with lucene.zip implemented in luan, and add lucene.snapshot
Franklin Schmidt <fschmidt@gmail.com>
parents:
693
diff
changeset
|
117 |
754 | 118 function index.restore(zip_file) |
119 local lucene_dir = uri("file:"..index.dir) | |
120 local before_restore = lucene_dir.parent().child("before_restore.zip") | |
121 index.zip(before_restore) | |
122 java_index.close() | |
123 lucene_dir.delete() | |
124 zip_file.unzip(lucene_dir.parent().to_string()) | |
125 java_index.reopen() | |
126 end | |
127 index.restore = synchronized(index.restore) | |
128 | |
129 local function multi_error() | |
130 error "multiple lucene instances" | |
131 end | |
132 | |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
133 if Rpc.functions.backup == nil then |
754 | 134 |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
135 function Rpc.functions.lucene_backup(password) |
753
5e3970ccd86a
improve password handling
Franklin Schmidt <fschmidt@gmail.com>
parents:
749
diff
changeset
|
136 if Io.password ~= password then |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
137 error "wrong password" |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
138 end |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
139 local zip_file = uri("file:"..index.dir).parent().child("backup.zip") |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
140 index.zip(zip_file) |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
141 return zip_file |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
142 end |
754 | 143 |
144 function Rpc.functions.lucene_restore(password,zip_file) | |
145 if Io.password ~= password then | |
146 error "wrong password" | |
147 end | |
148 index.restore(zip_file) | |
149 end | |
150 | |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
151 else |
754 | 152 Rpc.functions.lucene_backup = multi_error |
153 Rpc.functions.lucene_restore = multi_error | |
749
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
154 end |
85f5444fb7d4
add rcp lucene backup
Franklin Schmidt <fschmidt@gmail.com>
parents:
748
diff
changeset
|
155 |
230
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
156 return index |
4438cb2e04d0
start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff
changeset
|
157 end |
503 | 158 |
159 return M |