annotate lucene/src/luan/modules/lucene/Lucene.luan @ 625:a3c1e11fb6aa

rewrite much of Html to be more understandable; add Lucene html_highlighter();
author Franklin Schmidt <fschmidt@gmail.com>
date Tue, 12 Jan 2016 23:52:56 -0700
parents 8281a248c47e
children ca169567ce07
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
321
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
1 java()
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
2 local Luan = require "luan:Luan"
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
3 local error = Luan.error
625
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
4 local ipairs = Luan.ipairs or error()
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
5 local type = Luan.type or error()
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
6 local Html = require "luan:Html"
321
7f7708e8fdd4 remove import statement
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 320
diff changeset
7 local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex"
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
8 local NumberFieldParser = require "java:sane.lucene.queryparser.NumberFieldParser"
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
9 local StringFieldParser = require "java:sane.lucene.queryparser.StringFieldParser"
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
10 local SaneQueryParser = require "java:sane.lucene.queryparser.SaneQueryParser"
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
11 local Version = require "java:org.apache.lucene.util.Version"
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
12 local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer"
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
13
320
fed1893821bf remove global namespace
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 312
diff changeset
14
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
15 local M = {}
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
16
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
-- M.type: table of field parser objects keyed by a type name.
-- string/integer/long/double refer to constants taken from LuceneIndex and NumberFieldParser;
-- english is a StringFieldParser constructed around an EnglishAnalyzer for Version.LUCENE_CURRENT.
17 M.type = {
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
18 string = LuceneIndex.STRING_FIELD_PARSER;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
19 integer = NumberFieldParser.INT;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
20 long = NumberFieldParser.LONG;
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
21 double = NumberFieldParser.DOUBLE;
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
22
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
23 english = StringFieldParser.new(EnglishAnalyzer.new(Version.LUCENE_CURRENT))
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
24 }
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
25
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
-- M.literal: re-exports SaneQueryParser.literal under this module's table.
26 M.literal = SaneQueryParser.literal
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
27
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
-- M.index(index_dir, default_type, default_fields)
-- Creates a Java LuceneIndex from the three arguments and returns a Luan table wrapping it.
-- The table stores index_dir as `dir`, exposes fields/methods of the Java index under
-- Luan-side names, and adds the helper functions search, get_document, count and
-- html_highlighter defined in this function body.
28 function M.index(index_dir,default_type,default_fields)
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
29 local index = {}
591
790d5de23042 add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents: 547
diff changeset
30 index.dir = index_dir
599
50540f0813e2 support default search fields in lucene;
Franklin Schmidt <fschmidt@gmail.com>
parents: 591
diff changeset
31 local java_index = LuceneIndex.new(index_dir,default_type,default_fields)
544
c5a93767cc5c lucene overhaul, untested
Franklin Schmidt <fschmidt@gmail.com>
parents: 542
diff changeset
32 index.indexed_fields = java_index.indexedFieldsMeta.newTable()
618
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
33
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
34 -- index.indexed_only_fields[type][field] = fn(doc)
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
35 index.indexed_only_fields = java_index.indexed_only_fields
5e495e4e560b add lucene indexed_only_fields
Franklin Schmidt <fschmidt@gmail.com>
parents: 617
diff changeset
36
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
-- The assignments below copy members of the Java index onto the Luan table unchanged
-- (note nextId is exposed under the name next_id).
37 index.to_string = java_index.to_string
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
38 index.backup = java_index.backup
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
39 index.advanced_search = java_index.advanced_search
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
40 index.search_in_transaction = java_index.search_in_transaction
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
41 index.delete_all = java_index.delete_all
547
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
42 index.delete = java_index.delete
0be287ab0309 add lucene/Versioning and simplify Lucene fn names
Franklin Schmidt <fschmidt@gmail.com>
parents: 546
diff changeset
43 index.save = java_index.save
546
eaef1005ab87 general lucene cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents: 545
diff changeset
44 index.update_in_transaction = java_index.update_in_transaction
303
fdb4bd391c28 add lucene close();
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 300
diff changeset
45 index.close = java_index.close
591
790d5de23042 add "strict" param to Io.repr();
Franklin Schmidt <fschmidt@gmail.com>
parents: 547
diff changeset
46 index.ensure_open = java_index.ensure_open
617
e54c1646eed0 add Lucene.next_id()
Franklin Schmidt <fschmidt@gmail.com>
parents: 599
diff changeset
47 index.next_id = java_index.nextId
624
8281a248c47e add lucene highlighter
Franklin Schmidt <fschmidt@gmail.com>
parents: 622
diff changeset
48 index.highlighter = java_index.highlighter
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
49
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
-- index.search(query, from, to, sort)
-- Calls advanced_search with a per-hit callback fn(i, doc_fn). doc_fn() is invoked, and its
-- result appended to the results list, only when i >= from; `to` and `sort` are passed through
-- to advanced_search unchanged.
-- Returns the collected results and the value advanced_search returned (named total_hits here).
-- NOTE(review): `from` is compared with >= without a nil check - confirm callers always pass it.
50 function index.search(query, from, to, sort)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
51 local results = {}
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
52 local function fn(i,doc_fn)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
53 if i >= from then
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
54 results[#results+1] = doc_fn()
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
55 end
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
56 end
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
57 local total_hits = index.advanced_search(query,fn,to,sort)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
58 return results, total_hits
257
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
59 end
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
60
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
-- index.get_document(query)
-- Calls advanced_search with a callback that stores doc_fn() in `doc`, passing 1 in the
-- argument position where index.search passes `to`.
-- Raises an error "found N documents" when the returned hit count is greater than 1.
-- Returns `doc`, which stays nil if the callback was never invoked.
61 function index.get_document(query)
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
62 local doc
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
63 local function fn(_,doc_fn)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
64 doc = doc_fn()
257
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
65 end
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
66 local total_hits = index.advanced_search(query,fn,1)
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
67 total_hits <= 1 or error( "found " .. total_hits .. " documents" )
257
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
68 return doc
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
69 end
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
70
c5c60eca33dd allow Lucene search for 0 rows
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 233
diff changeset
-- index.count(query)
-- Returns the result of advanced_search called with only the query (no callback, limit or sort);
-- the other helpers in this function treat that return value as the total hit count.
71 function index.count(query)
545
ddcd4296107a clean up lucene search
Franklin Schmidt <fschmidt@gmail.com>
parents: 544
diff changeset
72 return index.advanced_search(query)
232
9ce18106f95a more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 230
diff changeset
73 end
9ce18106f95a more lucene work
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 230
diff changeset
74
625
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
-- index.html_highlighter(query, formatter, container_tags)
-- Obtains a highlighter from index.highlighter(query, formatter) once, then returns a function
-- taking an html value. That function parses it with Html.parse(html, container_tags), passes
-- every element whose type is "string" through the highlighter, keeps all other elements
-- as they are, and returns Html.to_string of the resulting list.
75 function index.html_highlighter(query,formatter,container_tags)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
76 local highlighter = index.highlighter(query,formatter)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
77 return function(html)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
78 local list = Html.parse(html,container_tags)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
79 local result = {}
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
80 for _, obj in ipairs(list) do
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
81 if type(obj) == "string" then
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
82 obj = highlighter(obj)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
83 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
84 result[#result+1] = obj
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
85 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
86 return Html.to_string(result)
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
87 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
88 end
a3c1e11fb6aa rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents: 624
diff changeset
89
230
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
90 return index
4438cb2e04d0 start lucene
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
91 end
503
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
92
92c3d22745b8 make _ENV optional
Franklin Schmidt <fschmidt@gmail.com>
parents: 435
diff changeset
93 return M