annotate lucene/src/luan/modules/lucene/queryparser/StringFieldParser.java @ 730:01e68da6983b

add sane-lucene-queryparser source to luan
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 10 Jun 2016 15:41:15 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
1 package luan.modules.lucene.queryparser;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3 import java.io.StringReader;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 import java.io.IOException;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 import org.apache.lucene.analysis.Analyzer;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 import org.apache.lucene.analysis.TokenStream;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 import org.apache.lucene.search.Query;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10 import org.apache.lucene.search.TermQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11 import org.apache.lucene.search.TermRangeQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 import org.apache.lucene.search.PhraseQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13 import org.apache.lucene.search.WildcardQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14 import org.apache.lucene.search.PrefixQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15 import org.apache.lucene.search.SortField;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16 import org.apache.lucene.index.Term;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
17
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19 public class StringFieldParser implements FieldParser {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20 public int slop = 0;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 public final Analyzer analyzer;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23 public StringFieldParser(Analyzer analyzer) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24 this.analyzer = analyzer;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27 @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
28 String wildcard = wildcard(qp,query);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29 if( wildcard != null )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30 return new WildcardQuery(new Term(field,wildcard));
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 if( query.endsWith("*") && !query.endsWith("\\*") )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 return new PrefixQuery(new Term(field,query.substring(0,query.length()-1)));
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33 query = escape(qp,query);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
34 PhraseQuery pq = new PhraseQuery();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35 try {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 TokenStream ts = analyzer.tokenStream(field,new StringReader(query));
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37 CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38 PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39 ts.reset();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40 int pos = -1;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 while( ts.incrementToken() ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42 pos += posAttr.getPositionIncrement();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43 pq.add( new Term(field,termAttr.toString()), pos );
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45 ts.end();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46 ts.close();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 } catch(IOException e) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48 throw new RuntimeException(e);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 Term[] terms = pq.getTerms();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 if( terms.length==1 && pq.getPositions()[0]==0 )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52 return new TermQuery(terms[0]);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53 return pq;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56 @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
57 minQuery = escape(qp,minQuery);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
58 maxQuery = escape(qp,maxQuery);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
59 return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
60 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
61
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
62 static String escape(SaneQueryParser qp,String s) throws ParseException {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
63 final char[] a = s.toCharArray();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
64 int i, n;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
65 if( a[0] == '"' ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
66 if( a[a.length-1] != '"' ) throw new RuntimeException();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
67 i = 1;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
68 n = a.length - 1;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
69 } else {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
70 i = 0;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
71 n = a.length;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
72 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
73 StringBuilder sb = new StringBuilder();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
74 for( ; i<n; i++ ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
75 char c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
76 if( c == '\\' ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
77 if( ++i == a.length )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
78 throw new ParseException(qp,"ends with '\\'");
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
79 c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
80 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
81 sb.append(c);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
82 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
83 return sb.toString();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
84 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
85
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
86 private static String wildcard(SaneQueryParser qp,String s) throws ParseException {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
87 final char[] a = s.toCharArray();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
88 if( a[0] == '"' )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
89 return null;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
90 boolean hasWildcard = false;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
91 StringBuilder sb = new StringBuilder();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
92 for( int i=0; i<a.length; i++ ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
93 char c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
94 if( c=='?' || c=='*' && i<a.length-1 )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
95 hasWildcard = true;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
96 if( c == '\\' ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
97 if( ++i == a.length )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
98 throw new ParseException(qp,"ends with '\\'");
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
99 c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
100 if( c=='?' || c=='*' )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
101 sb.append('\\');
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
102 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
103 sb.append(c);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
104 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
105 return hasWildcard ? sb.toString() : null;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
106 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
107
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
108 @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
109 return new SortField( field, SortField.Type.STRING, reverse );
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
110 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
111
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
112 }