comparison src/goodjava/queryparser/SaneQueryParser.java @ 1402:27efb1fcbcb5

move luan.lib to goodjava
author Franklin Schmidt <fschmidt@gmail.com>
date Tue, 17 Sep 2019 01:35:01 -0400
parents src/luan/lib/queryparser/SaneQueryParser.java@77f2d091f17f
children e48290f3d9fb
comparison
equal deleted inserted replaced
1401:ef1620aa99cb 1402:27efb1fcbcb5
1 package goodjava.queryparser;
2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.regex.Pattern;
6 import org.apache.lucene.search.Query;
7 import org.apache.lucene.search.MatchAllDocsQuery;
8 import org.apache.lucene.search.BooleanClause;
9 import org.apache.lucene.search.BooleanQuery;
10 import org.apache.lucene.search.Sort;
11 import org.apache.lucene.search.SortField;
12 import goodjava.parser.Parser;
13 import goodjava.parser.ParseException;
14
15
16 public class SaneQueryParser {
17
18 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException {
19 return new SaneQueryParser(fieldParser,query).parseQuery();
20 }
21
22 private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]");
23
24 public static String literal(String s) {
25 return specialChar.matcher(s).replaceAll("\\\\$0");
26 }
27
28 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException {
29 return new SaneQueryParser(fieldParser,sort).parseSort();
30 }
31
32
// Characters that end an unquoted range endpoint (passed to SimpleTerm by RangeTerm).
private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()";
// Characters that end an unquoted term (used by Term): the range set plus '-'.
private static final String NOT_IN_TERM = NOT_IN_RANGE + "-";
// Characters that end a field name (used by Field): the term set plus ','.
private static final String NOT_IN_FIELD = NOT_IN_TERM + ",";
// Receives each parsed term, range and sort field and builds the Lucene object for it.
private final FieldParser fieldParser;
// Cursor over the input text; all matching and backtracking goes through it.
private final Parser parser;
38
39 private SaneQueryParser(FieldParser fieldParser,String query) {
40 this.fieldParser = fieldParser;
41 this.parser = new Parser(query);
42 parser.begin();
43 }
44
45 ParseException exception(String msg) {
46 parser.failure();
47 return new ParseException(parser,msg);
48 }
49
50 ParseException exception(Exception cause) {
51 parser.failure();
52 return new ParseException(parser,cause);
53 }
54
55 private Query parseQuery() throws ParseException {
56 Spaces();
57 BooleanQuery bq = new BooleanQuery();
58 while( !parser.endOfInput() ) {
59 bq.add( Term(null) );
60 }
61 BooleanClause[] clauses = bq.getClauses();
62 switch( clauses.length ) {
63 case 0:
64 return new MatchAllDocsQuery();
65 case 1:
66 {
67 BooleanClause bc = clauses[0];
68 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
69 return bc.getQuery();
70 }
71 default:
72 return bq;
73 }
74 }
75
76 private BooleanClause Term(String defaultField) throws ParseException {
77 BooleanClause.Occur occur;
78 if( parser.match('+') ) {
79 occur = BooleanClause.Occur.MUST;
80 Spaces();
81 } else if( parser.match('-') ) {
82 occur = BooleanClause.Occur.MUST_NOT;
83 Spaces();
84 } else {
85 occur = BooleanClause.Occur.SHOULD;
86 }
87 String field = QueryField();
88 if( field == null )
89 field = defaultField;
90 Query query = NestedTerm(field);
91 if( query == null )
92 query = RangeTerm(field);
93 if( query == null ) {
94 parser.begin();
95 String match = SimpleTerm(NOT_IN_TERM);
96 query = fieldParser.getQuery(this,field,match);
97 parser.success();
98 }
99 if( parser.match('^') ) {
100 Spaces();
101 int start = parser.begin();
102 try {
103 while( parser.anyOf("0123456789.") );
104 String match = parser.textFrom(start);
105 float boost = Float.parseFloat(match);
106 query.setBoost(boost);
107 } catch(NumberFormatException e) {
108 throw exception(e);
109 }
110 parser.success();
111 Spaces();
112 }
113 BooleanClause bc = new BooleanClause(query,occur);
114 return bc;
115 }
116
117 private Query NestedTerm(String field) throws ParseException {
118 parser.begin();
119 if( !parser.match('(') )
120 return parser.failure(null);
121 BooleanQuery bq = new BooleanQuery();
122 while( !parser.match(')') ) {
123 if( parser.endOfInput() )
124 throw exception("unclosed parentheses");
125 bq.add( Term(field) );
126 }
127 Spaces();
128 BooleanClause[] clauses = bq.getClauses();
129 switch( clauses.length ) {
130 case 0:
131 throw exception("empty parentheses");
132 case 1:
133 {
134 BooleanClause bc = clauses[0];
135 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
136 return parser.success(bc.getQuery());
137 }
138 default:
139 return parser.success(bq);
140 }
141 }
142
143 private Query RangeTerm(String field) throws ParseException {
144 parser.begin();
145 if( !parser.anyOf("[{") )
146 return parser.failure(null);
147 boolean includeMin = parser.lastChar() == '[';
148 Spaces();
149 String minQuery = SimpleTerm(NOT_IN_RANGE);
150 TO();
151 String maxQuery = SimpleTerm(NOT_IN_RANGE);
152 if( !parser.anyOf("]}") )
153 throw exception("unclosed range");
154 boolean includeMax = parser.lastChar() == ']';
155 Spaces();
156 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax);
157 return parser.success(query);
158 }
159
160 private void TO() throws ParseException {
161 parser.begin();
162 if( !(parser.match("TO") && Space()) )
163 throw exception("'TO' expected");
164 Spaces();
165 parser.success();
166 }
167
168 private String SimpleTerm(String exclude) throws ParseException {
169 parser.begin();
170 String match;
171 if( parser.match('"') ) {
172 int start = parser.currentIndex() - 1;
173 while( !parser.match('"') ) {
174 if( parser.endOfInput() )
175 throw exception("unclosed quotes");
176 parser.anyChar();
177 checkEscape();
178 }
179 match = parser.textFrom(start);
180 Spaces();
181 } else {
182 match = Unquoted(exclude);
183 }
184 if( match.length() == 0 )
185 throw exception("invalid input");
186 return parser.success(match);
187 }
188
189 private String QueryField() throws ParseException {
190 parser.begin();
191 String match = Field();
192 if( match==null || !parser.match(':') )
193 return parser.failure((String)null);
194 Spaces();
195 return parser.success(match);
196 }
197
198 private String Field() throws ParseException {
199 parser.begin();
200 String match = Unquoted(NOT_IN_FIELD);
201 if( match.length()==0 )
202 return parser.failure((String)null);
203 match = StringFieldParser.escape(this,match);
204 return parser.success(match);
205 }
206
207 private String Unquoted(String exclude) throws ParseException {
208 int start = parser.begin();
209 while( parser.noneOf(exclude) ) {
210 checkEscape();
211 }
212 String match = parser.textFrom(start);
213 Spaces();
214 return parser.success(match);
215 }
216
217 private void checkEscape() {
218 if( parser.lastChar() == '\\' )
219 parser.anyChar();
220 }
221
222 private void Spaces() {
223 while( Space() );
224 }
225
226 private boolean Space() {
227 return parser.anyOf(" \t\r\n");
228 }
229
230
231 // sort
232
233 private Sort parseSort() throws ParseException {
234 Spaces();
235 if( parser.endOfInput() )
236 return null;
237 List<SortField> list = new ArrayList<SortField>();
238 list.add( SortField() );
239 while( !parser.endOfInput() ) {
240 parser.begin();
241 if( !parser.match(',') )
242 throw exception("',' expected");
243 Spaces();
244 parser.success();
245 list.add( SortField() );
246 }
247 return new Sort(list.toArray(new SortField[0]));
248 }
249
250 private SortField SortField() throws ParseException {
251 parser.begin();
252 String field = Field();
253 if( field==null )
254 throw exception("invalid input");
255 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc");
256 Spaces();
257 SortField sf = fieldParser.getSortField(this,field,reverse);
258 return parser.success(sf);
259 }
260
261 }