comparison src/goodjava/lucene/queryparser/GoodQueryParser.java @ 1459:b04b8fc5f4f4

GoodQueryParser
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 20 Mar 2020 11:06:53 -0600
parents src/goodjava/lucene/queryparser/SaneQueryParser.java@6b6c11c9164e
children b1195cfe8712
comparison
equal deleted inserted replaced
1458:6b6c11c9164e 1459:b04b8fc5f4f4
1 package goodjava.lucene.queryparser;
2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.regex.Pattern;
6 import org.apache.lucene.search.Query;
7 import org.apache.lucene.search.MatchAllDocsQuery;
8 import org.apache.lucene.search.BooleanClause;
9 import org.apache.lucene.search.BooleanQuery;
10 import org.apache.lucene.search.Sort;
11 import org.apache.lucene.search.SortField;
12 import goodjava.parser.Parser;
13 import goodjava.parser.ParseException;
14
15
16 public class GoodQueryParser {
17
18 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException {
19 return new GoodQueryParser(fieldParser,query).parseQuery();
20 }
21
22 public static String quote(String s) {
23 s = s.replace("\\","\\\\");
24 s = s.replace("\b","\\b");
25 s = s.replace("\f","\\f");
26 s = s.replace("\n","\\n");
27 s = s.replace("\r","\\r");
28 s = s.replace("\t","\\t");
29 s = s.replace("\"","\\\"");
30 return "\""+s+"\"";
31 }
32
33 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException {
34 return new GoodQueryParser(fieldParser,sort).parseSort();
35 }
36
37
38 private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()";
39 private static final String NOT_IN_TERM = NOT_IN_RANGE + "-";
40 private static final String NOT_IN_FIELD = NOT_IN_TERM + ",";
41 private final FieldParser fieldParser;
42 private final Parser parser;
43
44 private GoodQueryParser(FieldParser fieldParser,String query) {
45 this.fieldParser = fieldParser;
46 this.parser = new Parser(query);
47 parser.begin();
48 }
49
50 ParseException exception(String msg) {
51 parser.failure();
52 return new ParseException(parser,msg);
53 }
54
55 ParseException exception(Exception cause) {
56 parser.failure();
57 return new ParseException(parser,cause);
58 }
59
60 private Query parseQuery() throws ParseException {
61 Spaces();
62 BooleanQuery bq = new BooleanQuery();
63 while( !parser.endOfInput() ) {
64 bq.add( Term(null) );
65 }
66 BooleanClause[] clauses = bq.getClauses();
67 switch( clauses.length ) {
68 case 0:
69 return new MatchAllDocsQuery();
70 case 1:
71 {
72 BooleanClause bc = clauses[0];
73 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
74 return bc.getQuery();
75 }
76 default:
77 return bq;
78 }
79 }
80
81 private BooleanClause Term(String defaultField) throws ParseException {
82 BooleanClause.Occur occur;
83 if( parser.match('+') ) {
84 occur = BooleanClause.Occur.MUST;
85 Spaces();
86 } else if( parser.match('-') ) {
87 occur = BooleanClause.Occur.MUST_NOT;
88 Spaces();
89 } else {
90 occur = BooleanClause.Occur.SHOULD;
91 }
92 String field = QueryField();
93 if( field == null )
94 field = defaultField;
95 Query query = NestedTerm(field);
96 if( query == null )
97 query = RangeTerm(field);
98 if( query == null ) {
99 parser.begin();
100 String match = SimpleTerm(NOT_IN_TERM);
101 query = fieldParser.getQuery(this,field,match);
102 parser.success();
103 }
104 if( parser.match('^') ) {
105 Spaces();
106 int start = parser.begin();
107 try {
108 while( parser.anyOf("0123456789.") );
109 String match = parser.textFrom(start);
110 float boost = Float.parseFloat(match);
111 query.setBoost(boost);
112 } catch(NumberFormatException e) {
113 throw exception(e);
114 }
115 parser.success();
116 Spaces();
117 }
118 BooleanClause bc = new BooleanClause(query,occur);
119 return bc;
120 }
121
122 private Query NestedTerm(String field) throws ParseException {
123 parser.begin();
124 if( !parser.match('(') )
125 return parser.failure(null);
126 BooleanQuery bq = new BooleanQuery();
127 while( !parser.match(')') ) {
128 if( parser.endOfInput() )
129 throw exception("unclosed parentheses");
130 bq.add( Term(field) );
131 }
132 Spaces();
133 BooleanClause[] clauses = bq.getClauses();
134 switch( clauses.length ) {
135 case 0:
136 throw exception("empty parentheses");
137 case 1:
138 {
139 BooleanClause bc = clauses[0];
140 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
141 return parser.success(bc.getQuery());
142 }
143 default:
144 return parser.success(bq);
145 }
146 }
147
148 private Query RangeTerm(String field) throws ParseException {
149 parser.begin();
150 if( !parser.anyOf("[{") )
151 return parser.failure(null);
152 boolean includeMin = parser.lastChar() == '[';
153 Spaces();
154 String minQuery = SimpleTerm(NOT_IN_RANGE);
155 TO();
156 String maxQuery = SimpleTerm(NOT_IN_RANGE);
157 if( !parser.anyOf("]}") )
158 throw exception("unclosed range");
159 boolean includeMax = parser.lastChar() == ']';
160 Spaces();
161 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax);
162 return parser.success(query);
163 }
164
165 private void TO() throws ParseException {
166 parser.begin();
167 if( !(parser.match("TO") && Space()) )
168 throw exception("'TO' expected");
169 Spaces();
170 parser.success();
171 }
172
173 private String SimpleTerm(String exclude) throws ParseException {
174 parser.begin();
175 String match = Quoted();
176 if( match==null )
177 match = Unquoted(exclude);
178 if( match.length() == 0 )
179 throw exception("invalid input");
180 return parser.success(match);
181 }
182
183 private String QueryField() throws ParseException {
184 parser.begin();
185 String match = Field();
186 if( match==null || !parser.match(':') )
187 return parser.failure((String)null);
188 Spaces();
189 return parser.success(match);
190 }
191
192 private String Field() throws ParseException {
193 parser.begin();
194 String match = Unquoted(NOT_IN_FIELD);
195 if( match.length()==0 )
196 return parser.failure((String)null);
197 match = StringFieldParser.escape(this,match);
198 return parser.success(match);
199 }
200
201 private String Quoted() throws ParseException {
202 parser.begin();
203 if( !parser.match('"') )
204 return parser.failure(null);
205 StringBuilder sb = new StringBuilder();
206 while( parser.anyChar() ) {
207 char c = parser.lastChar();
208 switch(c) {
209 case '"':
210 return parser.success(sb.toString());
211 case '\\':
212 if( parser.anyChar() ) {
213 c = parser.lastChar();
214 switch(c) {
215 case '"':
216 case '\\':
217 sb.append(c);
218 continue;
219 case 'b':
220 sb.append('\b');
221 continue;
222 case 'f':
223 sb.append('\f');
224 continue;
225 case 'n':
226 sb.append('\n');
227 continue;
228 case 'r':
229 sb.append('\r');
230 continue;
231 case 't':
232 sb.append('\t');
233 continue;
234 case 'u':
235 int n = 0;
236 for( int i=0; i<4; i++ ) {
237 int d;
238 if( parser.inCharRange('0','9') ) {
239 d = parser.lastChar() - '0';
240 } else if( parser.inCharRange('a','f') ) {
241 d = parser.lastChar() - 'a' + 10;
242 } else if( parser.inCharRange('A','F') ) {
243 d = parser.lastChar() - 'A' + 10;
244 } else {
245 throw exception("invalid hex digit");
246 }
247 n = 16*n + d;
248 }
249 sb.append((char)n);
250 continue;
251 }
252 }
253 throw exception("invalid escape char");
254 default:
255 sb.append(c);
256 }
257 }
258 parser.failure();
259 throw exception("unclosed string");
260 }
261
262 private String Unquoted(String exclude) throws ParseException {
263 int start = parser.begin();
264 while( parser.noneOf(exclude) ) {
265 checkEscape();
266 }
267 String match = parser.textFrom(start);
268 Spaces();
269 return parser.success(match);
270 }
271
272 private void checkEscape() {
273 if( parser.lastChar() == '\\' )
274 parser.anyChar();
275 }
276
277 private void Spaces() {
278 while( Space() );
279 }
280
281 private boolean Space() {
282 return parser.anyOf(" \t\r\n");
283 }
284
285
286 // sort
287
288 private Sort parseSort() throws ParseException {
289 Spaces();
290 if( parser.endOfInput() )
291 return null;
292 List<SortField> list = new ArrayList<SortField>();
293 list.add( SortField() );
294 while( !parser.endOfInput() ) {
295 parser.begin();
296 if( !parser.match(',') )
297 throw exception("',' expected");
298 Spaces();
299 parser.success();
300 list.add( SortField() );
301 }
302 return new Sort(list.toArray(new SortField[0]));
303 }
304
305 private SortField SortField() throws ParseException {
306 parser.begin();
307 String field = Field();
308 if( field==null )
309 throw exception("invalid input");
310 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc");
311 Spaces();
312 SortField sf = fieldParser.getSortField(this,field,reverse);
313 return parser.success(sf);
314 }
315
316 }