view src/nabble/model/NodeSearcher.java @ 47:72765b66e2c3

remove mailing list code
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 18 Jun 2021 17:44:24 -0600
parents 7ecd1a4ef557
children
line wrap: on
line source

package nabble.model;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import nabble.model.lucene.HitCollector;
import nabble.model.lucene.LuceneSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.NullFragmenter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenGroup;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


public final class NodeSearcher {
	// Class-wide SLF4J logger; used by getNodes() to report hits that could not be loaded.
	private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class);

	/**
	 * Sort on the {@code Lucene.DATE_FLD} field, read as an int sort key.
	 * {@code Builder.setDateRange} compares the configured sort against this
	 * exact instance, so a date range requires passing this constant to
	 * {@code Builder.setSort} first.
	 */
	public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT));

	public static class Builder {
		private static final String[] nodeSearchFields = new String[]{
			Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD
		};

		private final SiteImpl site;
		private final BooleanQuery query = new BooleanQuery();
		private Query textQuery = null;
		private boolean isAuthenticated = false;
		private final long nodeId;
		private User currentUser;
		private String userSearchId = null;
		private Sort sort = null;
		private Filter filter = null;
		private Date from = null;
		private Date to = null;

		public Builder(Node node) {
			this(node.getSite(),node.getId());
		}
	
		public Builder(Site site,long nodeId) {
			if( nodeId == 0L )
				throw new RuntimeException();
			this.site = (SiteImpl)site;
			this.nodeId = nodeId;
			Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId)));
			query.add(query2,BooleanClause.Occur.MUST);
		}

		public void setCurrentUser(User user) {
			this.isAuthenticated = true;
			this.currentUser = user;
		}

		private BooleanQuery getQuery() {
			if( !isAuthenticated )
				return query;
			if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) )
				return query;
			BooleanQuery q = new BooleanQuery();
			q.add(query, BooleanClause.Occur.MUST);
			if( currentUser != null ) {
				NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
				q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST);
				return q;
			}
			q.add(publicQuery, BooleanClause.Occur.MUST);
			return q;
		}

		public void addQuery(Query query2) {
			query.add(query2,BooleanClause.Occur.MUST);
		}
		
		public void addLine(String line) throws ParseException {
			if( textQuery != null )
				throw new RuntimeException();
			textQuery = parse(line,nodeSearchFields);
			if( textQuery != null )
				query.add(textQuery,BooleanClause.Occur.MUST);
		}

		public void addUser(Person user) {
			if( user==null )
				return;
			addUser(user.getSearchId());
		}
	
		public void addUser(String userSearchId) {
			this.userSearchId = userSearchId;
			Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId));
			query.add(query2,BooleanClause.Occur.MUST);
		}

		public void addUsers(List<? extends Person> visitors) {
			if (visitors != null && visitors.size() > 0) {
				BooleanQuery usersClause = new BooleanQuery();
				for (Person v : visitors) {
					Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId()));
					usersClause.add(q, BooleanClause.Occur.SHOULD);
				}
				query.add(usersClause, BooleanClause.Occur.MUST);
			}
		}
	
		void addExcludeUser(String userSearchId) {
			BooleanClause excludeUserClause = new BooleanClause(
					new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)),
					BooleanClause.Occur.MUST_NOT);
			query.add(excludeUserClause);
		}

		public void setUserSearchId(String userSearchId) {
			this.userSearchId = userSearchId;
		}

		private final static Query appQuery =
			new ConstantScoreQuery(
				new CachingWrapperFilter(
					new QueryWrapperFilter(
						new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString()))
					)
				)
			)
		;
		
		public void addNodeKind(Node.Kind kind) {
			query.add(appQuery,
					kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT);
		}
	
		private final static Query publicQuery =
			new ConstantScoreQuery(
				new CachingWrapperFilter(
					new QueryWrapperFilter(
						new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none"))
					)
				)
			)
		;
	
		public void excludePrivate() {
			query.add(publicQuery,BooleanClause.Occur.MUST);
		}

		public void setSort(Sort sort) {
			this.sort = sort;
		}
	
		public void setFilter(Filter filter) {
			this.filter = filter;
		}
	
		public void setDateRange(Date from, Date to) {
			if( sort != SORT_BY_DATE )
				throw new UnsupportedOperationException();
			this.from = from;
			this.to = to;
		}

		public NodeSearcher build() {
			return new NodeSearcher(this);
		}
	}

	// Site passed to Lucene.newSearcher for every search this object runs.
	private final SiteImpl site;
	// Final query obtained from Builder.getQuery(), including any privacy clause it added.
	private final BooleanQuery query;
	// Parsed free-text part of the query, or null when none was added; source of getSearchTerms().
	private final Query textQuery;
	// Optional sort; when null, getNodes() calls the search overload that takes no sort.
	private final Sort sort;
	// Optional filter passed to the searches in getTotalHits() and getNodes().
	private final Filter filter;
	// Date range copied from the builder.
	// NOTE(review): neither bound is read anywhere in the visible code — confirm whether they are still needed.
	private final Date from;
	private final Date to;
	// Built lazily by getSearchTerms(); null until first requested.
	private Set<String> searchTerms = null;
	// Cached hit count; -1 means "not computed yet". Set by getTotalHits() and by every getNodes() call.
	private int totalHits = -1;
	// Scores text against the query for highlight() and getFragment().
	private final QueryScorer scorer;

	private NodeSearcher(Builder builder) {
		this.site = builder.site;
		this.query = builder.getQuery();
		this.textQuery = builder.textQuery;
		this.sort = builder.sort;
		this.filter = builder.filter;
		this.from = builder.from;
		this.to = builder.to;
		this.scorer = new QueryScorer(query);
	}

	/** Returns the query that every search method of this object executes. */
	public BooleanQuery getQuery() {
		return this.query;
	}
	
	/**
	 * Parses a user-entered search line into a query over several fields,
	 * with AND as the default operator between terms.
	 *
	 * @param line   text to parse; may be null
	 * @param fields names of the fields to search
	 * @return the parsed query, or null if {@code line} is null or empty
	 * @throws ParseException if the query parser rejects the line
	 */
	static Query parse(String line, String[] fields) throws ParseException {
		final boolean nothingToParse = line == null || line.length() == 0;
		if( nothingToParse )
			return null;
		// hack - treat [] as punctuation
		final String sanitized = line.replace('[','|').replace(']','|');
		final MultiFieldQueryParser queryParser =
			new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, Lucene.analyzer);
		queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
		return queryParser.parse(sanitized);
	}
	
	/** Returns the string form of the underlying query. */
	public String toString() {
		return this.query.toString();
	}

	/**
	 * Returns the term texts of the free-text part of the query, skipping
	 * prohibited clauses.  The set is computed on the first call and the same
	 * instance is returned afterwards; it is empty when no text was searched.
	 */
	public Set<String> getSearchTerms() {
		if( searchTerms != null )
			return searchTerms;
		Set<String> collected = new HashSet<String>();
		searchTerms = collected;
		if( textQuery != null )
			searchTerms(collected,textQuery);
		return collected;
	}

	/**
	 * Adds the term texts found in {@code query} to {@code searchTerms}.
	 * Term and phrase queries contribute their terms; boolean queries are
	 * walked recursively, ignoring prohibited clauses.  Any other query type
	 * contributes nothing.
	 */
	private static void searchTerms(Set<String> searchTerms,Query query) {
		if( query instanceof TermQuery ) {
			Term single = ((TermQuery)query).getTerm();
			searchTerms.add( single.text() );
			return;
		}
		if( query instanceof PhraseQuery ) {
			for (Term part : ((PhraseQuery)query).getTerms()) {
				searchTerms.add(part.text());
			}
			return;
		}
		if( !(query instanceof BooleanQuery) )
			return;
		for (BooleanClause clause : ((BooleanQuery)query).getClauses()) {
			if (clause.isProhibited())
				continue;
			searchTerms(searchTerms, clause.getQuery());
		}
	}

	/**
	 * Returns {@code text} with each match of the query wrapped in
	 * {@code pre} and {@code post}.  The whole text is treated as a single
	 * fragment; if the highlighter yields nothing, the text is returned as is.
	 *
	 * @throws RuntimeException wrapping any IOException or
	 *         InvalidTokenOffsetsException raised by the highlighter
	 */
	public String highlight(String text,String pre,String post) {
		final String marked;
		try {
			Formatter wrapper = new SimpleHTMLFormatter(pre,post);
			Highlighter highlighter = new Highlighter(wrapper, scorer);
			highlighter.setTextFragmenter( new NullFragmenter() );
			marked = highlighter.getBestFragment(Lucene.analyzer,null,text);
		} catch(IOException e) {
			throw new RuntimeException(e);
		} catch(InvalidTokenOffsetsException e) {
			throw new RuntimeException(e);
		}
		if( marked == null )
			return text;
		return marked;
	}

	/**
	 * Returns the beginning of {@code text}, shortened to at most
	 * {@code size} characters plus an optional ellipsis.
	 * <p>
	 * Text that already fits is returned unchanged.  Otherwise the text is cut
	 * at the last space found at or before index {@code size}; if there is no
	 * such space after the first character, it is cut at {@code size} itself.
	 *
	 * @param text      text to shorten; must not be null
	 * @param size      maximum fragment length; values below 1 give an empty fragment
	 * @param dotdotdot marker appended to a shortened fragment, or null for none
	 * @return the text itself, or the shortened fragment followed by the marker
	 */
	public static String getStartingFragment(String text,int size,String dotdotdot) {
		if (text.length() <= size) return text;
		// from here on the text is always truncated, so the marker always applies
		String suffix = dotdotdot != null ? dotdotdot : "";
		if (size <= 0) return suffix;
		int end = text.lastIndexOf(' ', size);
		if (end <= 0) {
			// No usable word boundary: a space at index 0 would leave an empty
			// fragment, so fall back to a hard cut just like the no-space case.
			end = size;
			// do not cut between the two halves of a surrogate pair
			if (Character.isHighSurrogate(text.charAt(end - 1)) && Character.isLowSurrogate(text.charAt(end)))
				end--;
		}
		return text.substring(0, end) + suffix;
	}

	/** Formatter that returns every matched term unchanged, so fragments carry no markup. */
	private static final Formatter nullFormatter = new Formatter() {
		public String highlightTerm(String termText,TokenGroup group) {
			return termText;
		}
	};

	/**
	 * Returns an excerpt of {@code text} of roughly {@code size} characters
	 * chosen by the highlighter for this query, without any markup.
	 * <p>
	 * When the highlighter produces no fragment, the start of the text is
	 * returned via {@link #getStartingFragment}.  Otherwise, if
	 * {@code dotdotdot} is not null, it is added before a fragment that the
	 * text does not start with and after a fragment that the text does not
	 * end with.
	 *
	 * @param text      text to excerpt
	 * @param size      fragment size given to the fragmenter
	 * @param dotdotdot marker for omitted text, or null for none
	 * @throws RuntimeException wrapping any IOException or
	 *         InvalidTokenOffsetsException raised by the highlighter
	 */
	public String getFragment(String text,int size,String dotdotdot) {
		try {
			Highlighter hl = new Highlighter(nullFormatter,scorer);
			hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) );
			String s = hl.getBestFragment(Lucene.analyzer,null,text);
			if( s == null ) {
				// getStartingFragment already appends the marker when it
				// truncates.  Letting its result fall through to the checks
				// below produced "...start......" because a fragment ending in
				// the marker neither starts nor ends the original text.
				return getStartingFragment(text,size,dotdotdot);
			}
			if( dotdotdot != null && s.length() < text.length() ) {
				// evaluate both positions against the bare fragment before decorating it
				boolean atStart = text.startsWith(s);
				boolean atEnd = text.endsWith(s);
				if( !atStart )
					s = dotdotdot + s;
				if( !atEnd )
					s = s + dotdotdot;
			}
			return s;
		} catch(IOException e) {
			throw new RuntimeException(e);
		} catch(InvalidTokenOffsetsException e) {
			throw new RuntimeException(e);
		}
	}

	// Unchecked marker thrown by the collector in hasNodes() to abandon the search at the first hit.
	private static class DoneException extends RuntimeException {}

	/**
	 * Reports whether the query matches at least one document.  The search
	 * is abandoned as soon as the collector is handed its first hit.
	 *
	 * @throws RuntimeException wrapping any IOException from the searcher
	 */
	public boolean hasNodes() {
		try {
			LuceneSearcher searcher = Lucene.newSearcher(site);
			try {
				boolean found;
				try {
					// the first document processed aborts the search
					HitCollector firstHitOnly = new HitCollector() {
						protected void process(Document hit) {
							throw new DoneException();
						}
					};
					searcher.search( query, firstHitOnly );
					found = false;
				} catch(DoneException stopped) {
					found = true;
				}
				return found;
			} finally {
				searcher.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}

	/** Callback that receives the id of each node visited by {@code forEach}. */
	public interface Handler {
		/** Called once per hit with the node id read from the hit's document. */
		void handle(long nodeId);
	}

	/**
	 * Runs the query and passes the node id of every hit to {@code h}.
	 * The searcher is closed even when the handler throws.
	 *
	 * @param h callback invoked once per hit
	 * @throws RuntimeException wrapping any IOException from the searcher
	 */
	public void forEach(final Handler h) {
		try {
			final LuceneSearcher searcher = Lucene.newSearcher(site);
			try {
				HitCollector visitor = new HitCollector() {
					protected void process(Document hit) {
						h.handle( Lucene.getNodeId(hit) );
					}
				};
				searcher.search( query, visitor );
			} finally {
				searcher.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Returns the number of documents matching the query and filter.
	 * <p>
	 * The count is taken from a previous {@code getNodes} or
	 * {@code getTotalHits} call when one has already run; otherwise a search
	 * requesting zero documents is executed to obtain it.
	 *
	 * @throws RuntimeException if the query expands to too many clauses
	 *         (with the Lucene exception as cause) or the searcher fails
	 *         with an IOException
	 */
	public int getTotalHits() {
		if( totalHits == -1 ) {
			try {
				LuceneSearcher searcher = Lucene.newSearcher(site);
				try {
					TopDocs hits = searcher.search(query, filter, 0);
					totalHits = hits.totalHits;
				} finally {
					searcher.close();
				}
			} catch (BooleanQuery.TooManyClauses e) {
				// keep the user-facing message, but do not lose the original failure
				throw new RuntimeException("Your search will give too many matches.", e);
			} catch(IOException e) {
				throw new RuntimeException(e);
			}
		}
		return totalHits;
	}

	/**
	 * Returns one page of matching nodes and refreshes the cached hit count.
	 * <p>
	 * The top {@code i+n} hits are fetched, using the configured sort when
	 * one is set, and the hits from index {@code i} onwards are loaded.  A hit
	 * whose node resolves to null, or whose loading fails with an
	 * IOException, is skipped (the failure is logged), so the list may hold
	 * fewer than {@code n} nodes.
	 *
	 * @param i index of the first hit to return
	 * @param n maximum number of nodes to return
	 * @return the loaded nodes; empty when there are no hits at or after {@code i}
	 * @throws TooManyClauses   if the query expands to too many clauses
	 * @throws RuntimeException wrapping an IOException from the search itself
	 */
	public List<Node> getNodes(int i, int n) throws TooManyClauses {
		try {
			LuceneSearcher searcher = Lucene.newSearcher(site);
			try {
				TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort);
				totalHits = hits.totalHits;
				int lim = hits.scoreDocs.length;
				if( lim <= i )
					return Collections.emptyList();
				List<Node> a = new ArrayList<Node>(lim - i);
				for (int j=i; j<lim; j++) {
					int docId = hits.scoreDocs[j].doc;
					try {
						Node node = Lucene.getNode(site, searcher, docId);
						if (node != null) {
							a.add(node);
						}
					} catch(IOException e) {
						// best effort: one unreadable hit must not fail the whole page,
						// but log the full exception so the failure can be diagnosed
						logger.error("Failed to load node for Lucene document "+docId, e);
					}
				}
				return a;
			} finally {
				searcher.close();
			}
		} catch (BooleanQuery.TooManyClauses e) {
			throw new TooManyClauses(e);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Unchecked exception thrown by getNodes() when Lucene rejects the query
	 * with BooleanQuery.TooManyClauses; the Lucene exception is kept as the cause.
	 */
	public static final class TooManyClauses extends RuntimeException {
		// Package-private: only created inside this package, wrapping the Lucene exception.
		TooManyClauses(BooleanQuery.TooManyClauses e) {
			super(e);
		}
	}

}