view src/nabble/model/Lucene.java @ 47:72765b66e2c3

remove mailing list code
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 18 Jun 2021 17:44:24 -0600
parents abe0694e9849
children

/*

Copyright (C) 2004  Franklin Schmidt <frank@gustos.com>

*/

package nabble.model;

import fschmidt.db.Listener;
import fschmidt.util.java.CollectionUtils;
import fschmidt.util.mail.MailEncodingException;
import nabble.model.lucene.HitCollector;
import nabble.model.lucene.IndexCache;
import nabble.model.lucene.LuceneSearcher;
import nabble.view.lib.Permissions;
import nabble.view.lib.help.Help;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanFilter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;


public final class Lucene {
	private static final Logger logger = LoggerFactory.getLogger(Lucene.class);

	public interface DocumentListener {
		void event(Node node,Document doc);
	}

	private static final int nodeIndexVersion = 3;

	private static final String NODE_ID_FLD = "nodeId";
	static final String KIND_FLD = "kind";
	static final String SUBJECT_FLD = "subject";
	static final String MESSAGE_FLD = "message";
	static final String ANCESTORS_FLD = "ancestors";
	static final String PARENT_ID_FLD = "parentId";
	static final String DATE_FLD = "date";
	private static final String RANGE_SEARCH_DATE_FLD = "rangeSearchDate";
	private static final String DAY_FLD = "day";
	static final String USER_ID_FLD = "userId";
	static final String AUTHOR_FLD = "author";
	static final String PRIVATE_NODE_FLD = "privateNode";

	static final Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT,"English");

	private static final List<DocumentListener> documentListeners = new ArrayList<DocumentListener>();

	private Lucene() {}  // never instantiated

	static LuceneSearcher newSearcher(Site site) throws IOException {
		return nodeIndex.openSearcher(site.getId());
	}

	static long getNodeId(Document doc) {
		return Long.parseLong(doc.get(NODE_ID_FLD));
	}

	static NodeImpl getNode(SiteImpl site, LuceneSearcher searcher, int docId) throws IOException {
		return getNode( site, searcher.doc(docId) );
	}

	static NodeImpl getNode(SiteImpl site,Document doc) {
		long nodeId = getNodeId(doc);
		NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
		if( node==null ) {
			logger.error("missing node "+nodeId+", removing from lucene");
			removeNode(site,nodeId);
		}
		return node;
	}

	private static void add(final Node node) {
		Document doc = document(node);
		try {
			IndexWriter indexWriter = nodeIndex.openIndexWriter(node.getSite().getId());
			try {
				indexWriter.addDocument(doc);
			} finally {
				indexWriter.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}
/*
	private static void removeSite(long siteId) {
		try {
			nodeIndex.delete(siteId);
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}
*/
	private static void removeNode(Site site,long nodeId) {
		Term term = new Term(NODE_ID_FLD,Long.toString(nodeId));
		try {
			IndexWriter indexWriter = nodeIndex.openIndexWriter(site.getId());
			try {
				indexWriter.deleteDocuments(term);
			} finally {
				indexWriter.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}

	public static void update(final Node node) {
		try {
			Document doc = document(node);
			if( doc==null ) {
				removeNode(node.getSite(),node.getId());
			} else {
				IndexWriter indexWriter = nodeIndex.openIndexWriter(node.getSite().getId());
				try {
					indexWriter.updateDocument( new Term(NODE_ID_FLD,doc.get(NODE_ID_FLD)), doc );
				} finally {
					indexWriter.close();
				}
			}
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	static void updateNode(SiteImpl site,long nodeId) {
		Node node = NodeImpl.getNode(site.siteKey,nodeId);
		if( node == null ) {
			removeNode(site,nodeId);
		} else {
			update(node);
		}
	}

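	// Keep the index in sync with the database: delete a node's descendants from the
	// index when it is deleted, index new nodes after commit, and reindex a node when
	// one of the indexed database columns changes.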
	static {
/*
		SiteImpl.table.getPostDeleteListeners().add(new Listener<SiteImpl>(){
			public void event(SiteImpl site) {
				removeSite(site.getId());
			}
		});
*/
		NodeImpl.postDeleteListeners.add(new Listener<NodeImpl>(){
			public void event(NodeImpl node) {
				// remove descendants
				Term term = new Term(ANCESTORS_FLD,Long.toString(node.getId()));
				try {
					IndexWriter indexWriter = nodeIndex.openIndexWriter(node.siteKey.getId());
					try {
						indexWriter.deleteDocuments(term);
					} finally {
						indexWriter.close();
					}
				} catch(IOException e) {
					throw new RuntimeException(e);
				}
			}
		});
		NodeImpl.postInsertListeners.add(new Listener<NodeImpl>(){
			public void event(final NodeImpl node) {
				node.siteKey.getDb().runAfterCommit(new Runnable(){public void run(){
					try {
						add(node);
					} catch(MailEncodingException e) {
						logger.warn(node.toString(),e);
					}
				}});
			}
		});
		NodeImpl.preUpdateListeners.add(new Listener<NodeImpl>(){
			public void event(NodeImpl node) {
				Set fields = node.getDbRecord().fields().keySet();
				if( CollectionUtils.intersects(fields,nodeDbFields) ) {
					final long nodeId = node.getId();
					final SiteKey siteKey = node.siteKey;
					siteKey.getDb().runAfterCommit(new Runnable() {
						public void run() {
							NodeImpl node = NodeImpl.getNode(siteKey,nodeId);
							if (node != null) update(node);
						}
					});
				}
			}
		});
	}

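	// Reindexes every node whose ancestor list contains the given node.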
	static void staleNode(NodeImpl node) throws IOException {
		if( node==null )
			return;
		logger.debug("staleNode update");
		updateNodes( node.getSiteImpl(), descendants(node) );
		logger.debug("staleNode done");
	}

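	// No-op; presumably called only to force this class (and its static index setup) to load.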
	static void nop() {}

	public static void addDocumentListener(DocumentListener documentListener) {
		documentListeners.add(documentListener);
	}

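	// Builds the Lucene document for a node; registered DocumentListeners may add further fields.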
	static Document document(Node node) {
		Document doc = new Document();
		doc.add( new Field(NODE_ID_FLD, Long.toString(node.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) );
		doc.add( new Field(KIND_FLD, node.getKind().toString(), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );
		String subject = node.getSubject();
		Field subjectFld = new Field(SUBJECT_FLD, subject, Field.Store.NO, Field.Index.ANALYZED);
		subjectFld.setBoost(2.0f);
		doc.add(subjectFld);
		try {
			String message = MessageUtils.htmlToSearchText(node.getMessage().parse());
			doc.add( new Field(MESSAGE_FLD, message, Field.Store.NO, Field.Index.ANALYZED) );
		} catch(RuntimeException e) {
			logger.error("nodeId="+node.getId(),e);
		}

		for( Node f : node.getAncestors() ) {
			doc.add( new Field(ANCESTORS_FLD, Long.toString(f.getId()), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );
		}
		Node parent = node.getParent();
		if (parent != null)
			doc.add(new Field(PARENT_ID_FLD, Long.toString(parent.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

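		// Creation time in seconds, negated (presumably so ascending numeric order puts newest first),
		// plus coarser fields for date-range filtering and per-day queries.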
		int date = (int)(-node.getWhenCreated().getTime()/1000);
		doc.add( new NumericField(DATE_FLD).setIntValue(date) );
		int rangeSearchDate = formatRangeSearchDate(node.getWhenCreated());
		doc.add( new NumericField(RANGE_SEARCH_DATE_FLD).setIntValue(rangeSearchDate) );
		String day = formatDay(node.getWhenCreated());
		doc.add( new Field(DAY_FLD, day, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );

		Person owner = node.getOwner();
		String userId = owner.getSearchId();
		doc.add( new Field(USER_ID_FLD, userId, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) );
		String author = owner.getName();
		doc.add( new Field(AUTHOR_FLD, author, Field.Store.NO, Field.Index.ANALYZED) );
		doc.add( new Field(PRIVATE_NODE_FLD, formatPrivateNode(node), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
		for( DocumentListener documentListener : documentListeners ) {
			documentListener.event(node,doc);
		}
		return doc;
	}

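	// Database columns that affect the indexed document; updates touching none of these skip reindexing.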
	private static final String[] nodeDbFields =
		{"subject", "when_created", "msg_fmt", "parent_id", "is_app", "owner_id", "cookie", "anonymous_name"};


	public static void updateRecursively(Node node) {
		update(node);
		for (Node n : node.getChildren()) {
			updateRecursively(n);
		}
	}







	// from SearchServer

	static NodeImpl node(SiteImpl site,Document doc) {
		long nodeId = getNodeId(doc);
		NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
		if (node==null)
			logger.error("invalid node_id in lucene index: "+nodeId);
		return node;
	}

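	// Rebuilds a site's index from scratch by reading every node_id from the database
	// and indexing the corresponding nodes.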
	private static final IndexCache.Builder<Long> builder = new IndexCache.Builder<Long>() {

		public void build(Long siteId) throws SQLException, IOException {
			SiteKey siteKey = SiteKey.getInstance(siteId);
			Connection con = siteKey.getDb().getConnection();
			try {
				long[] nodeIds;
				{
					Statement stmt = con.createStatement();
					ResultSet rs = stmt.executeQuery(
						"select count(*) as n from node"
					);
					rs.next();
					nodeIds = new long[rs.getInt("n")];
					rs.close();
					stmt.close();
				}
				{
					PreparedStatement stmt = con.prepareStatement(
						"select node_id from node order by node_id limit ?"
					);
					stmt.setInt(1,nodeIds.length);
					ResultSet rs = stmt.executeQuery();
					for( int i=0; rs.next(); i++ ) {
						nodeIds[i] = rs.getLong("node_id");
					}
					rs.close();
					stmt.close();
				}
				logger.error("Lucene started - site_id = " + siteId + " / " + nodeIds.length + " nodes");
				IndexWriter indexWriter = nodeIndex.openIndexWriter(siteId);
				int count = 0;
				int lastPercent = 0;
				try {
					for( long nodeId : nodeIds ) {
						Node node = NodeImpl.getNode(siteKey,nodeId);
						if( node != null ) {
							Document doc = document(node);
							indexWriter.updateDocument( new Term(NODE_ID_FLD,doc.get(NODE_ID_FLD)), doc );
						}
						count++;
						int percent = Math.round(100f * count / (float) nodeIds.length);
						if (percent > lastPercent) {
							logger.error("Lucene build " + percent + "% completed");
							lastPercent = percent;
						}
					}
				} finally {
					indexWriter.close();
				}
			} finally {
				con.close();  // release the connection even if the rebuild fails
			}
		}

		public boolean exists(String keyString) {
			long id;
			try {
				id = Long.parseLong(keyString);
			} catch(NumberFormatException e) {
				return false;
			}
			return SiteKey.getInstance(id).siteGlobal() != null;
		}
	};

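	// One on-disk Lucene index per site, keyed by site id, stored under <home_dir>local/lucene/.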
	private static final IndexCache<Long> nodeIndex;
	static {
		logger.info("Starting search server");
		Init.luceneStarted = true;
		String homeDir = (String)Init.get("home_dir");
		String luceneDir = homeDir + "local/lucene/";
		File dirFile = new File(luceneDir);
		nodeIndex = new IndexCache<Long>(dirFile,analyzer,nodeIndexVersion,builder);
	}

	private static void updateNodes(final SiteImpl site,Query query) {
		try {
			final LuceneSearcher searcher = newSearcher(site);
			try {
				searcher.search(query,new HitCollector() {
					protected void process(Document doc) {
						Node node = getNode(site,doc);
						if( node != null )
							update(node);
					}
				});
			} finally {
				searcher.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}


	public static boolean isReady(Site site) {
		return nodeIndex.isReady(site.getId());
	}

	public static void rebuild(Site site) throws IOException {
		nodeIndex.rebuild(site.getId());
	}

	static synchronized void shutdown() {
		nodeIndex.shutdown();
	}





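	// Dates are indexed at ten-minute resolution for range filtering (see getRangeFilter).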
	private static final long tenMinutes = 1000L*60*10;

	static int formatRangeSearchDate(Date date) {
		return (int)(date.getTime()/tenMinutes);
	}


	private static final DateFormat dayFormat = new SimpleDateFormat("yyyyMMdd");

	static String formatDay(Date date) {
		synchronized(dayFormat) {
			return dayFormat.format(date);
		}
	}

	static String formatPrivateNode(Node node) {
		Node privateNode = Permissions.getPrivateNodeForSearch(node);
		return privateNode==null ? "none" : Long.toString(privateNode.getId());
	}


	public static Filter and(Filter f1,Filter f2) {
		BooleanFilter f = new BooleanFilter();
		f.add(new FilterClause(f1,BooleanClause.Occur.MUST));
		f.add(new FilterClause(f2,BooleanClause.Occur.MUST));
		return f;
	}

	public static Filter getRangeFilter(Date from, Date to) {
		Integer lowerDateTerm = (from==null)?null:formatRangeSearchDate(from);
		Integer upperDateTerm = (to==null)?null:formatRangeSearchDate(to);
		return NumericRangeFilter.newIntRange(RANGE_SEARCH_DATE_FLD, lowerDateTerm, upperDateTerm, true,true);
	}


	private static final int maxCachedFilters = Init.get("maxCachedFilters", 20);

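	// Bounded cache of wrapped filters; the LinkedHashMap evicts its eldest (oldest-inserted)
	// entry once the size exceeds maxCachedFilters.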
	private static Map<Filter,CachingWrapperFilter> filterCache = new LinkedHashMap<Filter,CachingWrapperFilter>() {
		protected boolean removeEldestEntry(Map.Entry<Filter,CachingWrapperFilter> eldest) {
			return size() > maxCachedFilters;
		}
	};

	public static synchronized CachingWrapperFilter getCachedFilter(Filter filter) {
		CachingWrapperFilter f = filterCache.get(filter);
		if( f == null ) {
			f = new CachingWrapperFilter(filter);
			filterCache.put(filter,f);
		}
		return f;
	}


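	// Term queries over the structural fields: all descendants of a node, its direct
	// children, a single node by id, and all nodes created on a given day.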
	static Query descendants(Node node) {
		return descendants(node.getId());
	}

	private static Query descendants(long nodeId) {
		return new TermQuery(new Term(ANCESTORS_FLD,Long.toString(nodeId)));
	}

	static Query children(Node node) {
		return new TermQuery(new Term(PARENT_ID_FLD,Long.toString(node.getId())));
	}

	static Query node(Node node) {
		return node(node.getId());
	}

	static Query node(long nodeId) {
		return new TermQuery(new Term(NODE_ID_FLD,Long.toString(nodeId)));
	}

	static Query day(Date date) {
		return new TermQuery(new Term(DAY_FLD,formatDay(date)));
	}


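	// Help search uses a small in-memory (RAMDirectory) index that addHelp rebuilds
	// from the given collection each time it is called.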
	private static final Directory helpDir = new RAMDirectory();
	private static IndexReader helpIndexReader;

	private static final String[] helpSearchFields = new String[] {
		"answer", "question"
	};

	public static Help[] searchHelp(String line) throws ParseException {
		try {
			Query query = NodeSearcher.parse(line,helpSearchFields);
			Searcher searcher = new IndexSearcher(helpIndexReader);
			try {
				TopDocs hits = searcher.search(query,helpIndexReader.numDocs());
				Help[] helps = new Help[hits.scoreDocs.length];
				for( int i=0; i<helps.length; i++ ) {
					helps[i] = Help.getHelp(Integer.parseInt(searcher.doc(hits.scoreDocs[i].doc).get("id")));
				}
				return helps;
			} catch (BooleanQuery.TooManyClauses e) {
				throw new RuntimeException("Your search will give too many matches.");
			} finally {
				searcher.close();
			}
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	public static void addHelp(final Collection<Help> helps) {
		try {
			IndexWriter writer = new IndexWriter(helpDir,analyzer,true,IndexWriter.MaxFieldLength.LIMITED);
			for( Help help : helps ) {
				writer.addDocument(document(help));
			}
			writer.close();
			helpIndexReader = IndexReader.open(helpDir,true);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	private static Document document(Help help) {
		Document doc = new Document();
		String id = Integer.toString(help.id);
		doc.add( new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
		Field answer = new Field("answer", help.answer(), Field.Store.NO, Field.Index.ANALYZED);
		doc.add(answer);
		Field question = new Field("question", help.question, Field.Store.NO, Field.Index.ANALYZED);
		doc.add(question);
		return doc;
	}

}