changeset 585:bb3818249dfb

add Parsers
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 14 Aug 2015 06:35:20 -0600
parents 0742ac78fa69
children a140be489a72
files core/src/luan/modules/Parsers.luan core/src/luan/modules/parsers/BBCode.java core/src/luan/modules/parsers/Csv.java core/src/luan/modules/parsers/ParseException.java core/src/luan/modules/parsers/Parser.java
diffstat 5 files changed, 577 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/Parsers.luan
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/src/luan/modules/Parsers.luan	Fri Aug 14 06:35:20 2015 -0600
@@ -0,0 +1,11 @@
+java()  -- NOTE(review): presumably enables the "java:" requires below; confirm
+local BBCode = require "java:luan.modules.parsers.BBCode"  -- the BBCode Java class
+local Csv = require "java:luan.modules.parsers.Csv"  -- the Csv Java class
+
+local M = {}  -- the table this module returns
+
+M.bbcode_to_html = BBCode.toHtml  -- BBCode text to HTML
+M.bbcode_to_text = BBCode.toText  -- BBCode text to plain text
+M.csv_to_list = Csv.toList  -- one CSV line to a list of fields
+
+return M
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/BBCode.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/src/luan/modules/parsers/BBCode.java	Fri Aug 14 06:35:20 2015 -0600
@@ -0,0 +1,332 @@
+package luan.modules.parsers;
+
+
+/**
+ * Translates BBCode markup into HTML ({@link #toHtml}) or plain text
+ * ({@link #toText}).
+ *
+ * <p>Tag names are matched case-insensitively.  A tag is translated only when
+ * it is well formed (its closing tag is found); otherwise its characters are
+ * copied to the output unchanged.
+ *
+ * <p>NOTE(review): characters outside of tags are copied verbatim and are not
+ * HTML-escaped by this class; callers handling untrusted input presumably have
+ * to encode it themselves -- confirm against the call sites.
+ */
+public final class BBCode {
+
+	/** Tag prefixes that end a run of plain content inside a tag; see {@link #couldBeTag}. */
+	private static final String[] TAG_PREFIXES = {
+		"[b]", "[/b]",
+		"[i]", "[/i]",
+		"[u]", "[/u]",
+		"[url]", "[url=", "[/url]",
+		"[code]", "[/code]",
+		"[img]", "[/img]",
+		"[color=", "[/color]",
+		"[size=", "[/size]",
+		"[youtube]", "[/youtube]",
+		"[quote]", "[quote=", "[/quote]",
+	};
+
+	/**
+	 * Converts BBCode to HTML.
+	 *
+	 * @param bbcode the BBCode source text
+	 * @return the text with each well-formed tag replaced by its HTML equivalent
+	 */
+	public static String toHtml(String bbcode) {
+		return new BBCode(bbcode,true).parse();
+	}
+
+	/**
+	 * Converts BBCode to plain text: formatting tags are removed but their
+	 * content is kept, while images, videos and quotes produce no output.
+	 *
+	 * @param bbcode the BBCode source text
+	 * @return the text with each well-formed tag stripped
+	 */
+	public static String toText(String bbcode) {
+		return new BBCode(bbcode,false).parse();
+	}
+
+	private final Parser parser;
+	private final boolean toHtml;
+
+	private BBCode(String text,boolean toHtml) {
+		this.parser = new Parser(text);
+		this.toHtml = toHtml;
+	}
+
+	/** Top-level loop: translates blocks and copies every other character through. */
+	private String parse() {
+		StringBuilder sb = new StringBuilder();
+		while( !parser.endOfInput() ) {
+			String block = parseBlock();
+			if( block != null ) {
+				sb.append(block);
+			} else {
+				sb.append( parser.currentChar() );
+				parser.anyChar();
+			}
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Parses the content of a tag.  Like {@link #parse}, but stops in front of
+	 * anything that starts like a known tag without being a complete block, so
+	 * the caller can test for its own closing tag there.
+	 */
+	private String parseWellFormed() {
+		StringBuilder sb = new StringBuilder();
+		while( !parser.endOfInput() ) {
+			String block = parseBlock();
+			if( block != null ) {
+				sb.append(block);
+				continue;
+			}
+			if( couldBeTag() )
+				break;
+			sb.append( parser.currentChar() );
+			parser.anyChar();
+		}
+		return sb.toString();
+	}
+
+	/** Tells whether the input at the current position starts with a known tag prefix. */
+	private boolean couldBeTag() {
+		if( parser.currentChar() != '[' )
+			return false;
+		for( String prefix : TAG_PREFIXES ) {
+			if( parser.testIgnoreCase(prefix) )
+				return true;
+		}
+		return false;
+	}
+
+	/**
+	 * Tries each block parser at the current position.
+	 *
+	 * @return the translated block, or null (with the position unchanged) when
+	 *         no complete block starts here
+	 */
+	private String parseBlock() {
+		if( parser.currentChar() != '[' )
+			return null;
+		String s;
+		s = parseInline("b");  if(s!=null) return s;
+		s = parseInline("i");  if(s!=null) return s;
+		s = parseInline("u");  if(s!=null) return s;
+		s = parseUrl1();  if(s!=null) return s;
+		s = parseUrl2();  if(s!=null) return s;
+		s = parseCode();  if(s!=null) return s;
+		s = parseImg();  if(s!=null) return s;
+		s = parseColor();  if(s!=null) return s;
+		s = parseSize();  if(s!=null) return s;
+		s = parseYouTube();  if(s!=null) return s;
+		s = parseQuote1();  if(s!=null) return s;
+		s = parseQuote2();  if(s!=null) return s;
+		return null;
+	}
+
+	/** Handles [b], [i] and [u]: {@code [tag]content[/tag]} becomes {@code <tag>content</tag>} in HTML mode. */
+	private String parseInline(String tag) {
+		parser.begin();
+		if( !parser.matchIgnoreCase("["+tag+"]") )
+			return parser.failure(null);
+		String content = parseWellFormed();
+		if( !parser.matchIgnoreCase("[/"+tag+"]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<"+tag+">"+content+"</"+tag+">" : content;
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [url]http://...[/url]}. */
+	private String parseUrl1() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[url]") )
+			return parser.failure(null);
+		String url = parseRealUrl();
+		// a missing url must fail here: otherwise HTML mode would emit "null" and
+		// text mode would commit the position while returning null
+		if( url == null || !parser.matchIgnoreCase("[/url]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<a href='"+url+"'>"+url+"</a>" : url;
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [url=http://...]content[/url]}. */
+	private String parseUrl2() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[url=") )
+			return parser.failure(null);
+		String url = parseRealUrl();
+		if( url == null || !parser.match(']') )
+			return parser.failure(null);
+		String content = parseWellFormed();
+		if( !parser.matchIgnoreCase("[/url]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<a href='"+url+"'>"+content+"</a>" : content;
+		return parser.success(rtn);
+	}
+
+	/**
+	 * Parses an http or https url, skipping spaces around it.  The url ends at
+	 * the first space, bracket or single quote.
+	 *
+	 * @return the url, or null when the input does not start with http:// or https://
+	 */
+	private String parseRealUrl() {
+		parser.begin();
+		while( parser.match(' ') );
+		int start = parser.currentIndex();
+		if( !parser.matchIgnoreCase("http") )
+			return parser.failure(null);
+		parser.matchIgnoreCase("s");
+		if( !parser.matchIgnoreCase("://") )
+			return parser.failure(null);
+		while( parser.noneOf(" []'") );
+		String url = parser.textFrom(start);
+		while( parser.match(' ') );
+		return parser.success(url);
+	}
+
+	/** Handles {@code [code]...[/code]}; the content is taken literally, tags inside it are not parsed. */
+	private String parseCode() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[code]") )
+			return parser.failure(null);
+		int start = parser.currentIndex();
+		while( !parser.testIgnoreCase("[/code]") ) {
+			if( !parser.anyChar() )
+				return parser.failure(null);
+		}
+		String content = parser.textFrom(start);
+		if( !parser.matchIgnoreCase("[/code]") ) throw new RuntimeException();
+		String rtn = toHtml ? "<code>"+content+"</code>" : content;
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [img]http://...[/img]}; produces nothing in text mode. */
+	private String parseImg() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[img]") )
+			return parser.failure(null);
+		String url = parseRealUrl();
+		if( url == null || !parser.matchIgnoreCase("[/img]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<img src='"+url+"'>" : "";
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [color=...]content[/color]}; the color is an optional '#' followed by letters and digits. */
+	private String parseColor() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[color=") )
+			return parser.failure(null);
+		int start = parser.currentIndex();
+		parser.match('#');
+		while( parser.inCharRange('0','9')
+			|| parser.inCharRange('a','z')
+			|| parser.inCharRange('A','Z')
+		);
+		String color = parser.textFrom(start);
+		if( !parser.match(']') )
+			return parser.failure(null);
+		String content = parseWellFormed();
+		if( !parser.matchIgnoreCase("[/color]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<span style='color: "+color+"'>"+content+"</span>" : content;
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [size=...]content[/size]}; the size consists of digits and dots and is emitted in em units. */
+	private String parseSize() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[size=") )
+			return parser.failure(null);
+		int start = parser.currentIndex();
+		while( parser.match('.') || parser.inCharRange('0','9') );
+		String size = parser.textFrom(start);
+		if( !parser.match(']') )
+			return parser.failure(null);
+		String content = parseWellFormed();
+		if( !parser.matchIgnoreCase("[/size]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<span style='font-size: "+size+"em'>"+content+"</span>" : content;
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [youtube]id[/youtube]}; produces nothing in text mode. */
+	private String parseYouTube() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[youtube]") )
+			return parser.failure(null);
+		int start = parser.currentIndex();
+		while( parser.inCharRange('0','9')
+			|| parser.inCharRange('a','z')
+			|| parser.inCharRange('A','Z')
+			|| parser.match('-')
+			|| parser.match('_')
+		);
+		String id = parser.textFrom(start);
+		if( id.length()==0 || !parser.matchIgnoreCase("[/youtube]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<iframe width='420' height='315' src='https://www.youtube.com/embed/"+id+"' frameborder='0' allowfullscreen></iframe>" : "";
+		return parser.success(rtn);
+	}
+
+	/** Handles {@code [quote]content[/quote]}; produces nothing in text mode. */
+	private String parseQuote1() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[quote]") )
+			return parser.failure(null);
+		String content = parseWellFormed();
+		if( !parser.matchIgnoreCase("[/quote]") )
+			return parser.failure(null);
+		String rtn = toHtml ? "<blockquote>"+content+"</blockquote>" : "";
+		return parser.success(rtn);
+	}
+
+	/**
+	 * Handles {@code [quote=name]content[/quote]} and
+	 * {@code [quote=name;src]content[/quote]}, where src becomes the id of a
+	 * thread link around the name.  Produces nothing in text mode.
+	 */
+	private String parseQuote2() {
+		parser.begin();
+		if( !parser.matchIgnoreCase("[quote=") )
+			return parser.failure(null);
+		int start = parser.currentIndex();
+		while( parser.noneOf("[];") );
+		String name = parser.textFrom(start).trim();
+		if( name.length() == 0 )
+			return parser.failure(null);
+		String src = null;
+		if( parser.match(';') ) {
+			start = parser.currentIndex();
+			while( parser.noneOf("[]'") );
+			src = parser.textFrom(start).trim();
+			if( src.length() == 0 )
+				return parser.failure(null);
+		}
+		if( !parser.match(']') )
+			return parser.failure(null);
+		String content = parseWellFormed();
+		if( !parser.matchIgnoreCase("[/quote]") )
+			return parser.failure(null);
+		if( !toHtml )
+			return parser.success("");
+		StringBuilder sb = new StringBuilder();
+		sb.append( "<blockquote><div quoted>" );
+		if( src != null )
+			sb.append( "<a href='/thread?id=" ).append( src ).append( "'>" );
+		sb.append( name ).append( " wrote" );
+		if( src != null )
+			sb.append( "</a>" );
+		sb.append( ":</div>" ).append( content ).append( "</blockquote>" );
+		return parser.success(sb.toString());
+	}
+
+}
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/Csv.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/src/luan/modules/parsers/Csv.java	Fri Aug 14 06:35:20 2015 -0600
@@ -0,0 +1,85 @@
+package luan.modules.parsers;
+
+import luan.LuanTable;
+
+
+/**
+ * Splits one line of comma-separated values into a list of fields.
+ *
+ * <p>A field is either unquoted (it runs up to the next comma and is trimmed)
+ * or enclosed in double quotes (it is taken literally, and a doubled quote
+ * inside it stands for one quote character).
+ */
+public final class Csv {
+
+	/**
+	 * Parses a CSV line.
+	 *
+	 * @param line the text to split
+	 * @return a table to which the fields were added in order
+	 * @throws ParseException if a quoted field is not closed or is followed by
+	 *         something other than a comma
+	 */
+	public static LuanTable toList(String line) throws ParseException {
+		return new Csv(line).parse();
+	}
+
+	private final Parser parser;
+
+	private Csv(String line) {
+		this.parser = new Parser(line);
+	}
+
+	private ParseException exception(String msg) {
+		return new ParseException(parser,msg);
+	}
+
+	/** Reads fields separated by commas until the end of the input. */
+	private LuanTable parse() throws ParseException {
+		LuanTable list = new LuanTable();
+		while(true) {
+			skipSpaces();
+			String field = parseField();
+			list.rawPut(list.rawLength()+1,field);
+			skipSpaces();
+			if( parser.endOfInput() )
+				return list;
+			if( !parser.match(',') )
+				throw exception("unexpected char");
+		}
+	}
+
+	/** Reads one quoted or unquoted field starting at the current position. */
+	private String parseField() throws ParseException {
+		parser.begin();
+		String rtn;
+		if( parser.match('"') ) {
+			StringBuilder sb = new StringBuilder();
+			while(true) {
+				int start = parser.currentIndex();
+				while( parser.noneOf("\"") );
+				sb.append( parser.textFrom(start) );
+				if( !parser.match('"') ) {
+					// end of input: drop the frame first so the error points at the field start
+					parser.failure();
+					throw exception("unclosed quote");
+				}
+				if( !parser.match('"') )
+					break;  // that was the closing quote
+				sb.append('"');  // a doubled quote is a literal quote
+			}
+			rtn = sb.toString();
+		} else {
+			int start = parser.currentIndex();
+			while( parser.noneOf(",") );
+			rtn = parser.textFrom(start).trim();
+		}
+		return parser.success(rtn);
+	}
+
+	/** Skips spaces and tabs. */
+	private void skipSpaces() {
+		while( parser.anyOf(" \t") );
+	}
+
+}
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/ParseException.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/src/luan/modules/parsers/ParseException.java	Fri Aug 14 06:35:20 2015 -0600
@@ -0,0 +1,50 @@
+package luan.modules.parsers;
+
+
+/**
+ * Reports a parse failure together with the position in the parsed text at
+ * which it happened.
+ */
+public final class ParseException extends Exception {
+	/** The complete text that was being parsed. */
+	public final String text;
+	/** The parser's current index when the exception was created. */
+	public final int errorIndex;
+	/** The parser's high index when the exception was created. */
+	public final int highIndex;
+
+	ParseException(Parser parser,String msg) {
+		super(msg);
+		this.text = parser.text;
+		this.errorIndex = parser.currentIndex();
+		this.highIndex = parser.highIndex();
+	}
+
+	/**
+	 * Builds the message followed by its 1-based line and position, the line
+	 * containing the error and a caret line pointing at the error position.
+	 */
+	@Override public String getMessage() {
+		// the error line starts right after the last newline in front of errorIndex
+		int lineStart = text.lastIndexOf('\n',errorIndex-1) + 1;
+		int lineEnd = text.indexOf('\n',lineStart);
+		String errorLine = lineEnd == -1 ? text.substring(lineStart) : text.substring(lineStart,lineEnd);
+		int lineNumber = 1;
+		for( int k=0; k<lineStart; k++ ) {
+			if( text.charAt(k) == '\n' )
+				lineNumber++;
+		}
+		int column = errorIndex - lineStart;
+
+		StringBuilder sb = new StringBuilder();
+		sb.append( super.getMessage() );
+		sb.append( " (line " ).append( lineNumber ).append( ", pos " ).append( column+1 ).append( ")\n" );
+		sb.append( errorLine ).append( '\n' );
+		// repeat the line's tabs so the caret lines up with the character above it
+		for( int k=0; k<column; k++ ) {
+			sb.append( errorLine.charAt(k)=='\t' ? '\t' : ' ' );
+		}
+		sb.append("^\n");
+		return sb.toString();
+	}
+}
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/Parser.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/src/luan/modules/parsers/Parser.java	Fri Aug 14 06:35:20 2015 -0600
@@ -0,0 +1,210 @@
+package luan.modules.parsers;
+
+
+/**
+ * A backtracking cursor over a string.
+ *
+ * <p>Positions are kept on a stack of frames: {@link #begin} opens a frame at
+ * the current position, {@link #success()} closes it and keeps the position it
+ * reached, {@link #failure()} closes it and falls back to the position of the
+ * enclosing frame.  The matching methods move the current frame only.
+ */
+public class Parser {
+	/** The text being parsed. */
+	public final String text;
+	private final int len;
+	/** One position per open frame; {@code stack[frame]} is the current position. */
+	private int[] stack = new int[256];
+	private int frame = 0;
+	/** Largest position reached so far. */
+	private int iHigh;
+
+	Parser(String text) {
+		this.text = text;
+		this.len = text.length();
+	}
+
+	private int pos() {
+		return stack[frame];
+	}
+
+	/** Moves the current position forward by {@code n} and raises the high mark if it is passed. */
+	private void advance(int n) {
+		int moved = stack[frame] + n;
+		stack[frame] = moved;
+		if( moved > iHigh )
+			iHigh = moved;
+	}
+
+	/**
+	 * Opens a new frame that starts at the current position.
+	 *
+	 * @return the current position
+	 */
+	public int begin() {
+		int next = frame + 1;
+		if( next == stack.length ) {
+			// the stack is full: double it
+			int[] bigger = new int[2*next];
+			System.arraycopy(stack,0,bigger,0,next);
+			stack = bigger;
+		}
+		stack[next] = stack[frame];
+		frame = next;
+		return stack[next];
+	}
+
+	/** Moves the current frame back to the position of the enclosing frame without closing it. */
+	public void rollback() {
+		stack[frame] = stack[frame-1];
+	}
+
+	/** Closes the current frame like {@link #success()} and returns {@code t}. */
+	public <T> T success(T t) {
+		success();
+		return t;
+	}
+
+	/**
+	 * Closes the current frame and hands the position it reached to the enclosing frame.
+	 *
+	 * @return always true
+	 */
+	public boolean success() {
+		int reached = stack[frame];
+		frame--;
+		stack[frame] = reached;
+		return true;
+	}
+
+	/** Closes the current frame like {@link #failure()} and returns {@code t}. */
+	public <T> T failure(T t) {
+		failure();
+		return t;
+	}
+
+	/**
+	 * Closes the current frame and drops the position it reached.
+	 *
+	 * @return always false
+	 */
+	public boolean failure() {
+		frame--;
+		return false;
+	}
+
+	/** Returns the current position. */
+	public int currentIndex() {
+		return pos();
+	}
+
+	/** Returns the largest position reached so far, whether or not it was kept. */
+	public int highIndex() {
+		return iHigh;
+	}
+
+	/** Returns the character just before the current position. */
+	public char lastChar() {
+		return text.charAt(pos()-1);
+	}
+
+	/** Returns the character at the current position. */
+	public char currentChar() {
+		return text.charAt(pos());
+	}
+
+	/** Tells whether the current position is at or past the end of the text. */
+	public boolean endOfInput() {
+		return pos() >= len;
+	}
+
+	/** Consumes the next character if it is {@code c}. */
+	public boolean match(char c) {
+		if( endOfInput() )
+			return false;
+		if( text.charAt(pos()) != c )
+			return false;
+		advance(1);
+		return true;
+	}
+
+	/** Consumes {@code s} if the text continues with it. */
+	public boolean match(String s) {
+		int n = s.length();
+		boolean found = text.regionMatches(pos(),s,0,n);
+		if( found )
+			advance(n);
+		return found;
+	}
+
+	/** Consumes {@code s} if the text continues with it, ignoring case. */
+	public boolean matchIgnoreCase(String s) {
+		int n = s.length();
+		boolean found = text.regionMatches(true,pos(),s,0,n);
+		if( found )
+			advance(n);
+		return found;
+	}
+
+	/** Consumes the next character if it occurs in {@code s}. */
+	public boolean anyOf(String s) {
+		if( endOfInput() )
+			return false;
+		if( s.indexOf(text.charAt(pos())) < 0 )
+			return false;
+		advance(1);
+		return true;
+	}
+
+	/** Consumes the next character if it does not occur in {@code s}. */
+	public boolean noneOf(String s) {
+		if( endOfInput() )
+			return false;
+		if( s.indexOf(text.charAt(pos())) >= 0 )
+			return false;
+		advance(1);
+		return true;
+	}
+
+	/** Consumes the next character if it lies between {@code cLow} and {@code cHigh}, both included. */
+	public boolean inCharRange(char cLow, char cHigh) {
+		if( endOfInput() )
+			return false;
+		char c = text.charAt(pos());
+		if( c < cLow || c > cHigh )
+			return false;
+		advance(1);
+		return true;
+	}
+
+	/** Consumes the next character, whatever it is. */
+	public boolean anyChar() {
+		if( endOfInput() )
+			return false;
+		advance(1);
+		return true;
+	}
+
+	/** Tells, without consuming anything, whether the next character is {@code c}. */
+	public boolean test(char c) {
+		if( endOfInput() )
+			return false;
+		return text.charAt(pos()) == c;
+	}
+
+	/** Tells, without consuming anything, whether the text continues with {@code s}. */
+	public boolean test(String s) {
+		return text.regionMatches(pos(),s,0,s.length());
+	}
+
+	/** Tells, without consuming anything, whether the text continues with {@code s}, ignoring case. */
+	public boolean testIgnoreCase(String s) {
+		return text.regionMatches(true,pos(),s,0,s.length());
+	}
+
+	/** Returns the text between {@code start} and the current position. */
+	public String textFrom(int start) {
+		return text.substring(start,pos());
+	}
+
+}