diff src/luan/modules/lucene/Ab_testing.luan @ 775:1a68fc55a80c

simplify dir structure
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 26 Aug 2016 14:36:40 -0600
parents lucene/src/luan/modules/lucene/Ab_testing.luan@ca169567ce07
children bae2d0c2576c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/luan/modules/lucene/Ab_testing.luan	Fri Aug 26 14:36:40 2016 -0600
@@ -0,0 +1,272 @@
+local Luan = require "luan:Luan.luan"
+local pairs = Luan.pairs
+local ipairs = Luan.ipairs
+local error = Luan.error
+local range = Luan.range
+local Math = require "luan:Math.luan"
+local Table = require "luan:Table.luan"
+local String = require "luan:String.luan"
+local gsub = String.gsub
+local Io = require "luan:Io.luan"
+local Http = require "luan:http/Http.luan"
+local Logging = require "luan:logging/Logging.luan"
+local Lucene = require "luan:lucene/Lucene.luan"
+
+local M = {}
+
+local logger = Logging.logger "Ab_testing"
+
+function M.of(index)
+
+	local ab_testing = {}
+
+	ab_testing.test_map = {}
+
+	function ab_testing.test(test)
+		test.name or error "name not defined"
+		test.values or error "values not defined"
+
+		-- list of event names
+		test.events or error "events not defined"
+
+		-- map of event name to aggregator factory
+		test.aggregator_factories or error "aggregator_factories not defined"
+
+		-- test.date_field is optional
+
+		local field = "ab_test_" .. test.name
+		index.indexed_fields[field] == nil or error("test "+test.name+" already defined")
+		index.indexed_fields[field] = Lucene.type.string
+		test.field = field
+
+		-- returns map of event name to (map of value to result) and "start_date"
+		function test.results()
+			local results = {}
+			for name in pairs(test.aggregator_factories) do
+				results[name] = {}
+			end
+			local date_field = test.date_field
+			local start_date = nil
+			for _, value in ipairs(test.values) do
+				local aggregators = {}
+				for name, factory in pairs(test.aggregator_factories) do
+					aggregators[name] = factory()
+				end
+				local query = field..":"..value
+				index.advanced_search(query, function(_,doc_fn)
+					local doc = doc_fn()
+					for _, aggregator in pairs(aggregators) do
+						aggregator.aggregate(doc)
+					end
+					if date_field ~= nil then
+						local date = doc[date_field]
+						if date ~= nil and (start_date==nil or start_date > date) then
+							start_date = date
+						end
+					end
+				end)
+				for name, aggregator in pairs(aggregators) do
+					results[name][value] = aggregator.result
+				end
+			end
+			results.start_date = start_date
+			return results
+		end
+
+		function test.fancy_results()
+			local events = test.events
+			local results = test.results()
+			local fancy = {}
+			fancy.start_date = results.start_date
+			local event = events[1]
+			fancy[event] = {}
+			for value, count in pairs(results[event]) do
+				fancy[event][value] = {}
+				fancy[event][value].count = count
+				fancy[event][value].pct_of_total = 100
+				fancy[event][value].pct_of_prev = 100
+			end
+			local all = results[event]
+			local prev = all
+			for i in range(2,#events) do
+				event = events[i]
+				fancy[event] = {}
+				for value, count in pairs(results[event]) do
+					fancy[event][value] = {}
+					fancy[event][value].count = count
+					fancy[event][value].pct_of_total = M.percent(count,all[value])
+					fancy[event][value].pct_of_prev = M.percent(count,prev[value])
+				end
+				prev = results[event]
+			end
+			return fancy
+		end
+
+		ab_testing.test_map[test.name] = test
+
+		return test
+	end
+
+	function ab_testing.value(test_name,values)
+		return values[test_name] or ab_testing.test_map[test_name].values[1]
+	end
+
+	-- returns map from test name to value
+	function ab_testing.from_doc(doc)
+		local values = {}
+		for _, test in pairs(ab_testing.test_map) do
+			values[test.name] = doc[test.field]
+		end
+		return values
+	end
+
+	function ab_testing.to_doc(doc,values,tests)
+		tests = tests or ab_testing.test_map
+		if values == nil then
+			values = {}
+			for _, test in pairs(tests) do
+				values[test.name] = test.values[Math.random(#test.values)]
+			end
+		end
+		for _, test in pairs(tests) do
+			doc[test.field] = values[test.name]
+		end
+		return values
+	end
+
+	function ab_testing.web_page(test_names)
+		return function()
+			local results = {}
+			for _, name in ipairs(test_names) do
+				local test = ab_testing.test_map[name]
+				test or error("test not found: "..name)
+				results[name] = test.fancy_results()
+			end
+			Io.stdout = Http.response.text_writer()
+			M.html(test_names,ab_testing.test_map,results)
+		end
+	end
+
+	return ab_testing
+end
+
+
+-- aggregator factories
+
+-- fn(doc) should return boolean whether doc should be counted
+function M.count(fn)
+	return function()
+		local aggregator = {}
+		aggregator.result = 0
+		function aggregator.aggregate(doc)
+			if fn(doc) then
+				aggregator.result = aggregator.result + 1
+			end
+		end
+		return aggregator
+	end
+end
+
+M.count_all = M.count( function(doc) return true end )
+
+-- fn(doc) should return number to add to result, return 0 for nothing
+function M.sum(fn)
+	return function()
+		local aggregator = {}
+		aggregator.result = 0
+		function aggregator.aggregate(doc)
+			aggregator.result = aggregator.result + fn(doc)
+		end
+		return aggregator
+	end
+end
+
+
+
+function M.percent(x,total)
+	if total==0 then
+		return 0
+	else
+		return 100 * x / total
+	end
+end
+
+-- I will change this to use SimplyHTML when this is used again.
+local function basic_style() %>
+	body {font-family:'Arial',sans-serif;font-size:16px;padding:1em 2em}
+	h1 {font-weight:bold;font-size:20px}
+	h2 {margin:2em 0 0em;font-size:18px;color:#3589B1}
+	table.results {margin-top:.5em;border-collapse:collapse;font-size:90%}
+	table.results th {background:#eee}
+	table.results th,table.results td {border-left:1px solid #bbb;padding:.4em 2em}
+	table.results tr:nth-child(odd) td {background:#f8f8f8}
+<% end
+
+local function format(v)
+	v = v .. ''
+	return gsub( v, [[(\d+\.\d{1})\d+]], '$1' )
+end
+
+function M.html(test_names,tests,results) %>
+<!DOCTYPE html>
+<html lang="en">
+	<head>
+		<title>A/B Test Results</title>
+		<style><% basic_style() %></style>
+	</head>
+	<body>
+		<h1>A/B Test Results</h1>
+		<%
+		for _, test_name in ipairs(test_names) do
+			local test = tests[test_name]
+			local result = results[test_name]
+			local n = #test.values
+			%>
+			<h2><%=test_name%></h2>
+			<table class="results">
+				<tr>
+					<th>Event</th>
+					<th class="top" colspan="<%=n%>">Count</th>
+					<th class="top" colspan="<%=n%>">% of total</th>
+					<th class="top" colspan="<%=n%>">% of prev</th>
+				</tr>
+				<tr>
+					<th></th>
+					<%
+					for _ in range(1,3) do
+						for _, value in ipairs(test.values) do
+							%><th class="top"><%=value%></th><%
+						end
+					end
+					%>
+				</tr>
+				<%
+				for _, event in ipairs(test.events) do
+					local event_values = result[event]
+					%>
+					<tr>
+						<td><%=event%></td>
+						<%
+						for _, value in ipairs(test.values) do
+							%><td><%=format(event_values[value].count)%></th><%
+						end
+						for _, value in ipairs(test.values) do
+							%><td><%=format(event_values[value].pct_of_total)%></th><%
+						end
+						for _, value in ipairs(test.values) do
+							%><td><%=format(event_values[value].pct_of_prev)%></th><%
+						end
+						%>
+					</tr>
+					<%
+				end
+				%>
+			</table>
+			<%
+		end
+		%>
+	</body>
+</html>
+<% end
+
+return M