Module:mh-pronunc/testcases

From Wiktionary, the free dictionary
Jump to navigation Jump to search

All tests passed. (refresh)

Text | Expected | Actual
test_all:
Passed | M̧ajeļ | m̧ahjeļ | m̧ahjeļ
Passed | M̧ajeļ | /mˠæɰtʲɛlˠ/ | /mˠæɰtʲɛlˠ/
Passed | M̧ajeļ | [mˠɑːzʲɛlˠ] | [mˠɑːzʲɛlˠ]
Passed | M̧ajeļ | [mˠɑːzʲɛlˠ] | [mˠɑːzʲɛlˠ]

local tests = require "Module:UnitTests"

-- Return a wikilink to the Marshallese section of `word`, displayed as `word`
-- and wrapped in a span tagged with the Latn class and the "mh" language code.
local function link(word)
	local template = '<span class="Latn" lang="mh">[[%s#Marshallese|%s]]</span>'
	return template:format(word, word)
end

-- Wrap a transcription in a span carrying the "IPA" class.
local function tag_IPA(transcription)
	return ('<span class="IPA">%s</span>'):format(transcription)
end

-- Wrap every function in mw.ustring (not only gsub) in a call counter, so
-- that the number of calls made while the wrappers are installed can be
-- reported later.
--   original_ustring: the unwrapped functions, keyed by name
--   counting_ustring: wrappers that increment a counter and then delegate
--   ustring_counts:   number of calls made to each wrapper, keyed by name
-- NOTE(review): only function-valued entries are copied, so any non-function
-- entries of mw.ustring are absent from both replacement tables; confirm that
-- nothing loaded below relies on them.
local original_ustring = {}
local counting_ustring = {}
local ustring_counts = {}
for name, func in pairs(mw.ustring) do
	if type(func) == "function" then
		original_ustring[name] = func
		ustring_counts[name] = 0
		counting_ustring[name] = function(...)
			ustring_counts[name] = ustring_counts[name] + 1
			return func(...)
		end
	end
end

-- Install the counting wrappers while the main module is loaded.
-- NOTE(review): calls are only counted for functions the module obtains from
-- mw.ustring while this table is installed; presumably the module captures
-- them at load time. Verify against Module:mh-pronunc.
mw.ustring = counting_ustring
local mh_pronunc = require "Module:mh-pronunc"

-- Put the unwrapped functions back before loading the sandbox module, so that
-- its uses of mw.ustring are not counted. The return value is discarded here;
-- the sandbox is required again by name in print_phonemic_and_phonetic.
mw.ustring = original_ustring
require "Module:mh-pronunc/sandbox"

-- Parse `input` with Module:mh-pronunc and return the first transcription the
-- module produces for the requested system.
--
-- `system` must be one of "Bender", "phonemic", "Ralik" or "Ratak"; the last
-- two select the phonetic transcription for the corresponding dialect.
-- Raises an error if `system` is not a string or is not recognized. The input
-- is parsed before `system` is validated.
local function make_transcription(input, system)
	local parsed = mh_pronunc._parse(input)

	if type(system) ~= "string" then
		error("System should be a string.")
	end

	if system == "Bender" then
		return mh_pronunc._toBender(parsed)[1]
	end

	if system == "phonemic" then
		return mh_pronunc._toPhonemic(parsed)[1]
	end

	if system == "Ralik" then
		return mh_pronunc._toPhonetic(parsed, { dialect = "ralik" })[1]
	end

	if system == "Ratak" then
		return mh_pronunc._toPhonetic(parsed, { dialect = "ratak" })[1]
	end

	error("System '" .. system .. "' not recognized.")
end

-- Compare the output of Module:mh-pronunc with the expected transcriptions.
--
-- Each example row holds, in order: the spelling (used for the linked label),
-- the template input, and the expected Bender, phonemic, Ralik phonetic and
-- (optionally) Ratak phonetic transcriptions. When the Ratak value is missing,
-- the Ralik value is expected for both dialects.
function tests:test_all()
	local function in_slashes(transcription)
		return "/" .. transcription .. "/"
	end

	local function in_square_brackets(transcription)
		return "[" .. transcription .. "]"
	end

	local function show_phonetic(...)
		return tag_IPA(in_square_brackets(...))
	end

	-- How expected and actual values are displayed for each system: Bender
	-- is shown as is, the others are bracketed and then tagged as IPA.
	local display_for = {
		Bender = function(...)
			return ...
		end,
		phonemic = function(...)
			return tag_IPA(in_slashes(...))
		end,
		Ralik = show_phonetic,
		Ratak = show_phonetic,
	}

	local examples = {
		{ "M̧ajeļ", "mhahjelh", "m̧ahjeļ", "mˠæɰtʲɛlˠ", "mˠɑːzʲɛlˠ" },
	}

	self:iterate(
		examples,
		function (self, spelling, input, Bender, phonemic, Ralik, Ratak)
			local word = link(spelling)

			-- Systems in the order in which they are checked, each paired
			-- with its expected transcription.
			local checks = {
				{ "Bender", Bender },
				{ "phonemic", phonemic },
				{ "Ralik", Ralik },
				{ "Ratak", Ratak or Ralik },
			}

			for _, check in ipairs(checks) do
				local system, expected = check[1], check[2]
				local actual = make_transcription(input, system)
				self:equals(word, actual, expected, { display = display_for[system] })
			end
		end)
end

-- Build a sortable wikitable comparing the transcriptions produced by the main
-- and sandbox versions of the pronunciation module for a list of entries.
--
-- Arguments (frame.args):
--   source   title of a page whose content holds the list (optional)
--   1        the list itself, used when `source` is not given
--   main     name of the main module without "Module:" (default "mh-pronunc")
--   sandbox  name of the sandbox module (default "mh-pronunc/sandbox")
--
-- The list must be inside a <pre> tag, one entry per line, each line being a
-- title, a tab character, and the template input. Raises an error when the
-- list is missing, has no <pre> tag, or contains a malformed line.
--
-- Returns the wikitext of the table. Where the sandbox output differs from the
-- main output, both are shown, separated by a line break. Also logs the
-- recorded mw.ustring call counts.
function tests.print_phonemic_and_phonetic(frame)
	local Array = require "Module:array"
	local memoize = require "Module:fun".memoize

	local output = Array()

	output:insert([[
{| class="wikitable sortable mh-transcription-list" style="text-align: center;"
|+ Marshallese transcriptions in entries
! term !! Bender !! phonemic !! phonetic (Rālik) !! phonetic (Ratak)
]])

	-- Read the list of entries.
	local content
	if frame.args.source then
		content = assert(mw.title.new(frame.args.source)):getContent()
	else
		content = frame.args[1]
	end
	if not content then
		error("Supply a page title in parameter source or a list of lines in parameter 1")
	end

	local line_description = "title, tab character, and template input"
	local lines = mw.text.unstripNoWiki(content):match("<pre>%s*(.-)%s*</pre>")
	if not lines then
		error("Supply lines consisting of " .. line_description .. " in a pre tag")
	end

	local input_to_title = {}
	for line in lines:gmatch "[^\n]+" do
		local title, input = line:match("^([^\t]+)\t([^\t]+)$")
		if not title then
			-- Without this check the assignment below fails with the
			-- uninformative "table index is nil".
			error("Line " .. line .. " does not consist of " .. line_description)
		end
		input_to_title[input] = title
	end

	-- Return the letters a-z in order; each letter found in
	-- letters_with_diacritics is followed by that letter combined with each
	-- of the diacritics. The diacritics table is not modified.
	local function make_alphabet_with_diacritics(letters_with_diacritics, diacritics)
		local letters = Array()
		for byte = ("a"):byte(), ("z"):byte() do
			local letter = mw.ustring.char(byte)
			letters:insert(letter)
			if letters_with_diacritics:find(letter, 1, true) then
				for _, diacritic in ipairs(diacritics) do
					letters:insert(letter .. diacritic)
				end
			end
		end
		return letters
	end

	-- Map each letter to a single character whose code point increases with
	-- the letter's position in the alphabet, so that comparing the mapped
	-- strings follows the order of `letters`.
	local function make_letter_lookup(letters)
		local letter_lookup = {}
		local start_code_point = 0x4E00
		for number, letter in ipairs(letters) do
			letter_lookup[letter] = mw.ustring.char(start_code_point + number - 1)
		end
		return letter_lookup
	end

	local letters_with_diacritics = "almnou"
	local macron, cedilla = mw.ustring.char(0x0304), mw.ustring.char(0x0327)
	local diacritics = { macron, cedilla }
	local diacritic_class = "[" .. macron .. cedilla .. "]"

	local letter_lookup = make_letter_lookup(
		make_alphabet_with_diacritics(letters_with_diacritics, diacritics))

	local strip_diacritics = memoize(function(title)
		return (mw.ustring.gsub(mw.ustring.toNFD(title), diacritic_class, ""))
	end)

	-- Two-part sortkey: the lowercase title without macrons and cedillas,
	-- then the lowercase title with every letter (and its optional diacritic)
	-- replaced through letter_lookup.
	local make_sortkey = memoize(function (title)
		local decomposed_lowercase = mw.ustring.toNFD(mw.ustring.lower(title))
		return {
			mw.ustring.lower(strip_diacritics(title)),
			(mw.ustring.gsub(
				decomposed_lowercase,
				"%l" .. diacritic_class .. "?",
				letter_lookup))
		}
	end)

	-- Order template inputs by the sortkey of their titles: first by the
	-- title without diacritics, then by the diacritic-aware key.
	local sorter = function(a, b)
		local sortkey1 = make_sortkey(input_to_title[a])
		local sortkey2 = make_sortkey(input_to_title[b])
		if sortkey1[1] ~= sortkey2[1] then
			return sortkey1[1] < sortkey2[1]
		else
			return sortkey1[2] < sortkey2[2]
		end
	end

	-- Load a pronunciation module and return a table of functions, keyed by
	-- transcription system, that each take template input and return the
	-- module's transcriptions joined with ", ".
	local function make_module(module_name)
		local module = require("Module:" .. module_name)
		local parse = memoize(module._parse)

		local actual_module = {}

		local function add_function(system, extra_arg, function_name)
			-- The extra parentheses drop the match count returned by gsub.
			local method_name = "_to"
				.. ((function_name or system):gsub("^.", string.upper))
			local func = assert(module[method_name], method_name .. " not found")
			actual_module[system] = function(arg)
				return table.concat(func(parse(arg), extra_arg), ", ")
			end
		end

		add_function("Bender")
		add_function("Ralik", { dialect = "ralik" }, "Phonetic")
		add_function("Ratak", { dialect = "ratak" }, "Phonetic")
		add_function("phonemic")

		return actual_module
	end
	local main_module = make_module(frame.args.main or "mh-pronunc")
	local sandbox_module = make_module(frame.args.sandbox or "mh-pronunc/sandbox")

	local function format_phonetic(arg)
		return tag_IPA("[" .. mw.text.nowiki(arg) .. "]")
	end

	local transcription_formatters = {
		Bender = function(arg)
			return '<span lang="mh">' .. arg .. '</span>'
		end,
		Ralik = format_phonetic,
		Ratak = format_phonetic,
		phonemic = function(arg)
			return tag_IPA("/" .. mw.text.nowiki(arg) .. "/")
		end,
	}

	-- Position of the row in sorted order, used as its data-sort-value.
	local row_number = 0

	for input, title in require "Module:table".sortedPairs(input_to_title, sorter) do
		row_number = row_number + 1

		local transcriptions = {}
		local sandbox_transcriptions = {}
		for name in pairs(transcription_formatters) do
			transcriptions[name] = main_module[name](input)
			sandbox_transcriptions[name] = sandbox_module[name](input)
		end

		-- Show the main transcription, followed by the sandbox one on a
		-- new line when the two differ.
		local function format_transcription(name)
			local formatter = transcription_formatters[name]
			local formatted = formatter(transcriptions[name])
			if sandbox_transcriptions[name] ~= transcriptions[name] then
				formatted = formatted
					.. "<br>"
					.. formatter(sandbox_transcriptions[name])
			end
			return formatted
		end

		-- Merge the two phonetic cells when both dialects agree in the main
		-- module and in the sandbox.
		local Ralik_Ratak
		if transcriptions.Ralik == transcriptions.Ratak
			and sandbox_transcriptions.Ralik == sandbox_transcriptions.Ratak
		then
			Ralik_Ratak = ('colspan="2" | %s'):format(
				format_transcription("Ratak"))
		else
			Ralik_Ratak = ('%s || %s'):format(
				format_transcription("Ralik"),
				format_transcription("Ratak"))
		end

		-- Only differences in the phonetic transcriptions are flagged.
		local has_difference = transcriptions.Ralik ~= sandbox_transcriptions.Ralik
			or transcriptions.Ratak ~= sandbox_transcriptions.Ratak

		output:insert(([[
|- data-template-input="%s" data-phonemic="%s" data-has-difference="%d"
| data-sort-value="%s" | %s || {%s} || %s || %s
]]):format(
			input,
			transcriptions.phonemic,
			has_difference and 1 or 0,
			row_number,
			link(title),
			format_transcription("Bender"),
			format_transcription("phonemic"),
			Ralik_Ratak))
	end

	output:insert "|}"

	-- Report how often each wrapped mw.ustring function was called.
	for name, count in pairs(ustring_counts) do
		if count > 0 then
			mw.log("total number of calls to " .. name .. " in Module:mh-pronunc:", count)
		end
	end

	return output:concat ""
end

-- List the template inputs that are used by more than one title.
--
-- frame.args[1] must hold a <pre> tag whose lines each consist of a title, a
-- tab character, and mh-ipa-rows input. Raises an error when the <pre> tag is
-- missing or a line does not have that form.
--
-- Returns the trimmed <pre> tag, a blank line, and then one line for every
-- input shared by several titles, in the form "title1, title2<TAB>input".
-- The lines are sorted so that the result does not depend on table traversal
-- order.
function tests.show_duplicate_template_input(frame)
	local pre_tag = frame.args[1]
	local lines
	if pre_tag then
		pre_tag = mw.text.trim(pre_tag)
		lines = pre_tag:match("^<pre>%s*(.-)%s*</pre>$")
	end
	local line_description = "title, tab character, and mh-ipa-rows input"
	if not lines then
		error("Supply lines consisting of " .. line_description .. " in a pre tag in parameter 1")
	end

	-- Group the titles by the template input they use. The key must be the
	-- captured template_input; the previous code indexed with an undefined
	-- global, which raised "table index is nil".
	local titles_by_input = {}
	for line in lines:gmatch "[^\n]+" do
		local title, template_input = line:match "^([^\t]+)\t(.+)"
		if not title then
			error("Line " .. line .. " does not consist of " .. line_description)
		end
		local titles = titles_by_input[template_input]
		if not titles then
			titles = {}
			titles_by_input[template_input] = titles
		end
		titles[#titles + 1] = title
	end

	-- Keep only the inputs that occur with more than one title.
	local duplicates = {}
	for template_input, titles in pairs(titles_by_input) do
		if #titles > 1 then
			duplicates[#duplicates + 1] = table.concat(titles, ", ")
				.. "\t" .. template_input
		end
	end
	table.sort(duplicates)

	return pre_tag .. "\n\n" .. table.concat(duplicates, "\n")
end

return tests