Module:tru-translit/sandbox

From Wiktionary, the free dictionary
Jump to navigation Jump to search

-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}

-- Unicode-aware helpers from Scribunto's mw.ustring library:
--   U    builds a string from one or more code points
--   rsub is gsub working on UTF-8 characters rather than on bytes
local U = mw.ustring.char
local rsub = mw.ustring.gsub

-- Syriac vowel marks (combining characters), in code-point order.
local pthaha = U(0x730)        -- SYRIAC PTHAHA ABOVE
local pthaha_below = U(0x731)  -- SYRIAC PTHAHA BELOW
local zqapha = U(0x733)        -- SYRIAC ZQAPHA ABOVE
local rbasa = U(0x736)         -- SYRIAC RBASA ABOVE
local rbasa_below = U(0x737)   -- SYRIAC RBASA BELOW

-- Pattern fragment that captures exactly one of the vowel marks above.
local vowel_diacritics_capture = "(["
	.. table.concat({ rbasa_below, pthaha_below, rbasa, zqapha, pthaha })
	.. "])"

-- The consonant letters that can stand for vowels (matres lectionis) are built from
-- their code points rather than typed as literals. Right-to-left characters are laid
-- out differently by text editors and by the wiki UI: because these letters are joined
-- with the .. operator, "ܘ" .. "ܐ" as shown on Wiktionary would appear as
-- "ܐ" .. "ܘ" in a text editor. Named constants avoid that ambiguity.
local alaph = U(0x710)  -- SYRIAC LETTER ALAPH
local waw = U(0x718)    -- SYRIAC LETTER WAW
local yudh = U(0x71D)   -- SYRIAC LETTER YUDH


-- Combining marks that modify the reading of a consonant.
local combining_tilde_below = U(0x330)  -- COMBINING TILDE BELOW
local qushshaya = U(0x741)              -- SYRIAC QUSHSHAYA
local rukkakha = U(0x742)               -- SYRIAC RUKKAKHA


-- Mark-reordering substitutions that export.tr runs before transliterating.
-- Each entry is a { pattern, replacement } pair that gets unpacked into rsub.
local fix = {
	-- Move a qushshaya in front of the vowel mark it follows.
	-- TODO: needs a test case. If no word has ܦ carrying both a qushshaya and a
	-- vowel mark, this entry should be removed.
	{
		vowel_diacritics_capture .. qushshaya,
		qushshaya .. "%1",
	},

	-- MediaWiki normalizes text to NFC
	-- (https://www.mediawiki.org/wiki/Unicode_normalization_considerations), which
	-- preempts the two substitutions below. Uncomment them if a test case turns up
	-- that requires them.
	-- { vowel_diacritics_capture .. rukkakha, rukkakha .. "%1" },
	-- { vowel_diacritics_capture .. combining_tilde_below, combining_tilde_below .. "%1" },
}

-- First-pass lookup: each plain Syriac consonant letter and its Latin transliteration.
-- Characters that have no entry here are left unchanged by the lookup.
local tt = {
	["ܦ"] = "f",
	["ܒ"] = "b",
	["ܬ"] = "t",
	["ܛ"] = "ṭ",
	["ܕ"] = "d",
	["ܟ"] = "k",
	["ܓ"] = "g",
	["ܩ"] = "q",
	["ܔ"] = "j",
	["ܣ"] = "s",
	["ܨ"] = "ṣ",
	["ܙ"] = "z",
	["ܫ"] = "š",
	["ܚ"] = "ḥ",
	["ܥ"] = "c",
	["ܗ"] = "h",
	["ܡ"] = "m",
	["ܢ"] = "n",
	["ܪ"] = "r",
	["ܠ"] = "l",
}

-- Second-pass lookup, applied after the context-dependent vowel rules in export.tr.
local tt_next = {}

-- A waw or yudh still present at that point is read as a consonant.
tt_next[waw] = "w"
tt_next[yudh] = "y"

-- Each vowel mark becomes its Latin vowel.
tt_next[pthaha] = "a"
tt_next[pthaha_below] = "ä"
tt_next[zqapha] = "o"
tt_next[rbasa] = "e"
tt_next[rbasa_below] = "ë"

-- Every character that counts as a consonant once the first transliteration pass
-- has run:
--   * the Latin outputs of the plain-letter table (f b t ṭ d k g q j s ṣ z š ḥ c h m n r l),
--   * the outputs of the rukkakha / qushshaya rules (v ṯ ḏ x ġ p),
--   * the outputs of the tilde-below rules (ž č),
--   * yudh and waw themselves, which are still in Syriac script at that point.
-- "č" has to be listed like "ž"; otherwise a waw or yudh next to it is never
-- recognised as standing between two consonants.
local consonants = "fbtṭdkgqjsṣzšḥchmnrl" .. "vṯḏxġp" .. "žč" .. yudh .. waw
-- The same set as a single-character capturing pattern class.
local consonants_group = "([" .. consonants .. "])"

-- Ordered { pattern, replacement } rules run by export.tr after the `fix` pass.
-- The order is significant: later rules rely on what earlier ones left behind.
-- "#" is the word-boundary marker that export.tr inserts before running the rules.
local tr_rules = {
	-- letters carrying a combining tilde below
	{ "ܫ" .. combining_tilde_below, "č" },
	{ "ܙ" .. combining_tilde_below, "ž" },

	-- letter carrying a qushshaya
	{ "ܦ" .. qushshaya, "p" },

	-- letters carrying a rukkakha
	{ "ܒ" .. rukkakha, "v" },
	{ "ܬ" .. rukkakha, "ṯ" },
	{ "ܕ" .. rukkakha, "ḏ" },
	{ "ܟ" .. rukkakha, "x" },
	{ "ܓ" .. rukkakha, "ġ" },

	-- remaining plain consonant letters, one character at a time
	{ ".", tt },

	-- waw / yudh directly between two consonants is a vowel
	{ consonants_group .. waw .. consonants_group, "%1u%2" },
	{ consonants_group .. yudh .. consonants_group, "%1i%2" },

	-- word-final alaph + pthaha + waw/yudh gives the diphthong aw/ay; the boundary
	-- marker ends up in front of it, which is harmless because every marker is
	-- stripped by the last rule
	{ alaph .. pthaha .. waw .. "#", "#aw" },
	{ alaph .. pthaha .. yudh .. "#", "#ay" },

	-- word-initial alaph + waw/yudh
	{ "#" .. alaph .. waw, "#u" },
	{ "#" .. alaph .. yudh, "#i" },

	-- word-final waw/yudh
	{ waw .. "#", "u#" },
	{ yudh .. "#", "i#" },

	-- word-final alaph: after a vowel mark only that vowel is kept, otherwise the
	-- alaph becomes "o"; any alaph left elsewhere is dropped
	{ pthaha .. alaph .. "#", "a#" },
	{ rbasa .. alaph .. "#", "e#" },
	{ zqapha .. alaph .. "#", "o#" },
	{ alaph .. "#", "o#" },
	{ alaph, "" },

	-- whatever is left: consonantal waw/yudh and the vowel marks
	{ ".", tt_next },

	-- finally remove the boundary markers
	{ "#", "" },
}

-- Runs every { pattern, replacement } entry of `rules` over `text`, in order, and
-- returns the resulting string.
local function apply_rules(text, rules)
	for _, rule in ipairs(rules) do
		text = rsub(text, unpack(rule))
	end
	return text
end

--- Transliterates Syriac-script text into Latin script.
-- @param text string to transliterate
-- @return the transliterated string
function export.tr(text)
	-- Put "#" on both sides of every space (and of " | " separators) and wrap the
	-- whole string in "##", so that the rules can anchor on word boundaries.
	local marked = rsub(text, " | ", "# | #")
	marked = "##" .. rsub(marked, " ", "# #") .. "##"

	-- Drop the elongation stroke (presumably tatweel, U+0640); it carries no sound.
	marked = rsub(marked, "ـ", "")

	-- Reorder combining marks first, then run the transliteration rules proper.
	marked = apply_rules(marked, fix)
	return apply_rules(marked, tr_rules)
end

return export