Module:User:Theknightwho/abq-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.


-- Table holding the module's public functions; `tr` is attached to it below and the table is returned at the end of the file.
local export = {}

-- Single-letter map: lower-case Cyrillic letter -> Latin-script transliteration.
-- export.tr looks characters up here one at a time for whatever is left once
-- trigraphs and digraphs have been handled; a character with no entry passes
-- through unchanged.
local tt = {
	["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "x", ["ц"] = "c", ["ч"] = "ć", ["ш"] = "š", ["щ"] = "ś", ["ъ"] = "ʔ", ["ы"] = "ə", ["ь"] = "ʲ", ["э"] = "è", ["ю"] = "ju", ["я"] = "ja"
}

-- Two-letter sequences that are transliterated as a unit rather than letter by
-- letter. export.tr handles these after `trigraphs` and before the single
-- letters in `tt`. Keys are lower case, including the palochka (ӏ).
-- Note that "тл" shares its second letter with the first letter of "ль" and "лӏ".
local digraphs = {
	["гъ"] = "ɣ", ["гь"] = "gʲ", ["гӏ"] = "ʻ", ["дж"] = "dž", ["дз"] = "dz", ["жь"] = "ź", ["къ"] = "qʼ", ["кь"] = "kʲ", ["кӏ"] = "kʼ", ["ль"] = "lᶻ", ["лӏ"] = "lˢʼ", ["пӏ"] = "pʼ", ["тл"] = "tˡ", ["тӏ"] = "tʼ", ["фӏ"] = "fʼ", ["хъ"] = "q", ["хь"] = "xʲ", ["хӏ"] = "ḥ", ["цӏ"] = "cʼ", ["чӏ"] = "ćʼ", ["шӏ"] = "čʼ"
}

-- Three-letter sequences that are transliterated as a unit. export.tr handles
-- these before `digraphs`, so that e.g. "гъь" is not read as "гъ" + "ь".
-- Keys are lower case, including the palochka (ӏ).
local trigraphs = {
	["гъь"] = "ɣʲ", ["джь"] = "dź", ["къь"] = "qʲʼ", ["кӏь"] = "kʲʼ"
}

-- Matches a single UTF-8 encoded character (one lead byte plus any continuation bytes).
local UTF8_CHAR = "[%z\1-\127\194-\244][\128-\191]*"

-- Classifies the capitalisation of a word (palochkas already normalised to "ӏ").
-- Returns 2 for all-caps, 1 for an initial capital, 0 otherwise.
local function detect_case(word)
	local ustring = mw.ustring
	local upper = ustring.upper

	-- The palochka is left out of the all-caps test: every palochka has been
	-- normalised to the lower-case form, so upper-casing it would otherwise make
	-- an all-caps word such as "КIА" look mixed-case.
	local cased = ustring.gsub(word, "ӏ", "")
	if cased ~= "" and cased == upper(cased) then
		return 2
	end

	local first = ustring.sub(word, 1, 1)
	if first == upper(first) then
		return 1
	end
	return 0
end

-- Transliterates a lower-case word in a single left-to-right pass, preferring
-- the longest unit at each position: trigraph, then digraph, then single letter.
-- Characters that appear in none of the tables are copied through unchanged.
local function transliterate_letters(word)
	-- Split into characters with the byte-based string library; the pattern
	-- keeps multi-byte characters intact.
	local chars, n = {}, 0
	for ch in string.gmatch(word, UTF8_CHAR) do
		n = n + 1
		chars[n] = ch
	end

	local out, i = {}, 1
	while i <= n do
		local c1, c2, c3 = chars[i], chars[i + 1], chars[i + 2]
		local tri = c3 and trigraphs[c1 .. c2 .. c3]
		local di = not tri and c2 and digraphs[c1 .. c2]
		if tri then
			out[#out + 1] = tri
			i = i + 3
		elseif di then
			-- Scanning leftmost-first makes overlapping digraphs ("тл" vs. "ль"/"лӏ")
			-- resolve the same way every time, instead of depending on the
			-- unspecified traversal order of pairs().
			out[#out + 1] = di
			i = i + 2
		else
			out[#out + 1] = tt[c1] or c1
			i = i + 1
		end
	end
	return table.concat(out)
end

-- Transliterates Cyrillic-script text into Latin script.
--
-- text: the string to transliterate.
-- lang, sc: accepted as part of the signature; not used by this function.
--
-- Returns a single string. The text is split on whitespace and the words are
-- re-joined with single spaces, so leading/trailing whitespace is dropped and
-- runs of whitespace are collapsed. The capitalisation of each word (all-caps,
-- initial capital, or neither) is carried over to the output.
function export.tr(text, lang, sc)
	local ustring = mw.ustring
	local ugsub, ulower, uupper = ustring.gsub, ustring.lower, ustring.upper

	-- Normalise every palochka to the lower-case form used as a key in the tables:
	-- "false" palochkas (Latin "I" or "l", Cyrillic "І") and the real upper-case "Ӏ".
	text = ugsub(text, "[IlІӀ]", "ӏ")

	local words = {}
	for word in ustring.gmatch(text, "%S+") do
		-- Remember the case form, then work on the lower-case word.
		local case = detect_case(word)
		word = ulower(word)

		-- Contextual substitutions. These need mw.ustring.gsub because the
		-- character classes contain multi-byte characters.
		-- "j" is inserted before "е" word-initially and after a vowel. The "е" in
		-- the replacement is still Cyrillic, so it keeps counting as a vowel here
		-- and is converted by the letter table afterwards.
		word = ugsub(word, "^е", "jе")
		word = ugsub(word, "([аеёиоуыэюя])е", "%1jе")
		-- "у" becomes "w" before or after a vowel.
		word = ugsub(word, "у([аиоуыэ])", "w%1")
		word = ugsub(word, "([аеёиоуыэюя])у", "%1w")
		-- "в" after a consonant letter (or ъ, ь, palochka) marks labialisation.
		word = ugsub(word, "([бгджзклмнпрстфхцчшщъьӏ])в", "%1ʷ")

		word = transliterate_letters(word)

		-- Restore the case form of the original word.
		if case == 2 then
			word = uupper(word)
		elseif case == 1 then
			word = ugsub(word, "^(.)", uupper)
		end

		words[#words + 1] = word
	end

	-- Put the labialisation mark before the ejective apostrophe. The outer
	-- parentheses discard gsub's second result (the substitution count) so that
	-- callers receive exactly one value.
	return (ugsub(table.concat(words, " "), "ʼʷ", "ʷʼ"))
end

-- Hand the table of public functions (export.tr) to whoever loads this module.
return export