Module:inc-mbn-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate Middle Bengali language text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:inc-mbn-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Transliteration for Middle Bengali

-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Local aliases for the mw.ustring functions used throughout the module.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup table from a Bengali-script character (or a character followed by a
-- nukta) to its Latin-script replacement. export.tr passes this table to gsub
-- as the replacement argument, so any match without an entry here is left
-- unchanged.
local char = {
	-- consonants
	["ক"] = "k",	["খ"] = "kh",	["গ"] = "g",	["ঘ"] = "gh",	["ঙ"] = "ṅ",
	["চ"] = "c",	["ছ"] = "ch",	["জ"] = "j",	["ঝ"] = "jh",	["ঞ"] = "ñ",
	["ট"] = "ṭ",	["ঠ"] = "ṭh",	["ড"] = "ḍ",	["ঢ"] = "ḍh",	["ণ"] = "ṇ",
	["ত"] = "t",	["থ"] = "th",	["দ"] = "d",	["ধ"] = "dh",	["ন"] = "n",
	["প"] = "p",	["ফ"] = "ph",	["ব"] = "b",	["ভ"] = "bh",	["ম"] = "m",
	["য"] = "j",	["র"] = "r",	["ল"] = "l",
	["শ"] = "ś",	["ষ"] = "ṣ",	["স"] = "s",	["হ"] = "h",	
	-- consonants written with a nukta
	["য়"] = "ẏ",	["ড়"] = "ṛ",	["ঢ়"] = "ṛh",

	-- vowel diacritics (dependent signs attached to a consonant)
	["ি"] = "i",	["ু"] = "u",	
	["ৃ"] = "ri",	["ে"] = "e",	["ো"] = "ō",
	["া"] = "a",	["ী"] = "i",	["ূ"] = "u",	["ৈ"] = "ōi",	["ৌ"] = "ōu",

	-- archaic vowel diacritics
	["ৄ"] = "ri",	["ৢ"] = "li",	["ৣ"] = "li",

	-- visarga
	["ঃ"] = "ḥ",

	-- vowel signs (independent vowel letters)
	["অ"] = "o", 	["ই"] = "i",	["উ"] = "u",	
	["ঋ"] = "ri",	["এ"] = "e",	["ও"] = "ō",
	["আ"] = "a",	["ঈ"] = "i",	["ঊ"] = "u",	["ঐ"] = "ōi",	["ঔ"] = "ōu",

	-- archaic vowel signs (independent letters)
	["ৠ"] = "ri",	["ঌ"] = "li",	["ৡ"] = "li",

	-- virama: produces no output of its own
	["্"] = "",

	-- chandrabindu: becomes a combining tilde on the preceding output
	["ঁ"] = "̃",
	
	-- avagraha
	['ঽ']='’',
		
	-- anusvara
	["ং"] = "ṅ",

	-- khanda ta
	["ৎ"] = "t",

	-- numerals
	["০"] = "0", ["১"] = "1", ["২"] = "2", ["৩"] = "3", ["৪"] = "4", 
	["৫"] = "5", ["৬"] = "6", ["৭"] = "7", ["৮"] = "8", ["৯"] = "9",
 
	-- punctuation
	["।"] = ".", -- dãri
}

-- Contents of character classes (without the surrounding brackets):
--   consonant  - the consonant letters, including the nukta forms
--   vowel      - the placeholder "o" for the inherent vowel, the vowel
--                diacritics and "’"
--   vowel_sign - the independent vowel letters
local consonant, vowel, vowel_sign = "ক-হড়-য়", "oা-ৌ’", "অ-ঔ"
-- A single consonant.
local c = "[" .. consonant .. "]"
-- An optional nukta followed by a consonant. The nukta comes first because
-- this fragment is only used in syncope_pattern, which export.tr applies to
-- reversed words.
local cc = "়?" .. c
-- A single vowel of any kind (diacritic, independent letter or inherent "o").
local v = "[" .. vowel .. vowel_sign .. "o]"
-- Matched against a reversed word in export.tr: an "o" that sits between
-- vowel-consonant-vowel-consonant on one side and
-- consonant-(chandrabindu)-vowel on the other. export.tr deletes that "o"
-- and keeps both captures.
local syncope_pattern = "(" .. v .. cc .. v .. cc .. ")o(" .. cc .. "ঁ?" .. v .. ")"


-- Returns `text` with the order of its characters reversed.
-- Characters are counted and extracted with mw.ustring, i.e. per code point
-- rather than per byte, so combining marks end up before their base letter
-- in the result.
-- The pieces are collected in a table and joined once, which avoids the
-- quadratic cost of growing a string by repeated concatenation.
local function rev_string(text)
	local usub = mw.ustring.sub
	local pieces = {}
	for pos = mw.ustring.len(text), 1, -1 do
		pieces[#pieces + 1] = usub(text, pos, pos)
	end
	return table.concat(pieces)
end

-- Transliterates Middle Bengali text into Latin script.
--
-- The text goes through three stages:
--   1. Rewrites on the Bengali-script text: viramas are inserted into (and, at
--      the start of a word, removed from) certain consonant sequences, a
--      placeholder "o" for the inherent vowel is appended to every consonant
--      that is not followed by a vowel diacritic, "’" or virama, and some of
--      those placeholders are dropped again word by word.
--   2. Every remaining Bengali character is replaced through the `char` table.
--   3. An ordered series of substitutions on the Latin-script result.
-- Later substitutions consume the output of earlier ones (e.g. the temporary
-- "ṃ", "y" and "v"), so the order of the statements matters.
--
-- @param text      the string to transliterate
-- @param lang      language code; not used by this function
-- @param sc        script code; not used by this function
-- @param override  optional; when equal to "debug", the text is returned even
--                  if untransliterated characters remain in it
-- @return the NFC-normalised transliteration, or nil when a character in the
--         range ঁ-৽ is left over and override is not "debug"
function export.tr(text, lang, sc, override)
	-- consonant + independent ও: insert a virama, except at the start of the text
	text = gsub(text, "(" .. c .. ")ও", "%1্ও")
	text = gsub(text, "^(" .. c .. ")্ও", "%1ও")

	-- velar + following consonant: insert a virama, except at the start of
	-- the text or before ড়/ঢ়
	text = gsub(text, "([কখগঘ])([গচছট-ধস])", "%1্%2")
	text = gsub(text, "^([কখগঘ])্([গচছট-ধস])", "%1%2")
	text = gsub(text, "([কখগঘ])্([ড়ঢ়])", "%1%2")

	-- ক + ব: the ব is written out as Latin "b" right away
	text = gsub(text, "কব", "ক্b")
	text = gsub(text, "^ক্b", "কb")

	-- consonant + virama + invisible joiner (presumably ZWNJ -- verify) +
	-- consonant at the end of a word: drop the joiner, add a final virama
	text = gsub(text, "(" .. c .. ")্‌(" .. c .. ")$", "%1্%2্")
	text = gsub(text, "(" .. c .. ")্‌(" .. c .. ") ", "%1্%2্ ")

	-- [ট-ধ] + [ক-ঘ] + vowel: insert a virama, except word-initially.
	-- The third pattern used to contain a stray "^" after the space; "^" is
	-- only an anchor at the very start of a pattern, so that rule could never
	-- match. It now mirrors the " ন্" rule below.
	text = gsub(text, "([ট-ধ])([ক-ঘ])(" .. v .. ")", "%1্%2%3")
	text = gsub(text, "^([ট-ধ])্([ক-ঘ])(" .. v .. ")", "%1%2%3")
	text = gsub(text, " ([ট-ধ])্([ক-ঘ])(" .. v .. ")", " %1%2%3")

	-- ন + following consonant: insert a virama, except word-initially
	text = gsub(text, "ন([চ-ঝট-ঢশ-হ])", "ন্%1")
	text = gsub(text, "^ন্([চ-ঝট-ঢশ-হ])", "ন%1")
	text = gsub(text, " ন্([চ-ঝট-ঢশ-হ])", " ন%1")

	-- প / ফ + following consonant: insert a virama, except at the start of the text
	text = gsub(text, "প([ঙঞণনম])", "প্%1")
	text = gsub(text, "^প্([ঙঞণনম])", "প%1")
	text = gsub(text, "ফ([ঙঞণত-নমরল])", "ফ্%1")
	text = gsub(text, "^ফ্([ঙঞণত-নমরল])", "ফ%1")

	-- ঞ between two vowels becomes a combining tilde on the first vowel
	text = gsub(text, "(" .. v .. ")ঞ(" .. v .. ")", "%1̃%2")

	-- Append the inherent-vowel placeholder "o" to every consonant (with its
	-- optional nukta) that is not followed by one of the characters in the
	-- second class.
	text = gsub(text, "(" .. c .. "়?)([" .. vowel .. "’?্]?)", function(a, b)
		return a .. (b == "" and "o" or b) end)

	-- Drop inherent vowels word by word. The rules are applied to the
	-- reversed word, so "^" below refers to the end of the original word.
	for word in mw.ustring.gmatch(text, "[ঁ-৽o’]+") do
		local orig_word = word
		word = rev_string(word)
		-- word-final "o" after vowel + consonant
		word = gsub(word, "^o(়?" .. c .. ")(ঁ?" .. v .. ")", "%1%2")
		-- repeat until no medial "o" matches any more
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, "%1%2")
		end
		-- NOTE(review): orig_word is used as a pattern and is replaced
		-- wherever it occurs in text, including inside longer words.
		text = gsub(text, orig_word, rev_string(word))
	end

	-- Second members of conjuncts get temporary Latin symbols that the
	-- Latin-script rules below resolve.
	text = gsub(text, "্ম", "ṃ")
	text = gsub(text, "্য", "y")
	text = gsub(text, "্ব", "v")

	-- word-final sequences that keep a final "o"
	text = gsub(text, "িত$", "ito")
	text = gsub(text, "িত ", "ito ")

	text = gsub(text, "ৃত$", "rito")
	text = gsub(text, "ৃত ", "rito ")

	text = gsub(text, "ছিল$", "chilo")
	text = gsub(text, "ছিল ", "chilo ")

	-- Replace Bengali characters through the lookup table: first a character
	-- together with a following nukta or "’", then single characters.
	text = gsub(text, ".[়’]?", char)
	text = gsub(text, ".", char)

	-- From here on the rules operate on Latin-script text.
	local v_Latn = "[oaiueō]"
	local consonants_no_h = "[bcdgjklmnpsśtṭḍ]"

	-- inherent vowel deletion
	text = gsub(text, "(".. v_Latn .. ")bo([bnp])(".. v_Latn .. ")", "%1b%2%3")
	text = gsub(text, "(".. v_Latn .. ")do([bp])(".. v_Latn .. ")", "%1d%2%3")
	text = gsub(text, "(".. v_Latn .. ")dho([bp])(".. v_Latn .. ")", "%1dh%2%3")
	text = gsub(text, "(".. v_Latn .. ")lo([bp])(".. v_Latn .. ")", "%1l%2%3")
	text = gsub(text, "(".. v_Latn .. ")mo([bkprṛ])(".. v_Latn .. ")", "%1m%2%3")
	text = gsub(text, "(".. v_Latn .. ")ro([bcghjpsś]h?)(".. v_Latn .. ")", "%1r%2%3")
	text = gsub(text, "goñjo$", "gonj") -- exceptional
	text = gsub(text, "goñjo ", "gonj ") -- exceptional

	-- Cv
	text = gsub(text, "([bgmr])v", "%1b")
	text = gsub(text, "hv", "hb")
	text = gsub(text, "udv", "udb")
	text = gsub(text, "ttv", "tt")
	text = gsub(text, "^sv", "ś") -- initial
	text = gsub(text, "([sś])v", "śś") -- medial

	text = gsub(text, "^(" .. consonants_no_h .. "h?)v", "%1") -- initial
	text = gsub(text, "(" .. consonants_no_h .. ")v", "%1%1") -- medial
	text = gsub(text, "(" .. consonants_no_h .. ")hv", "%1%1h") -- medial_h

	--ahb, ihb
	text = gsub(text, "ahb", "aōbh")
	text = gsub(text, "ihb", "iubh")

	-- kṣ
	text = gsub(text, "^kṣ", "kh") -- initial
	text = gsub(text, "kṣ", "kkh") -- medial
	text = gsub(text, "okkhṃ", "okkh") -- medial_m

	-- sm
	text = gsub(text, "^([ṣs])ṃ(" .. v_Latn .. ")", "ś%2̃") -- initial
	text = gsub(text, "([ṣs])ṃ(" .. v_Latn .. ")", "śś%2̃") -- medial

	-- tm
	text = gsub(text, "^tṃ", "t") -- initial
	text = gsub(text, "tṃ", "tt") -- medial

	-- any temporary symbols left over fall back to plain letters
	text = gsub(text, "ṃ", "m")
	text = gsub(text, "ṣ", "ś")

	-- rules for changing s to ś (applicable for native words only)
	text = gsub(text, "s(".. v_Latn .. ")$", "ś%1")
	text = gsub(text, "s(".. v_Latn .. ") ", "ś%1 ")
	text = gsub(text, "s([oō])", "ś%1")
	text = gsub(text, "os$", "oś")
	text = gsub(text, "os ", "oś ")

	text = gsub(text, "śl", "sl")
	text = gsub(text, "śr", "sr")
	text = gsub(text, "sp", "śp")
	text = gsub(text, "^śp", "sp")
	text = gsub(text, " śp", " sp")

	-- visarga deletion
	text = gsub(text, "ḥkh", "kkh")

	-- jñ
	text = gsub(text, "jñ", "gy")

	text = gsub(text, "ñ", "n")

	-- word-final "o" dropped after nḍ and rk
	text = gsub(text, "nḍo$", "nḍ")
	text = gsub(text, "nḍo ", "nḍ ")

	text = gsub(text, "rko$", "rk")
	text = gsub(text, "rko ", "rk ")

	-- word-final h after a vowel gets an "o", except in gah/lah/śah
	text = gsub(text, "(" .. v_Latn .. ")h$", "%1ho")
	text = gsub(text, "(" .. v_Latn .. ")h ", "%1ho ")

	text = gsub(text, "([glś])aho$", "%1ah")
	text = gsub(text, "([glś])aho ", "%1ah ")

	text = gsub(text, "ṇn", "ṇon")
	text = gsub(text, "ṇ", "n")

	-- sequences rendered as "ê"
	text = gsub(text, "^ek", "êk")
	text = gsub(text, "^oya", "ê")
	text = gsub(text, "^eya", "ê")

	text = gsub(text, "^(" .. consonants_no_h .. "h?)ya", "%1ê") -- initial
	text = gsub(text, " (" .. consonants_no_h .. "h?)ya", " %1ê") -- initial
	text = gsub(text, "^hya", "hê") -- h_initial
	text = gsub(text, "gya", "ggê") -- g_medial
	text = gsub(text, "yal$", "êl") -- final_l

	text = gsub(text, "yanḍ", "ênḍ")

	-- Cy
	text = gsub(text, "^(" .. consonants_no_h .. "h?)y", "%1") -- initial
	text = gsub(text, "(" .. consonants_no_h .. ")y", "%1%1") -- medial
	text = gsub(text, "(" .. consonants_no_h .. ")hy", "%1%1h") -- medial_h
	text = gsub(text, "^hy", "jh") -- h_initial
	text = gsub(text, "hy", "jjh") -- h_medial
	text = gsub(text, "ry", "rj")

	text = gsub(text, "ẏo([gklmn])([aeiīōuū])", "ẏ%1%2")
	text = gsub(text, "ẏoō", "ẏō")
	text = gsub(text, "oō$", "ō")

	text = gsub(text, "([ei])ẏ([" .. consonant .. "])", "%1ẏo%2")
	text = gsub(text, "([ei])ẏ$", "%1ẏo")

	text = gsub(text, "śṭh$", "śṭho")

	-- restore an "o" in specific initial and final positions
	text = gsub(text, "^([kg]h?)([dḍtṭ])", "%1o%2")
	text = gsub(text, "([au])b$", "%1bo")
	text = gsub(text, "([au])b ", "%1bo ")

	text = gsub(text, "([tb]h?)ob$", "%1obo") -- exceptional
	text = gsub(text, "([tb]h?)ob ", "%1obo ") -- exceptional

	text = gsub(text, "([au])bh$", "%1bho")
	text = gsub(text, "([au])bh ", "%1bho ")
	text = gsub(text, "^l([au])bho$", "l%1bh")
	text = gsub(text, "^l([au])bho ", "l%1bh ")

	text = gsub(text, "lona$", "lna")
	text = gsub(text, "nola$", "nla")

	text = gsub(text, "ōẏ", "ōẇ")
	text = gsub(text, "ō̃ẏ", "ō̃ẇ")

	-- collapse a doubled inherent vowel
	text = gsub(text, "oo", "o")

	-- Any character of the Bengali range still present means the
	-- transliteration failed. The check used to read an undeclared global
	-- `mode`, which is always nil, so the "debug" escape hatch could never be
	-- reached; it is now tied to the fourth parameter.
	if match(text, "[ঁ-৽]") and override ~= "debug" then
		return nil
	else
		return mw.ustring.toNFC(text)
	end
end
 
-- Hand the table of public functions (export.tr) to the caller.
return export