Module:bn-translit

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
This module is in the beta stage.
Its interface has been stabilised, but the module may still contain errors. Do not deploy it widely until it has been tested.

This module will transliterate Bengali language text per WT:BN TR.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:bn-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang. Returns nil when the transliteration fails.

-- Transliteration for Bengali

-- Module table; export.tr is attached to it below and the table is returned
-- at the end of the file.
local export = {}
-- Short aliases for the mw.ustring pattern functions.
-- `match` is not referenced in the visible part of this file.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
 
-- Lookup table from Bengali characters (and a few character sequences) to
-- their Latin transliteration. export.tr passes it to gsub as the
-- replacement table, so a matched string that has no entry here is left
-- unchanged.
local conv = {
	-- consonants
	-- The two conjuncts below are multi-character keys; export.tr replaces
	-- them with literal gsub calls, and its single-character lookup (".")
	-- cannot reach them.
	['ক্ষ'] = 'kkh', ['জ্ঞ'] = 'gg',
	['ক'] = 'k',	['খ'] = 'kh',	['গ'] = 'g',	['ঘ'] = 'gh',	['ঙ'] = 'ṅ',
	['চ'] = 'c',	['ছ'] = 'ch',	['জ'] = 'j',	['ঝ'] = 'jh',	['ঞ'] = 'ñ',
	['ট'] = 'ṭ',	['ঠ'] = 'ṭh',	['ড'] = 'ḍ',	['ঢ'] = 'ḍh',	['ণ'] = 'ṇ',
	['ত'] = 't',	['থ'] = 'th',	['দ'] = 'd',	['ধ'] = 'dh',	['ন'] = 'n',
	['প'] = 'p',	['ফ'] = 'ph',	['ব'] = 'b',	['ভ'] = 'bh',	['ম'] = 'm',
	['য'] = 'j',	['র'] = 'r',	['ল'] = 'l',	['ৱ'] = 'w', 
	['শ'] = 'ś',	['ষ'] = 'ṣ',	['স'] = 's',	['হ'] = 'h',	
	-- Nukta letters; export.tr looks these up with a pattern that matches a
	-- base letter followed by the nukta sign.
	-- NOTE(review): the keys are presumably stored as base letter + nukta
	-- sequences so that they equal what that pattern captures -- confirm.
	['য়'] = 'y',	['ড়'] = 'ṛ',	['ঢ়'] = 'ṛh',

	-- visarga
	['ঃ'] = 'ḥ',

	-- vowel diacritics (dependent vowel signs); short and long i/u are not
	-- distinguished in the output
	['ি'] = 'i',	['ু'] = 'u',	['ৃ'] = 'ri',	['ে'] = 'e',	['ো'] = 'o',
	['া'] = 'a',	['ী'] = 'i',	['ূ'] = 'u',	['ৈ'] = 'oi',	['ৌ'] = 'ou',

	-- independent vowel letters
	['অ'] = 'ô',	['ই'] = 'i',	['উ'] = 'u',	['ঋ'] = 'ri',	['এ'] = 'e',	['ও'] = 'o',
	['আ'] = 'a',	['ঈ'] = 'i',	['ঊ'] = 'u',	['ঐ'] = 'oi',	['ঔ'] = 'ou',

	-- hôshôntô (virama): produces no output of its own
	['্'] = '',

	-- chôndrôbindu
	['ঁ'] = 'ṁ',
		
	-- ônusbar: emitted as the placeholder 'N', which export.tr rewrites to
	-- 'ng' afterwards
	['ং'] = 'N',

	-- khôndô tô
	['ৎ'] = 't',

	-- numerals
	['০'] = '0', ['১'] = '1', ['২'] = '2', ['৩'] = '3', ['৪'] = '4', ['৫'] = '5', ['৬'] = '6', ['৭'] = '7', ['৮'] = '8', ['৯'] = '9',
 
	-- punctuation
	['।'] = '.', -- dari
}

--- Transliterates Bengali text into Latin script.
-- The text is processed in stages: an inherent vowel "ô" is inserted after
-- every consonant, removed again where a vowel sign or virama follows,
-- dropped in word-final and some medial positions, and finally every
-- character is mapped through the `conv` table.
-- @param text string to transliterate
-- @param lang language code; not used by this function
-- @param sc script code; not used by this function
-- @return the transliteration, normalised with mw.ustring.toNFC
function export.tr(text, lang, sc)
	-- one consonant letter, optionally followed by a nukta
	local c = '([কষজঞকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলৱশষসহ]়?)'
	local y = 'য়'
	local r = 'র'
	-- inherent vowel, virama, vowel signs and independent vowels
	local v = '([ô্িুৃেোাীূৈৌঅইউঋএওআঈঊঐঔ])'
	local virama = '্'
	-- optional anusvara
	local n = '(ং?)'

	-- same class as v, but without the virama
	local no_virama = gsub(v, virama, "")

	-- Trailing space so that the word-final rule below (which looks for
	-- "ô ") also applies to the last word; it is stripped again at the end.
	text = text .. " "

	-- Give every consonant an inherent vowel, then take it away again where
	-- a member of v (vowel sign, virama, ...) follows.
	text = gsub(text, c, "%1ô")
	text = gsub(text, "ô" .. v, "%1")

	-- Word-final position: after vowel (+ optional anusvara) + consonant the
	-- inherent vowel is dropped, unless the consonant is য়.
	text = gsub(text, v .. n .. c .. "ô ", function(vowel, anusvara, cons)
		if cons == y then
			return vowel .. anusvara .. cons .. "ô "
		end
		return vowel .. anusvara .. cons .. " "
	end)

	-- Medial position: in vowel (+ anusvara) + consonant + ô + consonant +
	-- vowel, the ô is dropped unless the first consonant is য় or র.
	-- The greedy "(.*)" makes each pass act on the rightmost match.
	-- NOTE(review): the loop ends as soon as the rightmost remaining match
	-- has য় or র in that position, so matches further to the left are not
	-- revisited -- confirm this is intended.
	local pattern = v .. n .. c .. "ô" .. c .. no_virama
	local keep_going = true
	while keep_going do
		keep_going = false
		text = gsub(text, "(.*)" .. pattern, function(head, vowel, anusvara, cons, next_cons, next_vowel)
			if cons == y or cons == r then
				return head .. vowel .. anusvara .. cons .. "ô" .. next_cons .. next_vowel
			end
			keep_going = true
			return head .. vowel .. anusvara .. cons .. next_cons .. next_vowel
		end)
	end

	-- Attach the dari to the preceding word. This has to happen before the
	-- per-character conversion below: conv maps every "।" to "." there, so a
	-- later search for "।" can never match. It is done after the word-final
	-- rule above because that rule relies on the space following the word.
	text = gsub(text, " ?।", ".")

	-- Multi-character units first, then everything else one character at a
	-- time.
	text = gsub(text, "([যডঢ]়)", conv)
	text = gsub(text, "ক্ষ", "kkh")
	text = gsub(text, "জ্ঞ", "gg")
	text = gsub(text, ".", conv)

	-- "ː" followed by a character becomes that character doubled.
	text = gsub(text, "ː(.)", "%1%1")

	-- Drop a word-final ô after y that is preceded by ā, ē, e or o.
	text = gsub(text, "([āēeo]y)ô ", "%1 ")
	-- The anusvara placeholder from conv becomes "ng" (this also covers the
	-- word-final "ôN " case).
	text = gsub(text, "N", "ng")

	-- b is dropped directly after t or s.
	text = gsub(text, "([ts])b", "%1")

	-- Remove the space appended at the start.
	text = gsub(text, " $", "")

	return mw.ustring.toNFC(text)
end
 
-- Hand the module table (carrying tr) back to whatever loads this module.
return export