Module:my-translit

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:my-translit/documentation. [edit]
Useful links: subpage list • transclusions • testcases

This module will transliterate Burmese language text per the MLCTS scheme.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:my-translit/testcases.

Functions[edit]

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by sc and in the language specified by lang. When the transliteration fails, it returns nil.

-- Table of functions this module makes available; returned at the end of the file.
local export = {}
-- Short local aliases for the mw.ustring functions used throughout the module.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
 
-- Romanization of each consonant letter when it is the first letter of a syllable.
-- Several letters share a romanization (ဉ/ည, ဋ/တ, ဌ/ထ, ဍ/ဒ, ဎ/ဓ, ဏ/န, လ/ဠ),
-- and အ contributes no letter at all.
local initial_conv = {
	["က"] = "k",
	["ခ"] = "hk",
	["ဂ"] = "g",
	["ဃ"] = "gh",
	["င"] = "ng",

	["စ"] = "c",
	["ဆ"] = "hc",
	["ဇ"] = "j",
	["ဈ"] = "jh",
	["ဉ"] = "ny",
	["ည"] = "ny",

	["ဋ"] = "t",
	["ဌ"] = "ht",
	["ဍ"] = "d",
	["ဎ"] = "dh",
	["ဏ"] = "n",

	["တ"] = "t",
	["ထ"] = "ht",
	["ဒ"] = "d",
	["ဓ"] = "dh",
	["န"] = "n",

	["ပ"] = "p",
	["ဖ"] = "hp",
	["ဗ"] = "b",
	["ဘ"] = "bh",
	["မ"] = "m",

	["ယ"] = "y",
	["ရ"] = "r",
	["လ"] = "l",
	["ဝ"] = "w",
	["သ"] = "s",

	["ဟ"] = "h",
	["ဠ"] = "l",
	["အ"] = "",
}
 
-- Romanization of the medial consonant signs. The empty-string key lets a
-- missing (optional) medial capture be looked up without a nil check.
local medial_conv = {
	[""] = "",
	["ျ"] = "y",
	["ြ"] = "r",
	["ွ"] = "w",
	["ှ"] = "h",
}
 
-- Romanization of a whole syllable rhyme (everything after the initial consonant
-- and its medials): vowel signs, final consonant + asat, and tone marks.
-- The empty-string key is the rhyme of a bare consonant.
-- Each row groups the rhymes built on one vowel sign; the ေါ row mirrors the ော row.
local final_conv = {
	[''] = 'a.', ['ါ'] = 'a', ['ာ'] = 'a', ['ါး'] = 'a:', ['ား'] = 'a:', 
	['က်'] = 'ak', ['င်'] = 'ang', ['င့်'] = 'ang.', ['င်း'] = 'ang:', ['စ်'] = 'ac', ['ည်'] = 'any', ['ဉ်'] = 'any', ['ည့်'] = 'any.', ['ဉ့်'] = 'any.', ['ည်း'] = 'any:', ['ဉ်း'] = 'any:', 
	['တ်'] = 'at', ['န်'] = 'an', ['န့်'] = 'an.', ['န်း'] = 'an:', ['ပ်'] = 'ap', ['မ်'] = 'am', ['မ့်'] = 'am.', ['မ်း'] = 'am:', ['ယ်'] = 'ai', ['ယ့်'] = 'ai.', ['ယ်း'] = 'ai:',
	['ံ'] = 'am', ['ံ့'] = 'am.', ['ံး'] = 'am:', 
	['ိ'] = 'i.', ['ိတ်'] = 'it', ['ိန်'] = 'in', ['ိန့်'] = 'in.', ['ိန်း'] = 'in:', ['ိပ်'] = 'ip', ['ိမ်'] = 'im', ['ိမ့်'] = 'im.', ['ိမ်း'] = 'im:', ['ိံ'] = 'im', ['ိံ့'] = 'im.', ['ိံး'] = 'im:', 
	['ီ'] = 'i', ['ီး'] = 'i:', 
	['ု'] = 'u.', ['ုတ်'] = 'ut', ['ုန်'] = 'un', ['ုန့်'] = 'un.', ['ုန်း'] = 'un:', ['ုပ်'] = 'up', ['ုမ်'] = 'um', ['ုမ့်'] = 'um.', ['ုမ်း'] = 'um:', ['ုံ'] = 'um', ['ုံ့'] = 'um.', ['ုံး'] = 'um:', 
	['ူ'] = 'u', ['ူး'] = 'u:', 
	['ေ'] = 'e', ['ေ့'] = 'e.', ['ေး'] = 'e:', 
	['ဲ'] = 'ai:', ['ဲ့'] = 'ai.', 
	['ော'] = 'au:', ['ောက်'] = 'auk', ['ောင်'] = 'aung', ['ောင့်'] = 'aung.', ['ောင်း'] = 'aung:', ['ော့'] = 'au.', ['ော်'] = 'au', 
	-- The creaky-tone entry below was previously keyed 'ေါင်' (a duplicate of the
	-- entry before it), which overwrote 'aung' with 'aung.' and left 'ေါင့်' unmapped.
	['ေါ'] = 'au:', ['ေါက်'] = 'auk', ['ေါင်'] = 'aung', ['ေါင့်'] = 'aung.', ['ေါင်း'] = 'aung:', ['ေါ့'] = 'au.', ['ေါ်'] = 'au', 
	['ို'] = 'ui', ['ိုက်'] = 'uik', ['ိုင်'] = 'uing', ['ိုင့်'] = 'uing.', ['ိုင်း'] = 'uing:', ['ို့'] = 'ui.', ['ိုး'] = 'ui:', ['ိုယ်'] = 'uiy', ['ိုယ့်'] = 'uiy.', ['ိုယ်း'] = 'uiy:', 
	['ွတ်'] = 'wat', ['ွန်'] = 'wan', ['ွန့်'] = 'wan.', ['ွန်း'] = 'wan:', ['ွပ်'] = 'wap', ['ွမ်'] = 'wam', ['ွမ့်'] = 'wam.', ['ွမ်း'] = 'wam:'
}
 
-- Romanization of a vowel sign (or sign sequence) when it is followed by a
-- final consonant that has no ready-made entry in final_conv. The empty-string
-- key covers a syllable with no written vowel sign.
local nucleus = {
	[""] = "a",
	["ါ"] = "a",
	["ာ"] = "a",
	["ံ"] = "am",

	["ိ"] = "i",
	["ိံ"] = "im",
	["ီ"] = "i",

	["ု"] = "u",
	["ုံ"] = "um",
	["ူ"] = "u",

	["ေ"] = "e",
	["ဲ"] = "ai:",

	["ော"] = "au",
	["ေါ"] = "au",

	["ို"] = "ui",

	["ွ"] = "wa",
}
 
-- Single-character fallbacks applied to whatever is left of a syllable after
-- the main conversion: the two tone marks become '.' and ':', and the asat
-- sign is dropped.
local tone_etc = {
	["့"] = ".",
	["း"] = ":",
	["်"] = "",
}
 
-- Letters and signs that are romanized as a unit instead of through
-- initial_conv: each maps straight to its full transliteration.
local indep_letter = {
	["ဣ"] = "i.",
	["ဤ"] = "i",
	["ဥ"] = "u.",
	["ဦ"] = "u",
	["ဧ"] = "e",
	["ဩ"] = "au:",
	["ဪ"] = "au",
	["၏"] = "e",
	["၌"] = "hnai.",
	["၍"] = "rwe",
}
 
-- Two-letter sequences that would be ambiguous if they arose across a syllable
-- boundary (last letter of one syllable + first letter of the next).
-- The table is used as a set: the values are empty strings, which are truthy
-- in Lua, so a plain lookup tells whether the pair is listed.
local ambig_intersyl = {
	ky = "", kr = "", kw = "",
	gy = "", gr = "", gw = "",
	ng = "", ny = "",
	cw = "", tw = "", nw = "",
	py = "", pr = "", pw = "",
	my = "", mr = "", mw = "",
}
 
-- Digits and the two punctuation marks, mapped one-to-one to ASCII.
local symbols = {
	["၀"] = "0",
	["၁"] = "1",
	["၂"] = "2",
	["၃"] = "3",
	["၄"] = "4",
	["၅"] = "5",
	["၆"] = "6",
	["၇"] = "7",
	["၈"] = "8",
	["၉"] = "9",
	["၊"] = ",",
	["။"] = ".",
}
 
-- Transliterates the Burmese-script portions of `text` into Latin letters.
--
-- Parameters:
--   text        the string to convert; characters outside the handled ranges
--               are passed through unchanged.
--   lang, sc    accepted for interface compatibility; not used by this function.
--   debug_mode  when truthy, characters that could not be converted are left
--               in the output instead of making the whole call fail.
-- Returns the converted string, or nil when some syllable still contains a
-- character in the ranges က-႟ / ꩠ-ꩻ after conversion and debug_mode is not set.
function export.tr(text, lang, sc, debug_mode)
	-- Digits and punctuation are simple one-to-one replacements.
	text = gsub(text, '.', symbols)
	-- Set as soon as one run cannot be fully converted; checked after the pass.
	local failed = false
	-- Each maximal run of script characters is converted and substituted in
	-- place. Replacing by position (rather than searching the whole text for the
	-- run's spelling) keeps a short run from also rewriting part of a longer,
	-- not-yet-processed run that happens to contain it.
	text = gsub(text, '[က-ၴ]+', function(word)
		-- A previous run already failed: leave the rest untouched, the result is discarded.
		if failed then return nil end
		-- Set independent letters (plus an optional tone mark/asat) off with spaces.
		-- If the second character after the letter is the stacking sign, the
		-- character in between stays attached to the letter.
		word = gsub(word, '([ဣဤဥဦဧဩဪ၏၌၍][့း်]?)(.?)(.?)', function(a, b, c)
			if c == '္' then
				return ' '..a..b..' '..c
			else
				return ' '..a..' '..b..c
			end end) .. ' '
		-- An asat (optionally followed by the ':' tone mark) closes a syllable.
		word = gsub(word, '(်း?)', '%1 ')
		-- A consonant that follows an open syllable and is not itself marked as a
		-- final (asat) or stacked (stacking sign) begins a new syllable. Repeat
		-- until no such boundary is left, since matches cannot overlap.
		while match(word, '[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*[ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ][့]?[^့်္]') do
			word = gsub(word, '([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ][ျြွှ]*[ံ့းွာါါိီုူေဲ]*)([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ][့]?[^့်္])', '%1 %2')
		end
		-- The stacking sign becomes a standalone ',' marker, resolved after conversion.
		word = gsub(word, '္', ' , ')
		-- ဿ is treated as two သ in separate syllables.
		word = gsub(word, 'ဿ', 'သ သ')
		-- Collapse runs of spaces and trim both ends before splitting.
		word = gsub(word, ' +', ' ')
		word = gsub(word, '^ ?(.*[^ ]) ?$', '%1')
		local syllable = mw.text.split(word, " ", true)
		for i = 1, #syllable do
			-- Split the syllable into initial, medials (y/r, w, h) and the remaining
			-- rhyme. The h medial is written first in the output. The rhyme is taken
			-- from final_conv when listed; otherwise it is assembled from the vowel
			-- part and the last consonant, with the first tone mark moved to the
			-- end. For an independent letter, vowel letters and tone marks are
			-- stripped from the rhyme because the letter's own entry supplies them.
			syllable[i] = gsub(syllable[i], '^([ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအဣဤဥဦဧဩဪ၏၌၍])([ျြ]?)(ွ?)(ှ?)([က-႟ꩠ-ꩻ]*)$', function(initial, medial_yr, medial_w, medial_h, final)
				return medial_conv[medial_h] .. (indep_letter[initial] or initial_conv[initial]) .. medial_conv[medial_yr] .. medial_conv[medial_w] ..
					gsub((final_conv[final] or gsub(final, '^([^်]*)([^်])်?$', function(a, b) return gsub((nucleus[a] or final_conv[a] or a) .. (initial_conv[b] or b), '([%.:])(.*)', '%2%1') end)), '.', function(final_ext)
						if indep_letter[initial] then return gsub(final_ext, '[aeiou%.%:]', '') end end) end)
 
			-- Convert any tone marks / asat the step above did not consume.
			syllable[i] = gsub(syllable[i], '.', tone_etc)
			-- Anything still in the script ranges could not be transliterated.
			if match(syllable[i], '[က-႟ꩠ-ꩻ]') and not debug_mode then
				failed = true
				return nil
			end
		end
		word = table.concat(syllable, " ")
		-- Resolve the stacking markers: an 'h' that starts the stacked consonant is
		-- moved behind the letter that follows it, then the marker itself is removed.
		word = gsub(word, ' , ', ', ')
		word = gsub(word, '%, h(%l)', '%1h')
		word = gsub(word, '%, ', '')
		-- Join the syllables, hyphenating where the two letters meeting at the
		-- boundary form a sequence listed in ambig_intersyl.
		-- NOTE(review): matches cannot overlap, so the space after a one-character
		-- syllable that was consumed by the previous match is left in place — confirm
		-- whether that case needs handling.
		word = gsub(word, '(.) (.)', function(a, b)
			if ambig_intersyl[a..b] then
				return a..'-'..b
			else
				return a..b
			end end)
		return word
	end)
	if failed then
		return nil
	end
	return text
end
 
-- Hand the export table (holding tr) back to whoever loads this module.
return export