Module:sh-translit

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module transliterates Serbo-Croatian text between the Cyrillic and Latin scripts, per WT:SH TR.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sh-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang. When the transliteration fails, returns nil.

-- Table returned at the end of the module; holds the public functions.
local export = {}

-- Per-script replacement tables, keyed by script code ("Cyrl", "Latn").
-- export.tr passes the selected table to gsub as the replacement argument.
local tt = {}

-- Cyrillic -> Latin.
-- Every key is a single code point, because export.tr looks characters up
-- one at a time (pattern '.'). Characters without an entry are left as-is.
tt["Cyrl"] = {
	["А"]='A', ["а"]='a',
	["Б"]='B', ["б"]='b',
	["В"]='V', ["в"]='v',
	["Г"]='G', ["г"]='g',
	["Д"]='D', ["д"]='d',
	["Ђ"]='Đ', ["ђ"]='đ',
	["Е"]='E', ["е"]='e',
	["Ж"]='Ž', ["ж"]='ž',
	["З"]='Z', ["з"]='z',
	["И"]='I', ["и"]='i',
	["Ј"]='J', ["ј"]='j',
	["К"]='K', ["к"]='k',
	["Л"]='L', ["л"]='l',
	["Љ"]='Lj', ["љ"]='lj',
	["М"]='M', ["м"]='m',
	["Н"]='N', ["н"]='n',
	["Њ"]='Nj', ["њ"]='nj',
	["О"]='O', ["о"]='o',
	["П"]='P', ["п"]='p',
	["Р"]='R', ["р"]='r',
	["С"]='S', ["с"]='s',
	["Т"]='T', ["т"]='t',
	["Ћ"]='Ć', ["ћ"]='ć',
	["У"]='U', ["у"]='u',
	["Ф"]='F', ["ф"]='f',
	["Х"]='H', ["х"]='h',
	["Ц"]='C', ["ц"]='c',
	["Ч"]='Č', ["ч"]='č',
	["Џ"]='Dž', ["џ"]='dž',
	["Ш"]='Š', ["ш"]='š',

	--letters with diacritics (precomposed Cyrillic code points)
	["Ѐ"]='È', ["ѐ"]='è',
	["Ѝ"]='Ì', ["ѝ"]='ì',
	["Ӣ"]='Ī', ["ӣ"]='ī',
	["Ӯ"]='Ū', ["ӯ"]='ū',

	-- proposed Montenegrin letters (Cyrillic З/С followed by a combining acute)
	-- deliberately have no entries here: they are two code points each, so a
	-- one-character lookup could never hit them. The base letter is mapped to
	-- Z/S above, the combining acute passes through untouched, and the NFC
	-- normalisation in export.tr composes the pair into Ź/Ś.
	-- A Latin Ć/ć key must not appear in this table: it would rewrite Latin
	-- ć embedded in Cyrillic text.
};

-- Latin -> Cyrillic.
-- Apart from the three digraphs, every key is a single (precomposed) code
-- point, because export.tr looks characters up one at a time (pattern '.').
-- The digraph keys are only reached through the separate two-character pass
-- that export.tr runs first for Latin input.
-- Accented vowels and r have no precomposed Cyrillic form, so their values
-- are a Cyrillic base letter followed by a combining mark.
tt["Latn"] = {
	--Digraphs
	["Lj"]='Љ', ["lj"]='љ',
	["Nj"]='Њ', ["nj"]='њ',
	["Dž"]='Џ', ["dž"]='џ',
	
	["A"]='А', ["a"]='а',
	["B"]='Б', ["b"]='б',
	["V"]='В', ["v"]='в',
	["G"]='Г', ["g"]='г',
	["D"]='Д', ["d"]='д',
	["Đ"]='Ђ', ["đ"]='ђ',
	["E"]='Е', ["e"]='е',
	["Ž"]='Ж', ["ž"]='ж',
	["Z"]='З', ["z"]='з',
	["I"]='И', ["i"]='и',
	["J"]='Ј', ["j"]='ј',
	["K"]='К', ["k"]='к',
	["L"]='Л', ["l"]='л',
	["M"]='М', ["m"]='м',
	["N"]='Н', ["n"]='н',
	["O"]='О', ["o"]='о',
	["P"]='П', ["p"]='п',
	["R"]='Р', ["r"]='р',
	["S"]='С', ["s"]='с',
	["T"]='Т', ["t"]='т',
	["Ć"]='Ћ', ["ć"]='ћ',
	["U"]='У', ["u"]='у',
	["F"]='Ф', ["f"]='ф',
	["H"]='Х', ["h"]='х',
	["C"]='Ц', ["c"]='ц',
	["Č"]='Ч', ["č"]='ч',
	["Š"]='Ш', ["š"]='ш',
	
	--letters with diacritics
	["È"]='Ѐ', ["è"]='ѐ',
	["Ì"]='Ѝ', ["ì"]='ѝ',
	["Ī"]='Ӣ', ["ī"]='ӣ',
	["Ū"]='Ӯ', ["ū"]='ӯ',
	
	["Á"]='А́', ["á"]='а́',
	["À"]='А̀', ["à"]='а̀',
	["Ā"]='А̄', ["ā"]='а̄',
	["Ȁ"]='А̏', ["ȁ"]='а̏',
	["Ȃ"]='А̑', ["ȃ"]='а̑',
	
	["É"]='Е́', ["é"]='е́',
	["Ē"]='Е̄', ["ē"]='е̄',
	["Ȅ"]='Е̏', ["ȅ"]='е̏',
	["Ȇ"]='Е̑', ["ȇ"]='е̑',
	
	["Í"]='И́', ["í"]='и́',
	["Ȉ"]='И̏', ["ȉ"]='и̏',
	["Ȋ"]='И̑', ["ȋ"]='и̑',
	
	["Ó"]='О́', ["ó"]='о́',
	["Ò"]='О̀', ["ò"]='о̀',
	["Ō"]='О̄', ["ō"]='о̄',
	["Ȍ"]='О̏', ["ȍ"]='о̏',
	["Ȏ"]='О̑', ["ȏ"]='о̑',
	
	["Ŕ"]='Р́', ["ŕ"]='р́',
	["Ȑ"]='Р̏', ["ȑ"]='р̏',
	["Ȓ"]='Р̑', ["ȓ"]='р̑',
	
	["Ú"]='У́', ["ú"]='у́',
	["Ù"]='У̀', ["ù"]='у̀',
	["Ȕ"]='У̏', ["ȕ"]='у̏',
	["Ȗ"]='У̑', ["ȗ"]='у̑',

	-- proposed Montenegrin letters
	-- The values are Cyrillic З/з (U+0417/U+0437) and С/с (U+0421/U+0441)
	-- followed by a combining acute (U+0301). They must NOT be the look-alike
	-- Latin Ź/Ć, which would leave Latin letters in the Cyrillic output.
	["Ź"]='З́', ["ź"]='з́',
	["Ś"]='С́', ["ś"]='с́',
		
	-- backtick needs to be removed so that "nad`živeti" returns "надживети"
	["`"]=""
};

--- Transliterates Serbo-Croatian text using the replacement table for `sc`.
-- Each code point of `text` is looked up in `tt[sc]`; code points without
-- an entry are passed through unchanged.
-- @param text  the string to transliterate
-- @param lang  language code; accepted for interface compatibility, not used here
-- @param sc    script code of `text`; must be a key of `tt` ("Cyrl" or "Latn")
-- @return the transliterated string in NFC, or nil when `sc` has no table
function export.tr(text, lang, sc)
	local map = tt[sc]
	if not map then
		-- Missing or unsupported script code: report failure with nil rather
		-- than calling gsub with a nil replacement argument.
		return nil
	end

	if sc == "Latn" then
		-- Convert digraphs before the per-character pass splits them up.
		-- The pattern is broader than the table (it also matches e.g. "dj");
		-- pairs with no table entry are left unchanged by gsub.
		text = mw.ustring.gsub(text, '[dDnNlL][jž]', map)
	end

	-- gsub also returns the number of matches; keep only the string.
	local converted = mw.ustring.gsub(text, '.', map)

	-- Compose base letter + combining mark sequences where a precomposed
	-- form exists.
	return mw.ustring.toNFC(converted)
end

return export