Module:Cans-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate text in the Canadian syllabics script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Cans-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.
{
	[5] = {
		[1] = {
			[1] = "ᑊ",
			[2] = "p",
		},
		[2] = {
			[1] = "ᐟ",
			[2] = "t",
		},
		[3] = {
			[1] = "ᐠ",
			[2] = "k",
		},
		[4] = {
			[1] = "ᐨ",
			[2] = "c",
		},
		[5] = {
			[1] = "ᒼ",
			[2] = "m",
		},
		[6] = {
			[1] = "ᐣ",
			[2] = "n",
		},
		[7] = {
			[1] = "ᐢ",
			[2] = "s",
		},
		[8] = {
			[1] = "ᐧ",
			[2] = "y",
		},
		[9] = {
			[1] = "ᐤ",
			[2] = "w",
		},
		[10] = {
			[1] = "ᐦ",
			[2] = "h",
		},
		[11] = {
			[1] = "ᕁ",
			[2] = "hk",
		},
		[12] = {
			[1] = "ᕽ",
			[2] = "hk",
		},
		[13] = {
			[1] = "ᓫ",
			[2] = "l",
		},
		[14] = {
			[1] = "ᕑ",
			[2] = "r",
		},
		[15] = {
			[1] = "ᑉ",
			[2] = "p",
		},
		[16] = {
			[1] = "ᑦ",
			[2] = "t",
		},
		[17] = {
			[1] = "ᒡ",
			[2] = "c",
		},
		[18] = {
			[1] = "ᒃ",
			[2] = "k",
		},
		[19] = {
			[1] = "ᒻ",
			[2] = "m",
		},
		[20] = {
			[1] = "ᓐ",
			[2] = "n",
		},
		[21] = {
			[1] = "ᔅ",
			[2] = "s",
		},
		[22] = {
			[1] = "ᔥ",
			[2] = "š",
		},
		[23] = {
			[1] = "ᔾ",
			[2] = "y",
		},
		[24] = {
			[1] = "ᓪ",
			[2] = "l",
		},
		[25] = {
			[1] = "ᕐ",
			[2] = "r",
		},
		[26] = {
			[1] = "ᕪ",
			[2] = "ð",
		},
		[27] = {
			[1] = "‡",
			[2] = "ð",
		},
		[28] = {
			[1] = "ᒄ",
			[2] = "kw",
		},
		[29] = {
			[1] = "ᔉ",
			[2] = "sk",
		},
		[30] = {
			[1] = "ᖅ",
			[2] = "q",
		},
		[31] = {
			[1] = "ᖕ",
			[2] = "ng",
		},
		[32] = {
			[1] = "ᖖ",
			[2] = "nng",
		},
		[33] = {
			[1] = "ᕝ",
			[2] = "v",
		},
		[34] = {
			[1] = "ᖦ",
			[2] = "l",
		},
		[35] = {
			[1] = "ᕀ",
			[2] = "y",
		},
		[36] = {
			[1] = "ᕻ",
			[2] = "h",
		},
		[37] = {
			[1] = "ᕼ",
			[2] = "h",
		},
	},
}

-- Table of public functions; it is returned to the caller at the end of the module.
local export = {}
-- to be discussed: not to create this module, but create individual modules instead?

-- Lua pattern matching one UTF-8 encoded character: a lead byte (NUL, 1-127 or
-- 194-244) followed by any number of continuation bytes (128-191).
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
	
-- Returns an iterator function that yields `str` one UTF-8 character at a
-- time, in order, using the UTF8_char pattern.
local function iter_char(str)
	local next_char = string.gmatch(str, UTF8_char)
	return next_char
end

-- Returns the number of UTF-8 characters (UTF8_char matches) in `str`,
-- as opposed to its length in bytes.
local function len(str)
	local count = 0
	for _ in string.gmatch(str, UTF8_char) do
		count = count + 1
	end
	return count
end

-- Returns the `index`-th UTF-8 character of `str` (1-based), or the empty
-- string when `str` has no character at that position.
local function fetch(str, index)
	local next_char = iter_char(str)
	local position = 1
	local char = next_char()
	while char do
		if position == index then
			return char
		end
		position = position + 1
		char = next_char()
	end
	return ""
end

-- Transliteration rows, applied in this order.
--
-- A row with a vowel string (item[4]) describes a grid of syllabics:
--   item[1]  the syllabic characters, grouped by consonant; each group holds
--            one character per vowel in item[4]. An ASCII space is a
--            placeholder for a missing cell: it occupies a position but is
--            never substituted.
--   item[2]  one consonant per group; a digit n stands for item[3][n].
--   item[3]  multi-letter (or empty) consonants referenced by digits.
--   item[4]  the vowels of one group, in order.
--   item[5]  function combining consonant and vowel into the output text.
--
-- A row without item[4] pairs the characters of item[1] one-to-one with the
-- characters of item[2] (digits again index item[3]).
local data = {
	{ --short-vowel
		"ᐁᐃᐅᐊᐯᐱᐳᐸᑌᑎᑐᑕᑫᑭᑯᑲᒉᒋᒍᒐᒣᒥᒧᒪᓀᓂᓄᓇᓭᓯᓱᓴᔦᔨᔪᔭᖊᖋᖌᖍᕃᕆᕈᕍᕂᕄᕊᕋᔐᔑᔓᔕᕓᕕᕗᕙᕞᕠᕤᕦᘚᘛᘕᘔᓓᓕᓗᓚᙯᕿᖁᖃᙰᖏᖑᖓ ᙱᙳᙵᕴᕵᕷᕹ",
		"1ptkcmnsyrrršfðzlq23h",
		{"","ng","nng"},
		"eioa",
		function(a,b)
			return a..b
		end
	},
	{ --w
		"ᐌᐍᐎᐏᐒᐓᐗᐘᐺᐻᐼᐽᑀᑁᑄᑅᑗᑘᑙᑚᑝᑞᑡᑢᑴᑵᑶᑷᑺᑻᑾᑿᒒᒓᒔᒕᒘᒙᒜᒝᒬᒭᒮᒯᒲᒳᒶᒷ"
			.. "ᓉᓊ  ᓋᓌ  ᓶᓷᓸᓹᓼᓽᔀᔁᔯᔰᔱᔲᔵᔶᔹᔺᔗᔘᔙᔚᔝᔞᔡᔢᓜᓝᓞᓟᓢᓣᓦᓧ",
		"1ptkcmnsyrz",
		{""},
		"eeiiooaa",
		function(a,b)
			return a..'w'..b
		end
	},
	{ --long-vowel
		"ᐄᐆᐋᐲᐴᐹᑏᑑᑖᑮᑰᑳᒌᒎᒑᒦᒨᒫᓃᓅᓈᓰᓲᓵᔩᔫᔮᕇᕉᕌᔒᔔᔖᕖᕘᕚᕢᕥᕧᓖᓘᓛᐐᐔᐙᖀᖂᖄᕶᕸᕺ",
		"1ptkcmnsyršfðlwqh",
		{""},
		"īōā",
		function(a,b)
			return a..b
		end
	},
	{ --w-long
		"ᐐᐑᐔᐕᐖᐙᐚᐛᐾᐿᑂᑃ ᑆᑇᑈᑛᑜᑟᑠ ᑣᑤᑥᑸᑹᑼᑽ ᒀᒁᒂᒖᒗᒚᒛ ᒞᒟᒠᒰᒱᒴᒵ ᒸᒹᒺ     ᓍᓎᓏ"
			.. "ᓠᓡᓤᓥ ᓨᓩ ᓺᓻᓾᓿ ᔂᔃᔄᔛᔜᔟᔠ ᔣᔤ ᔳᔴᔷᔸ ᔻᔼᔽ     ᕎᕏ      ᕛᕜ      ᕨᕩ ",
		"1ptkcmnlsšyrfð",
		{""},
		"īīōōōāāā",
		function(a,b)
			return a..'w'..b
		end
	},
	{ --individual
		"ᑊᐟᐠᐨᒼᐣᐢᐧᐤᐦᕁᕽᓫᕑᑉᑦᒡᒃᒻᓐᔅᔥᔾᓪᕐᕪ‡ᒄᔉᖅᖕᖖᕝᖦᕀᕻᕼ",
		"ptkcmnsywh11lrptckmnsšylrðð23q45vlyhh",
		{"hk","kw","sk","ng","nng"},
	},
}

-- Cached list of { character, replacement } pairs; filled on the first call
-- to export.tr so the table above is expanded only once per module load.
local substitutions

-- Expands `data` into an ordered list of { character, replacement } pairs.
-- The pairs are produced in exactly the order the rows and their characters
-- are listed, so a character that occurs in more than one row (e.g. ᐐ) is
-- handled by its first occurrence.
local function build_substitutions()
	local list = {}
	-- ipairs, not pairs: the rows must be visited in their listed order.
	for _, item in ipairs(data) do
		if item[4] then
			local length = len(item[4])
			local c = 0
			for s in iter_char(item[1]) do
				c = c + 1
				-- Every `length` consecutive characters share one consonant.
				local a = fetch(item[2], math.ceil(c / length))
				if tonumber(a) then
					a = item[3][tonumber(a)]
				end
				-- The position inside the group selects the vowel.
				local b = fetch(item[4], (c - 1) % length + 1)
				-- A space only reserves a position in the grid.
				if s ~= " " then
					list[#list + 1] = { s, item[5](a, b) }
				end
			end
		else
			local iter1, iter2 = iter_char(item[1]), iter_char(item[2])
			while true do
				local s, a = iter1(), iter2()
				-- Stop as soon as either string runs out.
				if not (s and a) then
					break
				end
				if tonumber(a) then
					a = item[3][tonumber(a)]
				end
				list[#list + 1] = { s, a }
			end
		end
	end
	return list
end

-- Transliterates Canadian syllabics in `text` into Latin letters.
--
-- text: the string to transliterate.
-- lang, sc: language and script codes; accepted as part of the signature but
--           not used by this function.
--
-- Returns `text` with every character listed in `data` replaced by its
-- transliteration; all other characters are left unchanged.
function export.tr(text, lang, sc)
	if not substitutions then
		substitutions = build_substitutions()
	end
	for _, pair in ipairs(substitutions) do
		-- Parentheses drop gsub's second result (the match count).
		text = (string.gsub(text, pair[1], pair[2]))
	end
	return text
end

return export