Module:ab-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate Abkhaz language text per WT:AB TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:ab-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

-- String helpers, all taken from a single load of the shared utilities module.
local string_utilities = require("Module:string utilities")
local gmatch = string_utilities.gmatch
local gsub = string_utilities.gsub
local lower = string_utilities.lower
local u = require("Module:string/char")

-- Combining diacritics, obtained from their code points through `u`.
local GRAVE = u(0x300)
local ACUTE = u(0x301)
local CIRC = u(0x302)
local BREVE = u(0x306)
local CARON = u(0x30C)
local DOTBELOW = u(0x323)

-- Single-letter table: each Cyrillic character maps to its Latin rendering.
-- It is applied character by character, after the digraph table has been used.
local tt = {
	-- Lower case.
	["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["ӷ"] = "ğ", ["ҕ"] = "ğ",
	["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "ẑ", ["з"] = "z", ["ҙ"] = "ź",
	["ӡ"] = "dz", ["и"] = "i", ["й"] = "j", ["к"] = "kʼ", ["қ"] = "k", ["ҟ"] = "qʼ",
	["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "pʼ", ["ԥ"] = "p",
	["ҧ"] = "p", ["р"] = "r", ["с"] = "s", ["ҫ"] = "ś", ["т"] = "tʼ", ["ҭ"] = "t",
	["у"] = "u", ["ф"] = "f", ["х"] = "x", ["ҳ"] = "ḥ", ["ц"] = "c", ["ҵ"] = "cʼ",
	["ч"] = "č", ["ҷ"] = "čʼ", ["ҽ"] = "ĉ", ["ҿ"] = "ĉʼ", ["ш"] = "ŝ", ["ы"] = "ə",
	["ҩ"] = "jʷ", ["џ"] = "dẑ", ["ь"] = "ʲ", ["ә"] = "ʷ", ["э"] = "e", ["ю"] = "ju",
	["я"] = "ja",

	-- Upper case. Multi-letter results are title-cased; the modifier letters
	-- produced by "Ь" and "Ә" have no capital form.
	["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Ӷ"] = "Ğ", ["Ҕ"] = "Ğ",
	["Д"] = "D", ["Е"] = "E", ["Ё"] = "Jo", ["Ж"] = "Ẑ", ["З"] = "Z", ["Ҙ"] = "Ź",
	["Ӡ"] = "Dz", ["И"] = "I", ["Й"] = "J", ["К"] = "Kʼ", ["Қ"] = "K", ["Ҟ"] = "Qʼ",
	["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "Pʼ", ["Ԥ"] = "P",
	["Ҧ"] = "P", ["Р"] = "R", ["С"] = "S", ["Ҫ"] = "Ś", ["Т"] = "Tʼ", ["Ҭ"] = "T",
	["У"] = "U", ["Ф"] = "F", ["Х"] = "X", ["Ҳ"] = "Ḥ", ["Ц"] = "C", ["Ҵ"] = "Cʼ",
	["Ч"] = "Č", ["Ҷ"] = "Čʼ", ["Ҽ"] = "Ĉ", ["Ҿ"] = "Ĉʼ", ["Ш"] = "Ŝ", ["Ы"] = "Ə",
	["Ҩ"] = "Jʷ", ["Џ"] = "Dẑ", ["Ь"] = "ʲ", ["Ә"] = "ʷ", ["Э"] = "E", ["Ю"] = "Ju",
	["Я"] = "Ja",
}

-- Two-character sequences (base letter + soft sign or ASCII apostrophe) that
-- have a dedicated transliteration. They are replaced before the single-letter
-- table `tt` is applied, so the base letter and its modifier are consumed
-- together.
local digraphs = {
	-- Lower-case base letter.
	["жь"] = "ž", ["ӡь"] = "dź", ["ӡ'"] = "dź", ["ф'"] = "fʼ", ["х'"] = "x̣", ["ць"] = "ć", ["ц'"] = "ć", ["ҵь"] = "ćʼ", ["ҵ'"] = "ćʼ", ["шь"] = "š", ["џь"] = "dž",
	-- Upper-case base letter followed by a lower-case soft sign (title case).
	["Жь"] = "Ž", ["Ӡь"] = "Dź", ["Ӡ'"] = "Dź", ["Ф'"] = "Fʼ", ["Х'"] = "X̣", ["Ць"] = "Ć", ["Ц'"] = "Ć", ["Ҵь"] = "Ćʼ", ["Ҵ'"] = "Ćʼ", ["Шь"] = "Š", ["Џь"] = "Dž",
	-- Upper-case base letter followed by an upper-case soft sign (all-caps
	-- text). Without these entries the pair falls through to `tt` and comes
	-- out as e.g. "Ŝʲ" instead of "Š". The apostrophe variants above are
	-- already case-neutral.
	["ЖЬ"] = "Ž", ["ӠЬ"] = "Dź", ["ЦЬ"] = "Ć", ["ҴЬ"] = "Ćʼ", ["ШЬ"] = "Š", ["ЏЬ"] = "Dž"
}

-- Cyrillic vowel classes used by the contextual glide rules.
local FOLLOWING_VOWEL = "([аеиоуыэ])"
local PRECEDING_VOWEL = "([аеёиоуыэюяАЕЁИОУЫЭЮЯ])"

-- Ordered {pattern, replacement} pairs, applied before any table lookup:
-- "у"/"и" adjacent to a vowel become the glides "w"/"j", and "е" directly
-- after a vowel gains a leading "j".
local glide_rules = {
	{"у" .. FOLLOWING_VOWEL, "w%1"},
	{"У" .. FOLLOWING_VOWEL, "W%1"},
	{PRECEDING_VOWEL .. "у", "%1w"},
	{"и" .. FOLLOWING_VOWEL, "j%1"},
	{"И" .. FOLLOWING_VOWEL, "J%1"},
	{PRECEDING_VOWEL .. "и", "%1j"},
	{PRECEDING_VOWEL .. "е", "%1jе"},
}

-- Latin base letters checked for doubling once the text is in Latin script.
local DOUBLED_LETTERS = "abcdefghijklmnopqrstuvxzəABCDEFGHIJKLMNOPQRSTUVXZƏ"

--- Transliterates Cyrillic text using the `digraphs` and `tt` tables.
-- @param text  the string to transliterate
-- @param lang  language code; not consulted by this function
-- @param sc    script code; not consulted by this function
-- @return the transliterated string; it is decomposed with mw.ustring.toNFD
--         part-way through and is not recomposed afterwards
function export.tr(text, lang, sc)
	-- Contextual glides first, while the text is still Cyrillic.
	for _, rule in ipairs(glide_rules) do
		text = gsub(text, rule[1], rule[2])
	end

	-- Digraphs go before single letters. The traversal order of pairs() is
	-- unspecified, which is harmless here: no key overlaps another and every
	-- replacement is already Latin.
	for cyrillic, latin in pairs(digraphs) do
		text = gsub(text, cyrillic, latin)
	end

	-- Look every remaining character up in the single-letter table.
	text = gsub(text, ".", tt)

	-- Move the apostrophe behind the ʲ/ʷ modifiers, then decompose so that
	-- base letters and combining marks can be matched separately below.
	text = gsub(text, "ʼʲ", "ʲʼ")
	text = gsub(text, "ʼʷ", "ʷʼ")
	text = mw.ustring.toNFD(text)

	-- When both halves of a doubled letter carry the same modifier letters
	-- and/or apostrophe, keep them on the second half only, for readability.
	-- The combining mark (if any) stays on both halves.
	local mark_and_modifiers = "([" .. GRAVE .. ACUTE .. CIRC .. BREVE .. CARON .. DOTBELOW .. "]?)([ʲʷ]?[ʲʷ]?ʼ?)"
	for first in gmatch(DOUBLED_LETTERS, ".") do
		local second = lower(first)
		text = gsub(
			text,
			first .. mark_and_modifiers .. second .. "%1%2",
			first .. "%1" .. second .. "%1%2"
		)
	end

	-- Drop ʲ next to j/J and ʷ next to w/W: the glide already expresses it.
	text = gsub(text, "ʲ?([Jj])ʲ?", "%1")
	text = gsub(text, "ʷ?([Ww])ʷ?", "%1")

	return text
end

return export