Module:grc-translit

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module will transliterate Ancient Greek language text per WT:GRC TR.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:grc-translit/testcases.

Functions[edit]

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by sc and in the language specified by lang. Returns nil when the transliteration fails.

-- Table of functions exported by this module; returned at the end of the file.
local export = {}

-- Character-level transliteration table used by export.tr.
-- Keys are single characters as they occur after NFD decomposition and
-- lowercasing (Greek letters and combining marks); values are the Latin
-- replacement for that character. Characters with no entry here are left
-- unchanged by export.tr.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "ē",
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "ō",

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",
	
	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",
	
	-- Diacritics (combining marks; the number is the hexadecimal code point)
	['̄'] = '̄', -- macron (U+0304), kept as is
	['̆'] = '', -- breve (U+0306), dropped
	['̓'] = '', -- psili (U+0313), dropped
	['̔'] = '', -- dasia (U+0314), dropped here; export.tr adds the "h" itself
	['̈'] = '̈', -- trema (U+0308), kept as is
	['̀'] = '̀', -- grave (U+0300), kept as is
	['́'] = '́', -- acute (U+0301), kept as is
	['͂'] = '̂', -- Greek circumflex (U+0342), replaced by a combining circumflex
	['ͅ'] = 'i', -- hypogegrammene (U+0345), written out as "i"
	
	-- For internal processing of diaeresis: export.tr inserts '+' in front of
	-- ι/υ carrying a trema, and the marker is removed again on output.
	['+'] = '',
}

-- Character class matching any one of the combining marks listed in the
-- "Diacritics" section of tt. export.tr uses it to attach a mark to the
-- token of the letter that precedes it.
local diacritics = '[̄̆̓̔̈̀́͂ͅ]'

--- Transliterate Ancient Greek text into Latin script.
-- The text is decomposed (NFD), split into tokens (a base character with its
-- combining marks, or a diphthong with its marks), and each token is converted
-- through the `tt` table and a few context-dependent rules.
-- @param text  string to transliterate
-- @param lang  language code; only forwarded to Module:Cprt-translit
-- @param sc    script code; "Cprt" delegates the whole call to Module:Cprt-translit
-- @return the transliterated string (or whatever Module:Cprt-translit returns
--         when sc is "Cprt")
function export.tr(text, lang, sc)
	-- If the script is given as Cprt, then forward the transliteration to that module
	if sc == "Cprt" then
		return require("Module:Cprt-translit").tr(text, lang, sc)
	end

	local ustring = mw.ustring
	local gsub, match, gmatch = ustring.gsub, ustring.match, ustring.gmatch
	local lower, upper, usub = ustring.lower, ustring.upper, ustring.sub
	local toNFD = ustring.toNFD

	-- Per-character lookup for gsub; returning nil keeps characters that have
	-- no entry in tt. Defined once instead of once per token.
	local function translit_char(ch)
		return tt[ch]
	end

	-- Decompose the text so that every diacritic is its own combining character.
	text = toNFD(text)

	-- Mark ι/υ that carry a trema with a leading '+'. The '+' becomes a token
	-- of its own, which stops the tokenizer below from merging the vowel into
	-- a diphthong with the preceding vowel; tt maps '+' to '' on output.
	text = gsub(text,'([ιυ])([̄̆]?)̈','+%1%2̈')

	-- Tokenize: one pass over the characters of the decomposed text.
	local tokens = {}
	local ti = 0 -- index of the last token written
	for ch in gmatch(text, '.') do
		local prev = tokens[ti] -- nil while no token exists yet
		if ch == 'ι' and prev and match(prev,'[ΑΕΗΟΥΩαεηουω]') then
			-- second element of a diphthong in -ι
			tokens[ti] = prev .. 'ι'
		elseif ch == 'υ' and prev and match(prev,'[ΑΕΗΟΩαεηοω]') then
			-- second element of a diphthong in -υ
			tokens[ti] = prev .. 'υ'
		elseif prev and match(ch, diacritics) then
			-- a combining mark belongs to the token before it
			tokens[ti] = prev .. ch
		else
			-- anything else starts a new token; this includes a combining
			-- mark at the very start of the text, which has nothing to attach to
			ti = ti + 1
			tokens[ti] = ch
		end
	end

	-- Now read the tokens, in order, and collect the output pieces.
	local out = {}
	for i = 1, ti do
		local token = tokens[i]
		local t = gsub(lower(token), '.', translit_char)

		-- elseif is misleading (these are independent) but it's more concise this way
		if token == 'γ' and tokens[i+1] and match(tokens[i+1],'[κγχξ]') then
			-- gamma before a velar (or ξ) is written "n"
			t = 'n'
		elseif token == 'ρ' and tokens[i-1] == 'ρ' then
			-- second rho of a double rho
			t = 'rh'
		elseif match(token,'[ΑΕΗΟΩαεηοω]υ') or match(token,'[Υυ]ι') then
			-- With the current tt (υ → "u") this substitution changes nothing;
			-- the branch also keeps diphthong tokens out of the case below.
			t = gsub(t,'y','u')
		elseif match(token,'[αΑ].*ͅ') then
			-- alpha with iota subscript is long: add a macron to the "a"
			t = gsub(t,'([aA])','%1̄')
		end

		-- Rough breathing: "h" follows rho but precedes everything else.
		if match(token,'̔') then
			if match(token,'[Ρρ]') then
				t = t .. 'h'
			else
				t = 'h' .. t
			end
		end

		t = toNFD(t) -- we can't manually enter them as e/o + macron in the table because it'll recombine apparently
		-- A circumflex replaces the macron on the same letter.
		if match(t,'̂') then
			t = gsub(t,'̄','')
		end

		-- A token containing any uppercase character gets an initial capital.
		if token ~= lower(token) then
			t = upper(usub(t,1,1)) .. lower(usub(t,2))
		end
		out[i] = t
	end
	return table.concat(out)
end

return export