Module:grc-translit

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module will transliterate Ancient Greek language text per WT:GRC TR.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:grc-translit/testcases.

Functions[edit]

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by sc, and language specified by lang. When the transliteration fails, returns nil.

All tests passed. (refresh)

test_links:
Text Expected Actual
Passed λόγος lógos lógos
Passed σφίγξ sphínx sphínx
Passed ϝάναξ wánax wánax
Passed οἷαι hoîai hoîai
Passed ταῦρος taûros taûros
Passed νηῦς nēûs nēûs
Passed σῦς sûs sûs
Passed γυῖον guîon guîon
Passed ἀναῡ̈τέω anaṻtéō anaṻtéō
Passed δαΐφρων daḯphrōn daḯphrōn
Passed τῶν tôn tôn
Passed τοὶ toì toì
Passed τῷ tôi tôi
Passed τούτῳ toútōi toútōi
Passed σοφίᾳ sophíāi sophíāi
Passed ὁ ho ho
Passed οἱ hoi hoi
Passed εὕρισκε heúriske heúriske
Passed ὑϊκός huïkós huïkós
Passed πυρρός purrhós purrhós
Passed ῥέω rhéō rhéō
Passed σάἁμον sáhamon sáhamon
Passed Ὀδυσσεύς Odusseús Odusseús
Passed Εἵλως Heílōs Heílōs
Passed ᾍδης Hā́idēs Hā́idēs
Passed ἡ Ἑλήνη hē Helḗnē hē Helḗnē
Passed 𐠠𐠒𐠯𐠗 pi-lo-ti-mo pi-lo-ti-mo



-- Table of functions this module exposes; tr() is attached to it further down.
local export = {}

-- Shared Ancient Greek helpers: m_utilities provides tokenize(), which tr() uses
-- to split the input; m_data provides character data.
local m_utilities = require('Module:grc-utilities')
local m_data = require('Module:grc-utilities/data')
-- Characters looked up by name (presumably single code points; confirm in the data module).
local chars = m_data.named

-- Greek diacritics referenced by the transliteration table and by tr().
local acute = chars.acute
local grave = chars.grave
local circumflex = chars.circum
local diaeresis = chars.diaeresis
local smooth = chars.smooth
local rough = chars.rough
local macron = chars.macron
local breve = chars.breve
local subscript = chars.subscript

-- Circumflex emitted in the Latin output in place of the Greek circumflex.
local hat = chars.Latin_circum

-- Character-by-character transliteration table. tr() lowercases each token and
-- replaces every character that has an entry here; characters without an entry
-- are left unchanged in the output.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e"..macron, -- eta is written as e plus a macron
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o"..macron, -- omega is written as o plus a macron

	-- Consonants
	["β"] = "b",
	["γ"] = "g", -- tr() overrides this with <n> before a velar
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r", -- tr() overrides this with <rh> after another ρ
	["σ"] = "s",
	["ς"] = "s", -- final sigma gets the same output as medial sigma
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",
	
	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",
	
	-- Diacritics
	[macron] = macron,
	[breve] = '', -- dropped from the output
	[smooth] = '', -- dropped from the output
	[rough] = '', -- dropped here; tr() adds the <h> itself
	[diaeresis] = diaeresis,
	[grave] = grave,
	[acute] = acute,
	[circumflex] = hat, -- Greek circumflex becomes the Latin circumflex
	[subscript] = 'i', -- the subscript mark is written out as a following i
}

-- NOTE(review): not referenced anywhere in the visible part of this file; confirm
-- whether it is still needed before removing.
local diacritics = m_data.all

--- Transliterates Ancient Greek text into the Latin alphabet.
-- @param text  the text to transliterate
-- @param lang  language code; only used when forwarding to Module:Cprt-translit
-- @param sc    script code; "Cprt" hands the whole call over to Module:Cprt-translit
-- @return the transliteration normalized to NFC (or, for Cprt, whatever
--         Module:Cprt-translit returns)
function export.tr(text, lang, sc)
	-- If the script is given as Cprt, then forward the transliteration to that module
	if sc == "Cprt" then
		return require("Module:Cprt-translit").tr(text, lang, sc)
	end
	
	-- A lone rough-breathing sign is transliterated as a bare <h>
	if text == '῾' then
		return 'h'
	end
	
	local tokens = m_utilities.tokenize(text)

	-- Walk the tokens strictly in index order. The output is concatenated in
	-- visiting order and the rules below look at the neighbours i - 1 and i + 1,
	-- so ipairs is required; pairs gives no ordering guarantee.
	-- (Assumes tokenize() returns a sequence without holes.)
	local output = {}
	for i, token in ipairs(tokens) do
		local lower_token = mw.ustring.lower(token)
		
		-- substitute each character in the token for its transliteration;
		-- characters missing from tt yield nil and are therefore kept as-is
		local translit = mw.ustring.gsub(lower_token, '.', function(x) return tt[x] end)
		
		local next_token = tokens[i + 1]
		if token == 'γ' and next_token and mw.ustring.match(next_token, '[κγχξ]') then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif mw.ustring.match(token, '[αΑ].*'..subscript) then
			-- add macron to ᾳ
			translit = mw.ustring.gsub(translit, '([aA])', '%1'..macron)
		end
		
		-- The rough breathing was removed by tt; re-add it as <h>, after a rho
		-- but before a vowel.
		if mw.ustring.match(token, rough) then
			if mw.ustring.match(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end
	
		-- When a macron is followed by a circumflex, strip the macron(s) from
		-- this token and keep only the circumflex.
		if mw.ustring.match(translit, macron .. '[' .. rough .. smooth .. ']?' .. hat) then
			translit = mw.ustring.gsub(translit, macron, '')
		end
		
		-- The token contained a capital: capitalize only the first output
		-- character (so Θ gives "Th", and Ἑ gives "He").
		if token ~= lower_token then
			translit = mw.ustring.upper(mw.ustring.sub(translit, 1, 1)) .. mw.ustring.lower(mw.ustring.sub(translit, 2))
		end
		
		output[#output + 1] = translit
	end
	
	-- Compose base letters with their combining diacritics.
	return mw.ustring.toNFC(table.concat(output))
end

-- Hand the module table (with tr attached) back to whoever require()s this module.
return export