Module:jje-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module transliterates Jeju-language text. It should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:jje-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

-- Created from Module:ko.translit
local export = {}

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local match = m_str_utils.match

-- Romanisation of bare jamo, used only when the whole input consists of
-- such letters (and hyphens). Includes the arae-a vowels "ㆍ" and "ᆢ".
local standalone_jamo = {
	["ㄱ"] = "g", ["ㄲ"] = "kk", ["ㄳ"] = "ks", ["ㄴ"] = "n", ["ㄵ"] = "nj", ["ㄶ"] = "nh", ["ㄷ"] = "d", ["ㄸ"] = "tt", ["ㄹ"] = "l", ["ㄺ"] = "lg",
	["ㄻ"] = "lm", ["ㄼ"] = "lb", ["ㄽ"] = "ls", ["ㄾ"] = "lt", ["ㄿ"] = "lp", ["ㅀ"] = "lh", ["ㅁ"] = "m", ["ㅯ"] = "ms", ["ㅂ"] = "b", ["ㅃ"] = "pp", ["ㅄ"] = "ps",
	["ㅅ"] = "s", ["ㅆ"] = "ss", ["ㅇ"] = "'", ["ㅈ"] = "j", ["ㅉ"] = "jj", ["ㅊ"] = "ch", ["ㅋ"] = "k", ["ㅌ"] = "t", ["ㅍ"] = "p", ["ㅎ"] = "h",
	["ㅏ"] = "a", ["ㅐ"] = "ae", ["ㅑ"] = "ya", ["ㅒ"] = "yae", ["ㅓ"] = "eo", ["ㅔ"] = "e", ["ㅕ"] = "yeo",
	["ㅖ"] = "ye", ["ㅗ"] = "o", ["ㅘ"] = "wa", ["ᆦ"] = "wya", ["ㅙ"] = "wae", ["ㅚ"] = "oe", ["ㅛ"] = "yo", ["ㅜ"] = "u",
	["ㅝ"] = "wo", ["ㅞ"] = "we", ["ㅟ"] = "wi", ["ㅠ"] = "yu", ["ㅡ"] = "eu", ["ㅢ"] = "ui", ["ㅣ"] = "i",
	["ㆍ"] = "aw", ["ᆢ"] = "yaw",
}

-- Compatibility consonant letters mapped to conjoining (final-consonant) jamo,
-- a form [[Module:jje-pron]] can handle. Needed for [[-ㅂ니까]] [[-ㅁ둥]] etc.
local compat_to_final = {
	["ㄱ"] = "ᆨ", ["ㄲ"] = "ᆩ", ["ㄳ"] = "ᆪ", ["ㄴ"] = "ᆫ", ["ㄵ"] = "ᆬ", ["ㄶ"] = "ᆭ", ["ㄷ"] = "ᆮ", ["ㄹ"] = "ᆯ", ["ㄺ"] = "ᆰ",
	["ㄻ"] = "ᆱ", ["ㄼ"] = "ᆲ", ["ㄽ"] = "ᆳ", ["ㄾ"] = "ᆴ", ["ㄿ"] = "ᆵ", ["ㅀ"] = "ᆶ", ["ㅁ"] = "ᆷ", ["ㅯ"] = "ᇝ", ["ㅂ"] = "ᆸ", ["ㅄ"] = "ᆹ",
	["ㅅ"] = "ᆺ", ["ㅆ"] = "ᆻ", ["ㅇ"] = "ᆼ", ["ㅈ"] = "ᆽ", ["ㅊ"] = "ᆾ", ["ㅋ"] = "ᆿ", ["ㅌ"] = "ᇀ", ["ㅍ"] = "ᇁ", ["ㅎ"] = "ᇂ",
}

-- Builds the inside of a pattern character class ("[...]") from the keys of
-- `map`, so the class can never drift out of sync with the replacement table.
-- Keys must not contain pattern-magic characters (^ % ] -); jamo do not.
-- The keys are sorted only to make the resulting string deterministic.
local function char_class(map)
	local chars = {}
	for ch in pairs(map) do
		chars[#chars + 1] = ch
	end
	table.sort(chars)
	return table.concat(chars)
end

local standalone_class = char_class(standalone_jamo)
local compat_class = char_class(compat_to_final)

-- Transliterates Jeju text.
--   text: the string to transliterate; nil or "" is returned unchanged.
--   lang, sc: language and script codes; accepted for interface compatibility
--             but not read by this function.
-- Returns the transliterated string.
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end

	local m_scripts = require("Module:scripts")
	local HaniChars = m_scripts.getByCode("Hani"):getCharacters()

	-- strip ruby markup tags: <rp>, </rp>, <rt>, </rt>, <ruby>, </ruby>
	text = gsub(text, "%<%/?r[pt]%>", "")
	text = gsub(text, "%<%/?ruby%>", "")
	-- remove hanja from (ex.) 사전(辭典) and 辭典(사전)
	text = gsub(text, "%([" .. HaniChars .. "]+%)", "")
	-- same, when part of the parenthesised hanja is wrapped in ''' (bold markup)
	text = gsub(text, "%([" .. HaniChars .. "]*'''[" .. HaniChars .. "]+'''[" .. HaniChars .. "]*%)", "")
	-- hanja followed by a parenthesised reading: keep only the reading
	text = gsub(text, "[" .. HaniChars .. "]+%((.-)%)", "%1")

	-- transform em-dash to plain hyphen-minus
	text = gsub(text, "—", "-")

	-- Input made up solely of bare jamo (and hyphens) is romanised letter by
	-- letter from the table; hyphens are left as they are. The outer
	-- parentheses drop gsub's second return value (the substitution count).
	if match(text, "^[" .. standalone_class .. "%-]+$") then
		return (gsub(text, "[" .. standalone_class .. "]", standalone_jamo))
	end

	-- transform compat jamo into a form [[Module:jje-pron]] can handle
	-- for [[-ㅂ니까]] [[-ㅁ둥]] etc.
	-- could be moved to [[Module:jje-pron]]
	-- Note: once any hyphen + compat consonant is found, every compat
	-- consonant in the text is converted, not just the one after the hyphen.
	if match(text, "%-[" .. compat_class .. "]") then
		text = gsub(text, "[" .. compat_class .. "]", compat_to_final)
	end

	local HangChars = m_scripts.getByCode("Hang"):getCharacters()
	local m_pron = require("Module:jje-pron")

	-- Hand each run of Hangul, whitespace, punctuation and the listed
	-- supplementary code-point range to the pronunciation module.
	-- NOTE(review): the meaning of the arguments (2, {}, true) is defined in
	-- [[Module:jje-pron]]; they are passed through unchanged here.
	text = gsub(text, "[" .. HangChars .. "%s%p􀀀-􏿽]+", function(m1) return m_pron.romanise(m1, 2, {}, true) end)

	-- Final ASCII clean-up: drop an apostrophe right after a hyphen between
	-- letters, move a hyphen out from in front of ''' and collapse "--".
	-- The `and` expression yields a single value, so gsub's count is dropped.
	return text and text
		:gsub("([A-Za-z])%-%'([A-Za-z])", "%1-%2")
		:gsub("%-'''%-", "'''-")
		:gsub("%-%-", "-")
end

-- Alias: tr_revised refers to the very same function value as tr.
export.tr_revised = export.tr

-- Module table handed back to require().
return export