-- Module:ko-appendix-junggan-nogeoldae
--
-- From Wiktionary, the free dictionary
-- Jump to navigation Jump to search
local export = {}

local memoize = require "Module:fun".memoize

-- Repeat `str` `n` times, joining the copies with `sep`.
-- `n` must be non-negative (asserted); `sep` may be nil, in which case the
-- copies are joined with nothing between them.
local function string_rep(str, n, sep)
	assert(n >= 0)
	local copies = {}
	for _ = 1, n do
		copies[#copies + 1] = str
	end
	return table.concat(copies, sep)
end

-- Build a formatter that wraps text in a <span> carrying the script code
-- `sc` as its class and `lang_code` as its lang attribute.
-- Returns a function(text) -> string.
local function tag(sc, lang_code)
	local function wrap_in_span(text)
		local opening = '<span class="' .. sc .. '" lang="' .. lang_code .. '">'
		return opening .. text .. '</span>'
	end
	return wrap_in_span
end

-- Build a formatter that turns a term into a wikilink pointing at the
-- `lang_name` section of the term's page, then wraps that link in the span
-- produced by tag(sc, lang_code).
-- Returns a function(term) -> string.
local function link_and_tag(sc, lang_code, lang_name)
	local wrap = tag(sc, lang_code)
	local function linker(term)
		local wikilink = '[[' .. term .. '#' .. lang_name .. '|' .. term .. ']]'
		return wrap(wikilink)
	end
	return linker
end

-- Ready-made formatters used by the rest of the module. The "tag_" variants
-- only wrap text in a language-tagged span; the "link_" variants also turn
-- the text into a wikilink to the named language section.
local tag_Korean = tag("Hang", "ko")
local link_Korean = link_and_tag("Hang", "ko", "Korean")
local tag_Middle_Korean = tag("Hang", "okm")
local link_Chinese = link_and_tag("Hani", "zh", "Chinese")
local link_Mandarin = link_and_tag("Latn", "cmn", "Mandarin")

-- Link the Han-character field of a row.
--
-- The field is either a bare traditional form ("啊") or one or more
-- "traditional(simplified)" pairs. A bare form is linked as a whole; in the
-- paired form each half is linked separately and the parentheses are kept as
-- plain text between the two links.
--
-- Whitespace between the traditional form and the opening parenthesis is
-- dropped so that it does not end up inside the link. The original pattern
-- tried to do this with a "%s*" after the first capture, but the greedy
-- "[^()]+" capture had already consumed that whitespace, so the trimming is
-- done explicitly in the callback instead.
local function format_Han(traditional_and_simplified)
	if not traditional_and_simplified:find("(", 1, true) then
		return link_Chinese(traditional_and_simplified)
	end
	-- Outer parentheses discard gsub's substitution count.
	return (traditional_and_simplified:gsub(
		"([^()]+)%(([^()]+)%)",
		function(traditional, simplified)
			local trimmed = traditional:match("^(.-)%s*$")
			-- Keep the capture untouched if it is nothing but whitespace,
			-- rather than emitting a link with an empty target.
			if trimmed ~= "" then
				traditional = trimmed
			end
			return link_Chinese(traditional) .. "(" .. link_Chinese(simplified) .. ")"
		end))
end

-- Lookup table from a single Hangul jamo to its romanization string; it is
-- the replacement table used by Yale_romanization below. Note that "ᄋ" maps
-- to the empty string, so a syllable starting with it romanizes without an
-- initial consonant.
local conversions = {
	["ᄀ"] = "k",
	["ᄁ"] = "kk",
	["ᄂ"] = "n",
	["ᄃ"] = "t",
	["ᄄ"] = "tt",
	["ᄅ"] = "l",
	["ᄆ"] = "m",
	["ᄇ"] = "p",
	["ᄈ"] = "pp",
	["ᄉ"] = "s",
	["ᄊ"] = "ss",
	["ᄋ"] = "",
	["ᄌ"] = "c",
	["ᄍ"] = "cc",
	["ᄎ"] = "ch",
	["ᄏ"] = "kh",
	["ᄐ"] = "th",
	["ᄑ"] = "ph",
	["ᄒ"] = "h",
	["ᄝ"] = "W",
	["ᄫ"] = "f",
	["ᄬ"] = "v",
	["ᅀ"] = "z",
	["ᅌ"] = "ng",
	["ᅘ"] = "hh",
	["ᅙ"] = "q",
	["ᅡ"] = "a",
	["ᅢ"] = "ay",
	["ᅣ"] = "ya",
	["ᅤ"] = "yay",
	["ᅥ"] = "e",
	["ᅦ"] = "ey",
	["ᅧ"] = "ye",
	["ᅨ"] = "yey",
	["ᅩ"] = "wo",
	["ᅪ"] = "wa",
	["ᅫ"] = "way",
	["ᅬ"] = "woy",
	["ᅭ"] = "yo",
	["ᅮ"] = "wu",
	["ᅯ"] = "we",
	["ᅱ"] = "wuy",
	["ᅲ"] = "yu",
	["ᅳ"] = "u",
	["ᅴ"] = "uy",
	["ᅵ"] = "i",
	["ᅶ"] = "awo",
	["ᅸ"] = "yawo",
	["ᆄ"] = "yoya",
	["ᆏ"] = "yue",
	["ᆑ"] = "yuye",
	["ᆔ"] = "yuy",
	["ᆕ"] = "uwu",
	["ᆛ"] = "iwu",
	["ᆜ"] = "iu",
	["ᆫ"] = "n",
	["ᆯ"] = "l",
	["ᆷ"] = "m",
	["ᆼ"] = "ng",
	["ᇢ"] = "W",
	["ᇦ"] = "f",
	["ᇫ"] = "z",
	["ᇹ"] = "q",
}
-- `vowels` is a single string made by taking the keys of the table below,
-- sorting them and concatenating them; only the keys matter here, the
-- romanization values are not used. The entries repeat the vowel rows of
-- `conversions`.
-- NOTE(review): assumes Module:array's `keys` returns an array object of the
-- table's keys that supports :sort() and :concat() -- confirm against that
-- module.
local vowels = require "Module:array".keys {
	["ᅡ"] = "a",
	["ᅢ"] = "ay",
	["ᅣ"] = "ya",
	["ᅤ"] = "yay",
	["ᅥ"] = "e",
	["ᅦ"] = "ey",
	["ᅧ"] = "ye",
	["ᅨ"] = "yey",
	["ᅩ"] = "wo",
	["ᅪ"] = "wa",
	["ᅫ"] = "way",
	["ᅬ"] = "woy",
	["ᅭ"] = "yo",
	["ᅮ"] = "wu",
	["ᅯ"] = "we",
	["ᅱ"] = "wuy",
	["ᅲ"] = "yu",
	["ᅳ"] = "u",
	["ᅴ"] = "uy",
	["ᅵ"] = "i",
	["ᅶ"] = "awo",
	["ᅸ"] = "yawo",
	["ᆄ"] = "yoya",
	["ᆏ"] = "yue",
	["ᆑ"] = "yuye",
	["ᆔ"] = "yuy",
	["ᆕ"] = "uwu",
	["ᆛ"] = "iwu",
	["ᆜ"] = "iu",
}:sort():concat()
-- Character-class pattern matching any one of the vowel jamo above; used by
-- split_syllable to detect a final that begins with a vowel.
local vowel = "[" .. vowels .. "]"
-- Romanize a string of jamo by replacing every character that has an entry
-- in `conversions`; characters without an entry are left as they are.
-- Wrapped in memoize, so results are presumably cached per input.
local Yale_romanization = memoize(function(syllable)
	-- gsub also returns a substitution count; only the string is passed on.
	local romanized = mw.ustring.gsub(syllable, ".", conversions)
	return romanized
end)

-- Maps an initial-consonant jamo to the standalone letter that is shown to
-- the reader (and, for initials, linked) by format_initial_or_final. Strings
-- with no entry here are displayed unchanged.
local to_full_letter = {
  ["ᄀ"] = "ㄱ",
  ["ᄁ"] = "ㄲ",
  ["ᄂ"] = "ㄴ",
  ["ᄃ"] = "ㄷ",
  ["ᄄ"] = "ㄸ",
  ["ᄅ"] = "ㄹ",
  ["ᄆ"] = "ㅁ",
  ["ᄇ"] = "ㅂ",
  ["ᄈ"] = "ㅃ",
  ["ᄉ"] = "ㅅ",
  ["ᄊ"] = "ㅆ",
  ["ᄋ"] = "ㅇ",
  ["ᄌ"] = "ㅈ",
  ["ᄍ"] = "ㅉ",
  ["ᄎ"] = "ㅊ",
  ["ᄏ"] = "ㅋ",
  ["ᄐ"] = "ㅌ",
  ["ᄑ"] = "ㅍ",
  ["ᄒ"] = "ㅎ",
  ["ᄝ"] = "ㅱ",
  ["ᄫ"] = "ㅸ",
  ["ᄬ"] = "ㅹ",
  ["ᅀ"] = "ㅿ",
  ["ᅌ"] = "ㆁ",
  ["ᅘ"] = "ㆅ",
  ["ᅙ"] = "ㆆ",
}

-- Render one "Initial" or "Final" table cell as `romanization (Hangul)`.
--   Korean: the jamo string for the initial or the final.
--   link:   truthy to wikilink the Hangul, falsy to only tag it.
-- An empty romanization is shown as "-". The Hangul is displayed through
-- to_full_letter when it has an entry there, otherwise unchanged.
local function format_initial_or_final(Korean, link)
	local wrap
	if link then
		wrap = link_Korean
	else
		wrap = tag_Korean
	end

	local yale = Yale_romanization(Korean)
	if yale == "" then
		yale = "-"
	end

	local shown = to_full_letter[Korean] or Korean
	return yale .. " (" .. wrap(shown) .. ")"
end

-- Split a decomposed syllable into its first character (the initial) and
-- everything after it (the final). When the final starts with a vowel jamo,
-- "ᄋ" is prepended to it.
-- Returns a table { initial = ..., final = ... }.
local split_syllable = memoize(function(syllable)
	local head, rest = mw.ustring.match(syllable, "^(.)(.*)$")
	local begins_with_vowel = mw.ustring.find(rest, "^" .. vowel)
	if begins_with_vowel then
		rest = "ᄋ" .. rest
	end
	local parts = {}
	parts.initial = head
	parts.final = rest
	return parts
end)

-- Produce the four wikitable cells for one transcription: the tagged
-- transcription itself, its romanization, its initial (linked) and its
-- final (tagged only), as a single "| a || b || c || d" row fragment.
local format_transcription_romanization_initial_final = memoize(function (tr)
	local romanized = Yale_romanization(tr)
	local parts = split_syllable(tr)

	local transcription_cell = tag_Middle_Korean(tr)
	local initial_cell = format_initial_or_final(parts.initial, true)
	local final_cell = format_initial_or_final(parts.final, false)

	local row_template = '| %s || %s || %s || %s'
	return row_template:format(transcription_cell, romanized, initial_cell, final_cell)
end)

-- Transliterate Korean text with Module:ko-translit's `tr`, after
-- normalizing the text to NFC.
local Revised_Romanization = memoize(function(Korean)
	local translit = require "Module:ko-translit"
	local composed = mw.ustring.toNFC(Korean)
	return translit.tr(composed)
end)

-- Format the "Modern Sino-Korean" cell: every run of characters other than
-- commas and spaces is replaced by a link to that reading followed by its
-- romanization in parentheses; the commas and spaces between readings are
-- kept as they are.
-- Returns only the formatted string. The gsub call is parenthesized so its
-- substitution count is not returned as a second value, matching the other
-- gsub-based helpers in this module.
local format_modern = memoize(function(Korean)
	return (Korean:gsub("[^, ]+", function(reading)
		return link_Korean(reading) .. " (" .. Revised_Romanization(reading) .. ")"
	end))
end)

-- Entry point for [[Appendix:Early Mandarin transcription using Old Hangeul in Junggan Nogeoldae Eonhae]].
--
-- frame.args[1] holds the data, one entry per line. Each line must contain
-- six tab-separated, non-empty fields: pinyin, Han character(s),
-- transcription 1, transcription 2, modern Sino-Korean reading(s) and
-- frequency. The result is the wikitext of a sortable table with one row
-- per line.
-- A rendered row contains cells such as:
--ā	啊	아	a	- (ㅇ)	a (아)	ᅙᅥ	qe	q (ㆆ)	e (어)	아 (a)	6
-- Raises an error naming the offending line if a line does not match.
function export.main(frame)
	local source = mw.ustring.toNFD(frame.args[1])
	local Array = require "Module:array"
	local rows = Array()

	-- Map each plain header word to a copy with soft hyphens at the marked
	-- break points, so that the column headers can wrap.
	local soft_hyphen = "&shy;"
	local hyphenate = {}
	local break_points = {
		"Fi-nal", "Fre-quen-cy", "I-ni-tial", "Ko-re-an", "Mo-dern", "Pin-yin",
		"Ro-ma-ni-za-tion", "Tran-scrip-tion"
	}
	for index = 1, #break_points do
		local marked = break_points[index]
		local plain = marked:gsub("%-", "")
		local breakable = marked:gsub("%-", soft_hyphen)
		hyphenate[plain] = breakable
	end

	-- Header words without an entry in `hyphenate` are left untouched.
	local header = ([[
{| class="wikitable sortable"
! Pinyin !! Han character !! Transcription 1 !! Romanization 1 !! Initial 1 !! Final 1 !! Transcription 2 !! Romanization 2 !! Initial 2 !! Final 2 !! Modern Sino-Korean !! Frequency]]):gsub("%a+", hyphenate)
	rows:insert(header)

	local pattern = string_rep("([^\t]+)", 6, "\t")
	for line in source:gmatch("[^\n]+") do
		local pinyin, han, tr1, tr2, mod, freq = line:match(pattern)
		if pinyin == nil then
			error("The line " .. line .. " did not match the pattern " .. pattern)
		end

		local leading_cells = ([[
|-
| %s || %s]]):format(link_Mandarin(pinyin), format_Han(han))
		rows:insert(leading_cells)

		rows:insert(format_transcription_romanization_initial_final(tr1))
		rows:insert(format_transcription_romanization_initial_final(tr2))

		-- Extra parentheses keep only the first value of format_modern.
		local trailing_cells = ([[
| %s || %d]]):format((format_modern(mod)), freq)
		rows:insert(trailing_cells)
	end
	rows:insert("|}")

	return rows:concat("\n")
end

return export