Module:Hrkt-translit
Jump to navigation
Jump to search
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate text in the Kana script. It is used to transliterate Southern Amami Ōshima, Japanese, Hachijō, Kikai, Miyako, Okinoerabu, Northern Amami Ōshima, Yaeyama, Okinawan, Tokunoshima, Kunigami, Yonaguni, and Yoron.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:Hrkt-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
local concat = table.concat
local insert = table.insert
local load_data = mw.loadData
local toNFD = mw.ustring.toNFD
local umatch = mw.ustring.match
local m_ja = require("Module:ja")
local kata_to_hira = m_ja.kata_to_hira
local normalize_kana = m_ja.normalize_kana
-- Cache for the shared data table "Module:Hrkt-translit/data"; it stays nil
-- until process_data or get_data loads it on first use.
local data_common
-- Placeholder control characters used inside intermediate output. Both are
-- rewritten to an apostrophe by the final gsub in export.tr.
local glottal = "\1"
local disambig = "\2"
-- Body of a character class: the lower-case ASCII letters other than
-- a, e, i, o, u, w and y. It is always spliced between "[" and "]" when a
-- pattern is built.
local cons = "b-df-hj-np-tvxz"
-- Table of public functions returned at the end of the module.
local export = {}
--- Returns `text` without its trailing run of the final character.
-- For example "ka" gives "k" and "kya" gives "ky". When nothing precedes that
-- run (e.g. "a", "aa") or `text` is empty, `text` is returned unchanged.
-- @param text string to inspect
-- @return the leading part of `text`, or `text` itself
local function get_initial(text)
	local final = umatch(text, ".$")
	if not final then
		-- Empty input: there is no final character to build the frontier set
		-- from, and concatenating nil would raise an error.
		return text
	end
	-- The final character is spliced into a "[...]" set, so characters that
	-- are magic inside a set must be escaped or the pattern is malformed.
	if final:match("^[%^%]%%%-]$") then
		final = "%" .. final
	end
	-- Greedy ".+" followed by a frontier into the final character: captures
	-- everything up to the last place where that character's run begins.
	return umatch(text, "(.+)%f[" .. final .. "]") or text
end
--- Scans one romanization table and records the consonant initials it yields.
-- Every key of `data` is marked in `checked` so that a later call with
-- another table skips keys already seen here.
-- @param data          romanization table (key -> romanization), or nil for a no-op
-- @param d_voicing     table mapping an initial to its voiced counterpart
-- @param d_semivoicing table mapping an initial to its semi-voiced counterpart
-- @param initials      set (initial -> true) that receives the results
-- @param checked       set (key -> true) of keys already processed
local function handle_initials(data, d_voicing, d_semivoicing, initials, checked)
	if data then
		local consonants_only = "^[" .. cons .. "]+$"

		-- Adds `candidate` to the set when it consists solely of consonant letters.
		local function record(candidate)
			if candidate and candidate:match(consonants_only) then
				initials[candidate] = true
			end
		end

		for kana, romanization in pairs(data) do
			local pending = not checked[kana]
			-- Only purely alphabetic romanizations are considered.
			if pending and umatch(romanization, "^%a+$") then
				local initial = get_initial(romanization)
				record(initial)
				local voiced, semivoiced = d_voicing[initial], d_semivoicing[initial]
				record(voiced)
				record(semivoiced)
			end
			checked[kana] = true
		end
	end
end
--- Computes the set of consonant initials for a data table and stores it in
-- `data.initials`.
-- @param data   data table with optional `rom`, `tr_voicing` and
--               `tr_semivoicing` fields; it is modified in place
-- @param common truthy when `data` is itself the shared table, in which case
--               the shared table is neither loaded nor consulted
-- @return `data`
function export.process_data(data, common)
	local initials, checked = {}, {}
	local voicing, semivoicing = data.tr_voicing, data.tr_semivoicing
	data.initials = initials

	if common then
		handle_initials(data.rom, voicing, semivoicing, initials, checked)
		return data
	end

	-- Language-specific table: fall back on the shared voicing tables, then
	-- add the shared romanizations whose keys the language table did not cover.
	if not data_common then
		data_common = load_data("Module:Hrkt-translit/data")
	end
	voicing = voicing or data_common.tr_voicing
	semivoicing = semivoicing or data_common.tr_semivoicing
	handle_initials(data.rom, voicing, semivoicing, initials, checked)
	handle_initials(data_common.rom, voicing, semivoicing, initials, checked)
	return data
end
--- Builds the accessor used to look values up in the transliteration data.
-- The accessor takes a path of keys (`d("rom", c)` reads `data.rom[c]`).
-- When "Module:Hrkt-translit/data/<lang>" exists, its entries take precedence
-- over the shared table; otherwise only the shared table is consulted.
-- @param lang language code, or nil
-- @return function(...) returning the value found at the given key path, or nil
local function get_data(lang)
	if not data_common then
		data_common = load_data("Module:Hrkt-translit/data")
	end

	-- Follows the given keys down from `node`; yields nil as soon as a
	-- non-table is met while keys remain.
	local function descend(node, ...)
		local path, depth = {...}, select("#", ...)
		for level = 1, depth do
			if type(node) ~= "table" then
				return nil
			end
			node = node[path[level]]
		end
		return node
	end

	-- Accessor that reads the shared table only.
	local function common_only(...)
		return descend(data_common[...], select(2, ...))
	end

	if not lang then
		return common_only
	end
	local override_name = "Module:Hrkt-translit/data/" .. lang
	if not package.loaders[2](override_name) then
		return common_only
	end

	local data_lang = load_data(override_name)
	return function(...)
		local from_lang, from_common = data_lang[...], data_common[...]
		-- Walk both tables in step; once one side stops being a table, the
		-- remaining keys are resolved on the other side alone.
		for position = 2, select("#", ...) do
			local key = select(position, ...)
			if type(from_lang) ~= "table" then
				return descend(from_common, select(position, ...))
			end
			from_lang = from_lang[key]
			if type(from_common) ~= "table" then
				return descend(from_lang, select(position + 1, ...))
			end
			from_common = from_common[key]
		end
		if from_lang == nil then
			return from_common
		end
		return from_lang
	end
end
--- Replaces the initial of a previously emitted fragment using a substitution
-- table such as "tr_voicing" or "tr_semivoicing".
-- @param i_last    index into `result` / `result_sp` of the fragment to change
-- @param result    array of output fragments
-- @param result_sp tags parallel to `result`
-- @param hist      truthy when historical transliteration was requested
-- @param d         data accessor, called as d(key) to obtain the substitution table
-- @param key       name of the substitution table
-- @return the fragment with its initial substituted, or the fragment itself
--         (with "w" prepended where applicable) when the table has no entry
local function do_voicing(i_last, result, result_sp, hist, d, key)
	local text = result[i_last]
	if not hist and result_sp[i_last] == "historical w" then
		text = "w" .. text
	end
	local initial = get_initial(text)
	-- get_initial always returns a prefix of `text`, so look the initial up
	-- directly and splice the replacement in. Building a pattern from it would
	-- misbehave or raise an error when the fragment contains pattern-magic
	-- characters such as "(" or "%".
	local voiced = d(key)[initial]
	if not voiced then
		return text
	end
	return voiced .. text:sub(#initial + 1)
end
--- Transliterates kana text into Latin script.
-- @param text    string to transliterate
-- @param lang    language code, handed to get_data to select the data tables
-- @param sc      script code; not read anywhere in this function
-- @param options optional table; the fields read here are `hist`, `keep_dot`,
--                `phonetic` and `no_diacritics`
-- @return the transliterated string, with the internal glottal and
--         disambiguation placeholders rendered as apostrophes
function export.tr(text, lang, sc, options)
-- Text containing a character from the kanji range is only tracked; processing continues.
if umatch(text, "[" .. mw.loadData("Module:ja/data/range").kanji .. "]") then
require("Module:debug").track("ja/invalid Hrkt")
end
options = options or {}
-- `result` is the array of output fragments. Index 0 holds an empty string so
-- that the 0 returned by getlast ("nothing found") still indexes a valid string.
local result = {[0] = ""}
-- `result_sp` holds an optional tag per fragment, at the same index as in `result`.
local result_sp = {}
local d = get_data(lang)
-- Scans `result` backwards from `i_start` (default: the last fragment) and
-- returns the first index accepted by `predicate_good`, or 0 when the scan is
-- cut short by `predicate_bad` or reaches the start. Fragments between a ">"
-- fragment and the matching "<" fragment, and those two fragments themselves,
-- are skipped without being tested. `predicate_bad` is tested before
-- `predicate_good` on each index.
-- Defaults: bad = tag is "stop"; good = non-empty fragment whose tag is not "'".
local function getlast(i_start, predicate_good, predicate_bad)
local in_xml = false
for i = i_start or #result, 1, -1 do
if in_xml then
if result[i] == "<" then in_xml = false end
elseif result[i] == ">" then
in_xml = true
else
if (predicate_bad or function(index)
return result_sp[index] == "stop"
end)(i) then break end
if (predicate_good or function(index)
return result[index]:len() > 0 and result_sp[index] ~= "'"
end)(i) then return i end
end
end
return 0
end
-- normalize long vowels and iteration marks
text = toNFD(kata_to_hira(normalize_kana(text)))
-- First pass: one iteration per UTF-8 character (a lead byte plus its
-- continuation bytes). Each iteration appends exactly one fragment and may
-- also rewrite the previous one.
for c in text:gmatch(".[\128-\191]*") do
-- Romanization of `c`: the historical table when `options.hist` is set and has
-- an entry, else the regular table, else the character itself.
local rc = options.hist and d("rom_hist", c) or d("rom", c) or c
local rc_sp = d("rom_sp", c)
local i_last = getlast()
if options.keep_dot and c == "." then
rc = "."
elseif c:match("%a") then
-- A character matched by the byte-oriented %a class is tagged "stop", which
-- ends getlast's default backward scan.
rc_sp = "stop"
end
-- A digraph entry keyed by this character and the previous fragment replaces
-- the previous fragment; nothing is emitted for `c` itself.
local repl_digraph = d("digraph", c, result[i_last])
if repl_digraph then
result[i_last], rc = repl_digraph, ""
result_sp[i_last], rc_sp = nil, nil
end
if not options.hist then --はへ
-- For fragments whose tag is flagged by "flag_hahe", the tag value is itself
-- the alternative reading. Here the previous fragment is swapped for that
-- reading when the current character is one of - ~ . or a combining
-- (semi-)voicing mark, or when `rc` contains - ~ a letter or the glottal
-- placeholder.
if d("flag_hahe", result_sp[i_last]) and (umatch(c, "[-~%.゙゚]") or rc:match("[-~%a" .. glottal .. "]")) then
result[i_last] = result_sp[i_last]
result_sp[i_last] = nil
end
-- Conversely, the current character takes its alternative reading when
-- `options.phonetic` is set, when the nearest significant preceding fragment
-- is tagged "stop", or when the previous fragment contains - ~ a letter or
-- the glottal placeholder.
if d("flag_hahe", rc_sp) and (options.phonetic or result_sp[getlast(nil, function(i)
return result[i]:len() > 0 and result_sp[i] ~= "'" or result_sp[i] == "stop"
end, function() return false end)] == "stop" or result[i_last]:match("[-~%a" .. glottal .. "]")) then
rc = rc_sp
rc_sp = nil
end
end
-- Spacing: add a space after a lone closing punctuation mark that is followed
-- by letters, and before an opening bracket or quote that follows letters.
-- NOTE(review): the last character of the set below (Ӡ) looks out of place next
-- to the opening “ handled in the elseif; a closing ” may have been intended. Confirm.
if rc:match("%a") and umatch(result[i_last], "^[,%.?!:)Ӡ]$") then --space and punctuations
result[i_last] = result[i_last] .. " "
elseif umatch(rc, "^[(“]$") and result[i_last]:match("%a") then
rc = " " .. rc
end
-- A (semi-)voicing tag rewrites the initial of the previous fragment.
if rc_sp == "voiced" then -- voicing
result[i_last] = do_voicing(i_last, result, result_sp, options.hist, d, "tr_voicing")
elseif rc_sp == "semivoiced" then
result[i_last] = do_voicing(i_last, result, result_sp, options.hist, d, "tr_semivoicing")
end
-- A romanization ending in a consonant is tagged "coda", unless it is a "stop".
if rc:match("[" .. cons .. "]+" .. "$") and rc_sp ~= "stop" then
rc_sp = "coda"
end
local r_last = result[i_last]
-- `r_lastlast` is the last letter of the previous fragment together with any
-- non-letters that follow it.
local r_lastlast = r_last:match"^.*(%a%A*)$" --vowel clusters or stop consonants
if r_lastlast and r_lastlast:match("[aiueo]") then
if rc:match("^%-[yw]") and r_last:match("^[" .. cons .. "yw]") then
-- `rc` starts with "-y" or "-w": drop the final byte of the previous fragment
-- (if it is longer than one byte) and append the glide to it. A "y" is left
-- out after a fragment flagged by "flag_postalveolarconsonant". The "-y"/"-w"
-- prefix is then removed from `rc`.
local rc_first = rc:sub(2, 2)
r_last = #r_last > 1 and r_last:sub(1, -2) or r_last
if not (rc_first == "y" and d("flag_postalveolarconsonant", r_last)) then
r_last = r_last .. rc_first
end
result[i_last] = r_last
rc = rc:sub(3)
elseif options.hist and r_last:match("^[" .. cons .. "]") and (
r_lastlast == "i" and rc:sub(1, 1) == "y" or
r_lastlast == "u" and rc:sub(1, 1) == "w"
) then
-- Historical mode only: consonant + "i" before "y...", or consonant + "u"
-- before "w...", loses its vowel and takes over the glide from `rc`.
local rc_first = rc:sub(1, 1)
r_last = r_last:sub(1, -2)
if not (rc_first == "y" and d("flag_postalveolarconsonant", r_last)) then
r_last = r_last .. rc_first
end
result[i_last] = r_last
rc = rc:sub(2)
elseif rc:match"^%-[yw]?[aiueo]$" then
-- `rc` is "-" plus an optional glide plus one vowel. After the "-" is removed:
-- a repeat of the previous tail doubles that tail; otherwise the final byte of
-- the previous fragment is dropped (flagged "special" consonants), replaced by
-- "y" (after "i") or by "w" (after o/u, unless `rc` is "u"), or simply dropped
-- when the fragment is longer than one byte.
rc = rc:sub(2)
if r_lastlast == rc then
result[i_last] = r_last .. r_lastlast
rc = ""
elseif d("flag_specialconsonant", r_last) then
result[i_last] = r_last:sub(1, -2)
elseif r_lastlast == "i" then
result[i_last] = r_last:sub(1, -2) .. "y"
elseif r_lastlast:match("[ou]") and rc ~= "u" then
result[i_last] = r_last:sub(1, -2) .. "w"
elseif #r_last > 1 then
result[i_last] = r_last:sub(1, -2)
end
end
end
insert(result, rc)
result_sp[#result] = rc_sp
end
if not options.hist then --isolated はへ
-- A flagged fragment with no significant fragment before it takes its
-- alternative reading (the tag value).
local i_last = getlast()
if d("flag_hahe", result_sp[i_last]) and getlast(i_last - 1) == 0 then
result[i_last] = result_sp[i_last]
end
end
-- Second pass: gemination, disambiguating apostrophes and long vowels.
-- `has_gem` stays set from a fragment tagged "gem" until a later fragment that
-- starts with a consonant (optionally preceded by glottal placeholders).
local has_gem = false
for i, v in ipairs(result) do
--gemination
if has_gem then
local apos, consonant, remainder = v:match("^(" .. glottal .. "*)([" .. cons .. "yw]+)(.*)")
if consonant then
local init, c_gem = apos .. consonant
-- Look the initial up in "tr_gem", stripping trailing y/w one at a time.
-- NOTE(review): each lookup overwrites the previous one, so only the lookup
-- made on the final, stripped `init` is kept. Confirm this is intended.
while true do
c_gem = d("tr_gem", init)
if #init == 1 or not init:match("[yw]$") then
break
end
init = init:sub(1, -2)
end
-- Without a table entry, the first byte of the initial is used.
c_gem = c_gem or init:sub(1, 1)
v = consonant .. remainder
-- Walk backwards: every fragment tagged "gem" becomes `c_gem`, fragments
-- tagged "allow gem" are stepped over, and at the first other fragment the
-- glottal placeholders removed from `v` are put in front of the fragment
-- that getlast finds from the following index.
local i_gem = getlast(i)
while true do
i_gem = getlast(i_gem - 1)
if result_sp[i_gem] == "gem" then
result[i_gem] = c_gem
elseif result_sp[i_gem] ~= "allow gem" then
i_gem = getlast(i_gem + 1)
result[i_gem] = apos .. result[i_gem]
break
end
end
has_gem = false
end
elseif result_sp[i] == "gem" then
has_gem = true
end
-- FIXME: ng/nw should be determined automatically by a disambiguation model.
-- Fragments starting with a vowel, y, w, "ng" or "nw" may need the
-- disambiguation placeholder in front of them, depending on the previous fragment.
local v_first = v:match("^[aiueoyw]") or v:match("^n[gw]")
if v_first then
local i_last
-- In both calls the "bad" predicate never fires, so "stop" tags do not end the scan.
if v_first == "y" or v_first == "w" or v_first == "ng" or v_first == "nw" then
i_last = getlast(i - 1, function(index)
local res, res_sp = result[index], result_sp[index]
return res ~= "" and res ~= "." and res_sp ~= "'" and res_sp ~= "gem"
end, function() end)
else
i_last = getlast(i - 1, nil, function() end)
end
if v_first:sub(1, 1) == "n" then
-- "ng"/"nw" after a fragment containing a letter, except "nw" after a fragment ending in "n".
if umatch(result[i_last], "%a") and not (v_first == "nw" and result[i_last]:match("n$")) then
v = disambig .. v
end
elseif result_sp[i_last] == "coda" then
-- After a coda, the placeholder is added when "tr_coda_apos" has no entry for
-- this pair, or when the entry is "hist" and historical mode is on.
local coda = d("tr_coda_apos", v_first, result[i_last])
if coda == nil or options.hist and coda == "hist" then
v = disambig .. v
end
end
end
--Diacritics (long vowels and others).
v = v:gsub("[aiueo][aiueo%A]*", d("tr_long")) -- From small kana.
local i_last = getlast(i - 1)
local r_last = result[i_last]
-- From digraphs.
-- When the tail of the previous fragment joined to `v` is a key of "tr_long",
-- and the previous fragment does not already end in two vowels, `v` is merged
-- into the previous fragment and emptied.
if r_last and not (options.hist or options.phonetic or options.no_diacritics) then
local r_lastlast = r_last:match"^.*(%a%A*)$" --vowel clusters or stop consonants
if r_lastlast and d("tr_long", r_lastlast .. v) and not r_last:match("[aiueo][aiueo]$") then
result[i_last] = (r_last .. v):gsub("[aiueo][aiueo%A]*", d("tr_long"))
v = ""
end
end
result[i] = v
end
-- Third pass: capitalization. Each fragment tagged "cap" adds one pending
-- capital. While capitals are pending, characters are upper-cased one by one,
-- and the count drops only when upper-casing actually changed the character.
local num_cap = 0
for i, v in ipairs(result) do
--uppercase
if result_sp[i] == "cap" then
num_cap = num_cap + 1
end
if num_cap > 0 then
result[i] = v:gsub(".[\128-\191]*", function(c)
if num_cap <= 0 then return c end
local uc = c:uupper()
if c ~= uc then num_cap = num_cap - 1 end
return uc
end)
end
end
-- Join fragments 1..#result and render both placeholders as apostrophes. The
-- outer parentheses discard gsub's second return value (the match count).
return (concat(result):gsub("[" .. glottal .. disambig .. "]", "'"))
end
-- Return the table of public functions (process_data, tr) as the module's value.
return export
Categories:
- Kana script
- Transliteration modules by script
- Kana script modules
- Transliteration modules
- Japanese modules
- Kunigami modules
- Northern Amami Ōshima modules
- Yaeyama modules
- Miyako modules
- Yonaguni modules
- Kikai modules
- Okinoerabu modules
- Tokunoshima modules
- Southern Amami Ōshima modules
- Hachijō modules
- Yoron modules
- Okinawan modules