Module:yue-pron: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
No edit summary |
No edit summary |
||
Line 120: | Line 120: | ||
for regex_idx,regex_pair in ipairs(ipa_preprocess) do |
for regex_idx,regex_pair in ipairs(ipa_preprocess) do |
||
syllable[i] = mw.ustring.gsub(syllable[i],regex_pair[1],regex_pair[2]) |
syllable[i] = mw.ustring.gsub(syllable[i],regex_pair[1],regex_pair[2]) |
||
end |
|||
if not mw.ustring.match(syllable[i], "([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)") then |
|||
error("Incorrect Jyutping format. Please check!") |
|||
end |
end |
||
syllable[i] = mw.ustring.gsub(syllable[i], "([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)", |
syllable[i] = mw.ustring.gsub(syllable[i], "([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)", |
Revision as of 03:54, 29 January 2017
- The following documentation is located at Module:yue-pron/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox (diff)
(Aiming to be) a Jyutping-to-anything converter. Currently: Jyutping-to-IPA, Jyutping-to-Yale, Jyutping-to-Cantonese-Pinyin, Jyutping-to-Guangdong-Romanization.
-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Renumbers tones 1, 3 and 6 as 7, 8 and 9. The converters in this module
-- apply it to a tone digit that directly follows a final p, t or k.
local entering_tones = {
["1"] = "7", ["3"] = "8", ["6"] = "9"
}
-- Ordered {pattern, replacement} pairs applied one after another (via ipairs)
-- by jyutping_to_ipa before a syllable is split into initial/nucleus/coda.
-- Order matters: rule 1 turns every "a" into "ă" and rule 3 turns "ăă" back
-- into "a", so a single "a" ends up as "ă" while "aa" ends up as "a".
-- The breve-marked letters are keys of ipa_nucleus.
local ipa_preprocess = {
[1] = {"a", "ă"}, [2] = {"yu", "y"}, [3] = {"ăă", "a"}, [4] = {"uk", "ŭk"}, [5] = {"ik", "ĭk"},
[6] = {"ou", "ŏu"}, [7] = {"eoi", "eoy"}, [8] = {"ung", "ŭng"}, [9] = {"ing", "ĭng"}, [10] = {"ei", "ĕi"}
}
-- IPA for each syllable initial captured by jyutping_to_ipa.
-- The "zh", "ch" and "sh" keys are not typed by users: jyutping_to_ipa inserts
-- the "h" itself after z/c/s before "yu" and after z/c before "oe"/"eo".
local ipa_initial = {
["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f",
["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["gw"] = "kʷ", ["kw"] = "kʷʰ",
["zh"] = "t͡ɕ", ["ch"] = "t͡ɕʰ", ["sh"] = "ɕ",
["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
["h"] = "h", ["w"] = "w", ["j"] = "j",
[""] = ""
}
-- IPA for each nucleus. Keys carrying a breve (ă, ĕ, ĭ, ŏ, ŭ) and the bare
-- "y" are produced by the ipa_preprocess rewrites, not written by users.
local ipa_nucleus = {
["a"] = "ɑː", ["ă"] = "ɐ",
["e"] = "ɛː", ["ĕ"] = "e",
["i"] = "iː", ["ĭ"] = "ɪ",
["o"] = "ɔː", ["ŏ"] = "o",
["oe"] = "œː", ["eo"] = "ɵ",
["u"] = "uː", ["ŭ"] = "ʊ",
["y"] = "yː"
}
-- IPA for each coda; the empty key covers open syllables.
local ipa_coda = {
["i"] = "i̯", ["u"] = "u̯", ["y"] = "y̯",
["m"] = "m", ["n"] = "n", ["ng"] = "ŋ",
["p"] = "p̚", ["t"] = "t̚", ["k"] = "k̚",
[""] = ""
}
-- Superscript tone values keyed by tone digit. Keys 7-9 are the values that
-- entering_tones substitutes for tones 1, 3 and 6. Tone 1 is wrapped in a
-- <span> whose tooltip reads "or 53". The empty key is used when no changed
-- tone was captured.
local ipa_tone = {
["1"] = "<span style=\"cursor:help\" title=\"or 53\">⁵⁵</span>",
["2"] = "³⁵",
["3"] = "³³",
["4"] = "²¹",
["5"] = "¹³",
["6"] = "²²",
["7"] = "⁵",
["8"] = "³",
["9"] = "²",
[""] = ""
}
-- Separator emitted between the original and the changed tone; both input
-- markers ("*" and "-") produce the same output.
local ipa_tone_sandhi = {
["*"] = "⁻", ["-"] = "⁻", [""] = ""
}
-- IPA for syllables that consist of a nasal only ("m", "ng").
local ipa_syllabic = {
["m"] = "m̩", ["ng"] = "ŋ̍"
}
-- Accented vowel tables used by yale_tone, which passes them to gsub as the
-- replacement table for the first vowel of a final.
local acute_accents = {
["a"] = "á", ["e"] = "é", ["i"] = "í", ["o"] = "ó", ["u"] = "ú"
}
local grave_accents = {
["a"] = "à", ["e"] = "è", ["i"] = "ì", ["o"] = "ò", ["u"] = "ù"
}
local macrons = {
["a"] = "ā", ["e"] = "ē", ["i"] = "ī", ["o"] = "ō", ["u"] = "ū"
}
-- Jyutping finals whose Yale spelling differs; finals not listed are used
-- unchanged by jyutping_to_yale. The Chinese placeholder (literally "no Yale
-- romanization") marks finals without a Yale form: jyutping_to_yale looks for
-- it and then replaces the whole reading with "colloquial sounds not defined".
local yale_final = {
["aa"] = "a", ["eu"] = "沒有耶魯拼音", ["em"] = "沒有耶魯拼音", ["ep"] = "沒有耶魯拼音", ["et"] = "沒有耶魯拼音",
["oe"] = "eu", ["oeng"] = "eung", ["oek"] = "euk", ["eoi"] = "eui", ["eon"] = "eun", ["eot"] = "eut",
[""] = ""
}
-- Jyutping -> Cantonese Pinyin rewrite rules, keyed by pattern. Each value is
-- a gsub replacement: either a string or a function. The two function rules
-- renumber an entering tone (via entering_tones) after a final p/t/k, the
-- second one for the digit that follows a "-" or "*" marker at the end of
-- the string.
local canton_pinyin_regex = {
["yu"] = "y", ["eo"] = "oe", ["eoi"] = "oey", ["z(h?)"] = "dz%1", ["c(h?)"] = "ts%1",
["([ptk])([136])"]=function(a,b) return a .. entering_tones[b] end,
["([ptk][1-6][%-%*])([136])$"]=function(a,b) return a .. entering_tones[b] end
}
-- Apply Yale tone marking to a final.
--   final: the (already Yale-spelled) final, e.g. "a", "eung"
--   b:     the tone digit as a string, "1".."6"
-- Tones 4-6 get an "h" inserted before the trailing consonant (or appended
-- when there is none). Tones 1, 2, 4 and 5 additionally put a diacritic on
-- the first vowel; tones 3 and 6 carry no diacritic.
local function yale_tone(final,b)
	-- Which accent table (if any) decorates the first vowel for each tone.
	local accent_by_tone = {
		["1"] = macrons,
		["2"] = acute_accents,
		["4"] = grave_accents,
		["5"] = acute_accents,
	}
	local marked = final
	local is_low_register = (b == "4") or (b == "5") or (b == "6")
	if is_low_register then
		marked = mw.ustring.gsub(marked, "([ptkmn]?g?)$", "h%1", 1)
	end
	local accents = accent_by_tone[b]
	if accents then
		marked = mw.ustring.gsub(marked, "[aeiou]", accents, 1)
	end
	return marked
end
-- Convert Jyutping to IPA.
--   text: a Jyutping string, or a frame-like table whose args[1] is that
--         string. Syllables are separated by spaces; a comma that is NOT
--         followed by a space separates alternative readings.
-- Returns the converted readings joined with "/, /". No leading or trailing
-- slash is added here.
-- Raises an error for tones 7-9, capital letters, a hyphen directly followed
-- by a letter, a digit directly followed by a letter, or a syllable that does
-- not have the initial/nucleus/coda/tone shape.
function export.jyutping_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end
	if text:match("[7-9]") then error("Invalid tone in Jyutping.") end
	if text:match("[A-Z]") then error("Please do not capitalize the Jyutping.") end
	if text:match("%-[A-Za-z]") then error("Please do not hyphenate the Jyutping.") end
	if text:match("[0-9][a-z]") then error("Error in the Jyutping transcription.") end
	text = text:lower()
	text = text:gsub("jy([^u])", "j%1")
	text = text:gsub("%.%.%.", " ")
	-- Mark every comma, then restore the ones followed by a space, so that
	-- only "bare" commas split the text into readings.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")

	-- initial, nucleus, coda, tone, sandhi marker, changed tone
	local syllable_pattern = "([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)"

	local reading = mw.text.split(text, "隔")
	for i = 1,#reading do
		reading[i] = reading[i]:gsub("([1-6])[a-z]", "%1 "):gsub("[^a-z1-6%-%* ]", "")
		local syllable = mw.text.split(reading[i]:lower(), " ")
		for j = 1,#syllable do
			local syl = syllable[j]
			-- Syllabic nasals ("m4", "ng5", ...) have no vowel. They must be
			-- converted on their own and must NOT go through the vowel
			-- rewrites and the format check below: the check would reject the
			-- finished IPA, and the "a" -> "ă" rewrite would damage the
			-- <span> markup of tone 1.
			local nasal, tone, sandhi, new_tone = syl:match("^([mn]g?)([1-6])([%-%*]?)([1-6]?)$")
			if nasal and ipa_syllabic[nasal] then
				syllable[j] = ipa_syllabic[nasal] .. ipa_tone[tone] .. ipa_tone_sandhi[sandhi] .. ipa_tone[new_tone]
			else
				-- z/c/s are palatalised before these vowels; mark them with
				-- an "h" so that ipa_initial picks the palatal symbol.
				syl = syl:gsub("([zcs])yu", "%1hyu")
				syl = syl:gsub("([zc])oe", "%1hoe")
				syl = syl:gsub("([zc])eo", "%1heo")
				-- Renumber entering tones (after final p/t/k), first the base
				-- tone, then the changed tone after a sandhi marker.
				syl = syl:gsub("([ptk])([136])",
					function(a,b) return a .. entering_tones[b] end)
				syl = syl:gsub("([ptk][1-9][%-%*])([136])$",
					function(a,b) return a .. entering_tones[b] end)
				for _,regex_pair in ipairs(ipa_preprocess) do
					syl = mw.ustring.gsub(syl,regex_pair[1],regex_pair[2])
				end
				if not mw.ustring.match(syl, syllable_pattern) then
					error("Incorrect Jyutping format. Please check!")
				end
				syl = mw.ustring.gsub(syl, syllable_pattern,
					function(a,b,c,d,e,f)
						return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a))) ..
							(ipa_nucleus[b] or error(("Unrecognised nucleus: \"%s\""):format(b))) ..
							(ipa_coda[c] or error(("Unrecognised coda: \"%s\""):format(c))) ..
							(ipa_tone[d] or error(("Unrecognised tone: \"%s\""):format(d))) ..
							ipa_tone_sandhi[e] ..
							(ipa_tone[f] or error(("Unrecognised tone: \"%s\""):format(f)))
					end)
				syllable[j] = syl
			end
		end
		reading[i] = table.concat(syllable, " ")
	end
	return table.concat(reading, "/, /")
end
-- Convert Jyutping to Yale romanization.
--   text: a Jyutping string, or a frame-like table whose args[1] is that
--         string. A comma that is NOT followed by a space separates
--         alternative readings.
-- Returns the converted readings joined with ", ". A reading containing a
-- final that yale_final marks as having no Yale form is replaced as a whole
-- by "colloquial sounds not defined". Tone-1 vowels and syllabic nasals are
-- wrapped in a <span> whose tooltip shows the alternative spelling.
function export.jyutping_to_yale(text)
	if type(text) == "table" then text = text.args[1] end
	text = text:gsub("jy([^u])", "j%1")
	-- Mark every comma, then restore the ones followed by a space, so that
	-- only "bare" commas split the text into readings.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local reading = mw.text.split(text, "隔")
	for i = 1,#reading do
		-- For "original-changed" tone pairs keep only the changed tone.
		reading[i] = mw.ustring.gsub(reading[i], "[1-6]%-", "")
		-- Separate syllables written without a space. The letter after the
		-- tone digit is captured and kept; it is the first letter of the next
		-- syllable and must not be dropped.
		reading[i] = mw.ustring.gsub(reading[i], "([1-9])([a-z])", "%1 %2")
		local syllables = mw.text.split(mw.ustring.lower(reading[i]), " ",true)
		for j = 1,#syllables do
			local syl = syllables[j]
			if mw.ustring.match(syl, "jy[^u]") then error("wrong usage of 'jy' in Jyutping") end
			-- Map entering-tone numbers 7/8/9 back to 1/3/6.
			syl = mw.ustring.gsub(syl, "[789]",{["7"] = "1", ["8"] = "3", ["9"] = "6"})
			-- Syllabic nasals are spelled out directly. The {{懸停|...}}
			-- markers are turned into tooltip spans at the end of the loop.
			syl = mw.ustring.gsub(syl, "^m[1-6]$",{["m1"] = "{{懸停|或m̀|m̄}}", ["m2"] = "ḿ", ["m3"] = "m", ["m4"] = "m̀h", ["m5"] = "ḿh", ["m6"] = "mh"})
			syl = mw.ustring.gsub(syl, "^ng[1-6]$",{["ng1"] = "{{懸停|或ǹg|n̄g}}", ["ng2"] = "ńg", ["ng3"] = "ng", ["ng4"] = "ǹgh", ["ng5"] = "ńgh", ["ng6"] = "ngh"})
			syl = mw.ustring.gsub(syl, "jy?", "y")
			syl = mw.ustring.gsub(syl, "[cz]",{["z"] = "j", ["c"] = "ch"})
			syl = mw.ustring.gsub(syl, "([aeiou][aeiou]?[iumngptk]?[g]?)([1-6])",
				function(a,b) return yale_tone((yale_final[a] or a),b) end)
			syl = mw.ustring.gsub(syl, "(yu[tn]?)([1-6])",yale_tone)
			syl = mw.ustring.gsub(syl, "[āēīōū]",{["ā"] = "{{懸停|或à|ā}}", ["ē"] = "{{懸停|或è|ē}}", ["ī"] = "{{懸停|或ì|ī}}", ["ō"] = "{{懸停|或ò|ō}}", ["ū"] = "{{懸停|或ù|ū}}"})
			syl = mw.ustring.gsub(syl, "{{懸停|或([^|]+)|([^}]+)}}", '<span style="cursor:help" title="or %1">%2</span>')
			syllables[j] = syl
		end
		reading[i] = table.concat(syllables, " ")
		if mw.ustring.match(reading[i], "沒有耶魯拼音") then
			reading[i] = "colloquial sounds not defined"
		end
	end
	return table.concat(reading, ", ")
end
-- Order in which the canton_pinyin_regex rules are applied. The rules are
-- order-sensitive, and pairs() gives no ordering guarantee, so the sequence
-- is spelled out here:
--   * the changed-tone rule expects an un-renumbered base tone ([1-6]) in
--     front of the marker, so it runs before the base entering-tone rule;
--   * "eoi" must be rewritten before the shorter "eo" can consume it.
local canton_pinyin_order = {
	"([ptk][1-6][%-%*])([136])$",
	"([ptk])([136])",
	"eoi",
	"eo",
	"yu",
	"z(h?)",
	"c(h?)",
}

-- Convert Jyutping to Cantonese Pinyin.
--   text: a Jyutping string, or a frame-like table whose args[1] is that
--         string. A comma that is NOT followed by a space separates
--         alternative readings.
-- Returns the converted readings joined with ", ".
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == "table" then text = text.args[1] end
	text = text:gsub("jy([^u])", "j%1")
	-- Mark every comma, then restore the ones followed by a space, so that
	-- only "bare" commas split the text into readings.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local reading = mw.text.split(text, "隔")
	for i=1,#reading do
		for _,regex in ipairs(canton_pinyin_order) do
			reading[i] = mw.ustring.gsub(reading[i],regex,canton_pinyin_regex[regex])
		end
	end
	return table.concat(reading, ", ")
end
-- Convert Jyutping to Guangdong Romanization.
--   text: a Jyutping string, or a frame-like table whose args[1] is that
--         string. A comma that is NOT followed by a space separates
--         alternative readings.
-- Returns the converted readings joined with ", ". A reading in which "é" is
-- followed by u, m or b after conversion is replaced as a whole by
-- 'colloquial sounds not defined'.
function export.jyutping_to_guangdong(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local palatal = { ['z']='j', ['c']='q', ['s']='x' }
	local final = { ['p']='b', ['k']='g', ['t']='d' }

	-- {pattern, replacement} steps; applied strictly top to bottom because
	-- later steps rely on the output of earlier ones.
	local steps = {
		{ 'yu', 'ü' },
		{ 'eoi', 'êü' },
		{ 'j', 'y' },
		{ '[oe][oe]', 'ê' },
		{ 'e', 'é' },
		{ '([zcs])([iü])', function(a,b) return palatal[a] .. b end },
		{ '([jqxyê])ü', '%1u' },
		{ 'a+', { ['aa']='a', ['a']='e' } },
		{ '([kg])w', '%1u' },
		{ '([ae])u', '%1o' },
		{ '([pkt])(%d)', function(a,b) return final[a] .. b end },
		{ '%d%-(%d)', '%1' },
	}

	local marked = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local reading = mw.text.split(marked, "隔")
	for idx, item in ipairs(reading) do
		for _, step in ipairs(steps) do
			item = mw.ustring.gsub(item, step[1], step[2])
		end
		if mw.ustring.match(item, 'é[umb]') then
			item = 'colloquial sounds not defined'
		end
		reading[idx] = item
	end
	return table.concat(reading, ", ")
end
-- Split a Jyutping string into its alternative readings and collapse
-- "original-changed" tone pairs to the changed tone.
--   text: a Jyutping string, or a frame-like table whose args[1] is that
--         string. A comma that is NOT followed by a space separates readings.
-- Returns a table (array) of reading strings. If the input contains "[[",
-- a fixed placeholder string is returned instead.
function export.jyutping_format(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	if text:find("%[%[") then
		return "just a lengthy text to ensure it works"
	end

	-- Mark every comma, then restore the ones followed by a space.
	local marked = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local parts = mw.text.split(marked, "隔")
	for idx = 1, #parts do
		-- Parentheses drop gsub's second return value (the match count).
		parts[idx] = (parts[idx]:gsub("[1-6]%-([1-6])", "%1"))
	end
	return parts
end
-- Convert a Hoisanva romanization to IPA.
--   text: the romanized string, or a frame-like table whose args[1] is that
--         string (accepted for consistency with the other exported
--         converters). Words are separated by ","; syllables within a word
--         by a single space.
-- Each syllable is: optional initial, final, tone digit 1-5, and an optional
-- tone-change suffix: "*", "-N" or "-N*".
-- Returns the IPA of every word wrapped in slashes and joined with ", ",
-- i.e. "/word1/, /word2/".
-- Raises an error naming the offending syllable when it cannot be parsed,
-- uses an unknown initial or final, or combines tone 2 with a bare "*".
function export.hoisanva_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end

	local hsv_initial = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "ᵐb", ["f"] = "f", ["v"] = "v",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "ⁿd", ["l"] = "l", ["lh"] = "ɬ",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ᵑg",
		["z"] = "t͡s", ["c"] = "t͡sʰ",
		["y"] = "ʒ", ["s"] = "s", ["h"] = "h", [""] = ""
	}
	local hsv_final = {
		["a"] = "a", ["ai"] = "ai", ["au"] = "au", ["am"] = "am",
		["an"] = "an", ["ang"] = "aŋ", ["ap"] = "ap̚", ["at"] = "at̚",
		["ak"] = "ak̚",
		["i"] = "i", ["iu"] = "iu", ["im"] = "im", ["in"] = "in",
		["ip"] = "ip̚", ["it"] = "it̚",
		["ie"] = "iɛ", ["iau"] = "iau", ["iam"] = "iam", ["iang"] = "iaŋ",
		["iap"] = "iap̚", ["iak"] = "iak̚",
		["u"] = "u", ["ui"] = "ui", ["un"] = "un", ["ut"] = "ut̚",
		["ei"] = "ei", ["eu"] = "eu", ["em"] = "em", ["en"] = "en",
		["uung"] = "ɵŋ", ["ep"] = "ep̚", ["et"] = "et̚", ["uuk"] = "ɵk̚", ["uut"] = "ɵt̚",
		["o"] = "ᵘɔ", ["oi"] = "ᵘɔi", ["on"] = "ᵘɔn", ["ong"] = "ɔŋ",
		["ot"] = "ᵘɔt̚", ["ok"] = "ɔk̚",
		["m"] = "m̩"
	}
	-- Indexed by tone number 1-5.
	local hsv_tone = { "³³", "⁵⁵", "²²", "²¹", "³²" }

	local result = {}
	for word in mw.text.gsplit(text, ",") do
		local initial, final, tone, tone_ch, word_result = "", "", "", "", {}
		for syllable in mw.text.gsplit(word, " ") do
			-- All four captures are nil when the syllable does not match; the
			-- table lookups below then fail and the error is raised.
			initial, final, tone, tone_ch = mw.ustring.match(syllable, "^([^aeiou]*)([^1-5]*)([1-5])([%*%-]?[1-5]?%*?)$")
			-- A vowel-less syllable (e.g. syllabic "m") is captured entirely
			-- as the initial; treat it as a final instead.
			if final == "" then final, initial = initial, "" end
			if not hsv_initial[initial] or not hsv_final[final] or not hsv_tone[tonumber(tone)] or (tone == "2" and tone_ch == "*") then
				error("Syllable '" .. syllable .. "' is not a valid syllable for IPA conversion.")
			end
			table.insert(word_result,
				hsv_initial[initial] .. hsv_final[final] .. hsv_tone[tonumber(tone)] ..
				-- any tone-change suffix introduces the separator
				(tone_ch ~= "" and "⁻" or "") ..
				-- bare "*" repeats the base tone; "-N" gives the new tone N
				(tone_ch == "*" and hsv_tone[tonumber(tone)] or (mw.ustring.find(tone_ch, "^%-[1-5]") and hsv_tone[tonumber(tone_ch:sub(2, 2))] or "")) ..
				-- a trailing "*" appends a final rise to 5
				(tone_ch:sub(-1, -1) == "*" and "⁵" or ""))
		end
		table.insert(result, table.concat(word_result, " "))
	end
	return "/" .. table.concat(result, "/, /") .. "/"
end
-- Module result: the table holding the exported conversion functions.
return export