User:Gnosygnu/sandbox/Module:yue-pron

From Wiktionary, the free dictionary
Jump to navigation Jump to search

-- Table collecting the functions this module exposes (the export.* definitions
-- further down); it is what the file returns on its last line.
local export = {}

-- Tone-digit remapping (1 -> 7, 3 -> 8, 6 -> 9). The code in this file applies
-- it to a tone digit that directly follows a p/t/k.
local entering_tones = {
	['1'] = '7',
	['3'] = '8',
	['6'] = '9',
}

-- Ordered {pattern, replacement} pairs that export.jyutping_to_ipa runs over
-- each syllable (via ipairs) before the main conversion. The order matters:
-- every 'a' is first rewritten to 'ă', and only then is a doubled 'ăă' turned
-- back into a plain 'a'.
local ipa_preprocess = {
	{ 'a', 'ă' },
	{ 'yu', 'y' },
	{ 'ăă', 'a' },
	{ 'uk', 'ŭk' },
	{ 'ik', 'ĭk' },
	{ 'ou', 'ŏu' },
	{ 'eoi', 'eoy' },
	{ 'ung', 'ŭng' },
	{ 'ing', 'ĭng' },
	{ 'ei', 'ĕi' },
}

-- Initial (onset) spelling -> IPA, looked up by export.jyutping_to_ipa.
-- The 'zh', 'ch' and 'sh' keys are not typed by users: jyutping_to_ipa inserts
-- the 'h' itself before 'yu', 'oe' and 'eo'.
-- The '' entry is required: the initial capture in jyutping_to_ipa is optional,
-- and a syllable without an initial must map to an empty string instead of
-- triggering the "Unrecognised initial" error. (The entry had been reduced to
-- the invalid `[]= ` and is restored here.)
local ipa_initial = {
	['b'] = 'p', ['p'] = 'pʰ',
	['d'] = 't', ['t'] = 'tʰ',
	['g'] = 'k', ['k'] = 'kʰ',
	['ng'] = 'ŋ',
	['gw'] = 'kʷ', ['kw'] = 'kʷʰ',
	['zh'] = 't͡ɕ', ['z'] = 't͡s',
	['ch'] = 't͡ɕʰ', ['c'] = 't͡sʰ',
	['sh'] = 'ɕ',
	['m'] = 'm', ['f'] = 'f', ['n'] = 'n', ['l'] = 'l',
	['h'] = 'h', ['w'] = 'w', ['s'] = 's', ['j'] = 'j',
	[''] = '',
}

-- Nucleus (vowel) spelling -> IPA, looked up by export.jyutping_to_ipa after
-- the ipa_preprocess rewrites have run. The breve-marked keys (ă, ĕ, ĭ, ŏ, ŭ)
-- are the intermediate spellings those rewrites produce.
local ipa_nucleus = {
	['a'] = 'ɑː',
	['ă'] = 'ɐ',
	['e'] = 'ɛː',
	['ĕ'] = 'e',
	['i'] = 'iː',
	['ĭ'] = 'ɪ',
	['o'] = 'ɔː',
	['ŏ'] = 'o',
	['oe'] = 'œː',
	['eo'] = 'ɵ',
	['u'] = 'uː',
	['ŭ'] = 'ʊ',
	['y'] = 'yː',
}

-- Coda spelling -> IPA, looked up by export.jyutping_to_ipa.
-- The '' entry is required: the coda capture is optional, so an open syllable
-- must map to an empty string rather than raising "Unrecognised coda".
-- (The entry had been reduced to the invalid `[]= ` and is restored here.)
local ipa_coda = {
	['i'] = 'i̯', ['u'] = 'u̯', ['y'] = 'y̯',
	['m'] = 'm', ['n'] = 'n', ['ng'] = 'ŋ',
	['p'] = 'p̚', ['t'] = 't̚', ['k'] = 'k̚',
	[''] = '',
}

-- Tone digit -> superscript tone letters, looked up by export.jyutping_to_ipa.
-- Digits 7-9 are the internal values produced by the entering_tones remapping.
-- The '' entry is required: the second (post-sandhi) tone capture is optional,
-- and an absent tone must contribute an empty string rather than raising
-- "Unrecognised tone". (The entry had been reduced to the invalid `[]= ` and
-- is restored here.)
local ipa_tone = {
	['1'] = '⁵⁵', ['2'] = '³⁵', ['3'] = '³³',
	['4'] = '²¹', ['5'] = '¹³', ['6'] = '²²',
	['7'] = '⁵', ['8'] = '³', ['9'] = '²',
	[''] = '',
}

-- Tone-change separator ('*' or '-') -> the superscript minus placed between
-- the two tones in the IPA output of export.jyutping_to_ipa.
-- The '' entry is required: jyutping_to_ipa concatenates ipa_tone_sandhi[x]
-- directly (no `or` fallback), so a missing separator must yield '' and not
-- nil. (The entry had been reduced to the invalid `[]= ` and is restored here.)
local ipa_tone_sandhi = {
	['*'] = '⁻',
	['-'] = '⁻',
	[''] = '',
}

-- IPA for syllables that consist only of a nasal ('m' or 'ng') plus tone;
-- export.jyutping_to_ipa handles these before the general syllable pattern.
local ipa_syllabic = {
	['m'] = 'm̩',
	['ng'] = 'ŋ̍',
}

-- Plain vowel -> vowel with acute accent; yale_tone uses it for tones 2 and 5.
local acute_accents = {
	['a'] = 'á',
	['e'] = 'é',
	['i'] = 'í',
	['o'] = 'ó',
	['u'] = 'ú',
}

-- Plain vowel -> vowel with grave accent; yale_tone uses it for tone 4.
local grave_accents = {
	['a'] = 'à',
	['e'] = 'è',
	['i'] = 'ì',
	['o'] = 'ò',
	['u'] = 'ù',
}

-- Plain vowel -> vowel with macron; yale_tone uses it for tone 1.
local macrons = {
	['a'] = 'ā',
	['e'] = 'ē',
	['i'] = 'ī',
	['o'] = 'ō',
	['u'] = 'ū',
}

-- Jyutping final -> Yale final, for the finals whose spelling differs.
-- export.jyutping_to_yale falls back to the Jyutping spelling when a final is
-- not listed here.
-- The value '沒有耶魯拼音' (literally "no Yale romanisation") is a sentinel:
-- jyutping_to_yale replaces any reading containing it with
-- 'colloquial sounds not defined'.
-- The trailing '' entry had been reduced to the invalid `[]= `; it is restored
-- as an empty-to-empty mapping so the table constructor parses again.
local yale_final = {
	['aa'] = 'a',
	['eu'] = '沒有耶魯拼音',
	['em'] = '沒有耶魯拼音',
	['ep'] = '沒有耶魯拼音',
	['et'] = '沒有耶魯拼音',
	['oe'] = 'eu',
	['oeng'] = 'eung',
	['oek'] = 'euk',
	['eoi'] = 'eui',
	['eon'] = 'eun',
	['eot'] = 'eut',
	[''] = '',
}

-- Pattern -> replacement rules used by export.jyutping_to_cantonese_pinyin.
-- Keys are Lua patterns; values are either replacement strings (with %1
-- back-references) or functions receiving the captures.
local canton_pinyin_regex = {
	['yu'] = 'y',
	['eo'] = 'oe',
	['oei'] = 'oey',
	['z(h?)'] = 'dz%1',
	['c(h?)'] = 'ts%1',
	-- A tone digit 1/3/6 right after p/t/k is remapped through entering_tones.
	['([ptk])([136])'] = function(stop, tone)
		return stop .. entering_tones[tone]
	end,
	-- Same remapping for the digit that follows a '-' or '*' at the very end.
	['([ptk][1-6][%-%*])([136])$'] = function(prefix, tone)
		return prefix .. entering_tones[tone]
	end,
}

-- Adds Yale tone marking to a romanised final.
--   final: the final to mark (string)
--   b:     the tone digit, as a one-character string
-- For tones 4, 5 and 6 an 'h' is inserted in front of a trailing
-- [ptkmn]?g? cluster (or appended when there is none). The first plain vowel
-- then receives a macron (tone 1), a grave accent (tone 4) or an acute accent
-- (tones 2 and 5); tones 3 and 6 leave the vowel unmarked.
-- Returns the marked final as a single string.
local function yale_tone(final,b)
	local needs_h = (b == '4' or b == '5' or b == '6')
	if needs_h then
		final = mw.ustring.gsub(final, '([ptkmn]?g?)$', 'h%1', 1)
	end

	-- Pick the diacritic table for this tone, if any.
	local diacritics
	if b == '1' then
		diacritics = macrons
	elseif b == '4' then
		diacritics = grave_accents
	elseif b == '2' or b == '5' then
		diacritics = acute_accents
	end

	if diacritics then
		final = mw.ustring.gsub(final, '[aeiou]', diacritics, 1)
	end
	return final
end

-- Converts Jyutping romanisation to IPA.
--   text: a Jyutping string, or a table with the string in text.args[1]
--         (presumably a Scribunto frame - confirm against callers).
-- A comma NOT followed by a space separates alternative readings; the readings
-- are joined with '/, /' in the result. Syllables inside a reading may be
-- separated by spaces or written together (a tone digit followed by a letter
-- starts a new syllable).
-- Raises an error when the input contains a digit 7-9, or when an initial,
-- nucleus, coda or tone is not found in the lookup tables.
function export.jyutping_to_ipa(text)
	if type(text) == 'table' then text = text.args[1] end
	if text:match('[7-9]') then error('Invalid tone in Jyutping') end

	text = text:lower()
	text = text:gsub('jy([^u])','j%1')
	-- Use '隔' as the reading separator, but put ", " (comma + space) back.
	text = text:gsub(',','隔'):gsub('隔 ',', ')

	local reading = mw.text.split(text,'隔')
	for i = 1,#reading do
		-- Insert a space between run-together syllables. The letter after the
		-- tone digit is captured and kept; previously it was matched but not
		-- written back, which dropped the first letter of the next syllable.
		-- Then strip every character that is not part of the notation (the
		-- replacement argument '' had been lost, leaving a syntax error).
		reading[i] = reading[i]:gsub('([1-6])([a-z])','%1 %2'):gsub('[^a-z1-6%-%* ]','')

		local syllable = mw.text.split(reading[i]:lower(),' ')
		for j = 1,#syllable do
			local syl = syllable[j]

			-- z/c/s are palatalised before 'yu', z/c before 'oe'/'eo':
			-- mark that with an 'h' so ipa_initial picks zh/ch/sh.
			syl = syl:gsub('([zcs])yu','%1hyu')
			syl = syl:gsub('([zc])oe','%1hoe')
			syl = syl:gsub('([zc])eo','%1heo')

			-- Syllabic nasals (no vowel) are converted in one step.
			syl = syl:gsub('^([mn]g?)([1-6])([%-%*]?)([1-6]?)$', function(a,b,c,d)
				return ipa_syllabic[a] .. ipa_tone[b] .. ipa_tone_sandhi[c] .. ipa_tone[d]
			end)

			-- Tones 1/3/6 after p/t/k become the internal tones 7/8/9,
			-- both for the base tone and for the tone after '-' or '*'.
			syl = syl:gsub('([ptk])([136])', function(a,b)
				return a .. entering_tones[b]
			end)
			syl = syl:gsub('([ptk][1-9][%-%*])([136])$', function(a,b)
				return a .. entering_tones[b]
			end)

			-- Spelling normalisations; these must run in table order.
			for regex_idx,regex_pair in ipairs(ipa_preprocess) do
				syl = mw.ustring.gsub(syl,regex_pair[1],regex_pair[2])
			end

			-- initial, nucleus, coda, tone, optional separator, optional second tone
			syl = mw.ustring.gsub(syl,'([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)', function(a,b,c,d,e,f)
				return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a)))
					.. (ipa_nucleus[b] or error(("Unrecognised nucleus: \"%s\""):format(b)))
					.. (ipa_coda[c] or error(("Unrecognised coda: \"%s\""):format(c)))
					.. (ipa_tone[d] or error(("Unrecognised tone: \"%s\""):format(d)))
					.. ipa_tone_sandhi[e]
					.. (ipa_tone[f] or error(("Unrecognised tone: \"%s\""):format(f)))
			end)

			syllable[j] = syl
		end
		reading[i] = table.concat(syllable," ")
	end
	return table.concat(reading,'/, /')
end

-- Converts Jyutping romanisation to Yale romanisation.
--   text: a Jyutping string, or a table with the string in text.args[1]
--         (presumably a Scribunto frame - confirm against callers).
-- A comma NOT followed by a space separates alternative readings; readings are
-- joined with ', ' in the result. A reading that contains a final with no Yale
-- equivalent is replaced by 'colloquial sounds not defined'.
-- Raises an error when a syllable contains 'jy' followed by anything but 'u'.
function export.jyutping_to_yale(text)
	if type(text) == 'table' then text = text.args[1] end

	text = text:gsub('jy([^u])','j%1')
	-- Use '隔' as the reading separator, but put ", " (comma + space) back.
	text = text:gsub(',','隔'):gsub('隔 ',', ')

	local reading = mw.text.split(text,'隔')
	for i = 1,#reading do
		-- Drop a "-<digit>" suffix (the replacement argument '' had been lost,
		-- which left a syntax error).
		reading[i] = mw.ustring.gsub(reading[i],'%-[1-6]','')
		-- Insert a space between run-together syllables. The letter after the
		-- tone digit is captured and kept; previously it was matched but not
		-- written back, which dropped the first letter of the next syllable.
		reading[i] = mw.ustring.gsub(reading[i],'([1-9])([a-z])','%1 %2')

		local syllables = mw.text.split(mw.ustring.lower(reading[i]),' ',true)
		for j = 1,#syllables do
			local syl = syllables[j]

			if mw.ustring.match(syl,'jy[^u]') then error('wrong usage of "jy" in Jyutping') end

			-- Map the entering-tone digits 7/8/9 back to 1/3/6.
			syl = mw.ustring.gsub(syl,'[789]',{['7']='1',['8']='3',['9']='6'})

			-- Syllabic nasals.
			-- NOTE(review): the tone-1 values here had been replaced by the
			-- literal text 'Template:懸停' (residue of a wiki template call).
			-- 'm̄' and 'n̄g' are reconstructed by analogy with the other
			-- entries - confirm against the upstream module.
			syl = mw.ustring.gsub(syl,'^m[1-6]$',{['m1']='m̄',['m2']='ḿ',['m3']='m',['m4']='m̀h',['m5']='ḿh',['m6']='mh'})
			syl = mw.ustring.gsub(syl,'^ng[1-6]$',{['ng1']='n̄g',['ng2']='ńg',['ng3']='ng',['ng4']='ǹgh',['ng5']='ńgh',['ng6']='ngh'})

			syl = mw.ustring.gsub(syl,'jy?','y')
			syl = mw.ustring.gsub(syl,'[cz]',{['z']='j',['c']='ch'})

			-- Convert the final (using yale_final where the spelling differs)
			-- and apply the tone marking.
			syl = mw.ustring.gsub(syl,'([aeiou][aeiou]?[iumngptk]?[g]?)([1-6])', function(a,b)
				return yale_tone((yale_final[a] or a),b)
			end)
			syl = mw.ustring.gsub(syl,'(yu[tn]?)([1-6])',yale_tone)

			-- NOTE(review): two further steps used to follow here. The first
			-- wrapped each macron vowel (ā ē ī ō ū) in the same garbled
			-- 'Template:懸停' placeholder; the second replaced the placeholder
			-- with '%2', which raises "invalid capture index" because the
			-- pattern no longer has captures. Assuming the template's second
			-- argument was the macron vowel itself, the pair was a no-op for
			-- plain-text output, so both steps are omitted. Confirm against
			-- the upstream module.

			syllables[j] = syl
		end

		reading[i] = table.concat(syllables,' ')
		if mw.ustring.match(reading[i],'沒有耶魯拼音') then
			reading[i] = 'colloquial sounds not defined'
		end
	end
	return table.concat(reading,', ')
end

-- Converts Jyutping romanisation to Cantonese Pinyin by applying the rewrite
-- rules stored in canton_pinyin_regex.
--   text: a Jyutping string, or a table with the string in text.args[1]
--         (presumably a Scribunto frame - confirm against callers).
-- A comma NOT followed by a space separates alternative readings; readings are
-- joined with ', ' in the result.
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == 'table' then text = text.args[1] end

	-- The rules are order-sensitive, but pairs() visits a table in unspecified
	-- order, so the sequence is spelled out here and each replacement is looked
	-- up in canton_pinyin_regex:
	--   * 'eo' -> 'oe' must precede 'oei' -> 'oey', so that 'eoi' ends up 'oey';
	--   * the rule for the tone after '-'/'*' expects a 1-6 digit after p/t/k,
	--     so it must run before the plain entering-tone rule turns that digit
	--     into 7-9.
	-- Keep this list in sync with the keys of canton_pinyin_regex.
	local rule_order = {
		'yu',
		'eo',
		'oei',
		'z(h?)',
		'c(h?)',
		'([ptk][1-6][%-%*])([136])$',
		'([ptk])([136])',
	}

	text = text:gsub('jy([^u])','j%1')
	-- Use '隔' as the reading separator, but put ", " (comma + space) back.
	text = text:gsub(',','隔'):gsub('隔 ',', ')

	local reading = mw.text.split(text,'隔')
	for i = 1,#reading do
		for _,pattern in ipairs(rule_order) do
			reading[i] = mw.ustring.gsub(reading[i],pattern,canton_pinyin_regex[pattern])
		end
	end
	return table.concat(reading,', ')
end

-- Splits a Jyutping string into its alternative readings.
--   text: a Jyutping string, or a table with the string in text.args[1]
--         (presumably a Scribunto frame - confirm against callers).
-- Input containing '[[' short-circuits to a fixed placeholder string.
-- Otherwise returns a table of readings (split on commas that are not followed
-- by a space) in which every "X-Y" tone pair has been reduced to "Y".
function export.jyutping_format(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	if text:find('%[%[') then
		return 'just a lengthy text to ensure it works'
	end

	-- Use '隔' as the reading separator, but put ", " (comma + space) back.
	local marked = text:gsub(',','隔'):gsub('隔 ',', ')
	local readings = mw.text.split(marked,'隔')

	for idx = 1,#readings do
		local collapsed = readings[idx]:gsub('[1-6]%-([1-6])','%1')
		readings[idx] = collapsed
	end

	return readings
end

-- Return the table of exported functions as the module's value.
return export