Module:yue-pron

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:yue-pron/documentation. [edit]
Useful links: subpage list • transclusions • testcases

(Aiming to be) a Jyutping-to-anything converter. Currently: Jyutping-to-IPA, Jyutping-to-Yale, Jyutping-to-Cantonese-Pinyin.


-- Module table; the public converter functions are attached to it below.
local export = {}

-- Tone digits 1, 3 and 6 mapped to 7, 8 and 9. The converters apply this to
-- a tone digit that directly follows a final p, t or k.
local entering_tones = {
	['1'] = '7',
	['3'] = '8',
	['6'] = '9',
}

-- Ordered respelling rules run over a syllable before it is split into
-- initial / nucleus / coda. The sequence matters: every 'a' is first marked
-- as 'ă', then a doubled 'ăă' (an original 'aa') is folded back to plain 'a'.
local ipa_preprocess = {
	{'a', 'ă'},
	{'yu', 'y'},
	{'ăă', 'a'},
	{'uk', 'ŭk'},
	{'ik', 'ĭk'},
	{'ou', 'ŏu'},
	{'eoi', 'eoy'},
	{'ung', 'ŭng'},
	{'ing', 'ĭng'},
	{'ei', 'ĕi'},
}

-- Initial consonant (possibly empty) -> IPA. The 'zh', 'ch' and 'sh' keys are
-- internal spellings produced by jyutping_to_ipa before certain vowels.
local ipa_initial = {
	b = 'p',
	p = 'pʰ',
	d = 't',
	t = 'tʰ',
	g = 'k',
	k = 'kʰ',
	ng = 'ŋ',
	gw = 'kʷ',
	kw = 'kʷʰ',
	zh = 't͡ɕ',
	z = 't͡s',
	ch = 't͡ɕʰ',
	c = 't͡sʰ',
	sh = 'ɕ',
	m = 'm',
	f = 'f',
	n = 'n',
	l = 'l',
	h = 'h',
	w = 'w',
	s = 's',
	j = 'j',
	[''] = '',
}

-- Nucleus (after ipa_preprocess respelling) -> IPA vowel.
local ipa_nucleus = {
	['a'] = 'ɑː',
	['ă'] = 'ɐ',
	['e'] = 'ɛː',
	['ĕ'] = 'e',
	['i'] = 'iː',
	['ĭ'] = 'ɪ',
	['o'] = 'ɔː',
	['ŏ'] = 'o',
	['oe'] = 'œː',
	['eo'] = 'ɵ',
	['u'] = 'uː',
	['ŭ'] = 'ʊ',
	['y'] = 'yː',
}

-- Coda (possibly empty) -> IPA.
local ipa_coda = {
	i = 'i̯',
	u = 'u̯',
	y = 'y̯',
	m = 'm',
	n = 'n',
	ng = 'ŋ',
	p = 'p̚',
	t = 't̚',
	k = 'k̚',
	[''] = '',
}

-- Tone digit -> superscript tone letters. 7-9 are the digits produced by
-- entering_tones; the empty key covers an absent second tone.
local ipa_tone = {
	['1'] = '<span style="cursor:help" title="or 53">⁵⁵</span>',
	['2'] = '³⁵',
	['3'] = '³³',
	['4'] = '²¹',
	['5'] = '¹³',
	['6'] = '²²',
	['7'] = '⁵',
	['8'] = '³',
	['9'] = '²',
	[''] = '',
}

-- Separator written between two tone digits ('*' or '-') -> IPA.
local ipa_tone_sandhi = {
	['*'] = '⁻',
	['-'] = '⁻',
	[''] = '',
}

-- Syllables that consist only of a nasal -> syllabic IPA consonant.
local ipa_syllabic = {
	m = 'm̩',
	ng = 'ŋ̍',
}
 
-- Plain vowel -> vowel with acute accent (yale_tone uses it for tones 2 and 5).
local acute_accents = {
	a = 'á',
	e = 'é',
	i = 'í',
	o = 'ó',
	u = 'ú',
}

-- Plain vowel -> vowel with grave accent (yale_tone uses it for tone 4).
local grave_accents = {
	a = 'à',
	e = 'è',
	i = 'ì',
	o = 'ò',
	u = 'ù',
}

-- Plain vowel -> vowel with macron (yale_tone uses it for tone 1).
local macrons = {
	a = 'ā',
	e = 'ē',
	i = 'ī',
	o = 'ō',
	u = 'ū',
}

-- Jyutping finals whose Yale spelling differs. Finals mapped to the Chinese
-- marker string have no Yale spelling; jyutping_to_yale searches for that
-- marker and replaces the whole reading with a fixed message.
local yale_final = {
	aa = 'a',
	eu = '沒有耶魯拼音',
	em = '沒有耶魯拼音',
	ep = '沒有耶魯拼音',
	et = '沒有耶魯拼音',
	oe = 'eu',
	oeng = 'eung',
	oek = 'euk',
	eoi = 'eui',
	eon = 'eun',
	eot = 'eut',
	[''] = '',
}
 
-- Jyutping -> Cantonese Pinyin rewrite rules, keyed by pattern; each value is
-- a gsub replacement (a string with %1 back-references, or a function).
-- A table with string keys has no defined traversal order, so any consumer
-- that needs the rules applied in sequence has to impose that order itself.
-- NOTE(review): two rules interact with others:
--   * 'oei' only occurs once 'eo' -> 'oe' has rewritten an 'eoi';
--   * the last rule expects a digit 1-6 after p/t/k, which is no longer
--     there once the plain '([ptk])([136])' rule has turned it into 7/8/9.
local canton_pinyin_regex={
	-- vowel and initial respellings
	['yu']='y',['eo']='oe',['oei']='oey',['z(h?)']='dz%1',['c(h?)']='ts%1',
	-- tone digit 1/3/6 directly after p/t/k becomes 7/8/9 (via entering_tones)
	['([ptk])([136])']=function(a,b) return a .. entering_tones[b] end,
	-- same rewrite for a trailing tone digit that follows "<p/t/k><tone><- or *>" at the end of the string
	['([ptk][1-6][%-%*])([136])$']=function(a,b) return a .. entering_tones[b] end
}
 
-- Apply a Yale tone mark to a final.
--   final: the final (vowel plus optional coda) without tone digit
--   b:     the tone digit as a string
-- Tones 4, 5 and 6 get an 'h' inserted in front of the trailing consonant
-- run (or appended when there is none). Tone 1 puts a macron, tone 4 a grave
-- accent and tones 2 and 5 an acute accent on the first of a/e/i/o/u.
-- Tones 3 and 6 receive no diacritic. Returns the marked final.
local function yale_tone(final,b)
	local needs_h = (b=='4' or b=='5' or b=='6')
	if needs_h then
		final = mw.ustring.gsub(final,'([ptkmn]?g?)$','h%1',1)
	end

	-- Pick the diacritic table for this tone, if any.
	local diacritics
	if b=='1' then
		diacritics = macrons
	elseif b=='4' then
		diacritics = grave_accents
	elseif b=='2' or b=='5' then
		diacritics = acute_accents
	end

	if diacritics then
		final = mw.ustring.gsub(final,'[aeiou]',diacritics,1)
	end
	return final
end
 
-- Convert one lower-case Jyutping syllable (letters, tone digit, optional
-- '-'/'*' plus second tone digit) to IPA. A syllable that the final pattern
-- does not match is returned with only the intermediate respellings applied.
-- Raises an error for an unrecognised initial, nucleus, coda or tone, and
-- for a consonant-only syllable that is not 'm' or 'ng'.
local function ipa_convert_syllable(syl)
	-- Syllables consisting only of a nasal are finished here. They must not
	-- pass through ipa_preprocess: the tone-1 entry of ipa_tone is HTML
	-- markup, and the 'a' -> 'ă' rule would corrupt its tag and attributes.
	local nasal,tone,sandhi,new_tone = syl:match('^([mn]g?)([1-6])([%-%*]?)([1-6]?)$')
	if nasal then
		return (ipa_syllabic[nasal] or error(("Unrecognised syllabic consonant: \"%s\""):format(nasal))) ..
			ipa_tone[tone] .. ipa_tone_sandhi[sandhi] .. ipa_tone[new_tone]
	end

	-- Respell z/c/s as zh/ch/sh before these vowels so that ipa_initial
	-- selects the alveolo-palatal symbols.
	syl = syl:gsub('([zcs])yu','%1hyu')
	syl = syl:gsub('([zc])oe','%1hoe')
	syl = syl:gsub('([zc])eo','%1heo')

	-- Tone 1/3/6 directly after p/t/k becomes 7/8/9, first for the base
	-- tone, then for a second tone written after '-' or '*'.
	local function to_entering(a,b) return a .. entering_tones[b] end
	syl = syl:gsub('([ptk])([136])',to_entering)
	syl = syl:gsub('([ptk][1-9][%-%*])([136])$',to_entering)

	for _,rule in ipairs(ipa_preprocess) do
		syl = mw.ustring.gsub(syl,rule[1],rule[2])
	end

	-- Parenthesised so that only the string (not gsub's count) is returned.
	return (mw.ustring.gsub(syl,'([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)',
		function(a,b,c,d,e,f)
			return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a))) ..
				(ipa_nucleus[b] or error(("Unrecognised nucleus: \"%s\""):format(b))) ..
				(ipa_coda[c] or error(("Unrecognised coda: \"%s\""):format(c))) ..
				(ipa_tone[d] or error(("Unrecognised tone: \"%s\""):format(d))) ..
				ipa_tone_sandhi[e] ..
				(ipa_tone[f] or error(("Unrecognised tone: \"%s\""):format(f)))
		end))
end

-- Convert Jyutping to IPA.
--   text: a Jyutping string, or a table whose args[1] holds that string.
-- A comma that is not followed by a space separates alternative readings;
-- the converted readings are joined with '/, /'. Within a reading, syllables
-- are separated by spaces, and a space is inserted wherever a tone digit is
-- directly followed by a letter.
-- Raises 'Invalid tone in Jyutping' if the input contains 7, 8 or 9.
function export.jyutping_to_ipa(text)
	if type(text) == 'table' then text = text.args[1] end
	if text:match('[7-9]') then error('Invalid tone in Jyutping') end
	text = text:lower()
	text = text:gsub('jy([^u])','j%1')
	text = text:gsub(',','隔'):gsub('隔 ',', ')
	local reading = mw.text.split(text,'隔')
	for i = 1,#reading do
		-- Keep the letter that follows the tone digit: it is the first
		-- letter of the next syllable. Then drop every character that is
		-- not part of a syllable.
		local cleaned = reading[i]:gsub('([1-6])([a-z])','%1 %2'):gsub('[^a-z1-6%-%* ]','')
		local syllable = mw.text.split(cleaned,' ')
		for j = 1,#syllable do
			syllable[j] = ipa_convert_syllable(syllable[j])
		end
		reading[i] = table.concat(syllable," ")
	end
	return table.concat(reading,'/, /')
end
 
-- Convert Jyutping to Yale romanisation.
--   text: a Jyutping string, or a table whose args[1] holds that string.
-- A comma that is not followed by a space separates alternative readings;
-- the converted readings are joined with ', '. A hyphen followed by a tone
-- digit is removed, so only the first tone of an 'x-y' pair is rendered.
-- A reading containing a final that yale_final marks as having no Yale
-- spelling is replaced by 'colloquial sounds not defined'.
-- Raises 'wrong usage of "jy" in Jyutping' if a syllable still contains
-- 'jy' followed by something other than 'u'.
function export.jyutping_to_yale(text)
	if type(text) == 'table' then text = text.args[1] end

	-- Lower-case first so that the [a-z] patterns below also see letters
	-- that were typed in upper case (jyutping_to_ipa does the same).
	text = mw.ustring.lower(text)
	text = text:gsub('jy([^u])','j%1')
	text = text:gsub(',','隔'):gsub('隔 ',', ')
	local reading = mw.text.split(text,'隔')
	for i = 1,#reading do
		reading[i] = mw.ustring.gsub(reading[i],'%-[1-6]','')
		-- Insert a space between a tone digit and the next syllable while
		-- keeping that syllable's first letter.
		reading[i] = mw.ustring.gsub(reading[i],'([1-9])([a-z])','%1 %2')
		local syllables = mw.text.split(reading[i],' ',true)
		for j = 1,#syllables do
			local syl = syllables[j]

			if mw.ustring.match(syl,'jy[^u]') then error('wrong usage of "jy" in Jyutping') end
			-- Fold the digits 7/8/9 back onto tones 1/3/6.
			syl = mw.ustring.gsub(syl,'[789]',{['7']='1',['8']='3',['9']='6'})
			-- Syllabic m / ng carry the tone mark on the consonant. The
			-- '{{...}}' values are placeholders expanded into a <span> below.
			syl = mw.ustring.gsub(syl,'^m[1-6]$',{['m1']='{{懸停|或m̀|m̄}}',['m2']='ḿ',['m3']='m',['m4']='m̀h',['m5']='ḿh',['m6']='mh'})
			syl = mw.ustring.gsub(syl,'^ng[1-6]$',{['ng1']='{{懸停|或ǹg|n̄g}}',['ng2']='ńg',['ng3']='ng',['ng4']='ǹgh',['ng5']='ńgh',['ng6']='ngh'})
			-- Initials: j / jy -> y, z -> j, c -> ch.
			syl = mw.ustring.gsub(syl,'jy?','y')
			syl = mw.ustring.gsub(syl,'[cz]',{['z']='j',['c']='ch'})
			-- Finals: respell via yale_final where listed, then apply the tone.
			syl = mw.ustring.gsub(syl,'([aeiou][aeiou]?[iumngptk]?[g]?)([1-6])',
				function(a,b) return yale_tone((yale_final[a] or a),b) end)
			syl = mw.ustring.gsub(syl,'(yu[tn]?)([1-6])',yale_tone)
			-- A macron vowel gets a hover title offering the grave-accent form.
			syl = mw.ustring.gsub(syl,'[āēīōū]',{['ā']='{{懸停|或à|ā}}',['ē']='{{懸停|或è|ē}}',['ī']='{{懸停|或ì|ī}}',['ō']='{{懸停|或ò|ō}}',['ū']='{{懸停|或ù|ū}}'})
			syl = mw.ustring.gsub(syl,'{{懸停|或([^|]+)|([^}]+)}}','<span style="cursor:help" title="or %1">%2</span>')

			syllables[j] = syl
		end
		reading[i] = table.concat(syllables,' ')
		if mw.ustring.match(reading[i],'沒有耶魯拼音') then
			reading[i] = 'colloquial sounds not defined'
		end
	end
	return table.concat(reading,', ')
end
 
-- Keys of canton_pinyin_regex in the order the rules have to run. pairs()
-- gives no ordering guarantee, and two rules depend on it:
--   * 'eo' must precede 'oei', because 'oei' only exists after 'eoi' has
--     been rewritten to 'oei';
--   * the tone-change rule must precede the plain entering-tone rule,
--     because it expects the original digit 1-6 after p/t/k.
local canton_pinyin_order = {
	'yu','eo','oei','z(h?)','c(h?)',
	'([ptk][1-6][%-%*])([136])$',
	'([ptk])([136])',
}

-- Convert Jyutping to Cantonese Pinyin.
--   text: a Jyutping string, or a table whose args[1] holds that string.
-- A comma that is not followed by a space separates alternative readings;
-- the converted readings are joined with ', '.
-- Raises an error if canton_pinyin_regex lacks one of the expected rules.
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == 'table' then text = text.args[1] end

	text = text:gsub('jy([^u])','j%1')
	text = text:gsub(',','隔'):gsub('隔 ',', ')
	local reading = mw.text.split(text,'隔')
	for i=1,#reading do
		for _,pattern in ipairs(canton_pinyin_order) do
			local replace = canton_pinyin_regex[pattern]
				or error(("Missing Cantonese Pinyin rule: \"%s\""):format(pattern))
			reading[i] = mw.ustring.gsub(reading[i],pattern,replace)
		end
	end
	return table.concat(reading,', ')
end
 
-- Split Jyutping into its alternative readings.
--   text: a Jyutping string, or a table whose args[1] holds that string.
-- Input containing '[[' returns a fixed placeholder string. Otherwise the
-- result is a table with one string per reading (a comma that is not
-- followed by a space separates readings); in every reading an 'x-y' pair
-- of tone digits is reduced to the second digit.
function export.jyutping_format(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	if text:find('%[%[') then
		return 'just a lengthy text to ensure it works'
	end

	-- Mark separating commas, then restore those that were followed by a space.
	local marked = text:gsub(',','隔')
	marked = marked:gsub('隔 ',', ')

	local readings = mw.text.split(marked,'隔')
	for idx = 1,#readings do
		-- Parenthesised to drop gsub's replacement count.
		readings[idx] = (readings[idx]:gsub('[1-6]%-([1-6])','%1'))
	end

	return readings
end
 
return export