Module:wuu-pron

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

Generates IPA based on romanisation for Wu Chinese. See {{zh-pron}}.


-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Short local aliases for the Scribunto mw.ustring pattern functions used below.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Romanised syllable initial -> IPA.
-- Looked up by export.ipa_syl_conv; the empty-string key covers syllables
-- that have no initial at all.
local initial = {
	["p"] = "p", ["ph"] = "pʰ", ["b"] = "b̻", ["m"] = "m", ["'m"] = "ʔm", ["f"] = "f", ["v"] = "v̻",
	["t"] = "t", ["th"] = "tʰ", ["d"] = "d̻", ["n"] = "n", ["'n"] = "ʔn", ["l"] = "l", ["'l"] = "ʔl",
	["ts"] = "t͡s", ["tsh"] = "t͡sʰ", ["s"] = "s", ["z"] = "z̻", ["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ",
	["jj"] = "d̥͡ʑ", ["ny"] = "n̠ʲ", ["'ny"] = "ʔn̠ʲ", ["x"] = "ɕ", ["xx"] = "ʑ̻", ["k"] = "k",
	["kh"] = "kʰ", ["g"] = "g̊", ["ng"] = "ŋ", ["'ng"] = "ʔŋ", ["h"] = "h", ["'"] = "ʔ", ["hh"] = "ɦ", [""] = ""
}

-- Romanised syllable final -> IPA.
-- Looked up by export.ipa_syl_conv, which raises "Unrecognised final" for
-- any final that is not a key of this table.
local final = {
	["a"] = "a̱", ["o"] = "o", ["au"] = "ɔ", ["eu"] = "ɜ", ["e"] = "e̞", ["oe"] = "ø", ["i"] = "i",
	["ia"] = "ia̱", ["io"] = "io", ["iau"] = "iɔ", ["ieu"] = "iɜ", ["u"] = "v̩ʷ", ["ua"] = "ɯa̱",
	["ue"] = "ɯe̞", ["uoe"] = "v̩ʷø", ["y"] = "y", ["yoe"] = "yø", ["an"] = "ã", ["aan"] = "ɑ̃",
	["en"] = "əɲ", ["on"] = "ʊŋ", ["aq"] = "a̱ʔ", ["oq"] = "ʊʔ", ["eq"] = "əʔ", ["ian"] = "iã",
	["iaan"] = "iɑ̃", ["in"] = "ɪɲ", ["ion"] = "i̯ʊŋ", ["iaq"] = "ia̱ʔ", ["ioq"] = "i̯ʊʔ", ["iq"] = "i̯ɪʔ",
	["uan"] = "ɯã", ["uaan"] = "ɯɑ̃", ["un"] = "ɯə̯ɲ", ["uaq"] = "ɯa̱ʔ", ["ueq"] = "ɯə̯ʔ", ["yn"] = "ʏɲ",
	["yq"] = "ɥ̯ɪʔ", ["er"] = "əɻ", ["r"] = "z̩"
}

-- Tone contour lookup used by export.ipa_conv.
--
-- Keys of the form "<number of syllables>-<tone character>" give the contours
-- for a whole word; the value holds one contour per syllable, separated by
-- spaces (export.ipa_conv splits it on " "). The tone character is the first
-- character of the romanised word.
--
-- Keys of the form "<A|B|C|D>-single" / "<A|B|C|D>-multiple" give the contour
-- that replaces the last syllable's contour when the word is followed by
-- another "+"-joined word; the letter comes from tone_determ and the suffix
-- depends on whether the word has one syllable or more than one.
local tone_contours = {
	["1-0"] = "", ["1--"] = "³³",
	["1-1"] = "⁵³", ["1-2"] = "³⁴", ["1-3"] = "²³", ["1-4"] = "⁵⁵", ["1-5"] = "¹²", 
	["2-1"] = "⁵⁵ ²¹", ["2-2"] = "³³ ⁴⁴", ["2-3"] = "²² ⁴⁴", ["2-4"] = "³³ ⁴⁴", ["2-5"] = "¹¹ ²³",
	["3-1"] = "⁵⁵ ³³ ²¹", ["3-2"] = "³³ ⁵⁵ ²¹", ["3-3"] = "²² ⁵⁵ ²¹", ["3-4"] = "³³ ⁵⁵ ²¹", ["3-5"] = "¹¹ ²² ²³",
	["4-1"] = "⁵⁵ ³³ ³³ ²¹", ["4-2"] = "³³ ⁵⁵ ³³ ²¹", ["4-3"] = "²² ⁵⁵ ³³ ²¹", ["4-4"] = "³³ ⁵⁵ ³³ ²¹", ["4-5"] = "²² ⁵⁵ ³³ ²¹",
	["5-1"] = "⁵⁵ ³³ ³³ ³³ ²¹", ["5-2"] = "³³ ⁵⁵ ³³ ³³ ²¹", ["5-3"] = "²² ⁵⁵ ³³ ³³ ²¹", ["5-4"] = "³³ ⁵⁵ ³³ ³³ ²¹", ["5-5"] = "²² ⁵⁵ ³³ ³³ ²¹",

	["A-single"] = "⁴⁴", ["B-single"] = "³³", ["C-single"] = "⁴⁴", ["D-single"] = "²²",
	["A-multiple"] = "³³", ["B-multiple"] = "³³", ["C-multiple"] = "³³", ["D-multiple"] = "³³",
}

-- Romanised syllabic nasal -> IPA.
-- Used by export.ipa_syl_conv for syllables made up only of an optional
-- "h"/"hh" followed by a nasal; any nasal spelling not listed here is
-- rejected there as an invalid syllable.
local syllabic = {
	["mm"] = "m̩", ["ngg"] = "ŋ̍"
}

-- Classify a romanised syllable.
-- Returns two strings: the voicing of the initial ("voiced" or "voiceless")
-- and the coda type ("checked" when the syllable ends in "q", otherwise
-- "unchecked").
local function determ_syl(text)
	-- Any one of these patterns matching marks the syllable as voiced.
	local voiced_patterns = { "^[bvdnlzg]", "^m[^m]", "jj", "xx", "hh" }

	local voicing = "voiceless"
	for _, pattern in ipairs(voiced_patterns) do
		if match(text, pattern) then
			voicing = "voiced"
			break
		end
	end

	local coda = match(text, "q$") and "checked" or "unchecked"

	return voicing, coda
end

-- Map a romanised syllable to its tone category letter:
-- A = voiceless unchecked, B = voiced unchecked,
-- C = voiceless checked,   D = voiced checked.
local function tone_determ(text)
	local categories = {
		["voiceless-unchecked"] = "A",
		["voiced-unchecked"] = "B",
		["voiceless-checked"] = "C",
		["voiced-checked"] = "D",
	}

	local voicing, coda = determ_syl(text)
	local key = voicing .. "-" .. coda

	-- Fall back to the key itself if it is somehow not listed.
	return categories[key] or key
end

-- Validate one romanised word whose first character is the tone mark.
-- Only the first syllable (the text up to the first space) is inspected.
-- Raises an error when the syllable should have been written with a
-- palatalised initial, or when the tone mark is incompatible with the
-- syllable's voicing or coda. Returns nil when every check passes.
local function rom_check(text)
	local tone = text:sub(1, 1)
	local first_syllable = mw.text.split(text:sub(2), " ")[1]
	local voicing, coda = determ_syl(first_syllable)

	-- Spellings that must use the palatalised series instead.
	local unpalatalised =
		match(first_syllable, "[kgs]h?[iy]")
		or match(first_syllable, "^z[iy]")
		or match(first_syllable, "^ni")
	if unpalatalised then
		error("Invalid syllable: " .. first_syllable .. ". Palatalisation expected.")
	end

	-- The tone mark has to agree with the voicing of the initial...
	if voicing == "voiced" then
		if match(tone, "[124]") then
			error("Invalid syllable: " .. first_syllable .. tone .. ". Voiced initials only occur in tones 3 and 5.")
		end
	elseif voicing == "voiceless" then
		if match(tone, "[35]") then
			error("Invalid syllable: " .. first_syllable .. tone .. ". Voiceless initials only occur in tones 1, 2 and 4.")
		end
	end

	-- ...and with the coda type.
	if coda == "checked" then
		if match(tone, "[123]") then
			error("Checked syllables only occur in tones 4 and 5.")
		end
	elseif match(tone, "[45]") then
		error("Unchecked syllables only occur in tones 1, 2 and 3.")
	end

	return nil
end

-- Convert a Wu romanisation string to IPA with tone contours.
--
-- original_text is either the romanisation itself or a table whose
-- args[1] holds it (as when the function is invoked with a frame).
-- The input is lower-cased and then split at four levels:
--   ","  separates alternative readings,
--   "&"  separates components of a reading,
--   "+"  separates independent words of a component,
--   " "  separates the syllables of a word.
-- The first character of every word is its tone mark; together with the
-- syllable count it selects an entry of tone_contours.
--
-- Returns the converted readings joined by "/, /".
-- Raises an error if rom_check rejects a word, if no tone_contours entry
-- exists for the syllable count / tone mark, or if a syllable cannot be
-- converted by export.ipa_syl_conv.
function export.ipa_conv(original_text)
	if type(original_text) == "table" then original_text = original_text.args[1] end
	original_text = mw.ustring.lower(original_text)

	local reading = mw.text.split(original_text, ",", true)
	for reading_index = 1, #reading do
		local components = mw.text.split(reading[reading_index], "&", true)
		for component_index = 1, #components do
			local indep_words = mw.text.split(components[component_index], "+", true)
			for indep_index = 1, #indep_words do
				local text = indep_words[indep_index]

				-- Syllables are space-separated, so the count is spaces + 1.
				local _, space_count = text:gsub(" ", "")
				local no_syllables = space_count + 1

				rom_check(text)
				local tone = tone_contours[no_syllables .. "-" .. text:sub(1, 1)]
					or error("Tone notation is incorrect. See [[WT:WUU]].")

				-- Drop the leading tone mark before splitting into syllables.
				local syllable = mw.text.split(text:sub(2, -1), " ", true)
				local syl_tone = mw.text.split(tone, " ", true)

				for i = 1, no_syllables do
					-- The last syllable of a word that is followed by another
					-- "+"-joined word takes a contour chosen from its own tone
					-- category, unless the word's contour is exactly "³³".
					if i == no_syllables and indep_words[indep_index + 1] and tone ~= "³³" then
						syl_tone[i] = tone_contours[tone_determ(syllable[i]) .. "-" ..
							(no_syllables > 1 and "multiple" or "single")]
					end
					syllable[i] = export.ipa_syl_conv(syllable[i]) .. syl_tone[i]
				end
				indep_words[indep_index] = table.concat(syllable, " ")
			end
			components[component_index] = table.concat(indep_words, "  ")
		end
		reading[reading_index] = table.concat(components, " ")
	end
	return table.concat(reading, "/, /")
end

-- Convert a single romanised syllable (without its tone mark) to IPA.
--
-- Three shapes are recognised, tried in this order:
--   1. a syllabic nasal, optionally preceded by "h" or "hh";
--   2. a syllable whose initial is "ny" or "'ny";
--   3. any other initial (possibly empty) followed by a final.
-- Initials, finals and syllabic nasals are looked up in the `initial`,
-- `final` and `syllabic` tables.
--
-- Returns the IPA string.
-- Raises "Invalid syllable" when the text fits none of the shapes,
-- "Unrecognised initial" / "Unrecognised final" when a part is missing
-- from its table.
function export.ipa_syl_conv(text)
	local function invalid_syllable()
		return error(("Invalid syllable: \"%s\""):format(text))
	end

	-- 1. Syllabic nasals.
	local aspiration, nasal = text:match("^(h?h?)([mn][mg]?g?)$")
	if nasal then
		return (initial[aspiration] or '') .. (syllabic[nasal] or invalid_syllable())
	end

	-- 2. "ny" / "'ny" initials.
	if text:match("^%'?ny") then
		local onset, rime = text:match("^([\']?ny)([aeiouyr][aeou]?[aeu]?[nqr]?)$")
		-- Without this check a malformed syllable such as "nyzz" would be
		-- passed through as raw romanisation instead of being rejected.
		if not onset then
			return invalid_syllable()
		end
		return initial[onset] ..
			(final[rime] or error(("Unrecognised final: \"%s\""):format(rime)))
	end

	-- 3. Every other initial + final combination.
	local onset, rime = text:match(
		"^([\']?[pbmfvtdnlszjqxkghr%']?[sjgx]?[h]?)([aeiouyr][aeou]?[aeu]?[nqr]?)$")
	if not onset then
		return invalid_syllable()
	end
	return
		(initial[onset] or error(("Unrecognised initial: \"%s\""):format(onset))) ..
		(final[rime] or error(("Unrecognised final: \"%s\""):format(rime)))
end

-- Format a romanisation string for display.
-- text is either the string itself or a table whose args[1] holds it.
-- Readings are separated by "," and words within a reading by "+".
-- A word that starts with a digit or "-" has that character moved to the
-- end as " (T<character>)". Words are re-joined with " + " and readings
-- with "; ".
function export.rom(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local readings = mw.text.split(text, ",", true)
	for reading_index, reading in ipairs(readings) do
		local words = mw.text.split(reading, '+', true)
		for word_index, word in ipairs(words) do
			local tone_mark = word:sub(1, 1)
			if tone_mark:match('[0-9%-]') then
				words[word_index] = word:sub(2) .. ' (T' .. tone_mark .. ')'
			end
		end
		readings[reading_index] = table.concat(words, ' + ')
	end

	return table.concat(readings, "; ")
end

-- Expose the public functions (ipa_conv, ipa_syl_conv, rom).
return export