Module:cdo-pron

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:cdo-pron/documentation. [edit]
Useful links: subpage list • transclusions • testcases

Min Dong pronunciation module.


-- Table of the functions this module exposes; it is returned at the end of the file.
local export = {}
 
-- Replacement table used to strip tone marks from a romanised final:
-- every tone-marked vowel maps to its plain vowel, and a bare combining
-- tone mark maps to the empty string. export.ipa hands this table to gsub.
local detone = {
	-- a
	["ă"] = "a", ["ā"] = "a", ["á"] = "a", ["à"] = "a", ["â"] = "a",
	-- e
	["ĕ"] = "e", ["ē"] = "e", ["é"] = "e", ["è"] = "e", ["ê"] = "e",
	-- i
	["ĭ"] = "i", ["ī"] = "i", ["í"] = "i", ["ì"] = "i", ["î"] = "i",
	-- o
	["ŏ"] = "o", ["ō"] = "o", ["ó"] = "o", ["ò"] = "o", ["ô"] = "o",
	-- u
	["ŭ"] = "u", ["ū"] = "u", ["ú"] = "u", ["ù"] = "u", ["û"] = "u",
	-- stand-alone combining marks (breve, macron, acute, grave, circumflex)
	["̆"] = "", ["̄"] = "", ["́"] = "", ["̀"] = "", ["̂"] = "",
}
 
-- IPA for every romanised initial consonant (the empty string is the zero
-- initial). Each initial has three variants, selected by export.ipa:
--   unchanged: first syllable, after a syllable ending in "k", or when
--              lenition has been blocked with curly braces
--   nasal:     after a syllable whose IPA final ends in ŋ
--   lenited:   every other position
-- Where the sound changes, the original initial is kept in a <sup>(…-)</sup>
-- prefix in front of the changed one.
local initial_ipa = {
	b = { unchanged = 'p', lenited = '<sup>(p-)</sup>β', nasal = '<sup>(p-)</sup>m' },
	p = { unchanged = 'pʰ', lenited = '<sup>(pʰ-)</sup>β', nasal = '<sup>(pʰ-)</sup>m' },
	m = { unchanged = 'm', lenited = 'm', nasal = 'm' },
	d = { unchanged = 't', lenited = '<sup>(t-)</sup>l', nasal = '<sup>(t-)</sup>n' },
	t = { unchanged = 'tʰ', lenited = '<sup>(tʰ-)</sup>l', nasal = '<sup>(tʰ-)</sup>n' },
	n = { unchanged = 'n', lenited = 'n', nasal = 'n' },
	l = { unchanged = 'l', lenited = 'l', nasal = '<sup>(l-)</sup>n' },
	g = { unchanged = 'k', lenited = '<sup>(k-)</sup>', nasal = '<sup>(k-)</sup>ŋ' },
	k = { unchanged = 'kʰ', lenited = '<sup>(kʰ-)</sup>', nasal = '<sup>(kʰ-)</sup>ŋ' },
	ng = { unchanged = 'ŋ', lenited = 'ŋ', nasal = 'ŋ' },
	h = { unchanged = 'h', lenited = '<sup>(h-)</sup>', nasal = '<sup>(h-)</sup>ŋ' },
	c = { unchanged = 't͡s', lenited = '<sup>(t͡s-)</sup>ʒ', nasal = '<sup>(t͡s-)</sup>ʒ' },
	ch = { unchanged = 't͡sʰ', lenited = '<sup>(t͡sʰ-)</sup>ʒ', nasal = '<sup>(t͡sʰ-)</sup>ʒ' },
	s = { unchanged = 's', lenited = '<sup>(s-)</sup>l', nasal = '<sup>(s-)</sup>n' },
	[''] = { unchanged = '', lenited = '', nasal = '<sup>(Ø-)</sup>ŋ' },
}
 
-- IPA value of every romanised final (rime) after its tone marks have been
-- stripped. Each entry has a "close" and an "open" realisation: export.ipa
-- uses "open" only for the last syllable of a word when that syllable carries
-- tone 3, 4 or 6, and "close" in every other case.
--
-- Six spellings (eu, oi, eng, ong, ek, ok) are each shared by two rimes.
-- A table constructor cannot hold the same key twice (the order of assignment
-- for repeated keys is undefined in Lua), so the rime that is written with
-- tones 1, 2, 5 and 7 is stored under the spelling plus "2"; export.ipa
-- appends that suffix before looking the final up.
local final_ipa = {
	["a"] = { ["close"] = "a", ["open"] = "ɑ" },
	["ia"] = { ["close"] = "ia", ["open"] = "iɑ" },
	["ua"] = { ["close"] = "ua", ["open"] = "uɑ" },
	["a̤"] = { ["close"] = "ɛ", ["open"] = "ɑ" },
	["ie"] = { ["close"] = "ie", ["open"] = "iɛ" },
	["o̤"] = { ["close"] = "o", ["open"] = "ɔ" },
	["io"] = { ["close"] = "yo", ["open"] = "yɔ" },
	["uo"] = { ["close"] = "uo", ["open"] = "uɔ" },
	["e̤"] = { ["close"] = "œ", ["open"] = "ɔ" },
	["ae̤"] = { ["close"] = "œ", ["open"] = "ɔ" },
	["au"] = { ["close"] = "au", ["open"] = "ɑu" },
	["eu2"] = { ["close"] = "ɛu", ["open"] = "ɛu" },
	["aiu"] = { ["close"] = "ɛu", ["open"] = "ɑu" },
	["ieu"] = { ["close"] = "iu", ["open"] = "iɛu" },
	["iu"] = { ["close"] = "iu", ["open"] = "iu" },
	["eu"] = { ["close"] = "iu", ["open"] = "iɛu" },
	["oi2"] = { ["close"] = "øy", ["open"] = "øy" },
	["o̤i"] = { ["close"] = "øy", ["open"] = "ɔy" },
	["ai"] = { ["close"] = "ai", ["open"] = "ɑi" },
	["uai"] = { ["close"] = "uai", ["open"] = "uɑi" },
	["uoi"] = { ["close"] = "ui", ["open"] = "uoi" },
	["ui"] = { ["close"] = "ui", ["open"] = "ui" },
	["oi"] = { ["close"] = "ui", ["open"] = "uoi" },
	["i"] = { ["close"] = "i", ["open"] = "i" },
	["e"] = { ["close"] = "i", ["open"] = "ɛi" },
	["u"] = { ["close"] = "u", ["open"] = "u" },
	["o"] = { ["close"] = "u", ["open"] = "ou" },
	["ṳ"] = { ["close"] = "y", ["open"] = "øy" },
	["e̤ṳ"] = { ["close"] = "y", ["open"] = "øy" },
	["ah"] = { ["close"] = "aʔ", ["open"] = "ɑʔ" },
	["iah"] = { ["close"] = "iaʔ", ["open"] = "iɑʔ" },
	["uah"] = { ["close"] = "uaʔ", ["open"] = "uɑʔ" },
	["a̤h"] = { ["close"] = "eʔ", ["open"] = "ɛʔ" },
	["ieh"] = { ["close"] = "ieʔ", ["open"] = "iɛʔ" },
	["o̤h"] = { ["close"] = "oʔ", ["open"] = "ɔʔ" },
	["ioh"] = { ["close"] = "yoʔ", ["open"] = "yɔʔ" },
	["uoh"] = { ["close"] = "uoʔ", ["open"] = "uɔʔ" },
	["e̤h"] = { ["close"] = "øʔ", ["open"] = "œʔ" },
	["ang"] = { ["close"] = "aŋ", ["open"] = "ɑŋ" },
	["iang"] = { ["close"] = "iaŋ", ["open"] = "iɑŋ" },
	["uang"] = { ["close"] = "uaŋ", ["open"] = "uɑŋ" },
	["ieng"] = { ["close"] = "ieŋ", ["open"] = "iɛŋ" },
	["iong"] = { ["close"] = "yoŋ", ["open"] = "yɔŋ" },
	["uong"] = { ["close"] = "uoŋ", ["open"] = "uɔŋ" },
	["ing"] = { ["close"] = "iŋ", ["open"] = "iŋ" },
	["eng"] = { ["close"] = "iŋ", ["open"] = "ɛiŋ" },
	["ung"] = { ["close"] = "uŋ", ["open"] = "uŋ" },
	["ong"] = { ["close"] = "uŋ", ["open"] = "ouŋ" },
	["ṳng"] = { ["close"] = "yŋ", ["open"] = "yŋ" },
	["e̤ṳng"] = { ["close"] = "yŋ", ["open"] = "øyŋ" },
	["eng2"] = { ["close"] = "eiŋ", ["open"] = "eiŋ" },
	["aing"] = { ["close"] = "eiŋ", ["open"] = "aiŋ" },
	["ong2"] = { ["close"] = "ouŋ", ["open"] = "ouŋ" },
	["aung"] = { ["close"] = "ouŋ", ["open"] = "auŋ" },
	["e̤ng"] = { ["close"] = "øyŋ", ["open"] = "øyŋ" },
	["ae̤ng"] = { ["close"] = "øyŋ", ["open"] = "ɔyŋ" },
	["ak"] = { ["close"] = "aʔ", ["open"] = "ɑʔ" },
	["iak"] = { ["close"] = "iaʔ", ["open"] = "iɑʔ" },
	["uak"] = { ["close"] = "uaʔ", ["open"] = "uɑʔ" },
	["iek"] = { ["close"] = "ieʔ", ["open"] = "iɛʔ" },
	["iok"] = { ["close"] = "yoʔ", ["open"] = "yɔʔ" },
	["uok"] = { ["close"] = "uoʔ", ["open"] = "uɔʔ" },
	["ik"] = { ["close"] = "iʔ", ["open"] = "iʔ" },
	["ek"] = { ["close"] = "iʔ", ["open"] = "ɛiʔ" },
	["uk"] = { ["close"] = "uʔ", ["open"] = "uʔ" },
	["ok"] = { ["close"] = "uʔ", ["open"] = "ouʔ" },
	["ṳk"] = { ["close"] = "yʔ", ["open"] = "yʔ" },
	["e̤ṳk"] = { ["close"] = "yʔ", ["open"] = "øyʔ" },
	["ek2"] = { ["close"] = "eiʔ", ["open"] = "eiʔ" },
	["aik"] = { ["close"] = "eiʔ", ["open"] = "aiʔ" },
	["ok2"] = { ["close"] = "ouʔ", ["open"] = "ouʔ" },
	["auk"] = { ["close"] = "ouʔ", ["open"] = "auʔ" },
	["e̤k"] = { ["close"] = "øyʔ", ["open"] = "øyʔ" },
	["ae̤k"] = { ["close"] = "øyʔ", ["open"] = "ɔyʔ" },
	["ng"] = { ["close"] = "ŋ̍", ["open"] = "ŋ̍" },
}

-- Spellings that stand for two rimes. With tone 1, 2, 5 or 7 the entry stored
-- under <spelling>2 in final_ipa applies; with tone 3, 4 or 6 the plain one.
local two_rime_spelling = {
	eu = true, oi = true, eng = true, ong = true, ek = true, ok = true,
}

-- Citation pitch of each tone number, written with superscript digits.
local tone_ipa = {
	[1] = '⁵⁵',
	[2] = '³³',
	[3] = '²¹³',
	[4] = '²⁴',
	[5] = '⁵³',
	[6] = '²⁴²',
	[7] = '⁵',
}

-- Sandhi pitch appended to the citation pitch of every non-final syllable.
-- Outer key: the sandhi group of the syllable's own tone (chosen in
-- export.ipa); inner key: the tone number of the syllable that follows.
local tone_sandhi = {
	['first'] = {
		[1] = '⁻⁵⁵', [5] = '⁻⁵⁵', [7] = '⁻⁵⁵',
		[2] = '⁻⁵³', [3] = '⁻⁵³', [4] = '⁻⁵³', [6] = '⁻⁵³',
	},
	['second'] = {
		[1] = '⁻⁵⁵',
		[2] = '⁻³³', [5] = '⁻³³', [7] = '⁻³³',
		[3] = '⁻²¹', [4] = '⁻²¹', [6] = '⁻²¹',
	},
	['third'] = {
		[1] = '⁻²¹', [5] = '⁻²¹', [7] = '⁻²¹',
		[2] = '⁻³⁵',
		[3] = '⁻⁵⁵', [4] = '⁻⁵⁵', [6] = '⁻⁵⁵',
	}
}

-- Replacements for a syllabic ŋ̍ final, keyed by the class of the initial
-- that follows it. Note that the key 'nasal' holds the m̩ variant.
local neg_assim = {
	['nasal'] = "<sup>(ŋ̍-)</sup>m̩",
	['dental'] = "<sup>(ŋ̍-)</sup>n̩",
	['velar'] = "<sup>(ŋ̍-)</sup>ŋ̍",
}

-- neg_assim key selected by the romanised initial of the following syllable;
-- any initial not listed here (g, k, ng, h and the zero initial) is 'velar'.
local assim_class = {
	b = 'nasal', p = 'nasal', m = 'nasal',
	d = 'dental', t = 'dental', n = 'dental', l = 'dental',
	s = 'dental', c = 'dental', ch = 'dental',
}

--- Convert a romanised Min Dong word into IPA.
-- The input is lower-cased and split on "-" into syllables. Each syllable is
-- split into initial and final, its tone is read from the diacritic on the
-- final, and initial, final and tone are converted separately, taking the
-- neighbouring syllables into account. Curly braces in a syllable are
-- removed and switch off the lenition of initials.
-- @param text  the romanised string, or a frame-like table whose args[1]
--              holds that string
-- @return string  the IPA of all syllables joined by single spaces (the
--                 result contains <sup> markup)
-- Raises an error when the input is missing or when a syllable has an
-- unknown initial, an unknown final or no recognisable tone mark.
function export.ipa(text)
	if type(text) == 'table' then
		text = text.args[1]
	end
	if text == nil then
		error('cdo-pron: no romanisation was supplied')
	end
	text = mw.ustring.lower(text)

	local syllables = mw.text.split(text, "-")
	local count = #syllables
	-- rom_initial / rom_final keep the romanised pieces of each syllable;
	-- initial / final hold the IPA derived from them.
	local rom_initial, rom_final = {}, {}
	local initial, final, tone, tone_conv, ipa = {}, {}, {}, {}, {}
	-- Local on purpose: as a global this flag survived from one call to the next.
	-- NOTE(review): once set it stays set for every later syllable of the same
	-- call, exactly as before; confirm this is intended rather than per-syllable.
	local lenition_blocked = false

	for i, syllable in ipairs(syllables) do
		syllable = mw.ustring.gsub(syllable, '[{}]', function()
			lenition_blocked = true
			return ''
		end)

		local ini = mw.ustring.match(syllable, '^([bpmdtnlgkhcs]?[gh]?)')
		local fin = mw.ustring.sub(syllable, mw.ustring.len(ini) + 1, -1)

		-- The tone is read from the diacritic; -h/-k finals select tone 7 or 4.
		local t
		if mw.ustring.find(fin, '[ăĕĭŏŭ̆]') or fin == '' then
			if mw.ustring.find(fin, '[hk]$') then
				t = 7
			else
				t = 1
			end
		elseif mw.ustring.find(fin, '[āēīōū̄]') then
			t = 2
		elseif mw.ustring.find(fin, '[áéíóú́]') then
			if mw.ustring.find(fin, '[hk]$') then
				t = 4
			else
				t = 3
			end
		elseif mw.ustring.find(fin, '[àèìòù̀]') then
			t = 5
		elseif mw.ustring.find(fin, '[âêîôû̂]') then
			t = 6
		end
		if t == nil then
			error('cdo-pron: no tone mark found in syllable "' .. syllables[i] .. '"')
		end
		tone[i] = t

		fin = mw.ustring.gsub(fin, '[ăĕĭŏŭāēīōūáéíóúàèìòùâêîôû̆̄́̀̂]', detone)

		-- Pick the right rime for a spelling that is shared by two rimes.
		if two_rime_spelling[fin] and (t == 1 or t == 2 or t == 5 or t == 7) then
			fin = fin .. '2'
		end
		if mw.ustring.find(ini .. fin, '[dtnlcs]h?io') then
			fin = mw.ustring.gsub(fin, 'io', 'uo')
		end
		-- A bare "ng" is a syllabic nasal: no initial, final "ng".
		if (ini .. fin) == 'ng' then
			ini, fin = '', 'ng'
		end
		rom_initial[i], rom_final[i] = ini, fin

		local rime = final_ipa[fin]
		if rime == nil then
			error('cdo-pron: unrecognised final in syllable "' .. syllables[i] .. '"')
		end
		if (t == 3 or t == 4 or t == 6) and i == count then
			final[i] = rime["open"]
		else
			final[i] = rime["close"]
		end

		local initial_state
		if i == 1 or mw.ustring.find(syllables[i-1], 'k$') or lenition_blocked then
			initial_state = 'unchanged'
		elseif mw.ustring.find(final[i-1], '[ŋ̍]$') then
			initial_state = 'nasal'
		else
			initial_state = 'lenited'
		end
		local variants = initial_ipa[ini]
		if variants == nil then
			error('cdo-pron: unrecognised initial in syllable "' .. syllables[i] .. '"')
		end
		initial[i] = variants[initial_state]
	end

	for i = 1, count do
		-- A syllabic nasal assimilates to the initial of the next syllable.
		-- With no following syllable it is left unchanged (this used to crash).
		if final[i] == 'ŋ̍' and rom_initial[i+1] ~= nil then
			final[i] = neg_assim[assim_class[rom_initial[i+1]] or 'velar']
		end

		tone_conv[i] = tone_ipa[tone[i]]
		if i ~= count then
			local t = tone[i]
			local group
			-- The -h test must look at the romanised final: the IPA final ends
			-- in ʔ for both -h and -k, so it cannot tell the two apart.
			if t == 1 or t == 3 or t == 6 or (t == 4 and mw.ustring.find(rom_final[i], 'h$')) then
				group = 'first'
			elseif t == 5 or t == 7 then
				group = 'second'
			else
				group = 'third'
			end
			tone_conv[i] = tone_conv[i] .. tone_sandhi[group][tone[i+1]]
			-- Drop the sandhi part when it equals the citation pitch.
			tone_conv[i] = mw.ustring.gsub(tone_conv[i], '([¹²³⁴⁵]+)⁻([¹²³⁴⁵]+)', function(original, sandhi)
				if original == sandhi then
					return original
				end
			end)
		end
		ipa[i] = initial[i] .. final[i] .. tone_conv[i]
	end
	return table.concat(ipa, " ")
end
 
-- Hand the table of public functions back to whoever loads this module.
return export