Module:cmn-pron-Jianghuai

From Wiktionary, the free dictionary
Jump to navigation Jump to search
This module page is experimental.
The details of its operation have not yet been fully decided upon. Do not deploy widely until the module page is finished.

This module does romanisation conversion, IPA conversion, etc. for Jianghuai Mandarin. See {{zh-pron}}.


local export = {}
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsplit = mw.text.gsplit
local lower = mw.ustring.lower
local upper = mw.ustring.upper

-- Romanised initial -> IPA onset. The empty key covers syllables that have
-- no initial consonant.
local initialConv = {
	-- unaspirated / aspirated stops
	b = "p", p = "pʰ",
	d = "t", t = "tʰ",
	g = "k", k = "kʰ",
	-- unaspirated / aspirated affricates
	z = "t͡s", c = "t͡sʰ",
	j = "t͡ɕ", q = "t͡ɕʰ",
	zh = "ʈ͡ʂ", ch = "ʈ͡ʂʰ",
	-- sonorants
	m = "m",
	l = "l",
	-- fricatives
	f = "f",
	s = "s",
	x = "ɕ",
	h = "x",
	sh = "ʂ",
	r = "ʐ",
	-- zero initial
	[""] = "",
}

-- Romanised final -> IPA rime, grouped by medial (none, i, u, ü).
-- Keys that are not plain ASCII identifiers, and the keyword "in", need the
-- bracketed form.
local finalConv = {
	-- no medial
	y = "ɿ",
	r = "ʅ",
	er = "ɚ",
	a = "a",
	o = "o",
	e = "e",
	["ä"] = "ɛ",
	ei = "əɪ",
	ao = "ɔ",
	ou = "əɯ",
	en = "ən",
	["än"] = "ẽ",
	ang = "ã",
	ong = "oŋ",

	-- medial i
	i = "i",
	ia = "ia",
	ie = "ie",
	["iä"] = "iɛ",
	iao = "iau",
	iou = "iəɯ",
	["iän"] = "iẽ",
	["in"] = "in",
	iang = "iã",
	iong = "ioŋ",

	-- medial u
	u = "u",
	ua = "ua",
	["uä"] = "uɛ",
	uei = "uəɪ",
	uen = "uən",
	uang = "uã",

	-- medial ü
	["ü"] = "y",
	["üe"] = "ye",
	["üän"] = "yẽ",
	["üin"] = "yin",
}

-- Tone digit -> superscript pitch contour; "-" becomes the superscript minus
-- that joins two contours.
local toneConv = {
	["1"] = "³¹",
	["2"] = "¹³",
	["3"] = "²¹²",
	["4"] = "⁴⁴",
	["5"] = "⁵",
	["-"] = "⁻",
}

-- Spelling-normalisation hook applied to every syllable before conversion.
-- No adjustments are implemented: both values come back unchanged.
local function fix(initial, final)
	local fixedInitial, fixedFinal = initial, final
	return fixedInitial, fixedFinal
end

-- Raises an error for spellings that this romanisation does not use, and
-- returns nothing otherwise. `tone` is accepted but not inspected.
local function warn(initial, final, tone)
	local palatals = { j = true, q = true, x = true }
	local retroflexes = { zh = true, ch = true, sh = true, r = true }
	-- i-medial finals that must not follow j/q/x
	local bannedAfterPalatal = {
		iao = true, io = true, iang = true, ["iä"] = true,
		["iän"] = true, ia = true, ie = true, iong = true,
	}

	if palatals[initial] then
		if final == "u" then
			error("Syllables in Nankinese Pinyin do not include ju, qu, xu, but include jü, qü, xü.")
		elseif bannedAfterPalatal[final] then
			error("Syllables in Nankinese Pinyin do not include ji+vowel, qi+vowel, xi+vowel. You can try j-, q-, x- instead ji-, qi-, xi- ")
		elseif final == "iou" or final == "iu" then
			error("Nankinese Pinyin does not use jiou, qiou ,xiou ,jiu, qiu ,xiu, but uses jou, qou, xou instead.")
		end
	elseif retroflexes[initial] and final == "i" then
		error("Nankinese Pinyin does not use zhi, chi, shi ,ri, but uses zhr, chr, shr, r instead.")
	end

	-- rejected whatever the initial is
	if final == "uo" then
		error("Nankinese Pinyin does not use -uo, but uses -o instead.")
	end
end

-- Rhotacised (erhua) rime for a romanised final, chosen by the final's medial.
-- Works on the romanised spelling because the IPA forms no longer begin with
-- "ü" or end in "ng", which the checks below rely on.
local function erhuaRime(final)
	if find(final, '^ü') then -- 撮口呼
		return 'uɚ'
	elseif find(final, '^i') then -- 齊齒呼
		return 'ɚ'
	elseif find(final, '^u') then -- 合口呼
		return 'uɚ'
	elseif final == 'o' or final == 'ong' then
		return 'uɚ'
	end
	return 'ɚ' -- 開口呼
end

-- Converts romanised text to another representation.
--
-- text:   string of syllables, each made of letters followed by tone digits
--         (digits may be joined with "-"); "/" separates alternative readings.
--         A table with an `args` field may be passed instead, in which case
--         args[1] is the text and args[2] the scheme.
-- scheme: 'IPA' or 'NKG'.
--
-- Returns, for 'IPA', the readings wrapped in slashes and joined by "/, /";
-- for 'NKG', the readings joined by " / ", or false as soon as an erhua
-- syllable is met. Raises an error for an unknown scheme, a syllable that
-- cannot be parsed, a spelling rejected by warn(), or (IPA only) an initial or
-- final that has no IPA mapping.
function export.convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end

	-- No NKG-specific respellings are defined anywhere in this module, so
	-- these tables are empty and every initial/final keeps its input spelling.
	-- NOTE(review): fill these in if NKG is meant to respell anything.
	local nkgInitials, nkgFinals = {}, {}

	local result = {}
	for word in gsplit(text, '/') do
		local converted = {}

		-- leading non-letter material, re-attached to NKG output only
		local extra2 = match(word, '^[^A-ZÄÜa-zäü]*')
		for syllable in gmatch(word, '[A-ZÄÜa-zäü]+[%d%-]+[^A-ZÄÜa-zäü]*') do
			local initial, final, erhua, tone, extra = match(syllable, '^([BDGPTKZJCQLMNFSXHVRWUIÜbdgptkzjlcqmnfsxhvrw]?h?)([AEOÄÜRIUYaiueoäüyrng]+)(r?)([%d%-]+)([^A-ZÄÜa-zäü]*)$')
			if not initial then
				error('Cannot parse syllable "' .. syllable .. '".')
			end

			-- Ä and Ü are capitals too; [A-Z] alone would leave them unlowered.
			local caps = false
			if find(initial .. final, '[A-ZÄÜ]') then
				caps = true
				initial, final = lower(initial), lower(final)
			end

			-- The final's character class contains "r", so the greedy capture
			-- always swallows a trailing erhua "r". Split it off here when the
			-- whole string is not a listed final but the part before "r" is
			-- ("er" and "r" are listed finals and are therefore left alone).
			if erhua == '' and finalConv[final] == nil then
				local stem = match(final, '^(.+)r$')
				if stem and finalConv[stem] then
					final, erhua = stem, 'r'
				end
			end

			warn(initial, final, tone)

			initial, final = fix(initial, final)

			if scheme == 'IPA' then
				local ipaInitial = initialConv[initial]
				if ipaInitial == nil then
					error('Unrecognised initial "' .. initial .. '" in syllable "' .. syllable .. '".')
				end
				local ipaFinal = finalConv[final]
				if ipaFinal == nil then
					error('Unrecognised final "' .. final .. '" in syllable "' .. syllable .. '".')
				end
				if erhua == 'r' then
					ipaFinal = erhuaRime(final)
				end

				-- characters without a toneConv entry are left as they are
				local ipaTone = gsub(tone, '.', function(char) return toneConv[char] end)

				table.insert(converted, ipaInitial .. ipaFinal .. ipaTone)
			elseif scheme == 'NKG' then
				-- idk what happens with erhua, so disable output
				if erhua == 'r' then return false end

				-- tones are not part of the NKG output
				local spelled = (nkgInitials[initial] or initial) .. (nkgFinals[final] or final)
				if caps then spelled = gsub(spelled, '^(.)', upper) end

				-- "@" marks the syllable boundary for the joining rules below
				table.insert(converted, '@' .. spelled .. extra)
			else
				error('Convert to what representation?')
			end
		end

		if scheme == 'IPA' then
			table.insert(result, table.concat(converted, ' '))
		elseif scheme == 'NKG' then
			local joined = table.concat(converted, '')
			joined = gsub(joined, '([a-z])@(u)', '%1w')
			joined = gsub(joined, '([a-z])@(i)', '%1j')
			joined = gsub(joined, '([ng])@(y)', '%1j')
			joined = gsub(joined, '@un', 'wen')
			joined = gsub(joined, '@', '')
			table.insert(result, extra2 .. joined)
		end
	end

	if scheme == 'IPA' then
		return '/' .. table.concat(result, '/, /') .. '/'
	else
		return table.concat(result, ' / ')
	end
end

return export