Module:cmn-pron-Xian

From Wiktionary, the free dictionary
Jump to navigation Jump to search

a

[edit]




sandhi checkers:


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: xǐ'ngǎn / sǎntiǎn [Phonetic: sántiǎn] / Guǎnbvěng [Phonetic: guánbvěng]
      • Sinological IPA (key): /ɕi²¹ ŋã²¹/, /sã²¹⁻²⁴ tʰiã²¹/, /kuã²¹⁻²⁴ pfəŋ²¹/

西安/三天


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: guàngchàng / làobàn [Phonetic: lǎobàn] / yìzì [Phonetic: yì] / làohù [Phonetic: lǎo]
      • Sinological IPA (key): /kuaŋ⁵³ t͡ʂʰaŋ⁵³/, /lau⁵³⁻²¹ pã⁵³/, /i⁵³ t͡sz̩⁵³⁻²¹/, /lau⁵³⁻²¹ xu⁵³⁻²¹/

廣場/老闆/椅子/老虎


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: rěrěr [Phonetic: rěrér] / tuàntuànr [Phonetic: tuàntuánr] / pāngpāngr [Phonetic: pāngpángr]
      • Sinological IPA (key): /ʐɤ²¹ ʐər²¹⁻²⁴/, /tʰuã⁵³ tʰuɐ̃r⁵³⁻²⁴/, /pʰaŋ⁵⁵ pʰɐ̃r⁵⁵⁻²⁴/

熱熱兒/短短兒/胖胖兒


鏡兒


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: ā / ngā / gā / xīng'ān [Phonetic: xǐng'ān] / xīngān [Phonetic: xǐngān] / xī'ngān [Phonetic: 'ngān]
      • Sinological IPA (key): /a⁵⁵/, /ŋa⁵⁵/, /ka⁵⁵/, /ɕiŋ⁵⁵⁻²¹ ã⁵⁵/, /ɕiẽ⁵⁵⁻²¹ kã⁵⁵/, /ɕi⁵⁵⁻²¹ ŋã⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: gangangan / zhichishiri / zhirchirshirrir
      • Sinological IPA (key): /kã kã kã/, /t͡ʂʐ̩ t͡ʂʰʐ̩ ʂʐ̩ ʐ̩/, /t͡ʂər t͡ʂʰər ʂər ʐər/

Massive tests

[edit]

All finals, null initial

[edit]


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: ā'ō'ē'ēr'āi'ēi'āo'ōu'ān'ēn'āng'ēng
      • Sinological IPA (key): /a⁵⁵ o⁵⁵ ɤ⁵⁵ ər⁵⁵ æ⁵⁵ ei⁵⁵ au⁵⁵ ɤu⁵⁵ ã⁵⁵ ẽ⁵⁵ aŋ⁵⁵ əŋ⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: yāyēyīyāiyāoyōuyānyīnyāngyīng
      • Sinological IPA (key): /ia⁵⁵ iɛ⁵⁵ i⁵⁵ iæ⁵⁵ iau⁵⁵ iɤu⁵⁵ iã⁵⁵ iẽ⁵⁵ iaŋ⁵⁵ iŋ⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: wāwōwēwūwāiwēiwānwēnwāngwēng
      • Sinological IPA (key): /ua⁵⁵ uo⁵⁵ ɯ⁵⁵ u⁵⁵ uæ⁵⁵ uei⁵⁵ uã⁵⁵ uẽ⁵⁵ uaŋ⁵⁵ uəŋ⁵⁵/


All finals, non-null initial

[edit]


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: bābōbēbērzhīzībāibēibāobōubānbēnbāngbēng
      • Sinological IPA (key): /pa⁵⁵ po⁵⁵ pɤ⁵⁵ pər⁵⁵ t͡ʂʐ̩⁵⁵ t͡sz̩⁵⁵ pæ⁵⁵ pei⁵⁵ pau⁵⁵ pɤu⁵⁵ pã⁵⁵ pẽ⁵⁵ paŋ⁵⁵ pəŋ⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: biābiēbībiāibiāobiūbiānbīnbiāngbīng
      • Sinological IPA (key): /pia⁵⁵ piɛ⁵⁵ pi⁵⁵ piæ⁵⁵ piau⁵⁵ piɤu⁵⁵ piã⁵⁵ piẽ⁵⁵ piaŋ⁵⁵ piŋ⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: buābuōbuēbūbuāibuībuānbūnbuāngbōng
      • Sinological IPA (key): /pua⁵⁵ puo⁵⁵ pɯ⁵⁵ pu⁵⁵ puæ⁵⁵ puei⁵⁵ puã⁵⁵ puẽ⁵⁵ puaŋ⁵⁵ puəŋ⁵⁵/


All finals with erhua, null initial

[edit]


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: ār'ēr'āir'ēir'āor'ōur'ānr'ēnr'āngr'ēngr
      • Sinological IPA (key): /ɐr⁵⁵ ər⁵⁵ ær⁵⁵ er⁵⁵ ɔr⁵⁵ ər⁵⁵ ɐ̃r⁵⁵ ə̃r⁵⁵ ɐ̃r⁵⁵ ə̃r⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: yāryēryīryāiryāoryōuryānryīnryāngryīngr
      • Sinological IPA (key): /iɐr⁵⁵ iɛr⁵⁵ iər⁵⁵ iær⁵⁵ iɔr⁵⁵ iər⁵⁵ iɐ̃r⁵⁵ iə̃r⁵⁵ iɐ̃r⁵⁵ iə̃r⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: wārwōrwūrwāirwēirwānrwēnrwāngrwēngr
      • Sinological IPA (key): /uɐr⁵⁵ uər⁵⁵ uər⁵⁵ uær⁵⁵ uer⁵⁵ uɐ̃r⁵⁵ uə̃r⁵⁵ uɐ̃r⁵⁵ uə̃r⁵⁵/


All finals with erhua, non-null initial

[edit]


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: bārbērbāirbēirzhīrzīrbāorbōurbānrbēnrbāngrbēngr
      • Sinological IPA (key): /pɐr⁵⁵ pər⁵⁵ pær⁵⁵ per⁵⁵ t͡ʂər⁵⁵ t͡sər⁵⁵ pɔr⁵⁵ pər⁵⁵ pɐ̃r⁵⁵ pə̃r⁵⁵ pɐ̃r⁵⁵ pə̃r⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: biārbiērbīrbiāirbiāorbiūrbiānrbīnrbiāngrbīngr
      • Sinological IPA (key): /piɐr⁵⁵ piɛr⁵⁵ piər⁵⁵ piær⁵⁵ piɔr⁵⁵ piər⁵⁵ piɐ̃r⁵⁵ piə̃r⁵⁵ piɐ̃r⁵⁵ piə̃r⁵⁵/


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: buārbuōrbūrbuāirbuīrbuānrbūnrbuāngrbōngr
      • Sinological IPA (key): /puɐr⁵⁵ puər⁵⁵ puər⁵⁵ puær⁵⁵ puer⁵⁵ puɐ̃r⁵⁵ puə̃r⁵⁵ puɐ̃r⁵⁵ puə̃r⁵⁵/


All initials

[edit]


  • Mandarin
    • (Xi'an)
      • Guanzhong Pinyin: bāpāmāfāvābvāpfā dātānālā gākā'ngāhā / jiāqiāxiā zhāchāshārā zācāsā yāwā
      • Sinological IPA (key): /pa⁵⁵ pʰa⁵⁵ ma⁵⁵ fa⁵⁵ va⁵⁵ pfa⁵⁵ pfʰa⁵⁵ ta⁵⁵ tʰa⁵⁵ na⁵⁵ la⁵⁵ ka⁵⁵ kʰa⁵⁵ ŋa⁵⁵ xa⁵⁵/, /t͡ɕia⁵⁵ t͡ɕʰia⁵⁵ ɕia⁵⁵ t͡ʂa⁵⁵ t͡ʂʰa⁵⁵ ʂa⁵⁵ ʐa⁵⁵ t͡sa⁵⁵ t͡sʰa⁵⁵ sa⁵⁵ ia⁵⁵ ua⁵⁵/

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- IPA string for each initial consonant, keyed by its encoded spelling.
-- Upper-case keys are the one-character stand-ins that digraph_encode
-- produces (B = bv, P = pf, N = ng, Z = zh, C = ch, S = sh).
-- The empty-string key is the null initial (syllable starts with its final).
local initials = {
	b = "p", p = "pʰ", m = "m", f = "f", v = "v", B = "pf", P = "pfʰ",
	d = "t", t = "tʰ", n = "n", l = "l",
	g = "k", k = "kʰ", N = "ŋ", h = "x",
	j = "t͡ɕ", q = "t͡ɕʰ", x = "ɕ",
	Z = "t͡ʂ", C = "t͡ʂʰ", S = "ʂ", r = "ʐ",
	z = "t͡s", c = "t͡sʰ", s = "s",
	[""] = "",
}

-- IPA string for each final, keyed by the spelling that py_normalize
-- produces (initial and final written separately, e.g. "ui" rather than "wei").
-- In the keys U stands for ü and N for ng (see digraph_encode / encode).
-- Keys ending in "r" are the r-suffixed (erhua) variants; each such row sits
-- directly under the row of plain finals it belongs to, and several of them
-- share one IPA value (e.g. anr and aNr).
-- "in" is a Lua keyword, hence the bracketed key.
local finals = {
	a = "a", ia = "ia", ua = "ua",
	ar = "ɐr", iar = "iɐr", uar = "uɐr",
	o = "o", uo = "uo", Uo = "yo",
	er = "ər", uor = "uər",
	e = "ɤ",
	ue = "ɯ", ie = "iɛ", Ue = "yɛ",
	ier = "iɛr", Uer = "yɛr",
	ii = "z̩", ih = "ʐ̩", i = "i", u = "u", U = "y",
	iir = "ər", ihr = "ər", ir = "iər", ur = "uər", Ur = "yər",
	ai = "æ", iai = "iæ", uai = "uæ",
	air = "ær", iair = "iær", uair = "uær",
	ei = "ei", ui = "uei",
	eir = "er", uir = "uer",
	ao = "au", iao = "iau",
	aor = "ɔr", iaor = "iɔr",
	ou = "ɤu", iu = "iɤu",
	our = "ər", iur = "iər",
	an = "ã", ian = "iã", uan = "uã", Uan = "yã",
	anr = "ɐ̃r", ianr = "iɐ̃r", uanr = "uɐ̃r", Uanr = "yɐ̃r",
	en = "ẽ", ["in"] = "iẽ", un = "uẽ", Un = "yẽ",
	enr = "ə̃r", inr = "iə̃r", unr = "uə̃r", Unr = "yə̃r",
	aN = "aŋ", iaN = "iaŋ", uaN = "uaŋ",
	aNr = "ɐ̃r", iaNr = "iɐ̃r", uaNr = "uɐ̃r",
	eN = "əŋ", iN = "iŋ", oN = "uəŋ", ioN = "yoŋ",
	eNr = "ə̃r", iNr = "iə̃r", oNr = "uə̃r", ioNr = "yə̃r",
}

-- Superscript tone contour for each tone number. The key is the tone number
-- as a string: py_transf derives it from the byte value (1-4) of the encoded
-- tone mark, or uses "5" when the syllable carries no mark.
local tones = {
	["1"] = "²¹", -- yin ping, "dark level" (T1)
	["2"] = "²⁴", -- yang ping, "light level" (T2)
	["3"] = "⁵³", -- shang, "rising" (T3)
	["4"] = "⁵⁵", -- qu, "departing" (T4)
	["5"] = "", -- toneless (T0)
}

-- internal use, encode and decode digraphs
-- digraph_encode maps every two-byte sequence that must be handled as one
-- unit to a single byte: consonant digraphs become an upper-case letter, and
-- the UTF-8 encodings of the combining tone marks become control bytes \1-\4.
local digraph_encode = {
	bv = "B", pf = "P", ng = "N", zh = "Z", ch = "C", sh = "S",
	["\204\140"] = "\1", -- U+030C combining caron
	["\204\129"] = "\2", -- U+0301 combining acute accent
	["\204\128"] = "\3", -- U+0300 combining grave accent
	["\204\132"] = "\4", -- U+0304 combining macron
}
-- digraph_decode is the inverse mapping used when text is shown to the user
-- (including error messages). It additionally expands U back to "ü" and turns
-- the \5 / \6 markers, which py_process puts around syllables that carry a
-- tone-change digit, into an HTML highlight span.
local digraph_decode = {
	B = "bv", P = "pf", N = "ng", Z = "zh", C = "ch", S = "sh", U = "ü",
	["\1"] = "\204\140",
	["\2"] = "\204\129",
	["\3"] = "\204\128",
	["\4"] = "\204\132",
	["\5"] = '<span style="background-color:#F5DEB3">',
	["\6"] = "</span>",
}
-- Turns a reading into the module's internal one-byte-per-unit form.
-- The text is first decomposed (NFD) so that tone marks and the diaeresis
-- become separate combining characters; "u" + diaeresis is then written as U,
-- and every pair listed in digraph_encode is replaced by its single byte.
-- Pairs matched by the pattern but absent from digraph_encode stay unchanged.
local function encode(text)
	local decomposed = mw.ustring.toNFD(text)
	local with_umlaut = decomposed:gsub("u\204\136", "U")
	local encoded = with_umlaut:gsub("[bpnzcs\204][vfgh\128\129\132\140]", digraph_encode)
	return encoded
end
-- Converts internally encoded text back to displayable pinyin: every byte
-- listed in digraph_decode is expanded (digraphs, ü, tone marks, highlight
-- span markers) and the result is recomposed to NFC.
-- Returns the decoded string.
local function decode(text)
	-- gsub returns (string, count); keep only the string so that the
	-- substitution count is not handed to toNFC as a stray second argument.
	local expanded = text:gsub("[BPNZCSU\1-\7]", digraph_decode)
	local composed = mw.ustring.toNFC(expanded)
	return composed
end

-- Removes the syllable-dividing apostrophes that stand before a consonant
-- initial (optionally preceded by the \5 highlight marker), then re-encodes
-- any "ng" that results as N. Apostrophes before a vowel or before N are kept.
local function py_join_syllables(text)
	local without_breaks = text:gsub("'(\5?[bpmfvBPdtnlgkhjqxZCSrzcsyw])", "%1")
	local joined = without_breaks:gsub("ng", "N")
	return joined
end

-- Splits an encoded reading into syllables by inserting apostrophes, and
-- checks the apostrophes the editor typed against that division.
-- Raises an error if joining the divided text again (py_join_syllables) does
-- not give back exactly the input.
-- Returns the divided text.
local function py_divide_syllables(text)
	-- N between a vowel (or tone mark) and a vowel is read as n + g
	local divided = text:gsub("([aeiouU\1-\4])N%f[aeiouU]", "%1n'g")
	-- every initial followed by a vowel opens a new syllable
	divided = divided:gsub("[bpmfvBPdtnlgkNhjqxZCSrzcsyw][aeiouU]", "'%0")
	-- an apostrophe typed by the editor plus an inserted one count once
	divided = divided:gsub("''+", "'")
	-- no apostrophe at the very start of the text or right after a space
	divided = divided:gsub("%f[^ %z]'", "")

	local rejoined = py_join_syllables(divided)
	if rejoined == text then
		return divided
	end
	error("Xi'an: error with apostrophes, "..decode(text).." should be "..decode(rejoined)..".")
end

-- Puts the encoded tone mark for `tone` ("1".."4") into a toneless syllable.
-- The mark goes right after the first vowel, or after the second letter when
-- the first vowel is i/u/U immediately followed by another vowel.
-- Tone "5" adds no mark. A syllable without a vowel is returned unchanged.
local function py_put_tone(syllable, tone)
	local mark = ""
	if tone ~= "5" then
		-- the encoded mark is the byte whose value equals the tone number
		mark = string.char(tone)
	end
	local marked = syllable:gsub("[iuU]?[aeiouU]", "%0" .. mark, 1)
	return marked
end

-- Rewrites one encoded syllable as tone number + toneless spelling,
-- e.g. a syllable carrying the byte \2 comes back prefixed with "2".
-- A syllable without any tone mark gets the prefix "5".
-- Raises an error if the syllable holds more than one tone mark, or if the
-- mark is not where py_put_tone would place it.
local function py_transf(syllable)
	local bare, marks_found = syllable:gsub("[\1-\4]", "")
	if marks_found > 1 then
		error("Xi'an: two tones in one syllable: " .. decode(syllable))
	end

	-- byte value of the mark is the tone number; \5 stands for "no mark"
	local mark = syllable:match("[\1-\4]") or "\5"
	local tone = tostring(mark:byte(1))

	local expected = py_put_tone(bare, tone)
	if expected ~= syllable then
		error("Xi'an: error with tone placement, "..decode(syllable).." should be "..decode(expected)..".")
	end
	return tone .. bare
end

-- canonize to adhere to pinyin rules, e.g. jü -> ju
-- Inverse of py_normalize: takes initial+final spellings and gives the
-- spelling an editor is expected to type. The steps are order-dependent.
local function py_canonize(text)
	local canon = text

	-- ü is written u after j, q, x
	canon = canon:gsub("([jqx])U", "%1u")

	-- syllable-initial u: written with w (ui -> wei, un -> wen)
	canon = canon:gsub("%f[%l%u]u[in]?", {u="w",ui="wei",un="wen"})
	-- syllable-initial oN -> weN
	canon = canon:gsub("%f[%l%u]oN", "weN")
	-- a w left with no vowel after it (bare u, or u + r) gets its u back
	canon = canon:gsub("w(r?)%f[^%l%u]", "wu%1")

	-- syllable-initial i: written with y (ih -> ri, iu -> you)
	canon = canon:gsub("%f[%l%u]i[hu]?", {i="y",ih="ri",iu="you"})
	-- a y left with no vowel after it (i, in, iN, with or without r) gets its i back
	canon = canon:gsub("y([nN]?r?)%f[^%l%u]", "yi%1")

	-- syllable-initial ü -> yu
	canon = canon:gsub("%f[%l%u]U", "yu")

	-- the two apical vowels ii / ih are both spelled i
	canon = canon:gsub("i[ih]", "i")

	return canon
end

-- normalize to initial+final, e.g. ju -> jü
-- Undoes the pinyin spelling conventions so that each syllable is a plain
-- initial followed by a key of the finals table. The steps are order-dependent.
-- Raises an error if py_canonize does not map the result back to the input,
-- i.e. the input was not spelled canonically.
local function py_normalize(text)
	local norm = text

	-- u after j, q, x is really ü
	norm = norm:gsub("([jqx])u", "%1U")

	-- w spellings: the listed forms collapse, any other w becomes u
	norm = norm:gsub("w[ue][inN]?", {wu="u",wei="ui",wen="un",weN="oN"})
	norm = norm:gsub("w", "u")

	-- y spellings: the listed forms collapse, any other y becomes i
	norm = norm:gsub("y[iuo]u?", {yi="i",yu="U",you="iu"})
	norm = norm:gsub("y", "i")

	-- i after z/c/s and after zh/ch/sh/r is an apical vowel (ii / ih)
	norm = norm:gsub("([zcs])i", "%1ii")
	norm = norm:gsub("([ZCSr])i", "%1ih")
	-- "ri" standing as a whole syllable is the null-initial ih
	norm = norm:gsub("rih%f[^%l%u]", "ih")

	local canonical = py_canonize(norm)
	if canonical ~= text then
		error("Xi'an: invalid syllable: "..decode(text).." should be "..decode(canonical))
	end
	return norm
end

-- Converts space-separated normalized syllables to an IPA transcription
-- wrapped in slashes. Each syllable has the shape
--   tone digit, optional initial, final, optional second tone digit
-- where the second digit, if present, is appended after "⁻" as the tone the
-- syllable changes to.
-- Raises an error for a syllable of the wrong shape or an unknown final.
local function py_to_ipa(text)
	local function syllable_to_ipa(syllable)
		local tone, initial, final, sandhi = syllable:match("^([12345])([bpmfvBPdtnlgkNhjqxZCSrzcs]?)([aeiouU][%lN]*)([12345]?)$")
		if not tone then
			error("Xi'an: Invalid syllable: " .. decode(syllable))
		end

		local ipa_initial = initials[initial]
		if not ipa_initial then
			error("Xi'an: Invalid initial: " .. decode(initial))
		end
		local ipa_final = finals[final]
		if not ipa_final then
			error ("Xi'an: Invalid final: " .. decode(final))
		end

		local ipa = ipa_initial .. ipa_final .. tones[tone]
		if sandhi ~= "" then
			ipa = ipa .. "⁻" .. tones[sandhi]
		end
		return ipa
	end

	local converted = text:gsub("[^ ]+", syllable_to_ipa)
	return "/" .. converted .. "/"
end

-- Processes a "/"-separated list of readings.
-- A digit 1-5 written after a syllable gives the tone that syllable changes
-- to; it is dropped from the displayed text and used for the "Phonetic"
-- annotation and for the IPA.
-- returns (display_text, phonetic_text, ipa)
--   display_text:  readings as typed, digits removed, joined with " / "
--   phonetic_text: same, each reading that has digits followed by
--                  " [Phonetic: ...]" with the changed syllables highlighted
--   ipa:           one /.../ transcription per reading, joined with ", "
function export.py_process(text)
	local shown_list, annotated_list, ipa_list = {}, {}, {}
	local count = 0

	for reading in mw.text.gsplit(text, "/", true) do
		count = count + 1

		local shown = reading:gsub("[12345]", "")
		shown_list[count] = shown

		-- no check is done for things like "xUān", any capitalisation is valid
		local encoded = py_divide_syllables(encode(mw.ustring.lower(reading)))

		local annotated = shown
		if encoded:match("[12345]") then
			-- rewrite each digit-marked syllable with the tone mark of its
			-- new tone and wrap it in the \5 ... \6 highlight markers
			local phonetic = encoded:gsub(
				"([bpmfvBPdtnlgkNhjqxZCSrzcsyw]?[iuU]?[aeiouU])[\1-\4]?([%lN]*)([1-5])",
				function(head, tail, digit)
					local mark = ""
					if digit ~= "5" then
						mark = string.char(digit)
					end
					return "\5" .. head .. mark .. tail .. "\6"
				end)
			phonetic = py_join_syllables(phonetic)
			annotated = shown .. " [Phonetic: " .. decode(phonetic) .. "]"
		end
		annotated_list[count] = annotated

		-- one space-separated token per syllable, each as tone + spelling
		local spaced = encoded:gsub("'", " ")
		local toned = spaced:gsub("[^ ]+", py_transf)
		local normalized = py_normalize(toned)
		ipa_list[count] = py_to_ipa(normalized)
	end

	local display_text = table.concat(shown_list, " / ")
	local phonetic_text = table.concat(annotated_list, " / ")
	local ipa_text = table.concat(ipa_list, ", ")
	return display_text, phonetic_text, ipa_text
end

-- Hand the table of public functions (py_process) to whoever loads the module.
return export