Module:User:AmazingJus/sce

From Wiktionary, the free dictionary
Jump to navigation Jump to search
TextExpectedActual
test_phonemic_IPA:
Passedbaer/pɑˈɚ//pɑˈɚ/
Passedtiigha/tʰɯˈqɑ//tʰɯˈqɑ/
Passedniere/njəˈrə//njəˈrə/
Passedenzhegve/ənt͡ʂəˈʁə//ənt͡ʂəˈʁə/
Passedxiaojierun (xiaojieruŋ)/ɕjɑwt͡ɕjəˈruŋ//ɕjɑwt͡ɕjəˈruŋ/
Passedershi ('ershi)/ˈɚʂi//ˈɚʂi/
Passedruhher ('ruhher)/ˈʐuhɚ//ˈʐuhɚ/
TextExpectedActual
test_phonetic_IPA:
Failedkorolon[kʰoroˈlõŋ][k⁽ʷ⁾or⁽ʷ⁾ol⁽ʷ⁾õn]
Failedhotou[xʷoˈtəu̯~xʷoˈtɤu̯][h⁽ʷ⁾otəu̯ ~ h⁽ʷ⁾otɤu̯]

local export = {}

local gsub = mw.ustring.gsub
local gsplit = mw.text.gsplit
local lower = mw.ustring.lower
local match = mw.ustring.match
local str = require("Module:string")
local trim = mw.text.trim

--[[
	Sourced from A grammatical overview of Santa Mongolian by Kenneth Lynn Field (1997),
	东乡语汉语词典 (Dongxiang-Chinese Dictionary) by Ma Guozhong (马国忠) (2001)
]]--
-- Phoneme tables, each keyed by orthographic spelling
local phons = {
	-- consonant spellings mapped to their phonemic IPA value
	cons_m = {
		b = "p", p = "pʰ", m = "m", f = "f",
		d = "t", t = "tʰ", n = "n", l = "l", r = "r",
		g = "k", k = "kʰ", gh = "q", kh = "qʰ",
		h = "x", gv = "ʁ", hh = "h",
		j = "t͡ɕ", q = "t͡ɕʰ", x = "ɕ",
		zh = "t͡ʂ", ch = "t͡ʂʰ", sh = "ʂ",
		z = "t͡s", c = "t͡sʰ", s = "s",
		w = "w", y = "j",
	},
	-- vowel spellings (single vowels and vowel sequences) mapped to their phonemic IPA value
	vowel_m = {
		iao = "jɑw", uai = "wɑj",
		ai = "ɑj", ei = "əj", ao = "ɑw", ou = "əw", ii = "ɯ",
		ia = "jɑ", ie = "jə", iu = "ju", ui = "wəj", ua = "wɑ",
		a = "ɑ", o = "o", e = "ə", i = "i", u = "u",
	},
	-- diphthong spellings mapped to their phonetic realisation;
	-- a two-item table lists two alternative realisations
	diph_t = {
		iao = { "i̯ɑu̯", "iou̯" }, uai = { "u̯ɑi̯", "u̯ɛi̯" },
		ai = { "ɑi̯", "ɛi̯" }, ei = "ə̝i̯", ao = { "ɑu̯", "ou̯" }, ou = { "əu̯", "ɤu̯" },
		ia = "i̯ɑ", ie = "i̯ɛ", iu = "i̯u", ui = "u̯(ə̝)i̯", ua = "u̯ɑ",
	},
}
-- Patterns that recognise orthographic units
local graphs = {
	-- one optional consonant letter followed by an optional "h" or "v";
	-- both parts are optional, so this pattern can also match the empty string
	["cons_g"] = "[bpmfdtnŋlrgkhjqxzcswy]?[hv]?",
	-- a run of one or more vowel letters
	["vowel_g"] = "[aeiou]+",
}

-- Syllable count of a string: every "ɚ" counts as one syllable,
-- and so does every run of consecutive vowel letters
local function count_syll(text)
	local rhotic_total = str.count(text, "ɚ")
	local vowel_run_total = str.count(text, "[aeiou]+")
	return rhotic_total + vowel_run_total
end

-- Resolves the "ŋ" shorthand for syllable-final n.
-- Raises an error if the first "n + non-vowels" cluster found in the text
-- occurs more than once in it. If the text is exactly "ŋ", the current page
-- title is returned with every "n" replaced by "ŋ"; any other text is
-- returned unchanged.
local function n_final(text)
	local cluster = match(text, "n[^aeiou]+")
	if cluster ~= nil and str.count(text, cluster) > 1 then
		error("ŋ is ambiguous. Please provide the full respelling.")
	end

	if text ~= "ŋ" then
		return text
	end

	-- NOTE(review): this rewrites all n's in the title, not only syllable-final ones — confirm intended
	local title = mw.title.getCurrentTitle().text
	local respelt = gsub(title, "n", "ŋ")
	return respelt
end

-- Substitutes a pattern that has two alternative replacements.
-- text:    the string to search
-- pattern: the pattern to replace (named so that it does not shadow the
--          standard `string` library)
-- subs:    a table whose first two items are the alternative replacements
-- Returns text unchanged when the pattern does not occur; otherwise returns
-- the result of each replacement applied to the whole text, joined by " ~ ".
local function sub_multiple(text, pattern, subs)
	local first, hits = gsub(text, pattern, subs[1])

	-- both substitutions use the same pattern, so one match count covers both
	if hits == 0 then
		return text
	end

	local second = gsub(text, pattern, subs[2])
	return first .. " ~ " .. second
end

-- Normalises a respelling before transcription.
-- The text is lowercased and padded with a space on each side so that every
-- word is delimited by spaces, then the respelling rules are applied in order.
-- When etyl is "zh" or "ar", every r is additionally turned into ʐ.
local function adjust(text, etyl)
	local respelt = " " .. lower(text) .. " "

	-- ordered { pattern, replacement } pairs; later rules see the output of earlier ones
	local rules = {
		{ "'", "ˈ" }, -- apostrophe becomes a stress mark
		{ "gvi", "gvii" }, -- i after gv is pronounced as ii
		{ "([gkaeiou ]hi)", "%1i" }, -- same after gh, kh and h
		{ "er([^aeiou])", "ɚ%1" }, -- "er" not followed by a vowel is its own syllable
	}
	for _, rule in ipairs(rules) do
		respelt = gsub(respelt, rule[1], rule[2])
	end

	-- r is pronounced as ʐ in words of Chinese and Arabic origin
	local sinitic_or_arabic = (etyl == "zh" or etyl == "ar")
	if sinitic_or_arabic then
		respelt = gsub(respelt, "r", "ʐ")
	end

	return respelt
end

-- Produces the phonemic IPA transcription of a respelling.
-- text is first passed through n_final and adjust (with etyl); each
-- space-terminated word is then stressed if necessary and mapped through the
-- consonant and vowel phoneme tables. The trimmed result is returned.
function export.IPA_m(text, etyl)
	local respelt = adjust(n_final(text), etyl)

	-- transcribes one word; the word still carries its trailing space
	local function transcribe_word(word)
		-- a word of more than one syllable with no explicit stress mark
		-- receives stress on its final syllable
		local needs_stress = not match(word, "ˈ") and count_syll(word) > 1
		if needs_stress then
			if match(word, "ɚ ") then
				word = gsub(word, "(" .. graphs.cons_g .. "ɚ) ", "ˈ%1 ")
			else
				word = gsub(word, "(" .. graphs.cons_g .. graphs.vowel_g .. "[^aeiou]*) ", "ˈ%1 ")
			end
		end

		-- consonants first, then vowels; spellings missing from a table are left as they are
		local with_consonants = gsub(word, graphs.cons_g, phons.cons_m)
		return with_consonants:gsub(graphs.vowel_g, phons.vowel_m)
	end

	respelt = gsub(respelt, "[^ ]* ", transcribe_word)

	return trim(respelt)
end

-- Convert the text to a phonetic IPA transcription.
-- text: a respelling (or the "ŋ" shorthand handled by n_final)
-- etyl: optional etymology code, passed on to adjust
-- Returns the trimmed transcription. When a diphthong has two listed
-- realisations, both variants of the whole text are returned joined by " ~ ".
function export.IPA_t(text, etyl)
	text = n_final(text) -- handle syllable-final n
	text = adjust(text, etyl) -- adjust the text by respelling

	-- diphthongs
	-- These run before the single-vowel rules: once "a" has been rewritten to
	-- "ɑ" the spellings "iao", "uai", "ai", "ao", "ia" and "ua" can no longer match.
	-- The order is fixed (pairs() gives no ordering guarantee): "ou" goes first
	-- so that it cannot re-match the "ou̯" variant produced for "ao"/"iao", and
	-- the three-letter spellings precede the two-letter ones they contain.
	local diph_order = { "ou", "iao", "uai", "ai", "ei", "ao", "ia", "ie", "iu", "ui", "ua" }
	for _, spelling in ipairs(diph_order) do
		local realisation = phons.diph_t[spelling]
		if type(realisation) == "table" then
			text = sub_multiple(text, spelling, realisation)
		elseif realisation then
			text = gsub(text, spelling, realisation)
		end
	end

	-- a (only monophthongs are left at this point)
	text = gsub(text, "an", "æn") -- /ɑ/ before /n/ is [æ]
	text = gsub(text, "a", "ɑ")

	-- e
	text = gsub(text, "eŋ", "ɤŋ") -- /ə/ before /ŋ/ is [ɤ]
	text = gsub(text, "ˈ(" .. graphs.cons_g .. ")e", "ˈ%1ɛ") -- otherwise stressed /ə/ is [ɛ]
	text = gsub(text, "e", "ə")

	-- i
	text = gsub(text, "([csz]h)i", "%1ɨ") -- /i/ is [ɨ] after retroflex consonants
	text = gsub(text, "([csz])i", "%1ɪ") -- /i/ is [ɪ] after alveolar affricates and fricatives

	-- o
	-- NOTE(review): cons_g can match the empty string, so this marks every "o",
	-- including the [ou̯] diphthong variants — confirm whether that is wanted
	text = gsub(text, "o(" .. graphs.cons_g .. ")", "⁽ʷ⁾o%1") -- /o/ can be labialised as [⁽ʷ⁾o]

	-- devoiced vowels

	-- vowels before /n, ŋ/ are nasalised
	-- Only vowel symbols (with an optional non-syllabic mark) take the tilde, so
	-- a consonant or the padding space before a word-initial n is left alone.
	text = gsub(text, "([aeiouɑæɛəɤɨɪɚ]̯?)([nŋ])", "%1̃%2")

	return trim(text)
end

-- Template entry point: renders the phonemic pronunciation line.
-- Every non-empty positional parameter of the calling template is treated as
-- a respelling and transcribed with export.IPA_m. When no non-empty
-- respelling is supplied (including the case where the first parameter is
-- present but empty), the current page title is transcribed instead, so the
-- formatter never receives an empty item list.
-- The optional `etyl` parameter is passed on to export.IPA_m.
function export.show(frame)
	local args = frame:getParent().args -- get the arguments
	local etyl = args.etyl -- get the etymology
	local results = {} -- store the results

	for _, param in ipairs(args) do
		if param ~= "" then
			table.insert(results, {pron = "/" .. export.IPA_m(param, etyl) .. "/"})
		end
	end

	-- nothing usable was passed: fall back to the current page title
	if #results == 0 then
		local text = mw.title.getCurrentTitle().text
		table.insert(results, {pron = "/" .. export.IPA_m(text, etyl) .. "/"})
	end

	return "*" .. require("Module:IPA").format_IPA_full { lang = require("Module:languages").getByCode("sce"), items = results }
end

return export