Module:User:AmazingJus/mn

From Wiktionary, the free dictionary
Jump to navigation Jump to search

mn

[edit]
TextExpectedActual
test_phonemic_IPA:
Failedавто́бус (avtóbus)/afˈtʰɔpʊs//awˈtʰps/
Passedай (aj)/ˈai̯//ˈai̯/
Failedаргалаа (argalaa)/arɢəˈɮa//arɢˈɮa/
Failedая (aja)/ˈaj//ajˈa/
Failedхонх (xonx)/ˈxɔŋx//ˈxɔnx/
Passedбаг (bag)/ˈpaɡ//ˈpaɡ/
Passedбага (baga)/ˈpaɢ//ˈpaɢ/
Failedборви (borvi)/ˈpɔrʲəwʲ//ˈpɔrʲwʲ/
PassedБудда~ (Budda~)/pʊtˈta//pʊtˈta/
FailedБудда́ (Buddá)/pʊtˈta//pʊtˈt/
Passedгалуу (galuu)/ɢaˈɮʊ//ɢaˈɮʊ/
Failedежен (ježen)/ˈjet͡ɕəŋ//ˈjet͡ɕŋ/
Failedкоммерса́нт (kommersánt)/kʰɔmirˈsantʰ//kʰɔmmrˈsntʰ/
Failedлхагва (lxagva)/ˈɬaɢʷ//ˈɬaɢw/
Failedмэргэжилтэн (mergežilten)/ˈmerəɢt͡ɕəɮtʰəŋ//ˈmerɢt͡ɕɮtʰŋ/
Failedмонгол хэл (mongol xel)/ˈmɔnɢəɮ xeɮ//ˈmɔnɢɮ ˈxeɮ/
Failedнутаг (nutag)/ˈnʊtʰəɡ//ˈnʊtʰɡ/
Passedоньс (onʹs)/ˈɔnʲs//ˈɔnʲs/
Passedсайн (sajn)/ˈsai̯ŋ//ˈsai̯ŋ/
Failedсайн байна уу (sajn bajna uu)/sai̯ŋ pai̯n ˈʊː//ˈsai̯ŋ ˈpai̯n ˈʊː/
Passedсалхи (salxi)/ˈsaɮʲxʲ//ˈsaɮʲxʲ/
Failedсармагчин (sarmagčin)/ˈsarməɡt͡ɕʰəŋ//ˈsarmɢt͡ɕʰŋ/
Passedтагт (tagt)/ˈtʰaɢtʰ//ˈtʰaɢtʰ/
Passedтаг*т (tag*t)/ˈtʰaɡtʰ//ˈtʰaɡtʰ/
Failedтакси́ (taksí)/tʰakʰˈsʲi//tʰakʰˈs/
Passedтийм (tiim)/ˈtʲʰiːm//ˈtʲʰiːm/
Failedхаалга (xaalga)/ˈxaːɮəɢ//ˈxaːɮɢ/
Passedхиам (xiam)/ˈxʲaːm//ˈxʲaːm/
Failedхонх (xonx)/ˈxɔŋx//ˈxɔnx/
Failedцонх (conx)/ˈt͡sʰɔŋx//ˈt͡sʰɔnx/
Passedшуу (šuu)/ˈʃʊː//ˈʃʊː/
Passedууртайгаар (uurtajgaar)/ʊːrˈtʰai̯ɢar//ʊːrˈtʰai̯ɢar/
Failedуйтгартай (ujtgartaj)/ˈʊi̯tʰɢərtʰai̯//ʊi̯tʰɢrˈtʰai̯/
Passedюул (juul)/ˈjʊːɮ//ˈjʊːɮ/
Passedэнэ (ene)/ˈin//ˈin/
Failedэрдэнэ (erdene)/ˈirtən//ˈirtn/
FailedЭрдэнэт (Erdenet)/ˈirtəntʰ//ˈirtntʰ/
FailedӨргөдөл (Örgödöl)/ˈorəɢtəɮ//ˈorɢtɮ/
Passedхөшөөт (xöšööt)/xoˈʃotʰ//xoˈʃotʰ/



local export = {}

local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsubn = mw.ustring.gsub
local lc = mw.ustring.lower
local match = mw.ustring.match
local strip = mw.text.trim
local sub = mw.ustring.sub
local u = require("Module:string/char")

local lang = require("Module:languages").getByCode("mn")
local sc = require("Module:scripts").getByCode("Cyrl")
local mn = require("Module:mn-common")
local final_clusters = require("Module:mn/data").syll_final_cons

-- Tag `text` using the module-level `lang` and `sc` objects by delegating to
-- Module:script utilities; `face` is forwarded unchanged.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

-- Build a link for `term` through Module:links, using the module-level `lang`
-- and `sc` objects; `face` is forwarded unchanged.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

-- Single-result wrapper around gsubn(): only the substituted string is
-- returned, the substitution count is dropped. This keeps the count from
-- leaking into argument lists or callback return values.
local function gsub(term, foo, bar, n)
	-- Parenthesising the call truncates its results to the first value.
	return (gsubn(term, foo, bar, n))
end

--[[
	Primarily sourced from The Phonology of Mongolian by Jan-Olof Svantesson (2005)
]]--

--[[
	Character sets, kept as plain strings so that they can be spliced
	into bracketed pattern classes.
]]--
local chars = {
	-- Cyrillic consonant letters.
	c = "бвгджзклмнпрстфхцчшщ",
	-- Full vowels as written in IPA.
	u = "aeiɔoʊu",
	-- Cyrillic vowel letters that can be reduced.
	v = "аеёиоуэюяөү",
	-- Every Cyrillic vowel letter (the reducible set plus ы).
	y = "аеёиыоуэюяөү",
}

--[[
	IPA marks used by the transcription.
]]--
local primary = u(0x02C8) -- Primary stress mark (ˈ)
local secondary = u(0x02CC) -- Secondary stress mark (ˌ)
local long = u(0x02D0) -- Long vowel mark (ː)
local diphthong = u(0x032F) -- Diphthong mark (̯)

--[[
	Orthographic stress accents and the IPA stress mark each one stands for.
]]--
local stress = {
	[u(0x0301)] = primary, -- Combining acute accent
	[u(0x0300)] = secondary, -- Combining grave accent
}

--[[
	Lookup tables that turn letters (and some IPA sequences) into their
	output representation, grouped by purpose.
--]]
local mapping = {
	-- Consonant letters to phonemic IPA. The keys with a trailing asterisk
	-- are the marked variants of г and н.
	cons = {
		["б"] = "p", ["в"] = "w", ["г"] = "ɢ", ["г*"] = "ɡ", ["д"] = "t", ["ж"] = "t͡ɕ",
		["з"] = "t͡s", ["й"] = "i̯", ["к"] = "kʰ", ["л"] = "ɮ", ["м"] = "m", ["н"] = "n",
		["н*"] = "ŋ", ["п"] = "pʰ", ["р"] = "r", ["с"] = "s", ["т"] = "tʰ", ["ф"] = "f",
		["х"] = "x", ["ц"] = "t͡sʰ", ["ч"] = "t͡ɕʰ", ["ш"] = "ʃ", ["щ"] = "ʃt͡ɕ",
	},
	-- Vowel letters and the hard/soft signs to phonemic IPA.
	vowels = {
		["а"] = "a", ["е"] = "je", ["ё"] = "jɔ", ["и"] = "i", ["о"] = "ɔ", ["у"] = "ʊ",
		["э"] = "e", ["ю"] = "jʊ", ["я"] = "ja", ["ө"] = "o", ["ү"] = "u", ["ы"] = "i",
		["ъ"] = "", ["ь"] = "ʲ",
	},
	-- Orthographic double vowels, in the order in which they are processed.
	double = {
		"аа", "ее", "еи", "еө", "ёо", "ий", "оо",
		"уу", "ээ", "юу", "юү", "яа", "өө", "үү",
	},
	-- Consonant allophones, keyed by IPA.
	alloph = { ["w"] = "w̜", ["ɡʲ"] = "ɟ", ["xʲ"] = "ç", ["x"] = "χ" },
	-- Phonetic realisations of diphthongs.
	diph = { ["ai"] = "æe", ["ei"] = "e", ["oi"] = "ɞe", ["ui"] = "ɵe", ["üi"] = "ue" },
}

--[[
	Look up `vowel` with mn.vowelharmony() and return the `position` field
	of the first entry of the result. Returns nil when the result has no
	first entry.
	NOTE(review): the shape of the vowel-harmony result is defined in
	Module:mn-common; confirm there what `position` holds.
]]--
local function get_position(vowel)
	local first = mn.vowelharmony(vowel)[1]

	if not first then
		return nil
	end

	return first.position
end

--[[
	Check whether the two-part cluster `a .. b` is a valid final cluster.
	A cluster ending in "j" is always accepted. Otherwise the cluster is
	tested against every pattern in every group of `final_clusters`; the
	first pattern that matches makes the cluster valid.
	Returns true or false.
	NOTE(review): assumes the entries of `final_clusters` are Lua patterns,
	as the wording "matches any of the patterns" implies; confirm against
	Module:mn/data.
]]--
local function is_valid_cluster(a, b)
	if b == "j" then
		return true -- Any cluster ending in /j/ is valid.
	end

	local cluster = a .. b
	for _, depth in ipairs(final_clusters) do -- Loop through every group of final cluster patterns.
		for _, pattern in ipairs(depth) do
			-- mw.ustring.match takes the subject first and the pattern second.
			if match(cluster, pattern) then
				return true
			end
		end
	end

	return false
end

--[[
	Respell the vowels of one #-delimited word according to syllable position.
	In the initial syllable (everything up to and including the first run of
	vowel letters), single vowels are short and double vowels are long.
	In the remainder of the word, single vowels are removed and double vowels
	become short. A vowel is kept when it is followed by the length mark, by й
	(a diphthong), or by a combining acute/grave accent (an explicitly marked stress).
	Removed vowels are not re-inserted here.

	@param word  A lowercase word wrapped in # markers.
	@return      The respelt word, still wrapped in # markers.
--]]
local function respell_vowels(word)
	-- Combining acute and grave accents. They are built at run time rather than
	-- written as literals so they cannot fuse with a neighbouring letter in the source text.
	local accents = u(0x0301) .. u(0x0300)

	-- Handle substitutions to palatalise consonants.
	word = gsub(word, "([" .. chars.c .. "])и([аоу])", "%1j%2%2") -- иа, ио, иу are long monophthongs which palatalise the preceding consonant.
	word = gsub(word, "и(й?)", "jи%1") -- Palatalise all other instances of и. (FIXME: need to consider when converting chars.v into Cyrillic)
	word = gsub(word, "([" .. chars.c .. "])j", function(c)
		-- Only certain consonants take the palatal mark; for the rest the j is simply dropped.
		-- mw.ustring.match takes the subject first and the pattern second.
		return match(c, "[бвгдлмнпртх]") and c .. "ʲ" or c
	end)

	-- Handle vowel respellings.
	word = gsub(word, "#э", "#и") -- э word-initially merges with и.
	word = gsub(word, "([" .. chars.v .. "])~", "%1" .. long) -- A tilde signifies a vowel's full form word-finally.
	for _, vowel in ipairs(mapping.double) do -- ipairs guarantees the listed order.
		word = gsub(word, vowel, sub(vowel, 1, 1) .. long) -- Double vowels are long.
	end
	word = gsub(word, "([гн])#", "%1*#") -- Mark word-final г and н with an asterisk (important for distinguishing between uvular and alveolar phonemes marked by silent vowels).

	-- Handle non-initial syllables.
	return gsub(word, "(#[^" .. chars.y .. "]*[" .. chars.y .. "]+)(.*#)", function(i, non_i) -- Match initial and non-initial syllables.
		-- Remove single vowels in non-initial syllables, but keep diphthongs, long vowels
		-- and vowels that carry an explicit stress accent.
		non_i = gsub(non_i, "[" .. chars.v .. "]([^" .. long .. "й" .. accents .. "])", "%1")
		non_i = gsub(non_i, "([" .. chars.v .. "])" .. long, "%1") -- Double vowels are actually short in non-initial syllables.
		return i .. non_i
	end)
end

--[[
	Convert the consonants of one #-delimited word to IPA.
	Steps, in order: word-initial лх becomes ɬ; palatalisation spreads
	leftwards over a consonant run; every letter (with an optional trailing
	asterisk) is looked up in mapping.cons; finally the aspiration and
	palatal marks are put into the order ʲʰ.
--]]
local function handle_consonants(word)
	-- Disabled rules, kept for reference. The first one refers to chars.b,
	-- which the chars table does not define.
	--
	-- word = gsub(word, "([" .. chars.b .. "]?)г([^* ])", function(b, c) -- Handle further substitutions for uvular consonants.
	-- 	if b ~= "" or match(chars.b, c) then
	-- 		return b .. "г*" .. c  -- г is uvular when in contact with back vowels and non-final.
	-- 	end
	-- end)
	--
	-- word = gsub(word, "н([" .. chars.v .. "ыгшх])", "н*%1") -- н is uvular preceding a vowel or г, ш, х

	local consonant_set = "[" .. chars.c .. "]"

	-- Word-initial лх (optionally preceded by the primary stress mark) is a voiceless
	-- alveolar lateral fricative, found initially in some Tibetan loanwords.
	local result = gsub(word, "#(" .. primary .. "?)лх", "#%1ɬ")

	-- Every consonant in a run that ends in a palatalised consonant is palatalised too.
	result = gsub(result, "([" .. chars.c .. "ə]+)ʲ", function(run)
		return gsub(run, "(" .. consonant_set .. ")", "%1ʲ")
	end)

	-- Replace each consonant (plus its asterisk, if any) with its IPA value;
	-- anything missing from the table is left as it is.
	result = gsub(result, ".%*?", mapping.cons)

	-- The palatal mark goes before the aspiration mark.
	result = gsub(result, "ʰʲ", "ʲʰ")
	return result
end

--[[
	Insert the primary stress mark into one #-delimited word. The word is expected
	to have IPA vowels already, while its consonants are still Cyrillic letters.

	If the word contains an acute accent, the accents are converted through the
	`stress` table and no stress is computed. Otherwise stress is determined
	according to https://roa.rutgers.edu/files/172-0197/172-0197-WALKER-0-1.PDF:
	any rightmost non-final heavy syllable is stressed. If the final syllable is
	heavy but all previous syllables are non-heavy, that syllable is stressed.
	Otherwise, the first syllable is stressed.

	In both cases the mark is finally moved in front of the consonant that
	opens the stressed syllable.

	@param word  A #-delimited word with IPA vowels.
	@return      The word with the stress mark inserted.
--]]
local function add_stress(word)
	if match(word, u(0x0301)) then
		word = gsub(word, ".", stress) -- No need to determine stress if a primary stress mark is present.
	else
		word = gsub(word, "(#[^" .. chars.u .. "]*[" .. chars.u .. "]+[й" .. long .. "]?)(.*#)", function(i, non_i) -- Match initial and non-initial syllables.
			local _, non_i_count = gsubn(non_i, "[" .. chars.u .. "]", "") -- Count the number of full vowels in the non-initial syllables.
			-- The initial syllable is heavy when it contains a length mark or й.
			-- mw.ustring.match takes the subject first and the pattern second.
			local initial_is_heavy = match(i, "[" .. long .. "й]")
			if non_i_count == 0 or (non_i_count == 1 and initial_is_heavy) then
				-- Stress the initial syllable if there is no full vowel after it, or if it is
				-- heavy and only a single full vowel follows.
				i = gsub(i, "#", "#" .. primary)
				return i .. non_i
			else
				non_i = gsub(non_i, "(.*)([" .. chars.u .. "])", "%1" .. primary .. "%2") -- Find the last instance of a full vowel and add primary stress before the vowel.
				non_i = gsub(non_i, "(.*)([" .. chars.u .. "].*)" .. primary, "%1" .. primary .. "%2") -- However, if there is a full vowel in a previous syllable, shift the stress there instead.
				return i .. non_i
			end
		end)
	end

	return gsub(word, "([" .. chars.c .. "][^" .. chars.c .. "]*)" ..  primary, primary .. "%1") -- Shift the stress before the consonant of the stressed syllable.
end

--[[
	Placeholder for the insertion of reduced vowels.
	The intended behaviour is to look at consonant sequences that are two or
	more characters long, check whether they form a valid cluster and, if not,
	add a schwa after the first consonant.
	None of that is implemented yet: the word is currently returned unchanged.
--]]
local function add_reduced(word)
	return word
end

--[[
	Transcribe a term phonemically into IPA.
	The term must already have each word wrapped in # markers; every
	#...# stretch is transcribed on its own and substituted back in place.
]]
local function pron_m(term)
	-- Run one word through the transcription steps, in this fixed order.
	local function transcribe(word)
		local respelt = respell_vowels(word) -- Vowel handling by syllable position.
		local with_reduced = add_reduced(respelt) -- Reduced vowel insertion.
		local ipa_vowels = gsub(with_reduced, ".", mapping.vowels) -- Vowel letters to IPA.
		local stressed = add_stress(ipa_vowels) -- Stress marks.
		return handle_consonants(stressed) -- Consonant letters to IPA.
	end

	return gsub(term, "(#[^#]*#)", transcribe)
end

--[[
	Entry point of the module: convert a term to its phonemic IPA transcription.
	`term` is either a string or a table with an `args` field, in which case
	the first positional argument is used.
--]]
function export.toIPA(term)
	if type(term) == "table" then
		term = term.args[1]
	end

	local lowered = lc(term) -- Work on lowercase text only.
	local delimited = gsub(lowered, "([^ ]+)", "#%1#") -- Wrap every space-separated word in # markers.
	local transcribed = pron_m(delimited) -- Phonemic transcription, word by word.
	return gsub(transcribed, "#", "") -- Drop the word markers again.
end

return export