Module:hy-pronunciation

Definition from Wiktionary, the free dictionary
Jump to navigation Jump to search

This module implements {{hy-pron}}.

For testcases, see Module:hy-pronunciation/testcases.


-- Table holding the functions this module exposes; it is returned at the end of the file.
local export = {}

-- Single characters that map to IPA sounds.
-- Keys are individual characters of the lower-cased Armenian spelling, values are
-- the IPA text they are replaced with. The table is used as the replacement
-- argument of a per-character gsub in export.pronunciation, so a character that
-- has no entry here is left unchanged in the output.
-- NOTE(review): the key "-" is listed twice in the last row, once mapped to " "
-- and once to "". If both keys really are the same character, only one of the two
-- values can take effect (Lua does not define the order of assignments inside a
-- table constructor). Possibly one of them was meant to be a different
-- hyphen-like character -- confirm and remove the ambiguity.
local phonetic_chars_map = {
	["ա"]="ɑ", ["բ"]="b", ["գ"]="ɡ", ["դ"]="d", ["ե"]="ɛ", ["զ"]="z",
	["է"]="ɛ", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
	["խ"]="χ", ["ծ"]="t͡s", ["կ"]="k", ["հ"]="h", ["ձ"]="d͡z", ["ղ"]="ʁ", 
	["ճ"]="t͡ʃ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="ɔ",
	["չ"]="t͡ʃʰ", ["պ"]="p", ["ջ"]="d͡ʒ", ["ռ"]="r", ["ս"]="s", ["վ"]="v", 
	["տ"]="t", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
	-- the apostrophe-like ՚ is dropped from the transcription (mapped to the empty string)
	["օ"]="ɔ", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
}

-- Two-letter spellings that are transcribed as a unit.
-- Keys are pairs of Armenian letters, values are the IPA text that replaces them.
local phonetic_2chars_map = {
	-- the digraph ու is transcribed as the single vowel [u]
	["ու"] = "u",
	-- ի followed by ե or ա is diphthongized: [իե] = [jɛ], [իա] = [jɑ]
	["իե"] = "jɛ",
	["իա"] = "jɑ",
}

--- Converts an Armenian word into an IPA transcription.
-- The spelling is lower-cased, rewritten letter by letter (and digraph by digraph)
-- into IPA, and, for words with more than one vowel group, an IPA stress mark is
-- inserted.
-- @param word Either the word itself (a string), or a table with an `args` field
--   and a `getParent` method (presumably a Scribunto frame -- the word is then
--   taken from the first positional argument of the frame or of its parent).
-- @return The IPA transcription as a string.
-- Raises an error when no word (or an empty word) is supplied.
function export.pronunciation(word)
	if type(word) == "table" then
		-- Fall back to the parent's first argument when the direct one is absent.
		word = word.args[1] or word:getParent().args[1]
	end
	if not word or (word == "") then
		error("Please put the word as the first positional parameter!")
	end
	word = mw.ustring.lower(word)

	local ugsub = mw.ustring.gsub
	local phonetic = word

	-- Orthographic gemination: any character immediately followed by a copy of
	-- itself is written once, followed by the IPA length mark.
	phonetic = ugsub(phonetic, "(.)%1", "%1ː")

	-- Digraphs and diphthongizing letter pairs. pairs() gives no ordering
	-- guarantee; this is fine as long as no replacement text can form (part of)
	-- another key of the table, which holds for the current entries.
	for pat, repl in pairs(phonetic_2chars_map) do
		phonetic = ugsub(phonetic, pat, repl)
	end

	-- ե and ո are pronounced as jɛ and vɔ at the very start of the input. The
	-- rewrite is done in Armenian letters so that the per-character mapping
	-- below produces the IPA. This has to run after the digraph step so that an
	-- initial ու (already turned into u) is not affected.
	phonetic = ugsub(phonetic, "^ե", "յէ")
	phonetic = ugsub(phonetic, "^ո", "վօ")
	-- except when followed by another վ.
	phonetic = ugsub(phonetic, "^վօվ", "օվ")

	-- Per-character transliteration; characters without a table entry (including
	-- the IPA produced above and the stress sign ՛) pass through unchanged.
	phonetic = ugsub(phonetic, '.', phonetic_chars_map)

	-- assimilation: n directly before ɡ, k or χ becomes the velar nasal ŋ
	phonetic = ugsub(phonetic, "n([ɡkχ]+)", "ŋ%1")

	-- pseudo-palatalization under the influence of Russian [COLLOQUIAL, NOT STANDARD]
	--phonetic = mw.ustring.gsub(phonetic, "tj", "t͡sj")
	--phonetic = mw.ustring.gsub(phonetic, "tʰj", "t͡sʰj")
	--phonetic = mw.ustring.gsub(phonetic, "dj", "d͡zj")

	-- trilling of ɾ in some positions [COLLOQUIAL, NOT STANDARD]
	--phonetic = mw.ustring.gsub(phonetic, "ɾt", "rt")

	-- Do not add a stress mark for monosyllabic words. The count is the number of
	-- maximal runs of [ɑɛəɔiu], so directly adjacent vowels count only once.
	local _, vowel_runs = ugsub(phonetic, "[ɑɛəɔiu]+", "")

	-- If polysyllabic, add IPA stress mark using the following rules. The stress is always on the last syllable not
	-- formed by schwa [ə]. In some rare cases the stress is not on the last syllable. In such cases the stressed vowel
	-- is marked by the Armenian stress character <՛>, e.g. մի՛թե. So:
	--      1) Find the vowel followed by <՛>. If none, jump to step 2. Else check if it is the first vowel of the word.
	--         If true, put the IPA stress at the beginning, else do step 3.
	--      2) Find the last non-schwa vowel, i.e. [ɑɛɔiu],
	--      3) If the IPA symbol preceding it is [ɑɛəɔiu], i.e. a vowel, put the stress symbol between them,
	--         if it is NOT [ɑɛɔiuə], i.e. it is a consonant,
	--         put the stress before that consonant.
	if vowel_runs > 1 then
		local marked
		-- Explicit stress: the mark goes before the consonant run that precedes
		-- the vowel carrying ՛, and the ՛ itself is consumed.
		phonetic, marked = ugsub(phonetic, "([^ɑɛɔiuə]*[ɑɛɔiuə])՛", "ˈ%1")
		if marked == 0 then
			-- Default stress: the last vowel is not a schwa ...
			phonetic = ugsub(phonetic, "([^ɑɛɔiuə]*[ɑɛɔiu][^ɑɛɔiuə]*)$", "ˈ%1")
			-- ... or the last vowel is a schwa and the vowel before it is not.
			phonetic = ugsub(phonetic, "([^ɑɛɔiuə]*[ɑɛəɔiu]?[ɑɛɔiu][^ɑɛɔiuə]*ə[^ɑɛɔiuə]*)$", "ˈ%1")
		end
		-- When the mark sits between a vowel and a cluster of several consonant
		-- symbols, move it inside the cluster so that it stands before the last
		-- symbol that is not a modifier (ː, ʰ) ...
		phonetic = ugsub(phonetic, "([ɑɛəɔiu])ˈ([^ɑɛɔiuə]+)([^ɑɛɔiuəːˈʰ])", "%1%2ˈ%3")
		-- ... and if that split an affricate at its tie bar, put the mark in
		-- front of the whole affricate instead.
		phonetic = ugsub(phonetic, "(.)͡ˈ", "ˈ%1͡")
	end

	-- The Armenian stress sign is input markup, not IPA. It is only consumed above
	-- when the word is polysyllabic and the sign directly follows a vowel, so drop
	-- any occurrence that is still left (e.g. on a monosyllabic word).
	phonetic = ugsub(phonetic, "՛", "")

	return phonetic
end
 
-- Return the module table so that its functions are available to whoever loads this module.
return export