Module:mn-IPA

From Wiktionary, the free dictionary
Jump to navigation Jump to search


local export = {}
local mn = require("Module:mn-common")
local gsub = mw.ustring.gsub
local u = require("Module:string/char")

local vowels1 = {
	['а'] = 'a', ['о'] = 'ɔ', ['у'] = 'ʊ',
	['э'] = 'e', ['ө'] = 'ɵ', ['ү'] = 'u',
	['и'] = 'i', ['й'] = 'i', ['ы'] = 'i', --й very occasionally used at the start of loanwords
	['я'] = 'ja', ['ю'] = 'jʊ', ['ё'] = 'jɔ', ['е'] = 'jɵ',
	['ъ'] = '', ['ә'] = 'ə',
}

local vowels2 = {
	['аа'] = 'aː', ['ай'] = 'ɛː', ['яа'] = 'jaː', ['яй'] = 'jɛː', ['ъя'] = 'ja', ['ао'] = 'ɒː', ['ау'] = 'aʊ̯', ['яу'] = 'jaʊ̯',
	['оо'] = 'oː', ['ой'] = 'ɔi̯', ['ёо'] = 'jɔː', ['ёй'] = 'jɔi̯', ['ъё'] = 'jɔ', ['оа'] = 'ʊ̯a', ['ёу'] = 'joʊ',
	['уу'] = 'ʊː', ['уй'] = 'ʊi̯', ['уа'] = 'u̯a', ['юу'] = 'jʊː', ['юй'] = 'jʊi̯', ['уи'] = 'u̯iː',
	['ээ'] = 'eː', ['эй'] = 'ei̯', ['еэ'] = 'jeː', ['ей'] = 'jei̯', ['еа'] = 'ia', ['ео'] = 'eɔ', ['еу'] = 'iʊ̯', ['еү'] = 'iuː',
	['өө'] = 'ɵː', ['өй'] = 'ɵi̯', ['ье'] = 'jɵ',
	['үү'] = 'uː', ['үй'] = 'ui̯', ['юү'] = 'juː', ['юй'] = 'jui̯',
	['ии'] = 'iː', ['ий'] = 'iː', ['иа'] = 'i̯aː', ['ио'] = 'i̯oː', ['иу'] = 'i̯ʊː', ['иэ'] = 'i̯eː', ['иө'] = 'i̯ɵː', ['иү'] = 'i̯uː',
}

local vowels3 = {
	['ъяа'] = 'jaː', ['ъёо'] = 'jɔː', ['ьеэ'] = 'jeː',
	['яай'] = 'jɛː', ['уай'] = 'u̯ɛː', ['ёий'] = 'jiː',
	['аоа'] = 'aʊ̯a',
}

local consonants1 = {
	['п'] = 'pʰ', ['б'] = 'p', ['т'] = 'tʰ', ['д'] = 't', ['к'] = 'kʰ', ['г'] = 'k',
	['ц'] = 't͡sʰ', ['з'] = 't͡s', ['ч'] = 't͡ɕʰ', ['ж'] = 't͡ɕ',
	['ф'] = 'f', ['в'] = 'w̜', ['с'] = 's', ['ш'] = 'ɕ', ['щ'] = 'ɕ', ['х'] = 'x',
	['м'] = 'm', ['н'] = 'n',
	['р'] = 'r', ['л'] = 'ɮ',
	['ь'] = 'ʲ',
}

local consonants2 = {
	['бб'] = 'b',
	['вб'] = 'b',
	['дд'] = 't͉.t',
	['лл'] = 'ɮː', ['лх'] = 'ɬʰ',
	['мм'] = 'mː',
	['нб'] = 'mp', ['нн'] = 'nː',
	['рк'] = 'r̥kʰ', ['рп'] = 'r̥pʰ', ['рт'] = 'r̥tʰ', ['рц'] = 'r̥t͡sʰ', ['рч'] = 'r̥t͡ɕʰ',
	['сс'] = 'sː',
}

local double_consonants = {
	['пп'] = 'п', ['тт'] = 'т', ['кк'] = 'к', ['гг'] = 'г',
	['цц'] = 'ц', ['зз'] = 'з', ['чч'] = 'ч', ['жж'] = 'ж',
	['фф'] = 'ф', ['вв'] = 'в', ['шш'] = 'ш', ['щщ'] = 'щ', ['хх'] = 'х',
	['рр'] = 'р',
}

local palatalisation = {
	['пе'] = 'пʲө', ['пю'] = 'пʲу', ['пё'] = 'пʲо', ['пя'] = 'пʲа',
	['бе'] = 'бʲө', ['бю'] = 'бʲу', ['бё'] = 'бʲо', ['бя'] = 'бʲа',
	['те'] = 'тʲө', ['тю'] = 'тʲу', ['тё'] = 'тʲо', ['тя'] = 'тʲа',
	['де'] = 'дʲө', ['дю'] = 'дʲу', ['дё'] = 'дʲо', ['дя'] = 'дʲа',
	['ке'] = 'кʲө', ['кю'] = 'кʲу', ['кё'] = 'кʲо', ['кя'] = 'кʲа',
	['ге'] = 'гʲө', ['гю'] = 'гʲу', ['гё'] = 'гʲо', ['гя'] = 'гʲа',
	['фе'] = 'фʲө', ['фю'] = 'фʲу', ['фё'] = 'фʲо', ['фя'] = 'фʲа',
	['ве'] = 'вʲө', ['вю'] = 'вʲу', ['вё'] = 'вʲо', ['вя'] = 'вʲа',
	['хе'] = 'хʲө', ['хю'] = 'хʲу', ['хё'] = 'хʲо', ['хя'] = 'хʲа',
	['ме'] = 'мʲө', ['мю'] = 'мʲу', ['мё'] = 'мʲо', ['мя'] = 'мʲа',
	['не'] = 'нʲө', ['ню'] = 'нʲу', ['нё'] = 'нʲо', ['ня'] = 'нʲа',
	['ре'] = 'рʲө', ['рю'] = 'рʲу', ['рё'] = 'рʲо', ['ря'] = 'рʲа',
	['ле'] = 'лʲө', ['лю'] = 'лʲу', ['лё'] = 'лʲо', ['ля'] = 'лʲа',
}


function export.to_IPA(text)

	local orig_text = text
	text = mw.ustring.lower(text)
	
	text = gsub(text, '-', '')
	
	text = gsub(text, "ы$", "ы~")
	text = gsub(text, "ы(%s)", "ы~%1")
	
	--primary stress
	-- U+0301 COMBINING ACUTE ACCENT, U+02C8 MODIFIER LETTER VERTICAL LINE
	--secondary stress
	-- U+0300 COMBINING GRAVE ACCENT, U+02C8 MODIFIER LETTER LOW VERTICAL LINE
	text = gsub(text, "'", u(0x02C8)) -- can use apostrophe for convenience
	
	if not mw.ustring.match(text, u(0x02C8)) then
		if not mw.ustring.match(text, '[' .. u(0x0301) .. u(0x0300) .. ']') then
			text = u(0x02C8) .. text
		end
		text = gsub(text, '([аеиоөүуъыэюяёә%s%p%z])([аеиоөүуъыэюяёә])'..u(0x0301), '%1'..u(0x02C8)..'%2')
		text = gsub(text, '([бвгджзйклмнпрстфхцчшщь]%*?)([аеиоөүуъыэюяёә])'..u(0x0301), u(0x02C8)..'%1%2')
		text = gsub(text, '([аеиоөүуъыэюяёә%s%p%z])([аеиоөүуъыэюяёә])'..u(0x0300), '%1'..u(0x02CC)..'%2')
		text = gsub(text, '([бвгджзйклмнпрстфхцчшщь]%*?)([аеиоөүуъыэюяёә])'..u(0x0300), u(0x02CC)..'%1%2')
	end
	for k, v in pairs(double_consonants) do
		text = gsub(text, k, v)
		text = gsub(text, k, v) --remove double consonants
	end
	for k, v in pairs(palatalisation) do
		text = gsub(text, k, v)
		text = gsub(text, k, v) --convert iotated vowels following consonants
	end
	for k, v in pairs(vowels3) do
		text = gsub(text, k, v)
		text = gsub(text, k, v)
		text = gsub(text, k, v)
	end
	for k, v in pairs(vowels2) do
		text = gsub(text, k, v)
		text = gsub(text, k, v)
	end
	for k, v in pairs(vowels1) do
		text = gsub(text, k, v)
	end
	
	for k, v in pairs(consonants2) do
		text = gsub(text, k, v)
		text = gsub(text, k, v)
	end
	
	text = gsub(text, '[бвгджзйклмнпрстфхцчшщь]%*?', consonants1)
	text = gsub(text, '([aɛoɔʊi̯eɵuə])t͡ɕ([aɛoɔʊi̯eɵuə])', '%1d͡ʑ%2')
	text = gsub(text, '([aɛoɔʊi̯eɵuə])p([aɛoɔʊi̯eɵuə])', '%1β%2')
	text = gsub(text, '([aɛoɔʊi̯eɵuə])k([aɛoɔʊi̯eɵuə])', '%1ɣ%2')
	text = gsub(text, 'n([%sˈ]*)%f[pbmf]', 'm%1')
	text = gsub(text, 'n%f[ɡɢkxʃ%s%p%z]', 'ŋ')
	text = gsub(text, 'r%f[%s%p%z]', 'r̥')

	text = gsub(text, '([aɔuʊ])i%f[%s%p%z]', '%1ⁱ')
	
	if #mn.syllables( orig_text ) ~= 1 then
		text = gsub(text, '[aeiɔɵuʊə]%f[%s%p%z]', '')
	end
	
	text = gsub(text, 'ⁱ', 'i')
	text = gsub(text, 'ʲj', 'j')

	--put ~ at the end of respelling to prevent vowel cutoff
	text = gsub(text, '~', '')

	return text

end

-- only works with Cyrillic Mongolian
function export.show(frame)
	local args = frame:getParent().args
	local page_title = mw.title.getCurrentTitle().text
	local text = args[1] or page_title
	local qualifier = args["q"] or nil
	local lang = require("Module:languages").getByCode("mn")
	local script = require("Module:scripts").getByCode("Cyrl")
	
	local syllables = mn.syllables(text)
	local transcription = export.to_IPA(text)
	local IPA = "<li>" .. (qualifier and require("Module:qualifier").format_qualifier{qualifier} .. " " or "") ..
		require("Module:IPA").format_IPA_full {
		lang = lang,
		items = {{ pron = "/" .. transcription .. "/" }},
	} .. "</li>"
	local hyphenation = "<li>" .. require("Module:hyphenation").format_hyphenations { 
		lang = lang,
		hyphs = { { hyph = syllables } },
		sc = script,
		caption = "[[syllabification|Syllabification]]",
		} .. mw.ustring.char( 0x2002 ) .. "(" .. #syllables .. " syllable"
	if #syllables >= 2 then hyphenation = hyphenation .. "s" end
	hyphenation = hyphenation .. ")</li>"

	return "<ul>" .. IPA .. hyphenation .. "</ul>"
end

return export