-- Module:swi-pron
--
-- From Wiktionary, the free dictionary.

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Local aliases for the mw.ustring pattern functions used below.
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match

-- Language object for the code "swi"; handed to the formatting modules in export.show.
local lang = require("Module:languages").getByCode("swi")

-- Lookup from an orthographic syllable initial (the leading consonant letters
-- captured by the pattern in export.ipa) to its IPA rendering.
-- The empty-string key covers syllables written without any initial consonant,
-- which are transcribed with a glottal stop.
local convert_initial = {
	['b'] = 'p', ['p'] = 'pʰ', ['mb'] = 'b', ['d'] = 't', ['t'] = 'tʰ',
	['nd'] = 'ⁿd', ['j'] = 't̠ʲ', ['q'] = 't̠ʲʰ', ['qb'] = 'ˀb', ['qd'] = 'ˀd',
	['hm'] = 'm̥', ['m'] = 'm', ['qm'] = 'ˀm', ['hn'] = 'n̥', ['n'] = 'n',
	['qn'] = 'ˀn', ['hnn'] = 'ɲ̟̊', ['nn'] = 'ɲ̟', ['qnn'] = 'ˀɲ̟', ['f'] = 'f',
	['z'] = 't͡s', ['c'] = 't͡sʰ', ['s'] = 's', ['r'] = 'z', ['x'] = 'ɕ',
	-- ['v'] = 'v/w'
	-- NOTE(review): the disabled entry above suggests "v" may vary between
	-- [v] and [w]; only "v" is emitted by the active entry below -- confirm.
	['v'] = 'v', ['qw'] = 'ˀw', ['l'] = 'l', ['y'] = 'j', ['qy'] = 'ˀj',
	['g'] = 'k', ['k'] = 'kʰ', ['gg'] = 'q', ['kk'] = 'qʰ', ['hng'] = 'ŋ̥',
	['ng'] = 'ŋ', ['qng'] = 'ˀŋ', ['xg'] = 'ɣ', ['qxg'] = 'ˀɣ', ['xgg'] = 'ʁ',
	['h'] = 'h', ['by'] = 'pj', ['py'] = 'pʰj', ['mby'] = 'bj', ['dy'] = 'tj', 
	['ty'] = 'tʰj', ['ndy'] = 'ⁿdj', ['qby'] = 'ˀbj', ['qdy'] = 'ˀdj',
	['hmy'] = 'm̥j', ['my'] = 'mj', ['hny'] = 'n̥j', ['ny'] = 'nj',
	['qny'] = 'ˀnj', ['fy'] = 'fj', ['zy'] = 't͡sj', ['cy'] = 't͡sʰj',
	['sy'] = 'sj', ['ly'] = 'lj', ['ndw'] = 'ⁿdw', ['gw'] = 'kw', ['kw'] = 'kʰw',
	['qdw'] = 'ˀdw', ['ngw'] = 'ŋw', ['qngw'] = 'ˀŋw', ['zw'] = 't͡sw',
	['sw'] = 'sw', ['xgw'] = 'ɣw', ['lw'] = 'lw', [''] = 'ʔ'
}

-- Lookup from an orthographic syllable final (vowel letters plus an optional
-- coda, as captured by the pattern in export.ipa) to its IPA rendering.
-- Codas written b/d/g are rendered as p/t/k, and the finals spelled with
-- "aa", "ai" and "ao" are the ones rendered with the length mark "ː",
-- which get_tone inspects when a syllable carries no tone letter.
local convert_final = {
	['i'] = 'i', ['ii'] = 'ɿ', ['ee'] = 'e', ['a'] = 'a', ['o'] = 'o', ['u'] = 'u', 
	['e'] = 'ə', ['ai'] = 'aːi', ['ei'] = 'ai', ['oi'] = 'oi', ['ui'] = 'ui',
	['iu'] = 'iu', ['eeu'] = 'eu', ['ao'] = 'aːu', ['ou'] = 'au', ['im'] = 'im',
	['eem'] = 'em', ['aam'] = 'aːm', ['am'] = 'am', ['om'] = 'om', ['um'] = 'um', 
	['in'] = 'in', ['een'] = 'en', ['aan'] = 'aːn', ['an'] = 'an', ['on'] = 'on',
	['un'] = 'un', ['en'] = 'ən', ['ing'] = 'iŋ', ['eeng'] = 'eŋ', 
	['aang'] = 'aːŋ', ['ang'] = 'aŋ', ['ong'] = 'oŋ', ['ung'] = 'uŋ',
	['eng'] = 'əŋ', ['ib'] = 'ip', ['eeb'] = 'ep', ['aab'] = 'aːp', ['ab'] = 'ap',
	['ob'] = 'op', ['ub'] = 'up', ['id'] = 'it', ['eed'] = 'et', ['aad'] = 'aːt',
	['ad'] = 'at', ['od'] = 'ot', ['ud'] = 'ut', ['ed'] = 'ət', ['ig'] = 'ik', 
	['eeg'] = 'ek', ['aag'] = 'aːk', ['ag'] = 'ak', ['og'] = 'ok', ['ug'] = 'uk',
	['eg'] = 'ək'
}

-- Lookup from the tone letter written at the end of a syllable to the IPA
-- tone letters appended to the transcription. "0" maps to the empty string,
-- so such a syllable gets no tone mark. A syllable with no tone letter at all
-- is not looked up here; get_tone handles that case separately.
local convert_tone = {
	['l'] = '˩˧', ['z'] = '˧˩', ['c'] = '˧', ['x'] = '˥˧', ['s'] = '˧˥', ['h'] = '˥', ['0'] = ''
}

--- Pick the IPA tone letters for one syllable.
-- @param final  IPA rendering of the syllable final (a convert_final value)
-- @param tone   tone letter captured from the spelling, or '' if none was written
-- @return the IPA tone string to append to the syllable
local function get_tone(final, tone)
	if tone ~= '' then
		-- Tone "s" on a final without a length mark is recorded for tracking,
		-- but is still converted like any other tone letter.
		if tone == 's' and not match(final, 'ː') then
			require("Module:debug").track("swi-pron/tone/s")
		end
		return convert_tone[tone]
	end

	-- No tone letter: the result depends only on whether the final is long.
	if match(final, 'ː') then
		return '˦˨'
	end
	return '˦˧'
end

--- Split a word into syllable-sized pieces.
-- Apostrophes are treated as explicit breaks, and a break is also inserted
-- between a vowel letter and a following consonant-plus-vowel sequence.
-- NOTE(review): nothing in the visible file calls this helper -- confirm
-- whether it is still needed.
-- @param text  orthographic word
-- @return iterator over the pieces, as produced by mw.text.gsplit
local function syllabify(text)
	local dotted = gsub(text, "'", ".")
	dotted = gsub(dotted, "([aeiou])([bcdfghjklmnpqrstvwxyz][aeiou])", "%1.%2")
	return mw.text.gsplit(dotted, "%.")
end

--- Convert space-separated orthographic syllables into an IPA transcription.
-- The text is lower-cased, split on single spaces, and each syllable is
-- parsed as initial consonant letters + final (vowels and optional coda) +
-- optional tone letter. Each part is converted through convert_initial,
-- convert_final and get_tone respectively.
-- @param text  orthographic text, syllables separated by spaces
-- @return the transcription wrapped in slashes, syllables separated by spaces
-- Raises an error if a syllable does not fit the expected shape, or if its
-- initial or final has no entry in the conversion tables.
function export.ipa(text)
	text = string.lower(text)
	local syllables = {}
	for syllable in mw.text.gsplit(text, " ") do
		-- All three captures are declared local so that no global is created.
		local initial, final, tone = match(syllable, "^([bcdfghjklmnpqrstvwxyz]*)([aeiou]+n?[bdgmn]?)([lzcxsh0]?)$")
		if not final then
			error(syllable .. " cannot be recognized")
		end
		local initial_ipa, final_ipa = convert_initial[initial], convert_final[final]
		-- Validate before computing the tone: get_tone pattern-matches on
		-- final_ipa, so an unknown final must be reported here rather than
		-- surfacing as an argument error from inside the matcher.
		if not initial_ipa then
			error(initial .. " is not a valid initial")
		elseif not final_ipa then
			error(final .. " is not a valid final")
		end
		local tone_value = get_tone(final_ipa, tone)
		table.insert(syllables, initial_ipa .. final_ipa .. tone_value)
	end
	return "/" .. table.concat(syllables, " ") .. "/"
end

--- Template entry point: render a pronunciation line for the current entry.
-- Uses the first positional template argument as the text to transcribe;
-- when it is absent, the current page title is used instead. A manually
-- supplied text is recorded through the tracking module.
-- @param frame  Scribunto frame; arguments are read from its parent frame
-- @return wikitext bullet line with the "Sandong" qualifier and the formatted IPA
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, { [1] = { } })

	local text = args[1]
	if text then
		require("Module:debug").track("swi-pron/manual")
	else
		text = mw.title.getCurrentTitle().text
	end

	-- The qualifier is built before the IPA, matching the original call order.
	local qualifier = require("Module:accent qualifier").format_qualifiers(lang, {"Sandong"})
	local formatted_ipa = require("Module:IPA").format_IPA_full { lang = lang, items = {{ pron = export.ipa(text) }}, }

	return string.format("* %s %s", qualifier, formatted_ipa)
end

return export