-- Module:sa-pronunc/sandbox
--
-- From Wiktionary, the free dictionary.
-- This module needs documentation: please describe its purpose and usage on
-- the module's documentation page.

-- Table of functions this module exports; it is filled in further down and
-- returned at the end of the file.
local export = {}

-- Short aliases for the Unicode-aware string functions in mw.ustring.
local u = mw.ustring.char
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find -- not referenced elsewhere in the visible file

-- Combining diacritics, built from their code points so they can be spliced
-- into the patterns and replacement strings used below.
local ACUTE     = u(0x0301) -- U+0301 combining acute accent
local COARTIC   = u(0x0361) -- U+0361 combining double inverted breve (affricate tie bar)
local DENTAL    = u(0x032A) -- U+032A combining bridge below
local FLAP      = u(0x0306) -- U+0306 combining breve
local NORELEASE = u(0x031A) -- U+031A combining left angle above
local SYLLABIC  = u(0x0329) -- U+0329 combining vertical line below
local NASAL     = u(0x0303) -- U+0303 combining tilde

-- Single-character accent markers. `accent` is the character class built
-- from all of them; mark_accent uses it to capture an optional accent mark
-- after a vowel.
local udatta = "/"
local anudatta = "_"
local anudattara = "="
local lis = "\\" -- long independent svarita
local sis = "|" -- short independent svarita
local ds = "`" -- dependent svarita
local long_kampa = "^"
-- A plain asterisk. The previous spelling "\*" is not a valid Lua escape
-- sequence: Lua 5.1 silently reads it as "*", later versions refuse to
-- compile it.
local short_kampa = "*"

-- Inside a Lua pattern set none of these characters is special as long as
-- "^" is not the first one, so the list can be embedded without escaping.
local accent_list = udatta .. anudatta .. anudattara .. lis .. sis .. ds .. long_kampa .. short_kampa
local accent = "[" .. accent_list .. "]"

local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("sa")
local m_a = require("Module:accent qualifier")
local m_sa_translit = require("Module:sa-utilities/translit")
local m_str = require("Module:string")

-- Devanagari consonant letters mapped to the IPA string emitted for them.
-- convert_word appends the inherent vowel separately, so only the consonant
-- itself is listed here. Note that the last lateral key is a three-character
-- sequence (letter, virama, letter) rather than a single letter.
local consonants = {
	-- क row
	["क"] = "k", ["ख"] = "kʰ", ["ग"] = "ɡ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
	-- च row
	["च"] = "t͡ɕ", ["छ"] = "t͡ɕʰ", ["ज"] = "d͡ʑ", ["झ"] = "d͡ʑʱ", ["ञ"] = "ɲ",
	-- ट row
	["ट"] = "ʈ", ["ठ"] = "ʈʰ", ["ड"] = "ɖ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
	-- त row
	["त"] = "t̪", ["थ"] = "t̪ʰ", ["द"] = "d̪", ["ध"] = "d̪ʱ", ["न"] = "n̪",
	-- प row
	["प"] = "p", ["फ"] = "pʰ", ["ब"] = "b", ["भ"] = "bʱ", ["म"] = "m",
	-- glides and liquids
	["य"] = "j", ["र"] = "ɽ", ["ल"] = "l̪", ["व"] = "ʋ",
	["ळ"] = "ɭ̆", ["ळ्ह"] = "ɭ̆ʱ",
	-- fricatives
	["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s̪", ["ह"] = "ɦ",
}

-- Dependent vowel signs mapped to their IPA value. The virama maps to the
-- empty string: it is listed so that a consonant followed by it is not given
-- the inherent vowel (convert_word tests membership in this table).
local diacritics = {
	["ा"] = "ɑː",
	["ि"] = "i", ["ी"] = "iː",
	["ु"] = "u", ["ू"] = "uː",
	["ृ"] = "r̩", ["ॄ"] = "r̩ː",
	["ॢ"] = "l̩", ["ॣ"] = "l̩ː",
	["े"] = "ɐj", ["ै"] = "ɑːj",
	["ो"] = "ɐw", ["ौ"] = "ɑːw",
	-- virama
	["्"] = "",
}

-- Set of phoneme strings that count as a syllable nucleus; syllabify closes
-- a syllable whenever it meets one of these.
local vowel_list = {}
for _, nucleus in ipairs({
	"ɐ", "ɑː",
	"i", "iː",
	"u", "uː",
	"r̩", "r̩ː",
	"l̩", "l̩ː",
	"ɐj", "ɑːj",
	"ɐw", "ɑːw",
}) do
	vowel_list[nucleus] = true
end

-- Set of stop and affricate phonemes; shift_to_codas consults it to detect a
-- cluster that begins with two such consonants.
local stop_list = {}
for _, stop in ipairs({
	"k", "kʰ", "ɡ", "ɡʱ",
	"t͡ɕ", "t͡ɕʰ", "d͡ʑ", "d͡ʑʱ",
	"t̪", "t̪ʰ", "d̪", "d̪ʱ",
	"ʈ", "ʈʰ", "ɖ", "ɖʱ",
	"p", "pʰ", "b", "bʱ",
}) do
	stop_list[stop] = true
end

-- Sonority rank of every non-vowel phoneme, from 1 (least sonorous) to 8.
-- The groups below are listed in rank order, so a group's position in the
-- list is the rank given to each of its members.
local consonant_sonority = {}
for rank, group in ipairs({
	-- 1: voiceless stops and affricates
	{ "k", "kʰ", "t͡ɕ", "t͡ɕʰ", "t̪", "t̪ʰ", "ʈ", "ʈʰ", "p", "pʰ" },
	-- 2: voiceless fricatives
	{ "ɕ", "ʂ", "s̪", "h", "x", "ɸ" },
	-- 3: voiced stops and affricates
	{ "ɡ", "ɡʱ", "d͡ʑ", "d͡ʑʱ", "d̪", "d̪ʱ", "ɖ", "ɖʱ", "b", "bʱ" },
	-- 4: voiced fricatives
	{ "ɦ" },
	-- 5: nasals
	{ "ŋ", "ɲ", "n̪", "ɳ", "m", "m̐", "ṃ" },
	-- 6: flaps
	{ "ɽ" },
	-- 7: laterals
	{ "l̪", "ɭ̆", "ɭ̆ʱ" },
	-- 8: glides
	{ "j", "ʋ" },
}) do
	for _, phoneme in ipairs(group) do
		consonant_sonority[phoneme] = rank
	end
end

-- IPA values for the characters that are neither consonant letters nor
-- dependent vowel signs: independent vowel letters and a few special signs.
local tt = {
	-- independent vowel letters
	["अ"] = "ɐ", ["आ"] = "ɑː",
	["इ"] = "i", ["ई"] = "iː",
	["उ"] = "u", ["ऊ"] = "uː",
	["ऋ"] = "r̩", ["ॠ"] = "r̩ː",
	["ऌ"] = "l̩", ["ॡ"] = "l̩ː",
	["ए"] = "ɐj", ["ऐ"] = "ɑːj",
	["ओ"] = "ɐw", ["औ"] = "ɑːw",
	-- visarga
	["ः"] = "h",
	-- chandrabindu
	["ँ"] = "m̐",
	-- anusvara
	["ं"] = "ṃ",
	-- avagraha: contributes no sound
	["ऽ"] = "",
	-- Vedic extensions
	["ᳵ"] = "x",
	["ᳶ"] = "ɸ",
}

-- Each vowel phoneme mapped to the same vowel carrying an acute accent;
-- syllabify substitutes the value for the nucleus of the accented syllable.
local accent_vowel = {
	["ɐ"] = "ɐ́",
	["ɑː"] = "ɑ́ː",
	["i"] = "í",
	["iː"] = "íː",
	["u"] = "ú",
	["uː"] = "úː",
	["r̩"] = "ŕ̩",
	["r̩ː"] = "ŕ̩ː",
	["l̩"] = "ĺ̩",
	["l̩ː"] = "ĺ̩ː",
	["ɐj"] = "ɐ́j",
	["ɑːj"] = "ɑ́ːj",
	["ɐw"] = "ɐ́w",
	["ɑːw"] = "ɑ́ːw",
}

--- Move leading consonants of each syllable into the coda of the previous
-- one (the Weerasinghe-Wasala-Gamage method).
--
-- On entry every syllable is a sequence of phoneme strings that ends with
-- its vowel, so everything before the last element is an onset consonant.
-- The first syllable is never touched.
--
-- @param syllables sequence of phoneme sequences; modified in place
-- @return the same `syllables` table
local function shift_to_codas(syllables)
	for idx = 2, #syllables do
		local syll = syllables[idx]
		local len = #syll
		-- the consonant immediately before the vowel (nil when there is none)
		local pre_vowel = syll[len - 1]
		local to_move = 0

		if len < 3 then
			-- at most one onset consonant: nothing to move
		elseif len == 3 then
			-- V.CCV => VC.CV
			to_move = 1
		elseif len == 4 then
			if pre_vowel == "ɽ" or pre_vowel == "j" or (stop_list[syll[1]] and stop_list[syll[2]]) then
				-- V.CCrV or V.CCyV => VC.CrV or VC.CyV
				-- if the first two consonants are stops, VC.CCV
				to_move = 1
			else
				-- V.CCCV => VCC.CV
				to_move = 2
			end
		elseif pre_vowel == "ɽ" or pre_vowel == "j" then
			-- 4 consonants or more ending in r/y: keep the last two in the onset
			to_move = len - 3
		else
			-- 4 consonants or more: split at the consonant of least sonority,
			-- scanning right to left so that the rightmost minimum wins.
			-- Phonemes without a sonority entry are ignored instead of
			-- raising a nil-comparison error.
			to_move = len - 1
			local min_son = consonant_sonority[pre_vowel] or math.huge
			for k = len - 2, 1, -1 do
				local son = consonant_sonority[syll[k]]
				if son and son < min_son then
					to_move = k
					min_son = son
				end
			end
		end

		local previous = syllables[idx - 1]
		for _ = 1, to_move do
			table.insert(previous, table.remove(syll, 1))
		end
	end
	return syllables
end

--- Turn a flat sequence of phonemes into a syllabified, stress-marked string.
--
-- Steps: cut a syllable after every vowel, optionally put the pitch accent
-- on one syllable, move onset consonants into preceding codas, place the
-- stress mark, attach word-final consonants and join the syllables with ".".
--
-- @param remainder sequence of phoneme strings; it is emptied by this call
-- @param accent    1-based index of the syllable that takes the pitch
--                  accent, or nil for none; an index that does not name an
--                  existing syllable is ignored
-- @return the syllabified word as a single string
local function syllabify(remainder, accent)
	local syllables = {}
	local syll = {}

	while #remainder > 0 do
		local phoneme = table.remove(remainder, 1)
		table.insert(syll, phoneme)
		if vowel_list[phoneme] then
			-- a vowel closes the syllable
			table.insert(syllables, syll)
			syll = {}
		end
	end
	-- whatever follows the last vowel is the word-final consonant cluster
	local final_cons = syll

	-- A word without any vowel has no syllable to hang the consonants on;
	-- return them as they are instead of indexing a missing syllable.
	if #syllables == 0 then
		return table.concat(final_cons, "")
	end

	-- Vedic pitch accent. At this point the vowel is still the last element
	-- of its syllable. Looking the syllable up first also rejects 0,
	-- negative and fractional indices.
	local accented = accent ~= nil and syllables[accent]
	if accented then
		accented[#accented] = accent_vowel[accented[#accented]]
	end

	syllables = shift_to_codas(syllables)

	local short_vowel_patt = "^[ɐiurl]" .. SYLLABIC .. "?" .. ACUTE .. "?$"

	-- A syllable is light when its last segment is a short vowel. The
	-- segment is decomposed first: accented vowels may be stored as
	-- precomposed characters (page text is saved in NFC), which the pattern
	-- above, written with a separate combining acute, would not match.
	local function is_light(index)
		local segments = syllables[index]
		local last = mw.ustring.toNFD(segments[#segments])
		return mw.ustring.match(last, short_vowel_patt) ~= nil
	end

	-- Classical stress accent: the last syllable but one if it is heavy,
	-- otherwise the one before it if that is heavy, otherwise one further
	-- back (as far as the word allows). Monosyllables get no mark.
	local num_sylls = #syllables
	local stressed
	if num_sylls == 2 then
		stressed = 1
	elseif num_sylls == 3 then
		-- heavy second syllable takes the stress, otherwise the first does
		if is_light(2) then
			stressed = 1
		else
			stressed = 2
		end
	elseif num_sylls >= 4 then
		if not is_light(num_sylls - 1) then
			stressed = num_sylls - 1
		elseif not is_light(num_sylls - 2) then
			stressed = num_sylls - 2
		else
			stressed = num_sylls - 3
		end
	end
	if stressed then
		table.insert(syllables[stressed], 1, 'ˈ')
	end

	-- If there are phonemes left, then the word ends in a consonant:
	-- add them to the last syllable
	local last_syll = syllables[num_sylls]
	for _, phoneme in ipairs(final_cons) do
		table.insert(last_syll, phoneme)
	end

	for i, segments in ipairs(syllables) do
		syllables[i] = table.concat(segments, "")
	end

	return table.concat(syllables, ".")
end

-- The nasal that an anusvara turns into before each stop or affricate.
-- Keys are the consonant as matched by anusvara(): base letter plus its
-- combining mark and, for affricates, the fricative part.
local anu_to_nasals = {
	["k"] = "ŋ",
	["ɡ"] = "ŋ",
	["t͡ɕ"] = "ɲ",
	["d͡ʑ"] = "ɲ",
	["t̪"] = "n̪",
	["d̪"] = "n̪",
	["ʈ"] = "ɳ",
	["ɖ"] = "ɳ",
	["p"] = "m",
	["b"] = "m",
}

--- Resolve the anusvara placeholder "ṃ" in a transcription.
--
-- In order: a placeholder at the very end of the text becomes "m"; one that
-- stands before a stop or affricate (optionally across a space, syllable
-- dot or stress mark) becomes the nasal listed in anu_to_nasals; any that
-- remains directly after a vowel is replaced by a nasalisation mark on that
-- vowel.
--
-- @param text transcription string
-- @return the transcription with the placeholders resolved
local function anusvara(text)
	local before_stop = "ṃ([ %.ˈ]?)([kɡtdʈɖpb])([" .. DENTAL .. COARTIC .. "]?)([ɕʑ]?)"
	local after_vowel = "([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. ACUTE .. "?)(ː?)([jw]?)ṃ"

	-- Re-emit the boundary and the consonant unchanged, preceded by the
	-- nasal that shares the consonant's place of articulation.
	local function assimilate(div, cons, mark, fric)
		local following = cons .. mark .. fric
		return anu_to_nasals[following] .. div .. following
	end

	local result = gsub(text, "ṃ$", "m")
	result = gsub(result, before_stop, assimilate)
	result = gsub(result, after_vowel, "%1%2" .. NASAL .. "%3%4%5")
	return result
end

--- Transcribe one Devanagari word.
--
-- The word is split into characters, each character is mapped to a phoneme
-- (a consonant receives the inherent vowel "ɐ" unless a dependent vowel
-- sign or virama follows), and the phoneme list is handed to syllabify.
-- Characters that appear in none of the lookup tables are dropped.
--
-- @param word   the word in Devanagari
-- @param accent index of the accented syllable, or nil; passed to syllabify
-- @return the syllabified transcription of the word
local function convert_word(word, accent)
	local chars = {}
	local t = {}

	gsub(word, ".", function(c) table.insert(chars, c) end)

	local i = 1
	while i <= #chars do
		local c = chars[i]
		local cons = consonants[c]
		if cons then
			-- `consonants` also has a key made of three characters (letter,
			-- virama, letter). A per-character lookup can never reach it,
			-- so try the three-character sequence first and consume it whole.
			local second, third = chars[i + 1], chars[i + 2]
			local cluster = second and third and consonants[c .. second .. third]
			if cluster then
				cons = cluster
				i = i + 2
			end
			table.insert(t, cons)
			-- inherent vowel, unless a vowel sign or virama follows
			if not diacritics[chars[i + 1]] then
				table.insert(t, "ɐ")
			end
		elseif c == "्" then
			-- virama: do nothing
		elseif diacritics[c] then
			table.insert(t, diacritics[c])
		elseif tt[c] and tt[c] ~= "" then
			-- Characters mapped to the empty string (avagraha) are silent.
			-- Inserting "" would make the syllabifier count a consonant
			-- that is not there.
			table.insert(t, tt[c])
		end
		i = i + 1
	end

	word = syllabify(t, accent)

	-- a stress mark already marks the syllable boundary
	word = gsub(word, "%.ˈ", "ˈ")

	-- chandrabindu: nasalise the preceding vowel
	word = gsub(
		word,
		"([ɐɑiurleo])(" .. SYLLABIC .. "?)(" .. ACUTE .. "?)(ː?)([jw]?)m̐",
		"%1%2" .. NASAL .. "%3%4%5"
	)
	return word
end

--- Transcribe a space-separated phrase word by word.
--
-- @param words   the phrase; words are separated by single spaces
-- @param accents table indexed by word position holding the accented
--                syllable index for that word (entries may be nil)
-- @return the transcriptions of the words joined by single spaces
local function convert_words(words, accents)
	local result = {}

	local word_num = 0
	for word in mw.text.gsplit(words, " ") do
		word_num = word_num + 1
		result[word_num] = convert_word(word, accents[word_num])
	end

	-- Returned directly: the previous version stored the joined string in
	-- an undeclared variable, i.e. in a global named `text`.
	return table.concat(result, " ")
end

--- Apply the phonological processes shared by all varieties.
-- At present the only one is anusvāra resolution.
--
-- @param text transcription string
-- @return the processed transcription
local function phon_procs(text)
	local processed = anusvara(text)
	return processed
end

--- Simplify a stop or affricate that stands directly before another stop.
--
-- The first consonant may be separated from the second by a space, a
-- syllable dot or a stress mark. An aspirated stop loses its aspiration; an
-- affricate is reduced to its stop part (the tie bar, the fricative and any
-- aspiration are removed).
--
-- @param text transcription string
-- @return the transcription with those clusters simplified
local function abhinidhana_phonemic(text)
	-- de-aspirate before stops
	local aspirated = "([kɡtdʈɖpb])(" .. DENTAL .. "?)[ʰʱ]?([ %.ˈ]?)([kɡtdʈɖpb])"
	-- de-affricate before stops
	local affricate = "([td])" .. COARTIC .. "[ɕʑ][ʰʱ]?([ %.ˈ]?)([kɡtdʈɖpb])"

	local result = gsub(text, aspirated, "%1%2%3%4")
	result = gsub(result, affricate, "%1%2%3")
	return result
end

--- Mark a stop that stands directly before another stop as unreleased.
--
-- The no-audible-release diacritic is inserted after the first stop (and
-- its dental mark, if any); an intervening space, syllable dot or stress
-- mark is kept in place.
--
-- @param text transcription string
-- @return the transcription with the diacritic added
local function abhinidhana_phonetic(text)
	local stop_before_stop = "([kɡtdʈɖpb])(" .. DENTAL .. "?)([ %.ˈ]?)([kɡtdʈɖpb])"
	local unreleased = "%1%2" .. NORELEASE .. "%3%4"
	local result = gsub(text, stop_before_stop, unreleased)
	return result
end

-- Superscript counterpart of each vowel letter, used for the short echo
-- vowel written after a visarga in the Classical phonetic transcription.
local superscript = {
	["ɐ"] = "ᵄ", ["ɑ"] = "ᵅ",
	["e"] = "ᵉ", ["o"] = "ᵒ",
	["i"] = "ⁱ", ["u"] = "ᵘ",
}

--- Derive the per-variety transcriptions from the common one.
--
-- @param text transcription produced by convert_words and phon_procs
-- @return a table with the keys 'rig' and 'cla'; each value is a table with
--         the fields `label`, `phonemic` and `phonetic`
local function make_dialects(text)
	local dialects = {}

	text = abhinidhana_phonemic(text)

	-- Rigvedic Sanskrit: no stress accent, so the stress mark is dropped at
	-- the start of the text and turned back into a syllable dot elsewhere
	local rig_phnm = text
	rig_phnm = gsub(rig_phnm, "^ˈ", "")
	rig_phnm = gsub(rig_phnm, "ˈ", ".")
	rig_phnm = gsub(rig_phnm, " %.", " ")

	local rig_phnt = abhinidhana_phonetic(rig_phnm)
	-- visarga alternation
	rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([p])", "ɸ%1%2")
	rig_phnt = gsub(rig_phnt, "h([ %.ˈ]?)([k])", "x%1%2")
	-- nasalized semivowels: a nasal before a lateral or glide is replaced by
	-- a nasalised copy of that consonant
	rig_phnt = gsub(
		rig_phnt,
		"([ŋɲnɳm])(" .. DENTAL .. "?)([ %.ˈ]?)([lɭjʋ])([" .. DENTAL .. FLAP .. "]?)(ʱ?)",
		"%4%5" .. NASAL .. "%3%4%5%6"
	)

	dialects['rig'] = {
		label = "Vedic",
		phonemic = rig_phnm,
		phonetic = rig_phnt,
	}

	-- Classical Sanskrit
	local cla_phnm = text
	-- Remove the pitch accent. Accented vowels may be stored precomposed,
	-- so each precomposed letter is mapped back to its base letter and a
	-- free-standing combining acute is deleted. "ĺ" is included because
	-- accent_vowel can produce an accented syllabic l just as it can an
	-- accented syllabic r.
	cla_phnm = gsub(
		cla_phnm,
		"[éóíúŕĺ" .. ACUTE .. "]",
		{["é"] = "e", ["ó"] = "o", ["í"] = "i", ["ú"] = "u", [ACUTE] = "", ["ŕ"] = "r", ["ĺ"] = "l"}
	)
	-- the short diphthongs become long mid vowels, the long diphthongs shorten
	cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)j", "e%1ː")
	cla_phnm = gsub(cla_phnm, "ɐ(" .. NASAL .. "?)w", "o%1ː")
	cla_phnm = gsub(cla_phnm, "ɑ(" .. NASAL .. "?)ː([jw])", "ɑ%1%2")

	local cla_phnt = abhinidhana_phonetic(cla_phnm)
	-- cla_pron = gsub(cla_pron, "r̩(" .. NASAL .. "?)(" .. ACUTE .. "?)(ː?)", "ɽi%1%2%3")
	-- cla_pron = gsub(cla_pron, "l̩(" .. NASAL .. "?)(" .. ACUTE .. "?)(ː?)", "l̪i%1%2%3")

	-- visarga at the end of the text or before a space is followed by a
	-- superscript echo of the preceding vowel
	cla_phnt = gsub(
		cla_phnt,
		"([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h$",
		function (vow, nas, length, glide)
			return vow .. nas .. length .. glide .. "h" .. superscript[vow]
		end
	)
	cla_phnt = gsub(
		cla_phnt,
		"([ɐɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h ",
		function (vow, nas, length, glide)
			return vow .. nas .. length .. glide .. "h" .. superscript[vow] .. " "
		end
	)

	dialects['cla'] = {
		label = "Classical Sanskrit",
		phonemic = cla_phnm,
		phonetic = cla_phnt,
	}

	return dialects
end

--- Format the transcriptions as wikitext list items.
--
-- With `novedic` set only the Classical line is produced. Otherwise the
-- output is a hidden ("vsShow") block holding the unlabelled Classical
-- transcription, followed by one labelled line each for Vedic and Classical.
--
-- @param dialects table as returned by make_dialects
-- @param novedic  truthy to leave out the Vedic transcription
-- @return wikitext string
local function make_table(dialects, novedic)
	-- Items for format_IPA_full: always the phonemic form, plus the
	-- phonetic form when it differs.
	local function pron_items(dial)
		local entry = dialects[dial]
		local items = {{pron = '/' .. entry.phonemic .. '/'}}
		if entry.phonemic ~= entry.phonetic then
			table.insert(items, {pron = '[' .. entry.phonetic .. ']'})
		end
		return items
	end

	-- One list item: qualifier label followed by the transcriptions.
	local function labelled_line(dial)
		local items = pron_items(dial)
		return table.concat{
			'\n* ',
			m_a.format_qualifiers(lang, {dialects[dial].label}),
			' ',
			m_IPA.format_IPA_full { lang = lang, items = items },
		}
	end

	if novedic then
		return labelled_line('cla')
	end

	local inline_items = pron_items('cla')
	local inline = table.concat{
		'<div class="vsShow" style="display:none">',
		'\n* ',
		m_IPA.format_IPA_full { lang = lang, items = inline_items },
		'</div>',
	}

	local full = {}
	for _, dial in ipairs({'rig', 'cla'}) do
		full[#full + 1] = labelled_line(dial)
	end

	return inline .. table.concat(full, "")
end


-- Character inventories and pattern classes matched against the SLP string
-- that mark_accent receives.
local consonant_list = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzshMHZV'"
local consonant = "[" .. consonant_list .. "]"
-- Named differently from the phoneme set `vowel_list` declared near the top
-- of the file: redeclaring that name here would hide the table from all
-- code written below this point.
local slp_vowel_list = "aAiIuUfFxXeEoO"
local vowel = "[" .. slp_vowel_list .. "]"
local not_vowel = "[^" .. slp_vowel_list .. "]"

-- Currently referenced only by commented-out code.
local short_vowel_cat = "[aiufx]"
local long_vowel_cat = "[AIUFXeEoO]"

--- Split an SLP string into syllable-like pieces and list them.
--
-- Every piece is an optional consonant run, one vowel and an optional
-- accent mark; a consonant run at the very end of the string forms one more
-- piece. The result is a listing meant for inspection, one numbered piece
-- per line ("<br/>"-separated) with the fields separated by commas.
--
-- @param SLP string in SLP transliteration
-- @return the listing as a string
local function mark_accent(SLP)
	local syls = {}

	for c, v, a in gmatch(SLP, "(" .. consonant .. "*)(" .. vowel .. ")(" .. accent .. "?)") do
		table.insert(syls, {c, v, a})
	end
	local trailing = match(SLP, consonant .. "+$")
	if trailing then
		table.insert(syls, {trailing})
	end

	-- Draft of the accent propagation rules, kept for reference:
--	SLP = gsub(SLP, "(" .. short_vowel_cat .. ")" .. lis, "%1" .. sis) -- long to short independent svarita
--	SLP = gsub(SLP, "(" .. vowel .. ")([" .. consonant_list .. " ]" .. vowel .. udatta .. ")", "%1" .. anudatta .. "%2")
--	SLP = gsub(
--		SLP,
--		"(" .. vowel .. udatta .. not_vowel .. "*" .. vowel .. ")%f[" .. consonant_list .. " ]",
--		"%1" .. ds
--	) -- dependent svarita
--	return SLP

	-- Every field is a string (captures are never nil), so each piece can
	-- be joined as it is.
	local out = {}
	for i, fields in ipairs(syls) do
		out[i] = i .. ": " .. table.concat(fields, ",")
	end
	return table.concat(out, "<br/>")
end

--- Template entry point (experimental): convert the first argument, or the
-- page title when it is absent, to SLP. If the result contains "/" or "\",
-- return the syllable listing produced by mark_accent; otherwise return the
-- SLP string itself.
--
-- @param frame the invocation frame; arguments are read from its parent
-- @return string
function export.show(frame)
	local spec = {
		[1] = {default = mw.title.getCurrentTitle().text},
		novedic = {type = 'boolean'}
	}
	local args = require("Module:parameters").process(frame:getParent().args, spec)
	local slp = m_sa_translit.detect_to_SLP(args[1])

	if not match(slp, "[/\\]") then
		-- no accent marks: nothing to analyse
		return slp
	end
	local listing = mark_accent(slp)
	return listing
end

--- Template entry point: transcribe the word(s) given as `w` (or as the
-- first positional argument, or else the page title) and return the
-- formatted pronunciation lines.
--
-- Arguments read from the parent frame:
--   w / 1    the text to transcribe
--   a        list of numbers, one per word, passed on as accent positions
--   novedic  boolean; leave out the Vedic transcription
--
-- @param frame the invocation frame
-- @return wikitext string produced by make_table
function export.show1(frame)
	local spec = {
		[1] = {alias_of = 'w'},
		w = {default = mw.title.getCurrentTitle().text},
		a = {list = true, allow_holes = true, type = 'number'},
		novedic = {type = 'boolean'}
	}
	local args = require("Module:parameters").process(frame:getParent().args, spec)

	-- words -> phonemes -> shared phonology -> per-variety forms -> wikitext
	local transcription = phon_procs(convert_words(args.w, args.a))
	local dialects = make_dialects(transcription)
	return make_table(dialects, args.novedic)
end

return export