Module:pl-IPA

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:pl-IPA/documentation. [edit]
Useful links: subpage list • transclusions • testcases

This module is used by {{pl-IPA-auto}} to generate IPA pronunciations of Polish words.

See also: Module:pl-IPA/testcases.


-- Table of functions this module exposes; it is returned at the end of the file.
local export = {}
 
-- Builds the lookahead table for a consonant spelling that is followed by "i".
-- Before one of the listed vowel letters only the consonant and that vowel are
-- produced (the "i" itself yields no phone); in any other context ([false])
-- the consonant is followed by the vowel "i".
local function before_i(consonant)
	return {
		["a"] = { consonant, "a" },
		["ą"] = { consonant, "ɔ̃" },
		["e"] = { consonant, "ɛ" },
		["ę"] = { consonant, "ɛ̃" },
		["o"] = { consonant, "ɔ" },
		["ó"] = { consonant, "u" },
		["u"] = { consonant, "u" },
		["y"] = { consonant, "ɨ" },
		[false] = { consonant, "i" },
	}
end

-- Spelling-to-phone lookup, organised as a trie keyed by letters.
--   * a string value is the phone for the letter sequence read so far;
--   * an array value is a sequence of phones;
--   * a table with a [false] key asks for one more letter of lookahead, the
--     [false] entry being the reading used when the next letter has no entry;
--   * [false] = false marks a sequence that cannot be transcribed.
local letters2phones = {
	["a"] = { ["u"] = { "a", "w" }, [false] = "a" },
	["ą"] = "ɔ̃",
	["b"] = "b",
	["c"] = {
		["i"] = before_i("t͡ɕ"),
		["h"] = "x",
		["z"] = "t͡ʂ",
		[false] = "t͡s",
	},
	["ć"] = "t͡ɕ",
	["d"] = {
		["z"] = { ["i"] = before_i("d͡ʑ"), [false] = "d͡z" },
		["ż"] = "d͡ʐ",
		["ź"] = "d͡ʑ",
		[false] = "d",
	},
	["e"] = { ["u"] = { "ɛ", "w" }, [false] = "ɛ" },
	["ę"] = "ɛ̃",
	["f"] = "f",
	["g"] = "ɡ",
	["h"] = "h",
	["i"] = {
		["a"] = "ʲa",
		["ą"] = "ʲɔ̃",
		["e"] = "ʲɛ",
		["ę"] = "ʲɛ̃",
		["o"] = "ʲɔ",
		["ó"] = { "j", "u" },
		["u"] = { "j", "u" },
		[false] = "i",
	},
	["j"] = "j",
	["k"] = "k",
	["l"] = "l",
	["ł"] = "w",
	["m"] = "m",
	["n"] = {
		["i"] = before_i("ɲ"),

		-- No velar assimilation before "g"/"k" here: "bank", "bankowy",
		-- "bankowość" are [baŋk], [baŋˈkɔ.vɨ], [baŋˈko.voɕt͡ɕ], whereas
		-- "wybranka", "łapanka" and "zapinka" are rather [vɨˈbran.ka],
		-- [waˈpan.ka] and [zaˈpin.ka]. Words like "bank" are therefore better
		-- transcribed by hand ([bank] is not wrong either, just somewhat posh).
		-- Disabled entries:
		--   ["g"] = { "ŋ", "ɡ" },
		--   ["k"] = { "ŋ", "k" },
		[false] = "n",
	},
	["ń"] = "ɲ",
	["o"] = "ɔ",
	["ó"] = "u",
	["p"] = "p",
	["q"] = { ["u"] = { "k", "v" }, [false] = false },
	["r"] = { ["z"] = "ʐ", [false] = "r" },
	["s"] = {
		["i"] = before_i("ɕ"),
		["z"] = "ʂ",
		[false] = "s",
	},
	["ś"] = "ɕ",
	["t"] = "t",
	["u"] = "u",
	["v"] = "v",
	["w"] = "v",
	["x"] = { "k", "s" },
	["y"] = "ɨ",
	["z"] = { ["i"] = before_i("ʑ"), [false] = "z" },
	["ź"] = "ʑ",
	["ż"] = "ʐ",
	-- a hyphen produces no phone at all
	["-"] = {},
}
 
-- Set of every phone that counts as part of a word when syllables are
-- collected; anything else is passed through to the output unchanged.
local valid_phone = {}
for _, phone in ipairs({
	"a", "ʲa", "b", "d", "d͡z", "d͡ʑ", "d͡ʐ",
	"ɛ", "ɛ̃", "ʲɛ", "ʲɛ̃", "f", "ɡ", "h",
	"i", "j", "k", "l", "m", "n", "ŋ", "ɲ",
	"ɔ", "ʲɔ", "ɔ̃", "ʲɔ̃", "p", "r",
	"s", "ʂ", "ɕ", "t", "t͡s", "t͡ʂ", "t͡ɕ",
	"u", "v", "w", "x", "ɨ", "z", "ʑ", "ʐ",
}) do
	valid_phone[phone] = true
end
 
-- Marks that may be typed into the input to syllabify a word by hand, each
-- mapped to the IPA symbol written to the output in its place.
local sylmarks = {}
sylmarks["."] = "."
sylmarks["'"] = "ˈ"
sylmarks[","] = "ˌ"
 
-- Set of phones that act as a syllable nucleus (plain, nasal and
-- palatalising vowels).
local vowel = {}
for _, phone in ipairs({
	"a", "ɛ", "i", "ɔ", "u", "ɨ",
	"ɛ̃", "ɔ̃",
	"ʲa", "ʲɛ", "ʲɔ",
	"ʲɛ̃", "ʲɔ̃",
}) do
	vowel[phone] = true
end
 
-- Voiced phone -> phone written when it is devoiced. Any phone with an entry
-- here counts as voiced for the devoicing pass.
local devoice = {
	["b"] = "p",
	["d"] = "t",
	["ɡ"] = "k",
	["v"] = "f",
	["z"] = "s",
	["ʑ"] = "ɕ",
	["ʐ"] = "ʂ",
	["d͡z"] = "t͡s",
	["d͡ʑ"] = "t͡ɕ",
	["d͡ʐ"] = "t͡ʂ",
}
-- non-devoicable: these map to themselves, so they count as voiced but
-- never change
for _, sonorant in ipairs({ "m", "n", "ɲ", "ŋ", "w", "l", "j", "r" }) do
	devoice[sonorant] = sonorant
end
 
-- Nasal vowel -> the matching oral vowel. Its keys also serve as the test
-- for "is this phone a nasal vowel".
local denasalized = {}
denasalized["ɛ̃"] = "ɛ"
denasalized["ɔ̃"] = "ɔ"
denasalized["ʲɛ̃"] = "ʲɛ"
denasalized["ʲɔ̃"] = "ʲɔ"
 
-- Following phone -> nasal consonant inserted before it when the preceding
-- nasal vowel is written as an oral vowel plus a nasal consonant.
-- Each group lists the nasal first, then the phones that trigger it.
local nasal_map = {}
for _, group in ipairs({
	{ "m", "p", "b" },                   -- zębu, klępa
	{ "n", "t", "d" },                   -- wątek, piątek, swędzieć
	{ "ɲ", "t͡ɕ", "d͡ʑ", "ɕ", "ʑ" },       -- pięć, pędziwiatr, gęsi, więzi
	{ "ŋ", "k", "ɡ" },                   -- pąk, łęgowy
	{ "ŋ", "t͡ʂ", "d͡ʐ", "ʂ", "ʐ" },       -- pączek, mężny
	{ "ŋ", "t͡s", "d͡z", "s" },            -- wiedząc, pieniędzy, wąs
	-- no entry for "z" (więzy): that mapping is disabled
	-- { "ŋ", "z" },
}) do
	local nasal = group[1]
	for k = 2, #group do
		nasal_map[group[k]] = nasal
	end
end
 
-- Appends the outcome of a letters2phones lookup to the list `phones`.
-- A string is one phone and a table is a sequence of phones; nil adds nothing.
local function append_phones(phones, value)
	if type(value) == "table" then
		for _, phone in ipairs(value) do
			table.insert(phones, phone)
		end
	elseif value ~= nil then
		table.insert(phones, value)
	end
end

-- Generates an IPA transcription from Polish spelling.
--
-- word: the text to transcribe, or a table whose `args[1]` holds it
--       (presumably the frame object passed by #invoke -- confirm with callers).
--       The characters listed in `sylmarks` (".", "'", ",") may be used inside
--       a word to give its syllable breaks and stress by hand; such a word is
--       not syllabified or stressed automatically.
--
-- Returns the transcription as a string, or nil when the spelling contains a
-- sequence marked untranscribable in letters2phones ("q" not followed by "u").
-- Characters without a letters2phones entry are copied to the output as they are.
function export.IPA(word)
	if type(word) == "table" then
		word = word.args[1]
	end

	-- convert letters to phones by walking the letters2phones trie
	local phones = {}
	local l2ptab = letters2phones
	for cc in mw.ustring.gcodepoint(mw.ustring.lower(word)) do
		local ch = mw.ustring.char(cc)
		local value = l2ptab[ch]

		if value == nil and l2ptab ~= letters2phones then
			-- `ch` does not continue the pending letter sequence: emit the
			-- sequence's default reading, then look `ch` up from the root
			local pending = l2ptab[false]
			if pending == false then
				return nil
			end
			append_phones(phones, pending)
			l2ptab = letters2phones
			value = l2ptab[ch]
		end

		if type(value) == "table" then
			if value[false] == nil then
				-- plain sequence of phones
				append_phones(phones, value)
				l2ptab = letters2phones
			else
				-- more lookahead needed
				l2ptab = value
			end
		elseif type(value) == "string" then
			table.insert(phones, value)
			l2ptab = letters2phones
		else
			-- unknown character: keep it verbatim
			table.insert(phones, ch)
		end
	end

	-- The input ended in the middle of a letter sequence: emit its default
	-- reading exactly as the loop above does. The default may be a sequence of
	-- phones (e.g. after a final "ci") and must then be expanded, or `false`
	-- (a final "q"), which makes the word untranscribable.
	if l2ptab ~= letters2phones then
		local pending = l2ptab[false]
		if pending == false then
			return nil
		end
		append_phones(phones, pending)
	end

	-- simplify nasals: a nasal vowel followed by a phone listed in nasal_map
	-- becomes the oral vowel plus the nasal consonant given by nasal_map
	local new_phones = {}
	for i, phone in ipairs(phones) do
		local pnext = phones[i + 1]
		if denasalized[phone] and nasal_map[pnext] then
			table.insert(new_phones, denasalized[phone])
			table.insert(new_phones, nasal_map[pnext])
		else
			table.insert(new_phones, phone)
		end
	end
	phones = new_phones

	-- devoice, working backwards so that each phone sees the final form of
	-- the phone that follows it
	for i = #phones, 1, -1 do
		local pprev, pcurr, pnext = phones[i - 1], phones[i]
		-- the following phone, ignoring manual syllable marks
		local j = i
		repeat
			j = j + 1
			pnext = phones[j]
		until not pnext or not sylmarks[pnext]
		-- a voiced phone loses its voice unless a voiced phone or a vowel follows
		if devoice[pcurr] and not devoice[pnext] and not vowel[pnext] and not denasalized[pnext] then
			phones[i] = devoice[pcurr]
		end
		-- prz, trz, krz, tw, kw: "v" and "ʐ" are devoiced after a voiceless phone
		if ((pcurr == "v") or (pcurr == "ʐ")) and valid_phone[pprev] and not devoice[pprev] and not vowel[pprev] and not denasalized[pprev] then
			phones[i] = devoice[pcurr]
		end
	end

	-- collect syllables
	-- `words` holds, in output order, either a list of syllables (one per word)
	-- or a plain string (non-phone characters and manually marked words).
	-- `curword` is the word being built, `sylbuf` its unfinished syllable,
	-- `had_vowel` whether that syllable already has a vowel, and `sylmarked`
	-- whether the word contains manual marks.
	local words, curword, sylmarked, sylbuf, had_vowel = {}, nil, false
	for i, pcurr in ipairs(phones) do
		local pprev, pnext = phones[i - 1], phones[i + 1]

		if valid_phone[pcurr] then
			if not curword then
				curword, sylbuf, had_vowel, sylmarked = {}, '', false, false
				table.insert(words, curword)
			end

			local same_syl = true

			if vowel[pcurr] then
				-- a second vowel always opens a new syllable
				if had_vowel then
					same_syl = false
				end
				had_vowel = true
			elseif had_vowel then
				-- a consonant after the vowel opens the next syllable when ...
				if vowel[pnext] then
					-- ... a vowel follows it directly
					same_syl = false
				elseif not vowel[pprev] and not vowel[pnext] then
					-- ... it stands between two non-vowels
					same_syl = false
				elseif ((pcurr == "s") and ((pnext == "t") or (pnext == "p") or (pnext == "k")))
				or (pnext == "r") or (pnext == "f") or (pnext == "w")
				or ((pcurr == "ɡ") and (pnext == "ʐ"))
				or ((pcurr == "d") and ((pnext == "l") or (pnext == "w") or (pnext == "ɲ")))
				then
					-- ... it and the next consonant should belong to a common
					-- syllable, so the break goes before the pair
					same_syl = false
				end
			end

			if same_syl then
				sylbuf = sylbuf .. pcurr
			else
				table.insert(curword, sylbuf)
				sylbuf, had_vowel = pcurr, vowel[pcurr]
			end
		elseif (curword or valid_phone[pnext]) and sylmarks[pcurr] then
			-- manual syllable or stress mark inside or right before a word
			if not curword then
				curword, sylbuf, had_vowel = {}, '', false
				table.insert(words, curword)
			end
			sylmarked = true
			if sylbuf then
				table.insert(curword, sylbuf)
				sylbuf = ''
			end
			table.insert(curword, sylmarks[pcurr])
		else
			-- any other character ends the current word and is kept as it is
			if sylbuf then
				if #curword > 0 and not had_vowel then
					-- trailing consonants join the preceding syllable
					curword[#curword] = curword[#curword] .. sylbuf
				else
					table.insert(curword, sylbuf)
				end
				if sylmarked then
					-- manually marked word: freeze it as a string so that the
					-- automatic marking below leaves it alone
					words[#words] = table.concat(curword)
				end
			end
			curword, sylbuf = nil, nil
			table.insert(words, pcurr)
		end
	end
	-- close the last word the same way
	if sylbuf then
		if #curword > 0 and not had_vowel then
			curword[#curword] = curword[#curword] .. sylbuf
		else
			table.insert(curword, sylbuf)
		end
		if sylmarked then
			words[#words] = table.concat(curword)
		end
	end

	-- mark syllable breaks and stress: the stress mark goes before the
	-- penultimate syllable, a "." before every other non-initial syllable
	for i, entry in ipairs(words) do
		if type(entry) == "table" then
			-- unless already marked
			if not ((entry[2] == ".") or (entry[2] == "ˈ") or (entry[2] == "ˌ")) then
				for j, syl in ipairs(entry) do
					if j == (#entry - 1) then
						entry[j] = "ˈ" .. syl
					elseif j ~= 1 then
						entry[j] = "." .. syl
					end
				end
			end
			words[i] = table.concat(entry)
		end
	end

	return table.concat(words)
end
 
-- Hand the table of exported functions to whoever loads this module.
return export