Module:pl-pronunciation

Definition from Wiktionary, the free dictionary
Jump to navigation Jump to search

Implements Template:pl-pronunciation; relies on Module:pl-IPA for IPA generation.


-- Table of public functions; it is what this module returns at the end of the file.
local export = {}

-- Language code used both to look up the language object and as the first
-- argument of the audio/homophones templates expanded in export.show.
local langcode = "pl"
local lang = require("Module:languages").getByCode(langcode)

-- Helper modules:
--   m_IPA       formats the list of transcriptions (format_IPA_full)
--   m_pl_IPA    converts a respelling into an IPA string (convert_to_IPA)
--   m_qual      formats the labels attached to rhymes/syllabifications (format_qualifier)
--   m_utilities formats the category list appended to the output (format_categories)
local m_IPA = require("Module:IPA")
local m_pl_IPA = require("Module:pl-IPA")
local m_qual = require("Module:qualifier")
local m_utilities = require("Module:utilities")

-- Letter inventories used by partition() to classify each character of a
-- spelling. Vowels are tested before consonants there, so the letters that
-- appear in both lists ("u", "y") are classified as vowels.
local vowels = "aeiouyąęó"
local vowel = string.format("[%s]", vowels)
local consonants = "bcćdfghjklłmnńpqrsśtuvwxyzźż"
local consonant = string.format("[%s]", consonants)

-- Letters that may follow "i" to form a two-letter vowel unit.
-- These are vowel digraphs, not necessarily actual phonetic diphthongs.
local diphthong_i_v2 = "[aąoeęuó]"

-- Vowel digraphs: first letter -> pattern for the allowed second letter.
local diphthongs = {
	a = "u",
	e = "u",
	i = diphthong_i_v2,
}

-- Consonant digraphs: first letter -> pattern for the allowed second letter.
local digraphs = {
	c = "[hz]",
	d = "[zźż]",
	q = "u",
	r = "z",
	s = "z",
}

-- Decide whether `respelling` is just `word` with syllable/stress markers
-- added and/or "j" written for "i" before a vowel.
--
-- Both strings have "j" + (a letter from diphthong_i_v2) rewritten to "i" +
-- that letter; the respelling additionally has every "'" and "." removed.
-- The function returns true when the two normalised strings are identical.
--
-- If this is changed, the next two functions also need to be.
local function is_respelling_close_enough(respelling, word)
	local glide_pattern = "j(" .. diphthong_i_v2 .. ")"

	-- Rewrite "j" before one of the listed vowels as "i"; the extra
	-- parentheses drop gsub's second return value (the match count).
	local function unify_glides(text)
		return (mw.ustring.gsub(text, glide_pattern, "i%1"))
	end

	local normalized_word = unify_glides(word)
	local without_marks = mw.ustring.gsub(respelling, "['.]", "")
	local normalized_respelling = unify_glides(without_marks)

	return normalized_word == normalized_respelling
end
export.s = is_respelling_close_enough

-- Split a respelling into a flat list of typed units.
--
-- Parameters:
--   word   the respelling; "'" and "-" are treated like "." (syllable break)
--   oword  the original spelling; the text of every vowel/consonant unit is
--          copied from here, so the casing of `oword` is what ends up in the
--          result
--
-- Returns a list of { kind, text } pairs, where kind is
--   "v"  a vowel or vowel digraph (see `diphthongs`)
--   "c"  a consonant or consonant digraph (see `digraphs`)
--   "b"  a syllable break (text is nil)
-- or nil when `word` contains a space (multiword terms are not hyphenated)
-- or any character that is neither a known letter nor a break.
local function partition(word, oword)
	local parts = {}
	local lenword = mw.ustring.len(word)
	local pos = 1
	-- Number of break characters seen in `word` that have no counterpart in
	-- `oword`; subtracting it maps a position in `word` to one in `oword`.
	local offset = 0
	word = mw.ustring.gsub(word, "['-]", ".")
	-- Classification is case-insensitive. The lowercase copy is computed once
	-- here instead of on every test inside the loop.
	local lowered = mw.ustring.lower(word)

	-- Consume the one- or two-letter unit starting at `pos`. `second_letters`
	-- maps a first letter to the pattern of letters that extend it to a digraph.
	local function take(kind, second_letters)
		local initial = mw.ustring.sub(lowered, pos, pos)
		local seq = 1
		local follower = second_letters[initial]
		if follower and mw.ustring.find(lowered, "^" .. initial .. follower, pos) then
			seq = 2
		end
		local start = pos - offset
		table.insert(parts, { kind, mw.ustring.sub(oword, start, start + seq - 1) })
		pos = pos + seq
	end

	while pos <= lenword do
		if mw.ustring.find(lowered, "^" .. vowel, pos) then
			take("v", diphthongs)
		elseif mw.ustring.find(lowered, "^" .. consonant, pos) then
			take("c", digraphs)
		elseif mw.ustring.find(word, "^% ", pos) then
			-- multiword, do not hyphenate
			return nil
		elseif mw.ustring.find(word, "^%.", pos) then
			-- syllable break; if the original spelling has no "'" or "-" at
			-- the matching position, the break exists only in the respelling
			-- and later positions in `oword` are shifted by one more
			if not mw.ustring.find(oword, "^['-]", pos - offset) then
				offset = offset + 1
			end
			table.insert(parts, { "b", nil })
			pos = pos + 1
		else
			-- unrecognized symbol
			return nil
		end
	end
	return parts
end

-- Build an automatic syllabification.
--
-- Parameters:
--   word    the respelling to syllabify (may contain ".", "'" or "-" as
--           explicit syllable breaks)
--   otitle  the original spelling; the letters of the returned syllables are
--           taken from it (see partition)
--
-- Returns a list of syllable strings, or nil when partition() rejects the
-- input (multiword term or unrecognised character).
--
-- Rules applied while walking the units returned by partition():
--   * a consonant that follows a vowel provisionally closes the syllable;
--   * if the very next unit is a vowel, that consonant is moved over to open
--     the new syllable instead;
--   * two adjacent vowel units always belong to different syllables;
--   * an explicit break ends the current syllable unconditionally;
--   * consonants left over at the end are attached to the last syllable.
function export.generate_hyphenation(word, otitle)
	local syllables = {}
	-- text accumulated for the syllable currently being built
	local cursyl = ""
	-- true while `cursyl` already contains its vowel
	local nucleus = false
	-- length (in characters) of the consonant unit that has just closed the
	-- previous syllable, or nil when there is nothing that could be moved
	local coda = nil
	-- declared local so the partition result does not leak into the global table
	local parts = partition(word, otitle)
	if not parts then return nil end
	for _, p in ipairs(parts) do
		local kind, part = p[1], p[2]
		if kind == "v" then
			if coda then
				-- move the closing consonant of the previous syllable to the
				-- start of this one
				cursyl = cursyl .. mw.ustring.sub(syllables[#syllables], -coda)
				syllables[#syllables] = mw.ustring.sub(syllables[#syllables], 1, -coda - 1)
				coda = nil
			end
			if nucleus then
				-- vowel directly after a vowel: start a new syllable
				table.insert(syllables, cursyl)
				cursyl = ""
			end
			nucleus = true
			coda = nil
			cursyl = cursyl .. part
		elseif kind == "c" then
			cursyl = cursyl .. part
			if nucleus then
				-- consonant after the vowel: close the syllable, but remember
				-- the consonant's length in case the next unit is a vowel
				table.insert(syllables, cursyl)
				cursyl = ""
				nucleus = false
				coda = mw.ustring.len(part)
			else
				coda = nil
			end
		elseif kind == "b" then
			-- explicit syllable break
			if #cursyl > 0 then
				table.insert(syllables, cursyl)
			end
			cursyl = ""
			nucleus = false
			coda = nil
		else
			-- unrecognized kind
			return nil
		end
	end
	if #cursyl > 0 then
		if nucleus or #syllables < 1 then
			table.insert(syllables, cursyl)
		else
			-- trailing consonants without a vowel join the last syllable
			syllables[#syllables] = syllables[#syllables] .. cursyl
		end
	end
	return syllables
end

-- IPA symbols counted as vowels when looking for the rhyme.
local ipavowel = "[aɛiɨɔu]"

-- Derive the rhyme from an IPA transcription.
--
-- The rhyme is the part of `ipa` that starts at the second-to-last vowel (or
-- at the only vowel when there is just one), with the characters "ˈ", "ˌ"
-- and "." removed.
--
-- Returns nil when `ipa` contains no vowel, or when the resulting snippet
-- still contains a space.
function export.generate_rhyme(ipa)
	-- Only the positions of the last two vowels matter, so just those two
	-- are tracked while scanning.
	local before_last, last
	local search_from = 1
	while true do
		local hit = mw.ustring.find(ipa, ipavowel, search_from)
		if not hit then
			break
		end
		before_last, last = last, hit
		search_from = hit + 1
	end

	if not last then
		return nil
	end

	local tail = mw.ustring.sub(ipa, before_last or last)
	tail = mw.ustring.gsub(tail, "[ˈˌ.]", "")
	if mw.ustring.find(tail, " ") then
		return nil -- copout, something must be wrong
	end
	return tail
end

-- Template entry point: renders the pronunciation section as wikitext list
-- items (IPA, audio, rhymes, syllabification, homophones).
--
-- Parameters read from the parent frame:
--   1=...              respellings; when none is given the page title is used
--   ipa=...            explicit transcriptions (disables automatic IPA,
--                      hyphenation and rhymes)
--   q=... / qual=...   qualifiers for the generated transcriptions
--   n=...              notes for the generated transcriptions
--   h=... / hyphen=    syllabifications ("-" alone suppresses the line)
--   r=... / rhymes=    rhymes ("-" alone suppresses the line)
--   a=... / audio=     audio files; ac=... / caption= their captions
--   hh= / homophones=  comma-separated homophones
--   hp=, rp=, hhp=     labels for syllabifications, rhymes and homophones
--   nohyphen=, norhymes=  boolean switches suppressing those lines
--   title=             overrides the page title (debugging/demonstration)
--
-- Returns the lines joined with newlines, followed by the formatted
-- categories (the category list is never filled in this function).
function export.show(frame)
	local args = require "Module:parameters".process(frame:getParent().args, {
		[1] = { list = true },
		
		["ipa"] = { list = true, default = nil, allow_holes = true },
		["qual"] = { list = true, allow_holes = true },
		["n"] = { list = true, allow_holes = true },
		["h"] = { list = true, allow_holes = true }, ["hyphen"] = {},
		["r"] = { list = true, allow_holes = true }, ["rhymes"] = {},
		["a"] = { list = true, default = nil }, ["audio"] = {},
		["ac"] = { list = true, default = nil }, ["caption"] = {},
		["hh"] = { default = "" }, ["homophones"] = {},
		
		["q"] = { list = true, default = nil, allow_holes = true },
		["hp"] = { list = true, default = nil, allow_holes = true },
		["rp"] = { list = true, default = nil, allow_holes = true },
		["hhp"] = { list = true, default = nil, allow_holes = true },
			
		["nohyphen"] = { type = "boolean", default = false },
		["norhymes"] = { type = "boolean", default = false },
		
		["title"] = { default = nil }, -- for debugging or demonstration only
	})

	local words, transcriptions, transcriptions_raw
	local lines = {}
	local categories = {}
	local actual = args["title"] or mw.title.getCurrentTitle().text
	if next(args[1]) ~= nil then
		words = args[1]
	else
		words = { actual }
	end
	local multiword = mw.ustring.find(words[1], " ")
	local hyphenations = args["h"]
	local rhymes = args["r"]
	local ipa = args["ipa"]
	if #ipa < 1 then ipa = nil end
	local qualifiers = args["q"]
	-- fall back to the "qual" list when no "q" value was supplied
	if not qualifiers or qualifiers.maxindex < 1 then qualifiers = args["qual"] end
	local hyphlabels = args["hp"]
	local rhymlabels = args["rp"]
	local nohyphen = args["nohyphen"]
	local norhymes = args["norhymes"]
	local homophones = mw.text.split(args["hh"], ",")
	local homophonelabels = args["hhp"]
	-- splitting the empty default yields { "" }; treat that as "no homophones"
	if #homophones == 1 and homophones[1] == "" then homophones = {} end
	local audio = {}
	local audios = args["a"]
	local captions = args["ac"]
	-- Long-named aliases override the first entry of the matching list.
	if args["hyphen"] then hyphenations[1] = args["hyphen"] end
	if args["rhymes"] then rhymes[1] = args["rhymes"] end
	if args["homophones"] then homophones = mw.text.split(args["homophones"], ",") end
	if args["audio"] then audios[1] = args["audio"] end
	if args["caption"] then captions[1] = args["caption"] end
	-- Automatic hyphenation/rhymes are only trusted when every respelling is
	-- the page title plus markers (see is_respelling_close_enough).
	local respelling_ok = true
	for i, w in ipairs(words) do
		if not is_respelling_close_enough(w, actual) then
			respelling_ok = false
			break
		end
	end

	for i, audiofile in ipairs(audios) do
		if audiofile then
			table.insert(audio, {file = audiofile, caption = captions[i] or "Audio"})
		end
	end

	if #hyphenations == 1 and hyphenations[1] == "-" then
		nohyphen = true
	end
	if #rhymes == 1 and rhymes[1] == "-" then
		norhymes = true
	end

	if ipa then
		-- NOTE(review): here the raw `ipa` argument list (strings) is passed
		-- on as-is, whereas the generated branch below builds tables with
		-- pron/qualifiers/note fields -- confirm that Module:IPA accepts both.
		transcriptions = ipa
		transcriptions_raw = {}
	else
		transcriptions = {}
		transcriptions_raw = {}
		for i = 1, #words do
			local qual = qualifiers[i]
			local ipaconv = m_pl_IPA.convert_to_IPA(words[i])
			table.insert(transcriptions_raw, ipaconv)
			table.insert(transcriptions, {
				pron = "/" .. ipaconv .. "/",
				qualifiers = qual and { qual } or nil,
				note = args.n[i]
			})
		end
	end
	table.insert(lines, "* " .. m_IPA.format_IPA_full(lang, transcriptions))
	
	for i, a in ipairs(audio) do
		table.insert(lines, "* " .. frame:expandTemplate{title = "audio", args = {langcode, a["file"], a["caption"]}})
	end

	-- After this step every entry of `hyphenations` is a list of syllables:
	-- either the automatically generated one, or each manual value split on
	-- "-", "." and "'".
	if not ipa and #hyphenations < 1 and respelling_ok and not multiword then
		local autohyph = export.generate_hyphenation(words[1], actual)
		if autohyph then
			table.insert(hyphenations, autohyph)
		end
	elseif #hyphenations >= 1 then
		local newhyphenations = {}
		for i, h in ipairs(hyphenations) do
			local t = {}
			for x in mw.text.gsplit(h, "[-.']") do
				table.insert(t, x)	
			end
			newhyphenations[i] = t
		end
		hyphenations = newhyphenations
	end

	if not norhymes then
		if not ipa and #rhymes < 1 and #transcriptions_raw > 0 and respelling_ok then
			local autorhyme = export.generate_rhyme(transcriptions_raw[1])
			if autorhyme then
				table.insert(rhymes, autorhyme)
			end
		end
	
		if #rhymes > 0 then
			-- merge rhymes if they have identical labels
			-- `false` marks "no group started yet"; it cannot be confused with
			-- a real label, which is either a string or nil (no label)
			local last_label = false
			local new_rhymes = {}
			local new_labels = {}
			local current_list = {}
			
			for i, r in ipairs(rhymes) do
				local label = rhymlabels[i]
				if last_label == label then
					table.insert(current_list, r)
				else
					if #current_list > 0 then
						table.insert(new_rhymes, current_list)
					end
					if last_label ~= false then
						table.insert(new_labels, last_label)
					end
					current_list = { r }
					last_label = label
				end
			end
			
			-- flush the final group
			table.insert(new_rhymes, current_list)
			table.insert(new_labels, last_label)
			rhymes = new_rhymes
			rhymlabels = new_labels
		end
		
		for i, r in ipairs(rhymes) do
			local label = ""
			if rhymlabels[i] then
				label = " " .. m_qual.format_qualifier(rhymlabels[i])
			end
			if #r >= 1 then
				local sylkeys = {}
				local sylcounts = {}
				-- get all possible syllable counts from syllabifications
				for _, h in ipairs(hyphenations) do
					local hl = #h
					if hl > 0 and not sylkeys[hl] then
						table.insert(sylcounts, hl)
						sylkeys[hl] = true
					end
				end
				local rhymeobjs = {}
				for _, rhyme in ipairs(r) do
					table.insert(rhymeobjs, {rhyme = rhyme})
				end
				table.insert(lines, "* " .. require("Module:rhymes").format_rhymes(
					{ lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }) .. label)
			end
		end
	end

	if not nohyphen then
		for i, h in ipairs(hyphenations) do
			local label = ""
			if hyphlabels[i] then
				label = " " .. m_qual.format_qualifier(hyphlabels[i])
			end
			table.insert(lines, "* Syllabification: " .. require("Module:links").full_link({lang = lang, alt = table.concat(h, "‧"), tr = "-"}) .. label)
		end
	end

	if #homophones > 0 then
		-- arguments for the homophones template: language code, then the
		-- homophones positionally, with labels passed as q1=, q2=, ...
		local homophone_param = {langcode}
		for i, h in ipairs(homophones) do
			table.insert(homophone_param, h)
			if homophonelabels[i] then
				homophone_param["q" .. i] = homophonelabels[i]
			end
		end
		table.insert(lines, "* " .. frame:expandTemplate{title = "homophones", args = homophone_param})
	end
	
	return table.concat(lines, "\n") .. m_utilities.format_categories(categories, lang)
end

return export