Module:izh-pronunciation

From Wiktionary, the free dictionary
Jump to navigation Jump to search

local export = {}
local m_izh = require("Module:izh")
local m_IPA = require("Module:IPA")
local gsub_lookahead = require("Module:gsub lookahead")

local lang = m_izh.lang
local U = mw.ustring.char

--- <<< DATA START >>> ---

-- IPA length and stress marks.
local LONG = "ː"
local SEMILONG = "ˑ"
local STRESS_PRIMARY = "ˈ"
local STRESS_SECONDARY = "ˌ"
-- Combining diacritics, built from their code points.
local FRONTAL = U(0x0308) -- U+0308 COMBINING DIAERESIS
local NONSYLLABIC = U(0x032F) -- U+032F COMBINING INVERTED BREVE BELOW
local TIE = U(0x0361) -- U+0361 COMBINING DOUBLE INVERTED BREVE (joins the two halves of an affricate)
local VERYSHORT = U(0x0306) -- U+0306 COMBINING BREVE
local PALATAL = "ʲ"
local IPA_VOWELS = "ɑeiouyæøɨə"
-- Private Use Area code points used as internal placeholders while a
-- transcription is being built; the pipeline functions below replace or
-- strip them before producing the final string.
local AUTO_STRESS = U(0xEEEE)
local VIRTUAL_BREAK = U(0xEEEC)
local J_PALATALIZE = U(0xEEEA)
local REALLY_JUST_PALATAL = U(0xEEE8)
local VIRTUAL_BREAK_UNGEMINATE = U(0xEEE6)

-- Character-class contents (no surrounding brackets): the consonant
-- inventories from Module:izh extended with extra symbols used here.
local IPA_CONSONANTS = m_izh.consonants .. "ɫʃʒ"
local IPA_CONSONANTS_GEMINATABLE = m_izh.consonants_geminatable .. "ɫ" .. "z" .. "ž"

-- Markup characters accepted in respellings.
local PALATALIZE = "'"
local UNGEMINATE = "/"
-- Patterns matching zero-or-more / one-or-more characters in the range
-- U+0300..U+036F (the Combining Diacritical Marks block).
local ANY_DIACRITICS = "[" .. U(0x0300) .. "-" .. U(0x036F) .. "]*"
local SOME_DIACRITICS = "[" .. U(0x0300) .. "-" .. U(0x036F) .. "]+"

--- <<< DATA END >>> ---

--- <<< COMMON START >>> ---

--- Splits a word or phrase into a list of syllable strings.
-- Matching is done on a lower-cased copy of `word`, but the returned
-- syllables are cut from `word` itself, so the original casing is kept.
-- @param word string to split (respelling with optional markup characters)
-- @param keep_sep_symbols `true` to keep every separator character as the
--   first character of the syllable that follows it; a string to keep only
--   the separator characters that occur literally in that string; any other
--   value drops all separators
-- @return array of syllables, in order
local function split_syllables(word, keep_sep_symbols)
	local consonant = "[" .. IPA_CONSONANTS .. "]"
	local consonant_diacritic = "[" .. U(0x030A) .. U(0x0325) .. "]"
	local vowel = m_izh.vowel
	local sep_symbols = m_izh.sep_symbols .. VIRTUAL_BREAK
	local vowel_sequences = m_izh.vowel_sequences
	local ungeminate_marks = "[" .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. "]"

	-- The patterns do not depend on the position, so build them once.
	local cv_pattern = "^" .. consonant .. consonant_diacritic .. "*[" .. PALATALIZE .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. J_PALATALIZE .. PALATAL .. "]*" .. vowel
	local c_pattern = "^" .. consonant
	local affricate_pattern = "^" .. consonant .. TIE .. consonant
	local v_pattern = "^" .. vowel
	local vv_pattern = "^(" .. vowel .. ungeminate_marks .. "*" .. vowel .. ")"
	local sep_pattern = "^[" .. sep_symbols .. "]"

	-- `pos` is a code-point index, so the loop bound must be the code-point
	-- length rather than the byte length (#word).
	local lowered = mw.ustring.lower(word)
	local length = mw.ustring.len(word)

	local res = {}
	local syllable = ""
	local pos = 1
	local found_vowel = false

	while pos <= length do
		if mw.ustring.find(lowered, cv_pattern, pos) then
			-- CV: end current syllable if we have found a vowel
			if found_vowel then
				if #syllable > 0 then table.insert(res, syllable) end
				found_vowel = false
				syllable = ""
			end
			syllable = syllable .. mw.ustring.sub(word, pos, pos)
			pos = pos + 1
		elseif mw.ustring.find(lowered, c_pattern, pos) then
			if mw.ustring.find(lowered, affricate_pattern, pos) then
				-- /t͡s/: consonant + tie + consonant always opens a new syllable
				-- NOTE(review): when no vowel has been seen yet, whatever was
				-- already collected in `syllable` (e.g. a kept separator) is
				-- overwritten here; behaviour kept as in the original.
				if found_vowel and #syllable > 0 then table.insert(res, syllable) end
				syllable = mw.ustring.sub(word, pos, pos + 2)
				pos = pos + 3
				found_vowel = false
			else
				-- C: continue
				syllable = syllable .. mw.ustring.sub(word, pos, pos)
				pos = pos + 1
			end
		elseif mw.ustring.find(lowered, v_pattern, pos) then
			if found_vowel then
				-- already found a vowel, end current syllable
				if #syllable > 0 then
					table.insert(res, syllable)
				end
				syllable = ""
			end
			found_vowel = true

			-- check for diphthongs or long vowels, ignoring ungemination marks
			local seq_ok = false
			local search_from = mw.ustring.gsub(mw.ustring.sub(lowered, pos), ungeminate_marks, "")
			for _, sequence in pairs(vowel_sequences) do
				if mw.ustring.find(search_from, "^" .. sequence) then
					seq_ok = true
					break
				end
			end

			if seq_ok then
				-- consume both vowels plus any marks standing between them
				local total = mw.ustring.len(select(3, mw.ustring.find(lowered, vv_pattern, pos)))
				syllable = syllable .. mw.ustring.sub(word, pos, pos + total - 1)
				pos = pos + total
			else
				syllable = syllable .. mw.ustring.sub(word, pos, pos)
				pos = pos + 1
			end
		elseif mw.ustring.find(lowered, sep_pattern, pos) then
			-- separates syllables
			if #syllable > 0 then
				table.insert(res, syllable)
			end

			local sepchar = mw.ustring.sub(word, pos, pos)
			-- Plain (non-pattern) search: the separator may be a pattern
			-- magic character such as ".", which would otherwise match anything.
			local keep = keep_sep_symbols == true
				or (type(keep_sep_symbols) == "string" and keep_sep_symbols:find(sepchar, 1, true) ~= nil)
			syllable = keep and sepchar or ""
			pos = pos + 1
			found_vowel = false
		else
			-- ?: continue
			syllable = syllable .. mw.ustring.sub(word, pos, pos)
			pos = pos + 1
		end
	end

	if #syllable > 0 then
		table.insert(res, syllable)
	end

	return res
end

--- First normalisation pass over a respelling.
-- Rewrites "ts" as a tied affricate and turns an ungemination mark that
-- stands between a vowel and "i" into VIRTUAL_BREAK_UNGEMINATE.
-- @param text respelling
-- @return the rewritten string
local function zeroth_round_of_common_replacements(text)
	local affricated = mw.ustring.gsub(text, "ts", "t͡s")
	local before_i = "([" .. m_izh.vowels .. m_izh.consonants .. "])(" .. m_izh.vowel .. ")" .. UNGEMINATE .. "i"
	local marked = mw.ustring.gsub(affricated, before_i, "%1%2" .. VIRTUAL_BREAK_UNGEMINATE .. "i")
	return marked
end

--- Maps orthographic letters to their IPA symbols.
-- "n" before k/g becomes "ŋ"; a, ä, ö, v, ь get their IPA equivalents and
-- the apostrophe ’ becomes a syllable dot.
-- @param text string to convert
-- @return the converted string
local function first_round_of_common_replacements(text)
	local velar_nasal = {
		["nk"] = "ŋk",
		["ng"] = "ŋg",
	}
	local letter_to_ipa = {
		["a"] = "ɑ",
		["ä"] = "æ",
		["ö"] = "ø",
		["v"] = "ʋ",
		["ь"] = "ɨ",
		["’"] = ".",
--		["-"] = STRESS_SECONDARY,
	}

	local nasalized = mw.ustring.gsub(text, "n[kg]", velar_nasal)
	local converted = mw.ustring.gsub(nasalized, "[aäövь’]", letter_to_ipa)
	return converted
end

--- Second pass of letter-to-IPA replacements, run after length marking.
-- @param text string to convert
-- @param do_palatal_repls when truthy, palatalized n/ʃ/ʒ are replaced by the
--   dedicated palatal symbols ɲ/ɕ/ʑ
-- @return the converted string
local function second_round_of_common_replacements(text, do_palatal_repls)
	text = mw.ustring.gsub(text, "[cšž]", {
		["c"] = "t͡ʃ",
		["š"] = "ʃ",
		["ž"] = "ʒ"
	})
	-- h followed by a length mark is written x
	text = mw.ustring.gsub(text, "h([" .. LONG .. SEMILONG .. "])", "x%1")
	if do_palatal_repls then
		-- The lookup key is the captured consonant. "ʒ" has to be part of
		-- the character class, otherwise its table entry can never be used.
		text = mw.ustring.gsub(text, "([nʃʒ])" .. PALATAL, {
			["n"] = "ɲ",
			["ʃ"] = "ɕ",
			["ʒ"] = "ʑ"
		})
	end
	-- a palatalized l is never the velarized ɫ
	text = mw.ustring.gsub(text, "ɫ" .. PALATAL, "l" .. PALATAL)
	-- ASCII g -> IPA ɡ
	text = mw.ustring.gsub(text, "g", "ɡ")
	return text
end

--- Replaces "j" after a consonant matched by `filter` with a palatalization mark.
-- If the character after "j" is the PALATALIZE marker only PALATAL is
-- inserted; otherwise PALATAL followed by LONG is inserted.
-- @param text string to process
-- @param filter pattern fragment matching the preceding consonant
-- @return the results of mw.ustring.gsub (string and substitution count)
local function automatic_palatalization(text, filter)
	local function palatalize(consonant, following)
		local mark = PALATAL
		if following ~= PALATALIZE then
			mark = mark .. LONG
		end
		return consonant .. mark .. following
	end
	return mw.ustring.gsub(text, "(" .. filter .. ")j(.?)", palatalize)
end

--- Converts explicit PALATALIZE markers into the IPA PALATAL sign.
-- Markers after a consonant become PALATAL, all other markers are removed
-- and doubled PALATAL signs are collapsed. Finally a plain "t" that is
-- separated from a palatalized "t" only by a stress/syllable boundary is
-- palatalized as well.
-- @param text string to process
-- @return the processed string
local function manual_palatalization(text)
	local has_marker = mw.ustring.find(text, PALATALIZE)
	if has_marker then
		local after_consonant = "([" .. IPA_CONSONANTS .. "])" .. PALATALIZE
		text = mw.ustring.gsub(text, after_consonant, "%1" .. PALATAL)
		text = mw.ustring.gsub(text, PALATALIZE, "")
		text = mw.ustring.gsub(text, PALATAL .. PALATAL, PALATAL)
	end
	local boundary = "[" .. STRESS_SECONDARY .. AUTO_STRESS .. ".]"
	return (mw.ustring.gsub(text, "(t)(" .. boundary .. ")(t" .. PALATAL .. ")", "%1" .. PALATAL .. "%2%3"))
end

-- Patterns for vowel pairs that are marked as diphthongs: first element(s),
-- then the second (non-syllabic) element.
local IPA_diphthongs = {
	"[ɑeouyæø]i",
	"[ɑeio]u",
	"[æeiø]y"
}

--- Marks long vowels and diphthongs.
-- A doubled IPA vowel becomes vowel + LONG; every diphthong listed in
-- IPA_diphthongs (optionally with VERYSHORT on its first element) gets
-- NONSYLLABIC appended.
-- @param text string to process
-- @return the processed string
local function long_vowels_and_diphthongs(text)
	text = mw.ustring.gsub(text, "([" .. IPA_VOWELS .. "])%1", "%1" .. LONG)
	for _, diphthong in ipairs(IPA_diphthongs) do
		-- Build a copy of the pattern that allows VERYSHORT between the
		-- first and the second element.
		local mod_diphthong
		if mw.ustring.find(diphthong, "%]$") then
			-- Pattern ends in a character class: insert before that class.
			-- The second substitution continues from the result of the first
			-- one; previously it restarted from `diphthong` and silently
			-- discarded the first substitution.
			mod_diphthong = mw.ustring.gsub(diphthong, "(.)(%[[^%]]-%])", "%1" .. VERYSHORT .. "?%2")
			mod_diphthong = mw.ustring.gsub(mod_diphthong, "(%[[^%]]-%])(%[[^%]]-%])", "%1" .. VERYSHORT .. "?%2")
		else
			-- Pattern ends in a single letter: insert before that letter.
			mod_diphthong = mw.ustring.sub(diphthong, 1, -2) .. VERYSHORT .. "?" .. mw.ustring.sub(diphthong, -1, -1)
		end
		text = mw.ustring.gsub(text, "(" .. mod_diphthong .. ")", "%1" .. NONSYLLABIC)
	end
	return text
end

--- Rewrites a doubled letter as letter + LONG and moves a PALATAL sign in
-- front of a directly preceding LONG mark.
-- @param text string to process
-- @return the processed string
local function long_consonants(text)
	local lengthened = mw.ustring.gsub(text, "(%a)%1", "%1" .. LONG)
	local reordered = mw.ustring.gsub(lengthened, LONG .. PALATAL, PALATAL .. LONG)
	return reordered
end

--- Assimilates "n" to a following stop, also across whitespace, AUTO_STRESS
-- and hyphens: "m" before m/p/b, "ŋ" before the velar stops.
-- @param text string to process
-- @return the processed string
local function standard_sandhi(text)
	local gap = "[%s" .. AUTO_STRESS .. "-]*"
	local labial = mw.ustring.gsub(text, "n(" .. gap .. "[mpb])", "m%1")
	local velar = mw.ustring.gsub(labial, "n(" .. gap .. "[kgɡ̊])", "ŋ%1")
	return velar
end

--- Finalises stress marks.
-- AUTO_STRESS and hyphens (dropping a dot right after them) become
-- STRESS_SECONDARY; STRESS_PRIMARY is put at the start of the text and
-- after every space. The result is NFC-normalised.
-- @param text transcription
-- @return the transcription with stress marks
local function add_primary_stress(text)
	local marked = mw.ustring.gsub(text, AUTO_STRESS, "-")
	marked = mw.ustring.gsub(marked, "-%.", "-")
	marked = mw.ustring.gsub(marked, "-", STRESS_SECONDARY)
	local per_word = mw.ustring.gsub(marked, " ", " " .. STRESS_PRIMARY)
	return mw.ustring.toNFC(STRESS_PRIMARY .. per_word)
end

--- Tells whether a syllable starts with a space, AUTO_STRESS or a hyphen.
-- @param syllable syllable string
-- @return the results of mw.ustring.find (truthy when it matches, nil otherwise)
local function is_stressed_syllable(syllable)
	local stress_prefix = "^[ " .. AUTO_STRESS .. "-]"
	return mw.ustring.find(syllable, stress_prefix)
end

--- Inserts AUTO_STRESS markers in place.
-- Counting from the last stressed syllable (the first syllable always counts
-- as stressed), every second unstressed syllable is prefixed with
-- AUTO_STRESS unless the syllable after it is already stressed. The last
-- syllable is never marked.
-- @param syllables array of syllables, modified in place
local function add_secondary_stress(syllables)
	local unstressed_run = 0
	for index = 1, #syllables - 1 do
		local syllable = syllables[index]
		if index == 1 or is_stressed_syllable(syllable) then
			unstressed_run = 0
		else
			unstressed_run = unstressed_run + 1
			if unstressed_run == 2 then
				unstressed_run = 0
				local next_is_stressed = is_stressed_syllable(syllables[index + 1])
				if not next_is_stressed then
					syllables[index] = AUTO_STRESS .. syllable
				end
			end
		end
	end
end

--- Recomputes syllables and automatic stress for text containing virtual breaks.
-- If the text has no VIRTUAL_BREAK / VIRTUAL_BREAK_UNGEMINATE it is returned
-- unchanged. Otherwise AUTO_STRESS and VIRTUAL_BREAK are removed,
-- VIRTUAL_BREAK_UNGEMINATE is turned back into UNGEMINATE, and the text is
-- re-split and re-stressed.
-- @param text string to process
-- @return the processed string
local function clean_virtual_break(text)
	local markers = "[" .. VIRTUAL_BREAK .. VIRTUAL_BREAK_UNGEMINATE .. "]"
	if not mw.ustring.find(text, markers) then
		return text
	end
	local stripped = mw.ustring.gsub(text, "[" .. AUTO_STRESS .. VIRTUAL_BREAK .. "]", "")
	local restored = mw.ustring.gsub(stripped, VIRTUAL_BREAK_UNGEMINATE, UNGEMINATE)
	local syllables = split_syllables(restored, true)
	add_secondary_stress(syllables)
	return table.concat(syllables)
end

--- Removes VIRTUAL_BREAK, UNGEMINATE and VIRTUAL_BREAK_UNGEMINATE characters.
-- @param text string to clean
-- @return the results of mw.ustring.gsub (string and substitution count)
local function clean_ungeminate(text)
	local markers = "[" .. VIRTUAL_BREAK .. UNGEMINATE .. VIRTUAL_BREAK_UNGEMINATE .. "]"
	return mw.ustring.gsub(text, markers, "")
end

--- Applies gemination in place (CVCVV -> CVC:VV).
-- After a stressed syllable that ends in a single vowel directly after its
-- onset, the onset consonant of a following unstressed syllable is moved
-- to the end of the previous syllable and replaced by `diacritic`, provided
-- that onset is geminatable and followed by two vowels.
-- @param syllables array of syllables, modified in place
-- @param diacritic string that replaces the moved onset (e.g. a length mark)
local function do_gemination(syllables, diacritic)
	local marks = "[" .. PALATALIZE .. J_PALATALIZE .. "]*"
	local onset = "^([" .. IPA_CONSONANTS_GEMINATABLE .. "]" .. marks .. ")"
	local onset_before_two_vowels = "^[" .. IPA_CONSONANTS_GEMINATABLE .. "]" .. marks .. m_izh.vowel .. m_izh.vowel
	local short_open = "^[ " .. AUTO_STRESS .. "-]?[" .. IPA_CONSONANTS .. PALATALIZE .. J_PALATALIZE .. TIE .. "]*" .. m_izh.vowel .. "$"

	local previous_triggers = false
	for index, syllable in ipairs(syllables) do
		local stressed = index == 1 or is_stressed_syllable(syllable)
		if previous_triggers and not stressed then
			-- the two vowels may span this syllable and the next one
			local lookahead = syllable .. (syllables[index + 1] or "")
			if mw.ustring.find(lookahead, onset_before_two_vowels) then
				local moved = mw.ustring.match(syllable, onset)
				syllables[index - 1] = syllables[index - 1] .. moved
				syllables[index] = mw.ustring.gsub(syllable, "^" .. moved, diacritic)
			end
		end
		-- judged on the syllable as it was before any modification above
		previous_triggers = stressed and mw.ustring.find(syllable, short_open)
	end
end

-- Diacritic pair appended to reduced vowels:
-- U+0325 COMBINING RING BELOW followed by U+0306 COMBINING BREVE.
local REDUCED = U(0x0325) .. U(0x0306)
-- Vowels that have a dedicated reduced form, keyed by orthographic letter
-- and mapped to the IPA vowel plus REDUCED. Also used elsewhere purely as a
-- membership test for these four vowels.
local reduce_final_vowel = {
	["o"] = "o" .. REDUCED,
	["ö"] = "ø" .. REDUCED,
	["u"] = "u" .. REDUCED,
	["y"] = "y" .. REDUCED,
}

--- Reduces a vowel letter.
-- @param letter vowel to reduce
-- @param filter optional string of letters; when given, only these letters
--   become "ə" and any other letter is returned with VERYSHORT appended.
--   Without a filter every letter becomes "ə".
-- @return the reduced vowel
local function to_schwa(letter, filter)
	if not filter or mw.ustring.find(letter, "[" .. filter .. "]") then
		return "ə"
	end
	return letter .. VERYSHORT
end

--- Returns an iterator over the words of a syllable list.
-- A syllable beginning with whitespace starts a new word. Each call yields a
-- fresh array with the syllables of the next word; leading whitespace is
-- stripped from the first syllable of each word.
-- @param syllables array of syllables
-- @return iterator function
local function split_syllables_by_words(syllables)
	local cursor = 1
	return function()
		if cursor > #syllables then
			return
		end
		local word = { (mw.ustring.gsub(syllables[cursor], "^%s+", "")) }
		local scan = cursor + 1
		while scan <= #syllables and not mw.ustring.find(syllables[scan], "^%s") do
			word[#word + 1] = syllables[scan]
			scan = scan + 1
		end
		cursor = scan
		return word
	end
end

--- Runs `fn` on the syllables of every word and rebuilds the list in place.
-- The list is emptied, split into words, `fn` is called with each word's
-- syllable array (which it may modify), and the results are appended back,
-- with a space put in front of the first syllable of every word but the first.
-- @param out_syllables array of syllables, rewritten in place
-- @param fn function taking one word's syllable array
local function do_by_word_syllables(out_syllables, fn)
	local snapshot = {}
	for key, value in pairs(out_syllables) do
		snapshot[key] = value
		out_syllables[key] = nil
	end

	local first_word = true
	for word in split_syllables_by_words(snapshot) do
		fn(word)
		for position, syllable in ipairs(word) do
			if position == 1 and not first_word then
				syllable = " " .. syllable
			end
			out_syllables[#out_syllables + 1] = syllable
		end
		first_word = false
	end
end

--- Tells whether a syllable starts with a consonant followed by TIE.
-- @param syllable syllable string, or nil/false
-- @return `syllable` itself when it is nil/false, otherwise the match
--   position (truthy) or nil
local function begins_with_affricate(syllable)
	if not syllable then
		return syllable
	end
	return (mw.ustring.find(syllable, "^[" .. IPA_CONSONANTS .. "]" .. TIE))
end

--- Shared driver for vowel reduction over the syllables of one word.
-- Walks the syllables in order, tracks whether the previous syllable was
-- stressed and/or "long", and rewrites the nucleus and coda of every
-- syllable that qualifies for reduction through `replacement`.
-- @param syllables array of syllables of one word, modified in place
-- @param replacement function (nucleus, coda, index) returning the text that
--   replaces the nucleus and everything after it in that syllable
local function do_reduction_internal(syllables, replacement)
	local prev_was_stressed = false
	local prev_was_long = false
	local syllables_since_last_stressed = 0
	for index, syllable in ipairs(syllables) do
		-- the first syllable always counts as stressed
		local stressed = index == 1 or is_stressed_syllable(syllable)
		local final = index == #syllables
		if stressed then
			syllables_since_last_stressed = 0
		else
			syllables_since_last_stressed = syllables_since_last_stressed + 1
		end
		-- an affricate at the start of this syllable also makes the previous one count as long
		prev_was_long = prev_was_long or begins_with_affricate(syllable)

		if mw.ustring.find(syllable, "^j'") and prev_was_long then
			-- hack. /Cj'/ is one consonant.
			-- Re-evaluate the previous syllable's length: it stays long only
			-- with two vowels or with two consonants after the vowel.
			local previous_syllable = syllables[index - 1]
			if mw.ustring.find(previous_syllable, m_izh.vowel .. "[" .. IPA_CONSONANTS .. "]") then
				prev_was_long = mw.ustring.find(previous_syllable, m_izh.vowel .. m_izh.vowel) or mw.ustring.find(previous_syllable, m_izh.vowel .. "[" .. IPA_CONSONANTS .. "][" .. IPA_CONSONANTS .. "]")
			end
		end

		-- Reduce an unstressed syllable when it follows a long stressed
		-- syllable, or when it is a non-initial final syllable that is more
		-- than one syllable away from the stress or follows a long syllable.
		if not stressed and ((prev_was_stressed and prev_was_long) or (index > 1 and final and (syllables_since_last_stressed > 1 or prev_was_long))) then
			syllables[index] = mw.ustring.gsub(syllable, "(" .. m_izh.vowel .. "+)(.*)", function (nucleus, coda) return replacement(nucleus, coda, index) end)
		end
		-- reduce the next syllable only if the current syllable is stressed and not short
		prev_was_stressed = stressed
		-- long = a vowel followed by a consonant or another vowel, judged on the unmodified syllable
		prev_was_long = mw.ustring.find(syllable, m_izh.vowel .. "[" .. IPA_CONSONANTS .. m_izh.vowels .. "]")
	end
end

--- Narrow-transcription vowel reduction for the syllables of one word.
-- Supplies the replacement callback for do_reduction_internal.
-- @param syllables array of syllables of one word, modified in place
local function do_reduction_word(syllables)
	local function reduce(nucleus, coda, index)
		-- long vowel (same vowel twice): shorten it
		if mw.ustring.find(nucleus, "(" .. m_izh.vowel .. ")%1") then
			return mw.ustring.sub(nucleus, 1, 1) .. coda
		end

		-- diphthong: keep the first vowel as nucleus; the second element
		-- goes to the coda unless it is "i", which is dropped but keeps the
		-- syllable from counting as open
		local never_open = false
		if mw.ustring.find(nucleus, m_izh.vowel .. m_izh.vowel) then
			local offglide = mw.ustring.sub(nucleus, 2)
			if offglide == "i" then
				never_open = true
			else
				coda = offglide .. coda
			end
			nucleus = mw.ustring.sub(nucleus, 1, 1)
		end

		if index == #syllables then
			-- final syllable
			if #coda == 0 and not never_open then
				-- reduced, but simply drop it
				return reduce_final_vowel[nucleus] or ""
			end
			if coda == "" and reduce_final_vowel[nucleus] then
				-- /oi/, /ui/, /yi/, /øi/
				return reduce_final_vowel[nucleus]
			end
			local reduced = "e"
			if nucleus ~= "e" then
				reduced = to_schwa(nucleus, "aä")
			end
			return reduced .. coda
		end

		-- non-final syllable: the outcome depends on the following syllable
		local following = syllables[index + 1]
		local following_starts_with_vowel = mw.ustring.find(following, "^[ -]?%.?" .. m_izh.vowel)
		local following_stressed = is_stressed_syllable(following)
		local following_closed = mw.ustring.find(following, "[" .. IPA_CONSONANTS .. "]$") or begins_with_affricate(syllables[index + 2])

		if following_starts_with_vowel then
			return nucleus .. coda
		end
		if not following_stressed and not following_closed then
			return to_schwa(nucleus) .. coda
		end
		return to_schwa(nucleus, "aäe") .. coda
	end

	do_reduction_internal(syllables, reduce)
end

--- Coalesces a word-final bare "a"/"ä" syllable with the preceding vowel.
-- When the last syllable is exactly "a" or "ä", the word has more than one
-- syllable and the penultimate syllable is not stressed, the last letter of
-- the penultimate syllable is looked up; on a hit it is replaced by a
-- doubled vowel and the final syllable is removed.
-- @param syllables array of syllables of one word, modified in place
local function do_coalesce_rhyme_word(syllables)
	local count = #syllables
	local vowel = mw.ustring.match(syllables[count], "^[aä]$")
	if not vowel or count <= 1 then
		return
	end
	if is_stressed_syllable(syllables[count - 1] .. syllables[count]) then
		return
	end

	local outcomes = {
		["a"] = { ["i"] = "e", ["u"] = "o", ["o"] = "o" },
		["ä"] = { ["i"] = "e", ["y"] = "ö", ["ö"] = "ö" },
	}
	local penultimate = syllables[count - 1]
	local replacement = outcomes[vowel][mw.ustring.sub(penultimate, -1)]
	if not replacement then
		return
	end

	local merged = mw.ustring.sub(penultimate, 1, -2) .. replacement .. replacement
	-- the merged syllable must not carry an automatic stress marker
	syllables[count - 1] = mw.ustring.gsub(merged, "^" .. AUTO_STRESS, "")
	syllables[count] = nil
end

--- Rhyme-level vowel reduction for the syllables of one word.
-- Supplies the replacement callback for do_reduction_internal. Only a final
-- open syllable is affected: its vowel is kept when it is one of the keys of
-- reduce_final_vowel and dropped otherwise.
-- @param syllables array of syllables of one word, modified in place
local function do_reduction_rhyme_word(syllables)
	local function reduce(nucleus, coda, index)
		-- long vowels are left alone
		if mw.ustring.find(nucleus, "(" .. m_izh.vowel .. ")%1") then
			return nucleus .. coda
		end

		if mw.ustring.find(nucleus, m_izh.vowel .. m_izh.vowel) then
			local offglide = mw.ustring.sub(nucleus, 2)
			if offglide == "i" then
				-- i-diphthongs are left alone
				return nucleus .. coda
			end
			-- other diphthongs: the second element counts as coda
			coda = offglide .. coda
			nucleus = mw.ustring.sub(nucleus, 1, 1)
		end

		local is_final = index == #syllables
		if not (is_final and #coda == 0) then
			return nucleus .. coda
		end
		-- reduced, but simply drop it
		if reduce_final_vowel[nucleus] then
			return nucleus
		end
		return ""
	end

	do_reduction_internal(syllables, reduce)
end

--- Drops a word-final vowel that reduction would delete.
-- Does nothing for one-syllable words, for words not ending in a non-vowel
-- followed by a vowel, and for words ending in a key of reduce_final_vowel.
-- Otherwise do_reduction_word is run on a copy; if the reduced last syllable
-- no longer ends in a vowel it is merged into the penultimate syllable
-- (adding REALLY_JUST_PALATAL when the dropped vowel was "i").
-- @param syllables array of syllables of one word, modified in place
local function do_final_vowel_dropping_word(syllables)
	local count = #syllables
	if count == 1 then
		return
	end
	local consonant_then_vowel = "[^" .. m_izh.vowels .. "]" .. m_izh.vowel .. "$"
	if not mw.ustring.find(table.concat(syllables, ""), consonant_then_vowel) then
		return
	end

	local last = syllables[count]
	if reduce_final_vowel[mw.ustring.sub(last, -1, -1)] then
		return
	end

	-- trial reduction on a copy, so the input is only changed when needed
	local trial = {}
	for position = 1, count do
		trial[position] = syllables[position]
	end
	do_reduction_word(trial)

	local reduced_last = trial[#trial]
	if mw.ustring.find(reduced_last, m_izh.vowel .. "$") then
		return
	end

	local leftovers = ""
	if mw.ustring.find(last, "i$") then
		leftovers = REALLY_JUST_PALATAL
	end
	local merged = syllables[count - 1] .. reduced_last .. leftovers
	syllables[count - 1] = mw.ustring.gsub(merged, "^" .. AUTO_STRESS, "")
	syllables[count] = nil
end

--- Applies do_reduction_word to every word of a syllable list, in place.
-- @param syllables array of syllables, possibly spanning several words
local function do_reduction(syllables)
	do_by_word_syllables(syllables, do_reduction_word)
end

--- Applies do_reduction_rhyme_word to every word of a syllable list, in place.
-- @param syllables array of syllables, possibly spanning several words
local function do_reduction_rhyme(syllables)
	do_by_word_syllables(syllables, do_reduction_rhyme_word)
end

--- Applies do_coalesce_rhyme_word to every word of a syllable list, in place.
-- @param syllables array of syllables, possibly spanning several words
local function do_coalesce_rhyme(syllables)
	do_by_word_syllables(syllables, do_coalesce_rhyme_word)
end

--- Applies do_final_vowel_dropping_word to every word of a syllable list, in place.
-- @param syllables array of syllables, possibly spanning several words
local function do_final_vowel_dropping(syllables)
	do_by_word_syllables(syllables, do_final_vowel_dropping_word)
end

--- Chooses the quality of every "l" for narrow transcription.
-- Each "l" becomes velarized "ɫ", palatalized "l" + PALATALIZE, or stays
-- plain, depending on the neighbouring vowels / "j". Text without "l", or
-- text that already contains a manually palatalized "l", is returned as is.
-- @param text respelling
-- @return the respelling with "l" qualities marked
local function do_narrow_l(text)
	-- failsafe
	if not mw.ustring.find(text, "l") then return text end
	if mw.ustring.find(text, "l" .. PALATALIZE) then return text end

	local velar_l = "ɫ"
	-- private-use placeholder for a palatal l; turned into "l" + PALATALIZE at the end
	local palatal_l = U(0xEEEF)

	-- "l" between one of a/o/u/ä/ö/y/ь and a consonant is decided by the
	-- preceding vowel alone, except before "l" or "j"
	text = mw.ustring.gsub(text, "([aouäöyь])l(" .. m_izh.consonant .. ")", function (before, after)
			if after == "l" or after == "j" then
				return before .. "l" .. after
			elseif mw.ustring.find(before, "[aouь]") then
				return before .. velar_l .. after
			else
				return before .. palatal_l .. after
			end
		end)

	-- positions of the "l"s that are still undecided
	local length = mw.ustring.len(text)
	local l_indexes = {}
	local search_from = 1
	while true do
		local index = mw.ustring.find(text, "l", search_from)
		if index == nil then break end
		table.insert(l_indexes, index)
		search_from = index + 1
	end

	-- environment class of each relevant character
	local env_tags = {
		["a"] = "a", ["o"] = "a", ["u"] = "a", ["i"] = "i", ["j"] = "j",
		["ä"] = "ä", ["ö"] = "ä", ["y"] = "ä", ["e"] = "e", [" "] = "_",
		["-"] = "_", ["ь"] = "a"
	}

	-- For every "l" build a two-character key: the class of the nearest
	-- preceding relevant character ("j" is skipped on that side) followed by
	-- the class of the next relevant character. The trailing space makes a
	-- word-final "l" receive "_" as its following class.
	local env = {}
	local cleaned = mw.ustring.gsub(text, "[^aeiouäöyjlь -]", "") .. " "
	local env_index = 1
	local current_env_before = "_"
	local backburner, backburner_count = {}, 0
	for c in mw.ustring.gmatch(cleaned, ".") do
		if c == "l" then
			env[env_index] = current_env_before
			backburner_count = backburner_count + 1
			backburner[backburner_count] = env_index
			env_index = env_index + 1
		else
			local current_env = env_tags[c] or "_"
			-- complete the keys of all "l"s still waiting for their right-hand side
			for pending = 1, backburner_count do
				local back_index = backburner[pending]
				env[back_index] = env[back_index] .. current_env
			end
			backburner_count = 0
			if current_env ~= "j" then
				current_env_before = current_env
			end
		end
	end

	local l_conv = {
		["i_"] = palatal_l, ["_i"] = palatal_l, ["äi"] = palatal_l,
		["ei"] = palatal_l, ["aj"] = palatal_l, ["äj"] = palatal_l,
		["ij"] = palatal_l, ["ej"] = palatal_l, ["ie"] = palatal_l,
		["oj"] = palatal_l, ["uj"] = palatal_l,

		["a_"] = velar_l, ["_a"] = velar_l, ["aa"] = velar_l,
		["ia"] = velar_l, ["ea"] = velar_l, ["ae"] = velar_l
	}

	-- Rebuild the text, replacing each undecided "l" according to its key.
	-- Everything is kept in locals: the original assigned the source text to
	-- an undeclared (global) variable here.
	local parts = {}
	local copied_up_to = 1
	for l_number, l_index in ipairs(l_indexes) do
		parts[#parts + 1] = mw.ustring.sub(text, copied_up_to, l_index - 1)
		parts[#parts + 1] = l_conv[env[l_number]] or "l"
		copied_up_to = l_index + 1
	end
	parts[#parts + 1] = mw.ustring.sub(text, copied_up_to, length)
	text = table.concat(parts)

	text = mw.ustring.gsub(text, palatal_l .. palatal_l, "ll" .. PALATALIZE)
	text = mw.ustring.gsub(text, palatal_l, "l" .. PALATALIZE)
	return text
end

-- Used by do_additional_reduction: vowel that results (doubled) when the
-- keyed vowel is followed by a syllable starting with "a" or "ä".
local reduce_a_diphthong = {
	["e"] = "e", ["i"] = "e",
	["o"] = "o", ["ö"] = "ö",
	["u"] = "o", ["y"] = "ö",
}

-- Same, for a following syllable that starts with "e".
local reduce_e_diphthong = {
	["u"] = "o", ["y"] = "ö",
}

--- Contracts certain vowel sequences across a syllable boundary, in place.
-- For an unstressed, non-final syllable at most two syllables after the last
-- stressed one whose nucleus is a single vowel: if the next syllable starts
-- with a/ä/e and the reduce_*_diphthong tables have an entry, the nucleus is
-- replaced by the doubled table vowel, the rest of the next syllable is
-- appended and the next syllable is emptied. Empty syllables are removed
-- at the end.
-- @param syllables array of syllables, modified in place
local function do_additional_reduction(syllables)
	-- /VA/ (V != A) never in the same syllable
	local single_vowel = "^" .. UNGEMINATE .. "?(" .. m_izh.vowel .. ")$"
	local next_onset = "^" .. UNGEMINATE .. "?([aeä])"
	local last_stressed = 1

	for i = 1, #syllables - 1 do
		if i == 1 or is_stressed_syllable(syllables[i]) then
			last_stressed = i
		else
			local nucleus = mw.ustring.match(syllables[i], m_izh.vowel .. "+")
			if i - last_stressed <= 2 and nucleus then
				nucleus = mw.ustring.match(nucleus, single_vowel)
			else
				nucleus = nil
			end

			if nucleus then
				local onset_start, onset_end, consequent = mw.ustring.find(syllables[i + 1], next_onset)
				if onset_start then
					-- "e" uses its own table; "a"/"ä" share one
					local outcomes = reduce_a_diphthong
					if consequent == "e" then
						outcomes = reduce_e_diphthong
					end
					local contracted = outcomes[nucleus]
					if contracted then
						local remainder = mw.ustring.sub(syllables[i + 1], onset_end + 1)
						syllables[i] = mw.ustring.gsub(syllables[i], nucleus, contracted .. contracted) .. remainder
						syllables[i + 1] = ""
					end
				end
			end
		end
	end

	-- remove empty syllables
	local total = #syllables
	local kept = 0
	for i = 1, total do
		local syllable = syllables[i]
		if mw.ustring.len(syllable) > 0 then
			kept = kept + 1
			syllables[kept] = syllable
		end
	end
	for i = kept + 1, total do
		syllables[i] = nil
	end
end

--- Maps a consonant through `map` while keeping an attached PALATAL sign.
-- @param map table from base consonant to replacement string
-- @param consonant consonant, optionally followed by PALATAL
-- @return the mapped consonant followed by the original PALATAL sign, if any
local function pass_diacritics_through(map, consonant)
	local base, marks = mw.ustring.match(consonant, "([" .. IPA_CONSONANTS .. "])([" .. PALATAL .. "]?)")
	return map[base] .. marks
end

-- Character-class contents for sounds that count as voiced in do_voicing and
-- do_alalaukaa_voicing: all vowels (IPA and orthographic) plus these consonants.
local voiced_consonants = "jlɫmnŋrvʋ"
local voiced_sounds = IPA_VOWELS .. m_izh.vowels .. voiced_consonants

--- Applies the voicing rules to k/p/t/s/š.
-- Voiced letters in the input are first normalised to their voiceless
-- counterparts; the rules below then introduce voiced or semivoiced
-- (voiced letter plus ring diacritic) consonants depending on context.
-- NOTE(review): relies on Module:gsub lookahead; judging from the callbacks,
-- the first return value replaces the match minus its last capture and the
-- second is left in the text for the next match — confirm against that module.
-- @param text string to process
-- @return the processed string
local function do_voicing(text)
	text = mw.ustring.gsub(text, "[bdgzž]", { ["b"] = "p", ["d"] = "t", ["g"] = "k", ["z"] = "s", ["ž"]="š" })
	local voice = { ["k"] = "g", ["p"] = "b", ["t"] = "d", ["s"] = "z", ["š"] = "ž" }
	local semivoice = { ["k"] = "g̊", ["p"] = "b̥", ["t"] = "d̥", ["s"] = "z̥", ["š"] = "ž̥" }

	local consonants_to_voice = "[kptsš][" .. PALATAL .. "]?"
	local vowel = "[" .. IPA_VOWELS .. m_izh.vowels .. "]"

	-- k/p/t/s/š is semivoiced if it follows a voiced sound and is followed by a short vowel or a voiced consonant
	text = gsub_lookahead(text, "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. PALATAL .. "?[" .. AUTO_STRESS .. "-]?)(" .. consonants_to_voice .. ")([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. ".?)",
		function (before, consonant, after)
			-- two vowels in a row after the consonant: leave it voiceless
			if mw.ustring.find(after, vowel .. ANY_DIACRITICS .. vowel) then
				return before .. consonant, after
			else
				return before .. pass_diacritics_through(semivoice, consonant), after
			end
		end)

	-- k/p/t/s/š is semivoiced if it follows a voiced sound and is not followed by anything
	text = mw.ustring.gsub(text, "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. PALATAL .. "?[" .. AUTO_STRESS .. "-]?)(" .. consonants_to_voice .. ")$",
		function (before, consonant)
			return before .. pass_diacritics_through(semivoice, consonant)
		end)

	-- k/p/t/s/š is voiced if it follows a voiced sound and the next sound in the next word is a voiced sound
	-- k/p/t/s/š is semivoiced if it follows a voiced sound and the next sound in the next word is not a voiced sound
	-- NOTE(review): the else branch below returns the consonant unchanged rather than semivoiced — confirm which is intended.
	text = gsub_lookahead(text, "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. "[" .. AUTO_STRESS .. "-]?)(" .. consonants_to_voice .. ")([%s" .. AUTO_STRESS .. "-]+)(.)",
		function (before, consonant, space, after)
			if mw.ustring.find(after, "^[" .. voiced_sounds .. "]") then
				return before .. pass_diacritics_through(voice, consonant) .. space, after
			else
				return before .. consonant .. space, after
			end
		end)

	-- devoice word-initial
	-- (the lookup key is the captured letter; an attached ring diacritic is removed with it)
	text = mw.ustring.gsub(text, "^([bdgzž])[" .. U(0x030a) .. U(0x0325) .. "]?", { ["b"] = "p", ["d"] = "t", ["g"] = "k", ["z"] = "s", ["ž"]="š" })

	return text
end

--- Ala-Laukaa voicing of "s" and "t" after a voiced sound.
-- "s" -> "z" and "t" -> "d" at the very end of the text, and before a space
-- or hyphen that is followed (optionally after a stress mark) by a voiced sound.
-- @param text string to process
-- @return the processed string
local function do_alalaukaa_voicing(text)
	-- voiced sound with optional diacritics, length mark and palatal sign
	local voiced_before = "([" .. voiced_sounds .. "]" .. ANY_DIACRITICS .. LONG .. "?" .. PALATAL .. "?)"
	-- word boundary followed by a voiced sound
	local voiced_after = "([%s-][ˈˌ]?[" .. voiced_sounds .. "])"

	local result = mw.ustring.gsub(text, voiced_before .. "s$", "%1z")
	result = mw.ustring.gsub(result, voiced_before .. "t$", "%1d")
	result = mw.ustring.gsub(result, voiced_before .. "s" .. voiced_after, "%1z%2")
	result = mw.ustring.gsub(result, voiced_before .. "t" .. voiced_after, "%1d%2")
	return result
end

--- Replaces vowels with different symbols depending on their length.
-- @param text string to process
-- @param vowels_find character-class contents listing the vowels to replace
-- @param vowels_short table: vowel -> replacement when not followed by LONG
-- @param vowels_long table: vowel -> replacement when followed by LONG
-- @return whatever gsub_lookahead returns for these replacements
local function do_vowel_replacements(text, vowels_find, vowels_short, vowels_long)
	local function pick(vowel, post)
		local source = vowels_short
		if post == LONG then
			source = vowels_long
		end
		return source[vowel], post
	end
	return gsub_lookahead(text, "([" .. vowels_find .. "])(.?)", pick)
end

--- Normalises palatalization marks in a finished transcription.
-- Turns REALLY_JUST_PALATAL into PALATAL, moves PALATAL in front of LONG,
-- collapses runs of PALATAL, and palatalizes a consonant that is separated
-- from an identical palatalized consonant only by a syllable/stress boundary.
-- @param text transcription
-- @return the normalised transcription
local function cleanup_palatal(text)
	local result = mw.ustring.gsub(text, REALLY_JUST_PALATAL, PALATAL)
	result = mw.ustring.gsub(result, LONG .. PALATAL, PALATAL .. LONG)
	result = mw.ustring.gsub(result, PALATAL .. "+", PALATAL)

	local across_boundary = "([" .. IPA_CONSONANTS .. "])([." .. STRESS_SECONDARY .. AUTO_STRESS .. "])%1" .. PALATAL
	result = mw.ustring.gsub(result, across_boundary, "%1" .. PALATAL .. "%2%1" .. PALATAL)
	return result
end

--- <<< COMMON END >>> ---

--- <<< DIALECTS START >>> ---

-- narrow_level 0 = broad, 1 = rhyme, 2 = narrow

-- Ala-Laukaa
--- Generates the Ala-Laukaa transcription of a respelling.
-- The steps below are order-dependent: each one works on the markers left
-- by the previous ones.
-- @param text lower-cased respelling
-- @param narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- @return the transcription with stress marks, without enclosing slashes/brackets
local function IPA_alalaukaa(text, narrow_level)
	if narrow_level <= 1 then
		-- broad/rhyme: drop manual palatalization marks, but keep one in place of "j'"
		text = mw.ustring.gsub(text, "j?" .. PALATALIZE, { [PALATALIZE] = "", ["j" .. PALATALIZE] = PALATALIZE })
	end
	-- "h" after n/r is dropped
	text = mw.ustring.gsub(text, "([nr])h", "%1")
	text = mw.ustring.gsub(zeroth_round_of_common_replacements(text), VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
	if narrow_level > 0 then
		if narrow_level > 1 then
			text = do_narrow_l(text)
			-- every "l" still plain after do_narrow_l is palatalized; "ll" carries a single mark
			text = mw.ustring.gsub(mw.ustring.gsub(text, "l", "l" .. PALATALIZE), "l" .. PALATALIZE .. "l" .. PALATALIZE, "ll" .. PALATALIZE)
			text = mw.ustring.gsub(text, PALATALIZE .. PALATALIZE, PALATALIZE)
			text = mw.ustring.gsub(text, "l" .. PALATALIZE .. "j", "lj")
			text = do_alalaukaa_voicing(text)
		end
		-- protect "j'" as a single placeholder character during syllable processing
		text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
		local syllables = split_syllables(text, true)
		add_secondary_stress(syllables)
		if narrow_level > 1 then
			do_final_vowel_dropping(syllables)
			do_gemination(syllables, LONG)
			do_additional_reduction(syllables)
			do_reduction(syllables)
		elseif narrow_level == 1 then
			do_final_vowel_dropping(syllables)
			do_coalesce_rhyme(syllables)
			do_reduction_rhyme(syllables)
		end
		text = table.concat(syllables)
		-- restore "j'"
		text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
		if narrow_level > 1 then
			text = automatic_palatalization(text, "[ln]") -- palatalization
			text = mw.ustring.gsub(text, "h([kg])", "x%1")
		end
		text = clean_virtual_break(text)
	end
	text = clean_ungeminate(text)
	text = mw.ustring.gsub(text, "j" .. PALATALIZE, PALATALIZE)
	text = manual_palatalization(text)
	text = first_round_of_common_replacements(text)
	text = long_vowels_and_diphthongs(text)
	text = long_consonants(text)
	text = second_round_of_common_replacements(text, narrow_level > 1)
	if narrow_level > 1 then
		-- short mid vowels get a lowering diacritic, long ones stay plain
		local vowels_short = { ["e"] = "e̞", ["o"] = "o̞", ["ø"] = "ø̞" }
		local vowels_long = { ["e"] = "e", ["o"] = "o", ["ø"] = "ø" }
		text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
		text = mw.ustring.gsub(text, "[sz]", { ["s"] = "s̠", ["z"] = "z̠" })
		text = standard_sandhi(text)
		-- "j" after a consonant at a word/stress boundary or at the end is written "i"
		text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "]" .. ANY_DIACRITICS .. PALATAL .. "?)j%f[ " .. AUTO_STRESS .. "-]", "%1i")
		text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "]" .. ANY_DIACRITICS .. PALATAL .. "?)j$", "%1i")
	end
	-- "j" after a vowel (other than i) becomes non-syllabic "i" at the end of
	-- the text, before a consonant, and before a space
	text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j$", "%1i" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?[" .. IPA_CONSONANTS .. "])", "%1i" .. NONSYLLABIC .. "%2")
	text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j ", "%1i" .. NONSYLLABIC .. " ")
	text = cleanup_palatal(text)
	return add_primary_stress(text)
end

-- Soikkola
--- Generates the Soikkola transcription of a respelling.
-- The steps below are order-dependent: each one works on the markers left
-- by the previous ones.
-- @param text lower-cased respelling
-- @param narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- @return the transcription with stress marks, without enclosing slashes/brackets
local function IPA_soikkola(text, narrow_level)
	text = zeroth_round_of_common_replacements(text)
	if narrow_level > 0 then
		if narrow_level > 1 then
			text = do_narrow_l(text)
			text = mw.ustring.gsub(text, "h([kg])", "x%1")
		end
		-- protect "j'" as a single placeholder character during syllable processing
		text = mw.ustring.gsub(text, "j" .. PALATALIZE, J_PALATALIZE)
		local syllables = split_syllables(text, true)
		add_secondary_stress(syllables)
		if narrow_level > 1 then
			do_gemination(syllables, SEMILONG)
		end
		text = table.concat(syllables)
		text = mw.ustring.gsub(text, VIRTUAL_BREAK_UNGEMINATE, VIRTUAL_BREAK)
		-- restore "j'"
		text = mw.ustring.gsub(text, J_PALATALIZE, "j" .. PALATALIZE)
	end
	-- manual palatalization marks are kept only after "l"; "j'" is reduced to
	-- the bare mark and the mark is dropped after any other character
	text = mw.ustring.gsub(text, "(.)" .. PALATALIZE,
			function (preceding)
				if preceding == "l" then
					return preceding .. PALATALIZE
				elseif preceding == "j" then
					return PALATALIZE
				else
					return preceding
				end
			end)
	text = manual_palatalization(text)
	if narrow_level > 1 then text = do_voicing(text) end
	if narrow_level > 0 then text = clean_virtual_break(text) end
	text = first_round_of_common_replacements(text)
	text = clean_ungeminate(text)
	text = long_vowels_and_diphthongs(text)
	text = long_consonants(text)
	text = second_round_of_common_replacements(text, narrow_level > 1)
	if narrow_level > 1 then
		-- short mid vowels get a lowering diacritic, long ones a raising diacritic
		local vowels_short = { ["e"] = "e̞", ["o"] = "o̞", ["ø"] = "ø̞" }
		local vowels_long = { ["e"] = "e̝", ["o"] = "o̝", ["ø"] = "ø̝" }
		text = do_vowel_replacements(text, "eoø", vowels_short, vowels_long)
		-- s/z become ʃ/ʒ, except as the second half of an affricate (after TIE)
		text = mw.ustring.gsub(mw.ustring.gsub(text, "^s", "ʃ"), "([^" .. TIE .. "])s", "%1ʃ")
		text = mw.ustring.gsub(mw.ustring.gsub(text, "^z", "ʒ"), "([^" .. TIE .. "])z", "%1ʒ")
		text = standard_sandhi(text)
	end
	-- "j" after a vowel (other than i) becomes non-syllabic "i" at the end of
	-- the text, before a consonant, and before a space
	text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j$", "%1i" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j(" .. STRESS_PRIMARY .. "?" .. STRESS_SECONDARY .. "?[" .. IPA_CONSONANTS .. "])", "%1i" .. NONSYLLABIC .. "%2")
	text = mw.ustring.gsub(text, "([ɑəæeoøuy]" .. ANY_DIACRITICS .. ")j ", "%1i" .. NONSYLLABIC .. " ")
	return add_primary_stress(text)
end

-- Hevaha
--- Generates the Hevaha transcription of a respelling.
-- Starts from the Soikkola transcription, turns half-long marks into full
-- length marks and, in narrow transcription, removes the ring diacritic
-- from b/d/ʒ/ɡ before l or r.
-- @param text lower-cased respelling
-- @param narrow_level 0 = broad, 1 = rhyme, 2 = narrow
-- @return the transcription
local function IPA_hevaha(text, narrow_level)
	local soikkola = IPA_soikkola(text, narrow_level)
	local result = mw.ustring.gsub(soikkola, "ˑ", "ː")
	if narrow_level > 1 then
		local ring_below, ring_above = U(0x0325), U(0x030A)
		result = mw.ustring.gsub(result, "([bdʒ])" .. ring_below .. "([lr])", "%1%2")
		result = mw.ustring.gsub(result, "ɡ" .. ring_above .. "([lr])", "ɡ%1")
	end
	return result
end

-- Ylä-Laukaa
--- Placeholder for the Ylä-Laukaa transcription.
-- Always raises an error; the matching entry in the `varieties` table is
-- commented out, so nothing in the visible code calls it.
-- @param text unused
-- @param narrow_level unused
local function IPA_ylalaukaa(text, narrow_level)
	error("Ylä-Laukaa not implemented") -- TODO
end

--- <<< DIALECTS END >>> ---

--- <<< INTERFACE START >>> ---

--- Replaces every ASCII "g" with the IPA letter "ɡ".
-- @param ipa transcription
-- @return the results of mw.ustring.gsub (string and substitution count)
local function cleanup_IPA(ipa)
	local ascii_g, ipa_g = "g", "ɡ"
	return mw.ustring.gsub(ipa, ascii_g, ipa_g)
end

--- Strips PALATALIZE and UNGEMINATE markers from a respelling.
-- @param text respelling
-- @return the results of mw.ustring.gsub (string and substitution count)
local function cleanup_for_hyphenate(text)
	return mw.ustring.gsub(text, "[" .. PALATALIZE .. UNGEMINATE .. "]", "")
end

--- Strips PALATALIZE and UNGEMINATE markers as well as hyphens from a respelling.
-- @param text respelling
-- @return the results of mw.ustring.gsub (string and substitution count)
local function cleanup_for_hyphenate_int(text)
	return mw.ustring.gsub(text, "[" .. PALATALIZE .. UNGEMINATE .. "-]", "")
end

--- Drops a final "i" that directly follows o, u, y or ö.
-- @param sp spelling
-- @return the spelling without that final "i" (a single string)
local function cleanup_for_hyphenate_final(sp)
	-- allow final /oi/, /ui/, /yi/, /øi/ for <o>, <u>, <y>, <ö>
	local trimmed = mw.ustring.gsub(sp, "([ouyö])i$", "%1")
	return trimmed
end

--- Adapts a respelling to the page title for display as a hyphenation.
-- A final "i" that the title lacks is dropped. If the title is all
-- lower case the respelling is lower-cased; otherwise the letters of the
-- respelling take their casing from the title's letters, matched in order.
-- @param sp respelling (may contain non-letter characters such as separators)
-- @param title page title
-- @return the respelling with the title's casing
local function match_spelling_with_title_for_hyphenation(sp, title)
	if mw.ustring.find(sp, "i$") and not mw.ustring.find(title, "i$") then
		sp = mw.ustring.gsub(sp, "i$", "")
	end

	if mw.ustring.lower(title) == title then
		return mw.ustring.lower(sp)
	end

	-- find letters in title
	local letters = {}
	for letter in mw.ustring.gmatch(title, "%a") do
		letters[#letters + 1] = letter
	end

	local respelled = {}
	local letter_index = 1

	for character in mw.ustring.gmatch(sp, ".") do
		-- next_letter is nil once the title's letters are used up; the
		-- remaining characters of the respelling are then copied unchanged
		-- instead of raising an error on a nil argument.
		local next_letter = letters[letter_index]
		if next_letter
			and mw.ustring.match(character, "%a")
			and mw.ustring.lower(next_letter) == mw.ustring.lower(character) then
			respelled[#respelled + 1] = next_letter
			letter_index = letter_index + 1
		else
			respelled[#respelled + 1] = character
		end
	end

	return table.concat(respelled)
end

--- Tells whether a respelling corresponds to the page title.
-- Both sides are lower-cased and passed through cleanup_for_hyphenate_final;
-- the respelling is first stripped of markup characters, hyphens and dots.
-- @param sp respelling
-- @param title page title
-- @return true when the normalised strings are equal
local function hyphenate_matches(sp, title)
	local stripped = mw.ustring.gsub(cleanup_for_hyphenate_int(sp), "%.", "")
	local from_spelling = cleanup_for_hyphenate_final(mw.ustring.lower(stripped))
	local from_title = cleanup_for_hyphenate_final(mw.ustring.lower(title))
	return from_spelling == from_title
end

--- Splits a respelling into syllables for hyphenation.
-- Markup characters are stripped first and separators are not kept.
-- @param text respelling
-- @return array of syllables
local function hyphenate(text)
	-- The extra parentheses keep only the cleaned string: without them the
	-- substitution count returned by gsub would be passed on as the
	-- keep_sep_symbols argument of split_syllables.
	return split_syllables((cleanup_for_hyphenate(text)))
end

--- Spells out long consonants as doubled letters.
-- A geminatable consonant (optionally followed by PALATALIZE) plus LONG
-- becomes the consonant written twice followed by the mark; long "j" is
-- written "ij".
-- @param text string to process
-- @return the results of mw.ustring.gsub (string and substitution count)
local function spell_long_consonants(text)
	local long_consonant = "([" .. m_izh.consonants_geminatable .. "])" .. "(" .. PALATALIZE .. "?)" .. LONG
	local function double(consonant, mark)
		if consonant == "j" then
			return "ij"
		end
		return consonant .. consonant .. mark
	end
	return mw.ustring.gsub(text, long_consonant, double)
end

--- Extracts the rhyme from a form's rhyme-level transcription.
-- The rhyme is the part starting at the first IPA vowel after the last
-- secondary stress mark (or of the whole transcription when there is none).
-- @param tuple table with a `rhyme` field as produced by make_IPAs
-- @return the rhyme string, or nil when the form has no rhyme transcription
--   (make_IPAs sets `rhyme` to nil for prefixes and suffixes) or no vowel
local function generate_rhyme(tuple)
	local text = tuple.rhyme
	-- affixes carry no rhyme transcription; gsub would raise an error on nil
	if text == nil then return nil end
	text = mw.ustring.gsub(cleanup_IPA(text), STRESS_PRIMARY, "")

	-- keep only what follows the last secondary stress mark
	local index = mw.ustring.find(text, STRESS_SECONDARY .. "[^" .. STRESS_SECONDARY .. "]*$")
	if index ~= nil then text = mw.ustring.sub(text, index + 1) end

	index = mw.ustring.find(text, "[" .. IPA_VOWELS .. "]")
	if index == nil then return nil end

	return mw.ustring.sub(text, index)
end

--- Generates the transcriptions of several forms for one variety.
-- Each form is lower-cased. A leading hyphen marks a suffix and a trailing
-- hyphen a prefix; these hyphens are removed before transcription and put
-- back around the broad and narrow results (a suffix also loses its initial
-- primary stress mark). Affixes get no rhyme transcription.
-- @param fn transcription function taking (form, narrow_level)
-- @param forms array of respellings
-- @param variety display name of the variety
-- @return table { forms = array of {broad, rhyme, narrow}, varieties = { variety } }
local function make_IPAs(fn, forms, variety)
	local transcriptions = {}
	for _, form in ipairs(forms) do
		form = mw.ustring.lower(form)
		local is_suffix = mw.ustring.find(form, "^%-")
		local is_prefix = mw.ustring.find(form, "%-$")

		if is_suffix then form = mw.ustring.gsub(form, "^%-", "") end
		if is_prefix then form = mw.ustring.gsub(form, "%-$", "") end

		local broad = fn(form, 0)
		local rhyme = fn(form, 1)
		local narrow = fn(form, 2)

		if is_prefix or is_suffix then
			rhyme = nil
		end
		if is_prefix then
			broad = broad .. "-"
			narrow = narrow .. "-"
		end
		if is_suffix then
			local initial_stress = "^" .. STRESS_PRIMARY
			broad = "-" .. mw.ustring.gsub(broad, initial_stress, "")
			narrow = "-" .. mw.ustring.gsub(narrow, initial_stress, "")
		end

		transcriptions[#transcriptions + 1] = { broad = broad, rhyme = rhyme, narrow = narrow }
	end
	return {
		forms = transcriptions,
		varieties = { variety }
	}
end

--- Formats one variety's transcriptions as a wikitext list item.
-- Every form contributes its broad transcription in slashes followed by its
-- narrow transcription in brackets.
-- @param tuple result of make_IPAs
-- @param title page title (not used in this function)
-- @param has_spaces passed through to m_IPA.format_IPA_full
-- @return wikitext line starting with "* "
local function format_IPAs(tuple, title, has_spaces)
	local qualifier = require("Module:accent qualifier").format_qualifiers(tuple.varieties)
	local prons = {}
	for _, form in ipairs(tuple.forms) do
		prons[#prons + 1] = { pron = "/" .. cleanup_IPA(form.broad) .. "/" }
		prons[#prons + 1] = { pron = "[" .. cleanup_IPA(form.narrow) .. "]" }
	end
	local formatted = m_IPA.format_IPA_full(lang, prons, nil, nil, nil, has_spaces)
	return "* " .. qualifier .. " " .. formatted
end

--- Resolves a list-valued template argument.
-- @param param list given by the user (may be nil or empty)
-- @param fallback value returned when `param` is nil or empty
-- @param allow_dash when falsy, a list consisting of the single item "-"
--   yields an empty list
-- @return `fallback`, a new empty table, or `param` itself
local function get_arg_list(param, fallback, allow_dash)
	local missing = not param or #param == 0
	if missing then
		return fallback
	end
	local lone_dash = #param == 1 and param[1] == "-"
	if lone_dash and not allow_dash then
		return {}
	end
	return param
end

-- Supported varieties, in display order. Each entry is
-- { template parameter / variety code, display name, transcription function,
--   optional }. A variety marked optional is only shown when its own
-- parameter is given; the others fall back to the default spellings.
local varieties = {
	{"A", "Ala-Laukaa", IPA_alalaukaa, false},
	{"S", "Soikkola", IPA_soikkola, false},
	{"H", "Hevaha", IPA_hevaha, true},
--	{"Y", "Ylä-Laukaa", IPA_ylalaukaa, true},
}

-- rhymes only for these varieties
-- (set keyed by display name)
local varieties_with_rhymes = {
	["Ala-Laukaa"] = true,
	["Soikkola"] = true
}

--- Looks up a variety entry by its code.
-- @param variety_code code such as "A", "S" or "H"
-- @return the matching entry of the `varieties` table
-- Raises an error when the code is not recognised.
local function get_variety(variety_code)
	for _, variety in ipairs(varieties) do
		if variety[1] == variety_code then
			return variety
		end
	end
	-- tostring keeps the message meaningful when the code is nil or not a
	-- string; plain concatenation would fail with an unrelated error then
	error("Unrecognized variety code: " .. tostring(variety_code))
end

-- Returns the variety name (second field of the `varieties` entry) for the
-- given parameter code; errors are raised by get_variety for unknown codes.
function export.get_variety(variety_code)
	local entry = get_variety(variety_code)
	local name = entry[2]
	return name
end

-- Tells whether at least one name in the list `varieties` is a key of
-- `varieties_with_rhymes`. Returns true or false.
-- Note that the parameter shadows the module-level `varieties` table.
local function allow_rhyme_for_varieties(varieties)
	local index = 1
	local name = varieties[index]
	-- walk the list up to its first nil, as ipairs would
	while name ~= nil do
		if varieties_with_rhymes[name] then
			return true
		end
		index = index + 1
		name = varieties[index]
	end
	return false
end

-- Transcribes a single form for the variety with the given parameter code.
--   form          - the spelling to transcribe
--   variety_code  - code looked up with get_variety
--   transcription - optional key ("broad", "rhyme" or "narrow" are the keys
--                   make_IPAs fills in); when given, only that field is returned
-- Returns the whole entry produced by make_IPAs when `transcription` is falsy.
function export.generate_one(form, variety_code, transcription)
	local variety = get_variety(variety_code)
	local name, fn = variety[2], variety[3]
	local generated = make_IPAs(fn, { form }, name).forms[1]
	if not transcription then
		return generated
	end
	return generated[transcription]
end

-- Transcribes several forms for the variety with the given parameter code.
--   forms         - list of spellings to transcribe
--   variety_code  - code looked up with get_variety
--   transcription - optional key; when given, every entry of the result is
--                   replaced in place by that field of the entry
-- Returns the list of entries produced by make_IPAs (or of the selected fields).
function export.generate_multiple(forms, variety_code, transcription)
	local variety = get_variety(variety_code)
	local generated = make_IPAs(variety[3], forms, variety[2]).forms
	if not transcription then
		return generated
	end
	-- the length is taken once, before any entry is overwritten
	for index = 1, #generated do
		generated[index] = generated[index][transcription]
	end
	return generated
end

-- Template entry point. Reads the arguments of the parent frame and returns
-- wikitext made of one line per transcribed variety, followed by a rhymes
-- line and a hyphenation line when those could be produced.
--   1            - list of spellings; defaults to the lowercased title
--   A, S, H, Y   - per-variety lists of spellings (Y is accepted, but only the
--                  codes present in `varieties` are processed)
--   title        - overrides the page title (debugging or demonstration only)
function export.show(frame)
	local page_title = mw.title.getCurrentTitle().text
	local categories = {}

	local params = {
		[1] = { list = true },

		["A"] = { list = true }, -- Ala-Laukaa
		["S"] = { list = true }, -- Soikkola
		["H"] = { list = true }, -- Hevaha
		["Y"] = { list = true }, -- Ylä-Laukaa

		["title"] = {}, -- for debugging or demonstration only
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local title = args["title"] or page_title

	local spellings = get_arg_list(args[1], { mw.ustring.lower(title) }, true)

	-- transcriptions, one tuple per variety that has forms
	local IPAs = {}
	for _, variety in ipairs(varieties) do
		local param, name, fn, optional = unpack(variety)
		-- optional varieties have no fallback and are skipped when not given
		local fallback = nil
		if not optional then
			fallback = spellings
		end
		local forms = get_arg_list(args[param], fallback, true)
		if forms then
			table.insert(IPAs, make_IPAs(fn, forms, name))
		end
	end

	local results = {}
	local has_spaces = mw.ustring.find(title, " ")

	-- hyphenation, only attempted for titles without spaces
	local hyphenation = {}
	if not has_spaces then
		local spelling = spellings[1]
		if not hyphenate_matches(spelling, title) then
			-- try to geminate
			local syllables = split_syllables(spelling, true)
			do_gemination(syllables, LONG)
			spelling = spell_long_consonants(clean_ungeminate(table.concat(syllables)))
		end
		if hyphenate_matches(spelling, title) then
			local matched = match_spelling_with_title_for_hyphenation(spelling, title)
			table.insert(hyphenation, hyphenate(matched))
		end
	end

	-- rhymes, deduplicated, only for titles without spaces
	local rhymes = {}
	if not has_spaces then
		local seen_rhymes = {}
		for _, tuple in ipairs(IPAs) do
			if allow_rhyme_for_varieties(tuple.varieties) then
				for _, form in ipairs(tuple.forms) do
					if form.rhyme then
						local rhyme = generate_rhyme(form)
						if not seen_rhymes[rhyme] then
							seen_rhymes[rhyme] = true
							table.insert(rhymes, rhyme)
						end
					end
				end
			end
		end
	end

	for _, tuple in ipairs(IPAs) do
		results[#results + 1] = format_IPAs(tuple, title, has_spaces)
	end

	if #rhymes > 0 then
		-- distinct non-zero lengths of the hyphenations, in order of appearance
		local seen_counts = {}
		local sylcounts = {}
		for _, hyph in ipairs(hyphenation) do
			local count = #hyph
			if count > 0 and not seen_counts[count] then
				seen_counts[count] = true
				table.insert(sylcounts, count)
			end
		end

		local rhymeobjs = {}
		for _, rhyme in ipairs(rhymes) do
			rhymeobjs[#rhymeobjs + 1] = { rhyme = rhyme }
		end

		local rhyme_data = { lang = lang, rhymes = rhymeobjs, num_syl = sylcounts }
		table.insert(results, "* " .. require("Module:rhymes").format_rhymes(rhyme_data))
	end

	if #hyphenation > 0 then
		local hyphs = {}
		for _, hyph in ipairs(hyphenation) do
			hyphs[#hyphs + 1] = { ["hyph"] = hyph }
		end

		local hyph_data = { lang = lang, hyphs = hyphs, caption = "Hyphenation" }
		table.insert(results, "* " .. require("Module:hyphenation").format_hyphenations(hyph_data))
	end

	local body = table.concat(results, "\n")
	return body .. require("Module:utilities").format_categories(categories, lang)
end

--- <<< INTERFACE END >>> ---

return export