Module:tsg-pronunciation

This module lacks a documentation subpage. Please create it.
Useful links: subpage list • links • transclusions • testcases • sandbox
-- Based on [[Module:es-pronunc]] by Benwing2.
-- Adaptation by TagaSanPedroAko, Improved by Ysrael214.
-- Partly rewritten by Benwing2, merging code from [[Module:es-pronunc]] back into this module; {{tl-pr}} restructured
-- to take inline modifiers, like {{es-pr}}.
-- Modified for {{tsg-pr}}.

local export = {}

local force_cat = false -- enable for testing

local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local accent_qualifier_module = "Module:accent qualifier"
local audio_module = "Module:audio"
local headword_data_module = "Module:headword/data"
local hyphenation_module = "Module:hyphenation"
local labels_module = "Module:labels"
local parse_utilities_module = "Module:parse utilities"
local rhymes_module = "Module:rhymes"
local set_utilities_module = "Module:set utilities"
-- local jawi_decode_module = "Module:tsg-translit"
-- local jawi_encode_module = "Module:tsg-jaw sc"

local lang = require("Module:languages").getByCode("tsg")

local maxn = table.maxn
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local u = m_str_utils.char
local ulen = m_str_utils.len
local ulower = m_str_utils.lower

local AC = u(0x0301) -- acute =  ́
local GR = u(0x0300) -- grave =  ̀
local CFLEX = u(0x0302) -- circumflex =  ̂
local TILDE = u(0x0303) -- tilde =  ̃
local DIA = u(0x0308) -- diaeresis =  ̈
local MACRON = u(0x0304) -- macron =  ̄
local DOTOVER = u(0x0307) -- dot over =  ̇

local vowel = "aeëəiouüɜ" -- vowel
local V = "[" .. vowel .. "]"
local NV = "[^" .. vowel .. "]"
local accent = AC .. GR .. CFLEX .. MACRON .. DOTOVER
local accent_c = "[" .. accent .. "]"
local ipa_stress = "ˈˌ"
local ipa_stress_c = "[" .. ipa_stress .. "]"
local separator = accent .. ipa_stress .. "# .-"
local C = "[^" .. vowel .. separator .. "]" -- consonant

local unstressed_words = m_table.listToSet {
	--TO DO
}
local unstressed_affixes = m_table.listToSet {
	--TO DO
}

local special_words = {
	-- TO DO
}

local function track(page)
	require("Module:debug/track")("tsg-pronunciation/" .. page)
	return true
end

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	local retval = rsubn(term, foo, bar)
	return retval
end

-- version of rsubn() that returns a 2nd argument boolean indicating whether
-- a substitution was made.
local function rsubb(term, foo, bar)
	local retval, nsubs = rsubn(term, foo, bar)
	return retval, nsubs > 0
end

-- apply rsub() repeatedly until no change
local function rsub_repeatedly(term, foo, bar)
	while true do
		local new_term = rsub(term, foo, bar)
		if new_term == term then
			return term
		end
		term = new_term
	end
end

-- Combine two sets of qualifiers, either of which may be nil or a list of qualifiers. Remove duplicate qualifiers.
-- Return value is nil or a list of qualifiers.
local function combine_qualifiers(qual1, qual2)
	if not qual1 then
		return qual2
	end
	if not qual2 then
		return qual1
	end
	local qualifiers = m_table.deepcopy(qual1)
	for _, qual in ipairs(qual2) do
		m_table.insertIfNot(qualifiers, qual)
	end
	return qualifiers
end


local function decompose(text, recompose_e_dia)
	-- decompose everything but ñ and ü
	text = toNFD(text)
	text = rsub(text, ".[" .. TILDE .. DIA .. "]", {
		["n" .. TILDE] = "ñ",
		["N" .. TILDE] = "Ñ",
		["u" .. DIA] = "ü",
		["U" .. DIA] = "Ü",
	})
	if recompose_e_dia then
		text = rsub(text, ".[" .. DIA .. "]", {
			["e" .. DIA] = "ë",
			["E" .. DIA] = "Ë",
		})
	end
	return text
end

local function remove_accents(str)
	str = decompose(str, "recompose e-dia")
	str = rsub(str, "(.)" .. accent_c, "%1")
	return str
end

local function split_on_comma(term)
	if term:find(",%s") then
		return require(parse_utilities_module).split_on_comma(term)
	else
		return rsplit(term, ",")
	end
end

--Cleanup Jawi inputs--
local function decode_jawi(text)
	-- TO DO
	return text
end

-- ĵ, ɟ and ĉ are used internally to represent [d͡ʒ], [j] and [t͡ʃ]
--

function export.IPA(text, include_phonemic_syllable_boundaries)
	local debug = {}
	
	-- text = decode_jawi(text)
	text = ulower(text)
	text = decompose(text, "recompose e-dia")
	-- convert commas and en/en dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = canon_spaces(text)

	-- Make prefixes unstressed unless they have an explicit stress marker; also make certain monosyllabic words (e.g.
	-- [[ang]], [[ng]], [[si]], [[na]], etc.) without stress marks be unstressed. We want to do this in most cases as
	-- well with hyphenated compounds, e.g. [[bato-sa-rinyon]] and [[kalahatian-ng-buwan]]. To do this, we use a
	-- capturing split on space or hyphen; in this situation, the actual words are at odd positions, and the separators
	-- (always a single space or hyphen) are at even positions.
	local words = rsplit(text, "([ %-])")
	local function make_unstressed(word)
		-- add dotover to the last vowel not the first one, in case of affixes with qui/que/gui/gue (which don't
		-- currently exist)
		return rsub(word, "^(.*" .. V .. ")", "%1" .. DOTOVER)
	end
	local function signal_no_initial_glottal_stop(word)
		return rsub(word, "^(" .. V .. ")", "◌%1")
	end

	for i=1, #words do
		if i % 2 == 1 then -- a word, not a hyphen or space
			if words[i - 1] == "-" and (not words[i - 2] or words[i - 2] == "" and words[i - 3] ~= "-") and
				words[i + 1] ~= "-" then
				-- a suffix
				if unstressed_affixes["-" .. words[i]] then
					words[i] = make_unstressed(words[i])
				end
				words[i] = signal_no_initial_glottal_stop(words[i])
			elseif words[i + 1] == "-" and (not words[i + 2] or words[i + 2] == "" and words[i + 3] ~= "-") and
				words[i - 1] ~= "-" then
				-- a prefix
				if not rfind(words[i], accent_c) then
					-- an unstressed prefix
					words[i] = make_unstressed(words[i])
				end
			elseif words[i + 1] == "-" and (not words[i + 2] or words[i + 2] == "" and words[i + 3] ~= "-") and
				words[i - 1] == "-" and (not words[i - 2] or words[i - 2] == "" and words[i - 3] ~= "-") then
				-- an interfix or infix
				if not rfind(words[i], accent_c) then
					-- an unstressed interfix or infix
					words[i] = make_unstressed(words[i])
				end
				words[i] = signal_no_initial_glottal_stop(words[i])
			else
				-- a space-delimited word or a word in a hyphen-delimited compound
				words[i] = special_words[words[i]] or words[i]
				if unstressed_words[words[i]] then
					words[i] = make_unstressed(words[i])
				elseif words[i + 1] == "-" and (not words[i - 1] or words[i - 1] == " ") and
					-- e.g. 'mag-' in [[mag-post]]
					unstressed_affixes[words[i] .. "-"] then
					words[i] = make_unstressed(words[i])
				end
			end
		end
		-- old code that I didn't port because I don't understand why it's being done; the purpose is to make suffixes
		-- and infixes with explicit initial glottal stop be unstressed, which seems a weird exception
		-- words[i] = rsub(words[i], "^%-([7ʔ])(" .. V .. ")", "-%1%2" .. DOTOVER)	-- affix that requires glottal stop
	end
	text = table.concat(words, "")
	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?]", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "#")

	table.insert(debug, text)

	-- handle certain combinations; ch ng and sh handling needs to go first
	text = rsub(text, "([t]?)ch", "ĉ") --not the real sound
	text = rsub(text, "([n]?)g̃", "ng") -- Spanish spelling support
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "sh", "ʃ")

	--x
	text = rsub(text, "([#])x(" .. V .. ")", "%1s%2")
	text = rsub(text, "x", "ks")

	--Arabic loan sounds
	text = rsub(text, "dh", "ð")
	text = rsub(text, "kh", "x")
	text = rsub(text, "[gɡ]h", "ɣ")

	--c, gü/gu+e or i, q
	text = rsub(text, "c([iey])", "s%1")
	text = rsub(text, "(" .. V .. ")gü([ie])", "%1ɡw%2")
	text = rsub(text, "gü([ie])", "ɡuw%1")
	text = rsub(text, "gui([aeëo])", "ɡy%1")
	text = rsub(text, "gu([ie])", "ɡ%1")
	text = rsub(text, "qu([ie])", "k%1")

	-- Correction for vowels with in-between glottal stop, now default
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. V .. ")", "%1'%2")
	text = rsub(text, "(i)" .. MACRON, "iy")
	text = rsub(text, "(u)" .. MACRON, "uw")
	text = rsub(text, "(".. V .. ")" .. MACRON, "%1%1")

	--glottal stop
	text = rsub(text, "[ꞌ']", "7")

	--alphabet-to-phoneme
	text = rsub(text, "[cfgjñqrvz7]",
	--["g"]="ɡ":  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{ ["c"] = "k", ["g"] = "ɡ", ["j"] = "ĵ", ["ñ"] = "ny", ["r"] = "ɾ", ["7"] = "ʔ"})

    -- ts
	text = rsub(text, "t_s", "ć") --not the real sound
	text = rsub(text, "ts", "ĉ") --not the real sound

	table.insert(debug, text)

	--determining whether "y" is a consonant or a vowel
	text = rsub(text, "y(" .. accent_c .. ")", "i%1")
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text,"(" .. NV ..  ")y([ˈˌ.]*)([bćĉdðfɡɣhjĵklmnɲŋpqɾrsʃtvwxɟzʔ#" .. vowel .. "])","%1i%2%3")
	text = rsub(text,"(" .. V ..  ")y([ˈˌ.]*)([bćĉdðfɡɣhjĵklmnɲŋpqɾrsʃtvwxɟzʔ#" .. vowel .. "])","%1j%2%3")
	text = rsub(text, "w(" .. V .. ")","w%1")
	text = rsub(text,"(" .. NV ..  ")w([ˈˌ]?)([bćĉdðfɡɣjĵklmnɲŋpqɾrsʃtvxwɟzʔ#])","%1u%2%3")

	-- Convert CV(wy)C pattern into diphthong
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)w(" .. NV ..  ")([^lɾr" .. vowel .. separator .."])" ,"%1%2u%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)w([ĉdðjĵlmnɲŋɾrstvwɟzʔ])([l" .. vowel .. separator .."])" ,"%1%2u%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)[jyɟ](" .. NV ..  ")([^lɾr" .. vowel .. separator .."])" ,"%1%2i%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)[jyɟ]([ĉdðjĵlmnɲŋɾrstvwɟzʔ])([l" .. vowel .. separator .."])" ,"%1%2i%3%4")

	--vowels with grave/circumflex to vowel+glottal stop
	text = rsub(text, CFLEX, AC .. GR)
	text = rsub(text, "(" .. V .. ")([" .. AC .. "]?)" .. GR .. "([#" .. vowel .. "])", "%1%2ʔ%3")
	text = rsub(text, "(" .. V .. ")([" .. AC .. "]?)" .. GR, "%1%2")

	-- Add glottal stop for words starting with vowel
	text = rsub(text, "([#])(" .. V .. ")", "%1ʔ%2")
	text = rsub(text, "◌", "")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")

	-- "mb", "mp", "nd", "nk", "nt" combinations
	text = rsub_repeatedly(text, "(m)([bp])([^hlɾrɟ" .. vowel .. separator .."])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([dkt])([^hlɾrɟ" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([s])([^ɟ" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(ŋ)([k])([^hlɾrɟ" .. vowel .. separator ..  "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([bćĉdfɡɣklmnpqsʃvxz])([^hlɾrɟ" .. vowel .. separator ..  "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([t])([sz]?)([^hlɾrɟsʃ" .. vowel .. separator ..  "])(" .. V .. ")", "%1%2%3.%4%5")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- Any aeo, or stressed iu, should be syllabically divided from a following aeo or stressed iu.
	text = rsub_repeatedly(text, "([aeo]" .. accent_c .. "*)([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "([aeo]" .. accent_c .. "*)(" .. V .. AC .. ")", "%1.%2")
	text = rsub(text, "([iuə]" .. AC .. ")([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "([iuə]" .. AC .. ")(" .. V .. AC .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)u", "u%1.u")

	table.insert(debug, text)

	local accent_to_stress_mark = { [AC] = "ˈ", [DOTOVER] = "" }

	local function accent_word(word, syllables)
		-- Now stress the word. If any accent exists in the word (including macron indicating an unaccented word),
		-- put the stress mark(s) at the beginning of the indicated syllable(s). Otherwise, apply the default
		-- stress rule.
		if rfind(word, accent_c) then
			for i = 1, #syllables do
				syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
					function(pre, accent, post)
						return accent_to_stress_mark[accent] .. pre .. post
					end
				)
			end
		else
			-- Default stress rule. Words without vowels (e.g. IPA foot boundaries) don't get stress.
			if #syllables > 1 and rfind(word, "[^aeiouəʔbcćĉdðfɡgɣhjɟĵklmnñɲŋpqrɾsʃtvwxz#]#") or #syllables == 1 and rfind(word, V) then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables >= 2 then
				-- Default stress on the last syllable in Tausug
				local vowel_find = false
				local stress_find = false
				for i=0, #syllables-1 do
					if rfind(syllables[#syllables - i], V) then
						vowel_find = true
						if vowel_find then
							syllables[#syllables - i] = "ˈ" .. syllables[#syllables - i]
							stress_find = true
							break
						end
					end
				end
				if vowel_find and not stress_find then
					syllables[#syllables] = "ˈ" .. syllables[#syllables]
				end
			end
		end
	end

	local words = rsplit(text, " ")
	for j, word in ipairs(words) do
		-- accentuation
		local syllables = rsplit(word, "%.")
		accent_word(word, syllables)
		-- Reconstruct the word.
		words[j] = table.concat(syllables, ".")
	end

	text = table.concat(words, " ")

	-- suppress syllable mark before IPA stress indicator
	text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")
	--make all primary stresses but the last one be secondary
	text = rsub_repeatedly(text, "ˈ(.+)ˈ", "ˌ%1ˈ")

    table.insert(debug,text)

    --correct final glottal stop placement
    text = rsub(text,"([ˈˌ])ʔ([#]*)([ʔbĉćdðfɡɣhĵɟklmnŋɲpqɾrsʃtvwz])(" .. V .. ")","%1%2%3%4ʔ")

    table.insert(debug,text)

	-- Long vowels
	text = rsub(text,"(i)[jɟ]","%1ː")
	text = rsub(text,"(u)w","%1ː")
	text = rsub(text,"([aeiouëü])[.]%1","%1ː")
	text = rsub(text,"([ʔbćĉdðfɡɣhjĵklmnɲŋpqɾrsʃtvwxɟz])([lnɾst]?)([aeiouëü])([ˈˌ.])([aeiouëü])","%4%1%2%3ː")

    --add temporary macron for /a/, /i/ and /u/ in stressed syllables so they don't get replaced by unstressed form

	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdðfɡɣhĵɟklmnŋpɾrstvxwz]?)([ɟlnɾstw]?)([a])([ː]?)([ʔbdðfɡɣiklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ā%6%7%8")
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdðfɡɣhĵɟklmnŋpɾrstvxwz]?)([ɟlnɾstw]?)([i])([ː]?)([ʔbdðfɡɣklmnŋpɾstu]?)([bdɡklmnpɾst]?)","%1%2%3%4ī%6%7%8")
	text = rsub(text,"([ˈˌ])([#]*)([ʔbćĉdðfɡɣhĵɟklmnŋpɾrstvxwz]?)([ɟlnɾstw]?)([u])([ː]?)([ʔbdðfɡɣiklmnŋpɾst]?)([bdɡklmnpɾst]?)","%1%2%3%4ū%6%7%8")

    table.insert(debug, text)

      --Corrections for diphthongs
    text = rsub(text,"([aāeəëiīouūü])([ː]?)i","%1%2j") --ay
    text = rsub(text,"([aāeəëiīouūü])([ː]?)u","%1%2w") --aw
	text = rsub(text,"([i])j","%1ː") --ay

    table.insert(debug, text)

    --remove "ɟ" and "w" inserted on vowel pair starting with "i" and "u"
    text = rsub(text,"([i])([ˈˌ]?)ɟ([aāeəouū])","%1%2%3")
    text = rsub(text,"([u])([ˈˌ]?)w([aāeəiī])","%1%2%3")

    table.insert(debug,text)

    --/z/ changes
    -- text = rsub(text,"([aāeəoiīuū])z([ˈˌ.#])([^bdfɡĵjɟŋɾrvz])","%1s%2%3") -- /z/ turn to /s/ before some unvoiced sounds
    -- text = rsub(text,"([^#bdfɡĵjɟnŋɾrvzaāeəoiīuū])([ˈˌ.#])z","%1%2s") -- /z/ turn to /s/ after some unvoiced sounds
    -- text = rsub(text,"([bćĉdfɡhĵjɟklmnŋptvwz])([ˈˌ.]?)([ɟlɾst])([aāeəoiīuū])([.]?)([z])","%1%2%3%4%5s") -- consonant cluster before /z/ turn to /s/
    -- text = rsub_repeatedly(text, "([^z]*)z([^z]*)([^#bdfɡĵjɟnŋɾrvzˈˌ.#][ˈˌ.#]?)z", "%1z%2%3s") -- /z/ turn to /s/ if /z/ already said earlier

    text = rsub_repeatedly(text, "^([#]*)([ˈˌ])([#]*)", "%1%3%2") -- Move stress inside word boundary fix at start
    text = rsub_repeatedly(text, "([ ])([#]*)([ˈˌ])([#]*)", "%1%2%4%3") -- Move stress inside word boundary fix at start

    local tsg_IPA_table = {
    	["phonetic"] = text,
    	["phonemic"] = text,
    	["raw"] = text
    }

	for key, value in pairs(tsg_IPA_table) do
		text = tsg_IPA_table[key]

		--phonetic transcription
		if key == "phonetic" then
	       	table.insert(debug, text)

			text = rsub(text,"([e])","i")
			text = rsub(text,"([o])","u")

			--Turn phonemic diphthongs to phonetic diphthongs
			-- text = rsub(text, "([aāeëəouūüɜ])([ː]?)j", "%1%2ɪ̯")
			-- text = rsub(text, "([aāeëəiīouūɜ])([ː]?)w", "%1%2ʊ̯")

	        table.insert(debug, text)

	        text = rsub(text,"n([ˈˌ.])ɟ","%1ɲ") -- /n/ before /j/
	        text = rsub(text,"n[ɟj]([aāeëəiɪ̯īouʊ̯ūüɜ])", "ɲ%1") -- /n/ before /j/

	        --Combine consonants (except H) followed by I/U and certain stressed vowels
		    text = rsub(text,"([bćĉdðfɡɣhĵklmnɲŋpɾrstvz])([lnɾst]?)i([ˈˌ.ː])ɟ?([āaeëəoūuüɜ])","%3%1%2ɟ%4")
		    text = rsub(text,"([bćĉdðfɡɣhĵklmnɲŋpɾrstvz])([lnɾst]?)u([ˈˌ.ː])w?([āaeëəioüɜ])","%3%1%2w%4")
			text = rsub(text,"([bćĉdðfɡɣhĵklmnɲŋpɾrstvz])([lnɾst]?)u([ˈˌ.ː])w?([ī])","%3%1%2wi")
			text = rsub_repeatedly(text, "([.]+)", ".")

	       	table.insert(debug, text)

		    table.insert(debug, text)

	    	text = rsub(text,"([nŋ])([ˈˌ# .]*[bfpv])","m%2")
	    	text = rsub(text,"([ŋ])([ˈˌ# .]*[dst])","n%2")
		    text = rsub_repeatedly(text,"([aāeëəiɪ̯īouʊ̯ūüɜ])([#]?)([ ]?)([ˈˌ#.ː])([ɡ])([aāeəiīouūüɜ])", "%1%2%3%4ɣ%6") -- /ɡ/ between vowels
	        text = rsub(text,"([n])([ˈ ˌ# .]*[ɡkhw])","ŋ%2") -- /n/ before /k/ and /g/ (some proper nouns and loanwords)
	        text = rsub(text,"([ˈˌ.])n([ɟj])([aāeəiīouūüɜ])","%1ɲ%3") -- /nj/ before any vowel following stress
	        text = rsub(text,"([ɾ])([ˈˌ# .]*[mnɡk])","ɹ%2")

	        --final fix for phonetic diphthongs
		    text = rsub(text,"([a])ɪ̯","āɪ̯") --ay
		    text = rsub(text,"([a])ʊ̯","āʊ̯") --aw
		    text = rsub(text,"([i])ʊ̯","īʊ̯") --iw

	       	table.insert(debug, text)
			
		    text = rsub_repeatedly(text, "ˈ(.+)ˈ", "ˌ%1ˈ") -- Reset primary to secondary stresses if not on last word

		    -- Tausug intervocalic consonants
			text = rsub_repeatedly(text,"([aāeəiīouɜūʊü])([.ˈˌ #ː]*)[b]([aāeəiīouɜūʊ])","%1%2β%3")
			text = rsub_repeatedly(text,"([aāeəiīouɜūʊü])([.ˈˌ #ː]*)[d]([aāeəiīouɜūʊ])","%1%2ɾ%3")
			text = rsub_repeatedly(text,"([aāeəiīouɜūʊü])([.ˈˌ #ː]*)[ɡ]([aāeəiīouɜūʊ])","%1%2ɣ%3")
			text = rsub_repeatedly(text,"([aāeəiīouɜūʊü])([.ˈˌ #ː]*)[h]([aāeəiīouɜūʊ])","%1%2ɦ%3")

			-- Nasal vowels
			text = rsub_repeatedly(text,"([mnɲŋ])([aāeëəiīouɜūʊ])([.ˈˌ #ː]*)([mnɲŋ])","%1%2" .. TILDE .. "%3%4")

			-- Add vowel length to stressed open vowels
			text = rsub_repeatedly(text,"([ˈˌ])([ʔbćĉdfɡɣhĵɟkxlmnɲŋpɾrɹsʃtwvzʒ]?)([ɟlnɾstw]?)([āeëəīoūü])([ˈˌ.])","%1%2%3%4ː%5")

			-- Tausug /a/
			text = rsub_repeatedly(text,"([kxgɡɣqŋʔhɦw])[āa]","%1ɑ") -- after back consonants
			text = rsub(text,"[ā]","a")

			-- /k/ fronting and retracting
			text = rsub_repeatedly(text,"([k])([iī])","%1̟%2")
			text = rsub_repeatedly(text,"([k])([uūɑ])","%1̠%2")

		    --change a, i, u to unstressed equivalents (certain forms to restore)
		    -- text = rsub(text,"a","ɐ")
		    -- text = rsub(text,"a","ä")
		    text = rsub(text,"i","ɪ")
		    text = rsub(text,"u","ʊ")

			-- Some alveolars are actually dental
		    text = rsub(text, "([tdn])", "%1̪")

		   -- Unreleased stops
		   text = rsub(text,"([pbtdkɡq])([̪]?)([.ˈˌ])([pbtdkɡqĵʔmnɲŋ])","%1̚%2%3%4")
		   text = rsub(text,"([pbtdkɡq])([̪]?)([# ])","%1̚%2%3")

		elseif key == "phonemic" then
			--Turn semivowel to different symbol
			text = rsub(text, "([aāeëəouūüɜ])j([^.ˈˌ#])", "%1i%2")
			text = rsub(text, "([aāeëəiīouūüɜ])w([^.ˈˌ#])", "%1u%2")

			text = rsub(text,"([n])([ˈˌ#.]?[ɡk])","ŋ%2") -- /n/ before /k/ and /g/ (some proper nouns and loanwords)
			if not include_phonemic_syllable_boundaries then
				text = rsub(text,"%.","")
			end
			text = rsub(text,"‿", " ")
			text = rsub_repeatedly(text,"[ˈˌ]", "") -- Remove stresses
		end

		table.insert(debug, text)

	    --delete temporary macron in /a/, /i/ and /u/
	    text = rsub(text,"ā","a")
	    text = rsub(text,"ī","i")
	    text = rsub(text,"ū","u")

		-- Final fix for "iy" and "uw" combination
		text = rsub(text,"([iɪ])([ː]?)([ˈˌ.]*)ɟ([aɐeɛəouʊ])","%1%2%3%4")
		text = rsub(text,"([uʊ])([ː]?)([ˈˌ.]*)w([aɐeɛəiɪo])","%1%2%3%4")
		text = rsub(text,"([ɪ])([ː]?)([ˈˌ.]*)ɟ([i])","%1%2%3%4")
		text = rsub(text,"([i])([ː]?)([.]*)ɟ([ɪ])","%1%2%3%4")
		text = rsub(text,"([ʊ])([ː]?)([ˈˌ.]*)w([u])","%1%2%3%4")
		text = rsub(text,"([u])([ː]?)([.]*)w([ʊ])","%1%2%3%4")

		--remove "ɟ" and "w" inserted on vowel pair starting with "e" and "o"
	    text = rsub(text,"([ɛe])([ː]?)([ˈˌ.]*)[ɟj]([aɐo])","%1%2%3%4")
	    text = rsub(text,"([o])([ː]?)([ˈˌ.]*)w([aɐeɛə])","%1%2%3%4")

		-- convert fake symbols to real ones
	    local final_conversions = {
			["ĉ"] = "t͡ʃ", -- fake "ch" to real "ch"
			["ć"] = "t͡s", -- fake "ts" to real "ts"
			["ɟ"] =  "j", -- fake "y" to real "y"
	        ["ĵ"] = "d͡ʒ", -- fake "j" to real "j"
			["ë"] = "ə", -- schwa
			["ü"] = "ɜ" -- obscure u
		}

		text = rsub(text, "[ć]([" .. separator .. "])", "ts%1")
		text = rsub(text, "[ĉćɟĵëü]", final_conversions)

		-- Do not have multiple syllable break consecutively
		text = rsub_repeatedly(text, "([.]+)", ".")
    	text = rsub_repeatedly(text, "([.]?)(‿)([.]?)", "%2")

    	-- remove # symbols at word and text boundaries
		text = rsub_repeatedly(text, "([.]?)#([.]?)", "")

		-- resuppress syllable mark before IPA stress indicator
		text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")
		text = rsub_repeatedly(text, "([.]?)(" .. ipa_stress_c .. ")([.]?)", "%2")

    	tsg_IPA_table[key] = canon_spaces(toNFC(text))
	end

	return tsg_IPA_table
end

function export.show(frame)
	local params = {
		[1] = {},
		["pre"] = {},
		["bullets"] = {type = "number", default = 1},
	}

	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)

	local results = {}

	local text = args[1] or mw.title.getCurrentTitle().text

	local IPA_result = export.IPA(text)
	table.insert(results, { pron = "/" .. IPA_result["phonemic"] .. "/" })
	table.insert(results, { pron = "[" .. IPA_result["phonetic"] .. "]" })

	local pre = args.pre and args.pre .. " " or ""
	local bullet = (args.bullets ~= 0) and "* " or ""

	return bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = results }
end


local function parse_gloss(arg)
	local poses, gloss
	if arg:find("%^") then
		poses, gloss = arg:match("^(.-)%^(.*)$")
		if gloss == "" then
			gloss = nil
		end
	else
		gloss = arg
	end
	if poses then
		poses = split_on_comma(poses)
		local m_headword_data = mw.loadData(headword_data_module)
		for i, pos in ipairs(poses) do
			poses[i] = m_headword_data.pos_aliases[pos] or pos
		end
	end
	return {
		poses = poses,
		gloss = gloss,
	}
end


-- Parse a raw accent spec, which is one or more comma-separated accent qualifiers.
local function parse_accents(arg)
	return require(labels_module).split_labels_on_comma(arg)
end


-- Return the number of syllables of a phonemic or phonetic representation, which should have syllable dividers in it
-- but no hyphens.
local function get_num_syl_from_ipa(pron)
	-- Maybe we should just count vowels instead of the below code.
	pron = rsub(pron, "|", " ") -- remove IPA foot boundaries
	local words = rsplit(pron, " +")
	for i, word in ipairs(words) do
		-- IPA stress marks are syllable divisions if between characters; otherwise just remove.
		word = rsub(word, "(.)[ˌˈ](.)", "%1.%2")
		word = rsub(word, "[ˌˈ]", "")
		words[i] = word
	end
	-- There should be a syllable boundary between words.
	pron = table.concat(words, ".")
	
	pron = rsub(pron,"([i])([au])", "%1y%2")
	pron = rsub(pron,"([u])([ai])", "%1w%2")
	
	local vowel = 0
	for check in pron:gmatch('([aeiou]+)') do
		vowel = vowel + 1
	end
	
	return vowel
end


-- Get the rhyme by truncating everything up through the last stress mark + any following consonants, and remove
-- syllable boundary markers.
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: This works because the phonemic vowels are just [aeiou] possibly with diacritics that are separate
	-- Unicode chars. If we want to handle things like ɛ or ɔ we need to add them to `vowel`.
	phonemic = rsplit(phonemic, " ")
	phonemic = phonemic[#phonemic]
	phonemic = rsub(rsub(phonemic, ".*[ˌˈ]", ""), "^" .. NV .. "*", ""):gsub("%.", "")
	phonemic = rsub(phonemic,"([i])([au])", "%1y%2")
	phonemic = rsub(phonemic,"([u])([ai])", "%1w%2")
	phonemic = rsub_repeatedly(phonemic,"(" .. V .. "+)(" .. NV .. "+)(" .. V .. "+)", "%3")
	return phonemic
end


local function split_syllabified_spelling(spelling)
	spelling = "#" .. spelling .. "#"
	spelling = rsub_repeatedly(spelling, "%.([ #])", "·%1")
	spelling = rsub_repeatedly(spelling, "#", "")
	spelling = rsplit(spelling, "%.")
	for key, value in ipairs(spelling) do
		spelling[key] = rsub_repeatedly(value, "·", ".")
	end
	return spelling
end


-- "Align" syllabified respelling `syllab` to original spelling `spelling` by matching character-by-character, allowing
-- for extra syllable and accent markers in the syllabification and certain mismatches in the consonants. The goal is to
-- produce the appropriately syllabified version of the original spelling (the pagename) by matching characters in the
-- syllabified respelling to the original spelling, putting the syllable boundaries in the appropriate places in the
-- original spelling. As an example, given syllabified respelling 'a.ma.7ín' and original spelling 'amain', we would
-- like to produce 'a.ma.in'.
--
-- If we encounter an extra syllable marker (.), we allow and keep it. If we encounter an extra accent marker in thes
-- syllabification, we drop it. We allow for mismatches in capitalization and for certain other mismatches, e.g. extra
-- glottal stops (written 7), h in respelling vs. g or j in the original, etc. If we can't match, we return nil
-- indicating the alignment failed.
local function align_syllabification_to_spelling(syllab, spelling)
	local result = {}
	local function concat_result()
		-- Postprocess to remove dots (syllable boundaries) next to hyphens.
		return (toNFC(table.concat(result)):gsub("%.%-", "-"):gsub("%-%.", "-"))
	end
	-- Remove glottal stop (7) from respelling to simplify the code below, because it's never found in the original
	-- spelling. (FIXME: We should do the same for diacritics, but they're currently removed earlier, in
	-- syllabify_from_spelling(). We should probably get rid of the removal there and put it here.)
	syllab = decompose(syllab:gsub("ː", ""), "recompose e-dia"):gsub("7", "")
	spelling = decompose(spelling, "recompose e-dia")
	local syll_chars = rsplit(ulower(syllab), "")
	local spelling_chars = rsplit(spelling, "")
	local i = 1
	local j = 1
	local function matches(uci, ucj)
		-- Return true if a syllabified respelling character (uci) matches the corresponding spelling char (ucj).
		-- Both uci and ucj should be lowercase.
		-- Sound is at the key, values are the letters sound can match
		local matching_chars = {
			["e"] = {"i"},
			["ë"] = {"a", "e", "o", "u"},
			["h"] = {"g", "j", "x"},
			["i"] = {"e"},
			["j"] = {"g"},
			["k"] = {"j"},
			["o"] = {"u"},
			["s"] = {"j", "c", "x"},
			["u"] = {"o"},
			["w"] = {"u", "o"},
			["y"] = {"i"}
		}

		return uci == ucj or (matching_chars[uci] and m_table.contains(matching_chars[uci], ucj))
	end
	local function silent_spelling_letter(ucj)
		return ucj == "h" or ucj == "'" or ucj == "-"
	end
	local function syll_at(pos)
		return syll_chars[pos] or ""
	end
	local function spell_at(pos)
		return spelling_chars[pos] or ""
	end
	local function uspell_at(pos)
		local c = spelling_chars[pos]
		return c and ulower(c) or ""
	end
	while i <= #syll_chars or j <= #spelling_chars do
		local uci = syll_at(i)
		local cj = spell_at(j)
		local ucj = uspell_at(j)

		if uci == "g" and syll_at(i - 1) == "n" and syll_at(i + 1) == "." and matches(syll_at(i + 2), ucj) and
			not matches(syll_at(i + 2), uspell_at(j + 1)) then
			-- As a special case, before checking whether the corresponding characters match, we have to skip an extra
			-- g in an -ng- sequence in the syllabified respelling if the corresponding spelling character matches the
			-- next respelling character (taking into account the syllable boundary). This is so that e.g.
			-- syll='ba.rang.gay' matches spelling='barangay'. Otherwise we will match the first respelling g against
			-- the spelling g and the second respelling g won't match. A similar case occurs with
			-- syll='E.vang.he.lis.ta' and spelling='Evangelista'. But we need an extra condition to not do this hack
			-- when syll='ba.rang.gay' matches spelling='baranggay'.
			i = i + 1
		elseif uci == "g" and ucj == "g" and uspell_at(j + 1) == TILDE  then
			table.insert(result, cj)
			table.insert(result, uspell_at(j + 1))
			i = i + 1
			j = j + 2
		elseif matches(uci, ucj) then
			table.insert(result, cj)
			i = i + 1
			j = j + 1
		elseif ucj == uspell_at(j - 1) and uci == "." and ucj ~= syll_at(i + 1) then
			-- See below. We want to allow for a doubled letter in spelling that is pronounced single, and preserve the
			-- doubled letter. But it's tricky in the presence of syllable boundaries on both sides of the doubled
			-- letter as well as doubled letters pronounced double. Specifically, there are three possibilities,
			-- exemplified by:
			-- (1) syll='Mal.lig', spelling='Mallig' -> 'Mal.lig';
			-- (2) syll='Ma.lig', spelling='Mallig' -> 'Ma.llig';
			-- (3) syll='Wil.iam', spelling='William' -> 'Will.iam'.
			-- If we copy the dot first, we get (1) and (2) right but not (3).
			-- If we copy the double letter first, we get (2) and (3) right but not (1).
			-- We choose to copy the dot first except in the situation exemplified by (3), where we copy the doubled
			-- letter first. The condition above handles (3) (the doubled letter matches against a dot) while not
			-- interfering with (1) (where the doubled letter also matches against a dot but the next letter in the
			-- syllabification is the same as the doubled letter, because the doubled letter is pronounced double).
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) and uci == "." and ucj ~= syll_at(i + 1) and
			not rfind(uspell_at(j + 1), V) then
			-- See below for silent h or apostrophe in spelling. This condition is parallel to the one directly above
			-- for silent doubled letters in spelling and handles the case of syllab='Abduramán', spelling='Abdurahman',
			-- which should be syllabified 'Ab.du.rah.man'. But we need a check to see that the next spelling character
			-- isn't a vowel, because in that case we want the silent letter to go after the period, e.g.
			-- syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah' (the 7 is removed above).
			table.insert(result, cj)
			j = j + 1
		elseif uci == "." then
			table.insert(result, uci)
			i = i + 1
		elseif ucj == uspell_at(j - 1) then
			-- A doubled letter in spelling that is pronounced single. Examples:
			-- * syllab='Ma.líg', spelling='Mallig' -> 'Ma.llig' (with l)
			-- * syllab='Lu.il.yér', spelling='Lhuillier' -> 'Lhu.ill.ier' (with l; a more complex example)
			-- * syllab='a.sa.la.mu a.lai.kum', spelling='assalamu alaikum' -> 'as.sa.la.mu a.lai.kum' (with s)
			-- * syllab='Jé.fer.son', spelling='Jefferson' -> 'Je.ffer.son' (with f)
			-- * syllab='Je.ma', spelling='Gemma' -> 'Ge.mma' (with m)
			-- * syllab='Ha.na', spelling='Hannah' -> 'Ha.nnah' (with n)
			-- * syllab='A.by', spelling='Abby' -> 'A.bby' (with b)
			-- * syllab='Ka.ba', spelling='Kaaba' -> 'Kaa.ba' (with a)
			-- * syllab='Fu.ji', spelling='Fujii' -> 'Fu.jii' (with i)
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) then
			-- A silent h, apostrophe or hyphen in spelling. Examples:
			-- * syllab='adán', spelling='adhan' -> 'a.dhan'
			-- * syllab='Atanasya', spelling='Athanasia' -> 'A.tha.nas.ia'
			-- * syllab='Cýntiya', spelling='Cynthia' -> 'Cyn.thi.a'
			-- * syllab='Ermóhenes', spelling='Hermogenes' -> 'Her.mo.ge.nes'
			-- * syllab='Abduramán', spelling='Abdurahman' -> 'Ab.du.rah.man'
			-- * syllab='Jumu7á', spelling='Jumu'ah' -> 'Ju.mu.'ah'
			-- * syllab='pag7ibig', spelling='pag-ibig' -> 'pag-i.big'
			table.insert(result, cj)
			j = j + 1
		elseif uci == AC or uci == GR or uci == CFLEX or uci == DIA or uci == TILDE or uci == MACRON or uci == DOTOVER or
			uci == "y" or uci == "w" then
			-- skip character
			i = i + 1
		else
			-- non-matching character
			mw.log(("Syllabification alignment mismatch for pagename '%s' (position %s, character %s), syllabified respelling '%s' (position %s, character %s), aligned result so far '%s'"
				):format(spelling, j, ucj, syllab, i, uci, concat_result()))
			return nil
		end
	end
	if i <= #syll_chars or j <= #spelling_chars then
		-- left-over characters on one side or the other
		mw.log(("Syllabification alignment mismatch for pagename '%s' (%s), syllabified respelling '%s' (%s), aligned result so far '%s'"
			):format(
				spelling, j > #spelling_chars and "end of string" or ("position %s, character %s"):format(j, uspell_at(j)),
				syllab, i > #syll_chars and "end of string" or ("position %s, character %s"):format(i, syll_at(i)),
				concat_result()))
		return nil
	end
	return concat_result()
end


local function generate_syll_obj(term)
	return {syllabification = term, hyph = split_syllabified_spelling(term)}
end


-- Word should already be decomposed.
local function word_has_vowels(word)
	word = ulower(word)
	return rfind(word, V) or word:find("y")
end


local function any_words_have_vowels(term)
	local words = rsplit(decompose(term), "[ %-]")
	for i, word in ipairs(words) do
		-- Allow empty word; this occurs with prefixes and suffixes.
		if word_has_vowels(word) then
			return true
		end
	end
	return false
end

local function has_jawi(text)
	return text:match("[ᜀ-ᜟ]")
end


local function should_generate_rhyme_from_respelling(term)
	local words = rsplit(decompose(term), " +")
	local last_word = words[#words]
	local should_generate_cat = #words == 1
	local should_generate_rhyme =
		not last_word:find("%-$") and -- no if word is a prefix
		not (last_word:find("^%-") and last_word:find(DOTOVER)) and -- no if word is an unstressed suffix
		word_has_vowels(last_word) -- no if word has no vowels (e.g. a single letter)
	return should_generate_rhyme, should_generate_cat
end


local function should_generate_rhyme_from_ipa(ipa)
	local should_generate_cat = not ipa:find("%s")
	local should_generate_rhyme = word_has_vowels(decompose(ipa))
	return should_generate_rhyme, should_generate_cat
end


local function should_generate_rhyme_from_termobj(termobj)
	if termobj.raw then
		return should_generate_rhyme_from_ipa(termobj.raw_phonemic or termobj.raw_phonetic)
	else
		return should_generate_rhyme_from_respelling(termobj.term)
	end
end


local function process_specified_rhymes(rhymes, sylls, parsed_respellings)
	local rhyme_ret = {}
	for _, rhyme in ipairs(rhymes) do
		local num_syl = rhyme.num_syl
		local no_num_syl = false

		-- If user explicitly gave the rhyme but didn't explicitly specify the number of syllables, try to take it from
		-- the syllabification.
		if not num_syl then
			num_syl = {}
			for _, syll in ipairs(sylls) do
				if should_generate_rhyme_from_respelling(syll.syllabification) then
					local this_num_syl = 1 + ulen(rsub(syll.syllabification, "[^.]", ""))
					m_table.insertIfNot(num_syl, this_num_syl)
				else
					no_num_syl = true
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		-- If that fails and term is single-word, try to take it from the phonemic.
		if not no_num_syl and not num_syl then
			for _, parsed in ipairs(parsed_respellings) do
				for _, pronun in ipairs(parsed.pronuns) do
					-- Check that pronun.phonemic exists (it may not if raw phonetic-only pronun is given), and rhyme
					-- isn't suppressed (which may happen if the term has a qualifier "colloquial", "obsolete" or the
					-- like or is an auto-generated "glottal stop elision" pronunciation).
					if pronun.phonemic and not pronun.no_rhyme then
						if not should_generate_rhyme_from_ipa(pronun.phonemic) then
							no_num_syl = true
							break
						end
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local this_num_syl = get_num_syl_from_ipa(pronun.phonemic)
						m_table.insertIfNot(num_syl, this_num_syl)
					end
				end
				if no_num_syl then
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end

		local rhymeobj = m_table.shallowcopy(rhyme)
		rhymeobj.num_syl = num_syl
		table.insert(rhyme_ret, rhymeobj)
	end
end


-- Parse a pronunciation modifier in `arg`, the argument portion in an inline modifier (after the prefix), which
-- specifies a pronunciation property such as rhyme, syllabification, homophones or audio. The argument can itself have
-- inline modifiers, e.g. <audio:Foo.ogg<a:Colombia>>. The allowed inline modifiers are specified by `param_mods` (of
-- the format expected by `parse_inline_modifiers()`); in addition to any modifiers specified there, the modifiers
-- <q:...>, <qq:...>, <a:...> and <aa:...> are always accepted (and can be repeated). `generate_obj` and `parse_err` are
-- like in `parse_inline_modifiers()` and specify respectively a function to generate the object into which modifier
-- properties are stored given the non-modifier part of the argument, and a function to generate an error message (given
-- the message). Normally, a comma-separated list of pronunciation properties is accepted and parsed, where each element
-- in the list can have its own inline modifiers and where no spaces are allowed next to the commas in order for them to
-- be recognized as separators. If `no_split_on_comma` is given, only a single pronunciation property is accepted. If
-- `has_outer_container` is given, the list of pronunciation properties is embedded in the `terms` property of an outer
-- container, into which other list-level modifiers can also be stored (by setting `overall = "true"` in the respective
-- spec in `param_mods`). The return value is a list if neither `no_split_on_comma` nor `has_outer_container` are given,
-- otherwise a container object (which, in the case of `has_outer_container`, will contain a list inside of it, in the
-- `terms` property).
local function parse_pron_modifier(arg, parse_err, generate_obj, param_mods, no_split_on_comma, has_outer_container)
	if arg:find("<") then
		local insert = { store = "insert" }
		param_mods.q = insert
		param_mods.qq = insert
		param_mods.a = insert
		param_mods.aa = insert
		return require(parse_utilities_module).parse_inline_modifiers(arg, {
			param_mods = param_mods,
			generate_obj = generate_obj,
			parse_err = parse_err,
			splitchar = not no_split_on_comma and "," or nil,
			outer_container = has_outer_container and {} or nil,
		})
	elseif no_split_on_comma then
		return generate_obj(arg)
	else
		local retval = {}
		for _, term in ipairs(split_on_comma(arg)) do
			table.insert(retval, generate_obj(term))
		end
		if has_outer_container then
			retval = {
				terms = retval,
			}
		end
		return retval
	end
end


local function parse_rhyme(arg, parse_err)
	local function generate_obj(term)
		return {rhyme = term}
	end
	local param_mods = {
		s = {
			item_dest = "num_syl",
			type = "number",
			sublist = true,
		},
	}

	return parse_pron_modifier(arg, parse_err, generate_obj, param_mods)
end


local function parse_syll(arg, parse_err)
	local param_mods = {
		cap = { overall = true},
	}

	-- We need to pass in has_outer_container because we have an overall property <cap:...> (the caption, defaulting
	-- to "Syllabification") applying to the whole set of syllabifications.
	return parse_pron_modifier(arg, parse_err, generate_syll_obj, param_mods, nil, "has outer container")
end


local function parse_homophone(arg, parse_err)
	local function generate_obj(term)
		return {term = term}
	end
	local param_mods = {
		t = {
			-- [[Module:links]] (called from [[Module:homophones]]) expects the gloss in "gloss".
			item_dest = "gloss",
		},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		g = {
			-- [[Module:links]] (called from [[Module:homophones]]) expects the genders in "genders".
			item_dest = "genders",
			sublist = true,
		},
	}

	return parse_pron_modifier(arg, parse_err, generate_obj, param_mods)
end


local function generate_audio_obj(arg)
	local file, gloss = arg:match("^(.-)%s*#%s*(.*)$")
	if not file then
		file = arg
		gloss = "Audio"
	end
	return {file = file, gloss = gloss}
end


local function parse_audio(arg, parse_err)
	-- None other than qualifiers
	local param_mods = {}

	-- Don't split on comma because some filenames have embedded commas not followed by a space (typically followed by
	-- an underscore).
	return parse_pron_modifier(arg, parse_err, generate_audio_obj, param_mods, "no split on comma")
end


local function syllabify_from_spelling(text, pagename)
	-- Auto syllabifications start --
	local vowel = vowel .. "ẃýāēīōū" -- vowel
	local V = "[" .. vowel .. "]"
	local NV = "[^" .. vowel .. "]"
	local C = "[^" .. vowel .. separator .."]" -- consonant
	
	text = decompose(text, "recompose e-dia")

	local origtext = text
	text = string.lower(text)

	text = rsub(text, "[.] ", "․ ")
	text = rsub(text, "[.]$", "․")

	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "#")

	text = rsub(text, "ng̃", "ŋ")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "g̃", "ġ")
	text = rsub(text, "ch", "ĉ")
	text = rsub(text, "t_s", "ć")
	text = rsub(text, "sh", "ʃ")
	text = rsub(text, "gu([eëiy])", "ǵ%1")
	text = rsub(text, "qu([eëiy])", "ḱ%1")
	text = rsub(text, "r", "ɾ")
	text = rsub(text, "ɾɾ", "r")
	text = rsub(text, "ʔ", "7")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "#(" .. C .. "+)u([aeio])","#%1u.%2")
	text = rsub_repeatedly(text, "#(" .. C .. "+)i([aeou])","#%1i.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")u([aeio])","#%1.u%2")
	text = rsub_repeatedly(text, "(" .. C .. ")i([aeou])","#%1.i%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)u([aeio])","%1.u%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)o([aei])","%1.ó%2")
	text = rsub(text, "a(" .. accent_c .. "*)o([#.7])","a%1ó%2")

	-- eu rules
	text = rsub_repeatedly(text, "([^" .. vowel .. "#])([e])("  .. accent_c .. "?)([u])("  .. accent_c .. "?)","%1%2%3.%4%5")

	text = rsub(text, "y([ˈˌ." .. accent .. "]*)([bćĉdfgǵhjĵkḱlmnɲŋpɾrsʃtvwɟzʔ#" .. vowel .. "])","ý%1%2")
	text = rsub(text, "ý(" .. V .. ")", "y%1")
	text = rsub(text, "w([ˈˌ]?)([bćĉdfgǵjĵkḱlmnɲŋpɾrsʃtvwɟzʔ#" .. vowel .. "])","ẃ%1%2")
	text = rsub(text, "ẃ(" .. V .. ")","w%1")

	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ([bdfgǵkḱpt])([ɾr" .. vowel .. separator .."])" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ẃ([bfgǵkḱp])([l" .. vowel .. separator .."])" ,"%1%2w%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý([bdfgǵkḱpt])([ɾr" .. vowel .. separator .."])" ,"%1%2y%3%4")
	text = rsub(text, "(" .. V .. ")(" .. accent_c .. "?)ý([bfgǵkḱp])([l" .. vowel .. separator .."])" ,"%1%2y%3%4")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")

	-- "mb", "mp", "nd", "nk", "nt" combinations
	text = rsub_repeatedly(text, "(m)([bp])([^lɾrɟy" .. vowel .. separator .."])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([dk])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([s])([^ɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(n)([t])([^lɾrɟys" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "(ŋ)([k])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([bćĉdfgǵkḱlmnpsʃvz])([^lɾrɟy" .. vowel .. separator .. "])(" .. V .. ")", "%1%2.%3%4")
	text = rsub_repeatedly(text, "([ɾr])([t])([sz]?)([^lɾrɟysʃ" .. vowel .. separator .. "])(" .. V .. ")", "%1%2%3.%4%5")

	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")

	-- Any aeëo, or stressed iu, should be syllabically divided from a following aeëo or stressed iu.
	text = rsub_repeatedly(text, "([aeëo]" .. accent_c .. "*)([aeëo])", "%1.%2")
	text = rsub_repeatedly(text, "([aeëo]" .. accent_c .. "*)(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub(text, "([iu]" .. accent_c .. ")([aeëo])", "%1.%2")
	text = rsub_repeatedly(text, "([iu]" .. accent_c .. ")(" .. V .. accent_c .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)u", "u%1.u")

	text = rsub(text, "ĉ", "ch")
	text = rsub(text, "ć", "ts")
	text = rsub(text, "ŋ", "ng")
	text = rsub(text, "ʃ", "sh")
	text = rsub(text, "ǵ", "gu")
	text = rsub(text, "ġ", "g̃")
	text = rsub(text, "ḱ", "qu")
	text = rsub(text, "r", "rr")
	text = rsub(text, "ɾ", "r")
	-- text = remove_accents(text)

	text = rsub_repeatedly(text, "([.]+)", ".")
	text = rsub(text, "[.]?-[.]?", "-")
	text = rsub(text, "[‿]([^ ])", "|%1")
	text = rsub(text, "[.]([^ ])", "|%1")

	text = rsub(text, "([|])+", "%1")

	-- remove # symbols at word and text boundaries
	text = rsub_repeatedly(text, "([.]?)#([.]?)", "")
	text = rsub(text, "․", ".")

	-- Fix Capitalization --
	local syllbreak = 0
	for i=1, #text do
		if text:sub(i,i) == "|" and origtext:sub(i-syllbreak, i-syllbreak) ~= "." and origtext:sub(i-syllbreak, i-syllbreak) ~= "7" then
			syllbreak = syllbreak + 1
		elseif origtext:sub(i-syllbreak, i-syllbreak) == text:sub(i,i):upper() then
			text = table.concat({text:sub(1, i-1), text:sub(i,i):upper(), text:sub(i+1)})
		end
	end

	-- Fix hyphens --
	-- FIXME!!! Why are we relying on looking at the pagename here? This should not be happening.
	origtext = pagename

	if (table.concat(rsplit(origtext, "-")) == table.concat(rsplit(table.concat(rsplit(text, "|")), "-"))) then
		syllbreak = 0
		for i=1, #text do
			if text:sub(i,i) == "|" then
				if origtext:sub(i-syllbreak, i-syllbreak) == "-" then
					text = table.concat({text:sub(1, i-1), "-", text:sub(i+1)})
				else
					syllbreak = syllbreak + 1
				end
			end
		end
	end

	-- FIXME! Hack -- up above we changed periods to vertical bars. The rest of the code expects periods so change
	-- them back. We should clean up the code above to leave the periods alone.
	return (text:gsub("|", "%."))
end

function export.syllabify_and_align(respelling, pagename)
	local syllabification = syllabify_from_spelling(respelling, pagename)
	return align_syllabification_to_spelling(syllabification, pagename)
end

local function css_wrap(text, classes)
	return ('<span class="%s">%s</span>'):format(classes, text)
end

local function format_glosses(glosses)
	if not glosses then
		return ""
	end

	local formatted_glosses = {}
	for _, glossobj in ipairs(glosses) do
		local gloss_parts = {}
		if glossobj.gloss then
			table.insert(gloss_parts, css_wrap("“", "mention-gloss-double-quote") ..
				css_wrap(glossobj.gloss, "mention-gloss") .. css_wrap("”", "mention-gloss-double-quote"))
		end
		if glossobj.poses then
			for _, pos in ipairs(glossobj.poses) do
				table.insert(gloss_parts, css_wrap(pos, "ann-pos"))
			end
		end
		table.insert(formatted_glosses, table.concat(gloss_parts, css_wrap(",", "mention-gloss-comma") .. " "))
	end

	return " " .. css_wrap("(", "mention-gloss-paren annotation-paren") ..
		table.concat(formatted_glosses, css_wrap(";", "mention-gloss-semicolon") .. " ") ..
		css_wrap(")", "mention-gloss-paren annotation-paren")
end

local function format_pronuns(pronuns)
	local pronunciations = {}

	-- Loop through each pronunciation. For each one, add the phonemic and phonetic versions to `pronunciations`,
	-- for formatting by [[Module:IPA]].
	for j, pronun in ipairs(pronuns) do
		local qs = pronun.q

		local first_pronun = #pronunciations + 1

		if not pronun.phonemic and not pronun.phonetic then
			error("Internal error: Saw neither phonemic nor phonetic pronunciation")
		end

		if pronun.phonemic then -- missing if 'raw:[...]' given
			-- don't display syllable division markers in phonemic
			local slash_pron = "/" .. pronun.phonemic:gsub("%.", "") .. "/"
			table.insert(pronunciations, {
				pron = slash_pron,
			})
		end

		if pronun.phonetic then -- missing if 'raw:/.../' given
			local bracket_pron = "[" .. pronun.phonetic .. "]"
			table.insert(pronunciations, {
				pron = bracket_pron,
			})
		end

		local last_pronun = #pronunciations

		if pronun.q then
			pronunciations[first_pronun].q = pronun.q
		end
		if j > 1 then
			pronunciations[first_pronun].separator = ", "
		end
		if pronun.qq then
			pronunciations[last_pronun].qq = pronun.qq
		end

		if pronun.refs then
			pronunciations[last_pronun].refs = pronun.refs
		end
		if first_pronun ~= last_pronun then
			pronunciations[last_pronun].separator = " "
		end
	end

	return m_IPA.format_IPA_full { lang = lang, items = pronunciations, separator = "" }
end

local function format_pronun_line(parsed)
	local formatted_pronuns = format_pronuns(parsed.pronuns)
	local pre = is_first and parsed.pre and parsed.pre .. " " or ""
	local post = is_first and parsed.post and " " .. parsed.post or ""
	return pre .. formatted_pronuns .. format_glosses(parsed.t) .. post
end


local function parse_respelling(respelling, pagename, parse_err)
	local raw_respelling = respelling:match("^raw:(.*)$")
	if raw_respelling then
		local raw_phonemic, raw_phonetic = raw_respelling:match("^/(.*)/ %[(.*)%]$")
		if not raw_phonemic then
			raw_phonemic = raw_respelling:match("^/(.*)/$")
		end
		if not raw_phonemic then
			raw_phonetic = raw_respelling:match("^%[(.*)%]$")
		end
		if not raw_phonemic and not raw_phonetic then
			parse_err(("Unable to parse raw respelling '%s', should be one of /.../, [...] or /.../ [...]")
				:format(raw_respelling))
		end
		return {
			raw = true,
			raw_phonemic = raw_phonemic,
			raw_phonetic = raw_phonetic,
		}
	end
	if respelling == "+" then
		respelling = pagename
	end
	return {term = respelling}
end


-- External entry point for {{tsg-pr}}.
function export.show_full(frame)

	--------------------------------- 1. Parse the arguments. ------------------------------------

	local params = {
		[1] = {list = true},
		["rhyme"] = {},
		["syll"] = {},
		["hmp"] = {},
		["audio"] = {list = true},
		["pagename"] = {},
	}
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)
	local pagename = args.pagename or mw.title.getCurrentTitle().subpageText

	local respellings = #args[1] > 0 and args[1] or {"+"}
	local parsed_respellings = {}
	local function overall_parse_err(msg, arg, val)
		error(msg .. ": " .. arg .. "=" .. val)
	end
	local overall_rhyme = args.rhyme and
		parse_rhyme(args.rhyme, function(msg) overall_parse_err(msg, "rhyme", args.rhyme) end) or nil
	local overall_syll = args.syll and
		parse_syll(args.syll, function(msg) overall_parse_err(msg, "syll", args.syll) end) or nil
	local overall_hmp = args.hmp and
		parse_homophone(args.hmp, function(msg) overall_parse_err(msg, "hmp", args.hmp) end) or nil
	local overall_audio
	if #args.audio > 0 then
		overall_audio = {}
		for _, audio in ipairs(args.audio) do
			local parsed_audio = parse_audio(audio, function(msg) overall_parse_err(msg, "audio", audio) end)
			table.insert(overall_audio, parsed_audio)
		end
	end

	-- Parse each respelling. Individual arguments in 1=, 2=, etc. can consist of one or more comma-separated
	-- respellings, each of which can have inline modifiers <q:...>, <qq:...>, <a:...>, <aa:...> or <ref:...>.
	-- In addition, the respellings as a whole of a given argument can be followed by various inline modifiers,
	-- such as <t:...>, <rhyme:...>, <syll:...>, etc. The result of parsing goes into `parsed_respellings`, which
	-- is a list of objects (one per numbered argument), each of which is a table of the form
	--
	-- {
	--   terms = {TERM, TERM, ...},
	--   audio = {AUDIO, AUDIO, ...},
	--   rhyme = {RHYME, RHYME, ...},
	--   syll = {SYLL, SYLL, ...},
	--   hmp = {HMP, HMP, ...},
	--   t = {GLOSS, GLOSS, ...},
	--   pre = "PRE-TEXT" or nil,
	--   post = "POST-TEXT" or nil,
	--   bullets = NUM_BULLETS,
	--   accents = {"ACCENT", "ACCENT", ...},
	-- }
	--
	-- In this structure, TERM is an object that usually has the form
	--
	-- {
	--   term = "RESPELLING",
	--   ref = {"REF-SPEC", "REF-SPEC", ...},
	--   q = {"QUALIFIER", "QUALIFIER", ...},
	--   qq = {"QUALIFIER", "QUALIFIER", ...},
	-- }
	--
	-- Note that in this structure, "REF-SPEC" of the form parsable by parse_references() in [[Module:references]].
	--
	-- Alternatively, if phonemic or phonetic IPA is given in place of a respelling, TERM will have the form
	--
	-- {
	--   raw = true,
	--   phonemic = "PHONEMIC",
	--   phonetic = "PHONETIC",
	--   ref = {"REF-SPEC", "REF-SPEC", ...},
	--   q = {"QUALIFIER", "QUALIFIER", ...},
	--   qq = {"QUALIFIER", "QUALIFIER", ...},
	-- }
	--
	-- AUDIO is a table of the form
	--
	-- {
	--   file = "FILE",
	--   gloss = "GLOSS",
	--   q = {"QUALIFIER", "QUALIFIER", ...},
	--   qq = {"QUALIFIER", "QUALIFIER", ...},
	--   a = {"ACCENT-QUALIFIER", "ACCENT-QUALIFIER", ...},
	--   aa = {"ACCENT-QUALIFIER", "ACCENT-QUALIFIER", ...},
	-- }
	--
	-- RHYME is a table of the form
	--
	-- {
	--   rhyme = "RHYME",
	--   num_syl = {NUM_SYL, NUM_SYL, ...},
	--   q, qq, a, aa = (as for AUDIO),
	-- }
	--
	-- SYLL is a table of the form (where `hyph` is required to be named this way for [[Module:hyphenation]])
	--
	-- {
	--   syllabification = "SYL.LAB.LES",
	--   hyph = {"SYL", "LAB", "LES"},
	--   q, qq, a, aa = (as for AUDIO),
	-- }
	--
	-- HMP is a table of the form
	--
	-- {
	--   term = "HOMOPHONE",
	--   gloss = "GLOSS" or nil,
	--   pos = "POS" or nil,
	--   alt = "ALT" or nil,
	--   lit = "LIT" or nil,
	--   id = "ID" or nil,
	--   g = {"G", "G", ...},
	--   q, qq, a, aa = (as for AUDIO),
	-- }
	--
	-- GLOSS is a table of the form
	--
	-- {
	--   poses = {"POS", "POS", ...} or nil,
	--   gloss = "GLOSS" or nil,
	-- }
	for i, respelling in ipairs(respellings) do
		if respelling:find("<") then
			local param_mods = {
				pre = { overall = true },
				post = { overall = true },
				bullets = {
					overall = true,
					type = "number",
				},
				t = {
					overall = true,
					store = "insert",
					convert = parse_gloss,
				},
				rhyme = {
					overall = true,
					store = "insert-flattened",
					convert = parse_rhyme,
				},
				syll = {
					overall = true,
					-- Not `store = "insert-flattened"`. parse_syll() does not generates a list but a structure where
					-- the syllabifications are in `terms` and there's an additional overall property `cap` for the
					-- caption (defaulting to "Syllabification"). FIXME: Rethink whether we even want "insert-flattened"
					-- or just "insert" for the remaining pronunciation properties.
					convert = parse_syll,
				},
				hmp = {
					overall = true,
					store = "insert-flattened",
					convert = parse_homophone,
				},
				audio = {
					overall = true,
					store = "insert", -- not "insert-flattened" because parse_audio returns a single object
					convert = parse_audio,
				},
				ref = { store = "insert" },
				q = { store = "insert" },
				qq = { store = "insert" },
				a = {
					item_dest = "accents",
					overall = true,
					convert = parse_accents,
				},
			}

			local parsed = require(parse_utilities_module).parse_inline_modifiers(respelling, {
				paramname = i,
				param_mods = param_mods,
				generate_obj = function(term, parse_err)
					return parse_respelling(term, pagename, parse_err)
				end,
				pre_normalize_modifiers = function(data)
					local modtext = data.modtext
					modtext = modtext:match("^<(.*)>$")
					if not modtext then
						error(("Internal error: Passed-in modifier isn't surrounded by angle brackets: %s"):format(
							data.modtext))
					end
					if modtext:find("%^") and not modtext:find("^t:") then
						modtext = "t:" .. modtext
					end
					return "<" .. modtext .. ">"
				end,
				splitchar = ",",
				outer_container = {},
			})
			if not parsed.bullets then
				parsed.bullets = 1
			end
			table.insert(parsed_respellings, parsed)
		else
			local termobjs = {}
			local function parse_err(msg)
				error(msg .. ": " .. i .. "=" .. respelling)
			end
			for _, term in ipairs(split_on_comma(respelling)) do
				table.insert(termobjs, parse_respelling(term, pagename, parse_err))
			end
			table.insert(parsed_respellings, {
				terms = termobjs,
				bullets = 1,
			})
		end
	end

	--------------------------------- 2. Generate IPA, rhymes and syllabification. ------------------------------------

	-- Used for categorization below.
	local syllabification_alignment_failed = false

	-- Canonicalize syllabifications in `sylls` by convering '+' to the default syllabification of the pagename, '#' to
	-- the pagename itself, and '-' to no syllabification (return `null_syll`). If '-' not seen, return `sylls`.
	local function canonicalize_syllabification(sylls, null_syll)
		for _, syll in ipairs(sylls.terms) do
			if syll.syllabification == "+" then
				syll.syllabification = syllabify_from_spelling(pagename, pagename)
				syll.hyph = split_syllabified_spelling(syll.syllabification)
			elseif syll.syllabification == "#" then
				syll.syllabification = pagename
				syll.hyph = {syll.syllabification}
			elseif syll.syllabification == "-" then
				return null_syll
			end
		end
		return sylls
	end

	if overall_syll then
		overall_syll = canonicalize_syllabification(overall_syll, {})
	end

	local function doesnt_count_for_rhyme(list)
		if not list then
			return false
		end
		local accent_no_count = {"colloquial", "obsolete", "relaxed"}
		for _, item in ipairs(list) do
			for _, word_no_count in ipairs(accent_no_count) do
				if item:find("%f[%w]" .. word_no_count .. "%f[%W]") then
					return true
				end
			end
		end
		return false
	end

	-- Loop over individual respellings, processing each.
	for _, parsed in ipairs(parsed_respellings) do
		-- First, sort the specified accents and default to "Sinūgan Parianun".
		local default_accent = "Sinūgan Parianun"
		if not parsed.accents then
			parsed.accents = {default_accent}
		end

		-- If more than one respelling given, then if any accent or qualifier has the words 'colloquial', 'obsolete' or
		-- 'relaxed' in them, don't generate a rhyme or a '#-syllable word' category.
		local more_than_one_respelling = #parsed.terms > 1 or #parsed_respellings > 1
		local is_standard = m_table.contains(parsed.accents, default_accent)
		local all_terms_no_rhyme = more_than_one_respelling and doesnt_count_for_rhyme(parsed.accents)

		parsed.pronuns = {}
		for i, term in ipairs(parsed.terms) do
			local phonemic, phonetic
			if term.raw then
				phonemic = term.raw_phonemic
				phonetic = term.raw_phonetic
			else
				local ret = export.IPA(term.term, "include phonemic syllable boundaries")
				phonemic = ret.phonemic
				phonetic = ret.phonetic
			end
			local refs
			if not term.ref then
				refs = nil
			else
				refs = {}
				for _, refspec in ipairs(term.ref) do
					local this_refs = require("Module:references").parse_references(refspec)
					for _, this_ref in ipairs(this_refs) do
						table.insert(refs, this_ref)
					end
				end
			end

			local no_rhyme, rhyme_with_cat
			-- Same check as above for colloquial/obsolete/relaxed but check the qualifiers, which are attached to
			-- individual respellings rather than a single-line set of respellings.
			no_rhyme = all_terms_no_rhyme or more_than_one_respelling and (
				doesnt_count_for_rhyme(term.q) or doesnt_count_for_rhyme(term.qq)
			)
			if not no_rhyme then
				local should_generate_rhyme, should_generate_cat = should_generate_rhyme_from_termobj(term)
				no_rhyme = not should_generate_rhyme
				rhyme_with_cat = should_generate_cat
			end
			local pronobj = {
				raw = term.raw,
				phonemic = phonemic,
				phonetic = phonetic,
				refs = refs,
				q = term.q,
				qq = term.qq,
				no_rhyme = no_rhyme,
				rhyme_with_cat = rhyme_with_cat,
			}

			table.insert(parsed.pronuns, pronobj)
			
			-- Gimbahanun variant
			if pronobj.phonemic:find("[iuɪʊ]") then
				local gimb_charmap = { ["i"] = "ɨ", ["ɪ"] = "ɨ", ["u"] = "ɯ", ["ʊ"] = "ɯ"}
				local gimb_pronobj = {
					raw = pronobj.raw,
					phonemic = pronobj.phonemic:gsub("[iuɪʊ]", gimb_charmap),
					phonetic = pronobj.phonetic:gsub("[iuɪʊ]", gimb_charmap),
					refs = pronobj.refs,
					q = combine_qualifiers(pronobj.q, {"Sinūgan Gimbahanun"}),
					qq = pronobj.qq,
					no_rhyme = pronobj.no_rhyme,
					rhyme_with_cat = pronobj.rhyme_with_cat,
					move_to_next_line = true,
				}
		    
				--table.insert(parsed.pronuns, gimb_pronobj)
			end


			-- If [fvz] present in phonemic pronunciation, generate a "more native-sounding" variant with [pbs] in
			-- place.
		-- 	local fvz_pronobj
		-- 	if pronobj.phonemic:find("[fvz]") then
		-- 		local fvz_charmap = { ["f"] = "p", ["v"] = "b", ["z"] = "s"}
		-- 		fvz_pronobj = {
		-- 			raw = pronobj.raw,
		-- 			phonemic = pronobj.phonemic:gsub("[fvz]", fvz_charmap),
		-- 			phonetic = pronobj.phonetic:gsub("[fvz]", fvz_charmap),
		-- 			refs = pronobj.refs,
		-- 			q = combine_qualifiers(pronobj.q, {"more native-sounding"}),
		-- 			qq = pronobj.qq,
		-- 			no_rhyme = pronobj.no_rhyme,
		-- 			rhyme_with_cat = pronobj.rhyme_with_cat,
		-- 			move_to_next_line = true,
		-- 		}

		-- 		-- "bv"/ "pf", "sz" correction
		--     	-- fvz_pronobj.phonetic = fvz_pronobj.phonetic:gsub("[b]([.ˈˌ]*)[b]", "%1b")
		--     	-- fvz_pronobj.phonetic = fvz_pronobj.phonetic:gsub("[p]([.ˈˌ]*)[p]", "%1p")
		--     	-- fvz_pronobj.phonetic = fvz_pronobj.phonetic:gsub("[s]([.ˈˌ]*)[s]", "%1s")

		-- 		-- Phrase-Final unreleased stops correction
		--     	fvz_pronobj.phonetic = fvz_pronobj.phonetic:gsub("([pbtdkɡ])$", "%1̚")
		    
		-- 		table.insert(parsed.pronuns, fvz_pronobj)
		-- 	end
		end

		if not parsed.syll then
			if not overall_syll and any_words_have_vowels(pagename) then
				for _, term in ipairs(parsed.terms) do
					if not term.raw then
						local syllabification = syllabify_from_spelling(term.term, pagename)
						local aligned_syll = align_syllabification_to_spelling(syllabification, pagename)
						if aligned_syll then
							if not parsed.syll then
								parsed.syll = {terms = {}}
							end
							m_table.insertIfNot(parsed.syll.terms, generate_syll_obj(aligned_syll))
						else
							if lang:findBestScript(pagename):getCode() == "Latn" or 
								lang:findBestScript(syllabification):getCode() == lang:findBestScript(pagename) then
									syllabification_alignment_failed = true
							end
						end
					end
				end
			end
		else
			parsed.syll = canonicalize_syllabification(parsed.syll, nil)
		end

		if not parsed.rhyme then
			if overall_rhyme then
				parsed.rhyme = nil
			else
				-- Generate the rhymes.
				for _, pronun in ipairs(parsed.pronuns) do
					-- We should have already excluded multiword terms and terms without vowels from rhyme generation
					-- (see `no_auto_rhyme` below). But make sure to check that pronun.phonemic exists (it may not if
					-- raw phonetic-only pronun is given), and rhyme isn't suppressed (which may happen if the term has
					-- a qualifier "colloquial", "obsolete" or the like or is an auto-generated "glottal stop elision"
					-- pronunciation).
					if pronun.phonemic and not pronun.no_rhyme then
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local num_syl = get_num_syl_from_ipa(pronun.phonemic)
						-- Get the rhyme by truncating everything up through the last stress mark + any following
						-- consonants, and remove syllable boundary markers.
						local rhyme = convert_phonemic_to_rhyme(pronun.phonemic)
						-- Copying qualifiers to rhymes:
						-- (1) If there's only one pronunciation, displaying any associated qualifier on the rhyme is
						--     is redundant, so don't do it.
						-- (2) If there are multiple pronunciations, then we generally do want to copy the qualifier(s)
						--     from pronunciation to rhyme, but only if a given rhyme either derives from a single
						--     pronunciation, or derives from multiple pronunciations all of which share the same
						--     qualifier(s). We do NOT want to combine two different qualifiers from two different
						--     pronunciations.
						-- (3) If there are multiple pronunciations that map to a single rhyme, and all pronunciations
						--     share qualifiers, then we might consider omitting the qualifiers as redundant; but this
						--     case will rarely happen so it might not be worth worrying about.
						-- (4) Similarly, if there are multiple pronunciations where some have the rhyme suppressed (see
						--     above), and all pronunciations share qualifiers, then we might consider omitting the
						--     qualifiers as redundant; but again, this case will rarely happen (especially since in
						--     almost all cases the suppressed-rhyme pronunciation will have distinctive qualifiers) so
						--     it probably isn't worth worrying about. Note that in the common case where the qualifiers
						--     of the rhyme-suppressed pronunciation differ from those of the rhyme-included
						--     pronunciation, we do want to include the qualifiers of the rhyme-included pronunciation
						--     (imagine e.g. there are two pronunciations marked "standard" and "colloquial"; we want to
						--     mark the rhyme as "standard").
						-- (4) There are two different types of qualifiers (left and right); when comparing qualifiers,
						--     we need to compare the entire set of both qualifiers and make sure they both match
						--     (although it will be rare to have both left and right qualifiers on a single
						--     pronunciation).
						local saw_already = false
						if not parsed.rhyme then
							parsed.rhyme = {}
						end
						for _, existing in ipairs(parsed.rhyme) do
							if existing.rhyme == rhyme then
								saw_already = true
								-- We already saw this rhyme but possibly with a different number of syllables,
								-- e.g. if the user specified two pronunciations 'biología' (4 syllables) and
								-- 'bi.ología' (5 syllables), both of which have the same rhyme /ia/.
								m_table.insertIfNot(existing.num_syl, num_syl)
								if not m_table.deepEquals(existing.q, pronun.q) or not
									m_table.deepEquals(existing.qq, pronun.qq) then
									existing.q = nil
									existing.qq = nil
								end
								break
							end
						end
						if not saw_already then
							table.insert(parsed.rhyme, {
								rhyme = rhyme,
								num_syl = {num_syl},
								q = #parsed.pronuns > 1 and pronun.q or nil,
								qq = #parsed.pronuns > 1 and pronun.qq or nil,
								nocat = not pronun.rhyme_with_cat,
							})
						end
					end
				end
			end
		else
			local no_rhyme = false
			for _, rhyme in ipairs(parsed.rhyme) do
				if rhyme.rhyme == "-" then
					no_rhyme = true
					break
				end
			end
			if no_rhyme then
				parsed.rhyme = nil
			else
				parsed.rhyme = process_specified_rhymes(parsed.rhyme, parsed.syll and parsed.syll.terms or {}, {parsed})
			end
		end
	end

	if overall_rhyme then
		local no_overall_rhyme = false
		for _, orhyme in ipairs(overall_rhyme) do
			if orhyme.rhyme == "-" then
				no_overall_rhyme = true
				break
			end
		end
		if no_overall_rhyme then
			overall_rhyme = nil
		else
			local all_sylls
			if overall_syll then
				all_sylls = overall_syll
			else
				all_sylls = {}
				for _, parsed in ipairs(parsed_respellings) do
					if parsed.syll then
						for _, syll in ipairs(parsed.syll.terms) do
							m_table.insertIfNot(all_sylls, syll)
						end
					end
				end
			end
			overall_rhyme = process_specified_rhymes(overall_rhyme, all_sylls, parsed_respellings)
		end
	end

	-- Determine whether all sets of pronunciations have the same value for a pronunciation property (rhymes,
	-- syllabifications or homophones). If so, we display them them only once at the bottom, otherwise beneath each set,
	-- indented. This function takes one argument, the name of a slot specifying the pronunciation property, and
	-- returns two values, a boolean indicating whether all values are the same and the first value seen (which will
	-- be the only value seen if all values are the same).
	local function all_sets_equal(parsed_slot)
		local first_set
		local all_sets_eq = true
		for j, parsed in ipairs(parsed_respellings) do
			if j == 1 then
				first_set = parsed[parsed_slot]
			elseif not m_table.deepEquals(first_set, parsed[parsed_slot]) then
				all_sets_eq = false
				break
			end
		end

		return all_sets_eq, first_set
	end

	local all_rhyme_sets_eq, first_rhyme_ret = all_sets_equal("rhyme")
	local all_syll_sets_eq, first_sylls = all_sets_equal("syll")
	local all_hmp_sets_eq, first_hmps = all_sets_equal("hmp")

	------------------------------ 3. Insert categories as appropriate. ---------------------------------

	local categories = {}

	-- local function get_rhymes_categories(rhymes)
	-- 	if not rhymes then
	-- 		return
	-- 	end
	-- 	for _, rhyme in ipairs(rhymes) do
	-- 		local num_vowels_in_rhyme = #rsub(rhyme.rhyme, NV, "")
	-- 		local penult = num_vowels_in_rhyme == 2
	-- 		local glottal = rhyme.rhyme:find("ʔ$")
	-- 		local pron_cat
	-- 		if penult and glottal then
	-- 			pron_cat = "malumi"
	-- 		elseif penult then
	-- 			pron_cat = "malumay"
	-- 		elseif glottal then
	-- 			pron_cat = "maragsa"
	-- 		else
	-- 			pron_cat = "mabilis"
	-- 		end
	-- 		m_table.insertIfNot(categories,
	-- 			("%s terms with %s pronunciation"):format(lang:getCanonicalName(), pron_cat))
	-- 	end
	-- end

	-- get_rhymes_categories(overall_rhyme)
	-- for _, parsed in ipairs(parsed_respellings) do
	-- 	get_rhymes_categories(parsed.rhyme)
	-- end

	local function get_syll_categories(sylls)
		if not sylls then
			return
		end
		for _, syll in ipairs(sylls.terms) do
			local syll_no_dot = "#" .. syll.syllabification .. "#"
			syll_no_dot = syll.syllabification:gsub("%.([^ #])", "%1"):gsub("#", "")
			if syll_no_dot ~= pagename then
				mw.log(("For page '%s', saw syllabification '%s' not matching pagename"):format(
					pagename, syll.syllabification))
				m_table.insertIfNot(categories, ("%s terms with syllabification not matching pagename"):format(
					lang:getCanonicalName()))
			end
		end
	end

	get_syll_categories(overall_syll)
	for _, parsed in ipairs(parsed_respellings) do
		get_syll_categories(parsed.syll)
	end

	if syllabification_alignment_failed then
		table.insert(categories, ("%s terms where syllabification alignment failed"):format(lang:getCanonicalName()))
	end

	---------------------------- 4. Format IPA, rhymes and syllabification for display. -------------------------------

	local function bullet_prefix(num_bullets)
		return string.rep("*", num_bullets) .. " "
	end

	local function format_rhyme(rhymes)
		return require(rhymes_module).format_rhymes {
			lang = lang,
			rhymes = rhymes,
			force_cat = force_cat,
		}
	end

	local function format_syllabifications(syllobj)
		return require(hyphenation_module).format_hyphenations {
			lang = lang,
			hyphs = syllobj.terms,
			caption = syllobj.cap or "Syllabification"
		}
	end

	local function format_homophones(hmps)
		return require("Module:homophones").format_homophones { lang = lang, homophones = hmps }
	end

	local function format_audio(audios, num_bullets)
		local ret = {}
		for i, audio in ipairs(audios) do
			local text = require(audio_module).format_audio {
				lang = lang,
				file = audio.file,
				caption = audio.gloss,
				q = audio.q,
				qq = audio.qq,
				a = audio.a,
				aa = audio.aa,
			}
			table.insert(ret, bullet_prefix(num_bullets) .. text)
		end
		return table.concat(ret, "\n")
	end

	-- Implement grouping by accent. If there is a run of more than one consecutive set of pronunciations with the
	-- same accent, the accent goes on its own line and the pronunciations with this accent go below with an extra
	-- bullet.
	local prev_accents
	local num_seen_with_these_accents
	for j, parsed in ipairs(parsed_respellings) do
		if m_table.deepEquals(prev_accents, parsed.accents) then
			parsed.of_several_accents = "continuation"
			num_seen_with_these_accents = num_seen_with_these_accents + 1
			if num_seen_with_these_accents == 2 then
				parsed_respellings[j - 1].of_several_accents = "first"
			end
		else
			prev_accents = parsed.accents
			num_seen_with_these_accents = 1
		end
	end

	-- Pull out autogenerated pronunciations and move to the next line, indented.
	for _, parsed in ipairs(parsed_respellings) do
		local saw_next_line_pronuns = false
		for _, pronun in ipairs(parsed.pronuns) do
			if pronun.move_to_next_line then
				saw_next_line_pronuns = true
				break
			end
		end
		if saw_next_line_pronuns then
			local this_line_pronuns = {}
			local next_line_pronuns = {}
			for _, pronun in ipairs(parsed.pronuns) do
				if pronun.move_to_next_line then
					table.insert(next_line_pronuns, pronun)
				else
					table.insert(this_line_pronuns, pronun)
				end
			end
			-- Now see if there are qualifiers shared among all elements of the next-line pronuns and deduplicate if so.
			local function deduplicate_qualifiers(field, keepfirst)
				local saw_nil = false
				for _, pronun in ipairs(next_line_pronuns) do
					if not pronun[field] then
						saw_nil = true
						break
					end
				end
				if not saw_nil then
					local m_setutil = require(set_utilities_module)
					local qualifiers = {}
					for _, pronun in ipairs(next_line_pronuns) do
						table.insert(qualifiers, m_setutil.list_to_set(pronun[field]))
					end
					local all_shared = m_setutil.intersect(unpack(qualifiers))
					if next(all_shared) then
						local first_index, last_index
						if keepfirst then
							first_index = 2
							last_index = #pronun
						else
							first_index = 1
							last_index = #pronun - 1
						end
						for i = first_index, last_index do
							local pronun = next_line_pronuns[i]
							local new_qualifiers = {}
							for _, q in ipairs(pronun[field]) do
								if not all_shared[q] then
									table.insert(new_qualifiers, q)
								end
							end
							pronun[field] = new_qualifiers
						end
					end
				end
			end
			parsed.pronuns = this_line_pronuns
			parsed.next_line_pronuns = next_line_pronuns
		end
	end

	-- Now actually format the pronunciations.
	local textparts = {}
	local first_line = true
	local function ins_line(linetext, num_bullets)
		if not first_line then
			table.insert(textparts, "\n")
		end
		first_line = false
		table.insert(textparts, bullet_prefix(num_bullets) .. linetext)
	end
	local min_num_bullets = 9999
	for j, parsed in ipairs(parsed_respellings) do
		if parsed.bullets < min_num_bullets then
			min_num_bullets = parsed.bullets
		end
		local accent_grouping_offset = 0
		if parsed.of_several_accents == "first" then
			ins_line(require(accent_qualifier_module).format_qualifiers(lang, parsed.accents), parsed.bullets)
		end
		local pronuns = format_pronun_line(parsed)
		local accent_prefix
		if not parsed.of_several_accents then
			accent_prefix = require(accent_qualifier_module).format_qualifiers(lang, parsed.accents) .. " "
		else
			accent_prefix = ""
			accent_grouping_offset = 1
		end
		ins_line(accent_prefix .. pronuns, parsed.bullets + accent_grouping_offset)
		if parsed.next_line_pronuns then
			ins_line(format_pronuns(parsed.next_line_pronuns), parsed.bullets + accent_grouping_offset + 1)
		end
		if parsed.audio then
			-- format_audio() inserts multiple lines and handles bullets by itself.
			table.insert(textparts, "\n")
			-- If only one pronunciation set, add the audio with the same number of bullets, otherwise indent audio by
			-- one more bullet.
			table.insert(textparts, format_audio(parsed.audio,
				(#parsed_respellings == 1 and parsed.bullets or parsed.bullets + 1) + accent_grouping_offset))
		end
		if not all_rhyme_sets_eq and parsed.rhyme then
			ins_line(format_rhyme(parsed.rhyme), parsed.bullets + 1 + accent_grouping_offset)
		end
		if not all_syll_sets_eq and parsed.syll then
			ins_line(format_syllabifications(parsed.syll), parsed.bullets + 1 + accent_grouping_offset)
		end
		if not all_hmp_sets_eq and parsed.hmp then
			ins_line(format_homophones(parsed.hmp), parsed.bullets + 1 + accent_grouping_offset)
		end
	end
	if overall_audio then
		-- format_audio() inserts multiple lines and handles bullets by itself.
		table.insert(textparts, "\n")
		table.insert(textparts, format_audio(overall_audio, min_num_bullets))
	end
	if all_rhyme_sets_eq and first_rhyme_ret then
		ins_line(format_rhyme(first_rhyme_ret), min_num_bullets)
	end
	if overall_rhyme then
		ins_line(format_rhyme(overall_rhyme), min_num_bullets)
	end
	if all_syll_sets_eq and first_sylls then
		ins_line(format_syllabifications(first_sylls), min_num_bullets)
	end
	if overall_syll then
		ins_line(format_syllabifications(overall_syll), min_num_bullets)
	end
	if all_hmp_sets_eq and first_hmps then
		ins_line(format_homophones(first_hmps), min_num_bullets)
	end
	if overall_hmp then
		ins_line(format_homophones(overall_hmp), min_num_bullets)
	end

	return table.concat(textparts) ..
		require("Module:utilities").format_categories(categories, lang, nil, nil, force_cat)
end

-- Meant to be called from a bot.
function export.pron_json(frame)
	local iparams = {
		[1] = {list = true, required = true},
		["pagename"] = {required = true},
	}
	local iargs = require("Module:parameters").process(frame.args, iparams)
	local data = {}
	local syllabification_from_pagename = syllabify_from_spelling(iargs.pagename, iargs.pagename)
	for _, respelling in ipairs(iargs[1]) do
		local pronun = export.IPA(respelling, "include phonemic syllable boundaries")
		local syllabification = export.syllabify_and_align(respelling, iargs.pagename)
		local num_syl = get_num_syl_from_ipa(pronun.phonemic)
		local rhyme = convert_phonemic_to_rhyme(pronun.phonemic)
		table.insert(data, {
			respelling = respelling,
			phonemic = pronun.phonemic,
			phonetic = pronun.phonetic,
			syllabification = syllabification,
			num_syl = num_syl,
			rhyme = rhyme,
		})
	end
	local retval = {
		pagename = iargs.pagename,
		syllabification_from_pagename = syllabification_from_pagename,
		data = data,
	}
	return require("Module:JSON").toJSON(retval)
end

return export
Module:tsg-pronunciation

Navigation menu

Search