Module:xh-common

From Wiktionary, the free dictionary
Jump to navigation Jump to search
This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

local u = mw.ustring.char

-- Combining marks, built from their code points.
-- ACUTE (U+0301) and CIRC (U+0302) are the two tone marks handled below.
-- NOTE(review): U+0324 is COMBINING DIAERESIS BELOW, whereas the usual
-- syllabicity mark is U+0329 (COMBINING VERTICAL LINE BELOW). apply_tone
-- strips a literal combining mark from its result; confirm that literal and
-- SYLL are meant to be the same code point.
local ACUTE, CIRC, SYLL = u(0x0301), u(0x0302), u(0x0324)

-- Character-class contents (meant to be placed inside "[...]" in a pattern).
local tone_marks = ACUTE .. CIRC
local plain_vowels = "aeiouAEIOU." .. SYLL

local export = {
	-- the combining tone marks
	diacritic = tone_marks,
	-- unmarked vowels, plus '.' and SYLL, which are treated like vowels
	toneless_vowel = plain_vowels,
	-- everything that can form the nucleus of a syllable
	vowel = plain_vowels .. "áéíóúâêîôûÁÉÍÓÚÂÊÎÔÛ" .. tone_marks,
}

-- Write '.' after a syllabic m to mark it as a syllable of its own, e.g. um.ntu

--- Split a word into syllables.
-- A syllable is a (possibly empty) run of characters that are not in
-- export.vowel, followed by one or more characters that are. '.' belongs to
-- export.vowel, so it closes a syllable: "um.ntu" gives { "u", "m", "ntu" }.
-- A '.' left at the very end of a syllable is dropped from the result.
-- Characters after the last vowel are appended to the final syllable.
-- @param word  the string to split
-- @return a sequence of syllable strings. It is empty for "", and holds the
--         whole word as a single element when the word contains no vowel
--         (previously both cases raised a nil-concatenation error).
function export.split_syllables(word)
	local syllables = {}
	local vowel = export.vowel

	for syll in mw.ustring.gmatch(word, "[^" .. vowel .. "]*[" .. vowel .. "%.]+") do
		-- A trailing '.' is only a syllable separator, not part of the spelling.
		if mw.ustring.sub(syll, -1) == "." then
			syll = mw.ustring.sub(syll, 1, -2)
		end

		syllables[#syllables + 1] = syll
	end

	-- Whatever follows the last vowel was not picked up by the loop above.
	local tail = mw.ustring.match(word, "[^" .. vowel .. "]*$")

	if #syllables > 0 then
		syllables[#syllables] = syllables[#syllables] .. tail
	elseif tail ~= "" then
		-- No vowel anywhere: keep the text instead of indexing syllables[0].
		syllables[1] = tail
	end

	return syllables
end


--- Put tone marks on a word, one tone letter per syllable.
-- The word is split with export.split_syllables and the pattern is split into
-- single characters. "H" adds an acute and "F" a circumflex to every
-- character of export.toneless_vowel in that syllable; "L" leaves the
-- syllable unchanged.
-- @param word     the unmarked word
-- @param pattern  string of "H"/"F"/"L" letters; when nil, every syllable is
--                 treated as "L"
-- @return the marked word in NFC form
-- Raises an error when the pattern length differs from the syllable count or
-- when the pattern contains any other character.
function export.apply_tone(word, pattern)
	local syllables = export.split_syllables(word)
	local tones = mw.text.split(pattern or mw.ustring.rep("L", #syllables), "")

	if #syllables ~= #tones then
		error("The word \"" .. table.concat(syllables) .. "\" and the tone pattern " .. table.concat(tones) .. " have different numbers of syllables.")
	end

	-- Tone letter -> combining mark; "L" is deliberately absent.
	local mark_for = { F = CIRC, H = ACUTE }
	local bare_vowel = "([" .. export.toneless_vowel .. "])"

	for index = 1, #tones do
		local tone = tones[index]
		local mark = mark_for[tone]

		if mark ~= nil then
			syllables[index] = mw.ustring.gsub(syllables[index], bare_vowel, "%1" .. mark)
		elseif tone ~= "L" then
			error("Invalid character \"" .. tone .. "\" in tone pattern string.")
		end
	end

	local composed = mw.ustring.toNFC(table.concat(syllables))
	-- Remove the literal combining mark below from the composed text
	-- (presumably the syllabicity marker; confirm it matches SYLL).
	-- Assigning to a single local keeps only gsub's first result.
	local cleaned = mw.ustring.gsub(composed, "̩", "")

	return cleaned
end

--- Separate a tone-marked word into its unmarked spelling and tone pattern.
-- Each syllable from export.split_syllables is decomposed (NFD). The last
-- acute or circumflex found in it decides the tone letter ("H" or "F"; "L"
-- when there is none), and every acute/circumflex in the syllable is removed.
-- apply_tone marks every vowel of a syllable, so removing all marks (instead
-- of only a syllable-final one) makes this function undo apply_tone for
-- syllables with several vowels or with consonants after the vowel.
-- @param word  the tone-marked word; '.' separators are dropped
-- @return a two-element table: { unmarked word in NFC form, tone string }
function export.split_tone(word)
	local tone_class = "[" .. ACUTE .. CIRC .. "]"
	local letter_for = { [ACUTE] = "H", [CIRC] = "F" }
	local tones = {}
	local stripped = {}

	for i, syll in ipairs(export.split_syllables(word)) do
		-- Remove any '.' and decompose so tone marks are separate characters.
		-- The extra parentheses discard gsub's replacement count.
		syll = mw.ustring.toNFD((mw.ustring.gsub(syll, "%.", "")))

		-- The greedy ".*" makes the capture the last tone mark in the syllable.
		local mark = mw.ustring.match(syll, ".*(" .. tone_class .. ")")

		if mark then
			tones[i] = letter_for[mark]
			stripped[i] = (mw.ustring.gsub(syll, tone_class, ""))
		else
			tones[i] = "L"
			stripped[i] = syll
		end
	end

	return {mw.ustring.toNFC(table.concat(stripped)), table.concat(tones)}
end

-- Hand the table of public fields and functions to whatever loads this module.
return export