Module:vot

From Wiktionary, the free dictionary
Jump to navigation Jump to search

-- Language object looked up by the code "vot"; exposed to callers as `export.lang`.
local lang = require("Module:languages").getByCode("vot")

-- Module table; every public function and constant below is attached to it.
local export = { lang = lang }

-- Guess the vowel harmony class of `word`.
--
-- The decision is made by the last character of the word that belongs to one
-- of the two harmony sets: a/o/u/õ give "a", ä/ö/ü/e give "ä". Characters
-- outside both sets (including "i") are skipped.
--
-- @param word  string to inspect
-- @return "a" or "ä"; "ä" is also the fallback when no deciding vowel exists
function export.guess_vowel_harmony(word)
	-- Capture the last deciding vowel: one that is followed only by
	-- non-deciding characters up to the end of the word.
	local last_harmonic = mw.ustring.match(word, "([aouõäöüe])[^aouõäöüe]*$")
	if last_harmonic and mw.ustring.find(last_harmonic, "[aouõ]") then
		return "a"
	end
	return "ä"
end

-- Consonant letters. The trailing "cč" are deprecated aliases that are still
-- recognised as consonants.
local consonants = "bdfghjklmnprsštvzž" .. "cč"
local consonant = "[" .. consonants .. "]"

-- Consonants that make_geminated_stem may double.
local consonants_geminatable = "bčdghjklmnprsštvz"
local consonant_geminatable = "[" .. consonants_geminatable .. "]"

local vowels = "aeiouõäöü"
local vowel = "[" .. vowels .. "]"

-- mark that may stand between a consonant and the vowel that follows it
local palatalize = "ʹ"

-- Private-use code points used as in-band markers inside words.
local virtual_syllable_break = mw.ustring.char(0xE200)
local ungeminate = mw.ustring.char(0xE201)

-- orthographic symbols that signify separation of syllables
local sep_symbols = "-./ " .. virtual_syllable_break
-- a syllable starting with one of these is treated as stressed
local stress_symbols = "-"

-- two-letter consonant unit that the syllabifier consumes as a whole
local consonant_sequence = "t[sš]"

-- Two-vowel patterns that stay within one syllable.
local diphthongs = {
	"[aeouõäöü]i",
	"[iouõ]a",
	"[iöüe]ä",
	"[aoõei]u",
	"[au]õ",
	"[äöie]ü",
	"[aiäü]e",
	"[ai]o",
}

-- Long (doubled) vowels; these also stay within one syllable.
local vowel_sequences_additional = {
	"aa", "ee", "ii", "oo", "uu", "ää", "öö", "üü", "õõ"
}

-- vowel_sequences = diphthongs followed by the long vowels, as a separate table.
local vowel_sequences = {}
for _, list in ipairs({ diphthongs, vowel_sequences_additional }) do
	for _, vs in ipairs(list) do
		vowel_sequences[#vowel_sequences + 1] = vs
	end
end

-- Letter inventories and the character classes built from them.
export.consonants = consonants
export.consonant = consonant
export.consonants_geminatable = consonants_geminatable
export.vowels = vowels
export.vowel = vowel
export.palatalize = palatalize

-- Marker characters and separators.
export.virtual_syllable_break = virtual_syllable_break
export.ungeminate = ungeminate
export.sep_symbols = sep_symbols

-- Vowel sequence tables.
export.diphthongs = diphthongs
export.vowel_sequences = vowel_sequences

-- Tell whether a syllable counts as stressed.
--
-- A syllable is stressed when it starts with a stress symbol, or when its
-- index equals `first_stressed_syllable` (1 when that argument is omitted).
--
-- @param syllable                 syllable text (may start with a kept separator)
-- @param syllable_index           position of the syllable in its word, or nil
-- @param first_stressed_syllable  index of the first stressed syllable, or nil
-- @return a truthy value when stressed (the match position or `true`),
--         otherwise `false` or `nil`
function export.is_stressed_syllable(syllable, syllable_index, first_stressed_syllable)
	local marked_at = mw.ustring.find(syllable, "^[" .. stress_symbols .. "]")
	if marked_at then
		return marked_at
	end
	return syllable_index and syllable_index == (first_stressed_syllable or 1)
end

-- adapted from [[Module:fi-hyphenation]]
--
-- Split `word` into syllables.
--
-- The word is scanned left to right, one code point at a time, matching
-- case-insensitively while copying the characters in their original case:
--   * "t" + "s"/"š" is consumed as one two-letter unit;
--   * a consonant (or such a unit) directly before a vowel opens a new
--     syllable if the current one already contains a vowel;
--   * a vowel following a vowel opens a new syllable, except that a listed
--     diphthong or long vowel is consumed together as one nucleus;
--   * the ungeminate marker is copied and makes the scanner forget that the
--     current syllable has a vowel;
--   * a separator symbol always ends the current syllable.
--
-- Syllables are appended even when empty (e.g. a leading separator yields a
-- leading ""), so the result always holds at least one element. Callers
-- count syllables, so this is kept as is.
--
-- @param word              string to split
-- @param keep_sep_symbols  when truthy, a separator is kept as the first
--                          character of the syllable that follows it;
--                          otherwise separators are dropped
-- @return array of syllable strings
function export.split_syllables(word, keep_sep_symbols)
	local res = {}
	local syllable = ""
	local pos = 1
	local found_vowel = false

	-- `pos` is a code point index (all slicing goes through mw.ustring), so
	-- the loop bound has to be the code point length too. The byte length
	-- `#word` overshoots for any word containing non-ASCII letters.
	-- mw.ustring.len returns nil for invalid UTF-8; fall back to the byte
	-- length in that case.
	local word_len = mw.ustring.len(word) or #word

	-- Lowercase once instead of once per pattern test.
	local lowered = mw.ustring.lower(word)

	-- Does `pattern` match at the current position?
	local function at(pattern)
		return mw.ustring.find(lowered, "^" .. pattern, pos)
	end

	-- Move the next `n` characters of the original word into the syllable.
	local function take(n)
		syllable = syllable .. mw.ustring.sub(word, pos, pos + n - 1)
		pos = pos + n
	end

	-- Close the current syllable and start the next one with `next_start`.
	local function flush(next_start)
		table.insert(res, syllable)
		syllable = next_start
	end

	while pos <= word_len do
		if at(consonant_sequence .. vowel) then
			-- (t+s/š)V: end current syllable if we have found a vowel
			if found_vowel then
				flush("")
				found_vowel = false
			end
			take(2)
		elseif at(consonant_sequence) then
			-- t+s/š not before a vowel: continue, keeping both letters together
			take(2)
		elseif at(consonant .. palatalize .. "?" .. vowel) then
			-- CV: end current syllable if we have found a vowel
			if found_vowel then
				flush("")
				found_vowel = false
			end
			take(1)
		elseif at(consonant) then
			-- C: continue
			take(1)
		elseif at(vowel) then
			if found_vowel then
				-- already found a vowel, end current syllable
				flush("")
			end
			found_vowel = true

			-- check for diphthongs or long vowels
			local seq_ok = false
			for _, seq in ipairs(vowel_sequences) do
				if at(seq) then
					seq_ok = true
					break
				end
			end

			-- A vowel followed by a doubled vowel is not a nucleus with the
			-- first half of that double; the doubled vowel stays together in
			-- the next syllable.
			if seq_ok and at(vowel .. "(" .. vowel .. ")%1") then
				seq_ok = false
			end

			take(seq_ok and 2 or 1)
		elseif at(palatalize) then
			take(1)
		elseif at(ungeminate) then
			take(1)
			found_vowel = false
		elseif at("[" .. sep_symbols .. "]") then
			-- separates syllables
			flush(keep_sep_symbols and mw.ustring.sub(word, pos, pos) or "")
			pos = pos + 1
			found_vowel = false
		else
			-- ?: continue
			take(1)
		end
	end

	table.insert(res, syllable)

	return res
end

-- Tell whether a syllable is heavy, i.e. whether any of its vowels is
-- followed by at least one more character.
--
-- @param syl  syllable text
-- @return the matched vowel-plus-character text when heavy, otherwise nil
function export.is_heavy_syllable(syl)
	local vowel_then_any = vowel .. "."
	return (mw.ustring.match(syl, vowel_then_any))
end

-- Reduce a run of vowels.
--
--   * a run ending in a doubled vowel loses its last vowel ("aa" -> "a");
--   * a lone "a" becomes "õ";
--   * a lone "ä" becomes "e";
--   * anything else is returned unchanged.
--
-- @param r  vowel run to reduce
-- @return the resulting string, and `true` if it was changed or `false` if not
function export.reduce_vowel(r)
	if mw.ustring.find(r, "(" .. export.vowel .. ")%1$") then
		-- Drop only the final vowel. Keeping just the first character would
		-- also discard the vowels in between for runs longer than two
		-- ("iaa" must become "ia", not "i").
		return mw.ustring.sub(r, 1, -2), true
	elseif r == "a" then
		return "õ", true
	elseif r == "ä" then
		return "e", true
	else
		return r, false
	end
end

-- Guess whether the final vowel of `word` is reduced, and apply the reduction.
--
-- No reduction happens when the last syllable is stressed, or when the
-- penultimate syllable is both stressed and light. Otherwise the last vowel
-- run of the word is passed through export.reduce_vowel.
--
-- @param word                     word to examine
-- @param first_stressed_syllable  index of the first stressed syllable
--                                 (is_stressed_syllable defaults it to 1)
-- @return whether a reduction was made, and the resulting word
function export.guess_reduction(word, first_stressed_syllable)
	local syl = export.split_syllables(word, true)
	if not syl then return nil end

	local n = #syl
	if export.is_stressed_syllable(syl[n], n, first_stressed_syllable) then
		return false, word
	end

	-- if the penultimate syllable is stressed and light, do not reduce.
	-- Only look at it when it exists: for a one-syllable word syl[n - 1] is
	-- nil, and passing nil on would raise an error in mw.ustring.find.
	if n >= 2
		and export.is_stressed_syllable(syl[n - 1], n - 1, first_stressed_syllable)
		and not export.is_heavy_syllable(syl[n - 1]) then
		return false, word
	end

	local was_reduced = false
	-- The pattern is anchored at the end, so the callback runs at most once:
	-- on the last vowel run plus any trailing non-vowels.
	local reduced = (mw.ustring.gsub(word, "(" .. vowel .. "+)([^" .. vowels .. "]*)$", function (v, c)
		local r
		r, was_reduced = export.reduce_vowel(v)
		return r .. c
	end))

	return was_reduced, reduced
end

-- Apply vowel reduction to every eligible syllable of `word`.
--
-- Syllables are visited in order while tracking how far back the last
-- stressed syllable is and whether the previous syllable was heavy. An
-- unstressed syllable has its vowel runs reduced when the previous syllable
-- was heavy or the stress counter has reached 2, provided that it lies beyond
-- the syllables of `stem` and does not contain the ungeminate marker.
--
-- @param word                     word to process (separators are kept)
-- @param stem                     optional; its syllable count is the number
--                                 of leading syllables that are never reduced
-- @param reduce_vowel             optional replacement for export.reduce_vowel
-- @param first_stressed_syllable  optional, defaults to 1
-- @return the word reassembled from its (possibly reduced) syllables
function export.apply_reduction(word, stem, reduce_vowel, first_stressed_syllable)
	local syl = export.split_syllables(word, true)
	if not syl then return nil end

	local protected_count = stem and #export.split_syllables(stem) or 0
	local reducer = reduce_vowel or export.reduce_vowel
	local first_stress = first_stressed_syllable or 1
	local total = #syl
	local vowel_run = "(" .. vowel .. "+)"

	-- 0 before any stress has been seen, 1 on a stressed syllable, then
	-- incremented for each unstressed syllable after it
	local since_stress = 0
	local prev_heavy = false

	for i = 1, total do
		-- Besides explicit stress, a syllable that is not the last one counts
		-- as stressed once the counter has reached 2.
		local stressed = export.is_stressed_syllable(syl[i], i, first_stress)
			or (since_stress >= 2 and i >= first_stress and i < total)

		if stressed then
			since_stress = 1
		else
			local may_reduce = (prev_heavy or since_stress >= 2)
				and i > protected_count
				and not mw.ustring.find(syl[i], ungeminate)
			if may_reduce then
				syl[i] = (mw.ustring.gsub(syl[i], vowel_run, reducer))
			end
			since_stress = since_stress + 1
		end

		-- measured on the syllable as it stands now, i.e. after any reduction
		prev_heavy = export.is_heavy_syllable(syl[i])
	end

	return table.concat(syl, "")
end

-- Build the geminated stem of `word`, or return nil when it has none.
--
-- Requirements: the penultimate syllable has no two vowels in a row and ends
-- in a vowel, the last syllable ends in a vowel, and the character before
-- the final one is a geminatable consonant. The final character is dropped
-- and the consonant doubled; a stem ending in "tš" gets "ttš" instead, and a
-- stem ending in "ts" yields nil.
--
-- @param word  the word
-- @param syl   its syllables, at least two of them
-- @return the geminated stem, or nil
local function make_geminated_stem(word, syl)
	local last_syl = syl[#syl]
	local prev_syl = syl[#syl - 1]
	local ends_in_vowel = export.vowel .. "$"

	if mw.ustring.find(prev_syl, export.vowel .. export.vowel) then
		return nil
	end

	local _, _, c = mw.ustring.find(word, "(" .. consonant_geminatable .. ").$")
	if not c
		or not mw.ustring.find(prev_syl, ends_in_vowel)
		or not mw.ustring.find(last_syl, ends_in_vowel) then
		return nil
	end

	local stem = mw.ustring.sub(word, 1, -2)

	local _, _, affricate = mw.ustring.find(stem, "(t[sš])$")
	if affricate == "ts" then
		return nil
	end
	if affricate then
		-- Both lengths are byte lengths, so the byte-wise cut removes exactly
		-- the trailing affricate before it is re-added with a doubled "t".
		return string.sub(stem, 1, #stem - #affricate) .. mw.ustring.sub(affricate, 1, 1) .. affricate
	end

	return stem .. c
end

-- Guess the geminated stem of `word`.
--
-- Only words with an even, non-zero number of syllables are considered; the
-- actual work is done by make_geminated_stem.
--
-- @param word  the word
-- @return the geminated stem, or nil when none applies
function export.guess_gemination(word)
	local syl = export.split_syllables(word)
	local count = syl and #syl or 0
	if count >= 2 and count % 2 == 0 then
		return make_geminated_stem(word, syl)
	end
	return nil
end

-- vowels before which the k ~ tš alternation below applies
local front_vowel = "[ieäöü]"

-- Undo the alternation at the end of `word`: a word ending in "ttš" or "tš"
-- plus a front vowel has that cluster replaced by "kk" or "k" respectively,
-- keeping the final vowel.
--
-- @param word  the word
-- @return `true` and the rewritten word when the ending matched,
--         otherwise `false` and the word unchanged
function export.extract_ci(word)
	local final = mw.ustring.sub(word, -1)

	-- The longer cluster is tried first so that "ttš" is not read as "t" + "tš".
	local rules = {
		{ "ttš", "kk" },
		{ "tš", "k" },
	}
	for _, rule in ipairs(rules) do
		local stem = mw.ustring.match(word, "(.+)" .. rule[1] .. front_vowel .. "$")
		if stem then
			return true, stem .. rule[2] .. final
		end
	end

	return false, word
end

-- Join `stem`, `cons` and `final`, turning every "k" in `cons` into "t"
-- followed by one "š" when the alternation applies.
--
-- The alternation applies when `ci` is truthy, `cons` contains a "k", and the
-- following context is front: if `final` is a string it must begin with a
-- front vowel; otherwise `final` itself is used as the flag and an empty
-- string is appended in its place.
--
-- @param ci     whether the alternation is enabled
-- @param stem   text before the consonant(s)
-- @param cons   the consonant(s)
-- @param final  following text, or a non-string flag
-- @return the assembled string
function export.apply_ci(ci, stem, cons, final)
	local before_front
	if type(final) == "string" then
		before_front = mw.ustring.find(final, "^" .. front_vowel)
	else
		before_front = final
		final = ""
	end

	if ci and before_front and mw.ustring.match(cons, "k+") then
		return stem .. (mw.ustring.gsub(cons, "k", "t")) .. "š" .. final
	end

	return stem .. cons .. final
end

return export