-- Module:ro-common
--
-- From Wiktionary, the free dictionary


-- Module table; this is the value handed back by the `return export`
-- statement at the end of the file.
local export = {}

-- Romanian vowel letters. The string is spliced into "[...]" character
-- classes by the pattern-building code in split().
local vowels = "aeiouăâî"

-- Break a word into three captures: everything before the last vowel group,
-- that vowel group itself, and what follows it. If the word ends in a vowel
-- followed by i or u, that final letter is returned as the third capture;
-- otherwise the third capture is the trailing run of non-vowel characters
-- (possibly empty).
-- TODO: make sure this works for all modules
local function split(word)
	local vowel_class = "[" .. vowels .. "]"
	local tail_pattern

	if mw.ustring.match(word, vowel_class .. "[iu]$") then
		-- vowel + final i/u: the last letter is peeled off on its own
		tail_pattern = "([iu])$"
	else
		tail_pattern = "([^" .. vowels .. "]-)$"
	end

	local stem, vow, cons = mw.ustring.match(word, "^(.-)(" .. vowel_class .. "-)" .. tail_pattern)

	return stem, vow, cons
end

-- Split a "vowel" group into leading semivowel, main vowel(s) and trailing
-- semivowel. Groups of at most one character are returned whole as the main
-- vowel with empty semivowels. For longer groups an i/u at either edge is
-- peeled off and whatever lies between them is the main part.
-- NOTE(review): a group made only of i/u letters (e.g. "iu") comes back with
-- an empty main vowel - confirm that this is intended.
local function split_vow(vow)
	if not (mw.ustring.len(vow) > 1) then
		return "", vow, ""
	end

	local before, nucleus, after = mw.ustring.match(vow, "^([iu]?)(.-)([iu]?)$")

	return before, nucleus, after
end

-- Isolate the changing vowels from the stem variations.
--
-- `words` is a list of stem variants that are expected to differ only in
-- their main vowels. Working from the end of the word towards its start,
-- every variant is cut with split() and split_vow(); the pieces are appended
-- to the list `result`, which is filled in place (nothing is returned) so
-- that it alternates between fixed text and vowel slots:
--   result[1], result[3], ... text shared by all variants
--   result[2], result[4], ... the vowels seen at that position, sorted and
--                             joined with "-" (for example "e-ea")
--
-- Raises an error when the variants differ in anything other than the main
-- vowel of a position, or when one variant is used up while the others
-- still differ. `words` itself is not modified.
-- TODO: somehow account for hiatus (deochea)
local function get_vow_changes(words, result)
	-- Identical variants (or a single one): nothing alternates, so the whole
	-- word is one fixed piece and the recursion ends here.
	local all_dupes = true

	for _, val in ipairs(words) do
		if val ~= words[1] then
			all_dupes = false
			break
		end
	end

	if all_dupes then
		table.insert(result, words[1])
		return
	end

	-- Cut every variant into {stem, pre-vowel, vowel, post-vowel, cons}.
	local split_words = {}

	for _, val in ipairs(words) do
		if val == "" then
			error("Cannot match stems, should only have different vowels")
		end

		local stem, vow, cons = split(val)
		local pre, post
		pre, vow, post = split_vow(vow)

		table.insert(split_words, {stem, pre, vow, post, cons})
	end

	-- Everything except the main vowel has to agree with the first variant.
	-- The fields are checked in this order: cons, post-vowel, pre-vowel.
	local first = split_words[1]
	local fixed_fields = {5, 4, 2}
	local vowel_appearances = {}

	for i, val in ipairs(split_words) do
		for _, field in ipairs(fixed_fields) do
			if val[field] ~= first[field] then
				error("Stems differ in something other than main vowels: "
					.. words[1] .. ", " .. words[i] .. ", "
					.. val[field] .. ", " .. first[field])
			end
		end

		-- add the vowel
		vowel_appearances[val[3]] = true
	end

	-- pairs() has no defined order, so sort to get a stable joined key
	local found_vowels = {}

	for vow in pairs(vowel_appearances) do
		table.insert(found_vowels, vow)
	end

	table.sort(found_vowels)

	-- Recurse on what is left of each variant. A fresh list is used so the
	-- caller's table is left untouched.
	local stems = {}

	for i, parts in ipairs(split_words) do
		stems[i] = parts[1]
	end

	get_vow_changes(stems, result)

	-- join pre-vowel to last consonant
	result[#result] = result[#result] .. first[2]

	-- add all the vowel variations
	table.insert(result, table.concat(found_vowels, "-"))

	-- add post-vowel + last cons
	table.insert(result, first[4] .. first[5])
end

-- Apply the first consonant rule for `mode` that matches `cons` and return
-- the altered string; `cons` is returned unchanged when no rule matches.
-- Each rule is used as {pattern, replacement} with mw.ustring.gsub, limited
-- to one substitution. When `conj_23` is truthy the rules in
-- consonants_23[mode] are tried before those in consonants[mode].
-- NOTE(review): `consonants` and `consonants_23` are not defined in the
-- visible part of this file, and a second `local function find_cons` further
-- down shadows this one for all later code - confirm which one is meant to
-- survive.
-- NOTE(review): the general rules are walked with pairs(), so if several of
-- them can match, which one wins is unspecified.
local function find_cons(cons, mode, conj_23, vow)
	-- ugly hack
	if conj_23 and mode == "â" and cons == "t"
	and mw.ustring.find(vow, "[eio]$") then
		return "ț"
	end

	-- Returns the rewritten string for the first matching rule, or nil when
	-- none of the rules matched.
	local function apply_first(rules, iterate)
		for _, rule in iterate(rules) do
			local replaced, hits = mw.ustring.gsub(cons, rule[1], rule[2], 1)

			if hits >= 1 then
				return replaced
			end
		end
	end

	local altered

	if conj_23 and consonants_23[mode] then
		altered = apply_first(consonants_23[mode], ipairs)

		if altered then
			return altered
		end
	end

	if consonants[mode] then
		altered = apply_first(consonants[mode], pairs)

		if altered then
			return altered
		end
	end

	return cons
end

-- Remove duplicates from an array, in place. The first occurrence of each
-- value is kept and the relative order of the survivors is preserved.
-- If the table carries a `maxindex` field, it is decremented once for every
-- element that is removed. Nothing is returned.
local function remove_duplicates(array)
	local existing = {}
	local i = 1

	-- A manual index replaces ipairs here: after table.remove the following
	-- element slides into slot i, so i must not advance or that element
	-- would never be examined (which left {1, 1, 1} as {1, 1}).
	while array[i] ~= nil do
		local val = array[i]

		if existing[val] then
			table.remove(array, i)

			if array.maxindex then
				array.maxindex = array.maxindex - 1
			end
		else
			existing[val] = true
			i = i + 1
		end
	end
end

-- Get consonant changes when followed by -i or -e.
-- The ending of `cons` starting at its third-, second- and then last
-- character from the end is looked up in consonants[mode]. The first ending
-- that has an entry is swapped for that entry, unless the text in front of
-- it is exactly a single "ș" or "j", in which case the search moves on to
-- the next, shorter ending. Returns the (possibly unchanged) string.
-- NOTE(review): this redeclares the local `find_cons` defined earlier in the
-- file with a different signature; code below this point sees only this
-- version. `consonants` is not defined in the visible part of the file.
local function find_cons(cons, mode)
	local changes = consonants[mode]

	for start = -3, -1 do
		local replacement = changes[mw.ustring.sub(cons, start)]

		if replacement then
			local head = mw.ustring.sub(cons, 1, start - 1)

			if not mw.ustring.match(head, "^[șj]$") then
				cons = head .. replacement
				break
			end
		end
	end

	return cons
end

-- Form the definite singular form of a noun or adjective.
--   word   - the indefinite singular form (UTF-8 string)
--   gender - "m", "n" or "f"
-- Masculine/neuter: -u takes "l", -e takes "le", anything else takes "ul".
-- Feminine: final -ă becomes -a, final -ie becomes -ia, other -e takes "a",
-- -a takes "ua".
-- Returns the definite form, or nil for a feminine word with none of these
-- endings. Raises an error for any other gender value.
--
-- All checks are anchored at the end of the word. The former unanchored
-- "vowel + ie" test also fired in the middle of a word and turned e.g.
-- "maiestate" into "maiestata"; it did the same replacement as the "-ie"
-- rule, so the two are merged. The suffixes are literal, so the byte-based
-- string methods are safe on UTF-8 input.
local function make_definite(word, gender)
	if gender == "m" or gender == "n" then
		if word:find("u$") then
			return word .. "l"
		elseif word:find("e$") then
			return word .. "le"
		end

		return word .. "ul"
	elseif gender == "f" then
		if word:find("ă$") then
			-- parentheses drop gsub's second result (the match count)
			return (word:gsub("ă$", "a"))
		elseif word:find("ie$") then
			return (word:gsub("e$", "a"))
		elseif word:find("e$") then
			return word .. "a"
		elseif word:find("a$") then
			return word .. "ua"
		end

		-- unrecognised feminine ending
		return nil
	end

	error("Invalid or missing gender")
end

-- Vowel alternations for the various word endings.
-- Outer key: the vowels found at one position of the stem variants, sorted
--   and joined with "-" as get_vow_changes() produces them. get_stems() first
--   tries the key with "-N" appended, N being the position counted from the
--   end of the stem (see "e-ea-2"), then the plain key.
-- Inner key: the stem type ("-", "ă", "e" or "i") the stem is built for.
-- Value: the vowel to use, or a list of vowels when several are possible.
-- TODO: fix for adjectives
local vow_changes = {
	-- sort order: aeiouâîă

	-- zbiera/așeza, a variant of e-ea below
	["a-e"] = { ["-"] = "e", ["ă"] = "a", ["e"] = "e", ["i"] = "e" },

	-- spăla
	["a-e-ă"] = { ["-"] = "ă", ["ă"] = "a", ["e"] = "e", ["i"] = "e" },

	-- arăta
	["a-ă"] = { ["-"] = "ă", ["ă"] = "a", ["e"] = "a", ["i"] = "ă" },

	-- încerca
	["e-ea"] = { ["-"] = "e", ["ă"] = "ea", ["e"] = "e", ["i"] = "e" },

	-- lepăda
	["e-ea-2"] = { ["-"] = {"e", "ea"}, ["ă"] = "ea", ["e"] = "e", ["i"] = "e" },

	-- supăra
	["e-ă"] = { ["-"] = "ă", ["ă"] = "ă", ["e"] = "e", ["i"] = "e" },

	-- vinde
	["i-â"] = { ["-"] = "â", ["ă"] = "â", ["e"] = "i", ["i"] = "i" },

	-- toca
	["o-oa"] = { ["-"] = "o", ["ă"] = "oa", ["e"] = "oa", ["i"] = "o" },
}

-- Append `suffix` to every string in the list `stems`, in place.
local function append_to_all(stems, suffix)
	for k, stem in ipairs(stems) do
		stems[k] = stem .. suffix
	end
end

-- Get a list of stems for each ending type, given a comma-separated list of
-- slash-separated lists of stem variants.
--
-- Each slash-separated set is reduced with get_vow_changes() to alternating
-- fixed parts and vowel slots. For every stem type ("-", "ă", "e", "i") the
-- stem is then rebuilt, choosing the vowel for each slot from vow_changes;
-- when vow_changes lists several vowels for a stem type, one stem per vowel
-- is produced. A slot holding a single vowel is copied as it is.
--
-- Returns a table keyed by stem type. When `stems` is given, each value is a
-- list of distinct stems, in the order they were first produced.
-- Raises an error when a slot holds several vowels that match no entry of
-- vow_changes.
local function get_stems(stems)
	local stem_types = {"-", "ă", "e", "i"}
	local res = {}

	for _, stype in ipairs(stem_types) do
		res[stype] = {}
	end

	if not stems then
		-- NOTE(review): get_inf_stem is not defined in the visible part of
		-- this file, so as written this call reaches an undefined global.
		-- Its result is stored as it is under every stem type, whereas the
		-- branch below stores lists of strings - confirm the intended shape.
		local stem = get_inf_stem()

		for _, stype in ipairs(stem_types) do
			res[stype] = stem
		end

		return res
	end

	for _, stem_set in ipairs(mw.text.split(stems, " *, *")) do
		-- stem_parts alternates: fixed text at odd indices, vowel slots at
		-- even indices
		local stem_parts = {}

		get_vow_changes(mw.text.split(stem_set, " */ *"), stem_parts)

		for _, stype in ipairs(stem_types) do
			-- partial stems built so far for this stem type
			local candidates = {""}

			for i = 1, #stem_parts, 2 do
				append_to_all(candidates, stem_parts[i])

				local vowel_key = stem_parts[i + 1]

				if vowel_key then
					-- Which syllable this is, counting from the end. The
					-- difference is always even; math.floor only makes sure
					-- the number is concatenated as "2" rather than "2.0" on
					-- Lua versions that distinguish floats from integers.
					local syln = math.floor((#stem_parts - i) / 2)
					local vc = vow_changes[vowel_key .. "-" .. syln]
						or vow_changes[vowel_key]

					if not vc then
						-- no vowel alternation found for these vowels;
						-- plain-text search, since "-" is a pattern character
						if mw.ustring.find(vowel_key, "-", 1, true) then
							-- multiple vowels not matching a known alternation
							error(vowel_key .. " is not a valid vowel change")
						end

						-- there's only one vowel, so use it
						append_to_all(candidates, vowel_key)
					elseif type(vc[stype]) == "string" then
						-- only one vowel can be used with that ending
						append_to_all(candidates, vc[stype])
					elseif type(vc[stype]) == "table" then
						-- multiple vowels for that ending, so make a copy of
						-- every partial stem for each one
						local expanded = {}

						for _, vow in ipairs(vc[stype]) do
							for _, stem in ipairs(candidates) do
								table.insert(expanded, stem .. vow)
							end
						end

						candidates = expanded
					end
				end
			end

			-- add only the stems that are not already in res
			local known = res[stype]

			for _, stemi in ipairs(candidates) do
				local is_new = true

				for _, stem in ipairs(known) do
					if stem == stemi then
						is_new = false
						break
					end
				end

				if is_new then
					table.insert(known, stemi)
				end
			end
		end
	end

	return res
end

-- Hand the module table back to whoever loads this module.
-- NOTE(review): every function in the visible code is declared `local` and
-- none is assigned onto `export`, so the returned table appears to be empty -
-- confirm whether the export assignments were lost.
return export