-- Module:smn-common
--
-- From Wiktionary, the free dictionary

local export = {}

local langdata = {
	-- Patterns that split a string into (leading part, trailing consonant part).
	-- Both captures together always cover the whole string. How the list is
	-- consumed is not visible here; presumably Module:smi-common tries the
	-- patterns in order and uses the first match -- TODO confirm.
	consonant = {
		-- A vowel other than i, followed by "i"/"I" and then only non-vowels:
		-- the "i" is captured as the start of the consonant part.
		-- The negated class must list the same vowels as the fallback pattern
		-- below; uppercase "Y" was previously missing from it, so "Y" was
		-- treated as a consonant in this pattern only.
		"^(.-[aâeouyäáAÂEOUYÄÁ])([iI][^aâeiouyäáAÂEIOUYÄÁ{}-]*)$",
		-- Fallback: the (possibly empty) trailing run of non-vowel characters.
		"^(.-)([^aâeiouyäáAÂEIOUYÄÁ{}-]*)$",
	},
	-- Patterns that split a string into (leading part, trailing vowel part).
	-- The specific two-letter sequences come first; the last pattern accepts a
	-- single vowel or nothing at all, so it matches any string.
	-- NOTE(review): presumably tried in order by Module:smi-common, first match
	-- wins -- confirm against that module.
	vowel = {
		"^(.-)(aa)$",
		"^(.-)(ee)$",
		"^(.-)(i[eiä])$",
		"^(.-)(oo)$",
		"^(.-)(u[ouá])$",
		"^(.-)(ye)$",
		"^(.-)(ää)$",
		"^(.-)(áá)$",
		"^(.-)([aâeiouyäá]?)$",
	},
	-- Consonant rewrite rules, grouped under the integer keys 1, 2 and 3.
	-- Every rule is a list whose first element is an anchored pattern and whose
	-- remaining elements are replacement strings ("%1"/"%2" refer to captures).
	-- mw.ustring.char(0x0323) is U+0323 COMBINING DOT BELOW.
	-- NOTE(review): the keys presumably correspond to the stem's `quantity`
	-- (which postprocess compares against 1 and 3), and the replacements to the
	-- other quantities -- confirm in Module:smi-common, which consumes this table.
	scons = {
		-- No rules under key 1.
		[1] = {
		},
		-- Rules under key 2 carry two replacements each.
		[2] = {
			{"^([đjlmnŋrv])" .. mw.ustring.char(0x0323) .. "$", "%1", "%1ˈ%1"},
			{"^n" .. mw.ustring.char(0x0323) .. "j$", "nj", "nˈnj"},
			
			{"^c$", "s", "cˈc"},
			{"^č$", "j" .. mw.ustring.char(0x0323), "čˈč"},
			{"^h$", "v" .. mw.ustring.char(0x0323), "kˈk"},
			{"^p$", "v", "pˈp"},
			{"^s$", "s", "sˈs"},
			{"^š$", "š", "šˈš"},
			{"^t$", "đ", "tˈt"},
		},
		-- Rules under key 3 carry one replacement each; all patterns contain
		-- the "ˈ" mark.
		[3] = {
			{"^([bdgzž])ˈ%1$", "%1"},
			{"^tˈk$", "đh"},
			
			{"^([jlnr])ˈ(c)%2$", "%1%2"},
			{"^([lr])ˈ(č)%2$", "%1%2"},
			{"^([jlŋrv])ˈ(k)%2$", "%1h"},
			{"^([lmrv])ˈ(p)%2$", "%1%2"},
			-- The s/š rules below are disabled in the source; reason not recorded.
			--{"^([lnr])ˈ(s)%2$", "%1%2"},
			--{"^([lr])ˈ(š)%2$", "%1%2"},
			{"^([jlnr])ˈ(t)%2$", "%1%2"},
			
			{"^hˈ([č])$", "v%1"},
			
			{"^([^b])ˈ(b)$", "%1%2"},
			{"^([^d])ˈ(d)$", "%1%2"},
			{"^([^g])ˈ(g)$", "%1%2"},
			{"^([^jn])ˈ(j)$", "%1%2"},
			{"^([s])ˈ(k)$", "%1%2"},
			{"^([^l])ˈ(l)$", "%1%2"},
			{"^([^m])ˈ(m)$", "%1%2"},
			{"^([^n])ˈ(n)$", "%1%2"},
			{"^([^r])ˈ(r)$", "%1%2"},
			--{"^([^š])ˈ(š)$", "%1%2"},
			{"^([ps])ˈ(t)$", "%1%2"},
			{"^([^v])ˈ(v)$", "%1%2"},
			{"^([^z])ˈ(z)$", "%1%2"},
			{"^([^ž])ˈ(ž)$", "%1%2"},
			
			{"^(v)ˈ(sk)$", "%1%2"},
		},
	},
	-- Named variant sets. Each set maps a single vowel to a list
	-- {replacement vowel(s), effect string}; the effect string is optional
	-- (nom_pl "â" has none). A vowel that is absent from a set has no entry.
	-- The effect strings are built from the codes that postprocess tests for:
	-- Q, AO, AE, EA, EI, D, GS, G, S3 and S.
	-- NOTE(review): presumably Module:smi-common looks up a set by name and
	-- passes the effect string to postprocess as `vowel_effect` -- confirm.
	vowel_variants = {
		normal          = {                        ["e"] = {"e", "Q" },                       ["o"] = {"o", "Q" },                                         },
		per             = {["â"] = {"â", "G"    }, ["e"] = {"ee", "QG"}, ["i"] = {"i", "G"   }, ["o"] = {"oo", "QG"}, ["u"] = {"u", "G" }, ["á"] = {"á", "G" }},
		ill_sg          = {["â"] = {"â", "D"    }, ["e"] = {"á", "EAQD"}, ["i"] = {"á", "EAD" }, ["o"] = {"o", "QD"}, ["u"] = {"u", "D" }, ["á"] = {"á", "D" }},
		loc_sg          = {                        ["e"] = {"ee", "GS"},                       ["o"] = {"oo", "GS"}                                          },
		com_sg          = {["â"] = {"á", "GS"   }, ["e"] = {"i", "EIGS"}, ["i"] = {"i", "EIGS"}, ["o"] = {"o", "QGS" }, ["u"] = {"o", "GS"}, ["á"] = {"á", "GS"}},
		abe_sg          = {["â"] = {"â", "S"    }, ["e"] = {"e", "Q"}, ["i"] = {"i", "S"   }, ["o"] = {"o", "Q" }, ["u"] = {"u", "S" }, ["á"] = {"á", "S" }},
		nom_pl          = {["â"] = {"a"         }, ["e"] = {"e", "Q"},                       ["o"] = {"o", "Q" },                                         },
		acc_pl          = {                        ["e"] = {"i", "EI"}, ["i"] = {"i", "EI"  }, ["o"] = {"o", "Q" },                                         },
		abe_pl          = {["â"] = {"â", "S"    }, ["e"] = {"i", "EIS"}, ["i"] = {"i", "EIS" }, ["o"] = {"o", "Q" }, ["u"] = {"u", "S" }, ["á"] = {"á", "S" }},
		
		-- Sets that only define a variant for "â".
		f3_s_pres_indc  = {["â"] = {"á", "AO"   }                                                                 },
		f3_p_pres_indc  = {["â"] = {"i", "AE"   }                                                                 },
		f12_s_past_indc = {["â"] = {"i", "G"    }                                                                 },
		f3_p_past_indc  = {["â"] = {"ii", "GS"  }                                                                 },
		f3_s_impr       = {["â"] = {"u", "AO"   }                                                                 },
		f1_d_impr       = {["â"] = {"oo", "AOGS"}                                                                 },
		f3_dp_impr      = {["â"] = {"u", "AOG"  }                                                                 },
		potn_conn       = {["â"] = {"ii", "S3"  }                                                                 },
		
		j_contr         = {["â"] = {"ee", "AEGS"}                                                                 },
	},
	-- Normalises the `scons` and `svowel` fields of `self` in place:
	--   * precomposed dotted letters in scons become base letter + U+0323,
	--   * every "i" in scons becomes "j",
	--   * when svowel is a single character, the overlong mark "ˈ" is inserted
	--     after the first character of scons (unless the next character is the
	--     combining dot or scons has fewer than two characters) and svowel is
	--     doubled.
	preprocess = function(self)
		local ugsub = mw.ustring.gsub
		local dot_below = mw.ustring.char(0x0323)
		
		-- Decompose the precomposed dot-below letters
		local dotted = {
			{"ḷ", "l"},
			{"ṃ", "m"},
			{"ṇ", "n"},
			{"ṛ", "r"},
			{"ṿ", "v"},
		}
		for _, pair in ipairs(dotted) do
			self.scons = ugsub(self.scons, pair[1], pair[2] .. dot_below)
		end
		
		self.scons = ugsub(self.scons, "i", "j")
		
		-- svowel is untouched by the scons edit, so one length check covers
		-- both adjustments.
		if mw.ustring.len(self.svowel) == 1 then
			-- Add overlong mark after a short vowel
			self.scons = ugsub(self.scons, "^(.)([^" .. dot_below .. "])", "%1ˈ%2")
			-- Make all single vowels double
			self.svowel = ugsub(self.svowel, "^(.)$", "%1%1")
		end
	end,
	-- Applies an optional effect string and then the unconditional spelling
	-- adjustments to the stem parts stored on `self`.
	--
	-- Reads/writes self.scons, self.svowel, self.uvowel and self.ending;
	-- reads self.ucons and self.quantity.
	-- `vowel_effect` may be nil/false; otherwise it is searched (plain substring
	-- search) for the codes Q, AO, AE, EA, EI, D, GS, G, S3 and S:
	--   * Q is handled on its own,
	--   * only the first present of AO / AE / EA / EI is applied,
	--   * only the first present of D / GS / G / S3 / S is applied.
	postprocess = function(self, vowel_effect)
		local ufind, ugsub, ulen = mw.ustring.find, mw.ustring.gsub, mw.ustring.len
		local dot_below = mw.ustring.char(0x0323)
		local doubled_vowel = "^(.)%1$"
		
		-- Rewrites scons with a single pattern substitution.
		local function sub_scons(pattern, replacement)
			self.scons = ugsub(self.scons, pattern, replacement)
		end
		
		-- Prefixes scons with the overlong mark unless it already contains one.
		local function ensure_overlong()
			if not self.scons:find("ˈ", nil, true) then
				self.scons = "ˈ" .. self.scons
			end
		end
		
		if vowel_effect then
			local function has(code)
				return vowel_effect:find(code, nil, true)
			end
			
			if has("Q") then
				ensure_overlong()
			end
			
			-- Stem vowel shifts, in priority order; the first code present in
			-- vowel_effect is the only one consulted, even if it has no entry
			-- for the current svowel.
			local vowel_shifts = {
				{"AO", {["aa"] = "áá", ["ee"] = "iä", ["ie"] = "iä", ["oo"] = "uá", ["uo"] = "uá"}},
				{"AE", {["aa"] = "ää", ["oo"] = "uá", ["uo"] = "ye", ["uu"] = "oo"}},
				{"EA", {["ee"] = "iä", ["ie"] = "iä", ["ye"] = "uá", ["ää"] = "áá"}},
				{"EI", {["aa"] = "oo", ["oo"] = "uu", ["uá"] = "oo", ["ye"] = "uo", ["ää"] = "aa"}},
			}
			for _, shift in ipairs(vowel_shifts) do
				if has(shift[1]) then
					local shifted = shift[2][self.svowel]
					if shifted then
						self.svowel = shifted
					end
					break
				end
			end
			
			-- Consonant adjustments
			if has("D") then
				if self.quantity == 3 then
					sub_scons("ˈ", "")
				end
				
				-- Doubled letter from the set below -> single letter + dot below;
				-- any other doubled character -> single character
				sub_scons("([đjlmnŋrv])%1", "%1" .. dot_below)
				sub_scons("(.)%1", "%1")
			elseif has("GS") then
				if ufind(self.scons, "^ˈ?[đjlmnŋrv]" .. dot_below .. "$") then
					-- Dotted letter -> doubled letter with the mark in between
					sub_scons("^ˈ?([đjlmnŋrv])" .. dot_below .. "$", "%1ˈ%1")
				elseif (ulen(self.scons) > 1 and not self.scons:find("ˈ", nil, true))
					or ufind(self.scons, "^[bcčdghptzž]$")
					or (ufind(self.scons, "^[sš]$") and self.quantity > 1)
				then
					-- Double the final character where required, then put the
					-- mark after the first character
					if ufind(self.scons, "^[bcčdghpsštzž]$") or ufind(self.scons, "^[^ps][cčpt]$") then
						sub_scons("(.)$", "%1%1")
					end
					
					sub_scons("^(.)", "%1ˈ")
				end
			elseif has("G") then
				if ufind(self.scons, "^ˈ?[đjlmnŋrv]" .. dot_below .. "$") then
					-- Dotted letter -> doubled letter, keeping a leading mark
					-- (if any) between the two copies
					sub_scons("^(ˈ?)([đjlmnŋrv])" .. dot_below .. "$", "%2%1%2")
				elseif ufind(self.scons, "^ˈ?[cčpsšt]$") or self.scons == "ˈh" then
					sub_scons("^(ˈ?)(.)$", "%2%1%2")
				elseif self.scons == "h" then
					self.scons = self.scons .. dot_below
				end
			elseif has("S3") then
				if self.quantity > 1 then
					ensure_overlong()
				end
			elseif has("S") then
				ensure_overlong()
			end
		end
		
		-- á > a after a
		if self.svowel == "aa" then
			self.uvowel = self.uvowel:gsub("á", "a")
		end
		
		-- Shorten e and o after quantity 3
		if self.quantity == 3 then
			self.uvowel = ugsub(self.uvowel, "^([eo])%1$", "%1")
		end
		
		-- Shorten the vowel if there is an overlong mark
		if ufind(self.scons, "ˈ") and ufind(self.svowel, doubled_vowel) then
			self.svowel = ugsub(self.svowel, doubled_vowel, "%1")
		end
		
		-- Remove the overlong mark after a non-diphthong
		if ulen(self.svowel) == 1 or ufind(self.svowel, doubled_vowel) then
			sub_scons("ˈ", "")
		end
		
		-- Change j to i in ending after a vowel other than i
		if self.ucons == "" and not self.uvowel:find("i$") then
			self.ending = self.ending:gsub("^j", "i")
		end
		
		-- Change j to i in scons after a vowel other than i
		-- (not when the j is directly followed by the combining dot)
		if not self.svowel:find("i$") then
			sub_scons("^(ˈ?)j([^" .. dot_below .. "])", "%1i%2")
		end
	end,
}

-- Build the Stem constructor from the shared implementation in
-- Module:smi-common, configured with the language data defined above.
local smi_common = require("Module:smi-common")
export.Stem = smi_common.make_constructor(langdata)

return export