Module:User:Vitalik/inflection-new/data/ru-noun

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Vitalik, for their own experimentation. Items in this module may be added and removed at Vitalik's discretion; do not rely on this module's stability.


return {
	-- Name of the template this data set is associated with.
	-- NOTE(review): presumably the wiki template that renders the resulting
	-- table; the consumer is not visible in this file — confirm.
	template = 'ru-decl-noun-table',
	-- Default endings, one entry per slot. Key scheme:
	--   <gender>_<hard|soft>_<case>_<sg|pl>[_stressed|_unstressed]
	-- where <gender> is m / n / f, or c for the entries grouped below as
	-- "common". An empty string means "no ending".
	-- Rules in `conditions` overwrite individual entries by key and later
	-- copy them into r_* variables through '<name>' references.
	affixes = {
		-- masculine singular
		m_hard_nom_sg = '',
		m_soft_nom_sg = 'ь',
		m_hard_gen_sg = 'а',
		m_soft_gen_sg = 'я',
		m_hard_dat_sg = 'у',
		m_soft_dat_sg = 'ю',
		m_hard_ins_sg = 'ом',
		m_soft_ins_sg_stressed = 'ём',
		m_soft_ins_sg_unstressed = 'ем',
		-- neuter singular
		n_hard_nom_sg = 'о',
		n_soft_nom_sg_stressed = 'ё',
		n_soft_nom_sg_unstressed = 'е',
		n_hard_gen_sg = 'а',
		n_soft_gen_sg = 'я',
		n_hard_dat_sg = 'у',
		n_soft_dat_sg = 'ю',
		n_hard_ins_sg = 'ом',
		n_soft_ins_sg_stressed = 'ём',
		n_soft_ins_sg_unstressed = 'ем',
		-- feminine singular
		f_hard_nom_sg = 'а',
		f_soft_nom_sg = 'я',
		f_hard_gen_sg = 'ы',
		f_soft_gen_sg = 'и',
		f_hard_dat_sg = 'е',
		-- both variants are 'е' by default; some stem types override them
		f_soft_dat_sg_stressed = 'е',
		f_soft_dat_sg_unstressed = 'е',
		f_hard_acc_sg = 'у',
		f_soft_acc_sg = 'ю',
		f_hard_ins_sg = 'ой',
		f_soft_ins_sg_stressed = 'ёй',
		f_soft_ins_sg_unstressed = 'ей',
		-- common singular (prepositional; no hard/soft split in the key)
		c_prp_sg_stressed = 'е',
		c_prp_sg_unstressed = 'е',
		-- masculine plural
		m_hard_nom_pl = 'ы',
		m_soft_nom_pl = 'и',
		m_hard_gen_pl = 'ов',
		m_soft_gen_pl_stressed = 'ей',
		m_soft_gen_pl_unstressed = 'ей',
		-- neuter plural
		n_hard_nom_pl = 'а',
		n_soft_nom_pl = 'я',
		n_hard_gen_pl_stressed = '',
		n_hard_gen_pl_unstressed = '',
		n_soft_gen_pl_stressed = 'ей',
		n_soft_gen_pl_unstressed = 'ь',
		-- feminine plural
		f_hard_nom_pl = 'ы',
		f_soft_nom_pl = 'и',
		f_hard_gen_pl_stressed = '',
		f_hard_gen_pl_unstressed = '',
		f_soft_gen_pl_stressed = 'ей',
		f_soft_gen_pl_unstressed = 'ь',
		-- common plural (dative / instrumental / prepositional)
		c_hard_dat_pl = 'ам',
		c_soft_dat_pl = 'ям',
		c_hard_ins_pl = 'ами',
		c_soft_ins_pl = 'ями',
		c_hard_prp_pl = 'ах',
		c_soft_prp_pl = 'ях',
	},
	conditions = {
		-- Prelude: select the output class and derive the basic variables
		-- (stem, stem_stressed, gender, animate) that every later rule tests.
		{
			-- 'common' is a key of the `classes` table at the end of this file.
			-- NOTE(review): the purpose of the empty nested table is not
			-- visible here — confirm against the engine before removing it.
			class = 'common',
			{},
		},
		{
			comment = 'Remove ending (-а, -е, -ё, -о, -я, -й, -ь) to get stem',

			-- Strip one trailing ending letter from `base`.
			stem = {'replace', 'base', '[аеёояйь]$', ''},
			-- Same for `base_stressed`; the pattern also consumes an optional
			-- combining acute accent (U+0301) following the ending letter.
			stem_stressed = {'arg_replace', 'base_stressed', '[аеёояйь]́?$', ''},  -- a stress mark on the removed ending is removed with it
		},
		{
			comment = "Add stress to stem_stressed if there is only one syllable and stress is absent",

			_if = {
				-- no combining acute accent present ...
				stem_stressed__not_match = '́',
				-- ... and the {vowel} placeholder matches exactly once
				-- NOTE(review): presumably {vowel} is expanded by the engine
				-- into a vowel character class — confirm.
				stem_stressed__match_once = '{vowel}',
			},
			-- Append U+0301 after the vowel.
			stem_stressed = {'var_replace', 'stem_stressed', '({vowel})', '%1́'},
		},
		{
			comment = 'Parse "gender_class" argument and get values for "gender" and "animate"',
			-- "gender_class" has the shape <m|n|f>-<in|an>; the first capture
			-- yields the gender letter, the second 'in' or 'an'.
			gender = {'match_arg', 'gender_class', '([mnf])%-[ia]n'},
			animate = {'match_arg', 'gender_class', '[mnf]%-([ia]n)'},
		},

		{SECTION = 'Determination of stem type'},
		-- Assigns `stem_type` from the last character of `stem` and, for stems
		-- matching none of the character tests, from `gender` plus the ending
		-- of `base`. Rules run top to bottom; a later match overwrites an
		-- earlier value.
		{
			-- Start from an empty value so the fallback at the end of this
			-- section can detect "nothing matched".
			stem_type = '',
		},
		{
			-- NOTE(review): {velar} and {sibilant} are placeholders presumably
			-- expanded by the engine into character classes — confirm.
			_if = {stem__endswith = '{velar}'},
			stem_type = 'velar',
		},
		{
			_if = {stem__endswith = '{sibilant}'},
			stem_type = 'sibilant',
		},
		{
			_if = {stem__endswith = 'ц'},
			stem_type = 'letter-ц',
		},
		{
			-- Stem still ends in й/ь or in a vowel other than 'и'.
			_if = {stem__endswith = {'[йь]', '[аеёоуыюя]'}},
			stem_type = 'vowel',
		},
		{
			_if = {stem__endswith = 'и'},
			stem_type = 'letter-и',
		},
		{
			-- None of the character tests above applies: decide by gender and
			-- by the ending of the original `base`.
			_if = {stem__not_endswith = {'{velar}', '{sibilant}', 'ц', '[йь]', '[аеёоуыюя]', 'и'}},
			_then = {
				{
					_if = {gender = 'm'},
					_then = {
						{
							-- Nothing was stripped from `base`, i.e. it had no
							-- ending letter: hard type.
							_if = {stem = '<base>'},
							stem_type = 'hard',
						},
						{
							_if = {base__endswith = 'ь', base__not_endswith = 'путь'},
							stem_type = 'soft',
						},
						{
							-- Bases ending in 'путь' get their own type.
							_if = {base__endswith = 'путь'},
							stem_type = 'm-3rd',
						},
					},
				},
				{
					_if = {gender = 'f'},
					_then = {
						{
							_if = {base__endswith = 'а'},
							stem_type = 'hard',
						},
						{
							_if = {base__endswith = 'я'},
							stem_type = 'soft',
						},
						{
							_if = {base__endswith = 'ь'},
							stem_type = 'f-3rd',
						},
					},
				},
				{
					_if = {gender = 'n'},
					_then = {
						{
							_if = {base__endswith = 'о'},
							stem_type = 'hard',
						},
						{
							_if = {base__endswith = 'е'},
							stem_type = 'soft',
						},
						{
							-- NOTE(review): 'n-3rd' is assigned here but is not
							-- listed in either stem_type group of the
							-- 'Determination of ending' section, so no r_*
							-- endings are selected for it in this file.
							_if = {base__endswith = 'мя'},
							stem_type = 'n-3rd',
						},
					},
				},
			},
		},
		{
			-- Feminine base in -ь whose stem was classified as sibilant above:
			-- replace that with a dedicated type.
			_if = {
				gender = 'f', 
				stem_type = 'sibilant',
				base__endswith = 'ь',
			},
			stem_type = 'f-3rd-sibilant',
		},
		{
			-- Fallback when no rule above assigned a type.
			_if = {stem_type = ''},
			stem_type = 'hard',
		},

		{SECTION = 'Special changes for velar, sibilant, vowel etc. stem types'},
		-- Each rule overwrites individual default endings from `affixes` for
		-- particular stem types. The rules run before the stressed/unstressed
		-- variants are resolved and before endings are copied into r_*.
		{
			-- "ы" is written "и" after velars and sibilants.
			_if = {stem_type = {'velar', 'sibilant'}},
			f_hard_gen_sg = 'и',
			m_hard_nom_pl = 'и',
			f_hard_nom_pl = 'и',
		},
		{
			-- Unstressed "о" in a singular ending is written "е" after
			-- sibilants and "ц". Patterns a, c, e are the ones whose singular
			-- endings are unstressed (same set as the singular rule of the
			-- 'Resolve stressed/unstressed' section).
			_if = {
				stem_type = {'sibilant', 'letter-ц'},
				stress = {'a', 'c', 'e'},
			},
			m_hard_ins_sg = 'ем',
			n_hard_ins_sg = 'ем',
			f_hard_ins_sg = 'ей',
			-- The neuter nominative follows the same spelling rule; without
			-- this override such nouns get the default 'о' ending.
			n_hard_nom_sg = 'е',
		},
		{
			-- Same spelling rule for the masculine genitive plural. This is a
			-- plural ending, so it depends on the plural stress: patterns a, d,
			-- d' are the ones whose genitive plural ending is unstressed (same
			-- set as the plural rules elsewhere in this file). For sibilant
			-- stems the next rule overrides the value again.
			_if = {
				stem_type = {'sibilant', 'letter-ц'},
				stress = {'a', 'd', "d'"},
			},
			m_hard_gen_pl = 'ев',
		},
		{
			-- Sibilant stems: genitive plural in 'ей' (masculine always,
			-- neuter/feminine only when the ending is stressed).
			_if = {stem_type = {'sibilant'}},
			m_hard_gen_pl = 'ей',
			n_hard_gen_pl_stressed = 'ей',
			n_hard_gen_pl_unstressed = '',
			f_hard_gen_pl_stressed = 'ей',
			f_hard_gen_pl_unstressed = '',
		},
		{
			-- Stems ending in a vowel, й/ь or 'и': 'й' replaces 'ь' and the
			-- default soft genitive plural endings. (Merged from two
			-- consecutive rules with the same condition; the second repeated
			-- two assignments of the first with identical values.)
			_if = {stem_type = {'vowel', 'letter-и'}},
			m_soft_nom_sg = 'й',
			m_soft_gen_pl_stressed = 'ёв',
			m_soft_gen_pl_unstressed = 'ев',
			n_soft_gen_pl_stressed = 'й',
			n_soft_gen_pl_unstressed = 'й',
			f_soft_gen_pl_stressed = 'й',
			f_soft_gen_pl_unstressed = 'й',
		},
		{
			-- Stems in 'и': unstressed dative/prepositional singular in 'и'.
			_if = {stem_type = {'letter-и'}},
			f_soft_dat_sg_unstressed = 'и',
			c_prp_sg_unstressed = 'и',
		},
		{
			-- 'm-3rd' (bases ending in 'путь'): genitive, dative and
			-- prepositional singular in 'и'.
			_if = {stem_type = {'m-3rd'}},
			m_soft_gen_sg = 'и',
			m_soft_dat_sg = 'и',
			c_prp_sg_stressed = 'и',
			c_prp_sg_unstressed = 'и',
		},
		{
			-- Feminine bases in -ь: the f_soft_* slots are reused with the
			-- endings of this type.
			_if = {stem_type = {'f-3rd', 'f-3rd-sibilant'}},
			f_soft_nom_sg = 'ь',
			f_soft_dat_sg_stressed = 'и',
			f_soft_dat_sg_unstressed = 'и',
			f_soft_acc_sg = 'ь',
			f_soft_ins_sg_stressed = 'ью',
			f_soft_ins_sg_unstressed = 'ью',
			c_prp_sg_stressed = 'и',
			c_prp_sg_unstressed = 'и',
			f_soft_gen_pl_stressed = 'ей',
			f_soft_gen_pl_unstressed = 'ей',
		},
		{
			-- After a sibilant the plural oblique endings are written with
			-- 'а' instead of 'я'.
			_if = {stem_type = {'f-3rd-sibilant'}},
			c_soft_dat_pl = 'ам',
			c_soft_ins_pl = 'ами',
			c_soft_prp_pl = 'ах',
		},

		{SECTION = 'Resolve stressed/unstressed cases of endings'},
		-- Collapses each *_stressed / *_unstressed pair into the suffix-less
		-- key that the 'Determination of ending' section reads. Singular and
		-- plural are resolved separately because the stress patterns group
		-- differently for the two numbers.
		{
			-- Patterns a, c, e: singular endings are unstressed.
			_if = {stress = {'a', 'c', 'e'}},
			-- <n_soft_nom_sg> is read by the neuter soft branch of the
			-- 'Determination of ending' section; `affixes` only defines its
			-- _stressed/_unstressed variants, so it is resolved here.
			n_soft_nom_sg = '<n_soft_nom_sg_unstressed>',
			f_soft_dat_sg = '<f_soft_dat_sg_unstressed>',
			m_soft_ins_sg = '<m_soft_ins_sg_unstressed>',
			n_soft_ins_sg = '<n_soft_ins_sg_unstressed>',
			f_soft_ins_sg = '<f_soft_ins_sg_unstressed>',
			-- The prepositional singular has no gender/hardness split and goes
			-- straight into its r_* variable.
			r_prp_sg = '<c_prp_sg_unstressed>',
		},
		{
			-- Patterns b, d, d', f, f': singular endings are stressed.
			_if = {stress = {'b', 'd', "d'", 'f', "f'"}},
			n_soft_nom_sg = '<n_soft_nom_sg_stressed>',
			f_soft_dat_sg = '<f_soft_dat_sg_stressed>',
			m_soft_ins_sg = '<m_soft_ins_sg_stressed>',
			n_soft_ins_sg = '<n_soft_ins_sg_stressed>',
			f_soft_ins_sg = '<f_soft_ins_sg_stressed>',
			r_prp_sg = '<c_prp_sg_stressed>',
		},
		{
			-- Patterns a, d, d': genitive plural ending is unstressed.
			_if = {stress = {'a', 'd', "d'"}},
			m_soft_gen_pl = '<m_soft_gen_pl_unstressed>',
			n_hard_gen_pl = '<n_hard_gen_pl_unstressed>',
			n_soft_gen_pl = '<n_soft_gen_pl_unstressed>',
			f_hard_gen_pl = '<f_hard_gen_pl_unstressed>',
			f_soft_gen_pl = '<f_soft_gen_pl_unstressed>',
		},
		{
			-- Patterns b, c, e, f, f': genitive plural ending is stressed.
			_if = {stress = {'b', 'c', 'e', 'f', "f'"}},
			m_soft_gen_pl = '<m_soft_gen_pl_stressed>',
			n_hard_gen_pl = '<n_hard_gen_pl_stressed>',
			n_soft_gen_pl = '<n_soft_gen_pl_stressed>',
			f_hard_gen_pl = '<f_hard_gen_pl_stressed>',
			f_soft_gen_pl = '<f_soft_gen_pl_stressed>',
		},

		{SECTION = 'Determination of ending (depending on gender and stem type)'},
		-- Copies the affix values selected by stem type (hard vs. soft group)
		-- and gender into the r_<case>_<number> variables used to build forms.
		-- r_prp_sg is not set here; it is assigned in the
		-- 'Resolve stressed/unstressed' section. r_acc_sg is set only for
		-- feminine nouns.
		{
			-- Hard group: these stem types use the *_hard_* affixes.
			_if = {stem_type = {'hard', 'velar', 'sibilant', 'letter-ц', }},
			_then = {
				-- Plural dative/instrumental/prepositional do not depend on gender.
				r_dat_pl = '<c_hard_dat_pl>',
				r_ins_pl = '<c_hard_ins_pl>',
				r_prp_pl = '<c_hard_prp_pl>',
				{
					_if = {gender = 'm'},
					r_nom_sg = '<m_hard_nom_sg>',
					r_gen_sg = '<m_hard_gen_sg>',
					r_dat_sg = '<m_hard_dat_sg>',
					r_ins_sg = '<m_hard_ins_sg>',
					r_nom_pl = '<m_hard_nom_pl>',
					r_gen_pl = '<m_hard_gen_pl>',
				},
				{
					_if = {gender = 'n'},
					r_nom_sg = '<n_hard_nom_sg>',
					r_gen_sg = '<n_hard_gen_sg>',
					r_dat_sg = '<n_hard_dat_sg>',
					r_ins_sg = '<n_hard_ins_sg>',
					r_nom_pl = '<n_hard_nom_pl>',
					r_gen_pl = '<n_hard_gen_pl>',
				},
				{
					_if = {gender = 'f'},
					r_nom_sg = '<f_hard_nom_sg>',
					r_gen_sg = '<f_hard_gen_sg>',
					r_dat_sg = '<f_hard_dat_sg>',
					r_acc_sg = '<f_hard_acc_sg>',
					r_ins_sg = '<f_hard_ins_sg>',
					r_nom_pl = '<f_hard_nom_pl>',
					r_gen_pl = '<f_hard_gen_pl>',
				},
			},
		},
		{
			-- Soft group: these stem types use the *_soft_* affixes.
			_if = {stem_type = {'soft', 'vowel', 'letter-и', 'm-3rd', 'f-3rd', 'f-3rd-sibilant'}},
			_then = {
				r_dat_pl = '<c_soft_dat_pl>',
				r_ins_pl = '<c_soft_ins_pl>',
				r_prp_pl = '<c_soft_prp_pl>',
				{
					_if = {gender = 'm'},
					r_nom_sg = '<m_soft_nom_sg>',
					r_gen_sg = '<m_soft_gen_sg>',
					r_dat_sg = '<m_soft_dat_sg>',
					r_ins_sg = '<m_soft_ins_sg>',
					r_nom_pl = '<m_soft_nom_pl>',
					r_gen_pl = '<m_soft_gen_pl>',
				},
				{
					_if = {gender = 'n'},
					-- NOTE(review): `affixes` defines only
					-- n_soft_nom_sg_stressed / n_soft_nom_sg_unstressed and no
					-- rule in this file assigns the suffix-less n_soft_nom_sg
					-- referenced here — confirm where it is expected to come from.
					r_nom_sg = '<n_soft_nom_sg>',
					r_gen_sg = '<n_soft_gen_sg>',
					r_dat_sg = '<n_soft_dat_sg>',
					r_ins_sg = '<n_soft_ins_sg>',
					r_nom_pl = '<n_soft_nom_pl>',
					r_gen_pl = '<n_soft_gen_pl>',
				},
				{
					_if = {gender = 'f'},
					r_nom_sg = '<f_soft_nom_sg>',
					r_gen_sg = '<f_soft_gen_sg>',
					r_dat_sg = '<f_soft_dat_sg>',
					r_acc_sg = '<f_soft_acc_sg>',
					r_ins_sg = '<f_soft_ins_sg>',
					r_nom_pl = '<f_soft_nom_pl>',
					r_gen_pl = '<f_soft_gen_pl>',
				},
			},
		},

		{SECTION = 'Apply stress type'},
		-- Chooses, per group of forms, between the stressed stem
		-- (stem_stressed + ending as is) and the plain stem with a combining
		-- acute accent (U+0301) appended to the first letter of the ending.
		-- Three groups are handled independently: singular (stem_sg),
		-- nominative plural (stem_nom_pl) and the remaining plural cases
		-- (stem_pl). The replace pattern only matches endings that start with
		-- one of а е и о у ы ю я; endings starting with 'ё' or with a
		-- consonant/sign are left unchanged.
		{
			-- Singular, stress on the stem.
			_if = {stress = {'a', 'c', 'e'}},
			stem_sg = '<stem_stressed>',
		},
		{
			-- Singular, stress on the ending.
			_if = {stress = {'b', 'd', "d'", 'f', "f'"}},
			stem_sg = '<stem>',
			r_nom_sg = {'replace', 'r_nom_sg', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_gen_sg = {'replace', 'r_gen_sg', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_dat_sg = {'replace', 'r_dat_sg', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_ins_sg = {'replace', 'r_ins_sg', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_prp_sg = {'replace', 'r_prp_sg', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
		},
		{
			-- Feminine accusative singular: the ending is stressed for b, d, f
			-- only. For d' and f' the ending stays unstressed; those forms are
			-- built from stem_stressed in the 'Generate forms' section.
			_if = {
				gender = 'f',
				stress = {'b', 'd', 'f'},
			},
			r_acc_sg = {'replace', 'r_acc_sg', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
		},
		{
			-- Nominative plural, stress on the stem.
			_if = {stress = {'a', 'd', "d'", 'e', 'f', "f'"}},
			stem_nom_pl = '<stem_stressed>',
		},		
		{
			-- Nominative plural, stress on the ending.
			_if = {stress = {'b', 'c'}},
			stem_nom_pl = '<stem>',
			r_nom_pl = {'replace', 'r_nom_pl', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
		},		
		{
			-- Other plural cases, stress on the stem.
			_if = {stress = {'a', 'd', "d'"}},
			stem_pl = '<stem_stressed>',
		},		
		{
			-- Other plural cases, stress on the ending.
			_if = {stress = {'b', 'c', 'e', 'f', "f'"}},
			stem_pl = '<stem>',
			r_gen_pl = {'replace', 'r_gen_pl', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_dat_pl = {'replace', 'r_dat_pl', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_ins_pl = {'replace', 'r_ins_pl', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
			r_prp_pl = {'replace', 'r_prp_pl', '^([аеиоуыюя])', '%1́'},  -- stress mark added to the ending
		},

		{SECTION = 'Generate forms'},
		-- Concatenates the stem chosen for each group of forms with the
		-- corresponding r_* ending. Accusative forms are not built here except
		-- for the feminine singular; the rest are picked in the
		-- 'Choose accusative forms' section.
		{
			form_nom_sg = '<stem_sg><r_nom_sg>',
			form_gen_sg = '<stem_sg><r_gen_sg>',
			form_dat_sg = '<stem_sg><r_dat_sg>',
			form_ins_sg = '<stem_sg><r_ins_sg>',
			form_prp_sg = '<stem_sg><r_prp_sg>',
			-- nominative plural has its own stem variable
			form_nom_pl = '<stem_nom_pl><r_nom_pl>',
			form_gen_pl = '<stem_pl><r_gen_pl>',
			form_dat_pl = '<stem_pl><r_dat_pl>',
			form_ins_pl = '<stem_pl><r_ins_pl>',
			form_prp_pl = '<stem_pl><r_prp_pl>',
		},
		{
			-- Only feminine nouns have their own accusative singular ending.
			_if = {gender = 'f'},
			form_acc_sg = '<stem_sg><r_acc_sg>',
		},
		{  -- TODO: refactoring
			-- Patterns d' and f': the accusative singular is built from the
			-- stressed stem, overriding the form assigned by the previous rule.
			_if = {
				gender = 'f',
				stress = {"d'", "f'"},
			},
			form_acc_sg = '<stem_stressed><r_acc_sg>',
		},

		-- NEW IDEAS HERE (experimental draft)
		-- NOTE(review): this is a named key placed inside the otherwise
		-- positional `conditions` list, and it uses an '@vowel' / '@consonant'
		-- placeholder spelling where most other rules in this file use
		-- '{vowel}'. Whether the engine reads this key or supports that
		-- spelling cannot be determined from this file — confirm before
		-- relying on it.
		functions = {
			-- Draft of a reusable "add stress" rule operating on `arg1`: when
			-- the value matches '@vowel' several times and contains neither a
			-- combining acute accent nor 'ё', append U+0301 after the last
			-- '@vowel' match (the one followed only by '@consonant' matches up
			-- to the end of the string).
			add_stress = {
				comment = '...',

				_if = {
					arg1__match_several = '@vowel',
					arg1__not_match = '[́ё]',
				},
				arg1 = {'var_replace', 'arg1', '(@vowel)(@consonant*)$', '%1́%2'},
			}
		},

		{SECTION = "Add stress if there is no one"},
		-- For forms that ended up without any stress indication, put a
		-- combining acute accent (U+0301) after the last vowel.
		{
			-- Nominative singular: several vowel matches and neither U+0301
			-- nor 'ё' present.
			-- NOTE(review): this rule spells the placeholders '@vowel' /
			-- '@consonant' while the genitive plural rule below and the rest
			-- of the file use '{vowel}' / '{consonant}' — confirm that the
			-- engine accepts both spellings.
			_if = {
				form_nom_sg__match_several = '@vowel',
				form_nom_sg__not_match = '[́ё]',
			},
			form_nom_sg = {'var_replace', 'form_nom_sg', '(@vowel)(@consonant*)$', '%1́%2'},
		},
		{
			-- Genitive plural: same idea, with extra conditions.
			-- NOTE(review): the positional `equals` entries reference
			-- <тип>, <тип2> and <base2>, none of which is assigned anywhere in
			-- this file, and the `_or` branch requires `stem` or
			-- `stem_stressed` to be empty. These look like syntax experiments
			-- rather than intended conditions — confirm with the author
			-- whether this rule is expected to fire.
			_if = {
				form_gen_pl__match_several = '{vowel}',
				{equals = {'<тип>', '<base>'}},
				{equals = {'<тип2>', '<base2>'}},
				form_gen_pl__not_match = '[́ё]',
				_or = {
					stem = '',
					stem_stressed = '',
				}
			},
			form_gen_pl = {'var_replace', 'form_gen_pl', '({vowel})({consonant}*)$', '%1́%2'},
		},

		{SECTION = "Remove stress if there is only one syllable"},
		-- When a form matches the {vowel+ё} placeholder exactly once, delete
		-- every combining acute accent (U+0301) from it. Only the nominative
		-- singular and the genitive plural are processed.
		-- NOTE(review): {vowel+ё} is presumably the vowel class extended with
		-- 'ё' — confirm against the engine.
		{
			_if = {form_nom_sg__match_once = '{vowel+ё}'},
			form_nom_sg = {'var_replace', 'form_nom_sg', '́', ''},
		},
		{
			_if = {form_gen_pl__match_once = '{vowel+ё}'},
			form_gen_pl = {'var_replace', 'form_gen_pl', '́', ''},
		},

		{SECTION = 'Choose accusative forms'},
		-- The accusative copies either the nominative or the genitive form.
		-- The feminine singular is not touched here; it was built in the
		-- 'Generate forms' section.
		{
			-- Neuter singular: accusative equals nominative.
			_if = {gender = 'n'},
			form_acc_sg = '<form_nom_sg>',
		},
		{
			-- Masculine singular: depends on animacy.
			_if = {gender = 'm'},
			_then = {
				{
					-- inanimate: same as nominative
					_if = {animate = 'in'},
					form_acc_sg = '<form_nom_sg>',
				},
				{
					-- animate: same as genitive
					_if = {animate = 'an'},
					form_acc_sg = '<form_gen_sg>',
				},
			},
		},
		{
			-- Plural, any gender: inanimate copies the nominative plural ...
			_if = {animate = 'in'},
			form_acc_pl = '<form_nom_pl>',
		},
		{
			-- ... animate copies the genitive plural.
			_if = {animate = 'an'},
			form_acc_pl = '<form_gen_pl>',
		},

		{SECTION = 'If there are no plural forms'},
		-- Selects the 'singularia_tantum' class (defined in `classes`), whose
		-- plural slots are all '—'.
		{
			comment = 'This rule should be the last one. Otherwise — removing of plural form can be overriden by other classes',

			_if = {st = '1'},  -- applies when the "st" argument equals "1"
			class = 'singularia_tantum',
		},
	},
	-- Output classes: each maps result slot names to values. The names
	-- 'common' and 'singularia_tantum' are the ones assigned to `class` by
	-- rules in `conditions`.
	-- NOTE(review): how the engine combines several selected classes (e.g.
	-- whether 'singularia_tantum' is layered over 'common') is not visible in
	-- this file — confirm.
	classes = {
		-- Full paradigm: every slot takes the form_* variable of the same name.
		common = {
			nom_sg = '<form_nom_sg>',
			gen_sg = '<form_gen_sg>',
			dat_sg = '<form_dat_sg>',
			acc_sg = '<form_acc_sg>',
			ins_sg = '<form_ins_sg>',
			prp_sg = '<form_prp_sg>',
			nom_pl = '<form_nom_pl>',
			gen_pl = '<form_gen_pl>',
			dat_pl = '<form_dat_pl>',
			acc_pl = '<form_acc_pl>',
			ins_pl = '<form_ins_pl>',
			prp_pl = '<form_prp_pl>',
			stem_type = '<stem_type>',  -- just for testcases
		},
		-- Nouns without plural forms: every plural slot is a dash.
		singularia_tantum = {
			nom_pl = '—',
			gen_pl = '—',
			dat_pl = '—',
			acc_pl = '—',
			ins_pl = '—',
			prp_pl = '—',
		},
	},
}