Module:typing-aids/data/sa-Modi

From Wiktionary, the free dictionary
Jump to navigation Jump to search

-- Container for every rule set exported by this module.
local data = {}

-- Shorthand: builds a string from Unicode code points.
local U = mw.ustring.char

-- Signs from the Modi block, created from their code points.
local anusvAra, visarga, virAma = U(0x1163D), U(0x1163E), U(0x1163F)
local zwj = U(0x200D)		-- zero-width joiner; not referenced elsewhere in this file
local avagraha = "ऽ"		-- note: this literal is the Devanagari avagraha character

-- All Modi letters the rules treat as consonants, plus a character class
-- ("[...]") that matches any single one of them.
local consonants = "𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘜𑘝𑘞𑘟𑘠𑘡𑘢𑘣𑘤𑘥𑘦𑘧𑘨𑘩𑘪𑘫𑘬𑘭𑘮"
local consonant = ("[%s]"):format(consonants)

local acute = U(0x301)		-- combining acute

-- Replacement rules that turn romanized (Latin-letter) input into Modi script.
-- Each entry is a {pattern, replacement} pair. The ordering notes below assume that
-- the consuming code (not visible in this file) applies the entries in list order.
-- Vowels are first mapped to their independent letters; rules appended to this list
-- further down in the file rewrite them as dependent signs after a consonant.
data["sa-Modi"] = {
	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "𑘋"},
	{"au", "𑘍"},
	-- "ï" and "ü" produce the same letters as plain "i" and "u".
	{"ï", "𑘂"},
	{"i", "𑘂"},
	{"ī", "𑘃"},
	{"ü", "𑘄"},
	{"u", "𑘄"},
	{"ū", "𑘅"},
	{"a", "𑘀"},
	{"ā", "𑘁"},
	{"e", "𑘊"},
	{"o", "𑘌"},
	{"ṝ", "𑘇"},
	{"ṛ", "𑘆"},
	-- Alternative spelling of the same vowel ("r" + combining ring below); it is
	-- listed before the plain consonant "r" rule further down.
	{"r̥", "𑘆"},
	{"ḹ", "𑘉"},
	{"ḷ", "𑘈"},
	{"(𑘀)[%-/]([𑘂𑘄])", "%1%2"},		-- a-i, a-u for 𑘀𑘂, 𑘀𑘄; must follow rules for "ai", "au"

	-- Two-letter consonants must go before h.
	-- (They also precede the single-letter stops below, which would otherwise
	-- consume their first letter.)
	{"kh", "𑘏"},
	{"gh", "𑘑"},
	{"ch", "𑘔"},
	{"jh", "𑘖"},
	{"ṭh", "𑘙"},
	{"ḍh", "𑘛"},
	{"th", "𑘞"},
	{"dh", "𑘠"},
	{"ph", "𑘣"},
	{"bh", "𑘥"},
	{"h", "𑘮"},

	-- Other stops.
	{"k", "𑘎"},
	{"g", "𑘐"},
	{"c", "𑘓"},
	{"j", "𑘕"},
	{"ṭ", "𑘘"},
	{"ḍ", "𑘚"},
	{"t", "𑘝"},
	{"d", "𑘟"},
	{"p", "𑘢"},
	{"b", "𑘤"},

	-- Nasals.
	{"ṅ", "𑘒"},
	{"ñ", "𑘗"},
	{"ṇ", "𑘜"},
	{"n", "𑘡"},
	{"m", "𑘦"},

	-- Remaining consonants.
	{"y", "𑘧"},
	{"r", "𑘨"},
	{"l", "𑘩"},
	{"v", "𑘪"},
	{"ś", "𑘫"},
	{"ṣ", "𑘬"},
	{"s", "𑘭"},

	-- Signs: anusvara, visarga and avagraha.
	{"ṃ", anusvAra},
	{"ḥ", visarga},
	{"'", avagraha},
	-- Insert a virama between two adjacent consonants.
	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text also receives a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Remove any combining acute accent from the text.
	{acute, ""},
}

-- Independent vowel letter -> dependent vowel sign, both given as Modi code points.
-- The trailing comment shows the independent letter and the romanization that the
-- rule list maps to it. 𑘀 (U+11600, "a") has no entry here.
local vowels = {
	[U(0x11601)] = U(0x11630),	-- 𑘁 ā
	[U(0x11602)] = U(0x11631),	-- 𑘂 i
	[U(0x11603)] = U(0x11632),	-- 𑘃 ī
	[U(0x11604)] = U(0x11633),	-- 𑘄 u
	[U(0x11605)] = U(0x11634),	-- 𑘅 ū
	[U(0x11606)] = U(0x11635),	-- 𑘆 ṛ
	[U(0x11607)] = U(0x11636),	-- 𑘇 ṝ
	[U(0x11608)] = U(0x11637),	-- 𑘈 ḷ
	[U(0x11609)] = U(0x11638),	-- 𑘉 ḹ
	[U(0x1160A)] = U(0x11639),	-- 𑘊 e
	[U(0x1160B)] = U(0x1163A),	-- 𑘋 ai
	[U(0x1160C)] = U(0x1163B),	-- 𑘌 o
	[U(0x1160D)] = U(0x1163C),	-- 𑘍 au
}

-- Extend the rule list with the vowel-sign rules. They match Modi consonants, so they
-- have to sit after every consonant conversion already present in the list.
do
	local rules = data["sa-Modi"]

	-- consonant + independent vowel letter  ->  consonant + matching vowel sign
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {"(" .. consonant .. ")" .. letter, "%1" .. sign}
	end

	-- Drop 𑘀 directly after a consonant, leaving the bare consonant letter.
	-- This must go last, after independent vowels are converted to diacritics, or "aï", "aü" won't work.
	rules[#rules + 1] = {"(" .. consonant .. ")𑘀", "%1"}
end

-- ASCII shortcuts for the romanization letters that carry diacritics.
-- The shortcuts are grouped into numbered stages. Longer key sequences live in an
-- earlier stage than the shorter sequences they contain ("LRR" in stage 1, "LR"/"RR"
-- in stage 2, "R" in stage 3), presumably so the consumer can run the stages in
-- numeric order without a short key consuming part of a longer one (verify against
-- the code that reads this table).
data["sa-Modi-tr"] = {
	[1] = {
		["A"] = "ā",
		["I"] = "ī",
		-- Capital letters stand for the long vowels; this used to map to short "u",
		-- which left "ū" without an ASCII shortcut.
		["U"] = "ū",
		["J"] = "ñ",
		["T"] = "ṭ",
		["D"] = "ḍ",
		["N"] = "ṇ",
		["G"] = "ṅ",
		["z"] = "ś",
		["S"] = "ṣ",
		["M"] = "ṃ",
		["H"] = "ḥ",
		["LRR"] = "ḹ",
		["/"] = acute,	-- "/" becomes the combining acute, which the main rule list removes again
	},
	[2] = {
		["LR"] = "ḷ",
		["RR"] = "ṝ",
	},
	[3] = {
		["R"] = "ṛ",
	},
}

return data