-- Module:typing-aids/data/sa-Shrd
-- Source: Wiktionary, the free dictionary.


-- Table returned by this module; the rule sets are attached to it further down.
local data = {}

local U = require("Module:string/char")

-- Signs referenced by the rules. Three are produced from their code points;
-- the avagraha is written as a literal character.
local anusvAra, visarga, virAma = U(0x11181), U(0x11182), U(0x111C0)
local avagraha = "𑇁"

-- Combining acute accent (U+0301).
local acute = U(0x301)

-- Every letter the rules treat as a consonant, followed by the same set wrapped
-- in square brackets so it can be spliced into a pattern as a character class.
local consonants = "𑆑𑆒𑆓𑆔𑆕𑆖𑆗𑆘𑆙𑆚𑆛𑆜𑆝𑆞𑆟𑆠𑆡𑆢𑆣𑆤𑆥𑆦𑆧𑆨𑆩𑆪𑆫𑆬𑆮𑆭𑆯𑆰𑆱𑆲"
local consonant = table.concat({"[", consonants, "]"})

-- Pattern/replacement pair that puts a virama between two adjacent consonants.
local conjunctPattern = "(" .. consonant .. ")(" .. consonant .. ")"
local conjunctReplacement = "%1" .. virAma .. "%2"

-- Ordered list of {pattern, replacement} rules turning romanized input into Sharada.
-- The order is significant: several rules only work because an earlier rule has
-- already run (see the notes on the individual groups).
data["sa-Shrd"] = {
	-- Independent vowels. The two-letter diphthongs and the diaeresis spellings
	-- come before the plain single letters.
	{"ai", "𑆎"}, {"au", "𑆐"},
	{"ä", "𑆃"}, {"ö", "𑆏"}, {"ï", "𑆅"}, {"ü", "𑆇"},
	{"a", "𑆃"}, {"ā", "𑆄"},
	{"i", "𑆅"}, {"ī", "𑆆"},
	{"u", "𑆇"}, {"ū", "𑆈"},
	{"e", "𑆍"}, {"o", "𑆏"},
	{"ṝ", "𑆊"}, {"ṛ", "𑆉"}, {"r̥", "𑆉"},
	{"ḹ", "𑆌"}, {"ḷ", "𑆋"},
	-- "a-i" / "a/i" and "a-u" / "a/u" give 𑆃𑆅 and 𑆃𑆇: the separator between the two
	-- already-converted vowels is removed. Has to come after the "ai" and "au" rules.
	{"(𑆃)[%-/]([𑆅𑆇])", "%1%2"},

	-- Aspirates: every "Xh" digraph is listed ahead of the bare "h".
	{"kh", "𑆒"}, {"gh", "𑆔"},
	{"ch", "𑆗"}, {"jh", "𑆙"},
	{"ṭh", "𑆜"}, {"ḍh", "𑆞"},
	{"th", "𑆡"}, {"dh", "𑆣"},
	{"ph", "𑆦"}, {"bh", "𑆨"},
	{"h", "𑆲"},

	-- Unaspirated stops.
	{"k", "𑆑"}, {"g", "𑆓"},
	{"c", "𑆖"}, {"j", "𑆘"},
	{"ṭ", "𑆛"}, {"ḍ", "𑆝"},
	{"t", "𑆠"}, {"d", "𑆢"},
	{"p", "𑆥"}, {"b", "𑆧"},

	-- Nasals.
	{"ṅ", "𑆕"}, {"ñ", "𑆚"}, {"ṇ", "𑆟"}, {"n", "𑆤"}, {"m", "𑆩"},

	-- Semivowels and sibilants.
	{"y", "𑆪"}, {"r", "𑆫"}, {"l", "𑆬"}, {"v", "𑆮"},
	{"ś", "𑆯"}, {"ṣ", "𑆰"}, {"s", "𑆱"},

	-- Signs.
	{"ṃ", anusvAra}, {"ḥ", visarga}, {"'", avagraha},

	-- Conjuncts: insert a virama between neighbouring consonants. Listed twice on
	-- purpose: one pass cannot use the same consonant in two matches, so a run
	-- such as "CCC" is only half-joined after the first pass. Needs every
	-- consonant rule above to have run already.
	{conjunctPattern, conjunctReplacement},
	{conjunctPattern, conjunctReplacement},
	-- A consonant at the very end of the text takes a virama as well.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- The combining acute is discarded.
	{acute, ""},
}

-- Independent vowel letters paired with the vowel sign (U+111B3..U+111BF) that
-- stands in for them after a consonant. This is an ordered list rather than a
-- keyed map so that the rules generated below are appended in the same order on
-- every load; pairs() over a keyed table gives no ordering guarantee.
-- 𑆃 has no entry here: it is handled by the separate rule appended after the loop.
local vowels = {
	{"𑆄", U(0x111B3)},
	{"𑆅", U(0x111B4)},
	{"𑆆", U(0x111B5)},
	{"𑆇", U(0x111B6)},
	{"𑆈", U(0x111B7)},
	{"𑆉", U(0x111B8)},
	{"𑆊", U(0x111B9)},
	{"𑆋", U(0x111BA)},
	{"𑆌", U(0x111BB)},
	{"𑆍", U(0x111BC)},
	{"𑆎", U(0x111BD)},
	{"𑆏", U(0x111BE)},
	{"𑆐", U(0x111BF)},
}

-- Convert independent vowels to diacritics after consonants. Must go after all consonant conversions.
-- Each generated rule matches one consonant followed by one independent vowel and
-- keeps the consonant (%1) while swapping the vowel for its sign.
for _, forms in ipairs(vowels) do
	local independentForm, diacriticalForm = forms[1], forms[2]
	table.insert(data["sa-Shrd"], {"(" .. consonant .. ")" .. independentForm, "%1" .. diacriticalForm})
end

-- Last rule of all: remove 𑆃 (the letter produced for "a") when it directly
-- follows a consonant. It has to be appended after the vowel-sign rules,
-- otherwise the "aï" and "aü" spellings stop working.
do
	local rules = data["sa-Shrd"]
	rules[#rules + 1] = {"(" .. consonant .. ")𑆃", "%1"}
end

-- ASCII input scheme ([[w:Harvard-Kyoto]]) converted to
-- [[w:International Alphabet of Sanskrit Transliteration]] letters.
-- The replacements are split into numbered stages; a key that contains a shorter
-- key sits in an earlier stage than that shorter key ("lRR" in 1, "lR" and "RR"
-- in 2, "R" in 3). Presumably the consumer applies the stages in numeric order.
data["sa-Shrd-tr"] = {
	[1] = {
		A = "ā", I = "ī", U = "ū",
		J = "ñ", T = "ṭ", D = "ḍ", N = "ṇ", G = "ṅ",
		z = "ś", S = "ṣ",
		M = "ṃ", H = "ḥ",
		lRR = "ḹ",
		["/"] = acute,	-- a slash becomes the combining acute
	},
	[2] = {
		lR = "ḷ",
		RR = "ṝ",
	},
	[3] = {
		R = "ṛ",
	},
}

return data