Module:he-utilities

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Export table: every function and lookup table of this module is attached to
-- it, and it is what the module returns on its last line.
local p = {}

-- A wrapper function allowing the contents of this module to be called from
-- templates. For example, '{{#invoke:he-utilities|main|otSofit|כ}}' produces
-- 'ך', as does '{{#invoke:he-utilities|main|letters|kafSofit}}'.
--
-- frame.args[1] names a member of this module. frame.args[2] is either the
-- single argument passed to that member (when it is a function) or the key
-- looked up in it (when it is a table).
--
-- Raises an error naming the requested member when frame.args[1] refers to
-- neither a function nor a table, instead of failing with an opaque
-- "attempt to index a nil value".
function p.main(frame)
	local name, arg = frame.args[1], frame.args[2]
	local member = p[name]
	local kind = type(member)
	if kind == 'function' then
		return member(arg)
	elseif kind == 'table' then
		return member[arg]
	end
	error("he-utilities has no function or table named '"
		.. tostring(name) .. "'.")
end

-- Lookup table from transliterated letter names (e.g. "alef") to the letter
-- itself as a one-character string (e.g. U+05D0 HEBREW LETTER ALEF).
-- The names below are listed in code-point order starting at U+05D0, so each
-- final form ("...Sofit") sits immediately before its regular form.
p.letters = {}
do
	local FIRST_LETTER = 0x05D0
	local names_in_order = {
		'alef', 'bet', 'gimel', 'dalet', 'hei', 'vav', 'zayen', 'khet', 'tet',
		'yud', 'kafSofit', 'kaf', 'lamed', 'memSofit', 'mem', 'nunSofit',
		'nun', 'samekh', 'ayin', 'peiSofit', 'pei', 'tsadiSofit', 'tsadi',
		'kuf', 'resh', 'shin', 'tav',
	}
	for position = 1, #names_in_order do
		local code_point = FIRST_LETTER + position - 1
		p.letters[names_in_order[position]] = mw.ustring.char(code_point)
	end
end
-- "sin" is an alias for the same base letter as "shin".
p.letters.sin = p.letters.shin

-- Lookup table from transliterated names to vowel points, built the same way
-- as p.letters but starting at U+05B0. It also holds a few marks and
-- diacritics that are not really vowels but occupy the same run of code
-- points.
p.vowels = {}
do
	local FIRST_POINT = 0x05B0
	-- Keeps the list aligned with the code points; the position it occupies
	-- (U+05BA) is deliberately left without an entry in p.vowels.
	local PLACEHOLDER = 'ignoreMe'
	local names_in_order = {
		'shva', 'khatafSegol', 'khatafPatakh', 'khatafKamats', 'khirik',
		'tseirei', 'segol', 'patakh', 'kamats', 'kholam', PLACEHOLDER,
		'kubuts', 'dagesh', 'meteg', 'makaf', 'rafe', 'pasek', 'shinDot',
		'sinDot', 'sofPasuk', 'upperDot',
	}
	for position = 1, #names_in_order do
		local name = names_in_order[position]
		if name ~= PLACEHOLDER then
			p.vowels[name] = mw.ustring.char(FIRST_POINT + position - 1)
		end
	end
end
-- "mapik" is an alias for the same character as "dagesh".
p.vowels.mapik = p.vowels.dagesh

-- Converts a letter to its final ("sofit") form.
-- Kaf, mem, nun, pei and tsadi become kaf sofit, mem sofit, etc.; any other
-- value is returned unchanged.
function p.otSofit(letter)
	local L = p.letters
	local has_final_form = letter == L.kaf
		or letter == L.mem
		or letter == L.nun
		or letter == L.pei
		or letter == L.tsadi
	if not has_final_form then
		return letter
	end
	-- Each final form is the code point just before its regular form.
	local code_point = mw.ustring.codepoint(letter)
	return mw.ustring.char(code_point - 1)
end

-- Like p.otSofit, but when the result is kaf sofit (i.e. the input was kaf or
-- kaf sofit) a sh'va is appended, since kaf sofit is written with a sh'va
-- when it has no other vowel.
function p.otSofitShva(letter)
	local final_form = p.otSofit(letter)
	if final_form ~= p.letters.kafSofit then
		return final_form
	end
	return final_form .. p.vowels.shva
end

-- Converts a final ("sofit") letter back to its regular form.
-- Kaf sofit, mem sofit, nun sofit, pei sofit and tsadi sofit become kaf, mem,
-- etc.; any other value is returned unchanged.
function p.otLoSofit(letter)
	local L = p.letters
	local is_final_form = letter == L.kafSofit
		or letter == L.memSofit
		or letter == L.nunSofit
		or letter == L.peiSofit
		or letter == L.tsadiSofit
	if not is_final_form then
		return letter
	end
	-- Each regular form is the code point just after its final form.
	local code_point = mw.ustring.codepoint(letter)
	return mw.ustring.char(code_point + 1)
end

-- Appends a dagesh to the letters that take one here: bet, gimel, dalet, kaf,
-- pei and tav. Any other value is returned unchanged.
function p.dageshKal(letter)
	local L = p.letters
	local takes_dagesh = letter == L.bet
		or letter == L.gimel
		or letter == L.dalet
		or letter == L.kaf
		or letter == L.pei
		or letter == L.tav
	if takes_dagesh then
		return letter .. p.vowels.dagesh
	end
	return letter
end

-- If letter is shin plus a shin or sin dot, returns shin (without the dot);
-- otherwise, just returns letter.
--
-- The dots are read from p.vowels, which is where shinDot and sinDot are
-- defined. Looking them up in p.letters yields nil, and concatenating nil
-- raises an error on every call.
function p.dotlessShin(letter)
	local shin = p.letters.shin
	if letter == shin .. p.vowels.shinDot
			or letter == shin .. p.vowels.sinDot then
		return shin
	end
	return letter
end

-- Building blocks of the pattern that plain_root uses to split a root into
-- radicals. Despite the name "regex", these are Lua-style patterns handed to
-- mw.ustring.gsub.

-- Exactly one character in the range א through ת.
local letters    = "[א-ת]"
-- Optionally one combining mark from the listed set.
-- NOTE(review): presumably dagesh/mapik, shin dot, sin dot and upper dot --
-- confirm the exact code points.
local modifiers  = "[ּׁׂׄ]?"
-- Optionally one separator character; the leading "-" is literal because it
-- is the first item in the set.
local separators = "[-־ %.,!|]?"
-- One radical (letter plus optional mark) is captured; the optional separator
-- after it is matched but left outside the capture.
local regex = "(" .. letters .. modifiers .. ")" .. separators

-- Radicals accepted by plain_root in a medial position (neither first nor
-- last). Keys are a letter, optionally followed by one combining mark.
-- plain_root only tests truthiness, so an entry set to false is rejected
-- exactly like a missing key; the false entries record a deliberate
-- exclusion.
local medial_radicals = {
	["א"] = true,
	["ב"] = true,
	["ג"] = true,
	["ד"] = true,
	["ה"] = true,
	-- he carrying a dot is rejected here (it is accepted only as a final radical)
	["הּ"] = false,
	["ו"] = true,
	["ז"] = true,
	["ח"] = true,
	["ט"] = true,
	["י"] = true,
	["כ"] = true,
	["ל"] = true,
	["מ"] = true,
	["נ"] = true,
	["ס"] = true,
	["ע"] = true,
	["פ"] = true,
	["צ"] = true,
	["ק"] = true,
	["ר"] = true,
	-- shin is listed only with a dot; there is no entry for the bare letter
	["שׁ"] = true,
	["שׂ"] = true,
	["ת"] = true,
}
-- Initial radicals follow the same rules: this is the same table object, not
-- a copy.
local initial_radicals = medial_radicals
-- Radicals accepted by plain_root in the final position. Kaf, mem, nun, pei
-- and tsadi appear here only in their final forms (ך ם ן ף ץ). plain_root
-- only tests truthiness, so an entry set to false is rejected exactly like a
-- missing key.
local final_radicals = {
	["א"] = true,
	["ב"] = true,
	["ג"] = true,
	["ד"] = true,
	["ה"] = true,
	-- he carrying a dot is accepted as a final radical
	["הּ"] = true,
	-- vav is explicitly rejected as a final radical
	["ו"] = false,
	["ז"] = true,
	["ח"] = true,
	["ט"] = true,
	-- yud is explicitly rejected as a final radical
	["י"] = false,
	["ך"] = true,
	["ל"] = true,
	["ם"] = true,
	["ן"] = true,
	["ס"] = true,
	["ע"] = true,
	["ף"] = true,
	["ץ"] = true,
	["ק"] = true,
	["ר"] = true,
	-- shin is listed only with a dot; there is no entry for the bare letter
	["שׁ"] = true,
	["שׂ"] = true,
	["ת"] = true,
}

-- Validates a Hebrew root and returns its radicals joined by "־".
--
-- frame.args[1] is the root: a run of radicals, each a Hebrew letter
-- optionally followed by one modifier mark, with at most one separator
-- character after each radical (as described by the file-level `regex`
-- pattern).
--
-- Raises an error when the root argument is missing, when it contains text
-- that is not part of a radical or separator, when it has fewer than two
-- radicals, or when a radical is not allowed in its position (initial,
-- medial or final).
function p.plain_root(frame)
	local root = frame.args[1]
	if root == nil then
		-- Fail with a clear message instead of a bad-argument error raised
		-- from inside mw.ustring.gsub.
		error("Root is missing.")
	end

	local radicals = {}
	-- Every match is recorded and replaced by "", so whatever survives the
	-- substitution is text the pattern could not account for.
	local scraps = mw.ustring.gsub(root, regex, function(radical)
		radicals[#radicals + 1] = radical
		return ""
	end)
	if scraps ~= "" then
		error("Unrecognized characters in root.")
	end

	local count = #radicals
	if count < 2 then
		error("Root must have at least two radicals.")
	end

	for i, radical in ipairs(radicals) do
		-- Pick the table of allowed radicals for this position.
		local allowed, position
		if i == 1 then
			allowed, position = initial_radicals, "initial"
		elseif i == count then
			allowed, position = final_radicals, "final"
		else
			allowed, position = medial_radicals, "medial"
		end
		if not allowed[radical] then
			error("Unrecognized " .. position .. " radical " .. radical .. ".")
		end
	end
	return table.concat(radicals, "־")
end

-- Hand the export table to whoever loads this module.
return p