Module:User:M. I. Wright/ar test

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of M. I. Wright, for their own experimentation. Items in this module may be added and removed at M. I. Wright's discretion; do not rely on this module's stability.


local m_links = require("Module:links")
local m_utilities = require("Module:utilities")
local ar_translit = require("Module:ar-translit")
local m_headword = require("Module:headword")
local m_languages = require("Module:languages")

-- Table of functions exported by this module; it is returned at the end of
-- the file.
local export = {}

-- Arabic language object. `ar` is a second name for the same object, so that
-- a function which shadows `lang` with another language can still refer to
-- Arabic itself.
local lang =  m_languages.getByCode("ar")
local ar = lang
-- Script object for the script code "Arab".
local sc = require("Module:scripts").getByCode("Arab")

-- Short aliases for the library functions used throughout the module.
-- Note that rsubn (gsub) returns more than one value.
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local u = mw.ustring.char

-- Character class listing the letters accepted as root consonants. It
-- contains the bare hamza (ء) but none of the seated hamza forms.
local consonants = "[بتثجحخدذرزسشصضطظعغقفلكمنهويء]"

-- "If Not Empty": maps the empty string to nil and hands every other value
-- back unchanged.
local function ine(x)
	if x ~= "" then
		return x
	end
	return nil
end

-- Version of rsubn() that discards all but the first return value (the
-- substitution count is dropped), so the result can be used directly as a
-- function argument or as a return value.
--   term: string to operate on
--   foo:  pattern to search for
--   bar:  replacement (string, table or function, as accepted by rsubn)
-- Declared local so that loading this module does not create a global.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the result list to exactly one value.
	return (rsubn(term, foo, bar))
end

-- Synthesize a minimal frame so that exported functions meant to be called
-- from templates can also be called from the debug console.
--   parargs: table exposed as frame:getParent().args
--   args:    table exposed as frame.args
-- Only `args` and `getParent` are provided; no other frame method exists on
-- the returned table.
-- Declared local so that loading this module does not create a global.
local function debug_frame(parargs, args)
	return {
		args = args,
		getParent = function()
			return {args = parargs}
		end,
	}
end

-- Exported wrapper: forwards to m_utilities.catfix with this module's Arabic
-- language and script objects and returns whatever that call returns.
function export.catfix()
	return m_utilities.catfix(lang, sc)
end

--------------------------- hamza processing ------------------------------
-- Single-character constants (built with u = mw.ustring.char) and pattern
-- fragments assembled from them.

-- hamza variants
local HAMZA            = u(0x0621) -- hamza on the line (stand-alone hamza) = ء
local HAMZA_ON_ALIF    = u(0x0623) -- hamza above ʾalif = أ
local HAMZA_ON_WAW     = u(0x0624) -- hamza above wāw = ؤ
local HAMZA_UNDER_ALIF = u(0x0625) -- hamza below ʾalif = إ
local HAMZA_ON_YA      = u(0x0626) -- hamza above yā = ئ
-- Character class matching any one of the five hamza forms above
local HAMZA_ANY        = "[" .. HAMZA .. HAMZA_ON_ALIF .. HAMZA_UNDER_ALIF .. HAMZA_ON_WAW .. HAMZA_ON_YA .. "]"
local HAMZA_PH         = u(0xFFF0) -- hamza placeholder

-- diacritics
local A  = u(0x064E) -- fatḥa
local AN = u(0x064B) -- fatḥatān (fatḥa tanwīn)
local U  = u(0x064F) -- ḍamma
local UN = u(0x064C) -- ḍammatān (ḍamma tanwīn)
local I  = u(0x0650) -- kasra
local IN = u(0x064D) -- kasratān (kasra tanwīn)
local SK = u(0x0652) -- sukūn = no vowel
local SH = u(0x0651) -- šadda = gemination of consonants
local DAGGER_ALIF = u(0x0670) -- superscript (dagger) ʾalif
-- Character class matching one diacritic other than šadda
local DIACRITIC_ANY_BUT_SH = "[" .. A .. I .. U .. AN .. IN .. UN .. SK .. DAGGER_ALIF .. "]"
-- Pattern matching short vowels
local AIU = "[" .. A .. I .. U .. "]"
-- Pattern matching any diacritics that may be on a consonant: an optional
-- šadda followed by exactly one of the other diacritics
local DIACRITIC = SH .. "?" .. DIACRITIC_ANY_BUT_SH

-- various letters and signs
local ALIF   = u(0x0627) -- ʾalif = ا
local AMAQ   = u(0x0649) -- ʾalif maqṣūra = ى
local AMAD   = u(0x0622) -- ʾalif madda = آ
local WAW    = u(0x0648) -- wāw = و
local YA     = u(0x064A) -- yā = ي

-- Format a term as a link to an Arabic entry via m_links.full_link, using the
-- module-level Arabic language and script objects.
--   term:  term to link to; may be nil when only `alt` should be displayed
--   tr:    transliteration
--   gloss: gloss text, or nil
--   face:  display face handed through to full_link
--   alt:   alternative display text
-- An empty term or a lone em dash is a placeholder rather than a real word,
-- so it is returned untouched instead of being linked. The check has to look
-- at `term`: there is no variable named `word` in scope here.
local function link(term, tr, gloss, face, alt)
	if term == "" or term == "—" then
		return term
	end
	return m_links.full_link( { term = term, alt = alt, lang = lang, tr = tr, sc = sc, gloss = gloss }, face )
end

-- English ordinals used in error messages to say which root letter is wrong.
local ordinal = { "first", "second", "third", "fourth", "fifth" }

-- Name position i for an error message, falling back to a numeric form for
-- positions beyond the ordinal table so that the message can always be built.
local function ordinalName(i)
	return ordinal[i] or (tostring(i) .. "th")
end

-- Check that every entry of rootTable is a root consonant.
--   rootTable: sequence of root letters
--   output:    sequence of wikitext fragments; a preview-only warning is
--              appended to it for each seated hamza that is found
-- A seated hamza (أ إ ؤ ئ) only produces the warning plus a tracking call;
-- any other non-consonant raises an error naming the offending position.
local function hamzaError(rootTable, output)
	-- ipairs keeps the warnings in root order and guarantees a numeric index
	-- for the ordinal lookup.
	for i, letter in ipairs(rootTable) do
		if not rfind(letter, consonants) then
			if rfind(letter, "[أإؤئ]") then
				table.insert(output, '<span class="previewonly" style="font-size: small;">[Seated hamzas, such as "' .. letter .. '", are not allowed in the names of roots. Use bare hamza, "&rlm;<span lang="ar">ء</span>&lrm;".]</span>')
				require("Module:debug").track("ar-root/hamza-error") -- [[Special:WhatLinksHere/Wiktionary:Tracking/ar-root/hamza-error]]
			else
				error(letter .. ", the " .. ordinalName(i) .. " letter in the category name, is not a consonant.")
			end
		end
	end
end

-- Check that every entry of rootTable is exactly one character long.
--   rootTable:   sequence of root letters
--   joined_root: the root as a single string, used only in the error message
-- Raises an error (blamed on the caller) if rootTable is not a table, and an
-- error naming the offending position if an entry has more than one
-- character.
local function validateRoot(rootTable, joined_root)
	if type(rootTable) ~= "table" then
		error("rootTable is not a table", 2)
	end
	
	for i, letter in ipairs(rootTable) do
		if mw.ustring.len(letter) > 1 then
			error('"' .. letter .. '", the ' .. ordinalName(i) .. ' letter in the root "' .. joined_root .. '" should be a single letter.')
		end
	end
end

-- Template entry point: shows a root either as a headword (when the current
-- page is the root's own page) or as a link, and categorizes the page under
-- that root.
--
-- Template parameters (read from frame:getParent().args):
--   1..5     an optional leading language code made only of ASCII letters,
--            followed by the root, given either as one space-separated
--            string or as one letter per parameter (up to four letters).
--            With no root at all, the page title is used, except in the
--            Template namespace where a sample root is shown.
--   t=, gloss=  gloss displayed next to the linked root
--   face=    display face for the link (default "term")
--   sort=    sort key for the "roots" category on the root's own page
--   nolink=  display the root without linking to it
--   plain=   return only the space-joined root
--   nocat=   return the text without the categories
--   notext=  return the categories without the text
-- plain takes precedence over nocat, which takes precedence over notext.
--
-- Raises an error for Maltese, for a language that does not have Arabic as
-- an ancestor, and for invalid root letters.
function export.ar_root(frame)
	local output = {}
	local categories = {}
	
	local title = mw.title.getCurrentTitle()
	local fulltitle = title.fullText
	local pagename = title.text
	local namespace = title.nsText
	
	local params = {
		[1] = {},
		[2] = {},
		[3] = {},
		[4] = {},
		[5] = {},
		["nocat"] = { type = "boolean", default = false },
		["plain"] = { type = "boolean", default = false },
		["t"] = {},
		["gloss"] = { alias_of = "t" },
		["face"] = { default = "term" },
		["notext"] = { type = "boolean", default = false },
		["nolink"] = { type = "boolean", default = false },
		-- Read below for the roots category; it must be declared here to be
		-- accepted and returned by the parameter processor.
		["sort"] = {},
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	-- argOffset is 1 when the first parameter is a language code, shifting
	-- the root letters one position to the right.
	local argOffset = 0
	local lang = ar
	local rootLetters = {}
	if args[1] and not string.find(args[1], "%A") then
		argOffset = 1
		if args[1] == "mt" then
			error("Maltese (mt) roots are not supported, as they should "
				.. "be written in the language's Latin-script orthography.")
		end
		lang = m_languages.getByCode(args[1]) or m_languages.err(args[1], 1)
		if not lang:hasAncestor(ar) then
			-- These are methods, so they must be called with ":" to receive
			-- the language object as self.
			error(("%s is not listed as an Arabic language."):format(
				"[[:Category:" .. lang:getCategoryName() .. "|"
				.. lang:getCanonicalName() .. "]] ("
				.. lang:getCode() .. ")"
			))
		end
	end
	if not args[1 + argOffset] and namespace == "Template" then
		-- Sample root for display on the template page itself.
		rootLetters = { "ك", "ت", "ب" }
	elseif args[1 + argOffset] and args[2 + argOffset] then
		rootLetters = { args[1 + argOffset], args[2 + argOffset], args[3 + argOffset], args[4 + argOffset] }
	elseif args[1 + argOffset] then
		rootLetters = rsplit(args[1 + argOffset], " ")
	else
		rootLetters = rsplit(fulltitle, " ")
	end
	
	hamzaError(rootLetters, output)
	
	local joined_root = table.concat(rootLetters, " ")
	
	validateRoot(rootLetters, joined_root)
	
	local joined_tr = ar_translit.tr(table.concat(rootLetters, "-"), lang, sc, nil, nil, "force") or "-"
	
	local cat = lang:getCanonicalName()
	
	if fulltitle == joined_root then
		-- On the root's own page: show a headword and add the root categories.
		table.insert(output, m_headword.full_headword({lang = lang, sc = sc, pos_category = "roots", categories = {}, heads = { joined_root }, translits = { joined_tr }}) )
		table.insert(categories, "[[Category:" .. cat .. " roots|" .. (ine(args["sort"]) or joined_root) .. "]]")
		table.insert(categories, "[[Category:" .. cat .. " terms belonging to the root " .. joined_root .. "| ]]")
		if args[1] then
			--require("Module:debug").track("ar-root") -- [[Special:WhatLinksHere/Wiktionary:Tracking/ar-root]]
		end
	else
		-- The tracking calls in this branch are disabled; the conditions are
		-- kept so they can be re-enabled.
		if sc:countCharacters(pagename) < mw.ustring.len(pagename) - 2 then
			--require("Module:debug").track("ar-root/title-not-ar") -- [[Special:WhatLinksHere/Wiktionary:Tracking/ar-root/title-not-ar]]
			if not args["nocat"] then
				--require("Module:debug").track("ar-root/title-not-ar/cat") -- [[Special:WhatLinksHere/Wiktionary:Tracking/ar-root/title-not-ar/cat]]
			end
		end
		
		-- "gloss" is declared as an alias of "t", so the value is read from
		-- the canonical key.
		local gloss = ine(args["t"])
		local link_text
		if args["nolink"] then
			link_text = link(nil, joined_tr, gloss, args["face"], joined_root)
		else
			link_text = link(joined_root, joined_tr, gloss, args["face"] )
		end
		
		table.insert(output, link_text)
		table.insert(categories, m_utilities.format_categories( { cat .. " terms belonging to the root " .. joined_root }, lang) )
	end
	
	if args["plain"] then
		return joined_root
	elseif args["nocat"] then
		return table.concat(output)
	elseif args["notext"] then
		return table.concat(categories)
	else
		return table.concat(output) .. table.concat(categories)
	end
end

-- Debug-console entry point for export.ar_root: wraps two plain tables in a
-- synthetic frame built by debug_frame and forwards the call.
--   parargs: stands in for the template arguments (frame:getParent().args)
--   args:    stands in for the invocation arguments (frame.args)
function export.ar_root2(parargs, args)
	local frame = debug_frame(parargs, args)
	return export.ar_root(frame)
end

-- Template entry point for root-related category pages. Depending on the
-- category title it produces:
--   "Arabic terms belonging to the root X"  a description of root X with a
--                                           link to it, plus its parent
--                                           categories and a display title
--   "Arabic terms by root"                  description and parent categories
--   "Arabic N-letter roots"                 description and parent category
-- followed in every case by the expansion of Template:categoryTOC-Arabic.
-- The optional template parameter gloss= is shown next to the linked root.
-- Raises an error outside the Category namespace, for any other title, and
-- for invalid root letters.
function export.root_cat(frame)
	-- All working variables are local so that nothing leaks into the global
	-- environment.
	local output = {}
	local categories = {}
	
	local title = mw.title.getCurrentTitle()
	local pagename = title.text
	local namespace = title.nsText
	local args = frame:getParent().args
	
	if namespace ~= "Category" then
		error('The template "ar-root cat" should only be used in the category namespace.')
	end
	
	local root = {}
	local joined_root
	if rfind(pagename, "^Arabic terms belonging to the root ") then
		-- Everything after the fixed prefix is the space-separated root.
		joined_root = rmatch(pagename, "^Arabic terms belonging to the root (.+)")
		root = rsplit(joined_root, " ")
		
		frame:callParserFunction(
			"DISPLAYTITLE",
			"Category:Arabic terms belonging to the root " ..
				require("Module:script utilities").tag_text(
					joined_root,
					lang,
					sc,
					"term"
				)
		)
	elseif pagename == "Arabic terms by root" then
		table.insert(output, "This category holds lists of Arabic terms that belong to a certain root.")
		table.insert(categories, "[[Category:Arabic roots| ]]")
		table.insert(categories, "[[Category:Arabic terms by etymology|root]]")
	elseif rfind(pagename, "^Arabic %d%-letter roots$") then
		local number = rmatch(pagename, "Arabic (%d)%-letter roots")
		table.insert(output, "This category holds lists of Arabic terms that belong to roots containing " .. number .. " consonants.")
		table.insert(categories, "[[Category:Arabic roots|" .. number .. "]]")
	else
		error('The title for this category should begin with "Category:Arabic terms belonging to the root" and end in the root with spaces separating the letters.')
	end
	
	-- root is empty unless the title named a specific root, in which case
	-- this check does nothing.
	hamzaError(root, output)
	
	if joined_root then
		validateRoot(root, joined_root)
		
		if mw.site.stats.pagesInCategory(title.text, "all") == 0 then
			table.insert(categories, "[[Category:Empty categories]]")
		end
		table.insert(categories, "[[Category:Arabic terms by root|" .. joined_root .. "]]")
		-- The length operator always yields a number, so no fallback is needed.
		local lettercount = #root
		table.insert(categories, "[[Category:Arabic " .. lettercount .. "-letter roots|" .. joined_root .. "]]")
		
		local joined_tr = ar_translit.tr(table.concat(root, "-"), nil, nil, nil, nil, "force") or "-"
		local link_text = link(joined_root, joined_tr, ine(args["gloss"]) )
		
		table.insert(output, "This category holds [[:Category:Arabic language|Arabic]] terms that are formed from the [[w:Semitic root|root]] " .. link_text .. ".")
	end
	
	table.insert(output, "\n" .. mw.getCurrentFrame():expandTemplate{
		title = "Template:categoryTOC-Arabic",
		args = {}
		})
	
	return table.concat(output) .. table.concat(categories)
end

-- Move a šadda in front of the diacritic that precedes it, so that a
-- word-final vowel mark is really the last character of the string and can
-- be matched with "$".
-- NOTE(review): Unicode canonical ordering sorts šadda after the vowel
-- marks, so NFC-normalized input is presumably what makes this necessary;
-- confirm against the templates that call these functions.
local function reorder_shadda(text)
	return rsub(text, "(" .. DIACRITIC_ANY_BUT_SH .. ")" .. SH, SH .. "%1")
end

-- Used in {{ar-adj-in}} so that we can specify a full lemma rather than
-- requiring the user to truncate the -in ending. FIXME: Move ar-adj-in
-- into Lua.
-- Reads the lemma from frame.args[1] (an error is raised if it is missing)
-- and returns it with a final kasratān removed.
function export.remove_in(frame)
	local lemma = frame.args[1] or error("Lemma required.")
	return rsub(reorder_shadda(lemma), IN .. "$", "")
end

-- Used in {{ar-adj-an}} so that we can specify a full lemma rather than
-- requiring the user to truncate the -an ending. FIXME: Move ar-adj-an
-- into Lua.
-- Reads the lemma from frame.args[1] (an error is raised if it is missing)
-- and returns it with a final fatḥatān + ʾalif maqṣūra removed.
function export.remove_an(frame)
	local lemma = frame.args[1] or error("Lemma required.")
	return rsub(reorder_shadda(lemma), AN .. AMAQ .. "$", "")
end

-- Compare two words and find the alternation pattern (vowel changes, prefixes, suffixes etc.)
-- Still a WIP: the comparison is not implemented, so both arguments are
-- ignored and nil is always returned.
function export.find_pattern(word1, word2)
	return nil
end

-- Template entry point producing a short etymology note: a link to a section
-- of [[Appendix:Arabic nominals]] plus the matching categories.
--
-- Invocation parameter (frame.args):
--   1        required key into the data table below
-- Template parameters (frame:getParent().args):
--   1        accepted but not used
--   lc=, nocap=  keep the link text lowercase instead of capitalizing it
--   notext=  omit the link
--   nocat=   omit the categories
-- Raises an error if the key is not in the data table, or if its entry lacks
-- an anchor or link text.
function export.etymology(frame)
	local text, categories = {}, {}
	local frame_params = {
		[1] = { required = true },
	}
	local frame_args = require("Module:parameters").process(frame.args, frame_params)
	
	local key = frame_args[1]
	
	local data = {
		["color adjective"] = {
			anchor = "Color or defect adjectives",
			text = "color adjective",
			categories = { "color/defect adjectives" },
		},
		["defect adjective"] = {
			anchor = "Color or defect adjectives",
			text = "defect adjective",
			categories = { "color/defect adjectives" },
		},
	}
	
	-- The type must be the string "boolean"; a bare `boolean` is an undefined
	-- variable and would leave these flags as plain string parameters.
	local params = {
		[1] = {},
		["nocat"] = { type = "boolean", default = false },
		["lc"] = { type = "boolean", default = false },
		["nocap"] = { alias_of = "lc" },
		["notext"] = { type = "boolean", default = false },
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	local entry = key and data[key]
	if not entry then
		error('The anchor "' .. tostring(key) .. '" is not found in the list of anchors.')
	end
	
	-- The key stays in its own variable so that both messages below name the
	-- entry that was asked for.
	local anchor = entry.anchor or error('The data table does not include an anchor for "' .. key .. '".')
	local linkText = entry.text or error('The data table does not include link text for "' .. key .. '".')
	if not args.lc then
		-- Capitalize the first letter.
		linkText = rsub(linkText, "^%a", function(a) return mw.ustring.upper(a) end)
	end
	
	if not args.notext then
		table.insert(text, "[[Appendix:Arabic nominals#" .. anchor .. "|" .. linkText .. "]]")
	end
	
	if not args.nocat then
		table.insert(categories, m_utilities.format_categories(entry.categories, lang) )
	end
	
	return table.concat(text) .. table.concat(categories)
end

return export