Module:ar-prep

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module generates tables of prepositions combined with pronoun suffixes for {{ar-prep-auto}}.


local m_links = require("Module:links")
local m_strutils = require("Module:string utilities")
local ar_utilities = require("Module:ar-utilities")

local export = {}

local lang = require("Module:languages").getByCode("ar")

-- Shorthand: build a string from Unicode code points.
local u = mw.ustring.char

-- hamza variants
local HAMZA            = u(0x0621) -- hamza on the line (stand-alone hamza) = ء
local HAMZA_ON_ALIF    = u(0x0623) -- U+0623, hamza above ʾalif
local HAMZA_ON_W       = u(0x0624) -- U+0624, hamza above wāw
local HAMZA_UNDER_ALIF = u(0x0625) -- U+0625, hamza below ʾalif
local HAMZA_ON_Y       = u(0x0626) -- U+0626, hamza above yāʾ
-- pattern character class matching any one of the hamza variants above
local HAMZA_ANY = "[" .. HAMZA .. HAMZA_ON_ALIF .. HAMZA_UNDER_ALIF .. HAMZA_ON_W .. HAMZA_ON_Y .. "]"
-- hamza placeholder: inserted by canon_shadda_hamza() and looked for by
-- hamza_seat(), which hands the word to ar_utilities.process_hamza()
local HAMZA_PH = u(0xFFF0)

-- diacritics
local A  = u(0x064E) -- fatḥa
local AN = u(0x064B) -- fatḥatān (fatḥa tanwīn)
local U  = u(0x064F) -- ḍamma
local UN = u(0x064C) -- ḍammatān (ḍamma tanwīn)
local I  = u(0x0650) -- kasra
local IN = u(0x064D) -- kasratān (kasra tanwīn)
local SK = u(0x0652) -- sukūn = no vowel
local SH = u(0x0651) -- šadda = gemination of consonants
local DAGGER_ALIF = u(0x0670) -- U+0670, superscript (dagger) ʾalif
-- pattern character class matching any single diacritic above except šadda
local DIACRITIC_ANY_BUT_SH = "[" .. A .. I .. U .. AN .. IN .. UN .. SK .. DAGGER_ALIF .. "]"

-- various letters and signs
local ALIF   = u(0x0627) -- ʾalif = ا
local AMAQ   = u(0x0649) -- ʾalif maqṣūra = ى
local AMAD   = u(0x0622) -- ʾalif madda = آ
local TAM    = u(0x0629) -- tāʾ marbūṭa = ة
local T      = u(0x062A) -- tāʾ = ت
local HYPHEN = u(0x0640) -- U+0640 is the Arabic tatweel; base_to_stem() strips it from the end of a stem
local N      = u(0x0646) -- nūn = ن
local W      = u(0x0648) -- wāw = و
local Y      = u(0x064A) -- yā = ي
local LRM    = u(0x200e) -- left-to-right mark

-- "If Not Empty": turn the empty string into nil and hand every other
-- value back unchanged, so callers can chain fallbacks with `or`.
local function ine(x)
    if x ~= "" then
        return x
    end
    return nil
end

-- Canonicalise a vocalised Arabic word for further processing.
--
-- Declared `local` so the helper does not leak into the shared global
-- environment; it is only used further down in this module.
--
-- word: the string to canonicalise, or nil.
-- Returns nil when `word` is nil/false; otherwise the string with
--   (1) every "diacritic + šadda" pair reordered to "šadda + diacritic", and
--   (2) a hamza (any seat) that is directly followed by a word-final short
--       vowel replaced with the hamza placeholder.
local function canon_shadda_hamza(word)
	if not word then
		return nil
	end
	-- shadda+short-vowel (including tanwīn vowels, i.e. -an -in -un) gets
	-- replaced with short-vowel+shadda during NFC normalisation, which
	-- MediaWiki does for all Unicode strings; however, it makes various
	-- processes inconvenient, so undo it.
	word = mw.ustring.gsub(word, "(" .. DIACRITIC_ANY_BUT_SH .. ")" .. SH, SH .. "%1")
	-- Only a hamza right before a final a/i/u is swapped for the placeholder;
	-- hamza_seat() later picks the real seat once a suffix has been attached.
	word = mw.ustring.gsub(word, HAMZA_ANY .. "([" .. A .. I .. U .. "])$", HAMZA_PH .. "%1")
	return word
end

-- Supply the appropriate hamza seat for a placeholder hamza.
--
-- Declared `local` so the helper does not leak into the shared global
-- environment; it is only used further down in this module.
--
-- word: a string that may contain the hamza placeholder (HAMZA_PH).
-- Returns `word` unchanged when it contains no placeholder; otherwise the
-- first entry of the list returned by ar_utilities.process_hamza(word).
local function hamza_seat(word)
	-- FIXME! Allow multiple possibilities in inflection tables
	if mw.ustring.find(word, HAMZA_PH) then -- optimization to avoid many regexp substs
		return ar_utilities.process_hamza(word)[1]
	end
	return word
end

-- Set of every slot of the inflection table (the base form plus one slot per
-- person/gender/number combination). Keys are slot names, values are `true`.
local forms = {}
for _, slot in ipairs({
    "base",
    "1s", "2ms", "2fs", "3ms", "3fs",
    "2d", "3d",
    "1p", "2mp", "2fp", "3mp", "3fp",
}) do
    forms[slot] = true
end

-- Convert every slot named in `forms` into display text, in place.
-- A slot that is missing, empty or "-" becomes an em dash; any other value
-- gets its hamza seat resolved and is then passed to m_links.full_link
-- together with the matching "<slot>tr" transliteration (if non-empty).
local function linkify(args)
    for slot in pairs(forms) do
        local value = args[slot]
        if value ~= "-" and ine(value) then
            local seated = hamza_seat(value)
            args[slot] = seated
            args[slot] = m_links.full_link({
                lang = lang,
                term = seated,
                tr = ine(args[slot .. "tr"]) or nil,
            })
        else
            args[slot] = "—"
        end
    end
end

-- Builders for the first-person-singular form, keyed by suffix type
-- ("i" or "ni"); export.inflect picks one of them.
local attach_1s = {}

-- Suffix type "i": fill args["1s"] / args["1str"] unless "1s" was supplied.
-- A stem ending in yāʾ (with or without sukūn) gets šadda + fatḥa (-yya);
-- any other stem loses a final short vowel or sukūn and takes kasra + yāʾ (-ī).
function attach_1s.i(args)
    if ine(args["1s"]) then
        return
    end
    local stem, stemtr = args["stem"], args["stemtr"]
    if mw.ustring.find(stem, "يْ?$") then
        args["1s"] = mw.ustring.gsub(stem, SK .. "?$", SH .. A, 1)
        if stemtr then
            -- "...īya" is respelled "...iyya" in the transliteration
            args["1str"] = (mw.ustring.gsub(stemtr .. "ya", "īya$", "iyya", 1))
        else
            args["1str"] = stemtr
        end
    else
        local final_vowel = "[" .. A .. I .. U .. SK .. "]?$"
        args["1s"] = mw.ustring.gsub(stem, final_vowel, I .. Y, 1)
        args["1str"] = stemtr and (mw.ustring.gsub(stemtr, "[aiu]?$", "ī", 1))
    end
end

-- Suffix type "ni": fill args["1s"] / args["1str"] unless "1s" was supplied.
-- A stem already ending in nūn (with or without sukūn) has that nūn doubled
-- with šadda before -ī; any other stem simply gets -nī appended.
function attach_1s.ni(args)
    if ine(args["1s"]) then
        return
    end
    local stem, stemtr = args["stem"], args["stemtr"]
    local ends_in_nun = mw.ustring.find(stem, "نْ?$")
    if ends_in_nun then
        args["1s"] = mw.ustring.gsub(stem, SK .. "?$", SH .. I .. Y, 1)
    else
        args["1s"] = stem .. "نِي"
    end
    args["1str"] = stemtr and (stemtr .. "nī")
end

-- Fill the first-person-plural form args["1p"] / args["1ptr"] unless "1p"
-- was supplied. A stem already ending in nūn (with or without sukūn) has
-- that nūn doubled with šadda before -ā; any other stem gets -nā appended.
local function attach_1p(args)
    if ine(args["1p"]) then
        return
    end
    local stem, stemtr = args["stem"], args["stemtr"]
    local ends_in_nun = mw.ustring.find(stem, "نْ?$")
    if ends_in_nun then
        args["1p"] = mw.ustring.gsub(stem, SK .. "?$", SH .. A .. ALIF, 1)
    else
        args["1p"] = stem .. "نَا"
    end
    args["1ptr"] = stemtr and (stemtr .. "nā")
end

-- Fill every second-person slot that was not supplied by the caller.
-- Each form is the stem plus kāf plus a slot-specific ending; the matching
-- "<slot>tr" transliteration is only produced when args["stemtr"] is set.
local function attach_2(args)
    local stemtr = args["stemtr"]
    local stem2 = args["stem"] .. "ك"
    local stem2tr = stemtr and (stemtr .. "k")

    -- { slot, Arabic ending, transliterated ending }
    local endings = {
        { "2ms", A, "a" },
        { "2fs", I, "i" },
        { "2d", "ُمَا", "umā" },
        { "2mp", "ُمْ", "um" },
        { "2fp", "ُنَّ", "unna" },
    }

    for _, ending in ipairs(endings) do
        local slot, suffix, suffixtr = ending[1], ending[2], ending[3]
        if not ine(args[slot]) then
            args[slot] = stem2 .. suffix
            args[slot .. "tr"] = stemtr and (stem2tr .. suffixtr)
        end
    end
end

-- Fill the third-person slots other than "3fs" that were not supplied.
-- The pronoun base is -hi when the stem ends in yāʾ or kasra (optionally
-- followed by sukūn) and -hu otherwise; dual and plural endings are added
-- on top of that base.
local function attach_3(args)
    local stem, stemtr = args["stem"], args["stemtr"]

    local takes_kasra = mw.ustring.find(stem, "[" .. Y .. I .. "]" .. SK .. "?$")
    local stem3 = stem .. (takes_kasra and "هِ" or "هُ")
    local stem3tr = stemtr and (stemtr .. (takes_kasra and "hi" or "hu"))

    -- { slot, Arabic ending, transliterated ending }
    local endings = {
        { "3ms", "", "" },
        { "3d", "مَا", "mā" },
        { "3mp", "مْ", "m" },
        { "3fp", "نَّ", "nna" },
    }

    for _, ending in ipairs(endings) do
        local slot, suffix, suffixtr = ending[1], ending[2], ending[3]
        if not ine(args[slot]) then
            args[slot] = stem3 .. suffix
            args[slot .. "tr"] = stemtr and (stem3tr .. suffixtr)
        end
    end
end

-- Fill the third-person feminine singular form args["3fs"] / args["3fstr"]
-- (stem + -hā) unless "3fs" was supplied by the caller.
local function attach_3fs(args)
    if ine(args["3fs"]) then
        return
    end
    local stemtr = args["stemtr"]
    args["3fs"] = args["stem"] .. "هَا"
    args["3fstr"] = stemtr and (stemtr .. "hā")
end

-- Derive args["stem"] (and, unless already given, args["stemtr"]) from
-- args["base"] / args["basetr"], i.e. the form that suffixes attach to.
local function base_to_stem(args)
    local base = args["base"]

    -- a final ʾalif maqṣūra (with optional fatḥa / dagger ʾalif around it)
    -- turns into fatḥa + yāʾ + sukūn, i.e. final -ā becomes -ay
    local maqsura_ending = "[" .. A .. DAGGER_ALIF .. "]*" .. AMAQ .. DAGGER_ALIF .. "?$"
    local stem = mw.ustring.gsub(base, maqsura_ending, A .. Y .. SK, 1)
    -- a tāʾ marbūṭa before a final short vowel turns into a regular tāʾ
    stem = mw.ustring.gsub(stem, TAM .. "([" .. A .. I .. U .. "])$", T .. "%1", 1)

    local stemtr = args["stemtr"]
    if not ine(stemtr) then
        local basetr = args["basetr"]
        if stem ~= base then
            -- the stem was altered, so mirror the -ā → -ay change
            stemtr = basetr and mw.ustring.gsub(basetr, "ā$", "ay", 1)
        else
            stemtr = basetr
        end
    end

    -- drop a trailing tatweel (Arabic) / hyphen (transliteration) when
    -- something precedes it
    args["stem"] = mw.ustring.gsub(stem, "(.)" .. HYPHEN .. "$", "%1", 1)
    args["stemtr"] = stemtr and (mw.ustring.gsub(stemtr, "(.)%-$", "%1", 1))
end

-- Wikitext skeleton of the collapsible inflection table. make_table() passes
-- it to m_strutils.format, which fills the {heading}, {base} and per-slot
-- placeholders ({1s}, {2ms}, ...) from the args table.
-- NOTE(review): the trailing "{\cl}" is presumably the formatter's escape for
-- a literal "}" (closing the wikitable with "|}") — confirm against
-- Module:string utilities.
local template = [===[
{| class="wikitable vsSwitcher vsToggleCategory-inflection autocollapsed" style="text-align:center; border: 0.5px solid #CCC;"
|-
! class="vsToggleElement" style="text-align: center; width:30em" colspan="6" |     {heading}
|- class="vsHide"
! colspan="5" |Base form
| {base}
|- class="vsHide"
! rowspan="2"| Personal-pronoun-<br />including forms
! colspan="2" | Singular
! Dual
!  colspan="2" | Plural
|- class="vsHide"
! Masculine
! Feminine
! Common
! Masculine
! Feminine
|- class="vsHide"
! style="text-align: right;"| First person
| colspan="2" | {1s}
|
| colspan="2" | {1p}
|- class="vsHide"
! style="text-align: right;"| Second person
| {2ms}
| {2fs}
| {2d}
| {2mp}
| {2fp}
|- class="vsHide"
! style="text-align: right;"| Third person
| {3ms}
| {3fs}
| {3d}
| {3mp}
| {3fp}
|{\cl}]===]

-- Render the inflection table: substitute the values in `args` into the
-- module-level `template` and return whatever the formatter produces.
local function make_table(args)
    local fill = m_strutils.format
    return fill(template, args)
end

-- Entry point for {{ar-prep-auto}}: build the table of a preposition combined
-- with each pronoun suffix.
--
-- frame: the Scribunto frame. frame.args[1] may be "ni" to select the -nī
--   first-person-singular suffix. The parent frame's args supply:
--     base / 1, basetr / 2   base form and its transliteration
--     stem / 3, stemtr / 4   form the suffixes attach to (derived from the
--                            base when omitted)
--     ni                     any value other than "" or "-" forces -nī
--     heading                table heading (default "Inflected forms")
--     1s, 2ms, ... (+ "tr")  explicit overrides for individual slots
-- Returns the wikitext of the table.
function export.inflect(frame)
    local args = frame:getParent().args or {}
    local ni = frame.args[1]
    if args["ni"] and args["ni"] ~= "" and args["ni"] ~= "-" then
        ni = "ni"
    elseif ni ~= "ni" then
        ni = "i"
    end

    -- Kept local: these used to be assigned as globals (plus an unused
    -- PAGENAME), leaking into the shared environment.
    local title = mw.title.getCurrentTitle()
    local subpagename = title.subpageText
    local namespace = title.nsText

    args["basetr"] = ine(args["basetr"]) or ine(args[2])
    local base = ine(args["base"]) or ine(args[1])
    if not base then
        if namespace == "Template" then
            -- On the template page itself show a bare tatweel placeholder
            -- instead of the page name. This check must happen before the
            -- page-name fallback, otherwise it can never fire.
            base = HYPHEN
            args["basetr"] = "-"
        else
            base = subpagename
        end
    end
    args["base"] = canon_shadda_hamza(base)

    args["stem"] = canon_shadda_hamza(ine(args["stem"]) or ine(args[3]))
    if not args["stem"] then
        base_to_stem(args)
    end
    -- With no explicit stem transliteration, reuse the base transliteration
    -- only when the stem is identical to the base.
    args["stemtr"] = ine(args["stemtr"]) or ine(args[4]) or args["stem"] == args["base"] and args["basetr"] or nil

    attach_1s[ni](args)
    attach_1p(args)
    attach_2(args)
    attach_3(args)
    attach_3fs(args)

    linkify(args)

    args["heading"] = ine(args["heading"]) or "Inflected forms"

    return make_table(args)
end

return export