Module:User:ZxxZxxZ/links

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of ZxxZxxZ, for his own experimentation. Items in this module may be added and removed at ZxxZxxZ's discretion; do not rely on this module's stability.


local m_languages = mw.loadData("Module:languages/data/all")

-- Table of functions returned by this module (see the final `return export`).
local export = {}
-- Module-level counter, initialised to 1 each time the module is loaded.
-- NOTE(review): appears to be a debugging aid; confirm whether anything still
-- needs it before removing.
local i =1

--TODO: move to [[Module:languages]]
-- Set of language codes (code -> true) for which a transliteration supplied by
-- the caller does not stop export.full_link from generating one itself.
local override_translit = {}
for _, code in ipairs({
    "axm", "ce", "el", "hy", "kbd", "ady", "ka",
    "oge", "xmf", "lzz", "sva", "ug", "xcl", "os",
}) do
    override_translit[code] = true
end

-- Used in [[Template:l]] and [[Template:term]]
--
-- Reads "face" and "compat" from the invoking frame's own arguments and
-- everything else from the parent frame's arguments, then forwards the result
-- to export.full_link.
--
-- Parent-frame parameters (an empty string counts as "not given"):
--   1, or lang= in compat mode    language code
--   2, or 1 in compat mode        term to link to
--   3, or 2 in compat mode        alternative display text
--   gloss=, or 4 (3 in compat)    gloss
--   sc=, id=, tr=, pos=, lit=     passed through
--   g=, g2=, g3=, ...             genders, read until the first empty one
--
-- Raises an error when the language code is missing outside compat mode.
function export.template_l_term(frame)
    -- Treat an empty template argument the same as a missing one.
    local function nonblank(value)
        if value == "" then
            return nil
        end
        return value
    end

    -- Only the "term" face is recognised; anything else means "no face".
    local face = frame.args["face"]
    if face ~= "term" then
        face = nil
    end

    -- Compatibility mode.
    -- If given a nonempty value, the function uses lang= to specify the
    -- language, and all the positional parameters shift one number lower.
    local compat = (frame.args["compat"] or "") ~= ""
    local args = frame:getParent().args

    local lang = args[(compat and "lang" or 1)]
    if lang == nil or lang == "" then
        -- Temporary. Unfortunately, many pages are missing the language parameter.
        -- These all need to be fixed, but until then this is needed to avoid
        -- thousands of script errors. See [[:Category:term cleanup]]!
        if compat then
            lang = "und"
        else
            error("The first parameter (language code) has not been given")
        end
    end

    local sc = nonblank(args["sc"])
    local term = nonblank(args[(compat and 1 or 2)])
    local alt = nonblank(args[(compat and 2 or 3)])
    local id = nonblank(args["id"])

    local tr = nonblank(args["tr"])
    local gloss = nonblank(args["gloss"] or args[(compat and 3 or 4)])
    local pos = nonblank(args["pos"])
    local lit = nonblank(args["lit"])

    -- Gather gender and number specifications
    -- Iterate over all gn parameters (g2, g3 and so on) until one is empty
    local genders = {}
    local g = args["g"] or ""
    local next_index = 2

    while g ~= "" do
        table.insert(genders, g)
        g = args["g" .. next_index] or ""
        next_index = next_index + 1
    end

    -- Forward the information to full_link
    return export.full_link(term, alt, lang, sc, face, id, {tr = tr, genders = genders, gloss = gloss, pos = pos, lit = lit})
end

-- A version of {{l}} or {{term}} that can be called from other modules too
--
-- Parameters:
--   term         page to link to; may be nil when alt or a transliteration is given
--   alt          alternative display text; shown without a link when term is nil
--   lang         language code; must be a key of Module:languages/data/all
--   sc           script code; detected from the text when nil
--   face         passed on to tag_text and export.format_link_annotations
--   id           passed on to export.language_link
--   annotations  optional table of annotations (tr, genders, gloss, pos, lit,
--                interwiki); the caller's table is never modified
--
-- Returns the link wikitext, followed by the formatted annotations and any
-- tracking categories.
--
-- Raises an error when the language code is unknown, when term or alt is the
-- filler "...", or when there is nothing at all to display.
function export.full_link(term, alt, lang, sc, face, id, annotations)
    if not m_languages[lang] then
        error("The language code \"" .. tostring(lang) .. "\" is not valid.")
    end

    -- Some entries use this as a filler when the native script is missing.
    if term == "..." then error("Term is \"...\", should probably be empty") end
    if alt == "..." then error("Alt is \"...\", should probably be empty") end

    -- Work on a private copy: a generated transliteration is stored below, and
    -- writing it into the caller's table would leak it into later calls that
    -- reuse the same table. This also makes the argument optional.
    local notes = {}
    for key, value in pairs(annotations or {}) do
        notes[key] = value
    end

    -- Create the link
    local link = ""

    local m_utilities = require("Module:utilities")
    local m_scriptutils = require("Module:script utilities")

    local scFix = false

    -- Is there any text to show?
    if term or alt then
        -- Try to detect the script if it was not provided
        if not sc then
            sc, scFix = m_utilities.detect_script(alt or term, lang)
        end

        -- Only make a link if the term has been given, otherwise just show the alt text without a link
        link = m_scriptutils.tag_text(term and export.language_link(term, alt, lang, id) or alt, lang, sc, face)
    else
        -- No term to show.
        -- Is there at least a transliteration we can work from?
        link = m_scriptutils.request_script(lang, sc)

        if link == "" or not notes["tr"] then
            -- No link to show, and no transliteration either. Just show an error because can't really do anything now.
            error("At least one of the following should be provided: the term, alternative display, transliteration")
        end
    end

    -- Track callers that pass an empty string or "-" as the transliteration.
    local trFix = notes["tr"] == "" or notes["tr"] == "-"

    -- Try to generate a transliteration if necessary
    -- Generate it if the script is not Latn or similar, and if no transliteration was provided
    -- (or the language is listed in override_translit).
    if (term or alt) and not ((sc:find("Latn", nil, true)) or sc == "Latinx" or sc == "unicode") and (not notes["tr"] or override_translit[lang]) then
        notes["tr"] = m_scriptutils.transliterate(export.remove_links(alt or term), lang, sc)
    end

    return link .. export.format_link_annotations(lang, notes, face)
                .. (scFix and "[[Category:Terms using script detection fallback]][[Category:Terms using script detection fallback/" .. lang .. "]]" or "")
                .. (trFix and "[[Category:Terms passing empty string to tr]]" or "")
end

-- TODO: remove
--
-- Adds a language section anchor to every wikilink in a piece of text.
--
-- Frame arguments:
--   1         language code; when empty or missing the text is returned
--             without section anchors
--   2         the text to process
--   autolink  "words" wraps every word in a link, "all" wraps the whole text
--             in one link if it is a single word; only applied when the text
--             contains no links of its own
--
-- Raises an error when the language code is unknown, or when the text still
-- contains no links after any requested autolinking.
function export.template_l_xform(frame)
    local args = frame.args
    local lang = (args[1] ~= '') and args[1] or nil
    local langinfo = lang and (m_languages[lang] or error("The language code \"" .. lang .. "\" is not valid.")) or nil

    local text = args[2] or ''
    local autolink = args["autolink"]
    local link_pattern = "%[%[.-%]%]"

    if not mw.ustring.match(text, link_pattern) then
        -- Give the requested autolink mode a chance to create links first;
        -- raising the error before this point made these branches unreachable.
        if autolink == 'words' then
            text = mw.ustring.gsub(text, "([^{}%[%]%(%)%s,]+)", function (word)
                return '[[' .. word .. ']]'
            end)
        elseif autolink == 'all' then
            if mw.ustring.match(text, "^[^{}%[%]%(%)%s,]+$") then
                text = '[[' .. text .. ']]'
            end
        end

        if not mw.ustring.match(text, link_pattern) then
            error("No links provided in the SOP translation")
        end
    end

    if not lang then
        return text
    end

    local id = langinfo.names[1]

    -- Piped links. The page title must not contain "|" or "]", otherwise the
    -- match can start at an earlier unpiped link and swallow everything up to
    -- the pipe of a later one (e.g. in "[[a]] [[b|c]]").
    text = mw.ustring.gsub(text, "%[%[([^#|%]]-)|(.-)%]%]", function(pagetitle, linktitle)
        return "[[" .. export.make_pagename(pagetitle, lang) .. "#" .. id .. "|" .. linktitle .. "]]"
    end)
    -- Unpiped links. Links rewritten above now contain "#" and are skipped.
    text = mw.ustring.gsub(text, "%[%[([^#|%]]-)%]%]", function(pagetitle)
        return "[[" .. export.make_pagename(pagetitle, lang) .. "#" .. id .. "|" .. pagetitle .. "]]"
    end)

    return text
end

-- Format the annotations (things following the linked term)
--
-- Reads the keys interwiki, genders, tr, gloss, pos and lit from
-- `annotations`. The interwiki text and the gender list come first; the
-- remaining items are joined with ", " inside one pair of parentheses.
-- `face` only affects the markup of the transliteration; `lang` is not used.
function export.format_link_annotations(lang, annotations, face)
    -- Wraps a gloss-like text in curly double quotes with the mention classes.
    local function in_quotes(content)
        return "<span class=\"mention-gloss-double-quote\">“</span><span class='mention-gloss'>" .. content .. "</span><span class=\"mention-gloss-double-quote\">”</span>"
    end

    local prefix = ""

    -- Interwiki link
    local interwiki = annotations["interwiki"]
    if interwiki then
        prefix = prefix .. interwiki
    end

    -- Genders
    local genders = annotations["genders"]
    if genders and #genders > 0 then
        local m_gender = require("Module:gender and number")
        prefix = prefix .. " " .. m_gender.format_list(genders)
    end

    local items = {}

    -- Transliteration
    local tr = annotations["tr"]
    if tr then
        local opening = "<span lang=\"\">"
        if face == "term" then
            opening = "<span lang=\"\" class=\"mention-tr\">"
        end
        items[#items + 1] = opening .. tr .. "</span>"
    end

    -- Gloss/translation
    local gloss = annotations["gloss"]
    if gloss then
        items[#items + 1] = in_quotes(gloss)
    end

    -- Part of speech
    -- TODO: remove
    local pos = annotations["pos"]
    if pos then
        -- Use Template:pos <value> when that page exists, otherwise the raw value.
        local shown = pos
        local pos_template = mw.title.makeTitle("Template", "pos " .. pos)
        if pos_template and pos_template.exists then
            shown = mw.getCurrentFrame():expandTemplate{title = "pos " .. pos}
        end
        items[#items + 1] = shown
    end

    -- Literal/sum-of-parts meaning
    local lit = annotations["lit"]
    if lit then
        items[#items + 1] = "literally " .. in_quotes(lit)
    end

    if #items == 0 then
        return prefix
    end

    return prefix .. " (" .. table.concat(items, ", ") .. ")"
end

-- Creates a basic wikilink to the given term. If the text already contains
-- links, these are replaced with links to the correct section.
--
-- Parameters:
--   text      the term, optionally containing embedded [[...]] links
--   alt       display text, used only when text has no embedded links
--   lang      language code; must be a key of Module:languages/data/all
--   id        optional sense id, appended to the section anchor after "-"
--   curtitle  optional page name that should not get a section link
--
-- Returns the wikitext, with tracking categories appended where applicable.
-- Raises an error when text is missing or the language code is unknown.
function export.language_link(text, alt, lang, id, curtitle)
    local langinfo = m_languages[lang] or error("The language code \"" .. tostring(lang) .. "\" is not valid.")

    if not text then
        error("The text to link to has not been given")
    end

    -- Do not add a section link to "Undetermined".
    -- TabbedLanguages handles links without a section by linking to the "last visited"
    -- section, but adding "Undetermined" would mess that up when {{term}} lacks a language.
    if lang ~= "und" then
        id = "#" .. langinfo.names[1] .. (id and "-" .. id or "")
    else
        id = ""
    end

    -- Track input that already carries a section ("#") of its own.
    local sectFix = text:find("#", nil, true) ~= nil

    -- takes target page's title and linktitle and return a standard wikilink if necessary
    local core = function(target, linktitle)
        if linktitle == "" then linktitle = nil end
        -- Don't link to appendix if the language is undetermined
        if lang == "und" and target:sub(1, 1) == "*" then
            return linktitle or target
        end

        local target2 = export.make_pagename(target, lang)

        -- Don't link to the current page; return bold form of the linktitle
        if target2 == curtitle then
            return "[[" .. (linktitle or target) .. "]]"
        end

        local tracking = ""

        if linktitle then
            -- Work out which page the display text alone would link to, to
            -- see whether giving it separately was redundant.
            local new = linktitle
            if langinfo.type == "reconstructed" and new:sub(1, 1) ~= "*" then
                new = export.make_pagename("*" .. new, lang)
            else
                new = export.make_pagename(new, lang)
            end

            if target2 == new then
                tracking = "[[Category:Link alt form tracking/redundant]][[Category:Link alt form tracking/redundant/" .. lang .. "]]"
            elseif lang ~= "en" then
                tracking = "[[Category:Link alt form tracking/needed]]"
            end
        end

        return "[[" .. target2 .. id .. "|" .. (linktitle or target) .. "]]" .. tracking
    end

    -- Do we have embedded wikilinks?
    if text:find("[[", nil, true) then
        -- fix for linking to unattested terms that are consisted of more than one word
        if text:sub(1, 1) == "*" then
            text = mw.ustring.gsub(text, "%[%[([^%*][^#%]]-)|", "[[*%1|")
            text = mw.ustring.gsub(text, "%[%[([^%*][^#|]-)%]", "[[*%1|%1]")
        end

        -- find embedded wikilinks and improve them
        -- Piped and unpiped links are handled in a single pass, so that text
        -- produced by core (tracking category links, the self-link) is never
        -- picked up and rewritten a second time.
        text = mw.ustring.gsub(text, "%[%[(.-)%]%]", function(inner)
            local target, linktitle = inner:match("^([^#|%]]-)|(.*)$")
            if not target then
                target = inner:match("^([^#|%]]*)$")
            end
            if not target then
                -- Contains "#" or a stray "]": leave the link untouched.
                return nil
            end
            return core(target, linktitle)
        end)

        -- remove the extra "*" at the beginning
        text = mw.ustring.gsub(text, "^%*%[%[(.-)|%*", "[[%1|*")

        return text .. (sectFix and "[[Category:Link with section]]" or "")
    else
        -- there is no embedded wikilink
        return core(text, alt) .. (sectFix and "[[Category:Link with section]]" or "")
    end
end

-- Creates the appropriate page name from the given term.
-- This removes diacritics and adds Appendix: when necessary.
--
-- Parameters:
--   text  the term as displayed
--   lang  language code; must be a key of Module:languages/data/all
--
-- Returns the page name. Raises an error when the language code is unknown,
-- or when the language has type "reconstructed" but the term does not start
-- with "*".
function export.make_pagename(text, lang)
    -- Declared local so that the language data does not leak into a global.
    local langinfo = m_languages[lang] or error("The language code \"" .. lang .. "\" is not valid.")

    -- Remove diacritics from the page name
    text = export.remove_diacritics(text, lang)

    -- Link to appendix for reconstructed terms and terms in appendix-only languages
    if mw.ustring.sub(text, 1, 1) == "*" then
        -- Drop the leading "*" from the appendix subpage name.
        text = "Appendix:" .. langinfo.names[1] .. "/" .. mw.ustring.sub(text, 2)
    elseif langinfo.type == "reconstructed" then
        error("The specified language " .. langinfo.names[1] .. " is unattested,"
              .. " while the given word is not marked with '*' to indicate that it is reconstructed")
    elseif langinfo.type == "appendix-constructed" then
        text = "Appendix:" .. langinfo.names[1] .. "/" .. text
    end

    return text
end

-- Removes characters from a term that do not belong in the page name.
-- This includes any diacritics displayed in the headword line or alternative
-- display, but left out of the entry names.
--
-- Raises an error when the language code is unknown.
function export.remove_diacritics(text, lang)
    local data = m_languages[lang]
    if not data then
        error("The language code \"" .. lang .. "\" is not valid.")
    end

    -- Remove general punctuation (a single character at the very end)
    local cleaned = mw.ustring.gsub(text, "[؟?!՛՜ ՞ ՟]$", "")

    -- Replace diacritics and other characters according to the specifications of the language; see entry_name in [[Module:languages#Values]]
    local rules = data.entry_name
    if not rules then
        return cleaned
    end

    for index, pattern in ipairs(rules.from) do
        -- A missing replacement means the matched text is deleted.
        local replacement = rules.to[index] or ""
        cleaned = mw.ustring.gsub(cleaned, pattern, replacement)
    end

    return cleaned
end

-- Strips all square brackets out or replaces them.
--
-- A table argument is treated as a frame-like object and its args[1] is used;
-- a missing value is treated as the empty string. For piped links only the
-- part after the pipe is kept.
function export.remove_links(text)
    if type(text) == "table" then
        text = text.args[1]
    end
    text = text or ""

    -- Drop "[[target|" first, then any remaining opening and closing brackets.
    local stripped = text:gsub("%[%[[^|%]]-|", ""):gsub("%[%[", ""):gsub("%]%]", "")

    return stripped
end

return export