Module:etymon
Jump to navigation
Jump to search
- The following documentation is located at Module:etymon/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module provides the backend for {{etymon}}.
local export = {}
local m_links = require("Module:links")
local page_data = mw.loadData("Module:headword/data").page
local concat = table.concat
local find = string.find
local findTemplates = require("Module:template parser").findTemplates
local full_link = m_links.full_link
local get_lang = require("Module:languages").getByCode
local get_link_page = m_links.get_link_page
local insert = table.insert
local ipairs = ipairs
local max = math.max
local new_title = mw.title.new
local process_params = require("Module:parameters").process
local split = require("Module:string utilities").split
local sub = string.sub
local type = type
local unpack = unpack
local check_ancestor = require("Module:etymology").check_ancestor
--- Normalize a language so that the special handling of Chinese is accounted for.
-- A language that is in the Sinitic family (`zhx`) and is not also in the
-- creole/pidgin family (`crp`) is replaced by the language object for `zh`;
-- every other language is handed back untouched.
-- @param lang language object exposing `:inFamily(code)`
-- @return the `zh` language object, or `lang` itself
local function getNormLang(lang)
    -- Not Sinitic at all: nothing to normalize.
    if not lang:inFamily("zhx") then
        return lang
    end
    -- Sinitic-based creoles and pidgins keep their own identity.
    if lang:inFamily("crp") then
        return lang
    end
    return get_lang("zh")
end
--- Break an etymon parameter into its language, page and ID.
-- The parameter is split on ">". A two-part value (`page>id`) inherits the
-- language of the calling template; any other shape is read as
-- `langcode>page>id`, with the code resolved through `get_lang`.
-- @param lang language object of the template that contains the parameter
-- @param etymonParam raw parameter string
-- @param normalized when truthy, the page is passed through `get_link_page`
--   (using the unnormalized language) and the language through `getNormLang`
-- @return etymonLang, etymonPage, etymonId
local function getParts(lang, etymonParam, normalized)
    local pieces = split(etymonParam, ">", true, true)
    local etymonLang, etymonPage, etymonId

    if #pieces ~= 2 then
        -- Explicit language code in the first slot.
        etymonLang = get_lang(pieces[1], true, true)
        etymonPage, etymonId = pieces[2], pieces[3]
    else
        -- No language given: assume the language of the template call.
        etymonLang, etymonPage, etymonId = lang, pieces[1], pieces[2]
    end

    if not normalized then
        return etymonLang, etymonPage, etymonId
    end

    -- The page must be resolved before the language is normalized, because
    -- the lookup is made with the original language.
    local linkPage = get_link_page(etymonPage, etymonLang)
    return getNormLang(etymonLang), linkPage, etymonId
end
--- Scrape a page for `{{etymon}}` calls and record the requested one in `paramsOf`.
-- On return, `paramsOf[key]` holds one of:
--   * the argument table of the matching template,
--   * "redlink" if the page has no content,
--   * "missing" if no matching template was found (also after following one redirect).
-- Every well-formed `{{etymon}}` call met along the way is cached under
-- `<its first argument>&gt;<etymonPage>&gt;<its id>`.
-- @param etymonPage page name used when building cache keys
-- @param etymonTitle title object for that page (`:getContent()`, `.redirectTarget`)
-- @param paramsOf cache table, mutated in place
-- @param key cache key under which the result for this lookup is stored
-- @param etymonLang language code to match against the template's first argument
-- @param etymonId ID to match against the template's `id` argument
-- @param redirect truthy when this call is already the result of following a redirect
local function scrapePage(etymonPage, etymonTitle, paramsOf, key, etymonLang, etymonId, redirect)
    local pageContent = etymonTitle:getContent()
    if pageContent == nil then
        paramsOf[key] = "redlink"
        return
    end

    -- Search for the template on the page (even if this is a redirect page).
    for name, templateArgs in findTemplates(pageContent) do
        if name == "etymon" then
            local scrapedLang, scrapedId = templateArgs[1], templateArgs["id"]
            -- A call that lacks its language or `id` cannot be addressed by any
            -- key; skip it rather than let the concatenation below throw and
            -- take the whole lookup down with it.
            if scrapedLang ~= nil and scrapedId ~= nil then
                -- Might as well store whatever we find along the way to potentially save time later.
                paramsOf[scrapedLang .. ">" .. etymonPage .. ">" .. scrapedId] = templateArgs
                if scrapedLang == etymonLang and scrapedId == etymonId then
                    -- This "break" saves time only if etymonPage is only ever accessed once.
                    -- Since this is usually true, it is probably beneficial overall.
                    break
                end
            end
        end
    end

    -- If scraping produced a result, there's nothing left to do.
    if paramsOf[key] then
        return
    end
    -- If we've already followed a redirect and still found nothing, record the template as missing.
    if redirect then
        paramsOf[key] = "missing"
        return
    end

    -- Check if the page is a redirect, and if not record the template as missing.
    local redirectTarget = etymonTitle.redirectTarget
    if not redirectTarget then
        paramsOf[key] = "missing"
        return
    end

    -- Otherwise, try again (once) with the redirect target.
    etymonPage = redirectTarget.prefixedText
    local newKey = etymonLang .. ">" .. etymonPage .. ">" .. etymonId
    scrapePage(etymonPage, redirectTarget, paramsOf, newKey, etymonLang, etymonId, true)
    -- Record the value as the same as the redirect's.
    paramsOf[key] = paramsOf[newKey]
end
--- Look up the template parameters behind an etymon parameter.
-- Results are memoized in `paramsOf` under a normalized
-- `langcode>page>id` key, so each etymon is scraped at most once.
-- @param lang language object of the template containing `etymonParam`
-- @param etymonParam raw etymon parameter string
-- @param paramsOf cache table shared across lookups, mutated in place
-- @return a table of the params, "missing", "redlink", or "nolink"
local function getParams(lang, etymonParam, paramsOf)
    local etymonLang, etymonPage, etymonId = getParts(lang, etymonParam, true)

    -- "?" is a special value that unlinks the page.
    if etymonId == "?" then
        return "nolink"
    end

    local langCode = getNormLang(etymonLang):getFullCode()
    local key = langCode .. ">" .. etymonPage .. ">" .. etymonId

    -- Cache hit: the page was already scraped for this etymon.
    local cached = paramsOf[key]
    if cached ~= nil then
        return cached
    end

    local etymonTitle = new_title(etymonPage)
    if not etymonTitle then
        -- This shouldn't happen: all unsupported titles should be resolved at this stage.
        error("Invalid page title \"" .. etymonPage .. "\" encountered.")
    end

    -- scrapePage writes its result into paramsOf[key].
    scrapePage(etymonPage, etymonTitle, paramsOf, key, langCode, etymonId)
    return paramsOf[key]
end
-- Derivation keywords and the strings used to render them.
-- [tag]: {abbreviation, label glossary anchor, start text, start text plus, middle text}
--   [1] abbreviation shown as the label on a tree edge (false = no label)
--   [2] anchor on Appendix:Glossary that the tree label links to (false = none)
--   [3] text that starts a sentence
--   [4] text that starts a sentence, with a glossary link
--   [5] text used in the middle of a sentence
-- Note: the keywords `afeq`, `conf`, and `unc` are also recognized, but do not use this dictionary.
local keywordDict = {
    ["from"] = {false, false, "From", "From", "from"},
    ["inh"] = {false, false, "From", "[[Appendix:Glossary#inherited|Inherited]] from", "from"},
    ["af"] = {false, false, "From", "From", "from"},
    ["blend"] = {"blend.", "blend", "Blend of", "[[Appendix:Glossary#blend|Blend]] of", "a blend of"},
    ["bor"] = {"bor.", "loanword", "Borrowed from", "[[Appendix:Glossary#loanword|Borrowed]] from", "borrowed from"},
    ["lbor"] = {"lbor.", "learned_borrowing", "Learned borrowing from", "[[Appendix:Glossary#learned_borrowing|Learned borrowing]] from", "borrowed from"},
    ["obor"] = {"obor.", "orthographic_borrowing", "Orthographic borrowing from", "[[Appendix:Glossary#orthographic_borrowing|Orthographic borrowing]] from", "borrowed from"},
    ["slbor"] = {"slbor.", "semi-learned_borrowing", "Semi-learned borrowing from", "[[Appendix:Glossary#semi-learned_borrowing|Semi-learned borrowing]] from", "borrowed from"},
    ["der"] = {"der.", "derived_terms", "Derived from", "[[Appendix:Glossary#derived_terms|Derived]] from", "from"},
    ["calque"] = {"calq.", "calque", "Calque of", "[[Appendix:Glossary#calque|Calque]] of", "a calque of"},
    -- The anchor uses an underscore, matching the other anchors and the link in slot [4].
    ["sl"] = {"sl.", "semantic_loan", "Semantic loan of", "[[Appendix:Glossary#semantic_loan|Semantic loan]] of", "a semantic loan of"},
    ["influence"] = {"influ.", "contamination", "", "", ""}
}
-- This function takes an etymon and recursively builds a tree to display in an entry.
--
-- Parameters:
--   currTitle   title of the term at this node (displayed, and normalized with get_link_page for the key)
--   lang        language object of the term at this node
--   args        parameter table of the term's {{etymon}} call; anything that is not a table
--               (getParams can also return "missing", "redlink" or "nolink") makes this node a leaf
--   paramsOf    cache of scraped parameters, passed through to getParams
--   alreadySeen set of "langcode>page>id" keys already expanded, shared by the whole recursion
--   isTopLevel  true only for the outermost call; adds the collapsible NavFrame wrapper
--   isUncertain true if this node should carry the "?" uncertainty marker
--   label       derivation keyword (a key of keywordDict) linking this node to its child, or "" for none
--
-- Returns two values: the HTML string for this subtree, and its depth (1 for a leaf).
local function etyTree(currTitle, lang, args, paramsOf, alreadySeen, isTopLevel, isUncertain, label)
local maxDepth = 0
local subtree, subtreeDepth, etymonLang, etymonPage, etymonParams
local subtrees = {}
local normTitle = get_link_page(currTitle, lang)
-- Nodes without a parameter table have no ID; the empty string keeps the key well-formed.
local currId = ""
if type(args) == "table" then
currId = args["id"]
end
local key = getNormLang(lang):getFullCode() .. ">" .. normTitle .. ">" .. currId
-- Parser state while walking the parameters: the current derivation keyword, the current
-- confidence keyword, and whether etymons are being skipped (inside an `afeq` group).
local derType, confidence, ignoreEtymons = "from", "conf", false
-- Only recurse when an etymon has valid params and was not included in the tree previously.
if type(args) == "table" and alreadySeen[key] == nil then
-- Add the page to alreadySeen, which keeps track of what's already been added to the tree.
alreadySeen[key] = true
-- Loop over each parameter in the current template.
-- Index 1 holds the language code, so both branches below require i > 1.
for i, param in ipairs(args) do
if i > 1 and find(param, ">") and not ignoreEtymons then
-- We want to display the unnormalized language and page title.
-- We can't use args[1] here because that would give the normalized language code.
etymonLang, etymonPage = getParts(lang, param, false)
-- Scrape the page and get the parameters.
etymonParams = getParams(lang, param, paramsOf)
-- Recurse into the etymon and append its tree to the list of subtrees.
subtree, subtreeDepth = etyTree(etymonPage, etymonLang, etymonParams, paramsOf, alreadySeen, false, confidence == "unc", derType)
insert(subtrees, subtree)
maxDepth = max(maxDepth, subtreeDepth)
elseif i > 1 then
-- Reached a keyword.
if param == "conf" or param == "unc" then
confidence = param
elseif keywordDict[param] ~= nil then
-- A derivation keyword starts a new group: stop ignoring etymons and reset the confidence.
ignoreEtymons = false
confidence = "conf"
derType = param
elseif param == "afeq" then
-- Etymons in an `afeq` group are not added to the tree.
ignoreEtymons = true
else
error("Received unknown keyword: " .. param)
end
end
end
end
-- Create term block.
-- The top-level term is shown bold and unlinked (alt text only); other terms are linked,
-- with the ID included when there is one.
local link
if isTopLevel then
link = lang:getCanonicalName() .. " " .. full_link({lang=lang, alt="'''" .. currTitle .. "'''"}, "term")
elseif currId == "" then
link = lang:getCanonicalName() .. " " .. full_link({lang=lang, term=currTitle}, "term")
else
link = lang:getCanonicalName() .. " " .. full_link({lang=lang, term=currTitle, id=currId}, "term")
end
-- Create tree.
local tree = "<div style=\"position:relative;display:inline;z-index:1;vertical-align:bottom;margin:0 4px 0\"><p style=\"position:relative;margin:8px 0;display:inline-block;padding:5px 10px;background-color:#fffbf2;border:1px solid #ccc;border-radius:4px\">" .. link
-- Add a short top connector if multiple subtrees exist.
if #subtrees >= 2 then
tree = tree .. "<span style=\"position:absolute;z-index:-1;inset:-10px 50% 0 auto;border-right:2px solid #9e9e9e\"></span>"
end
tree = tree .. "</p>"
-- Add derivation and uncertainty labels.
-- A label is drawn only for keywords that have an abbreviation (slot [1] is not false).
if (label ~= "" and keywordDict[label][1] ~= false) or isUncertain then
tree = tree .. "<span style=\"position:absolute;z-index:2;transform:translate(-50%);top:calc(100% + 9px);left:50%;border-radius:2px;background-color:rgba(234,255,255,0.85);font-size:12px;height:10px;line-height:10px\">"
if label ~= "" and keywordDict[label][1] ~= false then
tree = tree .. "[[Appendix:Glossary#" .. keywordDict[label][2] .. "|<span style=\"color:black;font-style:italic\">" .. keywordDict[label][1] .. "</span>]]"
if isUncertain then
-- Add uncertainty label next to the derivation label.
tree = tree .. "<span class=\"desc-arr\" title=\"uncertain\" style=\"position:absolute;top:50%;transform:translate(0,-48%);left:calc(100% + 2px);font-size:10px;border-radius:2px;background-color:rgba(255,224,240,0.85);padding:1px 2px;font-weight:bold\">?</span>"
end
elseif isUncertain then
-- Add uncertainty label in the middle.
tree = tree .. "<span class=\"desc-arr\" title=\"uncertain\" style=\"position:absolute;top:50%;left:50%;transform:translate(calc(-50% - 1px),-48%);font-size:10px;border-radius:2px;background-color:rgba(255,224,240,0.85);padding:1px 2px;font-weight:bold\">?</span>"
end
tree = tree .. "</span>"
end
-- NOTE: the node must end with exactly "</div>" here; parent calls splice connectors in
-- front of these last six characters using sub(..., 1, -7).
tree = tree .. "</div>"
-- Add line break if parents exist.
if #subtrees >= 1 then
tree = "<br>" .. tree
end
-- Append subtrees. For cleaner HTML, only add text-align:center div when necessary.
local subtreeString = ""
if #subtrees == 1 then
-- Add a long bottom connector to the subtree.
-- Use sub() to insert it right before the </div>, which is a little hacky.
subtreeString = sub(subtrees[1], 1, -7) .. "<span style=\"position:absolute;z-index:-1;inset:0 50% 0 auto;height:50px;border-right:2px solid #9e9e9e\"></span></div>"
elseif #subtrees >= 2 then
for i,v in ipairs(subtrees) do
if i == 1 then
-- Add left connector.
v = v .. "<span style=\"position:absolute;inset:calc(100% - 9px) -20px 0 calc(50% - 2px);border-bottom:2px solid #9e9e9e;border-left:2px solid #9e9e9e;border-bottom-left-radius:4px\"></span>"
elseif i == #subtrees then
-- Add right connector.
v = v .. "<span style=\"position:absolute;inset:calc(100% - 9px) 50% 0 -20px;border-bottom:2px solid #9e9e9e;border-right:2px solid #9e9e9e;border-bottom-right-radius:4px\"></span>"
else
-- Add a short bottom connector before the </div>.
v = sub(v, 1, -7) .. "<span style=\"position:absolute;z-index:-1;inset:0 50% -14px auto;border-right:2px solid #9e9e9e\"></span></div>"
-- Add middle connector.
v = v .. "<span style=\"position:absolute;inset:calc(100% - 9px) -20px 0;border-bottom:2px solid #9e9e9e\"></span>"
end
-- Each sibling gets its own inline-block wrapper so the connectors position relative to it.
subtreeString = subtreeString .. "<div style=\"position:relative;display:inline-block\">" .. v .. "</div>"
end
end
-- Subtrees come first in the markup, followed by this node.
tree = subtreeString .. tree
-- Add outer divs.
if isTopLevel then
tree = "<div style=\"font-size:14px;white-space:nowrap;line-height:1.1;display:inline-block;text-align:center;margin-left:0.5em\">" .. tree .. "</div>"
tree = "<div class=\"NavFrame\" style=\"min-width:fit-content\"><div class=\"NavHead\" style=\"background:#eee\">Etymology tree</div><div class=\"NavContent\" style=\"text-align:left\">" .. tree .. "</div></div>"
end
-- Depth counts this node, so a leaf reports 1.
return tree, maxDepth + 1
end
--- Generate a one-sentence etymology ("From X, borrowed from Y.") for an entry.
-- Starting from the current template's parameters, the function follows the
-- chain of etymons for as long as each step is "simple": either a single
-- etymon, or a single `af` group (`influence` and `afeq` etymons are ignored).
-- Anything else is treated as too complex and ends the sentence.
-- @param title page title of the current term
-- @param lang language object of the current term
-- @param args parameter table of the current template (language code at [1], `id` field set)
-- @param paramsOf cache of scraped parameters, passed through to getParams
-- @param usePlusTemplates when truthy, the sentence opener uses the glossary-linked wording
-- @param maxDepth maximum number of steps to write, or a falsy value for no limit
-- @return the sentence (ending in "."), or "" if nothing could be written
local function etyText(title, lang, args, paramsOf, usePlusTemplates, maxDepth)
    local text = ""
    local depth = 1
    local alreadyWritten = {}
    local key, currLang, group, groupType, groupConfidence, confidence, derType, foundGroup, complexParams, ignoreEtymons, etymonLang, etymonTitle, etymonId
    -- `params` is the parameter table of the term currently being expanded;
    -- it starts as `args` and is replaced at every step of the chain.
    local params = args

    -- Loop and continuously expand the sentence until we reach the end of the chain.
    while not maxDepth or depth <= maxDepth do
        group, groupType, groupConfidence, confidence, derType, foundGroup, complexParams, ignoreEtymons, currLang = {}, "from", "conf", "conf", "from", false, false, false, lang
        -- Use the ID of the template being expanded (`params`), not the ID of
        -- the top-level template: otherwise two different etymologies on the
        -- same page share a key and the chain stops early.
        key = getNormLang(lang):getFullCode() .. ">" .. get_link_page(title, lang) .. ">" .. params["id"]
        -- Stop if we encounter an already-seen term.
        if alreadyWritten[key] ~= nil then
            break
        end
        alreadyWritten[key] = true

        for i, param in ipairs(params) do
            if i > 1 and find(param, ">") and not ignoreEtymons then
                -- The text should only continue if `args` is either (not including `influence` or `afeq` etymons):
                -- A single etymon, or single `af` group. Otherwise the parameters are too "complex" and are rejected.
                -- TODO: add smarter handling for complex parameters.
                if foundGroup or (#group == 1 and derType ~= "af") then
                    complexParams = true
                    break
                end
                groupType = derType
                if confidence == "unc" then
                    groupConfidence = "unc"
                end
                insert(group, param)
            elseif i > 1 then
                -- Reached a keyword.
                -- NOTE(review): the original comment here read "Check if af was only
                -- given one parameter", yet the test is for a group of exactly two.
                -- Kept as-is because the intent is unclear — confirm.
                if #group == 2 and derType == "af" then
                    complexParams = true
                    break
                end
                if param == "unc" or param == "conf" then
                    -- `conf` is a documented keyword and is accepted by the tree
                    -- builder and the sanity check, so it must not be rejected here.
                    confidence = param
                elseif param == "afeq" or param == "influence" then
                    ignoreEtymons = true
                    if #group == 1 then
                        foundGroup = true
                    end
                elseif keywordDict[param] ~= nil then
                    ignoreEtymons = false
                    confidence = "conf"
                    derType = param
                    if #group == 1 then
                        foundGroup = true
                    end
                else
                    error("Received unknown keyword: " .. param)
                end
            end
        end

        if complexParams or #group == 0 then
            break
        end
        -- Only a single etymon can be followed further down the chain.
        if #group == 1 then
            params = getParams(lang, group[1], paramsOf)
        end

        if text == "" then
            -- Start the sentence.
            if groupConfidence == "conf" and not usePlusTemplates then
                text = keywordDict[groupType][3]
            elseif groupConfidence == "conf" and usePlusTemplates then
                text = keywordDict[groupType][4]
            else
                text = "Possibly " .. keywordDict[groupType][5]
            end
        else
            -- Add a phrase onto the sentence.
            if groupConfidence == "conf" then
                text = text .. ", " .. keywordDict[groupType][5]
            else
                text = text .. ", possibly " .. keywordDict[groupType][5]
            end
        end

        -- Add the links. The language name is only written when it changes.
        for i = 1, #group do
            etymonLang, etymonTitle, etymonId = getParts(lang, group[i], false)
            if etymonLang ~= currLang then
                group[i] = etymonLang:makeWikipediaLink() .. " " .. full_link({lang=etymonLang, term=etymonTitle, id=etymonId}, "term")
                currLang = etymonLang
            else
                group[i] = full_link({lang=etymonLang, term=etymonTitle, id=etymonId}, "term")
            end
        end
        text = text .. " " .. concat(group, " + ")
        depth = depth + 1

        -- A multi-part group ends the chain. Testing for exactly two parts
        -- would let a longer `af` group fall through with `params` unchanged,
        -- and the same group would be written a second time.
        if #group >= 2 then
            break
        end
        lang = etymonLang
        title = etymonTitle
        -- "missing", "redlink" and "nolink" cannot be expanded any further.
        if type(params) ~= "table" then
            break
        end
    end

    -- Add period at the end of the sentence.
    if text ~= "" then
        text = text .. "."
    end
    return text
end
--- Validate the parameters of an {{etymon}} call, raising an error on the first problem found.
-- Checks performed:
--   * `id` has at least two characters and differs from the page title;
--   * every keyword is known (`unc`, `conf`, `afeq`, or a key of keywordDict);
--   * `from` etymons are in the entry's own language;
--   * `inh` etymons pass check_ancestor;
--   * borrowing/derivation/calque/semantic-loan etymons are NOT in the entry's own language;
--   * no `af`/`afeq` group consists of a single etymon.
-- @param lang language object of the entry
-- @param params list of keywords and etymon parameters (without the language code)
-- @param id value of the `id` parameter
-- @param title page title (possibly overridden) of the entry
local function paramsSanityCheck(lang, params, id, title)
    if mw.ustring.len(id) < 2 then
        error("The `id` parameter must have at least two characters. See the [[Template:etymon/documentation#Parameters|documentation]] for more details.")
    elseif id == title or id == page_data.pagename then
        error("The `id` parameter must not be the same as the page title. Be more creative. See the [[Template:etymon/documentation#Parameters|documentation]] for more details.")
    end

    -- Keywords whose etymons must come from a different language than the entry.
    local foreignOnly = {
        ["bor"] = true, ["lbor"] = true, ["obor"] = true, ["slbor"] = true,
        ["der"] = true, ["calque"] = true, ["sl"] = true,
    }

    local currKeyword = "from"
    -- Number of etymons seen in the current `af`/`afeq` group, and the first
    -- of them (kept for the error message).
    local afCount, firstAfParam = 0, nil

    -- Called whenever a group ends: at the next derivation keyword and at the
    -- end of the list. Checking at every boundary catches a lone `af` etymon
    -- even when another `af`/`afeq` group follows it.
    local function checkAfGroup()
        if afCount == 1 then
            error("Detected `af` or `afeq` group containing only a single etymon: `" .. firstAfParam .. "`; note that `af` and `afeq` groups must have at least two etymons. See the [[Template:etymon/documentation#Derivation keywords|documentation]] for more details.")
        end
    end

    for _, param in ipairs(params) do
        if find(param, ">") then
            local paramLang = getParts(lang, param, false)
            if currKeyword == "from" then
                if paramLang:getFullCode() ~= lang:getFullCode() then
                    error("Error: " .. param .. " is associated with `from` (same-language derivation) but is of language `" .. paramLang:getFullCode() .. "`, which does not match the current entry language (`" .. lang:getFullCode() .. "`); see the [[Template:etymon/documentation#Derivation keywords|documentation]] for more details.")
                end
            elseif currKeyword == "inh" then
                check_ancestor(lang, paramLang)
            elseif currKeyword == "af" or currKeyword == "afeq" then
                afCount = afCount + 1
                if afCount == 1 then
                    firstAfParam = param
                end
            elseif foreignOnly[currKeyword] and paramLang:getCode() == lang:getCode() then
                error("Error: " .. param .. " is associated with `" .. currKeyword .. "` but has the same language (`" .. paramLang:getCode() .. "`) as the current entry; see the [[Template:etymon/documentation#Derivation keywords|documentation]] for more details.")
            end
        elseif param ~= "unc" and param ~= "conf" then
            -- Validate every keyword as it is met, not only the last one.
            -- `afeq` is recognized although it has no keywordDict entry.
            if keywordDict[param] == nil and param ~= "afeq" then
                error("Unrecognized keyword: " .. param)
            end
            checkAfGroup()
            currKeyword = param
            afCount = 0
        end
    end
    checkAfGroup()
end
--- Entry point for {{etymon}}.
-- Reads the parent frame's arguments, validates them, and returns the wikitext
-- for the entry: an anchor element, optionally followed by the etymology tree
-- (`tree=`) and/or the etymology sentence (`text=`).
--
-- `text` accepts:
--   "++"  full chain, glossary-linked opener
--   "+"   first step only, glossary-linked opener
--   "-"   first step only, plain opener
--   any other value: full chain, plain opener
-- @param frame Scribunto frame object of the #invoke call
-- @return the generated wikitext
function export.main(frame)
    -- Process argument input.
    local boolean = {type = "boolean"}
    local args = process_params(frame:getParent().args, {
        [1] = {required = true, type = "language", default = "und"},
        [2] = {list = true, disallow_holes = true},
        ["id"] = {required = true},
        ["title"] = {},
        ["tree"] = boolean,
        -- `text` must stay a raw string: it is compared with "++", "+" and "-"
        -- below, and a boolean-typed parameter could never equal any of them.
        ["text"] = {},
    })
    local lang = args[1]

    -- Store non-numeric parameters as locals, then treat the main numeric list as `args`.
    local id = args["id"]
    local title = args["title"]
    local text = args["text"]
    local tree = args["tree"]
    args = args[2]

    -- The `title` parameter is used for overriding the page title.
    if title == nil then
        -- Get the canonical pagename.
        title = page_data.pagename
        -- Determine if current term is reconstructed.
        if page_data.namespace == "Reconstruction" or lang:hasType("reconstructed") then
            title = "*" .. title
        end
    end

    paramsSanityCheck(lang, args, id, title)

    -- Add the langcode and `id`, to match the format of scraped parameters.
    insert(args, 1, lang:getCode())
    args["id"] = id

    -- Add anchor to output.
    local output = {"<ul id=\"" .. lang:getFullName() .. ":_" .. id .. "\"></ul>"}

    -- Seed the cache with the current template's own parameters.
    -- NOTE(review): this key is built from the raw language code and title, whereas
    -- lookups in getParams use the normalized code and link page — confirm whether
    -- the seed is meant to be hit for normalized languages and reconstructions.
    local paramsOf = {[args[1] .. ">" .. title .. ">" .. id] = args}

    -- Insert tree. The extra parentheses drop etyTree's second return value (the depth).
    if tree then
        insert(output, (etyTree(title, lang, args, paramsOf, {}, true, false, "")))
    end

    -- Insert text.
    if text == "++" then
        insert(output, etyText(title, lang, args, paramsOf, true, false))
    elseif text == "+" then
        insert(output, etyText(title, lang, args, paramsOf, true, 1))
    elseif text == "-" then
        insert(output, etyText(title, lang, args, paramsOf, false, 1))
    elseif text ~= nil then
        insert(output, etyText(title, lang, args, paramsOf, false, false))
    end

    return concat(output)
end
return export