Module:User:Suzukaze-c/zh-l

From Wiktionary, the free dictionary
Jump to navigation Jump to search

What I don't like about {{zh-l}} automatically picking up Mandarin is that the pinyin may be misinterpreted as POJ or Cantonese Yale (or the other way around), or as some other romanization.

  • 中國中国中国中国 (M. Zhōngguó; M-S. Zong1gue2; DG. Җун1гуй2/Җун1гуә2; C. zung1 gwok3; C-T. zuung1 gok2; MD. Dṳ̆ng-guók; MN. ml,tw,pn:Tiong-kok; MN-T. dong1 gog4, China, proper noun, literally Middle Country)



  • CD (M. xīdì,sēidì,py=CD)
  • CD
  • CD (gloss)
  • CD (C. si1 di1)
  • CD (C. si1 di1, gloss)




  • 開/开
  • *原嚟如此
  • *原 (maybe there shouldn't be a romanization here? or there should be an asterisk in the pinyin/gloss)



local export = {}

local M = require("Module:zh")
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_script_utilities = require("Module:script utilities")
local m_test1 = require("Module:User:Suzukaze-c/zh-extract")

local lang = m_languages.getByCode("zh")
local varinfo = mw.loadData("Module:User:Suzukaze-c/zh/data/info").data

local match = mw.ustring.match
local gsub = mw.ustring.gsub
local split = mw.text.split

local match_Han = '[㐀-鿕𠀀-𬺡]'

-- Render the abbreviation marker for one variety code.
-- Looks the code up in `varinfo` and returns a wikilink to the entry's
-- `rom_w` page whose label is the upper-cased code followed by a full stop,
-- wrapped in an <abbr> element that carries the entry's `var` text as tooltip.
local function abbr_gen(abbr)
	local info = varinfo[abbr]
	local target = info['rom_w']
	local hover = info['var']
	local label = mw.ustring.upper(abbr) .. '.'
	local abbr_tag = '<abbr title="' .. hover .. '">' .. label .. '</abbr>'
	return '[[' .. target .. '|' .. abbr_tag .. ']]'
end

--- Build a formatted link to a Chinese term, annotated with romanisations.
--
-- Arguments are read from the parent frame (the calling template):
--   * positional arguments, in one of three layouts:
--       - `word[, gloss]` when the first argument contains a Han character;
--       - `word` alone when no second positional argument is given;
--       - `varieties, word[, gloss]` otherwise, where `varieties` is a
--         comma-separated list of variety codes (an empty first argument
--         suppresses the automatic romanisation lookup);
--     in the first two layouts the variety list defaults to "m";
--   * `pos`, `lit`: passed through unchanged to the link annotations;
--   * `tr`: manual romanisation, written as `variety:roman` sets separated
--     by "/"; when given, no automatic lookup is attempted;
--   * `s`: when set, a plain term (no "[[" and no "/") is always rendered as
--     "[[word]]/[[M.ts(word)]]", even if M.ts_determ does not report "trad".
--
-- Inside `word`, "@" (which is stripped) or "*" (which is kept) suppresses
-- linking; the term is then only script-tagged and annotated.
--
-- Romanisations are only looked up automatically when every lookup target
-- is an existing page.
--
-- @param frame Scribunto frame object of the #invoke call.
-- @return wikitext for the (possibly unlinked) term plus its annotations.
-- Raises an error when no term is given, when a requested variety has no
-- pronunciation on a looked-up page, or when a `tr` set is malformed.
function export.link(frame)
	local args = frame:getParent().args
	local first = args[1]
	if not first then
		-- previously this surfaced as an opaque "bad argument" error from match()
		error('No term has been given (parameter 1 is missing).')
	end

	local varieties, word, gloss
	if match(first, match_Han) then
		-- variety specification has been left out; $1 is definitely a word here as it is in the Han script
		varieties, word, gloss = 'm', first, args[2] or false
	elseif not args[2] then
		-- we have been given only a word, POSSIBLY in the Latin script, and nothing else
		varieties, word, gloss = 'm', first, false
	else
		varieties, word, gloss = first, args[2], args[3] or false
	end
	local pos = args["pos"] or false
	local lit = args["lit"] or false
	local manual_roman = args["tr"] or false
	local force_simp = args["s"] or false

	varieties = split(varieties, ",", true)

	-- link repression
	local no_link = false -- kept local so the flag cannot leak into the global environment
	if match(word, "@") then
		word = gsub(word, "@", "")
		no_link = true
	end
	if match(word, "%*") then
		-- the usual linguistic *
		no_link = true
	end

	-- cleanup
	-- NOTE(review): as written this replaces "/" with itself; possibly a
	-- different slash character was intended as the pattern -- confirm.
	word = gsub(word, "%/", "/")

	-- work out which page titles to look romanisations up on
	local filtered = gsub(word, '[^㐀-鿕𠀀-𬺡A-Za-z0-9|%[%]/-]', '') -- filter out things like punctuation
	local lookup_targets
	if match(word, "/") then
		-- allow roman to be picked up even with explicit alternate forms:
		-- only the first form is looked up
		lookup_targets = { split(filtered, "/", true)[1] }
	elseif match(word, "%[%[") then
		-- we have been given multiple terms
		local titles = gsub(filtered, "|[^%]]+", "") -- remove link titles if present
		titles = gsub(titles, "[%[%]]", " ") -- replace all square brackets with spaces
		titles = gsub(titles, " +", " ") -- reduce consecutive spaces
		titles = mw.text.trim(titles) -- remove excess spaces
		lookup_targets = split(titles, " ", true) -- now we have a table of each linked item (theoretically)
	else
		lookup_targets = { filtered }
	end

	-- check if all pages exist
	local pages_exist = true
	for _, target in ipairs(lookup_targets) do
		-- mw.title.new returns nil for an invalid title (e.g. an empty string
		-- left over after filtering), so guard before reading .exists
		local title = mw.title.new(target)
		if not (title and title.exists) then
			pages_exist = false
			break
		end
	end

	-- extract every requested pronunciation for every word
	local roman_for_each_word = {}
	if not manual_roman and pages_exist and varieties[1] ~= '' then
		for i, target in ipairs(lookup_targets) do
			local roman_all = m_test1.extract_roman(target, 1)
			local romans = {}
			for _, variety in ipairs(varieties) do
				romans[variety] = roman_all[variety]
					or error('"'..variety..'" pronunciation not found for [['..target..']]!')
			end
			roman_for_each_word[i] = romans
		end
	end

	-- assemble the transliteration string: one "ABBR. roman roman ..." group
	-- per variety, groups joined with "; "
	local tr
	if roman_for_each_word[1] then
		local groups = {}
		for j, variety in ipairs(varieties) do
			local parts = {}
			for i in ipairs(lookup_targets) do
				parts[i] = roman_for_each_word[i][variety]
			end
			groups[j] = abbr_gen(variety) .. ' ' .. table.concat(parts, ' ')
		end
		tr = table.concat(groups, '; ')
	elseif manual_roman then
		local groups = {}
		for _, set in ipairs(split(manual_roman, "/", true)) do
			-- split on the FIRST colon so that a romanisation which itself
			-- contains ":" is not cut into the variety code
			local variety, roman = match(set, "^([^:]+):(.+)$")
			if not variety then
				error('Malformed tr= value "' .. set .. '": expected "variety:romanisation".')
			end
			groups[#groups + 1] = abbr_gen(variety) .. ' ' .. roman
		end
		tr = table.concat(groups, '; ')
	else
		tr = false
	end

	-- finalize link
	if match(word, "%[%[") then
		-- "[[美國]][[聖地亞戈]]"→"[[美國]][[聖地亞戈]]/[[美国]][[圣地亚戈]]"
		word = word .. "/" .. M.ts(word)
	elseif match(word, "/") then
		-- "臺灣話/台灣話/台湾话"→"[[臺灣話]]/[[台灣話]]/[[台湾话]]"
		word = '[[' .. gsub(word, "/", "]]/[[") .. ']]'
	elseif M.ts_determ(word) == "trad" or force_simp then
		-- "附著"→"[[附著]]/[[附着]]"
		word = "[[" .. word .. "]]/[[" .. M.ts(word) .. "]]"
	end

	-- build the link
	local terminfo = {lang = lang, term = word, tr = tr, gloss = gloss, pos = pos, lit = lit}
	if no_link then
		-- the annotations are still built from terminfo; only the displayed
		-- term has its links stripped ("easier to destroy than create")
		word = m_links.remove_links(word)
		return m_script_utilities.tag_text(word, lang) .. m_links.format_link_annotations(terminfo)
	end

	return m_links.full_link(terminfo)
end

return export