Module:character info: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
|caption=
+decimal
Line 123: Line 123:
)
)
)
)

local codepointstr = ('%.4X'):format(codepoint)
table.insert(table_markup,
'|-\n! [[w:List of XML and HTML character entity references|Entity number]]\n| &#'.. tonumber(codepointstr, 16) .. ';\n'
)


local nfd = mw.ustring.toNFD(mw.ustring.char(codepoint))
local nfd = mw.ustring.toNFD(mw.ustring.char(codepoint))

Revision as of 20:02, 27 April 2015

This module generates content for {{character info}} and determines the condition under which {{editnotice-exotic symbols}} is displayed when in edit mode in the main namespace (through MediaWiki:Editnotice-0).

To be fixed:

  • Code points with labels beginning in < are given as unassigned (see the box for U+007E in the entry ~ and the box in the entry for U+F900).

-- Unicode lookups used below: get_script, lookup_name, lookup_block,
-- is_printable and get_entry_title.
local m_unicode = require('Module:Unicode data')
-- Read-only alias data, indexed by code point; each entry is a list of
-- { alias_type, alias_name } pairs (see the loop over `aliases` in export.show).
local m_uni_aliases = mw.loadData('Module:Unicode data/aliases')
-- Script data, indexed by script code; export.show reads the `canonicalName`
-- and `character_category` fields of each entry.
local m_scripts = require("Module:scripts/data")

-- Table of functions exposed by this module.
local export = {}

-- Upper bound of the Unicode code space; used to validate code points and to
-- avoid presenting a "neighbour" that lies outside it.
local MAX_CODEPOINT = 0x10FFFF

-- Determines which code point the box describes.
--
-- If the `codepoint` template argument is present it is interpreted first as
-- a number (anything tonumber() accepts) and otherwise as a single character,
-- after HTML entities have been decoded. If it is absent, the current page
-- title is used: U+FFFD when the page is the invoking template itself,
-- otherwise the title must be exactly one character long.
--
-- Raises an error when the argument or title cannot be turned into a single
-- code point, or when a numeric argument is not an integer in the range
-- 0..0x10FFFF.
local function resolve_codepoint(frame, args)
	local codepoint = args.codepoint

	if codepoint then
		codepoint = tonumber(codepoint) or mw.text.decode(codepoint)

		if type(codepoint) == "string" then
			if mw.ustring.len(codepoint) ~= 1 then
				error("Unrecognised string given for the codepoint parameter")
			end
			-- Parentheses keep only the first return value.
			return (mw.ustring.codepoint(codepoint))
		elseif type(codepoint) ~= "number" then
			error("Unrecognised string given for the codepoint parameter")
		end

		-- Reject values that cannot be code points before they reach
		-- mw.ustring.char or the lookup tables. Written as a negated
		-- conjunction so that NaN is rejected as well.
		if not (codepoint >= 0 and codepoint <= MAX_CODEPOINT and codepoint % 1 == 0) then
			error("The codepoint parameter is not a valid Unicode code point")
		end
		return codepoint
	end

	local title = mw.title.getCurrentTitle()
	if title.fullText == frame:getParent():getTitle() then
		-- The template is being viewed on its own page.
		return 0xfffd
	elseif mw.ustring.len(title.fullText) == 1 then
		return (mw.ustring.codepoint(title.fullText))
	end

	error("Page title is not a single Unicode character")
end

-- Builds the wikitext for one of the two navigation cells at the bottom of
-- the box.
--
-- cp: the neighbouring code point to present.
-- np: true for the "next" cell (arrow on the right), false for the
--     "previous" cell (arrow on the left).
--
-- Returns an empty string when cp lies outside 0..0x10FFFF, i.e. when the
-- box describes the first or last code point and has no neighbour on that
-- side.
local function present_codepoint(cp, np)
	if cp < 0 or cp > MAX_CODEPOINT then
		return ""
	end

	local display = ""
	local link_target

	-- Only printable characters are shown and linked; for the others the
	-- cell contains just the code point label.
	if m_unicode.is_printable(cp) then
		link_target = m_unicode.get_entry_title(cp)

		display = ('<bdi style="font-size: large;" class="%s">&#%u;</bdi>'):format(
			m_unicode.get_script(cp), cp
		)
	end

	return (
		(link_target and '[[%s|<span title="%s">' or '<!-- %s --><span title="%s">') ..
		(np and '<small>[U+%04X]</small> %s <!-- U+%04X --> →'
		or '← <!-- U+%04X --> %s <small>[U+%04X]</small>') ..
		(link_target and '</span>]]' or '</span>')
	):format(
		link_target or "", mw.text.encode(m_unicode.lookup_name(cp)),
		cp, display, cp
	)
end

-- Entry point: builds the character information box as wikitext.
--
-- frame: the frame object of the #invoke call; the arguments are read from
--        its parent frame (the template call).
--
-- Template arguments read here:
--   codepoint         code point as a number or a single character
--                     (default: derived from the page title)
--   image, caption    optional image (with or without File:/Image: prefix
--                     and link brackets) and caption shown at the top
--   sc                script code overriding the one looked up for the
--                     code point
--   nocat             if set, no category is added
--   latex, latex2...  LaTeX input sequences, listed in numeric order until
--                     the first missing one
--
-- Returns the table markup as one string, followed by a category link when
-- the page is in namespace 0 or 100 and a category applies.
--
-- Raises an error if the code point cannot be determined or if the script
-- code has no entry in the script data.
function export.show(frame)
	local args = frame:getParent().args
	local codepoint = resolve_codepoint(frame, args)

	local table_markup = {}
	table.insert(table_markup,
		'{| style="border:1px solid #aaa; border-spacing:5px; background-color:#f9f9f9; color:black; margin:0 0 0.5em 0.5em; padding:4px; float:right; clear:right; width:260px; text-align:left; font-size:90%; line-height:1.5em;"\n')

	local image = args.image
	if image then
		-- Accept "File:X", "[[File:X|...]]" and the Image: equivalents by
		-- stripping everything except the bare file name.
		if image:match("^%[?%[?File:") or image:match("^%[?%[?Image:") then
			image = image:gsub("^%[%[", ""):gsub("^File:", ""):gsub("^Image:", ""):gsub("|.*", ""):gsub("]]", "")
		end
		image = "[[File:" .. image .. "|280x200px]]"

		table.insert(table_markup,
			('|-\n| colspan="2" style="text-align: center;" | %s<br/>%s\n'):format(
				image, args.caption or ""
			)
		)
	elseif args.caption then
		table.insert(table_markup,
			('|-\n| colspan="2" style="text-align: center;" | %s\n'):format(
				args.caption
			)
		)
	end

	local script_code = args.sc or m_unicode.get_script(codepoint)
	local script_data = m_scripts[script_code]
	if not script_data then
		-- Previously this surfaced as "attempt to index a nil value".
		error("Unrecognised script code: " .. tostring(script_code))
	end
	local script_name = script_data.canonicalName

	local NAMESPACE = mw.title.getCurrentTitle().namespace

	local cat_name
	if not args.nocat and ((NAMESPACE == 0) or (NAMESPACE == 100)) then -- main and Appendix
		if script_data.character_category ~= nil then
			-- false means no category, overriding the default below
			cat_name = script_data.character_category or nil
		elseif script_name then
			cat_name = script_name .. " script characters"
		end
	end

	table.insert(table_markup,
		('|-\n! Character\n| style="font-size: large;" | <bdi class="%s">&#%u;</bdi>\n'):format(
			script_code, codepoint
		)
	)

	table.insert(table_markup,
		('|-\n! Unicode name\n| style="font-size: smaller;" | %s\n'):format(
			mw.text.encode(m_unicode.lookup_name(codepoint))
		)
	)

	local aliases = m_uni_aliases[codepoint]
	if aliases then
		-- Group the (HTML-encoded) alias names by alias type.
		local classif = {}
		for _, alias in ipairs(aliases) do
			if not classif[alias[1]] then
				classif[alias[1]] = {}
			end
			table.insert(classif[alias[1]], mw.text.encode(alias[2]))
		end

		if classif.correction then
			for _, name in ipairs(classif.correction) do
				table.insert(table_markup,
					('|-\n! Corrected name\n| style="font-size: smaller;" | %s\n'):format(
						name
					)
				)
			end
		end

		if classif.abbreviation then
			table.insert(table_markup,
				('|-\n! Abbreviation(s)\n| | %s\n'):format(
					table.concat(classif.abbreviation, ", ")
				)
			)
		end

		if classif.alternate then
			for _, name in ipairs(classif.alternate) do
				table.insert(table_markup,
					('|-\n! Alternative name\n| style="font-size: smaller;" | %s\n'):format(
						name
					)
				)
			end
		end
	end

	table.insert(table_markup,
		('|-\n! Code point\n| [http://unicode.org/cldr/utility/character.jsp?a=%.4X U+%.4X]\n'):format(
			codepoint, codepoint
		)
	)

	-- Decimal entity number, shown as literal text (hence the escaped
	-- ampersand). Formatted directly from the number; there is no need to
	-- go through a hexadecimal string and parse it back.
	table.insert(table_markup,
		('|-\n! [[w:List of XML and HTML character entity references|Entity number]]\n| &amp;#%d;\n'):format(
			codepoint
		)
	)

	-- Show the canonical decomposition only when it differs from the
	-- character itself, i.e. when it is more than one code point long.
	local nfd = mw.ustring.toNFD(mw.ustring.char(codepoint))
	if mw.ustring.len(nfd) ~= 1 then
		local nfdcps = {}
		for nfdcp in mw.ustring.gcodepoint(nfd) do
			table.insert(nfdcps, ("U+%.4X"):format(nfdcp))
		end

		table.insert(table_markup,
			('|-\n! Decomposed form\n| %s\n'):format(
				table.concat(nfdcps, " ")
			)
		)
	end

	local block_name = mw.text.encode(m_unicode.lookup_block(codepoint))
	table.insert(table_markup,
		('|-\n! Unicode block\n| [[Appendix:Unicode/%s|%s]]\n'):format(
			block_name, block_name
		)
	)

	if args.latex then
		-- latex, latex2, latex3, ... are collected until the first gap.
		local latex, n = { '<code>' .. args.latex .. '</code>' }, 2
		while args["latex" .. n] do
			table.insert(latex, '<code>' .. args["latex" .. n] .. '</code>')
			n = n + 1
		end
		table.insert(table_markup,
			('|-\n! LaTeX input\n| %s\n'):format(
				table.concat(latex, ", ")
			)
		)
	end

	-- Previous / next navigation row, laid out as a nested table.
	table.insert(table_markup, (
		'|-\n| colspan="2" |\n' ..
		'{| style="width: 100%%;"\n' ..
		'|-\n' ..
		'| style="text-align: left;"  | %s\n' ..
		'| style="text-align: right;" | %s\n' ..
		'|}\n'):format(
			present_codepoint(codepoint - 1, false),
			present_codepoint(codepoint + 1, true)
		)
	)

	table.insert(table_markup, '|}')

	if cat_name then
		-- Sort key is a space followed by the character itself.
		table.insert(table_markup, "[[Category:" .. cat_name .. "| " .. mw.ustring.char(codepoint) .. "]]")
	end

	return table.concat(table_markup)
end

-- Hand the table of exported functions back to the caller of require/#invoke.
return export