Module:Unicode data/testcases
Jump to navigation
Jump to search
- The following documentation is located at Module:Unicode data/testcases/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • tested module • sandbox
2 of 89 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
Basic Latin | U+0000–U+007F | U+0000–U+007F | |
blah | nil | nil |
Text | Expected | Actual | |
---|---|---|---|
U+0023: # | Unsupported titles/Number sign | Unsupported titles/Number sign | |
U+0020: | Unsupported titles/Space | Unsupported titles/Space |
Text | Expected | Actual | |
---|---|---|---|
U+0061: a | true | true | |
U+0378 | false | false | |
U+40000 | false | false |
Text | Expected | Actual | |
---|---|---|---|
U+0060: ` | false | false | |
U+0300: ◌̀ | true | true | |
U+0378 | false | false | |
U+DC00 | false | false |
Text | Expected | Actual | |
---|---|---|---|
U+0000 | false | false | |
U+0020: | true | true | |
U+0061: a | true | true |
Text | Expected | Actual | |
---|---|---|---|
# | false | false | |
(space) | false | false | |
word | true | true |
Text | Expected | Actual | |
---|---|---|---|
U+0020: | true | true | |
U+0061: a | false | false |
Text | Expected | Actual | |
---|---|---|---|
U+0064: d | Basic Latin | Basic Latin | |
U+030B: ◌̋ | Combining Diacritical Marks | Combining Diacritical Marks | |
U+03A3: Σ | Greek and Coptic | Greek and Coptic | |
U+3175: ㅵ | Hangul Compatibility Jamo | Hangul Compatibility Jamo | |
U+AC01: 각 | Hangul Syllables | Hangul Syllables | |
U+10FFFF | Supplementary Private Use Area-B | Supplementary Private Use Area-B |
Text | Expected | Actual | |
---|---|---|---|
U+0009 | Cc | Cc | |
U+0020: | Zs | Zs | |
U+005B: [ | Ps | Ps | |
U+005D: ] | Pe | Pe | |
U+005E: ^ | Sk | Sk | |
U+0041: A | Lu | Lu | |
U+00AD | Cf | Cf | |
U+00BE: ¾ | No | No | |
U+00AB: « | Pi | Pi | |
U+00BB: » | Pf | Pf | |
U+0300: ◌̀ | Mn | Mn | |
U+0488: ҈ | Me | Me | |
U+0663: ٣ | Nd | Nd | |
U+5B50: 子 | Lo | Lo | |
U+1FAE: ᾮ | Lt | Lt | |
U+1B44: ◌᭄ | Mc | Mc | |
U+2208: ∈ | Sm | Sm | |
U+203F: ‿ | Pc | Pc | |
U+21B9: ↹ | So | So | |
U+2E17: ⸗ | Pd | Pd | |
U+2167: Ⅷ | Nl | Nl | |
U+2028 | Zl | Zl | |
U+2029 | Zp | Zp | |
U+309E: ゞ | Lm | Lm | |
U+D800 | Cs | Cs | |
U+FFE1: £ | Sc | Sc | |
U+FFFF | Cn | Cn | |
U+100000 | Co | Co |
Text | Expected | Actual | |
---|---|---|---|
U+203D: ‽ | |||
U+30A2: ア | |||
U+0B85: அ |
Text | Expected | Actual | |
---|---|---|---|
U+0000 | <control-0000> | <control-0000> | |
U+007F | <control-007F> | <control-007F> | |
U+00C1: Á | LATIN CAPITAL LETTER A WITH ACUTE | LATIN CAPITAL LETTER A WITH ACUTE | |
U+0300: ◌̀ | COMBINING GRAVE ACCENT | COMBINING GRAVE ACCENT | |
U+0378 | <reserved-0378> | <reserved-0378> | |
U+1B44: ◌᭄ | BALINESE ADEG ADEG | BALINESE ADEG ADEG | |
U+1F71: ά | GREEK SMALL LETTER ALPHA WITH OXIA | GREEK SMALL LETTER ALPHA WITH OXIA | |
U+3555: 㕕 | CJK UNIFIED IDEOGRAPH-3555 | CJK UNIFIED IDEOGRAPH-3555 | |
U+AC01: 각 | HANGUL SYLLABLE GAG | HANGUL SYLLABLE GAG | |
U+D5FF: 헿 | HANGUL SYLLABLE HEH | HANGUL SYLLABLE HEH | |
U+DC00 | <surrogate-DC00> | <surrogate-DC00> | |
U+EEEE | <private-use-EEEE> | <private-use-EEEE> | |
U+FDD1 | <noncharacter-FDD1> | <noncharacter-FDD1> | |
U+FFFD: � | REPLACEMENT CHARACTER | REPLACEMENT CHARACTER | |
U+FFFF | <noncharacter-FFFF> | <noncharacter-FFFF> | |
U+1F4A9: 💩 | PILE OF POO | PILE OF POO | |
U+E0000 | <reserved-E0000> | <reserved-E0000> | |
U+F0F0F | <private-use-F0F0F> | <private-use-F0F0F> | |
U+10FFFF | <noncharacter-10FFFF> | <noncharacter-10FFFF> |
Text | Expected | Actual | |
---|---|---|---|
U+005B: [ | Zyyy | Zyyy | |
U+0041: A | Latn | Latn | |
U+4E00: 一 | Hani | Hani | |
U+0300: ◌̀ | Zinh | Zinh | |
U+03CE: ώ | Grek | Grek | |
U+1F66: ὦ | Grek | Grek | |
U+0416: Ж | Cyrl | Cyrl | |
U+0478: Ѹ | Cyrl | Cyrl | |
U+A651: ꙑ | Cyrl | Cyrl | |
U+10D0: ა | Geor | Geor | |
U+10A0: Ⴀ | Geor | Geor | |
U+2D00: ⴀ | Geor | Geor | |
U+0021: ! | Zyyy | Zyyy | |
U+2F82B: 北 | Hani | Hani |
local tests = require("Module:UnitTests")
local m_str_utils = require("Module:string utilities")
local m_Unicode_data = require("Module:Unicode data")
local cp = m_str_utils.codepoint
local len = m_str_utils.len
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char
--- Wrap whitespace in a light-gray highlighted span so it is visible in the
-- rendered test table.
-- Ordinary spaces are written as the explicit `&nbsp;` entity rather than as a
-- literal (and visually indistinguishable) space character, so the
-- substitution is unambiguous in the source; presumably this keeps the
-- highlighted span from rendering empty.
-- @param whitespace string: the whitespace text to display.
-- @return string: HTML `<span>` markup containing the text.
local function show_whitespace(whitespace)
	-- gsub also returns a match count; binding to one local discards it.
	local visible = whitespace:gsub(" ", "&nbsp;")
	return '<span style="background-color: lightgray;">' .. visible .. '</span>'
end
--- Build the label shown in the "Text" column for a code point.
-- Non-printable code points are shown as "U+XXXX" only. Printable ones are
-- shown as "U+XXXX: <character>", where the character is
--   * replaced by a numeric character reference if it changes under NFC,
--   * wrapped by show_whitespace if it is whitespace,
--   * prefixed with a dotted circle if it is combining.
-- @param codepoint number: the code point to display.
-- @return string: the label.
local function show(codepoint)
	if not m_Unicode_data.is_printable(codepoint) then
		return ("U+%04X"):format(codepoint)
	end

	local glyph = u(codepoint)
	if toNFC(glyph) ~= glyph then
		-- The literal character is not stable under NFC, so refer to it by
		-- number instead.
		glyph = ("&#x%X;"):format(codepoint)
	end
	if m_Unicode_data.is_whitespace(codepoint) then
		glyph = show_whitespace(glyph)
	end
	if m_Unicode_data.is_combining(codepoint) then
		glyph = "◌" .. glyph
	end
	return ("U+%04X: %s"):format(codepoint, glyph)
end
--- Wrap text in a `<span>` whose class attribute is the given value.
-- Not called anywhere in the visible part of this file.
-- @param char the text placed inside the span.
-- @param sc the value used for the span's `class` attribute.
-- @return string: the span markup.
local function tag(char, sc)
	return ('<span class="%s">%s</span>'):format(sc, char)
end
--- Identity function: returns every argument unchanged.
-- Used as the default `display` callback in test_lookup.
local function return_all(...)
	return ...
end
--- Register a test case for one of the module's `lookup_<what>` functions.
-- The test is stored in `tests` under the key "test_lookup_<what>". For each
-- example it compares the function's result for the code point against the
-- expected value, both passed through `display` first.
-- @param what string: suffix of the function name, e.g. "name".
-- @param examples table: list of `{ codepoint, expected_result }` pairs.
-- @param display function (optional): converts a result for display and
--   comparison; defaults to return_all.
local function test_lookup(what, examples, display)
	local lookup_name = "lookup_" .. what
	-- The function is resolved once, when the test is registered.
	local lookup = m_Unicode_data[lookup_name]
	local render = display or return_all

	local function check_example(self, codepoint, expected)
		self:equals(show(codepoint), render(lookup(codepoint)), render(expected))
	end

	tests["test_" .. lookup_name] = function (self)
		self:iterate(examples, check_example)
	end
end
-- Examples for lookup_category: each entry is { code point, expected value }.
test_lookup(
	"category",
	{
		{ cp("\t"), "Cc" },
		{ cp(" "), "Zs" },
		{ cp("["), "Ps" },
		{ cp("]"), "Pe" },
		{ cp("^"), "Sk" },
		{ cp("A"), "Lu" },
		{ 0x00AD, "Cf" },
		{ cp("¾"), "No" },
		{ cp("«"), "Pi" },
		{ cp("»"), "Pf" },
		{ 0x0300, "Mn" },
		{ 0x0488, "Me" },
		{ cp("٣"), "Nd" },
		{ cp("子"), "Lo" },
		{ cp("ᾮ"), "Lt" },
		{ 0x1B44, "Mc" },
		{ cp("∈"), "Sm" },
		{ cp("‿"), "Pc" },
		{ cp("↹"), "So" },
		{ cp("⸗"), "Pd" },
		{ cp("Ⅷ"), "Nl" },
		{ 0x2028, "Zl" },
		{ 0x2029, "Zp" },
		{ cp("ゞ"), "Lm" },
		{ 0xD800, "Cs" },
		{ cp("£"), "Sc" },
		{ 0xFFFF, "Cn" },
		{ 0x100000, "Co" },
	})
-- Examples for lookup_name: each entry is { code point, expected name }.
-- Expected values in angle brackets are labels rather than character names.
test_lookup(
	"name",
	{
		{ 0x0000, "<control-0000>" },
		{ 0x007F, "<control-007F>" },
		{ 0x00C1, "LATIN CAPITAL LETTER A WITH ACUTE" },
		{ 0x0300, "COMBINING GRAVE ACCENT" },
		{ 0x0378, "<reserved-0378>" },
		{ 0x1B44, "BALINESE ADEG ADEG" },
		{ 0x1F71, "GREEK SMALL LETTER ALPHA WITH OXIA" },
		{ 0x3555, "CJK UNIFIED IDEOGRAPH-3555" },
		{ 0xAC01, "HANGUL SYLLABLE GAG" },
		{ 0xD5FF, "HANGUL SYLLABLE HEH" },
		{ 0xDC00, "<surrogate-DC00>" },
		{ 0xEEEE, "<private-use-EEEE>" },
		{ 0xFDD1, "<noncharacter-FDD1>" },
		{ 0xFFFD, "REPLACEMENT CHARACTER" },
		{ 0xFFFF, "<noncharacter-FFFF>" },
		{ 0x1F4A9, "PILE OF POO" },
		{ 0xE0000, "<reserved-E0000>" },
		{ 0xF0F0F, "<private-use-F0F0F>" },
		{ 0x10FFFF, "<noncharacter-10FFFF>" },
	})
-- Examples for lookup_script: each entry is { code point, expected script code }.
test_lookup(
	"script",
	{
		{ cp("["), "Zyyy" },
		{ cp("A"), "Latn" },
		{ cp("一"), "Hani" },
		{ 0x0300, "Zinh" },
		{ cp("ώ"), "Grek" },
		{ cp("ὦ"), "Grek" },
		{ cp("Ж"), "Cyrl" },
		{ cp("Ѹ"), "Cyrl" },
		{ cp("ꙑ"), "Cyrl" },
		{ cp("ა"), "Geor" },
		{ cp("Ⴀ"), "Geor" },
		{ cp("ⴀ"), "Geor" },
		{ cp("!"), "Zyyy" },
		{ 0x2F82B, "Hani" },
	})
-- Examples for lookup_block: each entry is { code point, expected block name }.
test_lookup(
	"block",
	{
		{ 0x0064,   "Basic Latin" },
		{ 0x030B,   "Combining Diacritical Marks" },
		{ 0x03A3,   "Greek and Coptic" },
		{ 0x3175,   "Hangul Compatibility Jamo" },
		{ 0xAC01,   "Hangul Syllables" },
		{ 0x10FFFF, "Supplementary Private Use Area-B" },
	})
-- Examples for lookup_image: each entry is { code point, expected file title }.
-- Expected and actual titles are both turned into file-link wikitext by the
-- display callback before they are compared.
test_lookup(
	"image",
	{
		{ 0x203D, "Interrobang.svg" },
		{ 0x30A2, "Japanese Katakana A.svg" },
		{ 0x0B85, "Tamil-alphabet-அஅ.svg" },
	},
	function (image_title)
		local link_target = "[[File:" .. image_title
		return link_target .. "|frameless|14px]]"
	end)
--- Register a test case for one of the module's `is_<what>` functions.
-- The test is stored in `tests` under the key "test_is_<what>" and compares
-- the function's result for each code point against the expected value.
-- @param what string: suffix of the function name, e.g. "assigned".
-- @param examples table: list of `{ codepoint, expected_result }` pairs.
local function test_is(what, examples)
	local predicate_name = "is_" .. what
	-- The function is resolved once, when the test is registered.
	local predicate = m_Unicode_data[predicate_name]

	local function check_example(self, codepoint, expected)
		self:equals(show(codepoint), predicate(codepoint), expected)
	end

	tests["test_" .. predicate_name] = function (self)
		self:iterate(examples, check_example)
	end
end
-- Examples for is_assigned: each entry is { code point, expected result }.
test_is(
	"assigned",
	{
		{ 0x0061,  true },
		{ 0x0378,  false },
		{ 0x40000, false },
	})
-- Examples for is_combining: each entry is { code point, expected result }.
test_is(
	"combining",
	{
		{ cp("`"), false },
		{ 0x0300, true },
		{ 0x0378, false },
		{ 0xDC00, false },
	})
-- Examples for is_printable: each entry is { code point, expected result }.
test_is(
	"printable",
	{
		{ 0x0000, false },
		{ 0x0020, true },
		{ 0x0061, true },
	})
-- Examples for is_whitespace: each entry is { code point, expected result }.
test_is(
	"whitespace",
	{
		{ 0x0020, true },
		{ 0x0061, false },
	})
--- Test get_block_range: a block name should map to its first and last code
-- points, and an unknown name should yield no range.
function tests:test_get_block_range()
	-- Render a range as "U+XXXX–U+XXXX", or as "nil" when there is no lower
	-- bound.
	local function display_block_range(low, high)
		if low ~= nil then
			return ("U+%04X–U+%04X"):format(low, high)
		end
		return "nil"
	end

	local examples = {
		{ "Basic Latin", { 0x0000, 0x007F } },
		{ "blah", { nil, nil } },
	}

	local function check_example(self, block_name, block_range)
		local actual = display_block_range(
			m_Unicode_data.get_block_range(block_name))
		local expected = display_block_range(unpack(block_range))
		self:equals(block_name, actual, expected)
	end

	self:iterate(examples, check_example)
end
--- Test is_valid_pagename against a few sample page names.
-- A page name consisting of a single whitespace character is highlighted in
-- the label column so that the row is not visually blank.
function tests:test_is_valid_pagename()
	local examples = {
		{ "#", false },
		{ " ", false },
		{ "word", true },
	}

	local function check_example(self, pagename, validity)
		local label = pagename
		-- cp is only called once the name is known to be one character long.
		if len(pagename) == 1
			and m_Unicode_data.is_whitespace(cp(pagename)) then
			label = show_whitespace(pagename)
		end
		self:equals(label, m_Unicode_data.is_valid_pagename(pagename), validity)
	end

	self:iterate(examples, check_example)
end
--- Test get_entry_title for code points whose expected entry title is a
-- subpage of "Unsupported titles/". Both sides are wrapped in [[...]] so the
-- results table shows them as links.
function tests:test_get_entry_title()
	local examples = {
		{ cp("#"), "Number sign" },
		{ cp(" "), "Space" },
	}

	local function check_example(self, codepoint, entry_title)
		local label = show(codepoint)
		local actual = "[[" .. m_Unicode_data.get_entry_title(codepoint) .. "]]"
		local expected = "[[Unsupported titles/" .. entry_title .. "]]"
		self:equals(label, actual, expected)
	end

	self:iterate(examples, check_example)
end
-- Report, via mw.log, every function exported by the tested module for which
-- no "test_<name>" entry has been registered in `tests`.
-- This must run before the test keys are renamed further down.
for name, member in pairs(require "Module:Unicode data") do
	local is_untested_function = type(name) == "string"
		and type(member) == "function"
		and not tests["test_" .. name]
	if is_untested_function then
		mw.log(name)
	end
end
-- Re-key every "test_<name>" entry as "test <code><name></code>", replacing
-- the old key with the new one. Keys that do not match the pattern are left
-- as they are.
for name, test_fn in require("Module:table").sortedPairs(tests) do
	if type(name) == "string" then
		-- gsub's second result (the match count) is discarded here.
		local title = name:gsub("^test_(.+)$", "test <code>%1</code>")
		if title ~= name then
			tests[name] = nil
			tests[title] = test_fn
		end
	end
end

return tests