Module:User:Erutuon/script recognition

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

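This module generated the codepoint-to-script lookup table in Module:Unicode data/scripts. The table shown below appears to be the module's dumped output (see export.show); the module's source code follows it.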

{
	[0] = {
		{ 0x00041, 0x0005A, "Latn" },
		{ 0x00061, 0x0007A, "Latn" },
		{ 0x000C0, 0x000D6, "Latn" },
		{ 0x000D8, 0x000F6, "Latn" },
		{ 0x000F8, 0x0024F, "Latn" },
		{ 0x00370, 0x003E1, "Grek" },
		{ 0x003E2, 0x003EF, "Copt" },
		{ 0x003F0, 0x003FF, "Grek" },
		{ 0x00400, 0x0045F, "Cyrl" },
		{ 0x00400, 0x00527, "Cyrs" },
		{ 0x0048A, 0x00527, "Cyrl" },
		{ 0x00531, 0x0058F, "Armn" },
		{ 0x00590, 0x005FF, "Hebr" },
		{ 0x00600, 0x006FF, "Arab" },
		{ 0x00700, 0x0074F, "Syrc" },
		{ 0x00750, 0x0077F, "Arab" },
		{ 0x00780, 0x007B1, "Thaa" },
		{ 0x007C0, 0x007FA, "Nkoo" },
		{ 0x00800, 0x0083E, "Samr" },
		{ 0x00840, 0x0085E, "Mand" },
		{ 0x00860, 0x0086A, "Syrc" },
		{ 0x008A0, 0x008FF, "Arab" },
		{ 0x00900, 0x0097F, "Deva" },
		{ 0x00981, 0x009FA, "Beng" },
		{ 0x00A01, 0x00A75, "Guru" },
		{ 0x00A81, 0x00AF1, "Gujr" },
		{ 0x00B01, 0x00B77, "Orya" },
		{ 0x00B82, 0x00BFA, "Taml" },
		{ 0x00C01, 0x00C7F, "Telu" },
		{ 0x00C82, 0x00CF2, "Knda" },
		{ 0x00D02, 0x00D7F, "Mlym" },
		{ 0x00D82, 0x00DF4, "Sinh" },
		{ 0x00E01, 0x00E5B, "Thai" },
		{ 0x00E81, 0x00EDF, "Laoo" },
		{ 0x00F00, 0x00FDA, "Tibt" },
		["length"] = 35,
	},
	[1] = {
		{ 0x01000, 0x0109F, "Mymr" },
		{ 0x010A0, 0x010CD, "Geok" },
		{ 0x010A0, 0x010FC, "Geor" },
		{ 0x01100, 0x011FF, "Hang" },
		{ 0x01200, 0x01399, "Ethi" },
		{ 0x013A0, 0x013F4, "Cher" },
		{ 0x01400, 0x0167F, "Cans" },
		{ 0x01680, 0x0169C, "Ogam" },
		{ 0x016A0, 0x016F0, "Runr" },
		{ 0x01700, 0x01714, "Tglg" },
		{ 0x01720, 0x01734, "Hano" },
		{ 0x01740, 0x01753, "Buhd" },
		{ 0x01760, 0x01773, "Tagb" },
		{ 0x01780, 0x017F9, "Khmr" },
		{ 0x01800, 0x018AA, "Mong" },
		{ 0x01900, 0x0194F, "Limb" },
		{ 0x01950, 0x01974, "Tale" },
		{ 0x01980, 0x019DF, "Talu" },
		{ 0x019E0, 0x019FF, "Khmr" },
		{ 0x01A00, 0x01A1F, "Bugi" },
		{ 0x01A20, 0x01AAD, "Lana" },
		{ 0x01B00, 0x01B7C, "Bali" },
		{ 0x01B80, 0x01BBF, "Sund" },
		{ 0x01BC0, 0x01BFF, "Batk" },
		{ 0x01C00, 0x01C4F, "Lepc" },
		{ 0x01C50, 0x01C7F, "Olck" },
		{ 0x01E00, 0x01EFF, "Latn" },
		{ 0x01F00, 0x01FFE, "polytonic" },
		["length"] = 28,
	},
	[2] = {
		{ 0x02200, 0x022FF, "Zmth" },
		{ 0x02300, 0x023F3, "Zsym" },
		{ 0x02500, 0x027BF, "Zsym" },
		{ 0x027C0, 0x027EF, "Zmth" },
		{ 0x02800, 0x028FF, "Brai" },
		{ 0x02980, 0x029FF, "Zmth" },
		{ 0x02A00, 0x02AFF, "Zmth" },
		{ 0x02C00, 0x02C5E, "Glag" },
		{ 0x02C60, 0x02C7F, "Latinx" },
		{ 0x02C80, 0x02CFF, "Copt" },
		{ 0x02D00, 0x02D2D, "Geok" },
		{ 0x02D30, 0x02D7F, "Tfng" },
		{ 0x02D80, 0x02DDE, "Ethi" },
		{ 0x02E80, 0x02FDF, "Hani" },
		["length"] = 14,
	},
	[3] = {
		{ 0x03000, 0x0303F, "Hani" },
		{ 0x03041, 0x0309F, "Hira" },
		{ 0x030A0, 0x030FF, "Kana" },
		{ 0x03105, 0x0312D, "Bopo" },
		{ 0x03131, 0x0318E, "Hang" },
		{ 0x031A0, 0x031BA, "Bopo" },
		{ 0x031C0, 0x031E3, "Hani" },
		{ 0x031F0, 0x031FF, "Kana" },
		{ 0x03300, 0x03357, "Kana" },
		{ 0x0337B, 0x0337F, "Hani" },
		{ 0x03400, 0x03FFF, "Hani" },
		["length"] = 11,
	},
	[4] = {
		{ 0x04000, 0x04DB5, "Hani" },
		{ 0x04E00, 0x04FFF, "Hani" },
		["length"] = 2,
	},
	[5] = {
		{ 0x05000, 0x05FFF, "Hani" },
		["length"] = 1,
	},
	[6] = {
		{ 0x06000, 0x06FFF, "Hani" },
		["length"] = 1,
	},
	[7] = {
		{ 0x07000, 0x07FFF, "Hani" },
		["length"] = 1,
	},
	[8] = {
		{ 0x08000, 0x08FFF, "Hani" },
		["length"] = 1,
	},
	[9] = {
		{ 0x09000, 0x09FFF, "Hani" },
		["length"] = 1,
	},
	[10] = {
		{ 0x0A000, 0x0A4C6, "Yiii" },
		{ 0x0A4D0, 0x0A4FF, "Lisu" },
		{ 0x0A500, 0x0A62B, "Vaii" },
		{ 0x0A640, 0x0A697, "Cyrs" },
		{ 0x0A680, 0x0A697, "Cyrl" },
		{ 0x0A6A0, 0x0A6F7, "Bamu" },
		{ 0x0A720, 0x0A7FF, "Latinx" },
		{ 0x0A800, 0x0A82B, "Sylo" },
		{ 0x0A840, 0x0A877, "Phag" },
		{ 0x0A880, 0x0A8D9, "Saur" },
		{ 0x0A8E0, 0x0A8FB, "Deva" },
		{ 0x0A900, 0x0A92F, "Kali" },
		{ 0x0A930, 0x0A95F, "Rjng" },
		{ 0x0A980, 0x0A9DF, "Java" },
		{ 0x0A9E0, 0x0A9FE, "Mymr" },
		{ 0x0AA00, 0x0AA5F, "Cham" },
		{ 0x0AA60, 0x0AA7F, "Mymr" },
		{ 0x0AA80, 0x0AADF, "Tavt" },
		{ 0x0AAE0, 0x0AAFF, "Mtei" },
		{ 0x0AB01, 0x0AB2E, "Ethi" },
		{ 0x0AB30, 0x0AB65, "Latinx" },
		{ 0x0AB70, 0x0ABBF, "Cher" },
		{ 0x0ABC0, 0x0ABFF, "Mtei" },
		{ 0x0AC00, 0x0AFFF, "Hang" },
		["length"] = 24,
	},
	[11] = {
		{ 0x0B000, 0x0BFFF, "Hang" },
		["length"] = 1,
	},
	[12] = {
		{ 0x0C000, 0x0CFFF, "Hang" },
		["length"] = 1,
	},
	[13] = {
		{ 0x0D000, 0x0D7A3, "Hang" },
		["length"] = 1,
	},
	[15] = {
		{ 0x0FB13, 0x0FB17, "Armn" },
		{ 0x0FB1D, 0x0FB4F, "Hebr" },
		{ 0x0FB50, 0x0FDFD, "Arab" },
		{ 0x0FE70, 0x0FEFC, "Arab" },
		["length"] = 4,
	},
	[16] = {
		{ 0x10000, 0x100FA, "Linb" },
		{ 0x10280, 0x1029C, "Lyci" },
		{ 0x102A0, 0x102D0, "Cari" },
		{ 0x102E1, 0x102FB, "Copt" },
		{ 0x10300, 0x10323, "Ital" },
		{ 0x10330, 0x1034A, "Goth" },
		{ 0x10350, 0x1037A, "Perm" },
		{ 0x10380, 0x1039F, "Ugar" },
		{ 0x103A0, 0x103D5, "Xpeo" },
		{ 0x10400, 0x1044F, "Dsrt" },
		{ 0x10450, 0x1047F, "Shaw" },
		{ 0x10480, 0x104A9, "Osma" },
		{ 0x104B0, 0x104FB, "Osge" },
		{ 0x10500, 0x10527, "Elba" },
		{ 0x10530, 0x10563, "Aghb" },
		{ 0x10600, 0x10767, "Lina" },
		{ 0x10800, 0x1083F, "Cprt" },
		{ 0x10840, 0x1085F, "Armi" },
		{ 0x10860, 0x1087F, "Palm" },
		{ 0x10880, 0x108AF, "Nbat" },
		{ 0x108E0, 0x108FF, "Hatr" },
		{ 0x10900, 0x1091F, "Phnx" },
		{ 0x10920, 0x1093F, "Lydi" },
		{ 0x10980, 0x1099F, "Mero" },
		{ 0x109A0, 0x109BF, "Merc" },
		{ 0x10A00, 0x10A58, "Khar" },
		{ 0x10A60, 0x10A7F, "Sarb" },
		{ 0x10A80, 0x10A9F, "Narb" },
		{ 0x10AC0, 0x10AF6, "Mani" },
		{ 0x10B00, 0x10B3F, "Avst" },
		{ 0x10B40, 0x10B5F, "Prti" },
		{ 0x10B60, 0x10B7F, "Phli" },
		{ 0x10B80, 0x10BAF, "Phlp" },
		{ 0x10C00, 0x10C48, "Orkh" },
		{ 0x10C80, 0x10CB2, "Hung" },
		{ 0x10E60, 0x10E7E, "Ruminumerals" },
		["length"] = 36,
	},
	[17] = {
		{ 0x11000, 0x1106F, "Brah" },
		{ 0x11080, 0x110C1, "Kthi" },
		{ 0x110D0, 0x110F9, "Sora" },
		{ 0x11100, 0x11143, "Cakm" },
		{ 0x11150, 0x11176, "Mahj" },
		{ 0x11180, 0x111D9, "Shrd" },
		{ 0x11200, 0x1123D, "Khoj" },
		{ 0x11280, 0x112A9, "Mult" },
		{ 0x112B0, 0x112F9, "Sind" },
		{ 0x11301, 0x11374, "Gran" },
		{ 0x11400, 0x1145D, "Newa" },
		{ 0x11480, 0x114D9, "Tirh" },
		{ 0x11580, 0x115DD, "Sidd" },
		{ 0x11600, 0x11659, "Modi" },
		{ 0x11680, 0x116C9, "Takr" },
		{ 0x11700, 0x1173F, "Ahom" },
		{ 0x118A0, 0x118FF, "Wara" },
		{ 0x11A00, 0x11A47, "Zanb" },
		{ 0x11A50, 0x11AA2, "Soyo" },
		{ 0x11AC0, 0x11AF8, "Pauc" },
		{ 0x11C00, 0x11C6C, "Bhks" },
		{ 0x11C70, 0x11CB6, "Marc" },
		{ 0x11D00, 0x11D59, "Gonm" },
		["length"] = 23,
	},
	[18] = {
		{ 0x12000, 0x1236E, "Xsux" },
		{ 0x12400, 0x12473, "Xsux" },
		["length"] = 2,
	},
	[19] = {
		{ 0x13000, 0x1342E, "Egyp" },
		["length"] = 1,
	},
	[20] = {
		{ 0x14400, 0x14646, "Hluw" },
		["length"] = 1,
	},
	[22] = {
		{ 0x16800, 0x16A38, "Bamu" },
		{ 0x16A40, 0x16A6F, "Mroo" },
		{ 0x16AD0, 0x16AF5, "Bass" },
		{ 0x16B00, 0x16B8F, "Hmng" },
		{ 0x16F00, 0x16F9F, "Plrd" },
		["length"] = 5,
	},
	[23] = {
		{ 0x17000, 0x17FFF, "Tang" },
		["length"] = 1,
	},
	[24] = {
		{ 0x18000, 0x187EC, "Tang" },
		{ 0x18800, 0x18AF2, "Tang" },
		["length"] = 2,
	},
	[27] = {
		{ 0x1B001, 0x1B11E, "Hira" },
		{ 0x1B170, 0x1B2FB, "Nshu" },
		{ 0x1BC00, 0x1BC9F, "Dupl" },
		["length"] = 3,
	},
	[29] = {
		{ 0x1D100, 0x1D1DD, "musical" },
		{ 0x1D400, 0x1D7FF, "Zmth" },
		{ 0x1D800, 0x1DAAF, "Sgnw" },
		["length"] = 3,
	},
	[30] = {
		{ 0x1E000, 0x1E02A, "Glag" },
		{ 0x1E800, 0x1E8D6, "Mend" },
		{ 0x1E900, 0x1E95F, "Adlm" },
		["length"] = 3,
	},
	[31] = {
		{ 0x1F300, 0x1F6C5, "Zsym" },
		["length"] = 1,
	},
	[32] = {
		{ 0x20000, 0x20FFF, "Hani" },
		["length"] = 1,
	},
	[33] = {
		{ 0x21000, 0x21FFF, "Hani" },
		["length"] = 1,
	},
	[34] = {
		{ 0x22000, 0x22FFF, "Hani" },
		["length"] = 1,
	},
	[35] = {
		{ 0x23000, 0x23FFF, "Hani" },
		["length"] = 1,
	},
	[36] = {
		{ 0x24000, 0x24FFF, "Hani" },
		["length"] = 1,
	},
	[37] = {
		{ 0x25000, 0x25FFF, "Hani" },
		["length"] = 1,
	},
	[38] = {
		{ 0x26000, 0x26FFF, "Hani" },
		["length"] = 1,
	},
	[39] = {
		{ 0x27000, 0x27FFF, "Hani" },
		["length"] = 1,
	},
	[40] = {
		{ 0x28000, 0x28FFF, "Hani" },
		["length"] = 1,
	},
	[41] = {
		{ 0x29000, 0x29FFF, "Hani" },
		["length"] = 1,
	},
	[42] = {
		{ 0x2A000, 0x2AFFF, "Hani" },
		["length"] = 1,
	},
	[43] = {
		{ 0x2B000, 0x2BFFF, "Hani" },
		["length"] = 1,
	},
	[44] = {
		{ 0x2C000, 0x2CFFF, "Hani" },
		["length"] = 1,
	},
	[45] = {
		{ 0x2D000, 0x2DFFF, "Hani" },
		["length"] = 1,
	},
	[46] = {
		{ 0x2E000, 0x2EBE0, "Hani" },
		["length"] = 1,
	},
	["individual"] = {
		[0x00462] = "Cyrl",
		[0x00463] = "Cyrl",
		[0x0046A] = "Cyrl",
		[0x0046B] = "Cyrl",
		[0x0046C] = "Cyrl",
		[0x0046D] = "Cyrl",
		[0x00472] = "Cyrl",
		[0x00473] = "Cyrl",
		[0x00474] = "Cyrl",
		[0x00475] = "Cyrl",
		[0x02135] = "Zmth",
		[0x02190] = "Zsym",
		[0x021FF] = "Zsym",
		[0x0FA0E] = "Hani",
		[0x0FA0F] = "Hani",
		[0x0FA11] = "Hani",
		[0x0FA13] = "Hani",
		[0x0FA14] = "Hani",
		[0x0FA1F] = "Hani",
		[0x0FA21] = "Hani",
		[0x0FA23] = "Hani",
		[0x0FA24] = "Hani",
		[0x0FA27] = "Hani",
		[0x0FA28] = "Hani",
		[0x0FA29] = "Hani",
		[0x1056F] = "Aghb",
		[0x16FE0] = "Tang",
		[0x1B000] = "Kana",
	},
}

-- Table of functions exported by this module; returned at the end of the file.
local export = {}

-- Short aliases for the Scribunto Unicode-aware string functions.
local getCodepoint = mw.ustring.codepoint
local U = mw.ustring.char

-- Full title text of the current page; read by log() to decide whether to log.
local title = mw.title.getCurrentTitle().fullText

-- Builds an argument checker for the function named funcName.
-- The returned closure takes an argument position and the argument value and
-- hands them, together with funcName and expectType, to libraryUtil.checkType.
local function check(funcName, expectType)
	local function checkArgument(argIndex, arg)
		local libraryUtil = require("libraryUtil")
		libraryUtil.checkType(funcName, argIndex, arg, expectType)
	end
	return checkArgument
end

-- Passes val to highlight_dump from Module:debug and returns whatever it
-- returns. tsort is forwarded as the third argument; the options table always
-- sets modified = true.
local function dump(val, tsort)
	local highlightDump = require("Module:debug").highlight_dump
	local options = { modified = true }
	return highlightDump(val, nil, tsort, options)
end

-- Reports whether table t contains at least one key.
-- next(t) returns the first key it finds, and `false` is a legal key, so the
-- result is compared against nil instead of being tested for truthiness.
-- Returns a real boolean.
local function hasContents(t)
	return next(t) ~= nil
end

-- Writes message to the Scribunto debug log, but only when the current page
-- title ends in "testcases/documentation".
local function log(message)
	local onTestcasesDocumentation = title:match("testcases/documentation$")
	if not onTestcasesDocumentation then
		return
	end
	mw.log(message)
end
	

-- Tests whether value lies in the closed interval [lower, upper].
-- All three arguments are first type-checked as numbers through check().
local function isInRange(value, lower, upper)
	local checkNumber = check("isInRange", "number")
	checkNumber(1, value)
	checkNumber(2, lower)
	checkNumber(3, upper)
	
	return lower <= value and upper >= value
end

-- Tests whether a character belongs to the set described by characterLookup.
--   characterLookup: array of { lower, upper } codepoint ranges, optionally
--     with a `values` set of single codepoints and `smallest`/`largest`
--     bounds used as a quick rejection test.
--   character: a one-character string or a codepoint number.
-- Returns true or false. Raises an error if character is a string that is
-- not exactly one character long, or is neither a string nor a number.
local function lookupCharacter(characterLookup, character)
	local codepoint
	local characterType = type(character)
	if characterType == "number" then
		codepoint = character
	elseif characterType == "string" then
		local length = mw.ustring.len(character)
		if length ~= 1 then
			error("Character " .. character .. " has length " .. length .. ". It is supposed to be a single character.")
		end
		codepoint = getCodepoint(character)
	else
		error("Character is the wrong type: " .. characterType .. ".")
	end
	
	-- Quick rejection: outside the overall bounds of the set.
	local smallest = characterLookup.smallest
	if smallest and not isInRange(codepoint, smallest, characterLookup.largest) then
		return false
	end
	
	-- Individually listed codepoints.
	local values = characterLookup.values
	if values and values[codepoint] then
		return true
	end
	
	-- Codepoint ranges.
	for _, range in ipairs(characterLookup) do
		if isInRange(codepoint, range[1], range[2]) then
			return true
		end
	end
	
	return false
end

-- Calls func once for every character of str, in order, passing the character
-- as a one-character string. Does nothing if func is not a function.
local function forEachChar(str, func)
	if type(func) ~= "function" then
		return
	end
	
	for i = 1, mw.ustring.len(str) do
		-- Declared local so that no global named `char` is created.
		local char = mw.ustring.sub(str, i, i)
		func(char)
	end
end

-- Converts the contents of a character class (such as the `characters` field
-- of a script's data) into a lookup table.
--   pattern: string of "X-Y" ranges and single characters.
-- Returns a table whose array part holds { lowerCodepoint, upperCodepoint }
-- pairs sorted by lower codepoint, plus these optional fields:
--   values             set (codepoint -> true) of characters that were not
--                      part of a range; omitted when there are none
--   smallest, largest  the lowest and highest codepoints mentioned anywhere
--                      in the pattern; omitted when the first range already
--                      spans exactly those bounds
function export.makeCharacterLookup(pattern)
	local characterLookup = {}
	local values = {}
	local allValues = {}
	
	-- Create ranges in which all characters belong to the script.
	-- Each "X-Y" is recorded and removed from the string. The order of the two
	-- codepoints is not validated.
	local workingString = mw.ustring.gsub(
		pattern,
		"([^-])%-([^-])",
		function(item1, item2)
			local codepoint1, codepoint2 = getCodepoint(item1), getCodepoint(item2)
			table.insert(characterLookup, { codepoint1, codepoint2 })
			allValues[codepoint1] = true
			allValues[codepoint2] = true
			return ""
		end
	)
	
	-- Whatever remains is treated as a list of individual characters. gsub is
	-- used only to visit each character; its result is not needed.
	if workingString ~= "" then
		mw.ustring.gsub(
			workingString,
			".",
			function(char)
				local codepoint = getCodepoint(char)
				values[codepoint] = true
				allValues[codepoint] = true
			end
		)
	end
	
	--[[
		Place the tables of ranges in the Unicode order (the patterns
		should already be in that order, but just to be safe).
	]]
	table.sort(
		characterLookup,
		function(item1, item2)
			return item1[1] < item2[1]
		end
	)
	
	-- NOTE(review): numKeys is assumed to return the numeric keys in ascending
	-- order, which is what the smallest/largest computation relies on.
	local allValuesKeys = require("Module:table").numKeys(allValues)
	
	local smallest, largest = allValuesKeys[1], allValuesKeys[#allValuesKeys]
	
	-- Don't create an empty values table.
	if hasContents(values) then
		characterLookup.values = values
	end
	
	--[[
		Don't record the smallest and largest values if they're found in the
		first range. A pattern made only of individual characters has no first
		range; the bounds are recorded in that case instead of indexing nil.
	]]
	local firstRange = characterLookup[1]
	if not (firstRange and smallest == firstRange[1] and largest == firstRange[2]) then
		characterLookup.smallest, characterLookup.largest = smallest, largest
	end
	
	return characterLookup
end

-- Builds a character lookup (see export.makeCharacterLookup) for every script
-- in Module:scripts/data that has a `characters` pattern.
-- Returns a table mapping script code -> lookup table. Scripts that share an
-- identical pattern share the same lookup table object.
function export.makeAllScriptsCharacterLookup()
	local lookups = {}
	-- Maps a pattern to the most recent script code that used it.
	local codeForPattern = {}
	local scriptsData = mw.loadData("Module:scripts/data")
	
	for code, data in pairs(scriptsData) do
		local pattern = data.characters
		if pattern then
			local earlierCode = codeForPattern[pattern]
			local lookup
			if earlierCode then
				-- Don't generate an identical lookup table twice.
				lookup = lookups[earlierCode]
			else
				lookup = export.makeCharacterLookup(pattern)
			end
			lookups[code] = lookup
			codeForPattern[pattern] = code
		end
	end
	
	return lookups
end

-- Moves a leading language prefix behind the script code: fa-Arab → Arab-fa.
-- Codes without a hyphen are returned unchanged.
-- The gsub call is wrapped in parentheses so that only the resulting string
-- is returned, not gsub's substitution count as a second value.
local function switchLangSc(scriptCode)
	return (scriptCode:gsub("^([^-]+)%-(.+)$", "%2-%1"))
end

-- Stand-in sort keys for a few script codes, chosen so that Grek and Latn
-- appear first. Because the sort order also decides which script is processed
-- first, this makes Grek and Latn take precedence when the
-- codepoint-to-script lookup table is generated.
local scriptCodeReplacements = {
	["polytonic"] = "Grek2",
	["Latinx"] = "Latnx",
	["Latf"] = "Latnf",
}

-- Turns a script code into the key used for sorting.
--   * Codes listed in scriptCodeReplacements are swapped for their stand-in.
--   * Codes containing a four-letter script code (Xxxx, or xxx-Xxxx) are
--     returned unchanged.
--   * Anything else gets a "~" prefix.
-- Always returns exactly one value: the gsub call is parenthesised so that
-- its substitution count is not returned as well.
-- NOTE(review): both patterns are unanchored, so a longer ad hoc code that
-- merely contains "Xxxx" (one capital followed by three lower-case letters)
-- is also returned unchanged. Confirm whether ^...$ anchors were intended.
local function modifyAdHocCode(code)
	local replacement = scriptCodeReplacements[code]
	if replacement then
		return replacement
	end
	
	if code:match("[A-Z][a-z][a-z][a-z]")
			or code:match("[a-z][a-z][a-z]%-[A-Z][a-z][a-z][a-z]") then
		return code
	end
	
	return (code:gsub("^(.+)$", "~%1"))
end
	
-- Comparison function for sorting table keys.
-- Numbers sort before strings. Two script-code strings are compared
-- case-insensitively after being rewritten by modifyAdHocCode and
-- switchLangSc. Any other pair is compared with the plain < operator.
local function keySort(key1, key2)
	local type1, type2 = type(key1), type(key2)
	
	if type1 ~= "string" then
		if type1 == "number" and type2 == "string" then
			return true
		end
		return key1 < key2
	end
	
	if type2 == "number" then
		return false
	end
	
	local adjusted1 = modifyAdHocCode(key1)
	local adjusted2 = modifyAdHocCode(key2)
	local swapped1 = switchLangSc(adjusted1)
	local swapped2 = switchLangSc(adjusted2)
	return mw.ustring.lower(swapped1) < mw.ustring.lower(swapped2)
end

-- Formats number as an upper-case hexadecimal literal with a "0x" prefix.
local function hex(number)
	return ("0x%X"):format(number)
end

-- Splits the codepoint range [lower, upper] at multiples of width.
--   lower, upper: bounds of the range; if either is missing, a message is
--     logged and nil is returned.
--   width: size of each block.
--   testing: when truthy, the bounds are stored as hex strings (see hex())
--     instead of numbers.
-- Returns a table keyed by block index (math.floor(codepoint / width)); each
-- value is { first, last }, the part of the range that falls in that block.
local function divideRange(lower, upper, width, testing)
	if not lower or not upper then
		mw.log("divideRange failed:", lower, upper, width, testing)
		return nil
	end
	
	local ranges = {}
	local index = math.floor(lower / width)
	
	repeat
		-- Full extent of the current block, clipped to the requested range.
		local first = index * width
		local last = first + width - 1
		
		if first < lower then
			first = lower
		end
		
		if last > upper then
			last = upper
		end
		
		if testing then
			ranges[index] = { hex(first), hex(last) }
		else
			ranges[index] = { first, last }
		end
		
		index = index + 1
	until index * width > upper
	
	return ranges
end

-- Demonstration entry point: divides the fixed range 0x2A700-0x2B73F into
-- blocks of 0x1000 codepoints and returns the two bounds in hex followed by a
-- dump of the divided ranges. The frame argument is not used.
function export.showDividedRange(frame)
	local lower, higher, width = 0x2A700, 0x2B73F, 0x1000
	local dividedRange = divideRange(lower, higher, width, true)
	local heading = hex(lower) .. ", " .. hex(higher)
	return heading .. dump(dividedRange)
end

-- Set of script codes skipped when the codepoint-to-script lookup is built:
-- scripts that consist entirely of characters from another script.
local scriptBlacklist = {
	["Latf"] = true,
	["Hans"] = true,
	["Hant"] = true,
	["Kore"] = true,
	["Jpan"] = true,
	["fa-Arab"] = true,
	["kk-Arab"] = true,
	["ks-Arab"] = true,
	["ku-Arab"] = true,
	["ms-Arab"] = true,
	["mzn-Arab"] = true,
	["ota-Arab"] = true,
	["pa-Arab"] = true,
	["ps-Arab"] = true,
	["sd-Arab"] = true,
	["tt-Arab"] = true,
	["ug-Arab"] = true,
	["ur-Arab"] = true,
	["nv-Latn"] = true,
	["pjt-Latn"] = true,
	["Zyyy"] = true,
}

-- Comparison function for { lower, upper, scriptCode } range tables.
-- Orders by the numeric value of the lower bound; ranges that start at the
-- same codepoint are ordered by script code using keySort.
local function sortRange(range1, range2)
	local start1 = tonumber(range1[1])
	local start2 = tonumber(range2[1])
	if start1 ~= start2 then
		return start1 < start2
	end
	return keySort(range1[3], range2[3])
end

-- Generates the codepoint-to-script lookup table from the character patterns
-- of all scripts that are not in scriptBlacklist.
--   testing: forwarded to divideRange; when truthy, range bounds are stored
--     as hex strings instead of numbers.
-- Returns a table with
--   [blockIndex]  array of { lower, upper, scriptCode } sorted with sortRange,
--                 where blockIndex is math.floor(codepoint / 0x1000); each
--                 array also carries a `length` field holding its item count
--   individual    map of codepoint -> scriptCode for single characters
-- Scripts are visited in keySort order, and the first script to claim an
-- exact sub-range or an individual codepoint keeps it; later claims are only
-- logged.
local function makeCodepointToScriptLookup(testing)
	local output = { individual = {} }
	-- Maps "lower-upper" to the script code that recorded that sub-range.
	local rangeStrings = {}
	
	local allScriptsCharacterLookup = export.makeAllScriptsCharacterLookup()
	for scriptCode, lookup in require("Module:table").sortedPairs(allScriptsCharacterLookup, keySort) do
		if not scriptBlacklist[scriptCode] then
			for _, value in ipairs(lookup) do
				if type(value) == "table" then
					local newRanges = divideRange(value[1], value[2], 0x1000, testing)
					if newRanges then
						for position, newRange in pairs(newRanges) do
							-- The separator keeps distinct ranges from producing
							-- the same key (1 and 234 versus 12 and 34).
							local rangeString = newRange[1] .. "-" .. newRange[2]
							if rangeStrings[rangeString] then
								mw.log("The range " .. newRange[1] .. "-" .. newRange[2] ..
									" is already recorded as belonging to the script code " .. 
									rangeStrings[rangeString] .. ".")
							else
								rangeStrings[rangeString] = scriptCode
								
								if not output[position] then
									output[position] = {}
								end
								
								table.insert(output[position], { newRange[1], newRange[2], scriptCode })
							end
						end
					end
				end
			end
			
			if lookup.values then
				for codepoint in pairs(lookup.values) do
					if output.individual[codepoint] then
						mw.log("The codepoint " .. hex(codepoint) ..
								" is already recorded as belonging to the script code " ..
								output.individual[codepoint] .. ".")
					else
						output.individual[codepoint] = scriptCode
					end
				end
			end
		end
	end
	
	-- Sort and measure only the numbered blocks; `individual` is a plain map
	-- and must not be passed to table.sort.
	for position, ranges in pairs(output) do
		if type(position) == "number" then
			table.sort(ranges, sortRange)
			ranges.length = #ranges
		end
	end
	
	return output
end

--[[
	Binary search: more efficient for the longer lists of codepoint ranges than
	for the shorter ones.
	
	ranges: array of { lower, upper, ... } tables sorted by lower bound; may
		carry a `length` field giving the number of ranges, and may be nil.
	value: the number to look for.
	Returns the first range found that contains value, or nil if there is
	none. The search assumes that the ranges do not overlap.
]]
local function binarySearch(ranges, value)
	if not ranges then
		return nil
	end
	
	-- Can't use # because table is loaded by mw.loadData.
	-- Prefer the stored `length`: counting every key with size() would also
	-- count the "length" field itself and put the upper bound one past the
	-- last range.
	local iStart = 1
	local iEnd = ranges.length or require("Module:table").size(ranges)
	
	-- Do search.
	while iStart <= iEnd do
		-- Calculate middle.
		local iMid = math.floor((iStart + iEnd) / 2)
		
		-- Get compare value.
		local range = ranges[iMid]
		
		if not range then
			-- The index lies past the last range; continue in the lower half.
			iEnd = iMid - 1
		
		-- Return matching range. Assumes there are no duplicates.
		elseif isInRange(value, range[1], range[2]) then
			return range
		
		-- Keep searching.
		elseif value < range[1] then
			iEnd = iMid - 1
		else
			iStart = iMid + 1
		end
	end
	return nil
end

-- Scans ranges from the start for the first { lower, upper, value } entry
-- whose bounds contain number and returns that entry's third element.
-- Gives up with nil as soon as an entry starts above number, so ranges is
-- expected to be sorted by lower bound.
local function lookupInOrder(number, ranges)
	for _, range in ipairs(ranges) do
		local lower, upper = range[1], range[2]
		if isInRange(number, lower, upper) then
			return range[3]
		elseif number < lower then
			return nil
		end
	end
end

-- Save previously used codepoint ranges in case another character is in the
-- same range.
-- export.charToScript appends each range it finds and re-sorts the list with
-- sortRange, then consults the list via lookupInOrder on later calls.
local rangesCache = {}

-- Returns the script code for a character, or nil if none is found.
--   char: string; the codepoint of its first character is looked up.
-- Lookup order:
--   1. the `individual` map of the data module,
--   2. ranges found by earlier calls (rangesCache),
--   3. the optional `blocks` list of the data module, searched by block
--      index (math.floor(codepoint / 0x1000)),
--   4. a binary search in the range list stored under that block index.
function export.charToScript(char)
	local lookup = mw.loadData("Module:User:Erutuon/script recognition/data") -- makeCodepointToScriptLookup()
	local codepoint = mw.ustring.codepoint(char)
	
	-- NOTE(review): presumably an empty string yields no codepoint; without
	-- this guard the arithmetic below would fail on nil.
	if not codepoint then
		return nil
	end
	
	local individualMatch = lookup.individual[codepoint]
	if individualMatch then
		return individualMatch
	end
	
	local script = lookupInOrder(codepoint, rangesCache)
	if script then
		return script
	end
	
	local index = math.floor(codepoint / 0x1000)
	
	-- makeCodepointToScriptLookup() never creates a `blocks` field, so the
	-- data may not have one; ipairs(nil) would raise an error.
	local blocks = lookup.blocks
	if blocks then
		script = lookupInOrder(index, blocks)
		if script then
			return script
		end
	end
	
	-- Blocks that contain no recorded ranges have no entry at all.
	local ranges = lookup[index]
	if not ranges then
		return nil
	end
	
	local range = binarySearch(ranges, codepoint)
	if range then
		-- Remember the range and keep the cache sorted, because
		-- lookupInOrder stops at the first range that starts above the
		-- codepoint.
		table.insert(rangesCache, range)
		table.sort(rangesCache, sortRange)
		return range[3]
	end
	
	return nil
end

-- Demonstration entry point: returns the input string followed by the script
-- code found for each of its characters, e.g. "AB: Latn, Latn". Characters
-- with no known script are shown as "nil".
--   frame.args[1]: the text to analyse; a sample string is used when absent.
-- This function used to be defined as export.show, but a later definition of
-- export.show in this file replaced it, so it could never be called. It has
-- its own name here so that both entry points are reachable; export.show
-- itself behaves as before.
function export.showCharToScript(frame)
	local str = frame.args[1] or "ABCD一丨丶丿乙亅"
	
	local result = {}
	forEachChar(
		str,
		function(char)
			table.insert(result, tostring(export.charToScript(char)))
		end
	)

	return str .. ": " .. table.concat(result, ", ")
end

-- Entry point: builds the codepoint-to-script lookup table and returns it
-- rendered by dump(). The frame argument is not used.
function export.show(frame)
	local lookup = makeCodepointToScriptLookup()
	return dump(lookup)
end

return export