Module:Unicode data/scripts

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module was generated by a series of functions in Module:User:Erutuon/script recognition from the script patterns in Module:scripts/data. It is used by the char_to_script function in Module:Unicode data. It will have to be regenerated when script patterns are added or modified.

The highlight_dump and modified_dump functions in Module:debug were used to print the output from Module:User:Erutuon/script recognition.

To explain the format, the first keys are the blocks of 4096 codepoints (0x1000 in hexadecimal). Key 0 is for the first block of 4096 codepoints, from U+0000 to U+0FFF. Within the tables for each block of codepoints, the syntax { 0x41, 0x5A, "Latn" } indicates that all the characters from codepoint 0x41 (U+0041 or the character A) to codepoint 0x5A (U+005A or the character Z) belong to the Latin script. The table "blocks" lists runs of adjacent 4096-codepoint blocks that consist entirely or almost entirely of one script, in the form { first block, last block, script }. The table "individual" contains all the individual codepoints that are defined as belonging to a script, but are not found inside a range.


-- Script lookup data used by the char_to_script function in
-- Module:Unicode data.
--
-- Layout of the returned table:
--   * Numeric keys are blocks of 4096 (0x1000) codepoints: key 0 covers
--     U+0000 to U+0FFF, key 1 covers U+1000 to U+1FFF, and so on. Each block
--     holds a list of { first, last, script } triples, meaning that every
--     codepoint from `first` to `last` (inclusive) belongs to `script`.
--     Within a block the triples are sorted by `first` and do not overlap.
--   * "blocks" lists runs of adjacent 4096-codepoint blocks that consist
--     entirely or almost entirely of one script, as
--     { first block, last block, script }.
--   * "individual" maps single codepoints to a script; these entries are
--     exceptions that are not described by a range (some of them fall inside
--     a range assigned to a different script).
--
-- NOTE(review): this table is generated from the script patterns in
-- Module:scripts/data. Manual corrections made here should also be reflected
-- in the generator or its input, otherwise they will be lost on regeneration.
return {
	[0] = {
		{ 0x41, 0x5A, "Latn" },
		{ 0x61, 0x7A, "Latn" },
		{ 0xC0, 0xD6, "Latn" },
		{ 0xD8, 0xF6, "Latn" },
		{ 0xF8, 0x24F, "Latn" },
		{ 0x370, 0x3E1, "Grek" },
		{ 0x3E2, 0x3EF, "Copt" },
		{ 0x3F0, 0x3FF, "Grek" },
		{ 0x400, 0x45F, "Cyrl" },
		{ 0x460, 0x489, "Cyrs" },
		{ 0x48A, 0x527, "Cyrl" },
		{ 0x531, 0x58F, "Armn" },
		{ 0x590, 0x5FF, "Hebr" },
		{ 0x600, 0x6FF, "Arab" },
		{ 0x700, 0x74F, "Syrc" },
		{ 0x750, 0x77F, "Arab" },
		{ 0x780, 0x7B1, "Thaa" },
		{ 0x7C0, 0x7FA, "Nkoo" },
		{ 0x800, 0x83E, "Samr" },
		{ 0x840, 0x85E, "Mand" },
		{ 0x860, 0x86A, "Syrc" },
		{ 0x8A0, 0x8FF, "Arab" },
		{ 0x900, 0x97F, "Deva" },
		{ 0x981, 0x9FA, "Beng" },
		{ 0xA01, 0xA75, "Guru" },
		{ 0xA81, 0xAF1, "Gujr" },
		{ 0xB01, 0xB77, "Orya" },
		{ 0xB82, 0xBFA, "Taml" },
		{ 0xC01, 0xC7F, "Telu" },
		{ 0xC82, 0xCF2, "Knda" },
		{ 0xD02, 0xD7F, "Mlym" },
		{ 0xD82, 0xDF4, "Sinh" },
		{ 0xE01, 0xE5B, "Thai" },
		{ 0xE81, 0xEDF, "Laoo" },
		{ 0xF00, 0xFDA, "Tibt" },
	},
	[1] = {
		{ 0x1000, 0x109F, "Mymr" },
		{ 0x10A0, 0x10CD, "Geok" },
		{ 0x10D0, 0x10FC, "Geor" },
		{ 0x1100, 0x11FF, "Hang" },
		{ 0x1200, 0x1399, "Ethi" },
		{ 0x13A0, 0x13F4, "Cher" },
		{ 0x1400, 0x167F, "Cans" },
		{ 0x1680, 0x169C, "Ogam" },
		{ 0x16A0, 0x16F0, "Runr" },
		{ 0x1700, 0x1714, "Tglg" },
		{ 0x1720, 0x1734, "Hano" },
		{ 0x1740, 0x1753, "Buhd" },
		{ 0x1760, 0x1773, "Tagb" },
		{ 0x1780, 0x17F9, "Khmr" },
		{ 0x1800, 0x18AA, "Mong" },
		{ 0x1900, 0x194F, "Limb" },
		{ 0x1950, 0x1974, "Tale" },
		{ 0x1980, 0x19DF, "Talu" },
		{ 0x19E0, 0x19FF, "Khmr" },
		{ 0x1A00, 0x1A1F, "Bugi" },
		{ 0x1A20, 0x1AAD, "Lana" },
		{ 0x1B00, 0x1B7C, "Bali" },
		{ 0x1B80, 0x1BBF, "Sund" },
		{ 0x1BC0, 0x1BFF, "Batk" },
		{ 0x1C00, 0x1C4F, "Lepc" },
		{ 0x1C50, 0x1C7F, "Olck" },
		{ 0x1E00, 0x1EFF, "Latn" },
		{ 0x1F00, 0x1FFE, "polytonic" },
	},
	[2] = {
		{ 0x2200, 0x22FF, "Zmth" },
		{ 0x2300, 0x23F3, "Zsym" },
		{ 0x2500, 0x27BF, "Zsym" },
		{ 0x27C0, 0x27EF, "Zmth" },
		{ 0x2800, 0x28FF, "Brai" },
		{ 0x2980, 0x29FF, "Zmth" },
		{ 0x2A00, 0x2AFF, "Zmth" },
		{ 0x2C00, 0x2C5E, "Glag" },
		{ 0x2C60, 0x2C7F, "Latinx" },
		{ 0x2C80, 0x2CFF, "Copt" },
		{ 0x2D00, 0x2D2D, "Geok" },
		{ 0x2D30, 0x2D7F, "Tfng" },
		{ 0x2D80, 0x2DDE, "Ethi" },
		{ 0x2E80, 0x2FDF, "Hani" },
	},
	[3] = {
		{ 0x3000, 0x303F, "Hani" },
		{ 0x3041, 0x309F, "Hira" },
		{ 0x30A0, 0x30FF, "Kana" },
		{ 0x3105, 0x312D, "Bopo" },
		{ 0x3131, 0x318E, "Hang" },
		{ 0x31A0, 0x31BA, "Bopo" },
		{ 0x31C0, 0x31E3, "Hani" },
		{ 0x31F0, 0x31FF, "Kana" },
		{ 0x3300, 0x3357, "Kana" },
		{ 0x337B, 0x337F, "Hani" },
		{ 0x3400, 0x3FFF, "Hani" },
	},
	[4] = {
		{ 0x4000, 0x4DB5, "Hani" },
		{ 0x4E00, 0x4FFF, "Hani" },
	},
	[5] = {
		{ 0x5000, 0x5FFF, "Hani" },
	},
	[6] = {
		{ 0x6000, 0x6FFF, "Hani" },
	},
	[7] = {
		{ 0x7000, 0x7FFF, "Hani" },
	},
	[8] = {
		{ 0x8000, 0x8FFF, "Hani" },
	},
	[9] = {
		{ 0x9000, 0x9FFF, "Hani" },
	},
	[10] = {
		{ 0xA000, 0xA4C6, "Yiii" },
		{ 0xA4D0, 0xA4FF, "Lisu" },
		{ 0xA500, 0xA62B, "Vaii" },
		-- Ends at 0xA67F so that it does not overlap the Cyrl range that
		-- follows; overlapping ranges would leave that entry unreachable.
		{ 0xA640, 0xA67F, "Cyrs" },
		{ 0xA680, 0xA697, "Cyrl" },
		{ 0xA6A0, 0xA6F7, "Bamu" },
		{ 0xA720, 0xA7FF, "Latinx" },
		{ 0xA800, 0xA82B, "Sylo" },
		{ 0xA840, 0xA877, "Phag" },
		{ 0xA880, 0xA8D9, "Saur" },
		{ 0xA8E0, 0xA8FB, "Deva" },
		{ 0xA900, 0xA92F, "Kali" },
		{ 0xA930, 0xA95F, "Rjng" },
		{ 0xA980, 0xA9DF, "Java" },
		{ 0xA9E0, 0xA9FE, "Mymr" },
		{ 0xAA00, 0xAA5F, "Cham" },
		{ 0xAA60, 0xAA7F, "Mymr" },
		{ 0xAA80, 0xAADF, "Tavt" },
		{ 0xAAE0, 0xAAFF, "Mtei" },
		{ 0xAB01, 0xAB2E, "Ethi" },
		{ 0xAB30, 0xAB65, "Latinx" },
		{ 0xAB70, 0xABBF, "Cher" },
		{ 0xABC0, 0xABFF, "Mtei" },
		{ 0xAC00, 0xAFFF, "Hang" },
	},
	[11] = {
		{ 0xB000, 0xBFFF, "Hang" },
	},
	[12] = {
		{ 0xC000, 0xCFFF, "Hang" },
	},
	[13] = {
		{ 0xD000, 0xD7A3, "Hang" },
	},
	[15] = {
		{ 0xFB13, 0xFB17, "Armn" },
		{ 0xFB1D, 0xFB4F, "Hebr" },
		{ 0xFB50, 0xFDFD, "Arab" },
		{ 0xFE70, 0xFEFC, "Arab" },
	},
	[16] = {
		{ 0x10000, 0x100FA, "Linb" },
		{ 0x10280, 0x1029C, "Lyci" },
		{ 0x102A0, 0x102D0, "Cari" },
		{ 0x102E1, 0x102FB, "Copt" },
		{ 0x10300, 0x10323, "Ital" },
		{ 0x10330, 0x1034A, "Goth" },
		{ 0x10350, 0x1037A, "Perm" },
		{ 0x10380, 0x1039F, "Ugar" },
		{ 0x103A0, 0x103D5, "Xpeo" },
		{ 0x10400, 0x1044F, "Dsrt" },
		{ 0x10450, 0x1047F, "Shaw" },
		{ 0x10480, 0x104A9, "Osma" },
		{ 0x104B0, 0x104FB, "Osge" },
		{ 0x10500, 0x10527, "Elba" },
		{ 0x10530, 0x10563, "Aghb" },
		{ 0x10600, 0x10767, "Lina" },
		{ 0x10800, 0x1083F, "Cprt" },
		{ 0x10840, 0x1085F, "Armi" },
		{ 0x10860, 0x1087F, "Palm" },
		{ 0x10880, 0x108AF, "Nbat" },
		{ 0x108E0, 0x108FF, "Hatr" },
		{ 0x10900, 0x1091F, "Phnx" },
		{ 0x10920, 0x1093F, "Lydi" },
		{ 0x10980, 0x1099F, "Mero" },
		{ 0x109A0, 0x109BF, "Merc" },
		{ 0x10A00, 0x10A58, "Khar" },
		{ 0x10A60, 0x10A7F, "Sarb" },
		{ 0x10A80, 0x10A9F, "Narb" },
		{ 0x10AC0, 0x10AF6, "Mani" },
		{ 0x10B00, 0x10B3F, "Avst" },
		{ 0x10B40, 0x10B5F, "Prti" },
		{ 0x10B60, 0x10B7F, "Phli" },
		{ 0x10B80, 0x10BAF, "Phlp" },
		{ 0x10C00, 0x10C48, "Orkh" },
		{ 0x10C80, 0x10CB2, "Hung" },
		{ 0x10E60, 0x10E7E, "Ruminumerals" },
	},
	[17] = {
		{ 0x11000, 0x1106F, "Brah" },
		{ 0x11080, 0x110C1, "Kthi" },
		{ 0x110D0, 0x110F9, "Sora" },
		{ 0x11100, 0x11143, "Cakm" },
		-- Lower bound first: a range whose start exceeds its end can never
		-- match any codepoint.
		{ 0x11150, 0x11176, "Mahj" },
		{ 0x11180, 0x111D9, "Shrd" },
		{ 0x11200, 0x1123D, "Khoj" },
		{ 0x11280, 0x112A9, "Mult" },
		{ 0x112B0, 0x112F9, "Sind" },
		{ 0x11301, 0x11374, "Gran" },
		{ 0x11400, 0x1145D, "Newa" },
		{ 0x11480, 0x114D9, "Tirh" },
		{ 0x11580, 0x115DD, "Sidd" },
		{ 0x11600, 0x11659, "Modi" },
		{ 0x11680, 0x116C9, "Takr" },
		{ 0x11700, 0x1173F, "Ahom" },
		{ 0x118A0, 0x118FF, "Wara" },
		{ 0x11A00, 0x11A47, "Zanb" },
		{ 0x11A50, 0x11AA2, "Soyo" },
		{ 0x11AC0, 0x11AF8, "Pauc" },
		{ 0x11C00, 0x11C6C, "Bhks" },
		{ 0x11C70, 0x11CB6, "Marc" },
		{ 0x11D00, 0x11D59, "Gonm" },
	},
	[18] = {
		{ 0x12000, 0x1236E, "Xsux" },
		{ 0x12400, 0x12473, "Xsux" },
	},
	[19] = {
		{ 0x13000, 0x1342E, "Egyp" },
	},
	[20] = {
		{ 0x14400, 0x14646, "Hluw" },
	},
	[22] = {
		{ 0x16800, 0x16A38, "Bamu" },
		{ 0x16A40, 0x16A6F, "Mroo" },
		{ 0x16AD0, 0x16AF5, "Bass" },
		{ 0x16B00, 0x16B8F, "Hmng" },
		{ 0x16F00, 0x16F9F, "Plrd" },
	},
	[23] = {
		{ 0x17000, 0x17FFF, "Tang" },
	},
	[24] = {
		{ 0x18000, 0x187EC, "Tang" },
		{ 0x18800, 0x18AF2, "Tang" },
	},
	[27] = {
		{ 0x1B170, 0x1B2FB, "Nshu" },
		{ 0x1BC00, 0x1BC9F, "Dupl" },
	},
	[29] = {
		{ 0x1D100, 0x1D1DD, "musical" },
		{ 0x1D400, 0x1D7FF, "Zmth" },
		{ 0x1D800, 0x1DAAF, "Sgnw" },
	},
	[30] = {
		{ 0x1E000, 0x1E02A, "Glag" },
		{ 0x1E800, 0x1E8D6, "Mend" },
		{ 0x1E900, 0x1E95F, "Adlm" },
	},
	[31] = {
		{ 0x1F300, 0x1F6C5, "Zsym" },
	},
	[32] = {
		{ 0x20000, 0x20FFF, "Hani" },
	},
	[33] = {
		{ 0x21000, 0x21FFF, "Hani" },
	},
	[34] = {
		{ 0x22000, 0x22FFF, "Hani" },
	},
	[35] = {
		{ 0x23000, 0x23FFF, "Hani" },
	},
	[36] = {
		{ 0x24000, 0x24FFF, "Hani" },
	},
	[37] = {
		{ 0x25000, 0x25FFF, "Hani" },
	},
	[38] = {
		{ 0x26000, 0x26FFF, "Hani" },
	},
	[39] = {
		{ 0x27000, 0x27FFF, "Hani" },
	},
	[40] = {
		{ 0x28000, 0x28FFF, "Hani" },
	},
	[41] = {
		{ 0x29000, 0x29FFF, "Hani" },
	},
	[42] = {
		{ 0x2A000, 0x2AFFF, "Hani" },
	},
	[43] = {
		{ 0x2B000, 0x2BFFF, "Hani" },
	},
	[44] = {
		{ 0x2C000, 0x2CFFF, "Hani" },
	},
	[45] = {
		{ 0x2D000, 0x2DFFF, "Hani" },
	},
	[46] = {
		{ 0x2E000, 0x2EBE0, "Hani" },
	},

	-- Adjacent blocks of 4096 characters that consist entirely or almost entirely
	-- of one script.
	["blocks"] = {
		{ 4, 9, "Hani" },
		{ 11, 13, "Hang" },
		{ 32, 46, "Hani" },
	},

	-- Single codepoints assigned to a script independently of the ranges above.
	["individual"] = {
		[0x462] = "Cyrl",
		[0x463] = "Cyrl",
		[0x46A] = "Cyrl",
		[0x46B] = "Cyrl",
		[0x46C] = "Cyrl",
		[0x46D] = "Cyrl",
		[0x472] = "Cyrl",
		[0x473] = "Cyrl",
		[0x474] = "Cyrl",
		[0x475] = "Cyrl",
		[0x2135] = "Zmth",
		[0x2190] = "Zsym",
		[0x21FF] = "Zsym",
		[0xFA0E] = "Hani",
		[0xFA0F] = "Hani",
		[0xFA11] = "Hani",
		[0xFA13] = "Hani",
		[0xFA14] = "Hani",
		[0xFA1F] = "Hani",
		[0xFA21] = "Hani",
		[0xFA23] = "Hani",
		[0xFA24] = "Hani",
		[0xFA27] = "Hani",
		[0xFA28] = "Hani",
		[0xFA29] = "Hani",
		[0x1056F] = "Aghb",
		[0x16FE0] = "Tang",
		[0x1B000] = "Kana",
		[0x1B001] = "Hira",
	},
}