User:Ekirahardian/Module:Unicode data/scripts.html

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Module:Unicode data/scripts
<head></head>
<body>
	<pre id='test'></pre>
	<script>
		"use strict";

		/*
		 * Generates the Lua source of [[Module:Unicode data/scripts]] from two
		 * Unicode Character Database files that must sit next to this page:
		 *   - PropertyValueAliases.txt (https://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt)
		 *   - Scripts.txt              (https://www.unicode.org/Public/UNIDATA/Scripts.txt)
		 * The generated text is written into the <pre id="test"> element.
		 */

		const PROPERTY_VALUE_ALIASES_URL = 'PropertyValueAliases.txt';
		const SCRIPTS_URL = 'Scripts.txt';

		/**
		 * Splits one UCD data line into its semicolon-separated fields.
		 * A trailing "# comment" is dropped and every field is trimmed, so blank
		 * lines and pure comment lines yield a single empty field.
		 *
		 * @param {string} line - One line of a UCD text file.
		 * @returns {string[]} Trimmed fields.
		 */
		function splitDataLine(line) {
			const hashAt = line.indexOf('#');
			const data = hashAt === -1 ? line : line.slice(0, hashAt);
			return data.split(';').map((field) => field.trim());
		}

		/**
		 * Extracts the script ("sc") records from PropertyValueAliases.txt.
		 * Only the first two values of each record are used (short code and long
		 * name); further aliases such as "Qaai" or "Qaac" are ignored.
		 *
		 * @param {string} text - Contents of PropertyValueAliases.txt.
		 * @returns {{code: string, name: string}[]} Records in file order; `name`
		 *     keeps its underscores (e.g. "Old_Italic").
		 */
		function parseScriptAliases(text) {
			const aliases = [];
			for (const line of text.split(/\r?\n/)) {
				const [property, code, name] = splitDataLine(line);
				if (property === 'sc' && code && name) {
					aliases.push({ code, name });
				}
			}
			return aliases;
		}

		/**
		 * Parses a hexadecimal code point as written in UCD files (4 to 6 digits).
		 *
		 * @param {string} hex - Hex digits without prefix.
		 * @param {string} line - Source line, used only for the error message.
		 * @returns {number} The code point value.
		 * @throws {Error} If `hex` is not 4-6 hexadecimal digits.
		 */
		function parseCodePoint(hex, line) {
			if (!/^[0-9A-Fa-f]{4,6}$/.test(hex)) {
				throw new Error(`Malformed code point "${hex}" in line: ${line}`);
			}
			return Number.parseInt(hex, 16);
		}

		/**
		 * Formats a code point as a Lua hex literal with at least five digits,
		 * e.g. 0x00041, 0x1F600, 0x10FFFD.
		 *
		 * @param {number} codePoint
		 * @returns {string}
		 */
		function formatCodePoint(codePoint) {
			return `0x${codePoint.toString(16).toUpperCase().padStart(5, '0')}`;
		}

		/**
		 * Parses Scripts.txt into individual code points and code point ranges,
		 * each sorted in ascending numeric order.
		 *
		 * @param {string} text - Contents of Scripts.txt.
		 * @param {Map<string, string>} codeByName - Maps a long script name (with
		 *     underscores) to its short code. Names missing from the map are kept
		 *     unchanged.
		 * @returns {{
		 *     singles: {codePoint: number, script: string}[],
		 *     ranges: {first: number, last: number, script: string}[]
		 * }}
		 * @throws {Error} If a data line contains a malformed code point.
		 */
		function parseScriptAssignments(text, codeByName) {
			const singles = [];
			const ranges = [];
			for (const line of text.split(/\r?\n/)) {
				const fields = splitDataLine(line);
				// Blank lines, comment lines and lines without a value carry no record.
				if (fields.length < 2 || fields[0] === '' || fields[1] === '') {
					continue;
				}
				const [codePoints, name] = fields;
				const script = codeByName.get(name) ?? name;
				const [firstHex, lastHex] = codePoints.split('..');
				if (lastHex === undefined) {
					singles.push({ codePoint: parseCodePoint(firstHex, line), script });
				} else {
					ranges.push({
						first: parseCodePoint(firstHex, line),
						last: parseCodePoint(lastHex, line),
						script,
					});
				}
			}
			// Numeric comparison keeps 6-digit code points (plane 16) in the right place.
			singles.sort((a, b) => a.codePoint - b.codePoint);
			ranges.sort((a, b) => a.first - b.first);
			return { singles, ranges };
		}

		/**
		 * Builds the complete Lua module text.
		 *
		 * @param {string} aliasesText - Contents of PropertyValueAliases.txt.
		 * @param {string} scriptsText - Contents of Scripts.txt.
		 * @returns {string} Lua source, lines separated by "\n", no trailing newline.
		 * @throws {Error} If Scripts.txt contains a malformed code point.
		 */
		function buildScriptsModule(aliasesText, scriptsText) {
			const aliases = parseScriptAliases(aliasesText);

			// First occurrence wins if a long name were ever listed twice.
			const codeByName = new Map();
			for (const { code, name } of aliases) {
				if (!codeByName.has(name)) {
					codeByName.set(name, code);
				}
			}

			const { singles, ranges } = parseScriptAssignments(scriptsText, codeByName);

			const lines = [
				'--[=[',
				'-- Official Unicode script values for individual codepoints and ranges of',
				'-- codepoints.',
				'',
				'-- https://www.unicode.org/Public/UNIDATA/Scripts.txt provided',
				'-- the script names, and https://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt',
				'-- provided script codes corresponding to the names (see [[w:Script (Unicode)]]).',
				'--]=]',
				'',
				'local data = {',
				'\tsingles = {',
				...singles.map(({ codePoint, script }) => `\t\t[${formatCodePoint(codePoint)}] = "${script}",`),
				'\t},',
				'',
				'\tranges = {',
				...ranges.map(({ first, last, script }) => `\t\t{ ${formatCodePoint(first)}, ${formatCodePoint(last)}, "${script}" },`),
				'\t},',
				'\t-- Scripts.txt gives full names; here we consider them aliases to save space.',
				'\taliases = {',
				// Long names are displayed with spaces instead of underscores.
				...aliases.map(({ code, name }) => `\t\t${code} = "${name.replaceAll('_', ' ')}",`),
				// Blank line kept so the output matches what this tool generated before.
				'',
				'\t},',
				'}',
				'',
				'-- For binary searching, because the length operator doesn\'t work on tables',
				'-- loaded with mw.loadData.',
				'data.ranges.length = #data.ranges',
				'',
				'return data',
			];
			return lines.join('\n');
		}

		/**
		 * Fetches a text resource and rejects on HTTP error statuses.
		 *
		 * @param {string} url
		 * @returns {Promise<string>} The response body.
		 * @throws {Error} If the response status is not in the 2xx range.
		 */
		async function fetchText(url) {
			const response = await fetch(url);
			if (!response.ok) {
				throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
			}
			return response.text();
		}

		/**
		 * Loads both data files, then renders the generated module into the page.
		 * Both files are awaited together so the alias table is always complete
		 * before Scripts.txt is processed.
		 *
		 * @returns {Promise<void>}
		 */
		async function main() {
			try {
				const [aliasesText, scriptsText] = await Promise.all([
					fetchText(PROPERTY_VALUE_ALIASES_URL),
					fetchText(SCRIPTS_URL),
				]);
				// textContent: the data is plain text and must not be parsed as HTML;
				// "\n" renders as a line break inside <pre>.
				document.getElementById('test').textContent = buildScriptsModule(aliasesText, scriptsText);
			} catch (error) {
				console.error('Could not generate Module:Unicode data/scripts:', error);
			}
		}

		main();
	</script>
</body>