Module:JSON data

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:JSON data/documentation. [edit]

This module is intended to be used by bots or other automation tools which need to access Wiktionary data. This is not a general-purpose JSON serialisation module. Using one would be impractical, as experiments have shown that using one on the language data easily hits execution limits.

Bots may access the data by using mw:API:Expandtemplates or requesting the raw source of a page invoking this module with a &templates=expand query parameter.

Make sure you only load this module once (or twice, if you need both the languages table and another table). As of 11th of September 2013, the JSON for the languages table has been measured to weigh 666543 bytes. (Family data takes 13731 bytes, while scripts take 6279 bytes.) Generating the JSON data takes a few seconds and puts a relatively high strain on the servers.

Available functions are: export_languages, export_scripts, export_families and export_labels. The first three generate the JSON equivalents of Module:languages, Module:scripts and Module:families respectively; export_labels exports the label data (labels, aliases and deprecated entries) from Module:labels/data. The structure of the data corresponds exactly to the one used in Wiktionary modules, with a caveat below.

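When export_languages is given positional arguments, the first specifies the type of languages (see Template:language data documentation) that will be listed in the data, while the following arguments list the data keys which will be exported. For the first argument, the special values TWO_LETTER, TWO_THREE_LETTER and TWO_THREE_LETTER_REGULAR can also be passed, and a value beginning with = is read as a comma-separated list of language codes to export (for example =en,fr,de). For example, invoking the function with TWO_LETTER, names and scripts as arguments will export the names and script codes for languages with two-letter codes. To conserve space, if only one key is specified, its value is listed directly in the root object (indexed by language codes). In that single-key form, a language that lacks the key is listed with a null value, unless the named argument nulls is set to a value that Module:yesno treats as true, in which case such languages are omitted.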


-- Table of public functions; it is returned at the end of the module.
local export = {}
 
-- optimisation: local variable lookup is slightly faster than global lookup
-- (the serialisers below call these helpers once per exported value)
local tab_insert, tab_concat, type, tostring, pairs, ipairs = table.insert, table.concat, type, tostring, pairs, ipairs
 
-- Replacement table for the characters that may not appear unescaped inside
-- a JSON string: the double quote, the backslash and the control characters
-- 0x00-0x1F. It is built once at load time so that escaping a string remains
-- a single gsub call.
local json_escapes = {
	['"'] = '\\"',
	['\\'] = '\\\\',
	['\b'] = '\\b',
	['\f'] = '\\f',
	['\n'] = '\\n',
	['\r'] = '\\r',
	['\t'] = '\\t',
}
for byte = 0, 31 do
	local ch = string.char(byte)
	if not json_escapes[ch] then
		json_escapes[ch] = ('\\u%04x'):format(byte)
	end
end

-- Serialise the string `s` as a JSON string literal (including the
-- surrounding double quotes).
--
-- `s` must be a string: the method-call syntax below fails on other types.
local function export_str(s)
	-- %c also matches DEL (0x7F); it has no entry in the table, and gsub
	-- keeps the original text whenever the table lookup yields nil.
	-- The parentheses discard gsub's second result (the match count).
	return '"' .. (s:gsub('[%c"\\]', json_escapes)) .. '"'
end
 
-- Serialise the elements of `tab` visited by ipairs as a JSON array.
-- Only string and boolean elements are supported; anything else raises
-- an error.
local function export_array(tab)
	local encoded = {}
	local count = 0
	for _, element in ipairs(tab) do
		local kind = type(element)
		local text
		if kind == 'string' then
			text = export_str(element)
		elseif kind == 'boolean' then
			text = tostring(element)
		else
			error("serialisation failed: unsupported array element type")
		end
		count = count + 1
		encoded[count] = text
	end
	return "[" .. tab_concat(encoded, ",") .. "]"
end
 
-- Serialise `tab` as a JSON object; a nil `tab` becomes the JSON null.
--
-- Lua has a single table type for arrays and objects, so `schema` tells
-- the serialiser how to treat each table-valued key:
--   schema[key] == false  -> the value is written as a JSON array
--   schema[key] is table  -> the value is written as a nested object,
--                            using that table as its schema
-- A table value raises an error when no schema is given, or when the
-- schema has no entry for its key. Values other than strings, booleans
-- and tables also raise an error.
local function export_object(tab, schema)
	if tab == nil then
		return "null"
	end

	local members = {}
	for name, val in pairs(tab) do
		local kind = type(val)
		local member
		if kind == 'string' then
			member = export_str(name) .. ':' .. export_str(val)
		elseif kind == 'boolean' then
			member = export_str(name) .. ':' .. tostring(val)
		elseif kind == 'table' then
			if not schema then
				error("no schema given for array with table values")
			end
			local subschema = schema[name]
			if subschema == false then
				member = export_str(name) .. ':' .. export_array(val)
			elseif type(subschema) == 'table' then
				member = export_str(name) .. ':' .. export_object(val, subschema)
			else
				error("serialisation failed: table value at key '" .. name .. "' has no schema")
			end
		else
			error("serialisation failed: unsupported object value type")
		end
		members[#members + 1] = member
	end
	return "{" .. tab_concat(members, ",") .. "}"
end
 
--- Export the data in Module:languages/alldata as a JSON object keyed by
-- language code.
--
-- The function can be called from Lua with the three parameters below.
-- If the first argument is a table, it is instead treated as an #invoke
-- frame: args[1] is the item filter, args[2] onwards are the keys to
-- export, and args.nulls (interpreted by Module:yesno) is `skip_nulls`.
--
-- @param item_filter  selects the languages to export:
--   * a function(code, data) returning a true value for languages to keep;
--   * "TWO_LETTER", "TWO_THREE_LETTER" or "TWO_THREE_LETTER_REGULAR";
--   * a string starting with "=", followed by a comma-separated list of
--     language codes;
--   * any other non-empty string, which is compared with each language's
--     `type` field;
--   * nil, false or "" to export every language.
-- @param key_filter  optional list of data keys to export. With exactly
--   one key, each language maps directly to that key's value; with several
--   keys, each language maps to an object holding just those keys. Without
--   it, all keys are exported.
-- @param skip_nulls  only used with a single key: when true, languages that
--   lack the key are omitted rather than written as null.
--   NOTE(review): through #invoke this means nulls=yes *removes* the null
--   entries, which reads backwards; kept as is because existing callers may
--   rely on it -- confirm.
-- @return the JSON text
function export.export_languages(item_filter, key_filter, skip_nulls)
	if type(item_filter) == "table" then
		local args = item_filter.args
		key_filter = {}
		local i = 2
		while args[i] do
			tab_insert(key_filter, args[i])
			i = i + 1
		end
		if #key_filter == 0 then
			key_filter = nil
		end
		skip_nulls = require('Module:yesno')(args.nulls)
		item_filter = args[1]
	end

	-- An empty filter means "no filter". The original code turned it into
	-- false and then tried to call it, which raised an error.
	if item_filter == "" then
		item_filter = nil
	end

	if type(item_filter) == 'string' then
		if item_filter == "TWO_LETTER" then
			function item_filter(key, value)
				return #key == 2
			end
		elseif item_filter == "TWO_THREE_LETTER" then
			function item_filter(key, value)
				return #key <= 3
			end
		elseif item_filter == "TWO_THREE_LETTER_REGULAR" then
			function item_filter(key, value)
				return (#key <= 3) and value.type == 'regular'
			end
		elseif item_filter:sub(1, 1) == '=' then
			-- "=aa,bb,cc": explicit list of language codes
			local list = {}
			for item in mw.text.gsplit(item_filter:sub(2), ',') do
				list[item] = true
			end
			function item_filter(key, value)
				return list[key]
			end
		else
			-- keep the string in its own local: item_filter is about to be
			-- replaced by the closure that needs it
			local t = item_filter
			function item_filter(key, value)
				return value.type == t
			end
		end
	end

	local data = mw.loadData("Module:languages/alldata")
	local items = {}

	-- false: serialise the value as an array; table: serialise it as an
	-- object with that nested schema (see export_object)
	local schema = {
		names = false,
		scripts = false,
		sort_key = {
			from = false,
			to = false
		},
		entry_name = {
			from = false,
			to = false
		}
	}

	-- The single-key lookups do not change between languages, so they are
	-- resolved once, outside the loop.
	local single = false
	local only_key, only_schema
	if key_filter and #key_filter == 1 then
		single = true
		only_key = key_filter[1]
		only_schema = schema[only_key]
	end

	for key, value in pairs(data) do
		if not item_filter or item_filter(key, value) then
			if single then
				local item = value[only_key]
				if item == nil then
					if not skip_nulls then
						tab_insert(items, export_str(key) .. ':null')
					end
				else
					local item_type = type(item)
					local encoded
					if item_type == "string" then
						encoded = export_str(item)
					elseif item_type == "boolean" then
						-- booleans are accepted in multi-key mode (via
						-- export_object), so accept them here as well
						encoded = tostring(item)
					elseif only_schema then
						encoded = export_object(item, only_schema)
					else
						encoded = export_array(item)
					end
					tab_insert(items, export_str(key) .. ':' .. encoded)
				end
			elseif key_filter then
				-- copy only the requested keys; missing ones are simply
				-- absent from the resulting object
				local langobj = {}
				for _, fkey in pairs(key_filter) do
					langobj[fkey] = value[fkey]
				end
				tab_insert(items, export_str(key) .. ':' ..
					export_object(langobj, schema)
				)
			else
				tab_insert(items, export_str(key) .. ':' ..
					export_object(value, schema)
				)
			end
		end
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
 
--- Export the table loaded from Module:scripts/data as a JSON object with
-- one member per top-level key.
-- @return the JSON text
function export.export_scripts()
	local data = mw.loadData("Module:scripts/data")

	-- `names` is written as a JSON array. The schema never changes, so it
	-- is created once instead of once per entry.
	local schema = {
		names = false
	}

	local items = {}

	for key, value in pairs(data) do
		tab_insert(items, export_str(key) .. ':' .. export_object(value, schema))
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
 
--- Export the table loaded from Module:families/data as a JSON object with
-- one member per top-level key.
-- @return the JSON text
function export.export_families()
	local data = mw.loadData("Module:families/data")

	-- `names` is written as a JSON array. The schema never changes, so it
	-- is created once instead of once per entry.
	local schema = {
		names = false
	}

	local items = {}

	for key, value in pairs(data) do
		tab_insert(items, export_str(key) .. ':' .. export_object(value, schema))
	end

	return "{" .. tab_concat(items, ",") .. "}"
end
 
--- Export the table loaded from Module:labels/data as a JSON object with
-- three members: "labels" (one object per label), "aliases" (alias ->
-- string) and "deprecated" (an empty object when the data has no
-- `deprecated` field).
-- @return the JSON text
function export.export_labels()
	local data = mw.loadData("Module:labels/data")

	-- The category lists of a label are written as JSON arrays. The schema
	-- never changes, so it is created once instead of once per label.
	local label_schema = {
		plain_categories = false,
		topical_categories = false,
		pos_categories = false,
		regional_categories = false
	}

	local labels, aliases = {}, {}

	-- tab_insert is the file's localised table.insert, used here for
	-- consistency with the other exporters
	for key, value in pairs(data.labels) do
		tab_insert(labels, export_str(key) .. ':' .. export_object(value, label_schema))
	end

	for key, value in pairs(data.aliases) do
		tab_insert(aliases, export_str(key) .. ':' .. export_str(value))
	end

	-- No schema is passed for `deprecated`, so export_object raises an
	-- error if it contains table values.
	return ('{"labels":{%s},"aliases":{%s},"deprecated":%s}'):format(
		tab_concat(labels, ','), tab_concat(aliases, ','), export_object(data.deprecated or {})
	)
end
 
return export