Module:Json

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module offers some utility methods for converting Lua values into JSON values (in UTF-8-encoded Lua strings).

Unfortunately, Lua's data model differs somewhat from JSON's, so it's not possible to write a general function that takes any Lua value and returns a JSON value, always "doing the right thing". Rather, some values cannot be converted at all, and other values have multiple possible non-equivalent representations.

The differences are:

  • Lua has three types with no JSON analogues, namely function, userdata, and thread, so this module has no support for values of those types.
  • Lua's concept of "metatables" has no analogue in JSON, so this module ignores metatables completely.
  • Lua's number type, as implemented in Scribunto, consists of double-precision floating-point values, whereas JSON's number type consists of decimal representations. (And the end-recipient of the JSON data will likely convert the values back into some sort of floating-point notation.) This means that, aside from integers, you can't generally expect values to be converted exactly. (And even with integers, you can only expect perfect conversion in the range ±10^9 or so.) What's more, it means that Lua has a few numeric values with no JSON analogues at all, namely positive infinity, negative infinity, and "not a number" values; so, this module does not support those values.
  • Lua's string type represents strings of eight-bit bytes, whereas JSON's *string* type represents strings of Unicode characters. This module requires the Lua strings to be valid UTF-8 sequences.
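  • Whereas Lua has only a single table type mapping from arbitrary non-nil values to arbitrary non-nil values, JSON has separate array and object types, where an array maps from a set of integers {0,1,…,n} to arbitrary values, and an object maps from arbitrary strings to arbitrary values. As a result, this module provides a separate function for each case (jsonArrayFromTable and jsonObjectFromTable) and leaves the choice to the caller; jsonValueFromValue merely guesses, based on the type of the first key that next() returns for the table.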

(Note: the above is an attempt at an exhaustive list of differences, but it's quite possible that I missed some.)


-- Export table: the public functions below are attached to p, which is
-- returned at the end of the file.
local p = {}

-- Makes a best-effort conversion of an arbitrary Lua value to a string
-- containing a JSON representation of it. It's not intended to be very robust,
-- but may be useful for prototyping.
--
-- Conversion rules:
--   * nil      -> 'null'
--   * boolean  -> 'true' / 'false'
--   * number   -> p.jsonNumberFromNumber
--   * string   -> p.jsonStringFromString
--   * table    -> classified by the type of the first key that next() returns:
--                 a number key sends the table to p.jsonArrayFromTable, a
--                 string key sends it to p.jsonObjectFromTable. For a table
--                 mixing both kinds of keys the outcome therefore depends on
--                 which key next() happens to return first.
--   * an empty table is rendered according to opts.emptyTable:
--                 'array' -> '[]', 'null' -> 'null', anything else -> '{}'.
--
-- Parameters:
--   val   the Lua value to convert.
--   opts  optional table of options; only opts.emptyTable is consulted.
--
-- Returns a string of JSON text. Raises an error for values of any other type
-- (function, userdata, thread) and for tables whose first key is neither a
-- number nor a string.
function p.jsonValueFromValue(val, opts)
	opts = opts or {}

	-- Declared with "local function" so that the recursive helper stays
	-- private to this call instead of being written to a global variable
	-- named "converter" every time this function runs.
	local function converter(v)
		local t = type(v)
		if t == 'nil' then
			return 'null'
		elseif t == 'boolean' then
			return v and 'true' or 'false'
		elseif t == 'number' then
			return p.jsonNumberFromNumber(v)
		elseif t == 'string' then
			return p.jsonStringFromString(v)
		elseif t == 'table' then
			-- Only the first key is inspected to decide between array and
			-- object; the helper it is passed to handles the rest.
			local key = next(v)
			local keyType = type(key)
			if keyType == 'number' then
				return p.jsonArrayFromTable(v, converter)
			elseif keyType == 'string' then
				return p.jsonObjectFromTable(v, converter)
			elseif keyType == 'nil' then
				-- next() returned nothing: the table is empty, and JSON
				-- cannot tell us whether '[]' or '{}' was meant.
				if opts.emptyTable == 'array' then
					return '[]'
				elseif opts.emptyTable == 'null' then
					return 'null'
				else
					return '{}'
				end
			else
				error('Table with unsupported key type: ' .. keyType)
			end
		else
			error('Unsupported type: ' .. t)
		end
	end

	return converter(val)
end

-- Given a string containing valid UTF-8, escapes any illegal or non-ASCII characters,
-- wraps it in double-quotes, and returns the result.
-- The main motivation for escaping non-ASCII characters is to circumvent MediaWiki's
-- application of Unicode Normalization Form C (NFC) to API outputs (and for that matter
-- HTML outputs); see
-- [[Wiktionary:Grease pit/2024/March#CJK Compatibility Ideographs in ranges for Hani script]]
--
-- Escaping rules, per code point:
--   * U+0020..U+007E are emitted literally, except that '"' becomes \" and
--     '\' becomes \\.
--   * every other code point up to U+FFFF becomes a \uXXXX escape.
--   * code points above U+FFFF become a pair of \uXXXX escapes (a UTF-16
--     surrogate pair).
--
-- Raises an error if s is not a string or is not valid UTF-8.
function p.jsonStringFromString(s)
	if type(s) ~= 'string' or not mw.ustring.isutf8(s) then
		-- tostring() is needed here: s may be nil, a boolean or a table, and
		-- concatenating those directly would raise an unrelated
		-- "attempt to concatenate" error instead of this message.
		error('Not a valid UTF-8 string: ' .. tostring(s))
	end

	local ret = { '"' }
	for codepoint in mw.ustring.gcodepoint(s) do
		if codepoint >= 0x0020 and codepoint <= 0x007E then
			-- Printable ASCII: only the quote and the backslash need escaping.
			if codepoint == 0x0022 then
				ret[#ret + 1] = '\\"'
			elseif codepoint == 0x005C then
				ret[#ret + 1] = '\\\\'
			else
				ret[#ret + 1] = string.char(codepoint)
			end
		elseif codepoint <= 0xFFFF then
			ret[#ret + 1] = string.format('\\u%04X', codepoint)
		else
			-- Split into a high and a low surrogate. 0x10000 is a multiple of
			-- 0x400, so codepoint % 0x400 equals (codepoint - 0x10000) % 0x400.
			ret[#ret + 1] = string.format('\\u%04X', 0xD800 + math.floor((codepoint - 0x10000) / 0x400))
			ret[#ret + 1] = string.format('\\u%04X', 0xDC00 + codepoint % 0x400)
		end
	end
	ret[#ret + 1] = '"'

	return table.concat(ret)
end

-- Given a finite real number x, returns a string containing its JSON
-- representation, with enough precision that it *should* round-trip correctly
-- (depending on the well-behavedness of the system on the other end).
--
-- Raises an error if x is not of type "number", or if it is NaN, positive
-- infinity or negative infinity (none of which JSON can represent).
function p.jsonNumberFromNumber(x)
	if type(x) ~= 'number' then
		-- tostring() is needed here: x may be nil, a boolean or a table, and
		-- concatenating those directly would raise an unrelated
		-- "attempt to concatenate" error instead of this message.
		error('Not of type "number": ' .. tostring(x) .. ' (' .. type(x) .. ')')
	end
	-- x ~= x is true only for NaN.
	if x ~= x or x == math.huge or x == -math.huge then
		error('Not a finite real number: ' .. x)
	end
	-- 17 significant digits are requested so that the decimal text can be
	-- parsed back to the same double.
	return string.format("%.17g", x)
end

-- Given nil, returns the string 'null'. (Included for completeness' sake.)
-- Raises an error if v is anything other than nil.
function p.jsonNullFromNil(v)
	if type(v) ~= 'nil' then
		-- tostring() is needed here: v may be a boolean or a table, and
		-- concatenating those directly would raise an unrelated
		-- "attempt to concatenate" error instead of this message.
		error('Not nil: ' .. tostring(v) .. ' (' .. type(v) .. ')')
	end
	return 'null'
end

-- Given true or false, returns the string 'true' or the string 'false'.
-- (Included for completeness' sake.)
-- Raises an error if b is not a boolean.
function p.jsonTrueOrFalseFromBoolean(b)
	if type(b) ~= 'boolean' then
		-- tostring() is needed here: b may be nil or a table, and
		-- concatenating those directly would raise an unrelated
		-- "attempt to concatenate" error instead of this message.
		error('Not a boolean: ' .. tostring(b) .. ' (' .. type(b) .. ')')
	end
	return b and 'true' or 'false'
end

-- Treats the table t as an array and joins its values into a string of the
-- form '[ v1, v2, v3 ]'. The elements are visited with ipairs, so only the
-- keys 1, 2, 3, ... up to the first missing one are used.
--
-- f is an optional function applied to each element to JSONify it before
-- assembly; when f is omitted, the elements should already be strings
-- containing valid JSON data. Elements for which f returns nil are left out.
--
-- Returns '[]' when nothing was emitted.
function p.jsonArrayFromTable(t, f)
	local convert = f
	if not convert then
		convert = function (x) return x end
	end

	-- The buffer is filled in (separator, value) pairs; the first separator
	-- is later overwritten by the opening bracket.
	local pieces = {}
	local used = 0
	for _, item in ipairs(t) do
		local json = convert(item)
		if json ~= nil then
			pieces[used + 1] = ', '
			pieces[used + 2] = json
			used = used + 2
		end
	end

	if used == 0 then
		return '[]'
	end

	pieces[1] = '[ '
	pieces[used + 1] = ' ]'

	return table.concat(pieces)
end

-- Given a table whose keys are all strings, assembles its keys and values in
-- the form '{ "k1": v1, "k2": v2, "k3": v3 }'. Optionally takes a function to
-- JSONify the values before assembly; if that function is omitted, then the
-- values should already be strings containing valid JSON data. (The keys, by
-- contrast, should just be regular Lua strings; they will be passed to this
-- module's jsonStringFromString.)
--
-- Entries for which the function returns nil are left out. The members appear
-- in whatever order pairs() yields them. Returns '{}' when nothing was
-- emitted. Raises an error if any key is not a string.
function p.jsonObjectFromTable(t, f)
	f = f or function (x) return x end

	-- The buffer is filled in (separator, member) pairs; the first separator
	-- is later overwritten by the opening brace.
	local ret = {}
	for key, value in pairs(t) do
		if type(key) ~= 'string' then
			-- tostring() is needed here: key may be a boolean or a table, and
			-- concatenating those directly would raise an unrelated
			-- "attempt to concatenate" error instead of this message.
			error('Not a string: ' .. tostring(key))
		end
		key = p.jsonStringFromString(key)
		value = f(value)
		if value ~= nil then
			ret[#ret + 1] = ', '
			ret[#ret + 1] = key .. ': ' .. value
		end
	end

	if #ret == 0 then
		return '{}'
	end

	ret[1] = '{ '
	ret[#ret + 1] = ' }'

	return table.concat(ret)
end

-- Hand the export table back to whoever loads this module.
return p