Module:string/nowiki

From Wiktionary, the free dictionary
Jump to navigation Jump to search

A faster reimplementation of mw.text.nowiki, with a few minor changes so that its output matches the PHP equivalent used by the native parser.


-- Cache the standard-library functions used below in locals, so each call is
-- a local access instead of a global table lookup.
local byte = string.byte
local concat = table.concat
local gsub = string.gsub
local lower = string.lower
local match = string.match
local sub = string.sub

-- Lookup tables loaded from the companion data module.
local data = mw.loadData("Module:string/nowiki/data")
-- Indexed by a single matched character; a truthy entry means the character
-- is always replaced by its numeric character reference.
local absolute = data.absolute
-- Indexed by the character at the start of the text or directly after a
-- "\n" / "\r"; a truthy entry means that character is escaped.
local after_newline = data.after_newline
-- Indexed by the character directly after "ISBN", "PMID" or "RFC"; a truthy
-- entry means that character is escaped.
local after_magic_link = data.after_magic_link
-- Indexed by the lowercased word found immediately before a colon.
local uri_schemes = data.uri_schemes

--- Replacement callback for gsub with the pattern "([%w_]+):".
-- Escapes the colon that follows a recognised URI scheme so the scheme is no
-- longer followed by a literal ":".
-- @param uri string  The word captured immediately before the colon.
-- @return string  The word followed by "&#58;" when its lowercased form is a
--   key of uri_schemes, otherwise the word followed by an unchanged ":".
local function escape_uri(uri)
	-- The scheme lookup is case-insensitive; the original casing is kept in
	-- the output.
	return uri .. (uri_schemes[lower(uri)] and "&#58;" or ":")
end

--- Escapes wikitext-significant sequences in `text` by replacing selected
-- characters with numeric character references (a reimplementation of
-- mw.text.nowiki).
-- @param text string  The text to escape.
-- @return string  The escaped text.
return function(text)
	-- ret/n: output chunks and their count; head: index where the next pattern
	-- search begins; first: first character of text ("" when text is empty).
	local ret, head, n, first = {}, 1, 0, sub(text, 1, 1)
	-- The very start of the text is handled like the position directly after
	-- a newline.
	if after_newline[first] then
		n = n + 1
		ret[n] = "&#" .. byte(first) .. ";"
		head = 2
	elseif sub(text, 1, 4) == "----" then
		-- Escape only the first hyphen, exactly as the after-newline case in
		-- the loop below does.
		n = n + 1
		ret[n] = "&#45;---"
		head = 5
	end
	-- start: index of the first character of text not yet copied into ret.
	local start = head
	while true do
		-- Find the next character that might need escaping.
		local loc, this = match(text, "()([\n\r\"&':;<=>IPR[%]_{|}])", head)
		if not loc then
			-- Nothing left to inspect: flush the tail, then let escape_uri
			-- process every "word:" in the assembled result. The extra
			-- parentheses discard gsub's second return value (the count).
			n = n + 1
			ret[n] = sub(text, start)
			return (gsub(concat(ret), "([%w_]+):", escape_uri))
		elseif absolute[this] then
			-- Always escaped, regardless of context.
			n = n + 1
			ret[n] = sub(text, start, loc - 1) .. "&#" .. byte(this) .. ";"
			head = loc + 1
			start = head
		elseif this == "\n" or this == "\r" then
			local nxt = loc + 1
			nxt = sub(text, nxt, nxt)
			if after_newline[nxt] then
				-- Keep the newline itself and escape the character after it.
				n = n + 1
				ret[n] = sub(text, start, loc) .. "&#" .. byte(nxt) .. ";"
				head = loc + 2
				start = head
			elseif sub(text, loc + 1, loc + 4) == "----" then
				-- Escape the first hyphen of "----" at the start of a line.
				n = n + 1
				ret[n] = sub(text, start, loc) .. "&#45;---"
				head = loc + 5
				start = head
			else
				-- Resume directly after the newline; restarting anywhere
				-- before it would only find this same character again.
				head = loc + 1
			end
		elseif this == "_" then
			local nxt = loc + 1
			if sub(text, nxt, nxt) == "_" then
				-- "__" becomes "_&#95;".
				n = n + 1
				ret[n] = sub(text, start, loc) .. "&#95;"
				head = loc + 2
				start = head
			else
				head = loc + 1
			end
		elseif this == ":" and sub(text, loc + 1, loc + 2) == "//" then
			-- "://" becomes "&#58;//": the colon is escaped, not dropped.
			n = n + 1
			ret[n] = sub(text, start, loc - 1) .. "&#58;//"
			head = loc + 3
			start = head
		elseif (
			this == "I" and sub(text, loc + 1, loc + 3) == "SBN" or
			this == "P" and sub(text, loc + 1, loc + 3) == "MID"
		) then
			-- Four-letter keyword: escape the character that follows it, if
			-- that character is listed in after_magic_link.
			local nxt = loc + 4
			nxt = sub(text, nxt, nxt)
			if after_magic_link[nxt] then
				n = n + 1
				ret[n] = sub(text, start, loc + 3) .. "&#" .. byte(nxt) .. ";"
				head = loc + 5
				start = head
			else
				head = loc + 1
			end
		elseif this == "R" and sub(text, loc + 1, loc + 2) == "FC" then
			-- Same as above for the three-letter keyword "RFC".
			local nxt = loc + 3
			nxt = sub(text, nxt, nxt)
			if after_magic_link[nxt] then
				n = n + 1
				ret[n] = sub(text, start, loc + 2) .. "&#" .. byte(nxt) .. ";"
				head = loc + 4
				start = head
			else
				head = loc + 1
			end
		else
			-- A candidate character in a harmless context (for example a
			-- lone ":" or a plain "I"): leave it alone and search after it.
			head = loc + 1
		end
	end
end