
From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module is a faster reimplementation of mw.text.nowiki, with some minor changes so that its output matches the PHP equivalent used by the native parser.

local byte = string.byte
local concat = table.concat
local gsub = string.gsub
local lower = string.lower
local match = string.match
local sub = string.sub

local data = mw.loadData("Module:string/nowiki/data")
local absolute = data.absolute
local after_newline = data.after_newline
local after_magic_link = data.after_magic_link
local uri_schemes = data.uri_schemes

-- Replacement callback for the final gsub pass over the escaped text.
-- `uri` is the run of word characters captured immediately before a colon;
-- the returned string replaces that run together with its colon.
-- If the lowercased run is a key of `uri_schemes`, the colon is emitted as the
-- entity "&#58;"; otherwise the colon is put back unchanged.
local function escape_uri(uri)
	if uri_schemes[lower(uri)] then
		return uri .. "&#58;"
	end
	return uri .. ":"
end

-- Returns `text` with wikitext-significant characters replaced by numeric
-- character references ("&#NN;").
--
-- The text is scanned left to right for candidate characters; finished pieces
-- are appended to a buffer that is concatenated once at the end. A final gsub
-- pass hands every "word:" sequence to escape_uri.
--
-- @param text string: the text to escape.
-- @return string: the escaped text (a single value).
return function(text)
	-- ret/n: output buffer and its length; head: where the next search begins.
	local ret, head, n, first = {}, 1, 0, sub(text, 1, 1)
	-- The very start of the text is handled like the position after a newline.
	if after_newline[first] then
		n = n + 1
		ret[n] = "&#" .. byte(first) .. ";"
		head = 2
	elseif sub(text, 1, 4) == "----" then
		-- Only the first hyphen is escaped, as in the after-newline case below.
		n = n + 1
		ret[n] = "&#45;---"
		head = 5
	end
	-- start: index of the first character not yet copied into the buffer.
	local start = head
	while true do
		local loc, this = match(text, "()([\n\r\"&':;<=>IPR[%]_{|}])", head)
		if not loc then
			-- No candidates left: flush the tail, then run the URI-scheme pass.
			-- The parentheses discard gsub's second return value (the count).
			n = n + 1
			ret[n] = sub(text, start)
			return (gsub(concat(ret), "([%w_]+):", escape_uri))
		elseif absolute[this] then
			-- Characters flagged in `absolute` are escaped wherever they occur.
			n = n + 1
			ret[n] = sub(text, start, loc - 1) .. "&#" .. byte(this) .. ";"
			head = loc + 1
			start = head
		elseif this == "\n" or this == "\r" then
			local nxt = loc + 1
			nxt = sub(text, nxt, nxt)
			if after_newline[nxt] then
				-- Keep the line break itself; escape the character after it.
				n = n + 1
				ret[n] = sub(text, start, loc) .. "&#" .. byte(nxt) .. ";"
				head = loc + 2
				start = head
			elseif sub(text, loc + 1, loc + 4) == "----" then
				n = n + 1
				ret[n] = sub(text, start, loc) .. "&#45;---"
				head = loc + 5
				start = head
			else
				-- Nothing to escape here. Resume just past this character;
				-- `start` is left alone so the text is copied later.
				head = loc + 1
			end
		elseif this == "_" then
			local nxt = loc + 1
			if sub(text, nxt, nxt) == "_" then
				-- "__": keep the first underscore, escape the second.
				n = n + 1
				ret[n] = sub(text, start, loc) .. "&#95;"
				head = loc + 2
				start = head
			else
				head = loc + 1
			end
		elseif this == ":" and sub(text, loc + 1, loc + 2) == "//" then
			-- "://": escape the colon and keep the slashes.
			n = n + 1
			ret[n] = sub(text, start, loc - 1) .. "&#58;//"
			head = loc + 3
			start = head
		elseif (
			this == "I" and sub(text, loc + 1, loc + 3) == "SBN" or
			this == "P" and sub(text, loc + 1, loc + 3) == "MID"
		) then
			-- "ISBN" / "PMID": escape the following character if it is flagged
			-- in `after_magic_link`.
			local nxt = loc + 4
			nxt = sub(text, nxt, nxt)
			if after_magic_link[nxt] then
				n = n + 1
				ret[n] = sub(text, start, loc + 3) .. "&#" .. byte(nxt) .. ";"
				head = loc + 5
				start = head
			else
				head = loc + 1
			end
		elseif this == "R" and sub(text, loc + 1, loc + 2) == "FC" then
			-- "RFC": same treatment as "ISBN" / "PMID", one character shorter.
			local nxt = loc + 3
			nxt = sub(text, nxt, nxt)
			if after_magic_link[nxt] then
				n = n + 1
				ret[n] = sub(text, start, loc + 2) .. "&#" .. byte(nxt) .. ";"
				head = loc + 4
				start = head
			else
				head = loc + 1
			end
		else
			-- A candidate character that needs no escaping in this context.
			head = loc + 1
		end
	end
end