Module:User:Wikitiki89/split

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Wikitiki89, for his own experimentation. Items in this module may be added and removed at Wikitiki89's discretion; do not rely on this module's stability.


-- Table of functions this module makes public.
local export = {}

--- Split a string into the pieces that lie between matches of a delimiter.
-- Works on bytes, using the standard `string` library.
-- @param text    string to split
-- @param pattern delimiter: a Lua pattern, or a literal string when `plain` is true
-- @param plain   when truthy, `pattern` is searched for literally
-- @return array of pieces, delimiters excluded; a delimiter at the very start
--         or end of `text` yields an empty string at that end. When `pattern`
--         is empty (or, in pattern mode, matches the empty string) the result
--         is one entry per byte of `text`.
-- Raises "Infinite split" if a match ends before the current search position,
-- i.e. an empty match that would never let the scan advance.
function export.split(text, pattern, plain)
	local len = text:len()
	local result = {}
	if pattern == "" or (not plain and string.match("", pattern)) then
		-- Special case: an empty delimiter separates every byte.
		for pos = 1, len do
			result[pos] = text:sub(pos, pos)
		end
		return result
	end

	local start = 1
	while true do
		local i, j = text:find(pattern, start, plain)
		if i == nil then
			-- No more delimiters: the remainder is the final piece.
			result[#result + 1] = text:sub(start)
			break
		end
		if j < start then
			error("Infinite split (position: " .. start .. ")")
		end
		-- The piece ends just before the delimiter, which starts at i.
		result[#result + 1] = text:sub(start, i - 1)
		start = j + 1
	end
	return result
end

--- Variant of export.split that indexes the text through `mw.ustring`
-- instead of the byte-oriented `string` library (naive implementation).
-- @param text    string to split
-- @param pattern delimiter pattern, or a literal string when `plain` is true
-- @param plain   when truthy, `pattern` is searched for literally
-- @return array of pieces, delimiters excluded. When `pattern` is empty or
--         matches the empty string, the result is one entry per position
--         counted by `mw.ustring.len`.
-- Raises "Infinite split" if a match ends before the current search position.
function export.usplit(text, pattern, plain)
	-- A non-empty literal delimiter can be located byte-wise, so the plain
	-- splitter is enough. An empty delimiter must NOT go there: it would
	-- split per byte rather than per mw.ustring position.
	if plain and pattern ~= "" then
		return export.split(text, pattern, plain)
	end
	local len = mw.ustring.len(text)
	local result = {}
	if mw.ustring.match("", pattern) then
		-- Special case: the delimiter matches the empty string, so every
		-- position becomes its own piece.
		for pos = 1, len do
			result[pos] = mw.ustring.sub(text, pos, pos)
		end
		return result
	end

	local start = 1
	while true do
		local i, j = mw.ustring.find(text, pattern, start)
		if i == nil then
			-- No more delimiters: the remainder is the final piece.
			result[#result + 1] = mw.ustring.sub(text, start)
			break
		end
		if j < start then
			error("Infinite split (position: " .. start .. ")")
		end
		-- The piece ends just before the delimiter, which starts at i.
		result[#result + 1] = mw.ustring.sub(text, start, i - 1)
		start = j + 1
	end
	return result
end

-- Settings for the timing harness in export.show.
local datasize = 3 -- how many copies of `datum` gendata() joins together
local iterations = 3 -- how many times export.show() runs the split
local datum = "xxxxxYzzzzz" -- unit of test input repeated by gendata()
local pattern = 'Y' -- delimiter handed to the split function
local plain = false -- third argument handed to the split function
-- Implementation being exercised; mw.text.split is the alternative to swap in.
local splitfunction = export.usplit -- mw.text.split

--- Build the test input: `datum` repeated `datasize` times, with no separator.
-- @return the concatenated string
local function gendata()
	-- string.rep does the repetition directly; no intermediate table needed.
	return string.rep(datum, datasize)
end

--- Run the configured split function `iterations` times over the generated
-- input and report the summed number of pieces.
-- @return the string "Result: " followed by the total piece count
function export.show()
	local input = gendata()
	local total = 0
	for _ = 1, iterations do
		-- Only the length of each returned array is accumulated.
		total = total + #splitfunction(input, pattern, plain)
	end
	return "Result: " .. total
end

-- Hand the table of public functions back to whoever loads this module.
return export