Module:Hani

From Wiktionary, the free dictionary
Jump to navigation Jump to search

-- Keep the library functions used below in locals so they can be called by short name.
local concat = table.concat
-- Used below to turn the input string into a table with one entry per character (judging by its name, it splits on UTF-8 character boundaries; confirm against Module:string utilities).
local explode = require("Module:string utilities").explode_utf8
-- Byte-oriented pattern matching, used for the quick check on the raw UTF-8 bytes of the iteration marks.
local match = string.match
-- Pattern matching from mw.ustring, used to test individual characters against "%W".
local umatch = mw.ustring.match

-- Table holding the functions this module makes public; it is returned at the end of the file.
local export = {}

-- Whether `char` is one of the two iteration marks handled by this module (々 or 〻).
local function is_iteration_mark(char)
	return char == "々" or char == "〻"
end

-- Expands iteration marks (々 and 〻) into the characters they stand for. A run of n iteration marks repeats the n characters immediately before it (e.g. "時々" = "時時", "馬鹿々々しい" = "馬鹿馬鹿しい").
-- Repetition never reaches across a character matching "%W" (e.g. punctuation) or across another run of iteration marks; any marks that cannot be paired with a preceding character are left untouched. For example, "X,Y々々" = "X,YY々", and "X々Y々々" = "XXYY々" (not "XXYXY").
-- Returns the input unchanged if it contains no iteration marks; otherwise returns a new string with the expansions applied.
function export.convert_iteration_marks(text)
	-- Fast path: look for the raw UTF-8 bytes of 々 (E3 80 85) or 〻 (E3 80 BB) before doing any real work.
	if not match(text, "\227\128[\133\187]") then
		return text
	end
	local chars = explode(text)
	-- Number of iteration marks seen (scanning right-to-left) that still need a character to repeat.
	local pending = 0
	-- Scan from the end, so that each run of marks is resolved on its own before an earlier run is considered (e.g. "X々Y々々" gives "XXYY々", with one mark left over, rather than "XXYXY").
	local pos = #chars
	while pos > 0 do
		if is_iteration_mark(chars[pos]) then
			pending = pending + 1
		elseif pending > 0 then
			-- Walk left from the current character, counting how many characters (at most one per pending mark) can be repeated. The walk ends at the start of the text, at another iteration mark, or at anything matching "%W".
			local span = 0
			while span < pending do
				local candidate = chars[pos - span]
				if not candidate or is_iteration_mark(candidate) or umatch(candidate, "%W") then
					break
				end
				span = span + 1
			end
			pending = span
			if span > 0 then
				-- Copy the `span` characters ending at `pos` over the `span` marks that directly follow them; any marks beyond that stay as they are.
				local first = pos - span + 1
				for offset = 0, span - 1 do
					chars[first + span + offset] = chars[first + offset]
				end
				-- Resume scanning just before the characters that were repeated.
				pos = first
				pending = 0
			end
		end
		pos = pos - 1
	end
	return concat(chars)
end

-- Hand the table of public functions back to whatever loads this module.
return export