Module:User:kc kennylau/yue-word

From Wiktionary, the free dictionary
Jump to navigation Jump to search

See also:


-- Lua character class (for use with mw.ustring patterns) that matches one
-- character out of a hard-coded list of CJK ideograph ranges and individual
-- compatibility ideographs. check_mismatch and check_consistency use it to
-- recognise headwords written entirely in Han characters.
-- The commented-out line below is the alternative that would build the class
-- from the "Hani" entry of Module:scripts instead of hard-coding it.
--local Han_pattern = "[" .. require("Module:scripts").getByCode("Hani"):getCharacters() .. "]"
local Han_pattern = "[一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮹟𰀀-𲎯]"

return {
-- Scans the raw wikitext of "Module:zh/data/yue-word/000" .. "/170" and
-- reports every headword that occurs on two consecutive entry lines.
-- Only adjacent repeats are detected, so the check is only exhaustive if the
-- entries are stored in sorted order (presumably they are; verify against
-- the data pages).
-- @param frame  Scribunto frame object (unused).
-- @return wikitext bullet list: one "* [[page]]" item per page with
--         duplicates, followed by a "**" line of comma-terminated words;
--         the empty string when nothing was found.
check_duplicated = function(frame)
	local report = {}
	-- Headword of the previously scanned line. It is intentionally not reset
	-- between pages, so a word that ends one page and starts the next is
	-- still reported (on the later page). It used to be an accidental global.
	local prev_word
	for i=0,170 do
		local name = ("Module:zh/data/yue-word/%03d"):format(i)
		-- mw.title.new returns nil for an invalid title and getContent
		-- returns nil for a missing page; both cases simply skip the page.
		local title = mw.title.new(name)
		local page = title and title:getContent()
		if page then
			local dups = {}
			for line in page:gmatch("[^\n]+") do
				-- The pattern still requires the pronunciation part so that
				-- only real entry lines match, but only the headword (first
				-- capture) is needed here.
				local word = line:match('\t%["([^"]+)"%] = "(.+)",')
				if word and word == prev_word then
					dups[#dups + 1] = word .. ','
				end
				-- Non-entry lines yield nil, which breaks the adjacency chain.
				prev_word = word
			end
			if #dups > 0 then
				report[#report + 1] = '* [[' .. name .. ']]\n**' .. table.concat(dups) .. '\n'
			end
		end
	end
	return table.concat(report)
end,
-- Loads "Module:zh/data/yue-word/000" .. "/170" and reports every all-Han
-- headword for which at least one comma-separated pronunciation does not
-- have exactly one space-separated syllable per character.
-- Headwords containing any character outside Han_pattern are skipped.
-- @param frame  Scribunto frame object (unused).
-- @return wikitext bullet list: one "* [[page]]" item per page with
--         mismatches, followed by a "**" line of comma-terminated words;
--         the empty string when nothing was found.
check_mismatch = function(frame)
	local report = {}
	for i=0,170 do
		local name = ("Module:zh/data/yue-word/%03d"):format(i)
		-- pcall: a data page that is missing or fails to load is skipped.
		local success, data = pcall(mw.loadData,name)
		if success then
			local bad = {}
			for word, prons in pairs(data) do
				-- gsub removes every Han character; its second result is the
				-- number of characters removed, i.e. the word length when
				-- nothing else is left over.
				local rest, word_len = mw.ustring.gsub(word,Han_pattern,"")
				if rest == "" then
					for pron in prons:gmatch("[^,]+") do
						-- Deleting all non-space runs leaves only the spaces;
						-- spaces + 1 is the syllable count of a pronunciation
						-- whose syllables are separated by single spaces.
						local pron_len = pron:gsub("[^ ]+",""):len() + 1
						if word_len ~= pron_len then
							-- One bad pronunciation is enough to flag the word.
							bad[#bad + 1] = word .. ','
							break
						end
					end
				end
			end
			if #bad > 0 then
				report[#report + 1] = '* [[' .. name .. ']]\n**' .. table.concat(bad) .. '\n'
			end
		end
	end
	return table.concat(report)
end,
-- Cross-checks word-level Jyutping against the per-character table in
-- "Module:zh/data/Jyutping character".
-- Sources checked: the `jyutping` field of "Module:zh/data/yue-pron" and the
-- pages "Module:zh/data/yue-word/000" .. "/170". Only headwords made up
-- entirely of Han_pattern characters are examined. For each pronunciation,
-- the n-th syllable is compared with the entry of the n-th character; a
-- character with no entry in the table is not checked.
-- @param frame  Scribunto frame object. If frame.args.no_tone is set (any
--               value, since every Lua string is truthy) the tone digit is
--               stripped from the syllable before comparison.
-- @return wikitext bullet list: one "* [[page]]" item per page with
--         problems, followed by a "**" line of "word(character)," items
--         naming the first offending character; the empty string when
--         nothing was found.
check_consistency = function(frame)
	local no_tone = frame.args.no_tone
	local report = {}
	local ch_data = mw.loadData("Module:zh/data/Jyutping character")
	for i=-1,170 do
		-- Index -1 stands for the yue-pron page, which is checked first.
		local name
		if i == -1 then
			name = "Module:zh/data/yue-pron"
		else
			name = ("Module:zh/data/yue-word/%03d"):format(i)
		end
		-- pcall: a data page that is missing or fails to load is skipped.
		local success, data = pcall(mw.loadData,name)
		-- Only unwrap the sub-table after a successful load; on failure
		-- `data` is the error message, not a table.
		if success and i == -1 then data = data.jyutping end
		if success and data then
			local bad = {}
			for word, prons in pairs(data) do
				local bad_ch
				if mw.ustring.find(word,"^"..Han_pattern.."+$") then
					for pron in prons:gmatch("[^,]+") do
						local j = 1
						for syl in pron:gmatch("[^ ]+") do
							-- Drop a trailing "-<digit>" suffix so only the
							-- part before it is compared.
							local ch_pron = syl:gsub("%-%d$","")
							if no_tone then ch_pron = ch_pron:gsub("%d$","") end
							-- Take exactly the j-th character. Without the
							-- third argument ustring.sub returns the whole
							-- remainder of the word, which only ever matched
							-- a table key for the last character.
							local ch = mw.ustring.sub(word,j,j)
							-- Entries are assumed to be strings listing the
							-- character's readings; plain substring search.
							local known = ch_data[ch]
							if known and not known:find(ch_pron,1,true) then
								bad_ch = ch
								break
							end
							j = j + 1
						end
						if bad_ch then break end
					end
				end
				if bad_ch then
					bad[#bad + 1] = word .. "(" .. bad_ch .. "),"
				end
			end
			if #bad > 0 then
				report[#report + 1] = '* [[' .. name .. ']]\n**' .. table.concat(bad) .. '\n'
			end
		end
	end
	return table.concat(report)
end,
-- Validates the spelling of every syllable stored in
-- "Module:zh/data/yue-word/000" .. "/170" against fixed tables of
-- initials and finals.
-- A syllable is accepted when it is lowercase letters, one digit 1-6 and an
-- optional "-1"/"-2" suffix, and the letters are either a listed initial
-- plus a listed final, a listed final alone, or the bare "m" / "ng".
-- @param frame  Scribunto frame object (unused).
-- @return wikitext bullet list: one "* [[page]]" item per page with
--         problems, followed by a "**" line of "word(syllable)," items
--         naming the first rejected syllable of each word; the empty string
--         when nothing was found.
check_jyutping = function(frame)
	-- Turns a whitespace-separated list of keys into a lookup set.
	local function to_set(keys)
		local set = {}
		for key in keys:gmatch("%S+") do
			set[key] = 1
		end
		return set
	end

	local initials = to_set("b p m f d t n l g k ng h gw kw z c s j w")
	local finals = to_set(
		"aa aai aau aam aan aang aap aat aak " ..
		"a ai au am an ang ap at ak " ..
		"e ei eu em eng ep ek " ..
		"i iu im in ing ip it ik " ..
		"o oi ou on ong ot ok " ..
		"u ui un ung ut uk " ..
		"eoi eon eot " ..
		"oe oeng oet oek " ..
		"yu yun yut"
	)

	-- Returns true when `syl` is an acceptable syllable, false otherwise.
	local function checker(syl)
		local letters, change = syl:match("^(%l+)[1-6](%-?[12]?)$")
		if not letters then
			return false
		end
		-- A one-character suffix is a lone "-" or a stray second digit.
		if change:len() == 1 then
			return false
		end
		local onset, rime = letters:match("^([bpmfdtnlgknhzcsjw][gw]?)([aeiouy]+[mnptk]?g?)$")
		if onset and initials[onset] and finals[rime] then
			return true
		end
		-- No valid initial+final split: allow a bare final or a syllabic nasal.
		if finals[letters] or letters == "m" or letters == "ng" then
			return true
		end
		return false
	end

	local report = {}
	for index = 0, 170 do
		local name = ("Module:zh/data/yue-word/%03d"):format(index)
		-- pcall: a data page that is missing or fails to load is skipped.
		local success, data = pcall(mw.loadData, name)
		if success then
			local rejected = {}
			for word, prons in pairs(data) do
				-- Treat commas and ellipses as syllable separators too.
				local flattened = prons:gsub(",", " ")
				flattened = flattened:gsub("…", " ")
				for syl in flattened:gmatch("[^ ,.]+") do
					if not checker(syl) then
						rejected[#rejected + 1] = word .. "(" .. syl .. "),"
						break
					end
				end
			end
			if #rejected > 0 then
				report[#report + 1] = '* [[' .. name .. ']]\n**' .. table.concat(rejected) .. '\n'
			end
		end
	end
	return table.concat(report)
end,
}