Module:User:Suzukaze-c/02: difference between revisions

Content deleted Content added
Inline
Revision as of 02:47, 6 September 2017

This module lacks a documentation subpage. You may create it.
Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Suzukaze-c, for their own experimentation. Items in this module may be added and removed at Suzukaze-c's discretion; do not rely on this module's stability.
--[==[

this is a bunch of mostly unrelated shit i put in one single module because i can.

#invoke:User:Suzukaze-c/02|show
	easy word lists
subst:#invoke:User:Suzukaze-c/02|rpre
	convert to ja-r using data given
subst:#invoke:User:Suzukaze-c/02|r
	convert to ja-r using data in the linked entry
#invoke:User:Suzukaze-c/02|hzreport
	hanzi entry report
subst:#invoke:User:Suzukaze-c/02|newhz
	new hanzi entry generator
subst:#invoke:User:Suzukaze-c/02|newhzmul
	new hanzi translingual entry generator

#invoke:User:Suzukaze-c/02|test_1
	user page links
#invoke:User:Suzukaze-c/02|test_2
	search
subst:#invoke:User:Suzukaze-c/02|test_3
	
subst:#invoke:User:Suzukaze-c/02|test_5
	
subst:#invoke:User:Suzukaze-c/02|test_7
	ja fmt (+{{ja-def}}, decl=i→infl=i)
subst:#invoke:User:Suzukaze-c/02|test_9
	ltc/och attention (note to self: consider retracting {{zh-attn-split}})
#invoke:User:Suzukaze-c/02|test_10
	determine ?action=edit&section=x
#invoke:User:Suzukaze-c/02|test_11
	HSK list words missing appropriate category → https://en.wiktionary.org/w/index.php?oldid=46157868
#invoke:User:Suzukaze-c/02|test_12
	jouyou kanji that need {{ja-readings}} to be updated → [[User:Suzukaze-c/cleanup/12]]
subst:#invoke:User:Suzukaze-c/02|test_13
	process/update {{ja-readings}}
subst:#invoke:User:Suzukaze-c/02|test_14
	generate list of {{attention}}s from a copy-paste of the Category: page

]==]

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Chinese helper module, referenced below as M (M.ts, M.st, M.ts_determ, M.pytemp, M.check_pron).
local M = require("Module:zh")
-- Short aliases for the Unicode-aware string helpers (mw.ustring) and the
-- text helpers (mw.text) used throughout the functions below.
local replace = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local itermatch = mw.ustring.gmatch
local split = mw.text.split
local itersplit = mw.text.gsplit
local trim = mw.text.trim
local lower = mw.ustring.lower
local sub = mw.ustring.sub
local len = mw.ustring.len

-- Turns a word list into linked text ("easy word lists").
--   frame.args[1]: language code
--   frame.args[2]: the text; words are separated by whitespace/punctuation.
-- For Japanese text that contains '・' or '￥', or that contains no character
-- in the range 㐀-鿕, each word is rendered through Module:ja-link
-- ("lemma・kana", with '￥' standing for a space inside a word).
-- Otherwise every word becomes a plain [[wikilink]] and the whole result is
-- passed to Module:links full_link.
-- Returns the result prefixed with «lang».
function export.show(frame)
	local args = frame.args
	local text = trim(args[2])
	local lang = args[1]

	-- The ruby decision is taken on the raw (not yet normalised) text.
	local use_ja_ruby = false
	if lang == 'ja' then
		use_ja_ruby = match(text, '[・￥]') or not match(text, '[㐀-鿕]')
	end

	-- Characters treated as punctuation/separators, escaped for use in a pattern set.
	local delims = require('Module:string').pattern_escape('＊！？．，。、：；…《》「」【】（）!?.,:;()"—·☆[] ')

	-- Normalise whitespace: newlines become '＊', ideographic spaces become
	-- ASCII spaces, and whitespace runs collapse to a single space.
	local normalisation = {
		{ '\n+', '＊' },
		{ '　', ' ' },
		{ '%s+', ' ' },
	}
	for _, rule in ipairs(normalisation) do
		text = replace(text, rule[1], rule[2])
	end

	local pieces = {}

	if find(text, ' ') then
		-- Space-separated input: link word by word, keeping trailing punctuation.
		local word_pattern = '([^' .. delims .. ']+)([' .. delims .. ']*)'
		for word, punc in itermatch(text, word_pattern) do
			if use_ja_ruby then
				word = replace(word, '￥', ' ')
				local lemma, kana = match(word, '(.+)・(.+)')
				if not kana then
					-- No explicit reading: kanji are replaced by '!' in the kana argument.
					lemma = word
					kana = replace(word, '([㐀-鿕々])', '!')
				end
				local linked = require('module:ja-link').link({lemma = lemma, kana = kana})
				pieces[#pieces + 1] = linked .. punc
			else
				pieces[#pieces + 1] = '[[' .. word .. ']]' .. punc
			end
		end
	else
		-- No spaces at all: link every single character.
		for char in itermatch(text, '(.)') do
			pieces[#pieces + 1] = '[[' .. char .. ']] '
		end
	end

	local result = table.concat(pieces)

	if not use_ja_ruby then
		result = require('module:links').full_link({term = result, lang = require('module:languages').getByCode(lang)})
	end

	return '«' .. lang .. '» ' .. result
end

-- Builds a {{ja-r}} call from explicitly supplied data (meant to be subst'ed).
-- Accepted positional layouts:
-- {{m|1=ja|2=WORD|3=TITLE|4=GLOSS}}
-- {{m|0=ja|1=WORD|2=TITLE|3=GLOSS}}
-- Named arguments:
--   tr    reading; every character outside the set ￥ . ^ 、 ぁ-ー is stripped
--         and '￥' is turned into a space
--   gloss gloss text; takes precedence over the positional gloss
-- Returns '{{ja-r|...}}', followed by ': GLOSS' when a gloss is present.
function export.rpre(frame)
	local args = frame.args
	local one = args[1] or ''
	local two = args[2] or ''
	local three = args[3] or ''
	local four = args[4] or ''

	local tr = args['tr'] or ''
	local gloss = args['gloss'] or ''

	-- Declared local: the original assigned `linktitle` without a declaration
	-- and therefore created a global variable.
	local jp, linktitle

	if one == 'ja' then
		jp, linktitle = two, three
		if gloss == '' then gloss = four end
	else
		jp, linktitle = one, two
		-- The named gloss= wins here as well (it used to be silently
		-- overwritten by the positional value in this branch only).
		if gloss == '' then gloss = three end
	end

	tr = replace(tr, '[^￥.^、ぁ-ー]+', '')
	tr = replace(tr, '￥', ' ')

	if gloss ~= '' then
		gloss = ': ' .. gloss
	end

	if tr ~= '' then
		tr = '|' .. tr
	elseif find(jp, '[㐀-鿕𠀀-𬺡]') then
		-- No reading supplied but the term contains kanji: repeat the term in
		-- the reading slot.
		tr = '|' .. jp
	end

	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end

	return '{{ja-r|' .. jp .. tr .. '}}' .. gloss
end

-- Builds a {{ja-r}} call for a term, taking the reading from the headword
-- templates found in the term's own entry (meant to be subst'ed).
-- Accepted positional layouts:
--   N|WORD|TITLE|GLOSS   (first argument contains a digit: pick the N-th reading)
--   ja|WORD|TITLE|GLOSS
--   WORD|TITLE|GLOSS
-- Named argument gloss= takes precedence over the positional gloss.
-- When several distinct readings are found and no choice was given, a
-- placeholder {{ja-r}} plus a dump of the candidate readings is returned.
-- Raises an error when the term contains an embedded wikilink, or when the
-- requested reading number does not exist.
function export.r(frame)
	local args = frame.args
	local one = args[1] or ''
	local two = args[2] or ''
	local three = args[3] or ''
	local four = args[4] or ''

	local tr = ''
	local gloss = args['gloss'] or ''
	local choice = ''

	-- Declared local: the original assigned `linktitle` without a declaration
	-- and therefore created a global variable.
	local jp, linktitle, positional_gloss

	if find(one, '[0-9]') then
		choice, jp, linktitle, positional_gloss = one, two, three, four
	elseif one == 'ja' then
		jp, linktitle, positional_gloss = two, three, four
	else
		jp, linktitle, positional_gloss = one, two, three
	end
	if gloss == '' then
		gloss = positional_gloss
	end

	if mw.ustring.match(jp, ".%[%[[^%]]+%]%]") then
		error("Cannot process Japanese text with embedded wikilinks.")
	end

	-- mw.title.new returns nil for an invalid title (for example an empty
	-- string); fall back to the same dummy content used for a missing page.
	local title = mw.title.new(jp)
	local content = (title and title:getContent()) or '{{ja-pos|error|}}'
	local readings = {}

	-- Extracts kana readings from the parameter string of one headword template.
	local function process(text)
		text = replace(text, 'hhira=[^|}]+', '')
		text = replace(text, 'decl=[^|}]+', '')
		text = replace(text, 'infl=[^|}]+', '')
		text = replace(text, 'kyu=[^|}]+', '')
		text = replace(text, 'hira=', '')
		if find(text, 'proper') then
			-- Parameters mentioning 'proper': prefix each word with '^'.
			text = '^' .. replace(text, '([ |])', '%1^')
		end
		if find(content, 'infl=い') then
			text = replace(text, 'しい', 'し.い')
		end
		if find(content, 'ja%-verb') then
			text = replace(text, 'おう', 'お.う')
		end
		for parameter in itersplit(text, '|') do
			if find(parameter, '[あ-ー]') then
				table.insert(readings, parameter)
			end
		end
	end

	-- Headword templates to scan. The order is significant: it determines the
	-- numbering of the readings that `choice` refers to.
	local template_patterns = {
		'{{ja%-adj|([^}]+)}}',
		'{{ja%-noun|([^}]+)}}',
		'{{ja%-verb|([^}]+)}}',
		'{{ja%-verb%-suru|([^}]+)}}',
		'{{ja%-phrase|([^}]+)}}',
		'{{ja%-pos|([^}]+)}}',
		'{{ja%-altread|([^}]+)}}',
	}
	for _, pattern in ipairs(template_patterns) do
		for parameters in itermatch(content, pattern) do
			process(parameters)
		end
	end

	readings = require("Module:table").removeDuplicates(readings)

	if #readings > 1 then
		if choice ~= '' then
			tr = readings[tonumber(choice)]
			if not tr then
				-- Previously this surfaced later as "attempt to concatenate a nil value".
				error('Reading choice "' .. choice .. '" is not valid; ' .. #readings .. ' readings were found.')
			end
		else
			return '{{ja-r|' .. jp .. '|ーーーーー}}\n' .. require("Module:debug").highlight_dump(readings)
		end
	else
		tr = readings[1] or jp
	end

	-- if term is pure kana and kana is identical
	if replace(jp, '[あ-ー]', '') == '' and tr == jp then
		tr = ''
	end

	if gloss ~= '' then
		gloss = ': ' .. gloss
	end

	if tr ~= '' then
		tr = '|' .. tr
	end

	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end

	return '{{ja-r|' .. jp .. tr .. '}}' .. gloss

	--[[

	Re-converting words that were already converted (IME note):
	- select the text and press the space key
	- select the text and press Win+C

	]]
end

-- Hanzi entry report: for every character of the list named by frame.args[1]
-- (looked up in Module:User:Suzukaze-c/02/hz), reads the character's page and
-- reports, as one row of a sortable wikitable, which pieces of a Chinese
-- entry are present.
function export.hzreport(frame)
	local lists = mw.loadData('Module:User:Suzukaze-c/02/hz').hz

	local yes, no = '✔️', '❌️' -- taking advantage of colored emoji. with vs16
	local rows = {}

	for hz in itersplit(lists[frame.args[1]], '') do
		local page = mw.title.new(hz):getContent() or ''

		-- yes/no mark depending on whether the page matches the pattern
		local function flag(pattern)
			return find(page, pattern) and yes or no
		end
		-- first capture of the pattern, or '' when there is no match
		local function grab(pattern)
			return match(page, pattern) or ''
		end

		local zh = flag('==Chinese==')
		local def = flag('{{zh%-[a-z ]+}}\n\n#')
		local der = flag('===Compounds===+\n{{zh%-der')
		local uns = grab('|sim=(.)')
		local unt = grab('|tra=(.)')
		local ufs = grab('{{zh%-forms|s=(.)')
		local uft = grab('{{zh%-see|([^}]+)}}')
		local goh = flag('===Glyph origin===')
		local histf = flag('{{[Hh]an[_ ]ety[ml]}}')
		local ids = flag('|ids=[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]')
		local yue = grab('|c=([a-z0-9,]+)')
		local prc_tw = flag('|m=[㐀-鿕𠀀-𬺡]')

		-- Neutralise characters that would disturb the table markup, then
		-- shorten over-long cells to five characters plus a marker.
		uft = replace(uft, "[{|']", '.')
		if len(uft) > 6 then
			uft = sub(uft, 1, 5) .. '◆'
		end
		if len(yue) > 6 then
			yue = sub(yue, 1, 5) .. '◆'
		end

		local cells = { '[[' .. hz .. ']]', zh, def, der, uns, unt, ufs, uft, goh, histf, ids, yue, prc_tw }
		rows[#rows + 1] = '| ' .. table.concat(cells, ' || ')
	end

	local lines = {
		'[[#wpTextbox1]]',
		'{| class="wikitable sortable Hani"',
		'! hz || zh? || def || der || unS || unT || ufS || ufT || goh || histF || ids || yue || prc/tw',
		'|-',
		table.concat(rows, '\n|-\n'),
		'\n|}',
	}

	return table.concat(lines, '\n')
end

-- New hanzi entry generator (meant to be subst'ed on the character's page).
-- Builds the wikitext of a ==Chinese== section for the current page title and
-- returns it as one trimmed string.
--
-- Arguments read from frame.args:
--   e                  etymology number (default 0 = entry without numbered etymologies)
--   1, 2               if 1 is a single hanzi, it (plus optional 2) becomes the {{zh-see}}
--                      target; otherwise 1 is used as the definition text
--   s, alt             |s= and |alt= for {{zh-forms}}
--   ignore             do not derive a {{zh-see}} target automatically
--   wp                 'y' or a comma-separated parameter list for {{zh-wp}}
--   ge                 text for the "Glyph origin" section
--   m, c, h, mn, w, mc, oc, ms/m-s, ct, g, j, md, mnn/mn_note, mnt/mn-t,
--   mntn/mn-t_note, x, ma, cat
--                      values passed on to {{zh-pron}} (several are looked up when absent)
--   rcns, rtw, rhk, ryt, riso, rnan
--                      switches/values for the "References" section
--   noder, der         suppress the compounds list / extra compounds to add
--   also               target for a "See also" {{zh-l}} line
function export.newhz(frame)
	local a = frame.args
	local character = mw.title.getCurrentTitle().text

	local en = tonumber(a['e']) or 0 -- etym_number
	local hl = 3 -- header_level
	-- head() renders a section header at the current header level hl, preceded by a newline.
	local function head(text) return '\n' .. mw.ustring.rep('=', hl) .. text .. mw.ustring.rep('=', hl) end
	-- hcr() returns the header level one deeper (a > 0) or one shallower (otherwise).
	local function hcr(hl, a) return (a > 0 and hl + 1 or hl - 1) end -- header_{in|de}crement

	-- Existing wikitext of the page named like the current title ('' if there is none).
	local content = mw.title.new(character):getContent() or ''

	-- Forms found in the existing wikitext (|sim= / |tra= parameters), or false.
	local extracted_simp_form = match(content, '|sim=(.)') or false
	local extracted_trad_form = match(content, '|tra=(.)') or false
	local manual_simp_form = a['s']

	local zhwp_data = require('Module:User:Suzukaze-c/02/zhwp')

	-- x

	-- {{Han etym}} is only considered for the first (or only) etymology, and only when the
	-- character is in the 'hanetym' list or in Module:zh-glyph/phonetic/list.
	local add_han_etym = false

	local candidates = mw.loadData('Module:User:Suzukaze-c/02/hz').hz

	if en <= 1 then
		if find(candidates['hanetym'], character) or require("module:zh-glyph/phonetic/list")[character] then
			add_han_etym = true
		end
	end

	-- Target of {{zh-see}}. When set, the entry is only a soft redirect: no pronunciation,
	-- definition or compounds sections are generated (see the `if zh_see` branch below).
	local zh_see

	if not a['ignore'] then
		if extracted_trad_form or M.ts_determ(character) == 'simp' then
			zh_see = (extracted_trad_form or M.st(character))
		end
	end

	-- An explicit single-hanzi first argument overrides the automatic target.
	if a[1] and match(a[1], '^[㐀-鿕𠀀-𬺡]$') then
		zh_see = a[1] .. (a[2] and '|' .. a[2] or '')
	end

	-- Parameter string for {{zh-forms}}.
	local zh_forms = ''

	if manual_simp_form or extracted_simp_form or M.ts_determ(character) == 'trad' then
		zh_forms = zh_forms .. '|s=' .. (manual_simp_form or extracted_simp_form or M.ts(character))
	end
	if a['alt'] then
		zh_forms = zh_forms .. '|alt=' .. a['alt']
	end

	-- Parameter string for {{zh-wp}}; nil means the template is not emitted.
	local zh_wp

	if a['wp'] then
		if a['wp'] == 'y' then
			zh_wp = ''
		else
			zh_wp = '|' .. replace(a['wp'], ',', '|')
		end
	end

	-- If the zhwp data lists the character, a parameterless {{zh-wp}} is emitted.
	-- Note that this also discards any parameters given through wp=.
	if zhwp_data.single_char_title[character] or zhwp_data.contains_astral[character] or zhwp_data.single_char_title[M.ts(character)] then
		zh_wp = ''
	end

	-- x

	-- Output lines; joined with newlines at the end.
	local output = {}

	local function tin(text) table.insert(output, text) end

	-- x

	-- The language header is written for a plain entry and for Etymology 1 only.
	if en <= 1 then
		tin('==Chinese==')
	end

	-- Plain entry: {{zh-forms}}/{{zh-wp}} go directly under the language header.
	if en == 0 then
		if not zh_see then tin('{{zh-forms' .. zh_forms .. '}}') end

		if zh_wp then
			tin('{{zh-wp' .. zh_wp .. '}}')
		end
	end

	if a['ge'] or add_han_etym then
		tin(head('Glyph origin'))
		if add_han_etym then tin('{{Han etym}}') end
		if a['ge'] then tin(a['ge']) end

		-- A soft redirect in a plain entry gets a "Definitions" header so that
		-- {{zh-see}} does not end up inside the glyph origin section.
		if zh_see and en == 0 then
			tin(head('Definitions'))
		end
	end

	-- Numbered etymology: {{zh-forms}}/{{zh-wp}} go under the "Etymology N" header.
	if en > 0 then
		tin(head('Etymology ' .. tostring(en)))
		if not zh_see then tin('{{zh-forms' .. zh_forms .. '}}') end

		if zh_wp then
			tin('{{zh-wp' .. zh_wp .. '}}')
		end
	end

	-- Headers below a numbered etymology are nested one level deeper.
	if en > 0 then
		hl = hcr(hl, 1)
	end

	if zh_see then
		tin('{{zh-see|' .. zh_see .. '}}')
	else
		tin(head('Pronunciation'))
		tin('{{zh-pron')

		local m, c, h, mn, w = a['m'] or false, a['c'] or false, a['h'] or false, a['mn'] or false, a['w'] or false
		local mc, oc = a['mc'] or false, a['oc'] or false

		-- Mandarin given in zhuyin: reduce everything that is not zhuyin or a tone mark to
		-- single commas, then convert through Module:cmn-pron zhuyin_py.
		if m and find(m, '[ㄅ-ㄩ]') then
			m = replace(m, '[^．ˊˇˋㄅ-ㄩ]', '@')
			m = replace(m, '@+', '@')
			m = replace(m, '^@+', '')
			m = replace(m, '@+$', '')
			m = replace(m, '@', ',')
			m = require("module:cmn-pron").zhuyin_py(m)
		end
		if not m then
			if require("module:zh/data/cmn-tag").MT[character] then -- if there is cmn-tag data
				m = character
			else
				m = mw.ustring.gsub(M.pytemp(character,'','',''), '，', ', ') or false -- based on line from zh-new
				if find(m, '[㐀-鿕𠀀-𬺡]') then -- pinyin conversion failed
					m = false
				end
			end
		end
		if not c then
			c = M.check_pron(character, 'yue', 1) or false
			-- A looked-up Cantonese value containing a comma switches on the
			-- {{R:yue:mfccd}} reference that is emitted in the References section below.
			if c and find(c, ',') then a['rhk'] = 'yes' end
		end
		-- Hakka value containing digits: converted by export.test_5.
		if h and find(h, '[0-9]') then
			h = export.test_5(h)
		end
		if not h then
			h = M.check_pron(character, 'hak', 1) or false
		end
		if not mn then
			mn = M.check_pron(character, 'nan', 1) or false
		end
		-- Wu value containing one of the tone letters P/S/Q/R: converted by export.test_3.
		if w and find(w, '[PSQR]') then
			w = export.test_3(w)
		end
		-- mc/oc default to 'y' when a data module for the character exists.
		if (not mc) and (mw.title.new('Module:zh/data/ltc-pron/' .. character).exists) then
			mc = 'y'
		end
		if (not oc) and (mw.title.new('Module:zh/data/och-pron-BS/' .. character).exists or mw.title.new('Module:zh/data/och-pron-ZS/' .. character).exists) then
			oc = 'y'
		end

		-- Emit the {{zh-pron}} parameters, one per line.
		if m then tin('|m=' .. m) end
		if (a['ms'] or a['m-s']) then tin('|m-s=' .. (a['ms'] or a['m-s'])) end
		if c then tin('|c=' .. c) end
		if a['ct'] then tin('|c-t=' .. a['ct']) end
		if a['g'] then tin('|g=' .. a['g']) end
		if h then tin('|h=pfs=' .. h) end
		if a['j'] then tin('|j=' .. a['j']) end
		if a['md'] then tin('|md=' .. a['md']) end
		if mn then tin('|mn=' .. mn) end
		if (a['mnn'] or a['mn_note']) then tin('|mn_note=' .. (a['mnn'] or a['mn_note'])) end
		if (a['mnt'] or a['mn-t']) then tin('|mn-t=' .. (a['mnt'] or a['mn-t'])) end
		if (a['mntn'] or a['mn-t_note']) then tin('|mn-t_note=' .. (a['mntn'] or a['mn-t_note'])) end
		if w then tin('|w=' .. w) end
		if a['x'] then tin('|x=' .. a['x']) end
		-- |mc= and |oc= are always written as a pair once either one is set.
		if mc or oc then
			tin('|mc=' .. (mc or ''))
			tin('|oc=' .. (oc or ''))
		end

		if a['ma'] then tin('|ma=' .. a['ma']) end
		tin('|cat=' .. (a['cat'] or ''))

		tin('}}')

		tin(head('Definitions'))
		tin('{{zh-hanzi}}')
		tin('')
		if a['rcns'] then
			tin('# {{lb|zh|Taiwan}} {{n-g|Only used in personal names.}}')
		else
			tin('# ' .. (a[1] or '{{rfdef|zh}}'))
		end

		if not a['noder'] then -- Lua error: not enough memory
			local der = ''
			local der_add = ''

			if a['der'] then
				der_add = a['der']
				der_add = replace(der_add, '[^㐀-鿕𠀀-𬺡]+', '|') -- any non-hanzi text becomes separator
				der_add = replace(der_add, '|+', '|')
				der_add = replace(der_add, '^|', '')
				der_add = replace(der_add, '|$', '')
				der_add = '|' .. der_add
			end
			-- For a character in the 𠀀-𬺡 range, add every multi-character title from
			-- zhwp_data.contains_astral that contains it.
			if match(character, '[𠀀-𬺡]') then
				for title, _ in pairs(zhwp_data.contains_astral) do
					if len(title) > 1 and match(title, character) then
						der_add = der_add .. '|' .. title
					end
				end
			end
			-- Expand {{zh-der/new}} right away; |p= receives m up to its first comma.
			der = frame:preprocess('{{subst:zh-der/new' .. (der_add or '') .. (m and '|p=' .. replace(m, ',.+', '') or '') .. '}}')

			if match(der, 'memory') then
				tin(head('Compounds'))
				tin('{{su#bst:zh-der/new' .. (der_add or '') .. (m and '|p=' .. replace(m, ',.+', '') or '') .. '}}') -- let you add zh-der in a separate edit in case Lua returns "out of memory"
			elseif match(der, '[㐀-鿕𠀀-𬺡]') then
				-- Only keep the section when the expansion actually lists hanzi.
				tin(head('Compounds'))
				tin(der)
			end
		end

		if a['also'] then
			tin(head('See also'))
			tin('* {{zh-l|' .. a['also'] .. '}}')
		end
	end

	-- Undo the extra nesting so that "References" sits at the etymology's level.
	if en > 0 then
		hl = hcr(hl, -1)
	end

	if a['rtw'] or a['rhk'] or a['ryt'] or a['riso'] or a['rcns'] or a['rnan'] then
		tin(head('References'))
		if a['rtw'] then tin('* {{R:twedu|' .. a['rtw'] .. '}}') end
		if a['rhk'] then tin('* {{R:yue:mfccd}}') end
		if a['ryt'] then tin('* {{R:yue:jyut.net}}') end
		if a['riso'] then tin('* {{R:yue:Jyutping Database}}') end
		if a['rcns'] then tin('* {{R:zh:CNS|' .. replace(a['rcns'], '%-', '|') .. '}}') end
		if a['rnan'] then
			-- A value containing a digit is passed to {{R:nan:thcwd}}; anything else
			-- selects the parameterless {{R:nan:thcwdq}}.
			if find(a['rnan'], '%d') then
				tin('* {{R:nan:thcwd|' .. a['rnan'] .. '}}')
			else
				tin('* {{R:nan:thcwdq}}')
			end
		end
	end

	return trim(table.concat(output, '\n'))
end

-- New hanzi translingual entry generator (meant to be subst'ed on the character's page).
-- Fills a ==Translingual== entry skeleton from pasted Unihan data.
--   frame.args[1]   flag string; if it contains 'x' the definition line is omitted
--                   (it is passed to find() unguarded, so it has to be supplied)
--   frame.args[3]   pasted text containing lines of the form "k<Property> <value>" and
--                   the phrase "Unihan data for U+<hex>"
--   frame.args.ids / .four / .canj
--                   fallbacks for fields not found in the existing page
-- Returns the filled-in wikitext.
function export.newhzmul(frame)
	local text = frame.args[3]

	local char = mw.title.getCurrentTitle().text

	-- Existing wikitext of the page named like the current title ('' if there is none).
	local x = mw.title.new(char):getContent() or ''

	-- Entry skeleton; every $name is a placeholder that is substituted below.
	local model = trim([==[

{{character info/new}}
==Translingual==

===Han character===
{{Han char|rn=$rs1|rad=$rad|as=$rs2|sn=$TotalStrokes|four=$FourCornerCode$four|canj=$Cangjie$canj|ids=$ids}}

# $Definition

====References====
{{Han ref|kx=$IRGKangXi|dkj=$IRGDaiKanwaZiten|dj=$IRGDaeJaweon|hdz=$IRGHanyuDaZidian|uh=$hex}}

]==])

	-- Radical characters indexed by radical number (looks like the 214 Kangxi radicals --
	-- TODO confirm), and the simplified form used when the radical number carries an apostrophe.
	local corr = {'一','丨','丶','丿','乙','亅','二','亠','人','儿','入','八','冂','冖','冫','几','凵','刀','力','勹','匕','匚','匸','十','卜','卩','厂','厶','又','口','囗','土','士','夂','夊','夕','大','女','子','宀','寸','小','尢','尸','屮','山','巛','工','己','巾','干','幺','广','廴','廾','弋','弓','彐','彡','彳','心','戈','戶','手','支','攴','文','斗','斤','方','无','日','曰','月','木','欠','止','歹','殳','毋','比','毛','氏','气','水','火','爪','父','爻','爿','片','牙','牛','犬','玄','玉','瓜','瓦','甘','生','用','田','疋','疒','癶','白','皮','皿','目','矛','矢','石','示','禸','禾','穴','立','竹','米','糸','缶','网','羊','羽','老','而','耒','耳','聿','肉','臣','自','至','臼','舌','舛','舟','艮','色','艸','虍','虫','血','行','衣','襾','見','角','言','谷','豆','豕','豸','貝','赤','走','足','身','車','辛','辰','辵','邑','酉','釆','里','金','長','門','阜','隶','隹','雨','靑','非','面','革','韋','韭','音','頁','風','飛','食','首','香','馬','骨','高','髟','鬥','鬯','鬲','鬼','魚','鳥','鹵','鹿','麥','麻','黃','黍','黑','黹','黽','鼎','鼓','鼠','鼻','齊','齒','龍','龜','龠'}
	local corr_s = {['言']='讠',['門']='门',['食']='饣',['飛']='飞',['馬']='马',['見']='见',['貝']='贝',['糸']='纟',['車']='车',['長']='长',['韋']='韦',['風']='风',['金']='钅',['鳥']='鸟',['龍']='龙',['頁']='页',['齊']='齐',['麥']='麦',['龜']='龟',['魚']='鱼',['黽']='黾',['齒']='齿',['鹵']='卤'}

	-- from text
	local targets = {'RSUnicode','TotalStrokes','FourCornerCode','Cangjie','IRGKangXi','IRGDaiKanwaZiten','IRGDaeJaweon','IRGHanyuDaZidian','Definition'}

	for _, property in ipairs(targets) do
		-- Rest of the line that follows "k<property>" in the pasted text ('' if absent).
		local value = trim(match(text, 'k'..property..'%s+([^\n]+)') or '')
		mw.log(property .. '|' .. value)
		if property == 'RSUnicode' then
			-- Keep only the first space-separated value, then split "radical.strokes".
			value = replace(value, ' .+', '') -- 龽
			value = split(value, '%.')
			model = replace(model, '$rs1', value[1])
			model = replace(model, '$rs2', value[2])
		elseif property == 'Definition' then
			-- No definition line when requested via the flag or when the page already
			-- has a Chinese section.
			if find(frame.args[1], 'x') or find(x, '==Chinese==') then
				model = replace(model, '# $Definition\n\n', '')
			else
				-- Each ';' in the definition starts a new '#' line.
				model = replace(model, '$Definition', (value == '' and '{{rfdef|Han}}' or replace(value, ';', '\n#')))
			end
		else
			model = replace(model, '$'..property, value)
		end
		-- A Cangjie code from the pasted text makes the $canj fallback unnecessary.
		if property == 'Cangjie' and value ~= '' then
			model = replace(model, '$canj', '')
		end
	end

	-- read from existing page or manually provided
	local ex = {'ids','four','canj'}

	for _, property in ipairs(ex) do
		model = replace(model, '$'..property, match(x, '|'..property..'=([^|}]+)') or frame.args[property] or '')
	end

	-- |rad=
	-- Looks up the radical character for the number in |rn=. The apostrophe after the
	-- number (capture c) selects the simplified radical and is not written back.
	model = replace(model, '(|rn=)(%d+)(\'?)(|rad=)($rad)(|)', function(a,b,c,d,e,f) 
		local z = corr[tonumber(b)]
		return a .. b .. d .. (c == '\'' and corr_s[z] or z) .. f
		end
	)

	-- remove empty dict fields
	local template_ref_fields = {'kx','dkj','dj','hdz'}

	for _, property in ipairs(template_ref_fields) do
		model = replace(model, '|'..property..'=|', '|')
	end

	-- Code point in hex, taken from the "Unihan data for U+XXXX" phrase of the pasted text.
	model = replace(model, '$hex', match(text, 'Unihan data for U.(%x+)'))

	-- Add a section separator when a Chinese section exists on the page.
	if find(x, '==Chinese==') then
		model = model .. '\n\n----\n\n'
	end

	return model
end

-- User page links: renders a row of equally wide, bordered boxes, each one a
-- wikilink whose label text is transparent.
function export.test_1(frame)
	-- { link target, label }
	local links = {
		{ '/dict', 'a'},
		{ '/cp', 'b'},
		{ 'WT:RE:zh', '中'},
		{ 'WT:RE:ja', '日'},
		{ '/m3', '日'},
		{ '/sandbox', '他'},
	}

	-- All boxes share the available width equally.
	local width = tostring(100 / #links) .. '%'
	local style = 'border:1px solid whitesmoke; color:transparent; display:inline-block; width:' .. width .. '; height:100%; box-sizing:border-box; margin-right:-1px;'

	local boxes = {}
	for index = 1, #links do
		local target, label = links[index][1], links[index][2]
		boxes[index] = string.format('[[%s|<span style="%s">%s</span>]]', target, style, label)
	end

	return '<div style="width:auto; height:300px; overflow:auto;">' .. table.concat(boxes) .. '</div>'
end

-- Builds an external link to a Wiktionary full-text search.
--   frame.args[1]: search terms (nowiki strip markers are resolved before encoding)
--   frame.args[2]: optional link label; defaults to the search terms as given
-- The terms are URL-encoded in both cases. The original left them raw when no
-- label was supplied, so a search containing a space produced a link that was
-- cut off at the first space.
function export.test_2(frame)
	local base = 'https://en.wiktionary.org/w/index.php?title=Special%3ASearch&profile=default&fulltext=Search&search='
	local terms = frame.args[1]
	local label = frame.args[2] or terms

	return '[' .. base .. mw.uri.encode(mw.text.unstripNoWiki(terms), 'PATH') .. ' ' .. label .. ']'
end

-- Rewrites a romanised syllable that ends in one of the tone letters P, S, Q, R
-- into "<tone digit><syllable>" (export.newhz uses it for the w= value).
-- Accepts either the string itself or a frame (then frame.args[1] is used).
function export.test_3(text)
	if type(text) == 'table' then text = text.args[1] end

	local syllable, tone = match(text, '(.+)([PSQR])')

	-- Voicing is judged on the syllable as given, before any respelling.
	local voiced = false
	for _, pattern in ipairs({ "^[bvdnlzg]", "^m[^m]", "jj", "xx", "hh" }) do
		if match(syllable, pattern) then
			voiced = true
			break
		end
	end

	-- Respell the initial consonant cluster when it is followed by 'i'.
	if match(syllable, '^[ctsjszh]+i') then
		local respelling = {
			['c']='j', ['ts']='j', ['ch']='q', ['tsh']='q', ['j']='jj',
			['s']='x', ['sh']='x', ['z']='xx', ['zh']='xx',
		}
		syllable = replace(syllable, '^[ctsjszh]+', respelling)
	end

	syllable = replace(syllable, 'h$', 'q')

	-- too lazy for vowels

	-- tone letter -> { digit when not voiced, digit when voiced }
	local digits = {
		P = { '1', '3' },
		S = { '2', '3' },
		Q = { '2', '3' },
		R = { '4', '5' },
	}
	tone = digits[tone][voiced and 2 or 1]

	return tone .. syllable
end

-- Rewrites a romanisation with numeric tones: 'w' becomes 'ṳ' and each tone
-- number becomes a combining diacritic or is dropped (export.newhz uses it for
-- the h= value).
-- Accepts either the string itself or a frame (then frame.args[1] is used).
function export.test_5(text)
	if type(text) == 'table' then text = text.args[1] end

	-- Applied strictly in this order: the two-digit tones have to be consumed
	-- before the single digits '2' and '5'.
	local rules = {
		{ 'w', 'ṳ' },
		{ '24', '̂' },
		{ '11', '̀' },
		{ '31', '́' },
		{ '55', '' },
		{ '2', '' },
		{ '5', '̍' },
	}

	for _, rule in ipairs(rules) do
		text = replace(text, rule[1], rule[2])
	end

	return text
end

-- Japanese entry formatting helper: turns definition lines of the form
-- "# [[...]]: " into "# {{ja-def|...}} " and rewrites decl=い / decl=i to infl=i.
function export.test_7(frame)
	local wikitext = trim(frame.args[1])

	-- Everything between the first '[[' and the last ']]: ' on a line is
	-- reduced to its kana/kanji runs, joined by '|'.
	local function to_ja_def(prefix, _, linked, _)
		local terms = replace(linked, '[^ぁ-ー㐀-鿕]+', '|')
		return prefix .. '{{ja-def|' .. terms .. '}} '
	end

	wikitext = replace(wikitext, '(# *)(%[%[)([^\n]+)(%]%]: *)', to_ja_def)
	wikitext = replace(wikitext, 'decl=[いi]', 'infl=i')

	return wikitext
end

-- Returns the fixed pair of attention templates for Middle Chinese (ltc) and
-- Old Chinese (och).
function export.test_9()
	local ltc_attention = '{{attn|ltc}}'
	local och_attention = '{{attn|och|Middle+Old Chinese needs to be distributed}}'

	return ltc_attention .. och_attention
end

-- Determines the ?action=edit&section=N URL for a section of a page.
--   content        wikitext of the page
--   target_header  header text to look for (compared exactly, without the '=' signs)
--   pagename       title of the page the URL should point to
-- Sections are counted by their headers in order of appearance.
-- Returns the canonical edit URL as a string:
--   * header found             -> ...&section=<its number>
--   * page without any header  -> ...&section=0 (unchanged behaviour)
--   * header missing           -> plain ?action=edit for the whole page; the
--     original returned the number of the LAST section here, i.e. a link to
--     an unrelated section.
function export.test_10(content, target_header, pagename)
	local section = 0
	local found = false

	for header in itermatch(content, '==+([^\n=]+)==+\n') do
		section = section + 1

		if header == target_header then
			found = true
			break
		end
	end

	local query = 'action=edit'
	if found or section == 0 then
		query = query .. '&section=' .. section
	end

	return tostring(mw.uri.canonicalUrl(pagename, query))
end

-- Lists the words of the HSK appendix pages whose entry does not mention the
-- HSK level name. Words whose entry lacks 'zh-pron' are highlighted with
-- <mark>. Multi-character words are listed first, single characters after them.
function export.test_11()
	local word_lines = {}
	local char_lines = {}

	-- Full set: 'Beginning', 'Elementary', 'Intermediate', 'Advanced'.
	-- Only these two are processed.
	local levels = {'Elementary','Intermediate'}

	-- Rewrites applied to the appendix text so that only the wanted hanzi
	-- runs remain to be picked up by the scan below.
	local cleanup = {
		{ '{{l|cmn|([^|]+)|tr={{l|cmn|([^|]+)}}', '%2' },
		{ '{{zh.l|([^/|]+)/([^/|]+)|', '%1' },
		{ '{{zh.l|([^/|]+)/([^/|]+)/([^/|]+)|', '%1+%2' },
		{ '%[%[([^%[%]]+)%]%] %(%[%[([^%[%]]+)%]%],', '%2' },
		{ 'is called a [^\n]+', '' },
	}

	for _, level in ipairs(levels) do
		local heading = '*' .. level .. '\n**'
		word_lines[#word_lines + 1] = heading
		char_lines[#char_lines + 1] = heading

		local appendix = mw.title.new('Appendix:HSK list of Mandarin words/' .. level .. ' Mandarin'):getContent()
		for _, rule in ipairs(cleanup) do
			appendix = replace(appendix, rule[1], rule[2])
		end

		for ci in itermatch(appendix, '[㐀-鿕…]+') do
			local entry = mw.title.new(ci):getContent() or ''
			local has_pron = match(entry, 'zh%-pron')
			local url = export.test_10(entry, 'Chinese', ci)

			if not find(entry, level) then
				local link = '[' .. url .. ' ' .. ci .. '], '
				if not has_pron then
					link = '<mark>' .. link .. '</mark>'
				end

				local bucket = (len(ci) == 1) and char_lines or word_lines
				bucket[#bucket + 1] = link
			end
		end

		word_lines[#word_lines + 1] = '\n'
		char_lines[#char_lines + 1] = '\n'
	end

	return table.concat(word_lines, '') .. table.concat(char_lines, '')
end

-- Lists the kanji of the hard-coded list below with a link to the edit form of
-- each entry's "Readings" section. Entries whose wikitext still contains
-- wikilinked readings (|kun=[[ or |...on=[[ ) are highlighted with <mark>.
function export.test_12()
	local out = {}
	-- The list is one string split into single characters. It is written as
	-- two concatenated literals because a quoted Lua string must not contain a
	-- raw line break (the original literal was broken across two lines).
	local x = split('亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎怨宴媛援園煙猿遠鉛塩演縁艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加可仮何花佳価果河苛科架夏家荷華菓貨渦過嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯街慨蓋該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠巻看陥乾勘患貫寒喚堪換敢棺款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況峡挟狭恐恭胸脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟熊繰君訓勲薫軍郡群兄刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固股虎孤弧故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行坑孝抗攻更効幸拘肯侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀酷獄骨駒込頃今困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催塞歳載際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜諮示字寺次耳自似児事侍治持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇勺尺借酌釈爵若弱寂手主守朱取狩首殊珠酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞償礁鐘上丈冗条状乗城浄剰常情場畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃仁尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂錘随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝清盛婿晴勢聖誠精製誓静請整醒税夕斥石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉浅洗染扇栓旋船戦煎羨腺詮践箋銭銑潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖租素措粗組疎訴塑遡礎双壮早争走奏相荘草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属賊続卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁但達脱奪棚誰丹旦担単炭胆探淡短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝脹貼超腸跳徴嘲潮澄調聴懲直勅捗沈珍朕陳賃鎮追椎墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店点展添転塡田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖頭謄藤闘騰同洞胴動堂童道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒範繁藩晩番蛮盤比皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏瓶不夫父付布扶府怖阜附訃負赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返変偏遍編弁辛便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木朴牧睦僕墨撲没勃堀本奔翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺茂模毛妄盲耗猛網目黙門紋問匁冶夜野弥厄役約訳薬躍闇由油喩愉諭輸癒唯友有勇幽悠郵'
		.. '湧猶裕遊雄誘憂融優与予余誉預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘倫輪隣臨瑠涙累塁類令礼冷励戻例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕', '')

	for _, hz in ipairs(x) do
		local content = mw.title.new(hz):getContent() or ''
		-- "ok" means that neither a wikilinked kun reading nor a wikilinked
		-- ...on reading was found in the entry.
		local ok = not (match(content, '|kun=%[%[') or match(content, '|[^l][a-z]+on=%[%['))
		local url = export.test_10(content, 'Readings', hz)

		table.insert(out,
			(ok and '' or '<mark>')
			..
			'[' .. url .. ' ' .. hz .. '、]'
			..
			(ok and '' or '</mark>')
		)
	end

	return '<span lang="ja" class="plainlinks">' .. table.concat(out, '') .. '</span>'
end

-- Processes/updates {{ja-readings}} (meant to be subst'ed on a kanji page).
-- Every argument is treated as "<reading type> = <old wikitext of that field>".
-- The text is reduced to a comma-separated list of kana readings and a fresh
-- {{ja-readings}} call is assembled with the fields in a fixed order.
-- Readings listed for the current page title in Module:ja/data/jouyou-yomi
-- that cannot be found in the result are appended after the template.
function export.test_13(frame)
	local a = frame.args
	local hz = mw.title.getCurrentTitle().text
	local yomi = mw.loadData('Module:ja/data/jouyou-yomi').yomi[hz]
	local field_order = {'goon', 'kanon', 'toon', 'kanyoon', 'soon', 'on', 'kun', 'nanori'}
	local processed = {}
	local missing = {}

	-- For piped wikilinks keep the label, writing its dots as hyphens.
	local function piped_link_label(_, label)
		local hyphenated = replace(label, '%.', '-')
		return hyphenated
	end

	-- Clean-up steps, applied to every field in exactly this order.
	local steps = {
		-- NOTE(review): the set below has no repetition, so only a single
		-- parenthesised character is removed -- confirm whether that is intended.
		{ '%s*%([a-zāīūēō%.%-]%)', '' },
		{ '{{non%-joyo%-reading}}%s*', '' },
		{ '%s*{{q[a-z]*|non%-%[%[w:Jōyō kanji|Jōyō%]%] reading}}', '' },
		{ ',%s*{{q[a-z]*|historical}}', '<' },
		{ '%[%[(' .. hz .. '[ぁ-ー]+)%]%]', '' },
		{ '{{[jal|-]+(' .. hz .. '[ぁ-ー]+)}}', '' },
		{ '%[%[([^%]|]+)|([^%]|]+)%]%]', piped_link_label },

		-- Everything that is not kana, '-', '<', '>' or '.' becomes a separator.
		{ '([^ぁ-ー])%.([^ぁ-ー])', '@' },
		{ '[^ぁ-ー%-<>.]+', '@' },
		{ '(@*)[<>](@*)', '<' }, -- or something. also, the > is intentional (some entries indeed have backwards arrows)

		{ '^@', '' },
		{ '@$', '' },
		{ '@', ', ' },
	}

	for kind, text in pairs(a) do
		for _, step in ipairs(steps) do
			text = replace(text, step[1], step[2])
		end

		-- Hyphens are kept for kun readings only.
		if kind ~= 'kun' then
			text = replace(text, '%-', '')
		end

		processed[kind] = text
	end

	local lines = { '{{ja-readings' }
	for _, kind in ipairs(field_order) do
		if processed[kind] then
			lines[#lines + 1] = '|' .. kind .. '=' .. processed[kind]
		end
	end
	lines[#lines + 1] = '}}'

	local rendered = table.concat(lines, '\n')

	if yomi then
		for reading, kind in pairs(yomi) do
			reading = require('Module:string').pattern_escape(reading)
			-- Readings of kind 1 and 3 are converted with kata_to_hira before the search.
			if kind == 1 or kind == 3 then
				reading = require('Module:ja').kata_to_hira(reading)
			end

			-- The reading has to appear with a non-kana character on both sides.
			-- (Variants anchored on '=' and on a newline were tried by the
			-- author and are disabled.)
			local present = find(rendered, '[^ぁ-ー]' .. reading .. '[^ぁ-ー]')
			if not present then
				missing[#missing + 1] = reading
			end
		end
	end

	-- NOTE: don't forget to check if I left any of these in entries
	local leftovers = table.concat(missing, ', ')

	-- The '%' characters added by pattern_escape are removed for display.
	return rendered .. replace(leftovers, '%%', '')
end

-- Generates a table of {{attention}} notes from a copy-paste of a Category:
-- page. Every line of frame.args[1] that is indented by four spaces is taken
-- as an entry title; the first {{attention|...}} (or {{attn|...}}) found in
-- that entry is shown next to it, with its pipes replaced by '¦'.
function export.test_14(frame)
	local pasted = frame.args[1]

	local rows = {
		'{| class="wikitable sortable"',
		'! - || -',
	}

	for entry in itermatch(pasted, '    ([^\n]+)') do
		local content = mw.title.new(entry):getContent() or ''
		local note = match(content, '{{attention|([^}]+)}}')
			or match(content, '{{attn|([^}]+)}}')
			or '<mark>?</mark>'

		rows[#rows + 1] = '![[' .. entry .. ']]\n|' .. replace(note, '|', '¦')
	end

	rows[#rows + 1] = '|}'

	-- The row separator doubles as the glue between header, rows and table end.
	return table.concat(rows, '\n|-\n')
end

return export
Module:User:Suzukaze-c/02: difference between revisions

Revision as of 02:47, 6 September 2017

Navigation menu

Search