Module:User:Suzukaze-c/02: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
maybe return T:ja-l if there's no page for the term |
|||
Line 174: | Line 174: | ||
end |
end |
||
local content = mw.title.new(jp):getContent() |
local content = mw.title.new(jp):getContent() or '{{ja-pos|error|}}' |
||
if not content then |
|||
return "{{ja-l|" .. jp .. "}}" |
|||
end |
|||
local readings = {} |
local readings = {} |
||
Revision as of 02:47, 6 September 2017
- This module lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Suzukaze-c, for their own experimentation. Items in this module may be added and removed at Suzukaze-c's discretion; do not rely on this module's stability.
--[==[
this is a bunch of mostly unrelated shit i put in one single module because i can.
#invoke:User:Suzukaze-c/02|show
easy word lists
subst:#invoke:User:Suzukaze-c/02|rpre
convert to ja-r using data given
subst:#invoke:User:Suzukaze-c/02|r
convert to ja-r using data in the linked entry
#invoke:User:Suzukaze-c/02|hzreport
hanzi entry report
subst:#invoke:User:Suzukaze-c/02|newhz
new hanzi entry generator
subst:#invoke:User:Suzukaze-c/02|newhzmul
new hanzi translingual entry generator
#invoke:User:Suzukaze-c/02|test_1
user page links
#invoke:User:Suzukaze-c/02|test_2
search
subst:#invoke:User:Suzukaze-c/02|test_3
subst:#invoke:User:Suzukaze-c/02|test_5
subst:#invoke:User:Suzukaze-c/02|test_7
ja fmt (+{{ja-def}}, decl=i→infl=i)
subst:#invoke:User:Suzukaze-c/02|test_9
ltc/och attention (note to self: consider retracting {{zh-attn-split}})
#invoke:User:Suzukaze-c/02|test_10
determine ?action=edit&section=x
#invoke:User:Suzukaze-c/02|test_11
HSK list words missing appropriate category → https://en.wiktionary.org/w/index.php?oldid=46157868
#invoke:User:Suzukaze-c/02|test_12
jouyou kanji that need {{ja-readings}} to be updated → [[User:Suzukaze-c/cleanup/12]]
subst:#invoke:User:Suzukaze-c/02|test_13
process/update {{ja-readings}}
subst:#invoke:User:Suzukaze-c/02|test_14
generate list of {{attention}}s from a copy-paste of the Category: page
]==]
-- Table of functions exposed by this module; returned at the end of the file.
local export = {}
-- Module:zh; the code below calls its ts_determ, st, ts, pytemp and check_pron functions.
local M = require("Module:zh")
-- Short aliases for the mw.ustring / mw.text functions used throughout this module.
local replace = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local itermatch = mw.ustring.gmatch
local split = mw.text.split
local itersplit = mw.text.gsplit
local trim = mw.text.trim
local lower = mw.ustring.lower
local sub = mw.ustring.sub
local len = mw.ustring.len
--- Easy word lists: turn a run of text into a series of links.
-- frame.args[1] is the language code, frame.args[2] the text to link.
-- If the normalised text contains no space, every character is linked on its
-- own; otherwise the text is cut into words and trailing punctuation.
-- Returns the language code in guillemets followed by the linked text.
function export.show(frame)
	local lang = frame.args[1]
	local text = trim(frame.args[2])

	-- Japanese text is routed through Module:ja-link when it carries reading
	-- markup (・ or ¥) or has no character in the 㐀-鿕 range.
	local use_ja_ruby = (lang == 'ja') and (match(text, '[・¥]') or not match(text, '[㐀-鿕]'))

	-- Characters treated as punctuation / separators, escaped for use inside a character class.
	local non = require('Module:string').pattern_escape('*!?.,。、:;…《》「」【】()!?.,:;()"—·☆[] ')

	-- Normalise line breaks and whitespace before splitting.
	text = replace(text, '\n+', '*')
	text = replace(text, ' ', ' ')
	text = replace(text, '%s+', ' ')

	local pieces = {}
	if find(text, ' ') then
		for word, punc in itermatch(text, '([^'..non..']+)(['..non..']*)') do
			if use_ja_ruby then
				-- ¥ stands for a space inside a word; ・ separates lemma from kana.
				local spaced = replace(word, '¥', ' ')
				local lemma, kana = match(spaced, '(.+)・(.+)')
				if not kana then
					lemma = spaced
					kana = replace(spaced, '([㐀-鿕々])', '!')
				end
				local linked = require('module:ja-link').link({lemma = lemma, kana = kana})
				pieces[#pieces + 1] = linked .. punc
			else
				pieces[#pieces + 1] = '[[' .. word .. ']]' .. punc
			end
		end
	else
		for char in itermatch(text, '(.)') do
			pieces[#pieces + 1] = '[[' .. char .. ']] '
		end
	end

	local joined = table.concat(pieces)
	if use_ja_ruby then
		return '«' .. lang .. '» ' .. joined
	end

	-- Everything that did not go through Module:ja-link is wrapped by Module:links.
	local formatted = require('module:links').full_link({term = joined, lang = require('module:languages').getByCode(lang)})
	return '«' .. lang .. '» ' .. formatted
end
--- Convert a link template call to {{ja-r}} using only the data given.
-- Accepted positional layouts:
--   {{m|1=ja|2=WORD|3=TITLE|4=GLOSS}}
--   {{m|1=WORD|2=TITLE|3=GLOSS}}
-- Named arguments: tr (reading) and gloss. A non-empty gloss= takes
-- precedence over the positional gloss in both layouts.
-- Returns the {{ja-r}} wikitext, followed by ': GLOSS' when a gloss exists.
function export.rpre(frame)
	local args = frame.args
	local one = args[1] or ''
	local two = args[2] or ''
	local three = args[3] or ''
	local four = args[4] or ''
	local tr = args['tr'] or ''
	local gloss = args['gloss'] or ''
	-- Declared local so that nothing leaks into the global environment.
	local jp, linktitle

	if one == 'ja' then
		jp = two
		linktitle = three
		gloss = (gloss ~= '' and gloss or four)
	else
		jp = one
		linktitle = two
		-- Same precedence as the 'ja' layout: keep an explicit gloss= if present.
		gloss = (gloss ~= '' and gloss or three)
	end

	-- Strip everything from the reading except the characters allowed by the
	-- class below; ¥ then stands for a space.
	tr = replace(tr, '[^¥.^、ぁ-ー]+', '')
	tr = replace(tr, '¥', ' ')

	if gloss ~= '' then
		gloss = ': ' .. gloss
	end
	if tr ~= '' then
		tr = '|' .. tr
	end
	-- No reading supplied but the term contains Han characters: repeat the
	-- term in the reading slot.
	if tr == '' and find(jp, '[㐀-鿕𠀀-𬺡]') then
		tr = '|' .. jp
	end
	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end

	return '{{ja-r|' .. jp .. tr .. '}}' .. gloss
end
--- Convert a link template call to {{ja-r}}, taking the reading from the linked entry.
-- Accepted positional layouts:
--   {{m|N|WORD|TITLE|GLOSS}}   N contains a digit and selects the N-th reading
--   {{m|ja|WORD|TITLE|GLOSS}}
--   {{m|WORD|TITLE|GLOSS}}
-- A non-empty gloss= named argument takes precedence over the positional gloss.
-- When the entry yields several readings and none was chosen, the result is a
-- placeholder {{ja-r}} call followed by a dump of the candidate readings.
-- Raises an error for text with embedded wikilinks, and for a choice that
-- does not select any of the readings found.
--
-- (Author's note, translated from Japanese: to re-convert an already
-- converted word, select it and press the space key, or select it and
-- press Win+C.)
function export.r(frame)
	local args = frame.args
	local one = args[1] or ''
	local two = args[2] or ''
	local three = args[3] or ''
	local four = args[4] or ''
	local gloss = args['gloss'] or ''
	local choice = ''
	-- Declared local so that nothing leaks into the global environment.
	local jp, linktitle, positional_gloss

	if find(one, '[0-9]') then
		choice = one
		jp, linktitle, positional_gloss = two, three, four
	elseif one == 'ja' then
		jp, linktitle, positional_gloss = two, three, four
	else
		jp, linktitle, positional_gloss = one, two, three
	end
	if gloss == '' then
		gloss = positional_gloss
	end

	if match(jp, ".%[%[[^%]]+%]%]") then
		error("Cannot process Japanese text with embedded wikilinks.")
	end

	-- mw.title.new returns nil for an invalid title and getContent returns nil
	-- for a missing page; both fall back to a dummy headword without kana.
	local title = mw.title.new(jp)
	local content = title and title:getContent() or '{{ja-pos|error|}}'
	local readings = {}

	-- Collect every kana-bearing parameter of one headword template call.
	local function process(text)
		text = replace(text, 'hhira=[^|}]+', '')
		text = replace(text, 'decl=[^|}]+', '')
		text = replace(text, 'infl=[^|}]+', '')
		text = replace(text, 'kyu=[^|}]+', '')
		text = replace(text, 'hira=', '')
		if find(text, 'proper') then
			-- Put ^ at the start and after every space or pipe.
			text = '^' .. replace(text, '([ |])', '%1^')
		end
		if find(content, 'infl=い') then
			text = replace(text, 'しい', 'し.い')
		end
		if find(content, 'ja%-verb') then
			text = replace(text, 'おう', 'お.う')
		end
		for parameter in itersplit(text, '|') do
			if find(parameter, '[あ-ー]') then
				table.insert(readings, parameter)
			end
		end
	end

	-- Template names (already pattern-escaped), scanned in this order so the
	-- order of collected readings stays stable.
	local headword_templates = {
		'ja%-adj', 'ja%-noun', 'ja%-verb', 'ja%-verb%-suru',
		'ja%-phrase', 'ja%-pos', 'ja%-altread',
	}
	for _, template in ipairs(headword_templates) do
		for parameters in itermatch(content, '{{' .. template .. '|([^}]+)}}') do
			process(parameters)
		end
	end

	readings = require("Module:table").removeDuplicates(readings)

	local tr
	if #readings > 1 then
		if choice == '' then
			return '{{ja-r|' .. jp .. '|ーーーーー}}\n' .. require("Module:debug").highlight_dump(readings)
		end
		tr = readings[tonumber(choice)]
		if not tr then
			error('Reading choice "' .. choice .. '" does not select any of the ' .. #readings .. ' readings found for "' .. jp .. '".')
		end
	else
		tr = readings[1] or jp
	end

	-- if term is pure kana and kana is identical
	if replace(jp, '[あ-ー]', '') == '' and tr == jp then
		tr = ''
	end
	if gloss ~= '' then
		gloss = ': ' .. gloss
	end
	if tr ~= '' then
		tr = '|' .. tr
	end
	if linktitle ~= '' then
		jp = 'linkto=' .. jp .. '|' .. linktitle
	end

	return '{{ja-r|' .. jp .. tr .. '}}' .. gloss
end
--- Hanzi entry report: one sortable table row per character of a stored list.
-- frame.args[1] is the key of the character list inside
-- Module:User:Suzukaze-c/02/hz. Each row shows which features were found in
-- the wikitext of that character's page.
function export.hzreport(frame)
	local yes, no = '✔️', '❌️' -- taking advantage of colored emoji. with vs16
	local lists = mw.loadData('Module:User:Suzukaze-c/02/hz').hz

	-- Tick or cross depending on whether a search hit anything.
	local function mark(found)
		return found and yes or no
	end

	-- Values longer than six characters are cut to five plus a marker.
	local function clip(value)
		if len(value) > 6 then
			return sub(value, 1, 5) .. '◆'
		end
		return value
	end

	local rows = {}
	for hz in itersplit(lists[frame.args[1]], '') do
		local content = mw.title.new(hz):getContent() or ''

		local see_target = match(content, '{{zh%-see|([^}]+)}}') or ''
		see_target = replace(see_target, "[{|']", '.')
		local cantonese = match(content, '|c=([a-z0-9,]+)') or ''

		local cells = {
			'[[' .. hz .. ']]',
			mark(find(content, '==Chinese==')),
			mark(find(content, '{{zh%-[a-z ]+}}\n\n#')),
			mark(find(content, '===Compounds===+\n{{zh%-der')),
			match(content, '|sim=(.)') or '',
			match(content, '|tra=(.)') or '',
			match(content, '{{zh%-forms|s=(.)') or '',
			clip(see_target),
			mark(find(content, '===Glyph origin===')),
			mark(find(content, '{{[Hh]an[_ ]ety[ml]}}')),
			mark(find(content, '|ids=[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]')),
			clip(cantonese),
			mark(find(content, '|m=[㐀-鿕𠀀-𬺡]')),
		}
		rows[#rows + 1] = '| ' .. table.concat(cells, ' || ')
	end

	local lines = {
		'[[#wpTextbox1]]',
		'{| class="wikitable sortable Hani"',
		'! hz || zh? || def || der || unS || unT || ufS || ufT || goh || histF || ids || yue || prc/tw',
		'|-',
		table.concat(rows, '\n|-\n'),
		'\n|}',
	}
	return table.concat(lines, '\n')
end
-- New hanzi entry generator: builds the wikitext of a Chinese section for the
-- character that is the current page title.
-- Options are read from frame.args: 'e' (etymology number), 's', 'alt', 'wp',
-- 'ge', 'ignore', 'noder', 'der', 'also', the pronunciation keys handled below,
-- and the reference keys 'rtw', 'rhk', 'ryt', 'riso', 'rcns', 'rnan'.
-- Positional a[1] is either a single hanzi (target of {{zh-see}}) or the
-- definition text; a[2] is appended to the {{zh-see}} target when a[1] is a hanzi.
-- Returns the collected lines joined with newlines and trimmed.
function export.newhz(frame)
local a = frame.args
local character = mw.title.getCurrentTitle().text
local en = tonumber(a['e']) or 0 -- etym_number
local hl = 3 -- header_level
-- head() wraps text in hl '=' signs on each side, preceded by a newline.
local function head(text) return '\n' .. mw.ustring.rep('=', hl) .. text .. mw.ustring.rep('=', hl) end
local function hcr(hl, a) return (a > 0 and hl + 1 or hl - 1) end -- header_{in|de}crement
-- Existing wikitext of the page, used to pick up |sim= / |tra= values already present.
local content = mw.title.new(character):getContent() or ''
local extracted_simp_form = match(content, '|sim=(.)') or false
local extracted_trad_form = match(content, '|tra=(.)') or false
local manual_simp_form = a['s']
local zhwp_data = require('Module:User:Suzukaze-c/02/zhwp')
-- x
-- {{Han etym}} is only considered when en <= 1, and only if the character is in
-- the 'hanetym' candidate string or in module:zh-glyph/phonetic/list.
local add_han_etym = false
local candidates = mw.loadData('Module:User:Suzukaze-c/02/hz').hz
if en <= 1 then
if find(candidates['hanetym'], character) or require("module:zh-glyph/phonetic/list")[character] then
add_han_etym = true
end
end
-- zh_see: target of {{zh-see}}. Unless 'ignore' is given it is the extracted
-- |tra= form, or M.st(character) when M.ts_determ reports 'simp'.
local zh_see
if not a['ignore'] then
if extracted_trad_form or M.ts_determ(character) == 'simp' then
zh_see = (extracted_trad_form or M.st(character))
end
end
-- A single-hanzi a[1] overrides the automatic target.
if a[1] and match(a[1], '^[㐀-鿕𠀀-𬺡]$') then
zh_see = a[1] .. (a[2] and '|' .. a[2] or '')
end
-- zh_forms: parameters appended to {{zh-forms}}.
local zh_forms = ''
if manual_simp_form or extracted_simp_form or M.ts_determ(character) == 'trad' then
zh_forms = zh_forms .. '|s=' .. (manual_simp_form or extracted_simp_form or M.ts(character))
end
if a['alt'] then
zh_forms = zh_forms .. '|alt=' .. a['alt']
end
-- zh_wp: parameters for {{zh-wp}}; nil means the template is not emitted.
-- 'wp=y' gives a bare template, any other value has its commas turned into pipes.
local zh_wp
if a['wp'] then
if a['wp'] == 'y' then
zh_wp = ''
else
zh_wp = '|' .. replace(a['wp'], ',', '|')
end
end
-- A hit in the zhwp data tables forces a bare {{zh-wp}}, overriding the 'wp' argument.
if zhwp_data.single_char_title[character] or zhwp_data.contains_astral[character] or zhwp_data.single_char_title[M.ts(character)] then
zh_wp = ''
end
-- x
-- Output buffer; tin() appends one line.
local output = {}
local function tin(text) table.insert(output, text) end
-- x
if en <= 1 then
tin('==Chinese==')
end
-- Without an etymology number, forms and Wikipedia box go directly under the language header.
if en == 0 then
if not zh_see then tin('{{zh-forms' .. zh_forms .. '}}') end
if zh_wp then
tin('{{zh-wp' .. zh_wp .. '}}')
end
end
if a['ge'] or add_han_etym then
tin(head('Glyph origin'))
if add_han_etym then tin('{{Han etym}}') end
if a['ge'] then tin(a['ge']) end
if zh_see and en == 0 then
tin(head('Definitions'))
end
end
-- With an etymology number, forms and Wikipedia box go under the Etymology header instead.
if en > 0 then
tin(head('Etymology ' .. tostring(en)))
if not zh_see then tin('{{zh-forms' .. zh_forms .. '}}') end
if zh_wp then
tin('{{zh-wp' .. zh_wp .. '}}')
end
end
-- Headers below an Etymology header are one level deeper.
if en > 0 then
hl = hcr(hl, 1)
end
if zh_see then
tin('{{zh-see|' .. zh_see .. '}}')
else
tin(head('Pronunciation'))
tin('{{zh-pron')
-- Pronunciation arguments; false marks "not supplied".
local m, c, h, mn, w = a['m'] or false, a['c'] or false, a['h'] or false, a['mn'] or false, a['w'] or false
local mc, oc = a['mc'] or false, a['oc'] or false
-- m given in zhuyin: collapse every run of other characters into one comma,
-- drop leading/trailing separators, then convert with cmn-pron's zhuyin_py.
if m and find(m, '[ㄅ-ㄩ]') then
m = replace(m, '[^.ˊˇˋㄅ-ㄩ]', '@')
m = replace(m, '@+', '@')
m = replace(m, '^@+', '')
m = replace(m, '@+$', '')
m = replace(m, '@', ',')
m = require("module:cmn-pron").zhuyin_py(m)
end
if not m then
if require("module:zh/data/cmn-tag").MT[character] then -- if there is cmn-tag data
m = character
else
m = mw.ustring.gsub(M.pytemp(character,'','',''), ',', ', ') or false -- based on line from zh-new
if find(m, '[㐀-鿕𠀀-𬺡]') then -- pinyin conversion failed
m = false
end
end
end
if not c then
c = M.check_pron(character, 'yue', 1) or false
-- Several looked-up values (comma present) switch on the 'rhk' reference.
if c and find(c, ',') then a['rhk'] = 'yes' end
end
-- h containing digits is passed through export.test_5.
if h and find(h, '[0-9]') then
h = export.test_5(h)
end
if not h then
h = M.check_pron(character, 'hak', 1) or false
end
if not mn then
mn = M.check_pron(character, 'nan', 1) or false
end
-- w containing a P/S/Q/R letter is passed through export.test_3.
if w and find(w, '[PSQR]') then
w = export.test_3(w)
end
-- mc / oc default to 'y' when the corresponding data page exists.
if (not mc) and (mw.title.new('Module:zh/data/ltc-pron/' .. character).exists) then
mc = 'y'
end
if (not oc) and (mw.title.new('Module:zh/data/och-pron-BS/' .. character).exists or mw.title.new('Module:zh/data/och-pron-ZS/' .. character).exists) then
oc = 'y'
end
-- Emit the {{zh-pron}} parameters, one per line.
if m then tin('|m=' .. m) end
if (a['ms'] or a['m-s']) then tin('|m-s=' .. (a['ms'] or a['m-s'])) end
if c then tin('|c=' .. c) end
if a['ct'] then tin('|c-t=' .. a['ct']) end
if a['g'] then tin('|g=' .. a['g']) end
if h then tin('|h=pfs=' .. h) end
if a['j'] then tin('|j=' .. a['j']) end
if a['md'] then tin('|md=' .. a['md']) end
if mn then tin('|mn=' .. mn) end
if (a['mnn'] or a['mn_note']) then tin('|mn_note=' .. (a['mnn'] or a['mn_note'])) end
if (a['mnt'] or a['mn-t']) then tin('|mn-t=' .. (a['mnt'] or a['mn-t'])) end
if (a['mntn'] or a['mn-t_note']) then tin('|mn-t_note=' .. (a['mntn'] or a['mn-t_note'])) end
if w then tin('|w=' .. w) end
if a['x'] then tin('|x=' .. a['x']) end
if mc or oc then
tin('|mc=' .. (mc or ''))
tin('|oc=' .. (oc or ''))
end
if a['ma'] then tin('|ma=' .. a['ma']) end
tin('|cat=' .. (a['cat'] or ''))
tin('}}')
tin(head('Definitions'))
tin('{{zh-hanzi}}')
tin('')
if a['rcns'] then
tin('# {{lb|zh|Taiwan}} {{n-g|Only used in personal names.}}')
else
tin('# ' .. (a[1] or '{{rfdef|zh}}'))
end
if not a['noder'] then -- Lua error: not enough memory
local der = ''
local der_add = ''
if a['der'] then
der_add = a['der']
der_add = replace(der_add, '[^㐀-鿕𠀀-𬺡]+', '|') -- any non-hanzi text becomes separator
der_add = replace(der_add, '|+', '|')
der_add = replace(der_add, '^|', '')
der_add = replace(der_add, '|$', '')
der_add = '|' .. der_add
end
-- For a character in the 𠀀-𬺡 range, add every multi-character title from
-- zhwp_data.contains_astral that contains it.
if match(character, '[𠀀-𬺡]') then
for title, _ in pairs(zhwp_data.contains_astral) do
if len(title) > 1 and match(title, character) then
der_add = der_add .. '|' .. title
end
end
end
-- Expand zh-der/new now; p= is m up to its first comma.
der = frame:preprocess('{{subst:zh-der/new' .. (der_add or '') .. (m and '|p=' .. replace(m, ',.+', '') or '') .. '}}')
if match(der, 'memory') then
tin(head('Compounds'))
tin('{{su#bst:zh-der/new' .. (der_add or '') .. (m and '|p=' .. replace(m, ',.+', '') or '') .. '}}') -- let you add zh-der in a separate edit in case Lua returns "out of memory"
elseif match(der, '[㐀-鿕𠀀-𬺡]') then
tin(head('Compounds'))
tin(der)
end
end
if a['also'] then
tin(head('See also'))
tin('* {{zh-l|' .. a['also'] .. '}}')
end
end
-- Back to the outer header level for the References section.
if en > 0 then
hl = hcr(hl, -1)
end
if a['rtw'] or a['rhk'] or a['ryt'] or a['riso'] or a['rcns'] or a['rnan'] then
tin(head('References'))
if a['rtw'] then tin('* {{R:twedu|' .. a['rtw'] .. '}}') end
if a['rhk'] then tin('* {{R:yue:mfccd}}') end
if a['ryt'] then tin('* {{R:yue:jyut.net}}') end
if a['riso'] then tin('* {{R:yue:Jyutping Database}}') end
if a['rcns'] then tin('* {{R:zh:CNS|' .. replace(a['rcns'], '%-', '|') .. '}}') end
if a['rnan'] then
-- A value containing a digit goes to R:nan:thcwd; anything else selects R:nan:thcwdq.
if find(a['rnan'], '%d') then
tin('* {{R:nan:thcwd|' .. a['rnan'] .. '}}')
else
tin('* {{R:nan:thcwdq}}')
end
end
end
return trim(table.concat(output, '\n'))
end
-- New hanzi translingual entry generator for the current page title.
-- frame.args[3] is pasted text containing lines of the form "k<Property> <value>"
-- and a "Unihan data for U+XXXX" heading; frame.args[1] containing 'x' suppresses
-- the definition line. The placeholders ($name) in the template below are filled
-- in step by step; the order of the substitutions matters because some
-- placeholder names share a prefix with the literal field names.
function export.newhzmul(frame)
local text = frame.args[3]
local char = mw.title.getCurrentTitle().text
-- Existing wikitext of the page (empty string if the page has none).
local x = mw.title.new(char):getContent() or ''
local model = trim([==[
{{character info/new}}
==Translingual==
===Han character===
{{Han char|rn=$rs1|rad=$rad|as=$rs2|sn=$TotalStrokes|four=$FourCornerCode$four|canj=$Cangjie$canj|ids=$ids}}
# $Definition
====References====
{{Han ref|kx=$IRGKangXi|dkj=$IRGDaiKanwaZiten|dj=$IRGDaeJaweon|hdz=$IRGHanyuDaZidian|uh=$hex}}
]==])
-- corr: radical characters, indexed by the radical number captured from |rn= below.
local corr = {'一','丨','丶','丿','乙','亅','二','亠','人','儿','入','八','冂','冖','冫','几','凵','刀','力','勹','匕','匚','匸','十','卜','卩','厂','厶','又','口','囗','土','士','夂','夊','夕','大','女','子','宀','寸','小','尢','尸','屮','山','巛','工','己','巾','干','幺','广','廴','廾','弋','弓','彐','彡','彳','心','戈','戶','手','支','攴','文','斗','斤','方','无','日','曰','月','木','欠','止','歹','殳','毋','比','毛','氏','气','水','火','爪','父','爻','爿','片','牙','牛','犬','玄','玉','瓜','瓦','甘','生','用','田','疋','疒','癶','白','皮','皿','目','矛','矢','石','示','禸','禾','穴','立','竹','米','糸','缶','网','羊','羽','老','而','耒','耳','聿','肉','臣','自','至','臼','舌','舛','舟','艮','色','艸','虍','虫','血','行','衣','襾','見','角','言','谷','豆','豕','豸','貝','赤','走','足','身','車','辛','辰','辵','邑','酉','釆','里','金','長','門','阜','隶','隹','雨','靑','非','面','革','韋','韭','音','頁','風','飛','食','首','香','馬','骨','高','髟','鬥','鬯','鬲','鬼','魚','鳥','鹵','鹿','麥','麻','黃','黍','黑','黹','黽','鼎','鼓','鼠','鼻','齊','齒','龍','龜','龠'}
-- corr_s: replacement radical used when the radical number is followed by an apostrophe
-- (presumably the simplified form of the radical -- confirm).
local corr_s = {['言']='讠',['門']='门',['食']='饣',['飛']='飞',['馬']='马',['見']='见',['貝']='贝',['糸']='纟',['車']='车',['長']='长',['韋']='韦',['風']='风',['金']='钅',['鳥']='鸟',['龍']='龙',['頁']='页',['齊']='齐',['麥']='麦',['龜']='龟',['魚']='鱼',['黽']='黾',['齒']='齿',['鹵']='卤'}
-- from text
local targets = {'RSUnicode','TotalStrokes','FourCornerCode','Cangjie','IRGKangXi','IRGDaiKanwaZiten','IRGDaeJaweon','IRGHanyuDaZidian','Definition'}
for _, property in ipairs(targets) do
-- Value of the "k<property>" line in the pasted text, or '' when absent.
local value = trim(match(text, 'k'..property..'%s+([^\n]+)') or '')
mw.log(property .. '|' .. value)
if property == 'RSUnicode' then
-- Keep only the first space-separated value, then split it at the dot
-- into the parts that fill $rs1 and $rs2.
value = replace(value, ' .+', '') -- 龽
value = split(value, '%.')
model = replace(model, '$rs1', value[1])
model = replace(model, '$rs2', value[2])
elseif property == 'Definition' then
-- The definition line is dropped on request or when the page already has a Chinese section.
if find(frame.args[1], 'x') or find(x, '==Chinese==') then
model = replace(model, '# $Definition\n\n', '')
else
model = replace(model, '$Definition', (value == '' and '{{rfdef|Han}}' or replace(value, ';', '\n#')))
end
else
model = replace(model, '$'..property, value)
end
-- A Cangjie value from the pasted text makes the $canj fallback unnecessary.
if property == 'Cangjie' and value ~= '' then
model = replace(model, '$canj', '')
end
end
-- read from existing page or manually provided
local ex = {'ids','four','canj'}
for _, property in ipairs(ex) do
model = replace(model, '$'..property, match(x, '|'..property..'=([^|}]+)') or frame.args[property] or '')
end
-- |rad=
-- Drops the apostrophe after the radical number and fills $rad from corr
-- (or from corr_s when the apostrophe was present).
model = replace(model, '(|rn=)(%d+)(\'?)(|rad=)($rad)(|)', function(a,b,c,d,e,f)
local z = corr[tonumber(b)]
return a .. b .. d .. (c == '\'' and corr_s[z] or z) .. f
end
)
-- remove empty dict fields
local template_ref_fields = {'kx','dkj','dj','hdz'}
for _, property in ipairs(template_ref_fields) do
model = replace(model, '|'..property..'=|', '|')
end
-- $hex is the run of hex digits after "Unihan data for U" plus one more character.
model = replace(model, '$hex', match(text, 'Unihan data for U.(%x+)'))
-- Append a ---- separator when the page already has a Chinese section.
if find(x, '==Chinese==') then
model = model .. '\n\n----\n\n'
end
return model
end
--- User page links: a fixed-height strip of equally wide link boxes.
-- Each box is a wikilink whose visible label is wrapped in a styled span.
-- The frame argument is not used.
function export.test_1(frame)
	-- { link target, label }
	local links = {
		{ '/dict', 'a'},
		{ '/cp', 'b'},
		{ 'WT:RE:zh', '中'},
		{ 'WT:RE:ja', '日'},
		{ '/m3', '日'},
		{ '/sandbox', '他'},
	}

	-- All boxes share the available width equally.
	local width = tostring(100 / #links) .. '%'
	local style = 'border:1px solid whitesmoke; color:transparent; display:inline-block; width:' .. width .. '; height:100%; box-sizing:border-box; margin-right:-1px;'

	local html = { '<div style="width:auto; height:300px; overflow:auto;">' }
	for i = 1, #links do
		local target, label = links[i][1], links[i][2]
		html[#html + 1] = '[[' .. target .. '|<span style="' .. style .. '">' .. label .. '</span>]]'
	end
	html[#html + 1] = '</div>'

	return table.concat(html)
end
--- Search: build an external link to a full-text search on en.wiktionary.
-- frame.args[1] is the search term, frame.args[2] an optional link label
-- (the raw term is used as the label when it is absent).
-- The term is always percent-encoded before it goes into the URL. Previously
-- the one-argument form inserted it verbatim, so a space in the term ended
-- the URL early and the rest of the term spilled into the label.
function export.test_2(frame)
	local base = 'https://en.wiktionary.org/w/index.php?title=Special%3ASearch&profile=default&fulltext=Search&search='
	local term = frame.args[1]
	local label = frame.args[2] or term
	-- unstripNoWiki recovers the text of a term passed inside <nowiki> tags.
	local query = mw.uri.encode(mw.text.unstripNoWiki(term), 'PATH')
	return '[' .. base .. query .. ' ' .. label .. ']'
end
--- Convert a syllable ending in a tone letter (P, S, Q or R) into a tone
-- number followed by the respelled syllable.
-- Accepts either a string or a frame (in which case args[1] is used).
-- The tone number depends on the tone letter and on whether the initial
-- counts as voiced, judged on the syllable before it is respelled.
function export.test_3(text)
	if type(text) == 'table' then text = text.args[1] end

	local syllable, tone = match(text, '(.+)([PSQR])')

	-- Any one of these patterns marks the syllable as voiced.
	local voiced_patterns = { "^[bvdnlzg]", "^m[^m]", "jj", "xx", "hh" }
	local voiced = false
	for _, pattern in ipairs(voiced_patterns) do
		if match(syllable, pattern) then
			voiced = true
			break
		end
	end

	-- Respell the initial cluster when it is followed by i.
	if match(syllable, '^[ctsjszh]+i') then
		local respelled = { ['c']='j', ['ts']='j', ['ch']='q', ['tsh']='q', ['j']='jj', ['s']='x', ['sh']='x', ['z']='xx', ['zh']='xx' }
		syllable = replace(syllable, '^[ctsjszh]+', respelled)
	end
	syllable = replace(syllable, 'h$', 'q')
	-- too lazy for vowels

	-- tone letter -> tone number, keyed by voicing
	local tone_numbers = {
		P = { [true] = '3', [false] = '1' },
		S = { [true] = '3', [false] = '2' },
		Q = { [true] = '3', [false] = '2' },
		R = { [true] = '5', [false] = '4' },
	}

	return tone_numbers[tone][voiced] .. syllable
end
--- Replace 'w' and tone digits in a romanised string with a diacritic letter
-- and combining marks.
-- Accepts either a string or a frame (in which case args[1] is used).
function export.test_5(text)
	if type(text) == 'table' then text = text.args[1] end

	-- Applied strictly in this order: the two-digit tones must be consumed
	-- before the single digits '2' and '5'.
	local substitutions = {
		{ 'w', 'ṳ' },
		{ '24', '̂' },
		{ '11', '̀' },
		{ '31', '́' },
		{ '55', '' },
		{ '2', '' },
		{ '5', '̍' },
	}
	for _, pair in ipairs(substitutions) do
		text = replace(text, pair[1], pair[2])
	end

	return text
end
--- ja fmt: rewrite definition lines that begin with bracketed links into
-- {{ja-def}} calls, and turn decl=い / decl=i into infl=i.
-- frame.args[1] is the wikitext to reformat.
function export.test_7(frame)
	-- Replacement for one "# [[...]]: " prefix: every run of characters
	-- outside ぁ-ー and 㐀-鿕 becomes a parameter separator.
	local function to_ja_def(prefix, opening, linked, closing)
		return prefix .. '{{ja-def|' .. replace(linked, '[^ぁ-ー㐀-鿕]+', '|') .. '}} '
	end

	local text = trim(frame.args[1])
	text = replace(text, '(# *)(%[%[)([^\n]+)(%]%]: *)', to_ja_def)
	text = replace(text, 'decl=[いi]', 'infl=i')
	return text
end
--- ltc/och attention: returns the two fixed attention templates, back to back.
function export.test_9()
	local templates = {
		'{{attn|ltc}}',
		'{{attn|och|Middle+Old Chinese needs to be distributed}}',
	}
	return table.concat(templates)
end
--- Build the ?action=edit&section=N URL for a given header of a page.
-- content:       wikitext of the page
-- target_header: header text to look for (without the = signs)
-- pagename:      title used to build the URL
-- Headers are counted in order of appearance until target_header is found.
-- If it never appears, the number of the last header matched is used
-- (0 when the content has no headers at all).
-- Returns the canonical URL as a string.
function export.test_10(content, target_header, pagename)
	local section = 0
	for header in itermatch(content, '==+([^\n=]+)==+\n') do
		section = section + 1
		if header == target_header then
			break
		end
	end
	-- The query needs a real '&' between the two parameters; the previous
	-- text had it mangled into a section sign.
	return tostring(mw.uri.canonicalUrl(pagename, 'action=edit&section=' .. section))
end
--- HSK list words missing the appropriate category.
-- For each level, reads the HSK appendix page, reduces it to the listed
-- words, and reports every word whose entry does not mention the level name.
-- Words without 'zh-pron' in their entry are wrapped in <mark>.
-- Multi-character words and single characters go into separate lists;
-- the word lists come first in the result.
function export.test_11()
	-- The appendix has four levels ('Beginning', 'Elementary', 'Intermediate',
	-- 'Advanced'); only these two are processed.
	local levels = {'Elementary','Intermediate'}

	-- Applied in order to strip the appendix markup down to the words.
	local cleanup = {
		{ '{{l|cmn|([^|]+)|tr={{l|cmn|([^|]+)}}', '%2' },
		{ '{{zh.l|([^/|]+)/([^/|]+)|', '%1' },
		{ '{{zh.l|([^/|]+)/([^/|]+)/([^/|]+)|', '%1+%2' },
		{ '%[%[([^%[%]]+)%]%] %(%[%[([^%[%]]+)%]%],', '%2' },
		{ 'is called a [^\n]+', '' },
	}

	local words, characters = {}, {}
	for _, level in ipairs(levels) do
		local heading = '*' .. level .. '\n**'
		words[#words + 1] = heading
		characters[#characters + 1] = heading

		local appendix = mw.title.new('Appendix:HSK list of Mandarin words/' .. level .. ' Mandarin'):getContent()
		for _, rule in ipairs(cleanup) do
			appendix = replace(appendix, rule[1], rule[2])
		end

		for ci in itermatch(appendix, '[㐀-鿕…]+') do
			local entry = mw.title.new(ci):getContent() or ''
			local has_pron = match(entry, 'zh%-pron')
			local url = export.test_10(entry, 'Chinese', ci)
			if not find(entry, level) then
				local item = '[' .. url .. ' ' .. ci .. '], '
				if not has_pron then
					item = '<mark>' .. item .. '</mark>'
				end
				local bucket = (len(ci) == 1) and characters or words
				bucket[#bucket + 1] = item
			end
		end

		words[#words + 1] = '\n'
		characters[#characters + 1] = '\n'
	end

	return table.concat(words, '') .. table.concat(characters, '')
end
--- Jouyou kanji that need {{ja-readings}} to be updated.
-- For every kanji in the list below, reads its entry and emits an edit link
-- to the 'Readings' section. Entries whose wikitext still has a |kun= or
-- |...on= parameter starting with a wikilink are wrapped in <mark>.
function export.test_12()
	-- The list is written as two concatenated literals: a quoted Lua string
	-- cannot contain a raw line break.
	local jouyou = '亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎怨宴媛援園煙猿遠鉛塩演縁艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加可仮何花佳価果河苛科架夏家荷華菓貨渦過嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯街慨蓋該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠巻看陥乾勘患貫寒喚堪換敢棺款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況峡挟狭恐恭胸脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟熊繰君訓勲薫軍郡群兄刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固股虎孤弧故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行坑孝抗攻更効幸拘肯侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀酷獄骨駒込頃今困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催塞歳載際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜諮示字寺次耳自似児事侍治持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇勺尺借酌釈爵若弱寂手主守朱取狩首殊珠酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞償礁鐘上丈冗条状乗城浄剰常情場畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃仁尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂錘随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝清盛婿晴勢聖誠精製誓静請整醒税夕斥石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉浅洗染扇栓旋船戦煎羨腺詮践箋銭銑潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖租素措粗組疎訴塑遡礎双壮早争走奏相荘草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属賊続卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁但達脱奪棚誰丹旦担単炭胆探淡短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝脹貼超腸跳徴嘲潮澄調聴懲直勅捗沈珍朕陳賃鎮追椎墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店点展添転塡田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖頭謄藤闘騰同洞胴動堂童道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒範繁藩晩番蛮盤比皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏瓶不夫父付布扶府怖阜附訃負赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返変偏遍編弁辛便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木朴牧睦僕墨撲没勃堀本奔翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺茂模毛妄盲耗猛網目黙門紋問匁冶夜野弥厄役約訳薬躍闇由油喩愉諭輸癒唯友有勇幽悠郵湧' ..
		'猶裕遊雄誘憂融優与予余誉預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘倫輪隣臨瑠涙累塁類令礼冷励戻例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕'

	local out = {}
	-- Splitting on the empty string yields one element per character.
	for _, hz in ipairs(split(jouyou, '')) do
		local content = mw.title.new(hz):getContent() or ''
		-- ok: no reading parameter in the old wikilinked format was found.
		local ok = not (match(content, '|kun=%[%[') or match(content, '|[^l][a-z]+on=%[%['))
		local url = export.test_10(content, 'Readings', hz)
		table.insert(out,
			(ok and '' or '<mark>')
			.. '[' .. url .. ' ' .. hz .. '、]'
			.. (ok and '' or '</mark>')
		)
	end

	return '<span lang="ja" class="plainlinks">' .. table.concat(out, '') .. '</span>'
end
--- Process/update {{ja-readings}} for the kanji that is the current page title.
-- Every argument in frame.args is treated as "reading type = old wikitext":
-- the value is reduced to a comma-separated list of kana readings and the
-- result is re-emitted as a {{ja-readings}} call with the types in a fixed
-- order. Readings listed for the kanji in Module:ja/data/jouyou-yomi that
-- cannot be found in the generated text are appended after the template.
function export.test_13(frame)
	local args = frame.args
	local hz = mw.title.getCurrentTitle().text
	local yomi = mw.loadData('Module:ja/data/jouyou-yomi').yomi[hz]
	local order = {'goon', 'kanon', 'toon', 'kanyoon', 'soon', 'on', 'kun', 'nanori'}

	-- Clean-up rules, applied strictly in this order. '@' serves as a
	-- temporary separator and '<' is what the "historical" qualifier is turned into.
	local rules = {
		{ '%s*%([a-zāīūēō%.%-]%)', '' },
		{ '{{non%-joyo%-reading}}%s*', '' },
		{ '%s*{{q[a-z]*|non%-%[%[w:Jōyō kanji|Jōyō%]%] reading}}', '' },
		{ ',%s*{{q[a-z]*|historical}}', '<' },
		{ '%[%[(' .. hz .. '[ぁ-ー]+)%]%]', '' },
		{ '{{[jal|-]+(' .. hz .. '[ぁ-ー]+)}}', '' },
		-- piped links: keep the label, with its dots turned into hyphens
		{ '%[%[([^%]|]+)|([^%]|]+)%]%]', function(target, label)
			return replace(label, '%.', '-')
		end },
		{ '([^ぁ-ー])%.([^ぁ-ー])', '@' },
		{ '[^ぁ-ー%-<>.]+', '@' },
		{ '(@*)[<>](@*)', '<' }, -- or something. also, the > is intentional (some entries indeed have backwards arrows)
		{ '^@', '' },
		{ '@$', '' },
		{ '@', ', ' },
	}

	local processed = {}
	for reading_type, text in pairs(args) do
		for _, rule in ipairs(rules) do
			text = replace(text, rule[1], rule[2])
		end
		-- Hyphens are kept for kun readings only.
		if reading_type ~= 'kun' then
			text = replace(text, '%-', '')
		end
		processed[reading_type] = text
	end

	local lines = { '{{ja-readings' }
	for _, reading_type in ipairs(order) do
		local value = processed[reading_type]
		if value then
			lines[#lines + 1] = '|' .. reading_type .. '=' .. value
		end
	end
	lines[#lines + 1] = '}}'
	local out = table.concat(lines, '\n')

	local missing = {}
	if yomi then
		for reading, kind in pairs(yomi) do
			reading = require('Module:string').pattern_escape(reading)
			-- Kinds 1 and 3 are converted from katakana to hiragana before the search.
			if kind == 1 or kind == 3 then
				reading = require('Module:ja').kata_to_hira(reading)
			end
			-- The reading must appear with a non-kana character on both sides.
			local present = find(out, '[^ぁ-ー]' .. reading .. '[^ぁ-ー]')
			if not present then
				missing[#missing + 1] = reading
			end
		end
	end

	-- NOTE: don't forget to check if I left any of these in entries
	local missing_list = table.concat(missing, ', ')
	-- Drop the % signs that pattern_escape added.
	return out .. replace(missing_list, '%%', '')
end
--- Generate a table of {{attention}} comments from a copy-paste of a Category: page.
-- frame.args[1] is the pasted text; every line fragment that follows a space
-- is taken as an entry title. Each row shows the entry and the parameters of
-- its first {{attention}} (or else {{attn}}) call, with pipes replaced by ¦.
function export.test_14(frame)
	local pasted = frame.args[1]
	local rows = {
		'{| class="wikitable sortable"',
		'! - || -',
	}

	for entry in itermatch(pasted, ' ([^\n]+)') do
		local content = mw.title.new(entry):getContent() or ''
		local attention = match(content, '{{attention|([^}]+)}}')
		if not attention then
			attention = match(content, '{{attn|([^}]+)}}') or '<mark>?</mark>'
		end
		rows[#rows + 1] = '![[' .. entry .. ']]\n|' .. replace(attention, '|', '¦')
	end

	rows[#rows + 1] = '|}'
	-- Every element, including the opening and closing lines, is separated by a row marker.
	return table.concat(rows, '\n|-\n')
end
-- Hand the table of functions defined above back to the caller of this module.
return export