Module:User:Dine2016/ja-kanjitab

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Dine2016, for his own experimentation. Items in this module may be added and removed at Dine2016's discretion; do not rely on this module's stability.


local export = {}
local m_utilities = require("Module:utilities")
local m_ja = require("Module:ja")

local title = mw.title.getCurrentTitle()
local PAGENAME = title.text
local NAMESPACE = title.nsText

local lang = require("Module:languages").getByCode("ja")

-- TODO: centralize internal tables here

-- Body of a character class matching kanji; it is spliced between '[' and ']'
-- (or '[^' and ']') when building ustring patterns in genspec().
-- It lists four ranges: 㐀-䶵, 一-鿌, a range written as raw UTF-8 byte escapes
-- (\239\164\128 encodes U+F900 and \239\171\153 encodes U+FAD9, i.e. the
-- compatibility-ideograph area), and 𠀀-𯨟 for supplementary-plane ideographs.
-- NOTE(review): the byte escapes were presumably used so that the literal
-- compatibility characters survive Unicode normalisation on save -- confirm.
local kanji_pattern = "㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟"

-- Wikitext labels for kanji grades, indexed by the number that gentable()
-- obtains from m_ja.kanji_grade(): 1-6 are the school grades, 7 is the
-- remaining jōyō kanji ("Grade: S"), 8 is jinmeiyō and 9 is hyōgaiji.
local kanji_grade_links = {}
for grade = 1, 6 do
	kanji_grade_links[grade] = "[[:w:Kyōiku_kanji|Grade: " .. grade .. "]]"
end
kanji_grade_links[7] = "[[:w:Jōyō kanji|Grade: S]]"
kanji_grade_links[8] = "[[:w:Jinmeiyō kanji|Jinmeiyō]]"
kanji_grade_links[9] = "[[:w:Hyōgai kanji|Hyōgaiji]]"

-- Wraps `text` (a string) in a Japanese-script span.
-- When `text` is the name of the page being rendered it is shown as bold
-- plain text; otherwise it becomes a wikilink to the Japanese section of
-- the page called `text`.
local function link(text)
	-- Compare against the page name string, not the title object: a string is
	-- never equal to a non-string value, so comparing with `title` could never
	-- select the bold branch. PAGENAME also reflects the |pagename= override.
	if text == PAGENAME then
		return '<span lang="ja" class="Jpan" style="font-weight: bold;">' .. text .. '</span>'
	else
		return '<span lang="ja" class="Jpan">' .. '[[' .. text .. '#Japanese|' .. text .. ']]' .. '</span>'
	end
end

-- export this?
-- Converts a shinjitai spelling to kyūjitai, one character at a time.
-- Returns the converted string (characters without a table entry are kept
-- unchanged), or nil when `shin` contains a character whose old form cannot
-- be chosen automatically.
local function shin_to_kyu(shin)
	-- These characters make the function give up before any lookup, so the
	-- 芸 and 缶 entries in the table below are never consulted.
	local ambiguous_char = '[弁芸缶]' -- TODO: check if there is more
	if mw.ustring.find(shin, ambiguous_char) then return nil end
	
	-- NOTE(review): several entries appear to map a character to itself; they
	-- may originally have been CJK compatibility ideographs -- confirm against
	-- the upstream data before relying on them.
	local s_to_k = {
		["亜"] = "亞", ["悪"] = "惡", ["圧"] = "壓", ["囲"] = "圍", ["医"] = "醫", ["為"] = "爲",
		["壱"] = "壹", ["逸"] = "逸", ["隠"] = "隱", ["栄"] = "榮", ["営"] = "營", ["衛"] = "衞",
		["駅"] = "驛", ["謁"] = "謁", ["円"] = "圓", ["塩"] = "鹽", ["縁"] = "緣", ["艶"] = "艷",
		["応"] = "應", ["欧"] = "歐", ["殴"] = "毆", ["桜"] = "櫻", ["奥"] = "奧", ["横"] = "橫",
		["温"] = "溫", ["穏"] = "穩", ["仮"] = "假", ["価"] = "價", ["禍"] = "禍", ["画"] = "畫",
		["会"] = "會", ["悔"] = "悔", ["海"] = "海", ["絵"] = "繪", ["壊"] = "壞", ["懐"] = "懷",
		["慨"] = "慨", ["概"] = "槪", ["拡"] = "擴", ["殻"] = "殼", ["覚"] = "覺", ["学"] = "學",
		["岳"] = "嶽", ["楽"] = "樂", ["喝"] = "喝", ["渇"] = "渴", ["褐"] = "褐", ["缶"] = "罐",
		["巻"] = "卷", ["陥"] = "陷", ["勧"] = "勸", ["寛"] = "寬", ["漢"] = "漢", ["関"] = "關",
		-- 既 maps to 旣; 卽 is the old form of 即 (see the 即 entry below)
		["歓"] = "歡", ["観"] = "觀", ["気"] = "氣", ["祈"] = "祈", ["既"] = "旣", ["帰"] = "歸",
		["器"] = "器", ["偽"] = "僞", ["戯"] = "戲", ["犠"] = "犧", ["旧"] = "舊", ["拠"] = "據",
		["挙"] = "擧", ["虚"] = "虛", ["峡"] = "峽", ["挟"] = "挾", ["狭"] = "狹", ["郷"] = "鄕",
		["響"] = "響", ["暁"] = "曉", ["勤"] = "勤", ["謹"] = "謹", ["区"] = "區", ["駆"] = "驅",
		["勲"] = "勳", ["薫"] = "薰", ["径"] = "徑", ["茎"] = "莖", ["恵"] = "惠", ["掲"] = "揭",
		["渓"] = "溪", ["経"] = "經", ["蛍"] = "螢", ["軽"] = "輕", ["継"] = "繼", ["鶏"] = "鷄",
		["芸"] = "藝", ["撃"] = "擊", ["欠"] = "缺", ["研"] = "硏", ["県"] = "縣", ["倹"] = "儉",
		["剣"] = "劍", ["険"] = "險", ["圏"] = "圈", ["検"] = "檢", ["献"] = "獻", ["権"] = "權",
		["顕"] = "顯", ["験"] = "驗", ["厳"] = "嚴", ["広"] = "廣", ["効"] = "效", ["恒"] = "恆",
		["黄"] = "黃", ["鉱"] = "鑛", ["号"] = "號", ["国"] = "國", ["黒"] = "黑", ["穀"] = "穀",
		["砕"] = "碎", ["済"] = "濟", ["斎"] = "齋", ["剤"] = "劑", ["殺"] = "殺", ["雑"] = "雜",
		["参"] = "參", ["桟"] = "棧", ["蚕"] = "蠶", ["惨"] = "慘", ["賛"] = "贊", ["残"] = "殘",
		["糸"] = "絲", ["祉"] = "祉", ["視"] = "視", ["歯"] = "齒", ["児"] = "兒", ["辞"] = "辭",
		["湿"] = "濕", ["実"] = "實", ["写"] = "寫", ["社"] = "社", ["者"] = "者", ["煮"] = "煮",
		["釈"] = "釋", ["寿"] = "壽", ["収"] = "收", ["臭"] = "臭", ["従"] = "從", ["渋"] = "澁",
		["獣"] = "獸", ["縦"] = "縱", ["祝"] = "祝", ["粛"] = "肅", ["処"] = "處", ["暑"] = "暑",
		["署"] = "署", ["緒"] = "緖", ["諸"] = "諸", ["叙"] = "敍", ["将"] = "將", ["祥"] = "祥",
		["称"] = "稱", ["渉"] = "涉", ["焼"] = "燒", ["証"] = "證", ["奨"] = "獎", ["条"] = "條",
		["状"] = "狀", ["乗"] = "乘", ["浄"] = "淨", ["剰"] = "剩", ["畳"] = "疊", ["縄"] = "繩",
		["壌"] = "壤", ["嬢"] = "孃", ["譲"] = "讓", ["醸"] = "釀", ["触"] = "觸", ["嘱"] = "囑",
		["神"] = "神", ["真"] = "眞", ["寝"] = "寢", ["慎"] = "愼", ["尽"] = "盡", ["図"] = "圖",
		["粋"] = "粹", ["酔"] = "醉", ["穂"] = "穗", ["随"] = "隨", ["髄"] = "髓", ["枢"] = "樞",
		["数"] = "數", ["瀬"] = "瀨", ["声"] = "聲", ["斉"] = "齊", ["静"] = "靜", ["窃"] = "竊",
		["摂"] = "攝", ["節"] = "節", ["専"] = "專", ["浅"] = "淺", ["戦"] = "戰", ["践"] = "踐",
		["銭"] = "錢", ["潜"] = "潛", ["繊"] = "纖", ["禅"] = "禪", ["祖"] = "祖", ["双"] = "雙",
		["壮"] = "壯", ["争"] = "爭", ["荘"] = "莊", ["捜"] = "搜", ["挿"] = "插", ["巣"] = "巢",
		["曽"] = "曾", ["痩"] = "瘦", ["装"] = "裝", ["僧"] = "僧", ["層"] = "層", ["総"] = "總",
		["騒"] = "騷", ["増"] = "增", ["憎"] = "憎", ["蔵"] = "藏", ["贈"] = "贈", ["臓"] = "臟",
		["即"] = "卽", ["属"] = "屬", ["続"] = "續", ["堕"] = "墮", ["対"] = "對", ["体"] = "體",
		["帯"] = "帶", ["滞"] = "滯", ["台"] = "臺", ["滝"] = "瀧", ["択"] = "擇", ["沢"] = "澤",
		["担"] = "擔", ["単"] = "單", ["胆"] = "膽", ["嘆"] = "嘆", ["団"] = "團", ["断"] = "斷",
		["弾"] = "彈", ["遅"] = "遲", ["痴"] = "癡", ["虫"] = "蟲", ["昼"] = "晝", ["鋳"] = "鑄",
		["著"] = "著", ["庁"] = "廳", ["徴"] = "徵", ["聴"] = "聽", ["懲"] = "懲", ["勅"] = "敕",
		["鎮"] = "鎭", ["塚"] = "塚", ["逓"] = "遞", ["鉄"] = "鐵", ["点"] = "點", ["転"] = "轉",
		["伝"] = "傳", ["都"] = "都", ["灯"] = "燈", ["当"] = "當", ["党"] = "黨", ["盗"] = "盜",
		["稲"] = "稻", ["闘"] = "鬭", ["徳"] = "德", ["独"] = "獨", ["読"] = "讀", ["突"] = "突",
		["届"] = "屆", ["難"] = "難", ["弐"] = "貳", ["悩"] = "惱", ["脳"] = "腦", ["覇"] = "霸",
		["拝"] = "拜", ["廃"] = "廢", ["売"] = "賣", ["梅"] = "梅", ["麦"] = "麥", ["発"] = "發",
		["髪"] = "髮", ["抜"] = "拔", ["繁"] = "繁", ["晩"] = "晚", ["蛮"] = "蠻", ["卑"] = "卑",
		["秘"] = "祕", ["碑"] = "碑", ["浜"] = "濱", ["賓"] = "賓", ["頻"] = "頻", ["敏"] = "敏",
		["瓶"] = "甁", ["侮"] = "侮", ["福"] = "福", ["払"] = "拂", ["仏"] = "佛", ["併"] = "倂",
		["並"] = "竝", ["塀"] = "塀", ["餅"] = "餠", ["辺"] = "邊", ["変"] = "變", ["勉"] = "勉",
		["歩"] = "步", ["宝"] = "寶", ["豊"] = "豐", ["褒"] = "襃", ["墨"] = "墨", ["翻"] = "飜",
		["毎"] = "每", ["万"] = "萬", ["満"] = "滿", ["免"] = "免", ["麺"] = "麵", ["弥"] = "彌",
		["黙"] = "默", ["訳"] = "譯", ["薬"] = "藥", ["与"] = "與", ["予"] = "豫", ["余"] = "餘",
		["誉"] = "譽", ["揺"] = "搖", ["様"] = "樣", ["謡"] = "謠", ["来"] = "來", ["頼"] = "賴",
		["乱"] = "亂", ["覧"] = "覽", ["欄"] = "欄", ["竜"] = "龍", ["隆"] = "隆", ["虜"] = "虜",
		["両"] = "兩", ["猟"] = "獵", ["緑"] = "綠", ["涙"] = "淚", ["塁"] = "壘", ["類"] = "類",
		["礼"] = "禮", ["励"] = "勵", ["戻"] = "戾", ["霊"] = "靈", ["齢"] = "齡", ["暦"] = "曆",
		["歴"] = "歷", ["恋"] = "戀", ["練"] = "練", ["錬"] = "鍊", ["炉"] = "爐", ["労"] = "勞",
		["郎"] = "郞", ["朗"] = "朗", ["廊"] = "廊", ["楼"] = "樓", ["録"] = "錄", ["湾"] = "灣",
		
		-- TODO: add Z-variants like 青/靑, 説/說
	}
	-- The parentheses drop gsub's second result (the match count), so the
	-- function returns exactly one value.
	return (mw.ustring.gsub(shin, '.', s_to_k))
end

-- Builds the specification table that gentable() renders, from the processed
-- template arguments:
--   args.s, args.k, args.es -- shinjitai, kyūjitai and extended-shinjitai
--                              spellings; '-' disables one of them
--   args[1]                 -- list of readings carrying a `maxindex` field; each
--                              item is "reading", "original>actual" and/or has a
--                              ".okurigana" suffix
--   args.yomi               -- reading type code(s)
--   args.alt                -- comma-separated alternative forms, "form-type"
-- Raises an error when the jitai combination is unsupported, automatic jitai
-- conversion fails, the term has no kanji, a reading or yomi code cannot be
-- parsed, or the yomi spans do not add up to the number of kanji.
local function genspec(args)
--[=[
	Fields of the returned table:
	kanji_length -- the number of kanji in the term
	kanji -- e.g. { '追払', '追拂' }
	kanji_types -- e.g. { 'shin', 'kyu' } (or 位: { 'shinkyu' }, 儘/侭: { 'kyu', 'extshin' })
	kanji_spellings -- e.g. { '追い払う', '追い拂う' }
	yomi -- a list of { original reading, actual reading, okurigana } of the kanji spans, e.g. { { 'お', 'お', 'い' }, { 'はら', 'はら', 'う' } }.
	yomi_types -- a list of reading types, each formatted like { 'on', span=1 }, { 'jukujikun', span=2 }. The sum of the spans should equal kanji_length.
	altforms -- e.g. { { '追いはらう', type='' }, { '追払う', '追拂う', type='irregular okurigana' }, { '追ひ払ふ', '追ひ拂ふ', type='historical kana' } }
]=]
	local kanji_types = {}
	local kanji_spellings = {}
	
	local shin = args.s;  if shin == '-' then shin = nil end
	local kyu = args.k;  if kyu == '-' then kyu = nil end
	local extshin = args.es;  if extshin == '-' then extshin = nil end
	-- a lone |k= or |es= is paired with the page name as its counterpart
	if kyu and not shin and not extshin then shin = PAGENAME end
	if extshin and not shin and not kyu then kyu = PAGENAME end
	
	if shin and kyu and not extshin then
		kanji_types = { 'shin', 'kyu' }
		kanji_spellings = { shin, kyu }
	elseif kyu and extshin and not shin then
		kanji_types = { 'kyu', 'extshin' }
		kanji_spellings = { kyu, extshin }
	elseif not kyu and not extshin then
		if not shin then shin = PAGENAME end
		kanji_types = { 'shin', 'kyu' }
		kanji_spellings = { shin, shin_to_kyu(shin) or error('automatic jitai conversion of ' .. shin .. ' failed, please supply the kyujitai manually') }
	else
		error('combination of kanji types not yet supported') -- shin + kyu + extshin can be supported if needed
	end
	-- identical spellings collapse into a single combined row
	if #kanji_spellings == 2 and kanji_spellings[1] == kanji_spellings[2] then
		kanji_types = { 'shinkyu' }
		kanji_spellings = { kanji_spellings[1] }
	end
	
	-- reduce every spelling to its kanji, expanding a kanji followed by 々
	local kanji = {}
	for _, spelling in ipairs(kanji_spellings) do
		local kj = mw.ustring.gsub(spelling, '([' .. kanji_pattern .. '])々', '%1%1')
		kj = mw.ustring.gsub(kj, '[^' .. kanji_pattern .. ']', '')
		table.insert(kanji, kj)
	end
	
	local kanji_length = mw.ustring.len(kanji[1])
	-- kanji[1] is empty here, so report the spelling it was derived from
	if kanji_length == 0 then error('there is no kanji in the term ' .. kanji_spellings[1]) end
	
	-- normalise every reading to "original>actual.okurigana" and split it
	local yomi = {}
	for i = 1, args[1].maxindex do
		local raw = args[1][i] or ''
		local ym = raw
		if not mw.ustring.find(ym, '%.') then ym = ym .. '.' end
		if not mw.ustring.find(ym, '>') then ym = mw.ustring.gsub(ym, '^(.*)%.(.*)$', '%1>%1.%2') end 
		local _, _, a, b, c = mw.ustring.find(ym, '^(.*)>(.*)%.(.*)$')
		-- e.g. a '>' placed after the '.' cannot be split; fail with a clear
		-- message instead of storing an empty triple
		if not a then error('cannot parse the reading "' .. raw .. '"') end
		table.insert(yomi, { a, b, c })
	end
	
	local yomi_types = {}
	local yt = args['yomi'] or ''
	local yomi_code = {
		o = 'on',			on = 'on',
		-- 'kan' abbreviates kan'on (see Case 1 below); kan'yōon has 'kanyo'
		kan = 'kanon',		kanon = 'kanon',
		go = 'goon',		goon = 'goon',
		toon = 'toon',
		kanyo = 'kanyoon',	kanyoon = 'kanyoon',
		k = 'kun',			kun = 'kun',
		juku = 'jukujikun',	jukuji = 'jukujikun',	jukujikun = 'jukujikun',  -- j alone is jūbakoyomi (on+kun) for backward compatibility
		irr = 'irregular',	irreg = 'irregular',	irregular = 'irregular',
		p = 'phonetic',		phonetic = 'phonetic',
		nanori = 'nanori',
		[''] = '',			none = '',
	}
	if yt == 'j' then yt = 'on,kun' elseif yt == 'y' then yt='kun,on' end
	-- a comma-less value that is not itself a known code is read as a run of
	-- one-letter codes, each optionally followed by a span
	if not mw.ustring.find(yt, ',') and not yomi_code[(mw.ustring.gsub(yt, '^(.-)[0-9]*$', '%1'))] then
		yt = mw.ustring.gsub((mw.ustring.gsub(yt, '([a-z][0-9]*)', '%1,')), ',$', '') -- 'ok2' to 'o,k2'
	end
	yt = mw.text.split(yt, ',')
	for _, item in ipairs(yt) do
		local _, _, a, b = mw.ustring.find(item, '^([a-z]*)([0-9]*)$')
		a = yomi_code[a] or error('cannot recognize yomi type "' .. args['yomi'] .. '"')
		b = tonumber(b) or 1
		table.insert(yomi_types, { a, span=b })
	end
	-- allow using a single yomi type for the whole kanji
	-- Case 1: {{ja-kanjitab|かん|れん|yomi=kan|s=関連}} -> kanon, kanon
	-- Case 2: {{ja-kanjitab|やまと|yomi=juku|s=大和}} -> jukujikun2
	if #yomi_types == 1 and yomi_types[1].span == 1 and kanji_length > 1 then
		local yomi_jukujikun_type = { ['jukujikun'] = true, ['irregular'] = true, ['phonetic'] = true }
		if yomi_jukujikun_type[yomi_types[1][1]] then -- Case 2
			yomi_types[1].span = kanji_length
		else -- Case 1
			local a = yomi_types[1][1]
			local b = yomi_types[1].span
			for i = 2, kanji_length do table.insert(yomi_types, { a, span=b }) end
		end
	end
	-- check the sum of the spans
	local span_sum = 0
	for _, yomi_type in ipairs(yomi_types) do span_sum = span_sum + yomi_type.span end
	if span_sum ~= kanji_length then error('the |yomi= parameter does not match the number of kanji: ' .. span_sum .. ' vs ' .. kanji_length) end
	
	local altforms = {}
	local alt_code = {
		['io'] = 'irregular okurigana',
		['hk'] = 'historical kana',
		['vk'] = 'variant kana',
		['ok'] = 'obsolete kana', -- e.g. 用ひる
	}
	local af = mw.text.split(args.alt or '', ',')
	for _, item in ipairs(af) do
		if item ~= '' then
			-- '-' is a pattern quantifier, so it is escaped to match literally
			if not mw.ustring.find(item, '%-') then item = item .. '-' end
			-- the greedy first capture splits at the last hyphen
			local _, _, a, b = mw.ustring.find(item, '^(.*)%-(.*)$')
			if alt_code[b] then b = alt_code[b] end
			-- syntax borrowed from {{zh-l}}: *字体 suppresses automatic conversion, and 字体/字體 provides manual conversions
			if mw.ustring.find(a, '%*') then
				-- parentheses keep only the string result of gsub
				table.insert(altforms, { (mw.ustring.gsub(a, '%*', '')), type=b })
			elseif mw.ustring.find(a, '/') then
				a = mw.text.split(a, '/')
				a['type'] = b
				table.insert(altforms, a)
			else
				local alt_shin = a
				-- fall back to the unconverted form when conversion is refused
				local alt_kyu = shin_to_kyu(alt_shin) or alt_shin
				if alt_shin == alt_kyu then
					table.insert(altforms, { alt_shin, type=b })
				else
					table.insert(altforms, { alt_shin, alt_kyu, type=b })
				end
			end
		end
	end
	
	return { kanji_length = kanji_length, kanji = kanji, kanji_types = kanji_types, kanji_spellings = kanji_spellings, yomi = yomi, yomi_types = yomi_types, altforms = altforms }
end

-- Renders the specification built by genspec() as wikitext for a
-- right-aligned table: one header row of readings, one row per jitai with
-- its kanji cells, and an optional "Variant forms" cell.
--   spec      -- table with the fields kanji_length, kanji, kanji_types,
--                kanji_spellings, yomi, yomi_types and altforms
--   collapsed -- accepted but not used by this function
-- Returns the wikitext as a single string.
local function gentable(spec, collapsed)
	local yomi_link = {
		on = "[[音読み#Japanese|on’yomi]]",
		kanon = "[[漢音#Japanese|kan’on]]",
		goon = "[[呉音#Japanese|goon]]",
		kanyoon = "[[慣用音#Japanese|kan’yōon]]",
		toon = "[[唐音#Japanese|tōon]]",
		kun = "[[訓読み#Japanese|kun’yomi]]",
		jukujikun = "[[熟字訓#Japanese|jukujikun]]",
		irregular = "irregular",
		phonetic = "phonetic",
		nanori = "[[名乗り#Japanese|nanori]]",
		[""] = "",
	}
	local jitai_link = {
		shin = '[[shinjitai]]',
		kyu = '[[kyūjitai]]',
		shinkyu = '[[shinjitai|shin.]] and [[kyūjitai|kyū.]]',
		extshin = '[[shinjitai]] ([[wikipedia:Extended shinjitai|extended]])',
	}
	
	-- pieces are collected here and joined once at the end
	local out = {}
	local function emit(piece) out[#out + 1] = piece end
	
	emit('{| align=right style="clear: right;margin: 1em;border-collapse: collapse;text-align: center"\n!\n')
	
	-- generate the yomi tabs
	for i, yomi_type in ipairs(spec.yomi_types) do
		local yomi = spec.yomi[i] or { '', '', '' }
		local original, actual, okurigana = yomi[1] or '', yomi[2] or '', yomi[3] or ''
		-- The okurigana is appended by plain concatenation, so a '%' or a
		-- backtick in the input is shown as typed rather than being
		-- interpreted by a gsub replacement.
		local suffix = (okurigana == '') and '' or ('(' .. okurigana .. ')')
		local yomi_text
		if original == actual then
			yomi_text = original .. suffix
		else
			yomi_text = original .. suffix .. ' > ' .. actual .. suffix
		end
		emit('! style="padding: 0.5em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%;" colspan=' .. yomi_type.span .. '|<span class="Jpan" lang="ja">' .. yomi_text .. '</span><br>' .. (yomi_link[yomi_type[1]] or '') .. '\n')
	end
	
	-- generate the main jitai lines
	for i = 1, #spec.kanji_types do
		emit('|- style="line-height:1.3"\n')
		-- cell showing the jitai type and the spelling
		emit('! style="padding: 0.5em;border: 1px solid #aaa;background: #E0FFFF;font-weight: normal;font-size: smaller;" |' .. jitai_link[spec.kanji_types[i]])
		local kanji_spelling = spec.kanji_spellings[i]
		local spelling_length = mw.ustring.len(kanji_spelling)
		-- a one-character spelling is not repeated; long ones use a smaller font
		if spelling_length > 6 then 
			emit(' <span style="font-size:100%">(' .. link(kanji_spelling) .. ')</span>')
		elseif spelling_length > 1 then 
			emit(' <span style="font-size:140%">(' .. link(kanji_spelling) .. ')</span>')
		end
		emit('\n')
		-- cells showing the kanji
		local kanji_string = spec.kanji[i]
		for j = 1, mw.ustring.len(kanji_string) do
			local kanji = mw.ustring.sub(kanji_string, j, j)
			emit('| style="padding: 0.5em; background-color:white;border-right: 1px solid #aaa;border-bottom: 1px solid #aaa;" | <span style="font-size:x-large">' .. link(kanji) .. '</span>')
			-- skip the grade line if the lookup yields nothing with a label,
			-- rather than failing on a nil concatenation
			local grade_link = kanji_grade_links[m_ja.kanji_grade(kanji)]
			if grade_link then
				emit('<br><small>' .. grade_link .. '</small>')
			end
			emit('\n')
		end
	end
	
	-- alt forms cell
	if #spec.altforms > 0 then
		-- spans the label column plus one column per kanji
		emit('|-\n| style="background-color:white;border: 1px solid #aaa; font-size:90%;" colspan=' .. (spec.kanji_length + 1) ..  '|\n<b>Variant forms</b><br>')
		local rows = {}
		for _, altform in ipairs(spec.altforms) do
			local links = {}
			for _, form in ipairs(altform) do table.insert(links, link(form)) end
			local row = table.concat(links, '/')
			if altform['type'] and altform['type'] ~= '' then
				row = row .. ' <span style="font-size:70%"><i>' .. altform['type'] .. '</i></span>'
			end
			table.insert(rows, row)
		end
		emit(table.concat(rows, '<br>') .. '\n')
	end
	emit('|}\n')
	return table.concat(out)
end

-- Template entry point: validates the arguments of the calling template and
-- returns the rendered kanji table as wikitext.
-- Raises an error for any unrecognized parameter, and when |pagename= is
-- used on a page whose namespace text is empty.
function export.show(frame)
	local param_spec = {
		[1] = { list = true, allow_holes = true, allow_empty = true },
		s = {},
		k = {},
		es = {},
		alt = {},
		sort = {},
		yomi = {},
		pagename = {},
	}
	local parent_args = frame:getParent().args
	local args, unrecognized_args = require("Module:parameters").process(parent_args, param_spec, true)
	
	-- the first unrecognized parameter encountered aborts the call
	for key in pairs(unrecognized_args) do
		local hint = ""
		if mw.ustring.sub(key, 1, 1) == "y" then
			hint = " Perhaps you meant “yomi”?"
		end
		error("“" .. key .. "” is not a recognized parameter." .. hint)
	end
	
	-- |pagename= replaces the module-level page name
	local override = args.pagename
	if override then
		if NAMESPACE == "" then
			error("The pagename parameter should not be used in entries, as it is only for testing.")
		end
		PAGENAME = override
	end
	
	local spec = genspec(args)
	return gentable(spec)
end

-- Manual smoke test: logs the table generated for one sample term through
-- mw.log. The commented-out block keeps further sample invocations that dump
-- the intermediate specification with mw.logObject.
function export:test()
	--[[
	mw.logObject(genspec({
		[1] = { 'き', 'つ>づ.く', maxindex = 2 },
		s = '気付く',
		yomi = 'ok',
		alt = '気づく,気ずく-ik',
	}))
	mw.logObject(genspec({
		[1] = { 'とう', 'きょう', 'けしき', maxindex = 3 },
		s = '東京景色',
		yomi = 'kan,go,j2',
	}))
	mw.logObject(genspec({
		[1] = { 'まま', maxindex = 1 },
		k = '儘',
		es = '侭',
		yomi = 'k',
	}))
	mw.logObject(genspec({
		[1] = { maxindex = 0 },
		s = 'かんじなし',
	}))
	mw.logObject(genspec({
		[1] = { 'やまと', maxindex = 1 },
		s = '大和',
		yomi = 'j',
	}))
	]]--
	local sample_readings = { 'こく', 'さい', 'おん', 'せい', 'き', 'ごう', maxindex = 6 }
	local sample_args = {
		[1] = sample_readings,
		s = '国際音声記号',
		yomi = 'o',
	}
	local spec = genspec(sample_args)
	mw.log(gentable(spec))
end

return export