Module:ja-kanji-readings

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module contains functions used by {{ja-readings}} to format the lists of readings in entries on kanji and to create the content for some of the Japanese kanji reading categories.


local export = {}

local m_ja = require("Module:ja")
local m_str_utils = require("Module:string utilities")

local concat = table.concat
local find = m_str_utils.find
local get_script = require("Module:scripts").getByCode
local hira_to_kata = m_ja.hira_to_kata
local insert = table.insert
local kana_to_romaji = require("Module:Hrkt-translit").tr
local kata_to_hira = m_ja.kata_to_hira
local gmatch = m_str_utils.gmatch
local match = m_str_utils.match
local split = m_str_utils.split

-- Script objects; plain_link() picks between them when tagging a link, and
-- show() uses Jpan's character set to recognise old-format [[...]] entries.
local Jpan = get_script("Jpan")
-- local katakana_script = get_script("Kana")
local Hira = get_script("Hira")

-- Title of the current page; show() uses it unless the pagename parameter
-- overrides it.
local PAGENAME = mw.loadData("Module:headword/data").pagename
-- Namespace text of the current page; compared against "" in show() to reject
-- the pagename parameter and to decide whether categories are emitted.
local NAMESPACE = mw.title.getCurrentTitle().nsText

-- Only used by commented-out code.
-- local data = mw.loadData("Module:ja/data")

-- Separator used when joining the annotations passed as `pos` to plain_link().
local CONCAT_SEP = ', '
	
-- Reading types, in the order they are displayed by export.show.
--   text            label shown to the reader
--   text2           romanised name; used as the glossary anchor (unless
--                   `entry` is set), as the category subtype, and -- with "ō"
--                   replaced by "o" and apostrophes removed -- as the name of
--                   the template parameter holding the readings
--   entry           optional glossary anchor overriding text2
--   classification  "on", "kun" or "nanori"; becomes the "<x>-yomi" CSS class
--   unclassified    optional suffix appended to the label when none of the
--                   classified on-reading parameters is supplied
local labels = {
	{ classification = "on", text = "Go-on", text2 = "goon" },
	{ classification = "on", text = "Kan-on", text2 = "kan'on" },
	{ classification = "on", text = "Sō-on", text2 = "sōon" },
	{ classification = "on", text = "Tō-on", text2 = "tōon" },
	{ classification = "on", text = "Kan’yō-on", text2 = "kan'yōon" },
	{
		classification = "on",
		text = "On",
		text2 = "on",
		entry = "on'yomi",
		unclassified = " (unclassified)",
	},
	{ classification = "kun", text = "Kun", text2 = "kun", entry = "kun'yomi" },
	{ classification = "nanori", text = "Nanori", text2 = "nanori" },
}

-- Record a tracking hit under the "ja-kanji-readings/" prefix.
-- code: suffix identifying the tracked condition, e.g. "kun only".
local function track(code)
	local tracking_page = "ja-kanji-readings/" .. code
	require("Module:debug").track(tracking_page)
end

-- Build a link to a reading via Module:links' full_link.
-- data: table with `term` (required), `lang`, and optionally `tr` and `pos`.
--       The table is modified in place before being handed to full_link.
-- Returns whatever full_link returns.
local function plain_link(data)
	-- Drop the reading delimiters from the link target,
	-- e.g. 「かな-し.い」→「かなしい」, 「も-しく は」→「もしくは」
	local cleaned_term = data.term:gsub('[%.%- ]', '')
	data.term = cleaned_term

	-- Same clean-up for a supplied transliteration; with none supplied, pass
	-- '-' (presumably tells Module:links not to transliterate -- confirm there).
	if data.tr then
		local cleaned_tr = data.tr:gsub('[%.%-]', '')
		data.tr = cleaned_tr
	else
		data.tr = '-'
	end

	-- Ignoring ASCII bytes, any character outside the Hira set means the term
	-- is tagged as Jpan; otherwise it is tagged as Hira.
	local without_ascii = cleaned_term:gsub('[%z\1-\127]', '')
	if match(without_ascii, '[^' .. Hira:getCharacters() .. ']') then
		data.sc = Jpan
	else
		data.sc = Hira
	end

	-- An empty annotation string is treated as no annotation at all.
	if data.pos == '' then
		data.pos = nil
	end

	return require("Module:links").full_link(data, "term") --"term" makes italic
end

--[=[
		Entry point used by {{ja-readings}}.

		Copied from [[Module:ja]] on 2017/6/14.
		Replaces the code in Template:ja-readings which accepted kanji readings,
		and displayed them in a consistent format.
		Substantial change in function was introduced in https://en.wiktionary.org/w/index.php?diff=46057625

		Parameters, read from the parent frame: goon, kanon, soon, toon, on,
		kanyoon, kun, nanori. Each holds readings separated by "," or "、".
		A reading may carry up to two older spellings, written
		"modern<historical<ancient". A value containing "[[" followed by kana
		is treated as the old free-form format instead.
		The pagename parameter overrides the page title and raises an error
		when used in the main namespace.
		frame.args[1] is the language code (default 'ja').

		Returns wikitext: one bulleted line per supplied reading type, followed
		by the category links (main namespace only) and the TemplateStyles tag.
]=]
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		["goon"] = {},
		["kanon"] = {},
		["soon"] = {},
		["toon"] = {},
		["on"] = {},
		["kanyoon"] = {},
		["kun"] = {},
		["nanori"] = {},
		["pagename"] = {},
	})

	local lang_code = frame.args[1] or 'ja'
	local lang = require'Module:languages'.getByCode(lang_code)
	local lang_name = lang:getCanonicalName()

	if args.pagename and NAMESPACE == "" then
		error("The pagename parameter should not be used in entries, as it is only for testing.")
	end
	local pagename = args.pagename or PAGENAME
	-- The page name is used below as a gsub replacement string, in which "%"
	-- is magic; escape it once here so a title containing "%" is inserted
	-- literally instead of raising an error.
	local pagename_repl = pagename:gsub('%%', '%%%%')

	local yomi_data = mw.loadData("Module:ja/data/jouyou-yomi").yomi

	-- this holds the finished product composed of wikilinks to be displayed
	-- in the Readings section under the Kanji section
	local links, categories = {}, {}

	local is_old_format = false

	-- We need a separate kanji sortkey module.
	local sortkey = (require("Module:Hani-sortkey").makeSortKey(pagename, lang_code, "Jpan"))

	-- Append a reading category to `categories`.
	-- reading: kana reading, possibly containing ".", " " and "-" delimiters
	-- subtype: reading type (label.text2); when nil the generic
	--          "kanji read as" category is used
	-- period:  optional qualifier placed before the subtype
	--          ('historical' or 'ancient')
	local function add_reading_category(reading, subtype, period)
		-- gsub also returns a match count; the outer parentheses keep that
		-- count from being passed to kata_to_hira as a second argument.
		reading = kata_to_hira((reading:gsub("[%. ]+", ""):gsub("%-$", ""):gsub("%-", "・")))
		if subtype then
			-- Only add the separating space when there is a period, so the
			-- category name never contains a double space.
			return insert(categories, '[[Category:' .. lang_name .. ' kanji with ' ..
				(period and period .. ' ' or '') .. subtype .. ' reading ' .. reading ..
				'|' .. sortkey .. ']]')
		else
			return insert(categories, '[[Category:' .. lang_name .. ' kanji read as ' ..
				reading .. '|' .. sortkey .. ']]')
		end
	end

	-- Each of these is filled both as a list (for iteration) and as a set
	-- keyed by reading (for lookup); they feed the tracking calls at the end.
	local unclassified_on = {}
	local classified_on = {}
	local kun = {}

	local kana = "[ぁ-ー]"

	for _, label in ipairs(labels) do
		-- Parameter name is text2 without the macron and apostrophe,
		-- e.g. "kan'yōon" → "kanyoon".
		local readings = args[label.text2:gsub('ō', 'o'):gsub('\'', '')]
		if readings then
			local unclassified = ""

			if label.unclassified then
				if not (args.goon or args.kanon or args.soon or args.toon or args.kanyoon) then
					unclassified = label.unclassified
				end
			end

			if find(readings, '%[%[' .. kana) then
				-- Old format: free-form wikitext containing [[kana]] links.
				is_old_format = true

				if label.classification == 'on' then
					for reading in gmatch(readings, kana .. '+') do
						add_reading_category(reading)
					end
				end

				-- Re-link entries made up solely of Jpan characters through
				-- plain_link; leave every other link as written.
				readings = readings:gsub("%[%[([^%]|]+)%]%]", function(entry)
					if find(entry, "^[" .. Jpan:getCharacters() .. "]+$") then
						return plain_link{
							lang = lang,
							term = entry,
						}
					else
						return "[[" .. entry .. "]]"
					end
				end)
			else
				readings = split(readings, "%s*[,、]%s*")

				for i, reading in ipairs(readings) do
					local is_jouyou = false

					-- Annotations shown with the modern, historical and ancient forms.
					local pos, pos_hist, pos_oldest = { }, { '[[w:Historical kana orthography|historical]]' }, { 'ancient' }

					-- check for formatting indicating presence of historical kana spelling
					-- ("modern<historical<ancient"; a fourth "<"-separated part is an error)
					local reading_mod, reading_hist, reading_oldest, reading_surplus = reading:match'^(.-)%f[<%z]<?(.-)%f[<%z]<?(.-)%f[<%z]<?(.*)$'

					if reading_surplus ~= '' then
						error("The reading " .. reading .. " contains too many historical readings. The maximum is 3: modern, historical, ancient.")
					end

					if label.text2 == "on" then
						unclassified_on[reading_mod] = true
						insert(unclassified_on, reading_mod)
					elseif label.text2 == "kun" then
						kun[reading_mod] = true
						insert(kun, reading_mod)
					elseif label.classification == "on" then
						classified_on[reading_mod] = true
						insert(classified_on, reading_mod)
					end

					-- test if reading contains katakana
					if find(reading_mod .. reading_hist .. reading_oldest, '[ァ-ヺ]') then
						insert(categories, '[[Category:Requests for attention concerning ' .. lang_name .. '|1]]') -- sometimes legit, like 「頁(ページ)」
					end

					if reading_hist ~= '' or reading_oldest ~= '' then
						-- test if historical readings contain small kana (anachronistic)
						if find(reading_hist .. reading_oldest, '[ぁぃぅぇぉゃゅょ]') then
							insert(categories, '[[Category:Requests for attention concerning ' .. lang_name .. '|2]]') --
						end

						-- test if reading contains kun'yomi delimiter thing but historical readings don't
						if reading_mod:find("-", 1, true) then
							if reading_hist ~= '' and not reading_hist:find("-", 1, true) or reading_oldest ~= '' and not reading_oldest:find("-", 1, true) then
								insert(categories, '[[Category:Requests for attention concerning ' .. lang_name .. '|3]]')
							end
						end
					end

					-- check if there is data indicating that our kanji is a jouyou kanji
					if yomi_data[pagename] then
						-- The lookup key is katakana for on readings and the
						-- reading as written otherwise.
						local yomi_key = (label.classification == 'on' and hira_to_kata(reading_mod) or reading_mod)
						yomi_key = yomi_key:gsub('%.', '') -- 「あたら-し.い」→「あたら-しい」
						local yomi_type = yomi_data[pagename][yomi_key]

						if yomi_type then
							is_jouyou = true

							if yomi_type == 1 or yomi_type == 2 then
								insert(pos, '[[w:Jōyō kanji|<abbr title="This reading is listed in the Jōyō kanji table. Click for the Wikipedia article about the Jōyō kanji.">Jōyō</abbr>]]')
							elseif yomi_type == 3 or yomi_type == 4 then
								insert(pos, '[[w:Jōyō kanji|<abbr title="This reading is listed in the Jōyō kanji table, but is marked as restricted or rare. Click for the Wikipedia article about the Jōyō kanji.">Jōyō <sup>†</sup></abbr>]]')
							end
						end
					end

					local subtype = label.text2
					-- An empty string is truthy in Lua, so compare explicitly:
					-- an empty modern reading must not create a category.
					if reading_mod ~= '' then
						add_reading_category(reading_mod, subtype)
					end
					if reading_hist ~= '' then
						add_reading_category(reading_hist, subtype, 'historical')
					end
					if reading_oldest ~= '' then
						add_reading_category(reading_oldest, subtype, 'ancient')
					end

					-- process kun readings with okurigana, create kanji-okurigana links
					-- (everything up to the last "-" is replaced by the page name)
					-- NOTE(review): this tests the whole `reading`, so a hyphen that
					-- appears only in a historical form also enters this branch -- confirm intended.
					if reading:find("-", 1, true) then
						insert(pos, 1, plain_link{
							lang = lang,
							term = reading_mod:gsub('^.+%-', pagename_repl),
						})

						if reading_hist ~= '' then
							insert(pos_hist, 1, plain_link{
								lang = lang,
								term = reading_hist:gsub('^.+%-', pagename_repl),
							})
						end

						if reading_oldest ~= '' then
							insert(pos_oldest, 1, plain_link{
								lang = lang,
								term = reading_oldest:gsub('^.+%-', pagename_repl),
							})
						end
					elseif label.classification == 'kun' then
						insert(categories, '[[Category:' .. lang_name .. ' kanji with kun readings missing okurigana designation|' .. sortkey .. ']]')
					end

					-- Romanisations with the part before the last "-" underlined.
					-- The modern form is underlined after romanisation; the older
					-- forms are marked up before it.
					local rom = kana_to_romaji((reading_mod), lang_code):gsub('^(.+)(%-)', '<u>%1</u>')
					local rom_hist = kana_to_romaji((reading_hist:gsub('^(.+)(%-)', '<u>%1</u>')), lang_code, nil, {hist = true})
					local rom_oldest = kana_to_romaji((reading_oldest:gsub('^(.+)(%-)', '<u>%1</u>')), lang_code, nil, {hist = true})

					local mod_link = plain_link{
						lang = lang,
						term = reading_mod,
						tr = rom,
						pos = concat(pos, CONCAT_SEP),
					}
					if is_jouyou then
						mod_link = '<mark class="jouyou-reading">' .. mod_link .. '</mark>'
					end

					-- Replace the raw reading with its formatted form; older
					-- spellings follow as superscripts introduced by "←".
					readings[i] = mod_link .. (reading_hist ~= '' and '<sup>←' .. plain_link{
						lang = lang,
						term = reading_hist,
						tr = rom_hist,
						pos = concat(pos_hist, CONCAT_SEP),
					} .. '</sup>' or '') .. (reading_oldest ~= '' and '<sup>←' .. plain_link{
						lang = lang,
						term = reading_oldest,
						tr = rom_oldest,
						pos = concat(pos_oldest, CONCAT_SEP),
					} .. '</sup>' or '')
				end

				readings = concat(readings, "、")
			end

			-- Add "on-yomi", "kun-yomi", or "nanori-yomi" class around list of
			-- readings to allow JavaScript to locate them.
			insert(links, "* '''[[Appendix:Japanese glossary#" .. (label.entry or label.text2) .. '|'.. label.text .. "]]'''" .. unclassified .. ': <span class="' .. label.classification .. '-yomi">' .. readings .. '</span>')
		end
	end

	for _, reading in ipairs(unclassified_on) do
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/ja-kanji-readings/duplicate reading]]
		if classified_on[reading] then
			track("duplicate reading")
		end
		track("unclassified reading") -- Track unclassified readings for later classification
	end

	if not next(classified_on) and not next(unclassified_on) then
		if next(kun) then
			-- [[Special:WhatLinksHere/Wiktionary:Tracking/ja-kanji-readings/kun only]]
			track("kun only")
		end
	elseif not next(kun) then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/ja-kanji-readings/on only]]
		track("on only")
	end

	if is_old_format then
		-- NOTE(review): "Japanese" is hard-coded here while the other categories
		-- use lang_name -- confirm whether that is intended for other languages.
		insert(categories, '[[Category:Japanese kanji using old ja-readings format|' .. sortkey .. ']]')
	end

	-- Categories are only emitted in the main namespace.
	return concat(links, '\n') .. (NAMESPACE == '' and concat(categories) or '') .. require("Module:TemplateStyles")("Template:ja-readings/style.css")
end

return export