Module:ja-kanji-readings

Definition from Wiktionary, the free dictionary
Jump to: navigation, search

This module contains a function that provides the content for Japanese kanji reading categories. It is used by {{ja-readings}} and {{ja-readings-cat}}.

Category tree modeled on [1].

Currently needed: text for some categories.

Testcases

  • Japanese kanji read as あん
    [[Category:Japanese kanji by reading|あん]]
    
  • Japanese kanji with on reading あん
    [[Category:Japanese kanji by on reading|あん]]
    [[Category:Japanese kanji read as あん|on]]
    
  • Japanese kanji with goon reading あん
    [[Category:Japanese kanji by goon reading|あん]]
    [[Category:Japanese kanji with on reading あん|goon]]
    [[Category:Japanese kanji read as あん|goon]]
    
  • Japanese kanji with kan'on reading あん
    [[Category:Japanese kanji by kan'on reading|あん]]
    [[Category:Japanese kanji with on reading あん|kan'on]]
    [[Category:Japanese kanji read as あん|kan'on]]
    
  • Japanese kanji with tōon reading あん
    [[Category:Japanese kanji by tōon reading|あん]]
    [[Category:Japanese kanji with on reading あん|tōon]]
    [[Category:Japanese kanji read as あん|tōon]]
    
  • Japanese kanji with kan'yōon reading あん
    [[Category:Japanese kanji by kan'yōon reading|あん]]
    [[Category:Japanese kanji with on reading あん|kan'yōon]]
    [[Category:Japanese kanji read as あん|kan'yōon]]
    
  • Japanese kanji with historical on reading か
    [[Category:Japanese kanji by historical on reading|か]]
    [[Category:Japanese kanji with historical reading か|on]]
    
  • Japanese kanji with historical goon reading か
    [[Category:Japanese kanji by historical goon reading|か]]
    [[Category:Japanese kanji with historical on reading か|goon]]
    
  • Japanese kanji with historical kan'on reading か
    [[Category:Japanese kanji by historical kan'on reading|か]]
    [[Category:Japanese kanji with historical on reading か|kan'on]]
    
  • Japanese kanji with historical tōon reading か
    [[Category:Japanese kanji by historical tōon reading|か]]
    [[Category:Japanese kanji with historical on reading か|tōon]]
    
  • Japanese kanji with historical kan'yōon reading か
    [[Category:Japanese kanji by historical kan'yōon reading|か]]
    [[Category:Japanese kanji with historical on reading か|kan'yōon]]
    
  • Japanese kanji with historical kun reading か
    [[Category:Japanese kanji with kun readings missing okurigana designation|か]]
    [[Category:Japanese kanji by historical kun reading|か]]
    [[Category:Japanese kanji with historical reading か|kun]]
    
  • Japanese kanji with kun reading い-く
    [[Category:Japanese kanji by kun reading|いく]]
    [[Category:Japanese kanji read as い-く|kun]]
    
  • Japanese kanji with nanori reading ゆき
    [[Category:Japanese kanji by nanori reading|ゆき]]
    [[Category:Japanese kanji read as ゆき|nanori]]
    
  • Japanese kanji with on reading きょう
    [[Category:Japanese kanji by on reading|きょう]]
    [[Category:Japanese kanji read as きょう|on]]
    
  • Japanese kanji with historical on reading きやう
    [[Category:Japanese kanji by historical on reading|きやう]]
    [[Category:Japanese kanji with historical reading きやう|on]]
    
  • Japanese kanji with ancient on reading くゐやう
    [[Category:Japanese kanji by ancient on reading|くゐやう]]
    [[Category:Japanese kanji with ancient reading くゐやう|on]]
    
  • Japanese kanji with kun reading いわ-んや
    [[Category:Japanese kanji by kun reading|いわんや]]
    [[Category:Japanese kanji read as いわ-んや|kun]]
    
  • Japanese kanji with ancient on reading くゐやう
    [[Category:Japanese kanji by ancient on reading|くゐやう]]
    [[Category:Japanese kanji with ancient reading くゐやう|on]]
    
  • Japanese kanji by on reading
    [[Category:Japanese kanji by reading|on]]
    
  • Japanese kanji by kan'on reading
    [[Category:Japanese kanji by on reading|kan'on]]
    
  • Japanese kanji by ancient on reading
    [[Category:Japanese kanji by on reading|ancient]]
    [[Category:Japanese kanji by ancient reading|on]]
    
  • Japanese kanji by ancient kan'on reading
    [[Category:Japanese kanji by ancient on reading|kan'on]]
    [[Category:Japanese kanji by ancient reading|kan'on]]
    

-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}

-- Title information for the page currently being rendered.
-- `pagename` may be overridden by export.show when a pagename argument is given.
local titleObj = mw.title.getCurrentTitle()
local fullpagename = titleObj.fullText
local pagename = titleObj.text
local namespace = titleObj.nsText

-- Script and language objects for Japanese, attached to every link built by plain_link.
local Jpan = require("Module:scripts").getByCode("Jpan")
local ja = require("Module:languages").getByCode("ja")

-- Short aliases for the Unicode-aware string helpers used throughout the module.
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local split = mw.text.split

-- NOTE(review): in the code visible here, `data` is referenced only inside a
-- commented-out block in export.show -- confirm whether it is still needed.
local data = mw.loadData("Module:ja/data")
	
-- Display data for each reading class accepted by export.show, keyed by
-- parameter name.
--   index          position of the class's line in the output of export.show
--   entry          page that the bold label links to
--   text           display text of the bold label
--   text2          name of the class as it appears in category names
--   classification broad group ("on", "kun" or "nanori") the class belongs to
--   unclassified   suffix appended to the label when no classified on reading
--                  (goon, kanon, toon, soon, kanyoon) is supplied
local labels = {
	goon = {
		index = 1,
		entry = "呉音",
		text = "Goon",
		text2 = "goon",
		classification = "on",
	},
	kanon = {
		index = 2,
		entry = "漢音",
		text = "Kan’on",
		text2 = "kan'on",
		classification = "on",
	},
	toon = {
		index = 3,
		entry = "唐音",
		text = "Tōon",
		text2 = "tōon",
		classification = "on",
	},
	kanyoon = {
		index = 4,
		entry = "慣用音",
		text = "Kan’yōon",
		text2 = "kan'yōon",
		classification = "on",
	},
	soon = {
		index = 5,
		entry = "宋音",
		text = "Sōon",
		text2 = "sōon",
		classification = "on",
	},
	on = {
		index = 6,
		entry = "音読み",
		text = "On",
		text2 = "on",
		classification = "on",
		unclassified = " (unclassified)",
	},
	kun = {
		index = 7,
		entry = "訓読み",
		text = "Kun",
		text2 = "kun",
		classification = "kun",
	},
	nanori = {
		index = 8,
		entry = "nanori",
		text = "Nanori",
		text2 = "nanori",
		classification = "nanori",
	},
}

-- Normalizes the empty string to nil; any other value is passed through unchanged.
local function if_not_empty(var)
	if var ~= "" then
		return var
	end
	return nil
end

-- Returns true when the table has no keys at all (array part or hash part),
-- false otherwise.
local function isEmpty(t)
	-- Compare against nil explicitly: next() returns the key itself, and a
	-- key may legitimately be the boolean false, which a plain truthiness
	-- test would mistake for "no more keys".
	return next(t) == nil
end

-- Records a tracking entry for this module under the given code.
local function track(code)
	local tracking_key = "ja-kanji-readings/" .. code
	require("Module:debug").track(tracking_key)
end

-- Turns a link-data table into a formatted Japanese link.
-- The table is modified in place: reading delimiters are stripped from the
-- term and transliteration, language and script are set to Japanese, and
-- empty pos/gloss strings are replaced with nil before the table is handed
-- to Module:links.
local function plain_link(data)
	-- 「かな-し.い」→「かなしい」, 「も-しく は」→「もしくは」
	local stripped_term = gsub(data.term, '[%.%- ]', '')
	data.term = stripped_term

	if data.tr then
		local stripped_tr = gsub(data.tr, '[%.%-]', '')
		data.tr = stripped_tr
	end

	data.lang, data.sc = ja, Jpan
	data.pos = if_not_empty(data.pos)
	data.gloss = if_not_empty(data.gloss)

	local links_module = require("Module:links")
	return links_module.full_link(data)
end

-- Replaces the part of a reading up to and including its last hyphen with
-- the kanji, e.g. 「むす-ぶ」→「結ぶ」.
-- Returns nil when either argument is missing; otherwise returns the
-- results of the substitution call unchanged.
local function process_okurigana(reading, kanji)
	if not reading or not kanji then
		return nil
	end

	-- The greedy capture means everything before the final hyphen is replaced.
	local stem_with_delimiter = '^(.+)(%-)'
	return gsub(reading, stem_with_delimiter, kanji)
end

-- Romanizes a kana reading via Module:ja, underlining the part that precedes
-- the okurigana delimiter: 「むす-ぶ」→「<u>むす</u>ぶ」 before conversion.
-- `options` is passed straight through to kana_to_romaji.
-- Returns nil when no reading is given.
local function make_romaji(rom, options)
	if rom then
		local underlined = gsub(rom, '^(.+)(%-)', '<u>%1</u>')
		return require("Module:ja").kana_to_romaji(underlined, options)
	end

	return nil
end

-- Formats a historical or ancient reading as a superscript link prefixed
-- with an arrow.
--   reading       kana of the older reading, or nil if there is none
--   romanization  transliteration to display for the link
--   pos           array of label strings shown with the link
-- Returns the empty string when `reading` is nil.
local function format_historical_reading(reading, romanization, pos)
	if not reading then
		return ""
	end

	-- Use an explicit separator: the CONCAT_SEP local of export.show is not
	-- in scope here, so referring to it would read an undefined global (nil)
	-- and join the labels with nothing between them.
	local link = plain_link{ term = reading, tr = romanization, pos = table.concat(pos, ', ') }
	return '<sup>←' .. link .. '</sup>'
end

-- Appends attention-request categories to `categories` when the readings
-- look suspicious.
--   categories      array that category wikitext is appended to
--   reading_mod     modern reading (required)
--   reading_hist    historical reading, or nil
--   reading_oldest  ancient reading, or nil
local function check(categories, reading_mod, reading_hist, reading_oldest)
	-- Substitute empty strings so the concatenations below cannot fail when
	-- only one of the older readings is supplied.
	local hist = reading_hist or ""
	local oldest = reading_oldest or ""

	-- test if reading contains katakana
	if match(reading_mod .. hist .. oldest, '[ァ-ヺ]') then
		table.insert(categories, '[[Category:Requests for attention concerning Japanese|1]]') -- sometimes legit, like 「頁(ページ)」
	end

	if reading_hist or reading_oldest then
		-- test if historical readings contain small kana (anachronistic)
		if match(hist .. oldest, '[ぁぃぅぇぉゃゅょ]') then
			table.insert(categories, '[[Category:Requests for attention concerning Japanese|2]]')
		end

		-- test if reading contains kun'yomi delimiter thing but historical readings don't
		local hist_lacks_delimiter = reading_hist and not match(reading_hist, '%-')
		local oldest_lacks_delimiter = reading_oldest and not match(reading_oldest, '%-')
		if match(reading_mod, '%-') and (hist_lacks_delimiter or oldest_lacks_delimiter) then
			table.insert(categories, '[[Category:Requests for attention concerning Japanese|3]]')
		end
	end
end

--[=[
		Copied from [[Module:ja]] on 2017/6/14.
		Replaces the code in Template:ja-readings which accepted kanji readings,
		and displayed them in a consistent format.
		Substantial change in function was introduced in https://en.wiktionary.org/w/index.php?diff=46057625

		Reads the parent frame's arguments goon, kanon, toon, soon, on, kanyoon,
		kun and nanori (each a comma-separated list of readings, optionally
		written as modern<historical<ancient and optionally followed by a
		double-quoted gloss) plus pagename (testing only; raises an error in
		mainspace).

		Returns wikitext: one bullet line per reading class that was supplied,
		followed by category links when the current page is in mainspace.
]=]
function export.show(frame)
	local params = {
		["goon"] = {},
		["kanon"] = {},
		["toon"] = {},
		["soon"] = {},
		["on"] = {},
		["kanyoon"] = {},
		["kun"] = {},
		["nanori"] = {},
		["pagename"] = {},
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	
	if args.pagename then
		if namespace == "" then
			error("The pagename parameter should not be used in entries, as it is only for testing.")
		end
		pagename = args.pagename
	end

	local yomi_data = mw.loadData("Module:ja/data/jouyou-yomi").yomi

	local items = {
		goon = args["goon"],
		kanon = args["kanon"],
		toon = args["toon"],
		soon = args["soon"],
		on = args["on"],
		kanyoon = args["kanyoon"],
		kun = args["kun"],
		nanori = args["nanori"],
	}

	-- this holds the finished product composed of wikilinks to be displayed
	-- in the Readings section under the Kanji section
	local links = {}
	local categories = {}

	local is_old_format = false
	
	-- We need a separate kanji sortkey module.
	local sortkey = require("Module:zh-sortkey").makeSortKey(pagename, "ja")
	
	-- Each of these is used both as a set (reading -> true) and as a list.
	local unclassified_on = {}
	local classified_on = {}
	local kun = {}
	local gloss_count = 0
	
	-- Loop-invariant values.
	local kana = "[ぁ-ー]"
	local CONCAT_SEP = ', '
	
	for class, readings in pairs(items) do
		if readings then
			local label = labels[class]
			
			local unclassified = ""

			-- Plain "on" readings are marked unclassified only when no
			-- classified on reading was supplied.
			if label.unclassified then
				if not (items.goon or items.kanon or items.toon or items.soon or items.kanyoon) then
					unclassified = label.unclassified
				end
			end
			
			if match(readings, '%[%[' .. kana) then
				-- Old format: the argument already contains wikilinks.
				is_old_format = true

				if label.classification == 'on' then
					for reading in gmatch(readings, kana .. '+') do
						table.insert(categories, '[[Category:Japanese kanji read as ' .. reading .. '|' .. sortkey .. ']]')
					end
				end

				readings = gsub(
					readings,
					"%[%[([^%]|]+)%]%]",
					function(entry)
						if find(entry, "^[" .. Jpan:getCharacters() .. "]+$") then
							return plain_link{ term = entry  }
						else
							return "[[" .. entry .. "]]"
						end
					end
				)
			else
				local glosses = {} -- hold glosses for entire line, in case a gloss contains a comma
				for i, wholematch, gloss in require("Module:string").imatch(readings, '( "([^"]+)")') do
					glosses[i] = gloss
					wholematch = require('Module:string').pattern_escape(wholematch)
					readings = gsub(readings, wholematch, '##' .. i)
				end
				
				gloss_count = gloss_count + #glosses

				local reading_list = split(readings, ',%s*')

				for i, reading in ipairs(reading_list) do
					local is_jouyou = false

					-- Declared local so that they do not leak into the global table.
					local reading_mod, reading_hist, reading_oldest

					local gloss = ''

					local pos, pos_hist, pos_oldest = { }, { '[[w:Historical kana orthography|historical]]' }, { 'ancient' }

					-- Get gloss; remove marker from text.
					local unchanged_reading = reading
					local number_of_glosses
					reading, number_of_glosses = gsub(
						reading,
						'##(%d+)',
						function(gloss_id)
							gloss = glosses[tonumber(gloss_id)]
							return ""
						end
					)
					
					if number_of_glosses > 1 then
						error("Too many glosses in the reading " .. unchanged_reading .. ".")
					end

					-- check for formatting indicating presence of historical kana spelling
					local hist_readings = mw.text.split(reading, "<")
					
					if #hist_readings <= 3 then
						reading_mod, reading_hist, reading_oldest = unpack(hist_readings)
					else
						error("The reading " .. reading .. " contains too many historical readings: " .. #hist_readings .. ". The maximum is 3: modern, historical, ancient.")
					end
					
					if class == "on" then
						unclassified_on[reading_mod] = true
						table.insert(unclassified_on, reading_mod)
					elseif class == "kun" then
						kun[reading_mod] = true
						table.insert(kun, reading_mod)
					elseif label.classification == "on" then
						classified_on[reading_mod] = true
						table.insert(classified_on, reading_mod)
					end
					
					check(categories, reading_mod, reading_hist, reading_oldest)

					-- check if there is data indicating that our kanji is a jouyou kanji
					if yomi_data[pagename] then
						-- The data is looked up with on readings converted by hira_to_kata.
						local yomi_key = (label.classification == 'on' and require("Module:ja").hira_to_kata(reading_mod) or reading_mod)
						yomi_key = gsub(yomi_key, '%.', '') -- 「あたら-し.い」→「あたら-しい」
						local yomi_type = yomi_data[pagename][yomi_key]

						if yomi_type then
							is_jouyou = true

							if yomi_type == 1 or yomi_type == 2 then
								table.insert(pos, '[[w:Jōyō kanji|Jōyō]]')
							elseif yomi_type == 3 or yomi_type == 4 then
								table.insert(pos, '[[w:Jōyō kanji|Jōyō]], uncommon')
							end
						end
					end

					if reading_mod then
						table.insert(categories, '[[Category:Japanese kanji with ' .. label.text2 .. ' reading ' .. gsub(reading_mod, "[%. ]", "") .. '|' .. sortkey .. ']]')
					end
					if reading_hist then
						table.insert(categories, '[[Category:Japanese kanji with historical ' .. label.text2 .. ' reading ' .. gsub(reading_hist, "[%. ]", "") .. '|' .. sortkey .. ']]')
					end
					if reading_oldest then
						table.insert(categories, '[[Category:Japanese kanji with ancient ' .. label.text2 .. ' reading ' .. gsub(reading_oldest, "[%. ]", "") .. '|' .. sortkey .. ']]')
					end
					
					-- process kun readings with okurigana, create kanji-okurigana links
					if match(reading, '%-') then
						local kanji = process_okurigana(reading_mod, pagename)
						local kanji_link = plain_link{ term = kanji }
						table.insert(pos, 1, kanji_link)
						
						-- The older spellings get their own kanji-okurigana link,
						-- but only when they carry the delimiter themselves.
						if reading_hist and match(reading_hist, '%-') then
							local kanji_hist = process_okurigana(reading_hist, pagename)
							local kanji_hist_link = plain_link{ term = kanji_hist }
							table.insert(pos_hist, 1, kanji_hist_link)
						end
						
						if reading_oldest and match(reading_oldest, '%-') then
							local kanji_oldest = process_okurigana(reading_oldest, pagename)
							local kanji_oldest_link = plain_link{ term = kanji_oldest }
							table.insert(pos_oldest, 1, kanji_oldest_link)
						end
					elseif label.classification == 'kun' then
						table.insert(categories, '[[Category:Japanese kanji with kun readings missing okurigana designation|' .. sortkey .. ']]')
					end

					local rom = make_romaji(reading_mod)
					local rom_hist = make_romaji(reading_hist, {hist=true})
					local rom_oldest = make_romaji(reading_oldest, {hist=true})
					
					local mod_link = plain_link{ term = reading_mod, tr = rom, gloss = gloss, pos = table.concat(pos, CONCAT_SEP) }
					if is_jouyou then
						mod_link = '<mark class="jouyou-reading" style="background:rgba(224, 255, 255, 0.5);">' .. mod_link .. '</mark>'
					end
					
					-- The label lists are joined here and passed as single-element
					-- tables, so the separator does not depend on how the helper
					-- concatenates them.
					reading_list[i] =
						mod_link
						..
						format_historical_reading(reading_hist, rom_hist, { table.concat(pos_hist, CONCAT_SEP) })
						..
						format_historical_reading(reading_oldest, rom_oldest, { table.concat(pos_oldest, CONCAT_SEP) })
				end

				readings = table.concat(reading_list, '; ')
			end
			
			links[label.index] = "* '''[[" .. label.entry .. "|".. label.text .. "]]'''" .. unclassified .. ": " .. readings
		end
	end
	
	for _, reading in ipairs(unclassified_on) do
		-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/duplicate reading]]
		if classified_on[reading] then
			track("duplicate reading")
		end
	end
	
	if not isEmpty(kun) then
		if isEmpty(classified_on) and isEmpty(unclassified_on) then
			-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/kun only]]
			track("kun only")
		end
	elseif not isEmpty(unclassified_on) or not isEmpty(classified_on) then
		-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/on only]]
		track("on only")
	end
	
	if gloss_count > 0 then
		-- [[Special:WhatLinksHere/Template:tracking/ja-kanji-readings/gloss]]
		track("gloss")
	end

	-- links was filled by label.index and may contain holes.
	links = require("Module:table").compressSparseArray(links)
	
	--[==[
	-- determine if this is joyo kanji (常用) or jinmeiyo kanji (人名用) or neither (表外)
	local joyo_kanji_pattern = ('[' .. data.joyo_kanji .. ']')
	local jinmeiyo_kanji_pattern = ('[' .. data.jinmeiyo_kanji .. ']')
	local sortkey = ""
	if match(pagename, joyo_kanji_pattern) then
		sortkey = "Common"
	elseif match(pagename, jinmeiyo_kanji_pattern) then
		sortkey = "Names"
	else
		sortkey = "Uncommon"
	end
	]==]
	-- NOTE: with the introduction of the new {{ja-readings}} formatting the above block of code currently does nothing...

	if is_old_format then
		table.insert(links, '[[Category:Japanese kanji using old ja-readings format]]')
	end
	
	local link_text = table.concat(links, "\n")

	-- Categorize only in mainspace.
	local category_text = ""
	if namespace == "" then
		category_text = table.concat(categories, "\n")
	end

	return link_text .. category_text
end

-- Generates the description text and parent categories for a Japanese kanji
-- reading category. Three families of category name are recognized:
--   "Japanese kanji with [period] <type> reading <kana>"
--   "Japanese kanji read as <kana>"
--   "Japanese kanji by [period] <type> reading"
-- When frame.args[1] is given it is used as the category name (with any
-- "Category:" prefix removed) and the function runs in testing mode: the
-- name and the generated category links are returned as visible text
-- instead of the description, table of contents and catfix.
-- Raises an error outside the Category namespace, for unrecognized category
-- names, for invalid period names and for invalid on'yomi types.
function export.catboiler(frame)
	local output = {}
	local categories = {}
	local catfix = ""
	
	local title = mw.title.getCurrentTitle()
	local pagename = title.text
	local namespace = title.nsText
	
	local mode
	
	if frame.args[1] then
		pagename = gsub(frame.args[1], "^Category:", "")
		namespace = "Category"
		mode = "testing"
	end
	
	if namespace ~= "Category" then
		error("This template should only be used in the Category namespace.")
	end
	
	local kanaRegex = "[-ぁ-ー]+"
	
	if not (	find(pagename, "^Japanese kanji with .+ reading " .. kanaRegex .. "$") or
				find(pagename, "^Japanese kanji read as " .. kanaRegex .. "$") or
				find(pagename, "^Japanese kanji by .+ reading$")				)
		then
		
		error('The category name "' .. pagename .. '" is not recognized.')
	end
		
	local periods = {
		historical = true,
		ancient = true,
	}
	
	local onTypes = {
		["goon"] = true,
		["kan'on"] = true,
		["tōon"] = true,
		["kan'yōon"] = true,
		["sōon"] = true,
		["on"] = true,
	}
	
	local cat1, cat2, cat3
	local sortkey1, sortkey2, sortkey3
	local hira_sortkey
		
	-- True for reading types that end in "on" preceded by at least one
	-- character (so not plain "on"); raises an error if such a name is not
	-- a known on'yomi type.
	local function isOnVariant(readingType)
		if find(readingType, ".on$") then
			if not onTypes[readingType] then
				error('"' .. readingType .. '" is not a valid type of on\'yomi.')
			end
			return true
		end
	end
	
	local period, readingType, reading
	
	period, readingType, reading = match(pagename, "^Japanese kanji with ([a-z]-) ?([%a']+) reading (" .. kanaRegex .. ")$")
	
	if not readingType then
		reading = match(pagename, "^Japanese kanji read as (" .. kanaRegex .. ")$")
	end
	
	if not reading then
		period, readingType = match(pagename, "^Japanese kanji by ([a-z]-) ?([%a']+) reading$")
	end
	
	period, readingType, reading = if_not_empty(period), if_not_empty(readingType), if_not_empty(reading)
	
	-- A name can satisfy the loose checks above and still fail every precise
	-- pattern; report that clearly instead of failing later on a nil value.
	if not (reading or readingType) then
		error('The category name "' .. pagename .. '" is not recognized.')
	end
	
	if period and not periods[period] then
		error('The period name "' .. period .. '" is not valid.')
	end
	
	-- If the pagename contains kana, it is a "with x reading" category; otherwise, it's a "by x reading" category.
	if reading then
		-- Japanese kanji with on reading あつ, Japanese kanji with historical on reading あつ
		catfix = require("Module:utilities").catfix(ja, Jpan)
		hira_sortkey = require("Module:ja").jsort(reading)
		frame:callParserFunction("DISPLAYTITLE", require("Module:string").plain_gsub(fullpagename, reading, '<span class="Jpan" lang="ja">%0</span>'))
		
		-- Category names use the text2 spelling of a reading type (e.g.
		-- "kan'on"), which is not always the key of the labels table
		-- ("kanon"), so the label is found by comparing text2.
		local label
		for _, candidate in pairs(labels) do
			if candidate.text2 == readingType then
				label = candidate
				break
			end
		end
		
		if label and mode ~= "testing" then
			table.insert(
				output,
				"This category contains [[kanji]] with the " .. (period and period .. " " or "") .. "[[" .. label.entry .. "|".. mw.ustring.lower(label.text) .. "]] reading " ..
					plain_link{ term = reading, tr = require("Module:ja").kana_to_romaji(reading, period and { hist = true }) } ..
					"."
			)
		end
		
		if readingType then
			if period then
				cat1 = "by " .. period .. " " .. readingType .. " reading"
				sortkey1 = hira_sortkey
				
				if isOnVariant(readingType) then
					cat2 = "with " .. period .. " on reading " .. reading
					sortkey2 = readingType
				else
					cat2 = "with " .. period .. " reading " .. reading
					sortkey2 = readingType
				end
			else
				cat1 = "by " .. readingType .. " reading"
				sortkey1 = hira_sortkey
				
				if isOnVariant(readingType) then
					cat2 = "with on reading " .. reading
					sortkey2 = readingType
				end
				
				cat3 = "read as " .. reading
				sortkey3 = readingType
			end

			if readingType == "kun" and not find(reading, "%-") then
				table.insert(categories, "[[Category:Japanese kanji with kun readings missing okurigana designation|" .. sortkey1 .. "]]")
			end
		else
			cat1 = "by reading"
			sortkey1 = hira_sortkey
		end
	else
		if period then
			if isOnVariant(readingType) then
				cat1 = "by " .. period .. " on reading"
				sortkey1 = readingType
			else
				cat1 = "by " .. readingType .. " reading"
				sortkey1 = period
			end
			
			cat2 = "by " .. period .. " reading"
			sortkey2 = readingType
		else
			if isOnVariant(readingType) then
				sortkey1 = readingType
				cat1 = "by on reading"
			else
				cat1 = "by reading"
				sortkey1 = readingType
			end
		end
	end
	
	if mode ~= "testing" then
		-- Reading categories hold pages; "by ..." categories hold subcategories.
		local inCategory
		if reading then
			inCategory = mw.site.stats.pagesInCategory(pagename, "pages")
		else
			inCategory = mw.site.stats.pagesInCategory(pagename, "subcats")
		end
		
		if inCategory > 200 then
			table.insert(output, mw.getCurrentFrame():expandTemplate{title = "Template:ja-categoryTOC", args = {}})
		end
	end
		
	if cat1 then
		table.insert(categories, "[[Category:Japanese kanji " .. cat1 .. "|" .. sortkey1 .. "]]")
	end
	if cat2 then
		table.insert(categories, "[[Category:Japanese kanji " .. cat2 .. "|" .. sortkey2 .. "]]")
	end
	if cat3 then
		table.insert(categories, "[[Category:Japanese kanji " .. cat3 .. "|" .. sortkey3 .. "]]")
	end
	
	local category_text = table.concat(categories)
	
	if mode == "testing" then
		table.insert(output, pagename)
		
		-- Check for emptiness before wrapping: once the text is inside the
		-- syntaxhighlight tag it can no longer be an empty string.
		if category_text == "" then
			category_text = '<span class="error">failed to generate categories for ' .. pagename .. '</span>'
		else
			category_text = gsub(category_text, "%]%]%[%[", "]]\n[[")
			category_text = frame:extensionTag{ name = "syntaxhighlight", content = category_text }
		end
	end
	
	-- catfix is left out of the testing-mode output.
	return table.concat(output) .. category_text .. ( mode ~= "testing" and catfix or "" )
end

return export