Module:Jpan-headword: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
MiguelX413 (talk | contribs) Added ka-adjectives. |
No edit summary |
||
(5 intermediate revisions by the same user not shown) | |||
Line 21: | Line 21: | ||
local kana_pattern_full = '^[、' .. kana_range .. '%s%.%-%^]*$' |
local kana_pattern_full = '^[、' .. kana_range .. '%s%.%-%^]*$' |
||
local kana_pattern_char = '[、' .. kana_range .. '%s%.%-%^]' |
local kana_pattern_char = '[、' .. kana_range .. '%s%.%-%^]' |
||
local function remove_links(text) |
|||
return text:gsub("%[%[[^|%]]-|", ""):gsub("%[%[", ""):gsub("%]%]", "") |
|||
end |
|||
local detect_kana_script = require("Module:fun").memoize(function(kana) |
local detect_kana_script = require("Module:fun").memoize(function(kana) |
||
Line 37: | Line 41: | ||
-- make adjustments for -u verbs and -i adjectives by placing a period before the last character |
-- make adjustments for -u verbs and -i adjectives by placing a period before the last character |
||
-- to prevent romanizing long vowels with macrons |
-- to prevent romanizing long vowels with macrons |
||
if (data.pos_category == "verbs") or (data.pos_category == "adjectives" and (args["infl"] == "i" or args["infl"] == "い")) then |
if (data.pos_category == "verbs") or (data.pos_category == "adjectives" and (args["infl"] == "i" or args["infl"] == "い" or args["infl"] == "is")) then |
||
kana = mw.ustring.gsub(kana,'([うい])$','.%1') |
kana = mw.ustring.gsub(kana,'([うい])$','.%1') |
||
end |
end |
||
Line 132: | Line 136: | ||
local kata = args["kata"] or ""; if kata ~= "" then return kata end |
local kata = args["kata"] or ""; if kata ~= "" then return kata end |
||
error("No kana detected in the unnamed parameters or |hira= and |kata= parameters. See template documentation for details.") |
error("No kana detected in the unnamed parameters or |hira= and |kata= parameters. See template documentation for details.") |
||
end |
|||
local function format_ruby(kanji, kana) |
|||
local kanji_segments = mw.ustring.gsub(kanji, "([A-Za-z0-9々㐀-䶵一-鿌" .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. "𠀀-0-9A-Za-z]+)", "`%1`") |
|||
-- returns possible matches between kanji and kana |
|||
-- for example, match('`物`の`哀`れ', 'もののあわれ') returns { '[物](も)の[哀](のあわ)れ', '[物](もの)の[哀](あわ)れ' } |
|||
local function match(kanji_segments, kana) |
|||
if kanji_segments:find('`') then |
|||
local kana_portion, kanji_portion, rest = mw.ustring.match(kanji_segments, '(.-)`(.-)`(.*)') |
|||
_, _, kana = mw.ustring.find(kana, '^' .. kana_portion .. '(.*)') |
|||
if not kana then return {} end |
|||
local candidates = {} |
|||
for i = 1, mw.ustring.len(kana) do |
|||
for _, candidate in ipairs(match(rest, mw.ustring.sub(kana, i + 1))) do |
|||
table.insert(candidates, kana_portion .. '[' .. kanji_portion .. '](' .. mw.ustring.sub(kana, 1, i) .. ')' .. candidate) |
|||
end |
|||
end |
|||
return candidates |
|||
else |
|||
return (kanji_segments == kana) and { kana } or {} |
|||
end |
|||
end |
|||
local ruby = match(kanji_segments, kana) |
|||
if #ruby == 1 then |
|||
ruby = ruby[1] |
|||
else |
|||
require("Module:debug").track("ja-headword/failed to match kanji and kana") |
|||
ruby = '[' .. kanji .. '](' .. kana .. ')' |
|||
end |
|||
return ruby |
|||
end |
end |
||
-- go through args and build inflections by finding whatever kanas were given to us |
-- go through args and build inflections by finding whatever kanas were given to us |
||
local function |
local function format_headword(args, data, PAGENAME) |
||
local detect_result = detect_kana_script(PAGENAME) |
local detect_result = detect_kana_script(PAGENAME) |
||
local function romanization(auto_rom) |
local function romanization(auto_rom) |
||
Line 154: | Line 191: | ||
-- add romaji |
-- add romaji |
||
-- add link manually for WT:ACCEL unless headword is for suru verb |
|||
if data.pos_category == "suru verbs" then |
if data.pos_category == "suru verbs" then |
||
table.insert(data. |
table.insert(data.translits, rom .. " suru") |
||
elseif detect_result then |
|||
-- only accelerate romaji creation for kana entries |
|||
table.insert(data.inflections, {label = "rōmaji", accel = {form = "romanized"}, rom, sc = Latn}) |
|||
else |
else |
||
table.insert(data. |
table.insert(data.translits, rom) |
||
end |
end |
||
end |
end |
||
Line 210: | Line 243: | ||
-- add everything to inflections, except historical hiragana which is next |
-- add everything to inflections, except historical hiragana which is next |
||
local format_result = detect_result and allkana[i] or format_ruby(PAGENAME, allkana[i]) |
|||
if data.pos_category == "nouns" or data.pos_category == "proper nouns" or data.pos_category == "verbs" or data.pos_category == "adjectives" or data.pos_category == "adverbs" then |
|||
if data.pos_category == "suru verbs" then |
|||
-- enable accelerated entry creation using hiragana links for certain parts of speech |
|||
table.insert(data.heads, format_result .. "[[する]]") |
|||
if mw.ustring.match(original[i],"[%. ]") then |
|||
table.insert(data.inflections, {label = labelval, accel = {form = ("kana-%s"):format(data.pos_category:sub(1, data.pos_category:len()-1):gsub(' ','-')), translit = original[i]}, allkana[i]}) |
|||
else |
|||
table.insert(data.inflections, {label = labelval, accel = {form = ("kana-%s"):format(data.pos_category:sub(1, data.pos_category:len()-1):gsub(' ','-'))}, allkana[i]}) |
|||
end |
|||
elseif data.pos_category == "suru verbs" then |
|||
table.insert(data.inflections, {label = labelval, "[[" .. allkana[i] .. "]][[する]]"}) |
|||
else |
else |
||
table.insert(data. |
table.insert(data.heads, format_result) |
||
end |
end |
||
end |
end |
||
Line 240: | Line 267: | ||
end |
end |
||
local hhira = args["hhira"] or "" |
|||
if hhira ~= "" then |
|||
if data.pos_category == "suru verbs" then |
|||
table.insert(data.inflections, {label = "historical hiragana", "[[" .. hhira .. "]][[する]]"}) |
|||
else |
|||
table.insert(data.inflections, {label = "historical hiragana", hhira}) |
|||
end |
|||
if string.match(hhira, "ゐ") ~= nil then |
|||
table.insert(data.categories, "Japanese terms historically spelled with ゐ") |
|||
end |
|||
if string.match(hhira, "ゑ") ~= nil then |
|||
table.insert(data.categories, "Japanese terms historically spelled with ゑ") |
|||
end |
|||
if string.match(hhira, "を") ~= nil then |
|||
table.insert(data.categories, "Japanese terms historically spelled with を") |
|||
end |
|||
end |
|||
local hkata = args["hkata"] or "" |
|||
if hkata ~= "" then |
|||
if data.pos_category == "suru verbs" then |
|||
table.insert(data.inflections, {label = "historical katakana", "[[" .. hkata .. "]][[する]]"}) |
|||
else |
|||
table.insert(data.inflections, {label = "historical katakana", hkata}) |
|||
end |
|||
end |
|||
local num_readings = 0 |
local num_readings = 0 |
||
for _ in pairs(readings) do |
for _ in pairs(readings) do |
||
Line 298: | Line 297: | ||
['godan']='1', ['ichidan']='2', ['irregular']='3' |
['godan']='1', ['ichidan']='2', ['irregular']='3' |
||
} |
} |
||
local function add_inflections(data, inflection_type) |
|||
local lemma = data.heads[1] |
|||
local romaji = data.translits[1] |
|||
local function form(term, translit) |
|||
local kanji = remove_links(mw.ustring.gsub(term, "%[([^%[%]]+)%]%(([^%(%)]+)%)", "%1")) |
|||
term = mw.ustring.gsub(term, "%[([^%[%]]+)%]%(([^%(%)]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>") |
|||
if not term:find('%[%[') then term = '[[' .. kanji .. '#Japanese|' .. term .. ']]' end |
|||
return {term = term, translit = translit} |
|||
end |
|||
local function insert(k1, k2, k3, r1, r2, r3) |
|||
local success = lemma:find(k1) and romaji:find(r1) |
|||
if success then |
|||
table.insert(data.inflections, {label = 'stem', form(lemma:gsub(k1, k2), romaji:gsub(r1, r2))}) |
|||
table.insert(data.inflections, {label = 'past', form(lemma:gsub(k1, k3), romaji:gsub(r1, r3))}) |
|||
end |
|||
return success |
|||
end |
|||
if inflection_type == '1' then |
|||
local success = insert('く$', 'き', 'いた', 'ku$', 'ki', 'ita') or |
|||
insert('ぐ$', 'ぎ', 'いだ', 'gu$', 'gi', 'ida') or |
|||
insert('す$', 'し', 'した', 'su$', 'shi', 'shita') or |
|||
insert('つ$', 'ち', 'った', 'tsu$', 'chi', 'tta') or |
|||
insert('ぬ$', 'に', 'んだ', 'nu$', 'ni', 'nda') or |
|||
insert('ぶ$', 'び', 'んだ', 'bu$', 'bi', 'nda') or |
|||
insert('む$', 'み', 'んだ', 'mu$', 'mi', 'nda') or |
|||
insert('る$', 'り', 'った', 'ru$', 'ri', 'tta') or |
|||
insert('う$', 'い', 'った', 'u$', 'i', 'tta') or |
|||
require("Module:debug").track("ja-headword/godan conjugation failed") |
|||
elseif inflection_type == '1s' then |
|||
if lemma:find('る$') and romaji:find('ru$') then |
|||
table.insert(data.inflections, {label = 'stem', form(lemma:gsub('る$', 'り'), romaji:gsub('ru$', 'ri')), form(lemma:gsub('る$', 'い'), romaji:gsub('ru$', 'i'))}) |
|||
table.insert(data.inflections, {label = 'past', form(lemma:gsub('る$', 'った'), romaji:gsub('ru$', 'tta'))}) |
|||
else |
|||
local success = insert('く$', 'き', 'った', 'iku$', 'iki', 'itta') or |
|||
insert('う$', 'い', 'うた', 'ou$', 'oi', 'ōta') or |
|||
require("Module:debug").track("ja-headword/godan conjugation special failed") |
|||
end |
|||
elseif inflection_type == '2' then |
|||
local success = insert('る$', '', 'た', 'ru$', '', 'ta') or |
|||
require("Module:debug").track("ja-headword/ichidan conjugation failed") |
|||
elseif inflection_type == '3' or inflection_type == 'kuru' or inflection_type == 'suru' then |
|||
if lemma:find('%[来%]%(く%)る$') and romaji:find('kuru$') then |
|||
table.insert(data.inflections, {label = 'stem', form(lemma:gsub('%[来%]%(く%)る$', '[来](き)'), romaji:gsub('kuru$', 'ki'))}) |
|||
table.insert(data.inflections, {label = 'past', form(lemma:gsub('%[来%]%(く%)る$', '[来](き)た'), romaji:gsub('kuru$', 'kita'))}) |
|||
else |
|||
local success = insert('くる$', 'き', 'きた', 'kuru$', 'ki', 'kita') or |
|||
insert('する$', 'し', 'した', 'suru$', 'shi', 'shita') or |
|||
insert('する%]%]$', 'し]]', 'した]]', 'suru$', 'shi', 'shita') or |
|||
insert('ずる$', 'じ', 'じた', 'zuru$', 'ji', 'jita') or |
|||
require("Module:debug").track("ja-headword/irregular conjugation failed") |
|||
end |
|||
elseif inflection_type == 'i' or inflection_type == 'い' then |
|||
if lemma:find('い$') and romaji:find('i$') then |
|||
table.insert(data.inflections, {label = 'adverbial', form(lemma:gsub('い$', 'く'), romaji:gsub('i$', 'ku'))}) |
|||
else |
|||
require("Module:debug").track("ja-headword/-i inflection failed") |
|||
end |
|||
elseif inflection_type == 'is' then |
|||
if lemma:find('%[良%]%(い%)い$') and romaji:find('ii$') then |
|||
table.insert(data.inflections, {label = 'adverbial', form(lemma:gsub('%[良%]%(い%)い$', '[良](よ)く'), romaji:gsub('ii$', 'yoku'))}) |
|||
elseif lemma:find('いい$') and romaji:find('ii$') then |
|||
table.insert(data.inflections, {label = 'adverbial', form(lemma:gsub('いい$', 'よく'), romaji:gsub('ii$', 'yoku'))}) |
|||
else |
|||
require("Module:debug").track("ja-headword/slightly irregular -i inflection failed") |
|||
end |
|||
elseif inflection_type == 'na' or inflection_type == 'な' then |
|||
table.insert(data.inflections, {label = 'adnominal', form(lemma .. '[[な]]', romaji .. ' na')}) |
|||
table.insert(data.inflections, {label = 'adverbial', form(lemma .. '[[に]]', romaji .. ' ni')}) |
|||
elseif inflection_type == 'tari' or inflection_type == 'たり' then |
|||
table.insert(data.inflections, {label = 'adnominal', form(lemma .. '[[たる]]', romaji .. ' taru')}) |
|||
table.insert(data.inflections, {label = 'adverbial', form(lemma .. '[[と]]', romaji .. ' to'), form(lemma .. '[[として]]', romaji .. ' toshite')}) |
|||
end |
|||
end |
|||
pos_functions["verbs"] = function(args, data) |
pos_functions["verbs"] = function(args, data) |
||
Line 319: | Line 403: | ||
conjugation = aliases[conjugation] or conjugation |
conjugation = aliases[conjugation] or conjugation |
||
if conjugation == "1" then |
if conjugation == "1" or conjugation == "1s" then |
||
table.insert(data.inflections, {label = "godan conjugation"}) |
table.insert(data.inflections, {label = "godan conjugation"}) |
||
table.insert(data.categories, "Japanese type 1 verbs") |
table.insert(data.categories, "Japanese type 1 verbs") |
||
Line 360: | Line 444: | ||
end |
end |
||
add_inflections(data, conjugation) |
|||
-- >> maintenance category << |
-- >> maintenance category << |
||
-- check if this ends in something other than acceptable kana in a modern verb (and isn't already categorised as yodan or nidan) |
-- check if this ends in something other than acceptable kana in a modern verb (and isn't already categorised as yodan or nidan) |
||
Line 372: | Line 458: | ||
conjugation = aliases[conjugation] or conjugation |
conjugation = aliases[conjugation] or conjugation |
||
if conjugation == "1" then |
if conjugation == "1" or conjugation == "1s" then |
||
table.insert(data.inflections, {label = "godan conjugation"}) |
table.insert(data.inflections, {label = "godan conjugation"}) |
||
elseif conjugation == "2" then |
elseif conjugation == "2" then |
||
Line 394: | Line 480: | ||
elseif conjugation == "nahen" then |
elseif conjugation == "nahen" then |
||
table.insert(data.inflections, {label = "n-irregular conjugation"}) |
table.insert(data.inflections, {label = "n-irregular conjugation"}) |
||
elseif conjugation == "i" or conjugation == "い" then |
elseif conjugation == "i" or conjugation == "い" or conjugation == "is" then |
||
table.insert(data.inflections, {label = "-i inflection"}) |
table.insert(data.inflections, {label = "-i inflection"}) |
||
elseif conjugation == "na" or conjugation == "な" then |
elseif conjugation == "na" or conjugation == "な" then |
||
Line 405: | Line 491: | ||
table.insert(data.inflections, {label = "-ka inflection"}) |
table.insert(data.inflections, {label = "-ka inflection"}) |
||
end |
end |
||
add_inflections(data, conjugation) |
|||
end |
end |
||
Line 430: | Line 517: | ||
table.insert(data.inflections, {label = "suru conjugation"}) |
table.insert(data.inflections, {label = "suru conjugation"}) |
||
add_inflections(data, '3') |
|||
end |
end |
||
Line 436: | Line 524: | ||
local infl = args["infl"] or "" |
local infl = args["infl"] or "" |
||
if infl == "i" or infl == "い" then |
if infl == "i" or infl == "い" or infl == "is" then |
||
table.insert(data.inflections, {label = "-i inflection"}) |
table.insert(data.inflections, {label = "-i inflection"}) |
||
table.insert(data.categories, "Japanese い-i adjectives") |
table.insert(data.categories, "Japanese い-i adjectives") |
||
Line 452: | Line 540: | ||
table.insert(data.categories, "Japanese か-ka adjectives") |
table.insert(data.categories, "Japanese か-ka adjectives") |
||
end |
end |
||
add_inflections(data, infl) |
|||
end |
end |
||
Line 486: | Line 575: | ||
end |
end |
||
local data = {lang = lang, sc = sc, pos_category = poscat, categories = {}, heads = {head}, inflections = {}} |
local data = {lang = lang, sc = sc, pos_category = poscat, categories = {}, translits = {}, heads = (detect_kana_script(PAGENAME) and {head} or {}), inflections = {}} |
||
local katakana_category = {} |
local katakana_category = {} |
||
local kana = find_kana(args, PAGENAME) |
local kana = find_kana(args, PAGENAME) |
||
-- sort out all the kanas and do the romanization business |
|||
format_headword(args, data, PAGENAME, kana) |
|||
-- add certain "inflections" and categories for adjectives, verbs, or nouns |
|||
if pos_functions[poscat] then |
|||
pos_functions[poscat](args, data) |
|||
end |
|||
for i = 1, #data.heads do |
|||
local kana = mw.ustring.gsub(data.heads[i], "%[([^%[%]]+)%]%(([^%(%)]+)%)", "%2") |
|||
kana = remove_links(mw.ustring.gsub(kana, '%[%[する%]%]$', '')) |
|||
data.heads[i] = mw.ustring.gsub(data.heads[i], "%[([^%[%]]+)%]%(([^%(%)]+)%)", "<ruby>%1<rp>(</rp><rt>[[" .. kana .. "#Japanese|%2]]</rt><rp>)</rp></ruby>") |
|||
end |
|||
-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa |
-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa |
||
Line 515: | Line 619: | ||
end |
end |
||
local hhira = args["hhira"] or "" |
|||
-- add certain "inflections" and categories for adjectives, verbs, or nouns |
|||
if |
if hhira ~= "" then |
||
if data.pos_category == "suru verbs" then |
|||
pos_functions[poscat](args, data) |
|||
table.insert(data.inflections, {label = "historical hiragana", "[[" .. hhira .. "]][[する]]"}) |
|||
else |
|||
table.insert(data.inflections, {label = "historical hiragana", hhira}) |
|||
end |
|||
if string.match(hhira, "ゐ") ~= nil then |
|||
table.insert(data.categories, "Japanese terms historically spelled with ゐ") |
|||
end |
|||
if string.match(hhira, "ゑ") ~= nil then |
|||
table.insert(data.categories, "Japanese terms historically spelled with ゑ") |
|||
end |
|||
if string.match(hhira, "を") ~= nil then |
|||
table.insert(data.categories, "Japanese terms historically spelled with を") |
|||
end |
|||
end |
end |
||
-- sort out all the kanas and do the romanization business |
|||
find_inflections(args, data, PAGENAME, kana) |
|||
local hkata = args["hkata"] or "" |
|||
if hkata ~= "" then |
|||
if data.pos_category == "suru verbs" then |
|||
table.insert(data.inflections, {label = "historical katakana", "[[" .. hkata .. "]][[する]]"}) |
|||
else |
|||
table.insert(data.inflections, {label = "historical katakana", hkata}) |
|||
end |
|||
end |
|||
if data.pos_category == "suru verbs" then |
if data.pos_category == "suru verbs" then |
||
data.pos_category = "verbs" |
data.pos_category = "verbs" |
Revision as of 15:52, 16 January 2020
- The following documentation is located at Module:Jpan-headword/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module implements the Japanese headword-line templates, along with all of the associated templates that they call to do categorization and error checking.
-- Japanese helper module; this file calls its kana_to_romaji, kata_to_hira and kanji_grade functions.
local m_ja = require("Module:ja")
-- Local alias for the Unicode-aware find.
local find = mw.ustring.find
local export = {}
-- Part-of-speech specific handlers, keyed by POS category name (e.g. "verbs").
local pos_functions = {}
local lang = require("Module:languages").getByCode("ja")
local sc = require("Module:scripts").getByCode("Jpan")
local Latn = require("Module:scripts").getByCode("Latn")
-- The *_symbols / *_range strings below are character-class contents: in the code visible here
-- they are always spliced between "[" and "]" to build the patterns that follow.
local Japanese_symbols = '%ー・=?!。、'
local katakana_range = 'ァ-ヺーヽヾ'
local hiragana_range = 'ぁ-ゖーゞゝ'
local kana_range = katakana_range .. hiragana_range .. Japanese_symbols
local Japanese_scripts_range = kana_range .. '一-鿌・々'
-- Whole-string patterns (anchored with ^ and $). Because of the "*", the empty string matches too.
local katakana_pattern = '^[' .. katakana_range .. Japanese_symbols .. ']*$'
local hiragana_pattern = '^[' .. hiragana_range .. Japanese_symbols .. ']*$'
local kana_pattern = '^[' .. kana_range .. ']*$'
-- Like kana_pattern, but additionally accepts "、", whitespace, ".", "-" and "^"
-- (the markup characters that format_headword strips from readings).
local kana_pattern_full = '^[、' .. kana_range .. '%s%.%-%^]*$'
-- Unanchored single-character version of kana_pattern_full.
local kana_pattern_char = '[、' .. kana_range .. '%s%.%-%^]'
--- Strips wikilink markup from a string.
-- "[[target|label]]" becomes "label" and "[[target]]" becomes "target".
-- @param text string that may contain wikilinks
-- @return the text with every "[[target|", "[[" and "]]" removed (exactly one value)
local function remove_links(text)
	local stripped = text:gsub("%[%[[^|%]]-|", ""):gsub("%[%[", ""):gsub("%]%]", "")
	-- gsub also returns a replacement count; assigning to a single local drops it so
	-- callers that forward the result into another argument list get one value only.
	return stripped
end
--- Classifies a string by the kana script it is written in (memoized).
-- The checks run in a fixed order: katakana-only, then hiragana-only, then any kana.
-- @param kana string to classify
-- @return 'kata', 'hira', 'both', or nil when the string matches none of the kana patterns
local detect_kana_script = require("Module:fun").memoize(function(kana)
	local script = nil
	if find(kana, katakana_pattern) then
		script = 'kata'
	elseif find(kana, hiragana_pattern) then
		script = 'hira'
	elseif find(kana, kana_pattern) then
		script = 'both'
	end
	return script
end)
--- Romanizes a kana string, adjusting the result for the headword's part of speech.
-- @param kana kana string (may contain romanization markup)
-- @param data headword data table; only data.pos_category is read
-- @param args template arguments; args["infl"] is read for adjectives only
-- @return the romaji string, with a leading/trailing hyphen for affix-like categories
local function kana_to_romaji(kana, data, args)
	local pos = data.pos_category

	-- Verbs and -i adjectives get a period before a final う/い so that the ending is
	-- not merged into a macron-marked long vowel.
	local split_final_vowel = (pos == "verbs")
	if not split_final_vowel and pos == "adjectives" then
		local infl = args["infl"]
		split_final_vowel = (infl == "i" or infl == "い" or infl == "is")
	end
	if split_final_vowel then
		kana = mw.ustring.gsub(kana,'([うい])$','.%1')
	end

	local romaji = m_ja.kana_to_romaji(kana)

	-- Proper nouns: capitalize the first letter and any letter after a space or hyphen.
	if pos == "proper nouns" then
		for _, lowercase_start in ipairs({ "^%l", " %l", "-%l" }) do
			romaji = mw.ustring.gsub(romaji, lowercase_start, mw.ustring.upper)
		end
	end

	-- Hyphenate prefixes, suffixes and counters (classifiers).
	if pos == "prefixes" then
		return romaji .. "-"
	end
	if pos == "suffixes" or pos == "counters" or pos == "classifiers" then
		return "-" .. romaji
	end
	return romaji
end
--- Tests whether a kana string ends in る preceded by an i-row or e-row kana.
-- @param kana kana string to inspect
-- @return false when the last character is not る; otherwise 1 if the character
--         before it is one of the listed i/e-row kana, or nil if it is not
local function ends_in_iru_eru(kana)
	if mw.ustring.sub(kana, -1) == "る" then
		local i_e_row = {}
		for _, mora in ipairs({
			-- hiragana, i-row
			"い", "き", "し", "ち", "に", "ひ", "み", "り", "ゐ",
			"ぃ", "ぎ", "じ", "ぢ", "び", "ぴ",
			-- hiragana, e-row
			"え", "け", "せ", "て", "ね", "へ", "め", "れ", "ゑ",
			"ぇ", "げ", "ぜ", "で", "べ", "ぺ",
			-- katakana, i-row
			"イ", "キ", "シ", "チ", "ニ", "ヒ", "ミ", "リ", "ヰ",
			"ィ", "ギ", "ジ", "ヂ", "ビ", "ピ",
			-- katakana, e-row
			"エ", "ケ", "セ", "テ", "ネ", "ヘ", "メ", "レ", "ヱ",
			"ェ", "ゲ", "ゼ", "デ", "ベ", "ペ",
		}) do
			i_e_row[mora] = 1
		end
		local penultimate = mw.ustring.sub(kana, -2, -2)
		return i_e_row[penultimate]
	end
	return false
end
-- English number words, indexed by count (1..15); categorize_by_kanji uses this to name
-- the "written with N Han script characters" category.
local en_numerals = {
"one", "two", "three", "four", "five",
"six", "seven", "eight", "nine", "ten",
"eleven", "twelve", "thirteen", "fourteen", "fifteen"
}
-- Kanji grade names, indexed by the value returned from m_ja.kanji_grade
-- (see categorize_by_kanji).
local en_grades = {
"first grade", "second grade", "third grade",
"fourth grade", "fifth grade", "sixth grade",
"secondary school", "jinmeiyō", "hyōgaiji"
}
-- For every kanji in PAGENAME, adds "Japanese terms spelled with <grade> kanji"
-- (grade name taken from en_grades via m_ja.kanji_grade), then adds a category for
-- the number of kanji in the title. Does nothing when the title has no kanji.
local function categorize_by_kanji(data, PAGENAME)
	-- keep only the characters in the 一-鿌 range
	local kanji_only = mw.ustring.gsub(PAGENAME, '[^一-鿌]', '')
	local kanji_count = mw.ustring.len(kanji_only)
	if kanji_count <= 0 then
		return
	end

	for position = 1, kanji_count do
		local character = mw.ustring.sub(kanji_only,position,position)
		local grade_name = en_grades[m_ja.kanji_grade(character)]
		table.insert(data.categories, ("Japanese terms spelled with %s kanji"):format(grade_name))
	end

	-- categorize by number of kanji
	if kanji_count == 1 then
		table.insert(data.categories, "Japanese terms written with one Han script character")
	else
		local count_word = en_numerals[kanji_count]
		if count_word then
			table.insert(data.categories, ("Japanese terms written with %s Han script characters"):format(count_word))
		end
	end
end
-- A title consisting of exactly one kanji has no kanjitab / kanji reading tab to
-- generate its "Japanese terms spelled with .." category, so it is added here,
-- together with a maintenance category (many such entries need attention).
local function singlekanji_term(data, PAGENAME)
	if mw.ustring.len(PAGENAME) ~= 1 then
		return
	end
	if not mw.ustring.match(PAGENAME, '[一-鿌]') then
		return
	end
	local categories = data.categories
	table.insert(categories, "Japanese terms spelled with " .. PAGENAME)
	table.insert(categories, "Japanese single-kanji terms")
end
-- Picks the kana form to use. Order of preference: the first unnamed parameter that
-- matches kana_pattern_full, then PAGENAME if it matches, then a non-empty |hira=,
-- then a non-empty |kata=. Raises an error when none of these is available.
local function find_kana(args, PAGENAME)
	for _, candidate in ipairs(args) do
		if candidate and find(candidate, kana_pattern_full) then
			return candidate
		end
	end

	if find(PAGENAME, kana_pattern_full) then
		return PAGENAME
	end

	-- the named parameters are returned as given, without a kana check
	for _, name in ipairs({ "hira", "kata" }) do
		local value = args[name] or ""
		if value ~= "" then
			return value
		end
	end

	error("No kana detected in the unnamed parameters or |hira= and |kata= parameters. See template documentation for details.")
end
--- Builds ruby markup of the form "[kanji](kana)" by aligning a title with its reading.
-- Runs of kanji/alphanumeric characters in `kanji` are treated as segments that need
-- a reading; everything between them must appear literally in `kana`.
-- @param kanji the page title (mixed kanji and kana)
-- @param kana the full kana reading
-- @return the aligned markup when exactly one alignment exists; otherwise the whole
--         title annotated with the whole reading ("[kanji](kana)"), and the failure
--         is tracked
local function format_ruby(kanji, kana)
	-- Wrap every run that needs a reading in backticks.
	-- NOTE(review): the tail of this character class ("𠀀-0-9A-Za-z") looks malformed
	-- (reversed range, duplicated ASCII ranges) - possibly a mangled copy of a
	-- supplementary-plane range plus fullwidth alphanumerics; verify against the wiki.
	local kanji_segments = mw.ustring.gsub(kanji, "([A-Za-z0-9々㐀-䶵一-鿌" .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. "𠀀-0-9A-Za-z]+)", "`%1`")

	-- returns possible matches between kanji and kana
	-- for example, match('`物`の`哀`れ', 'もののあわれ') returns { '[物](も)の[哀](のあわ)れ', '[物](もの)の[哀](あわ)れ' }
	local function match(segments, reading)
		if not segments:find('`') then
			-- no segment left: the remaining text must equal the remaining reading
			return (segments == reading) and { reading } or {}
		end

		local kana_portion, kanji_portion, rest = mw.ustring.match(segments, '(.-)`(.-)`(.*)')
		-- an unbalanced backtick (e.g. one present in the title itself) cannot be aligned
		if not kana_portion then return {} end

		-- The text before the segment must be a literal prefix of the reading. Compare
		-- it as plain text: splicing it into a pattern would let characters such as
		-- "." or "%" act as pattern magic.
		local prefix_len = mw.ustring.len(kana_portion)
		if mw.ustring.sub(reading, 1, prefix_len) ~= kana_portion then return {} end
		local remaining = mw.ustring.sub(reading, prefix_len + 1)

		-- try every non-empty prefix of the remaining reading as this segment's ruby
		local candidates = {}
		for i = 1, mw.ustring.len(remaining) do
			local ruby_text = mw.ustring.sub(remaining, 1, i)
			for _, candidate in ipairs(match(rest, mw.ustring.sub(remaining, i + 1))) do
				table.insert(candidates, kana_portion .. '[' .. kanji_portion .. '](' .. ruby_text .. ')' .. candidate)
			end
		end
		return candidates
	end

	local ruby = match(kanji_segments, kana)
	if #ruby == 1 then
		return ruby[1]
	end
	-- zero or several alignments: fall back to annotating the whole title
	require("Module:debug").track("ja-headword/failed to match kanji and kana")
	return '[' .. kanji .. '](' .. kana .. ')'
end
-- Goes through args, collects whatever kana readings were given, and fills in
-- data.heads (display forms), data.translits (romanizations) and data.categories.
--   args:     template arguments (unnamed parameters, |hira=, |kata=, |rom=)
--   data:     headword data table; pos_category is read, heads/translits/categories are appended to
--   PAGENAME: title of the entry
local function format_headword(args, data, PAGENAME)
	local detect_result = detect_kana_script(PAGENAME)

	local function romanization(auto_rom)
		-- accept the automatic romanization generated in function kana_to_romaji() above
		-- compare that to the manual romanization if it exists and add it to translits
		local rom = args["rom"] or ""
		if rom == "" then rom = auto_rom end

		-- check auto rom against manual and put in hidden category if they differ
		if rom ~= auto_rom then
			table.insert(data.categories, "Japanese terms with romaji needing attention")
		end

		-- throw an error if there is no romanization
		if rom == "" then
			error("Japanese terms must have a kana form.")
		end

		-- add romaji
		if data.pos_category == "suru verbs" then
			table.insert(data.translits, rom .. " suru")
		else
			table.insert(data.translits, rom)
		end
	end

	local allkana, readings, romajis = {}, {}, {}

	for _, arg in ipairs(args) do
		if arg and arg ~= "" and find(arg, kana_pattern_full) then table.insert(allkana, arg) end
	end

	-- accept "hira" and "kata" but let Lua decide if they are really hiragana or katakana
	if args["hira"] and args["hira"] ~= "" and find(args["hira"], kana_pattern_full) then table.insert(allkana, args["hira"]) end
	if args["kata"] and args["kata"] ~= "" and find(args["kata"], kana_pattern_full) then table.insert(allkana, args["kata"]) end

	-- a kana page title is used as the reading only when nothing else was supplied
	if find(PAGENAME, kana_pattern_full) then
		if #allkana == 0 then table.insert(allkana, PAGENAME) end
	end

	for i = 1, #allkana do
		-- auto_romanization (done before the markup is stripped)
		romajis[i] = kana_to_romaji(allkana[i], data, args)
		-- remove markup
		allkana[i] = mw.ustring.gsub(allkana[i], '[%s%.%-%^]', '')
	end

	for i = 1, #allkana do
		-- if this is not kana, blank it out
		if not mw.ustring.match(allkana[i], kana_pattern_char) then
			allkana[i] = ""
		else
			-- if this is kana, count it as another effective reading (ignoring hiragana-katakana distinction)
			readings[m_ja.kata_to_hira(allkana[i])] = 1
		end

		-- only if this kana is different from the page name
		if allkana[i] ~= PAGENAME and allkana[i] ~= "" then
			-- kana titles show the reading as-is; other titles get ruby markup
			local format_result = detect_result and allkana[i] or format_ruby(PAGENAME, allkana[i])
			if data.pos_category == "suru verbs" then
				table.insert(data.heads, format_result .. "[[する]]")
			else
				table.insert(data.heads, format_result)
			end
		end

		-- do the romanization business if it passes through every check:
		-- skipped when the reading is just the (non-kana) page name, or when a later
		-- reading has the same automatic romanization
		local undergo_romanization = true
		if allkana[i] ~= "" then
			if allkana[i] == PAGENAME and not find(PAGENAME, kana_pattern_full) then
				undergo_romanization = false
			else
				for j = i + 1, #allkana do
					if allkana[j] and romajis[i] == romajis[j] then
						undergo_romanization = false
					end
				end
			end
		end
		if undergo_romanization then romanization(romajis[i]) end
	end

	local num_readings = 0
	for _ in pairs(readings) do
		num_readings = num_readings + 1
	end
	if num_readings > 1 then
		table.insert(data.categories, "Japanese words with multiple readings")
	end
end
-- Adds categories based on the script of the page title and on specific
-- characters that occur in it.
--   data.categories receives the hiragana / multiple-scripts / special-character categories;
--   katakana_category receives "Japanese katakana" (kept separate so it can be added at the end).
local function extra_categorization(data, PAGENAME, katakana_category)
	local title_script = detect_kana_script(PAGENAME)
	if title_script == 'hira' then
		table.insert(data.categories, "Japanese hiragana")
	end
	if title_script == 'kata' then
		table.insert(katakana_category, "Japanese katakana")
	end

	-- title mixes Japanese-script characters with characters outside that range
	local has_other_script = find(PAGENAME, "[^" .. Japanese_scripts_range .. "]")
	if has_other_script and find(PAGENAME, '[' .. Japanese_scripts_range .. ']') then
		table.insert(data.categories, "Japanese terms written in multiple scripts")
	end

	local special_characters = {'々','〆','ヶ','ゝ','ゞ','ヽ','ヾ','ゐ','ヰ','ゑ','ヱ','ゔ','ヷ','ヸ','ヹ','ヺ','・','=','゠'}
	for index = 1, #special_characters do
		local character = special_characters[index]
		if mw.ustring.match(PAGENAME,character) then
			table.insert(data.categories, ("Japanese terms spelled with %s"):format(character))
		end
	end
end
-- Maps long-form parameter values to the short codes the handlers compare against:
-- transitivity values to 'tr' / 'in', conjugation names to '1' / '2' / '3'.
-- Values without an entry are used unchanged (callers do `aliases[x] or x`).
local aliases = {
['transitive']='tr', ['trans']='tr',
['intransitive']='in', ['intrans']='in', ['intr']='in',
['godan']='1', ['ichidan']='2', ['irregular']='3'
}
--- Appends basic inflected forms to data.inflections, derived from the first
-- headword (data.heads[1]) and the first romanization (data.translits[1]).
-- Verbs get 'stem' and 'past'; -i adjectives get 'adverbial'; -na and -tari
-- adjectives get 'adnominal' and 'adverbial'. Unrecognized types add nothing.
-- @param data headword data table (heads, translits and inflections are used)
-- @param inflection_type '1', '1s', '2', '3', 'kuru', 'suru', 'i'/'い', 'is',
--        'na'/'な' or 'tari'/'たり'
local function add_inflections(data, inflection_type)
	local lemma = data.heads[1]
	local romaji = data.translits[1]
	-- Nothing can be derived without both a headword and a romanization; return
	-- instead of failing on a nil index or concatenation further down.
	if lemma == nil or romaji == nil then return end

	local function track(key)
		return require("Module:debug").track(key)
	end

	-- Turns "[kanji](kana)" markup into <ruby> HTML and, unless the term already
	-- contains a wikilink, links it to the kanji spelling's Japanese section.
	local function form(term, translit)
		local kanji = remove_links(mw.ustring.gsub(term, "%[([^%[%]]+)%]%(([^%(%)]+)%)", "%1"))
		term = mw.ustring.gsub(term, "%[([^%[%]]+)%]%(([^%(%)]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>")
		if not term:find('%[%[') then term = '[[' .. kanji .. '#Japanese|' .. term .. ']]' end
		return {term = term, translit = translit}
	end

	-- If the lemma matches k1 and the romaji matches r1, adds a 'stem' form
	-- (k1 -> k2, r1 -> r2) and a 'past' form (k1 -> k3, r1 -> r3).
	-- Returns a truthy value on success.
	local function insert(k1, k2, k3, r1, r2, r3)
		local success = lemma:find(k1) and romaji:find(r1)
		if success then
			table.insert(data.inflections, {label = 'stem', form(lemma:gsub(k1, k2), romaji:gsub(r1, r2))})
			table.insert(data.inflections, {label = 'past', form(lemma:gsub(k1, k3), romaji:gsub(r1, r3))})
		end
		return success
	end

	-- Applies the first rule that matches; tracks `failure_key` when none does.
	local function insert_first(rules, failure_key)
		for _, rule in ipairs(rules) do
			if insert(rule[1], rule[2], rule[3], rule[4], rule[5], rule[6]) then
				return
			end
		end
		track(failure_key)
	end

	if inflection_type == '1' then
		insert_first({
			{'く$', 'き', 'いた', 'ku$', 'ki', 'ita'},
			{'ぐ$', 'ぎ', 'いだ', 'gu$', 'gi', 'ida'},
			{'す$', 'し', 'した', 'su$', 'shi', 'shita'},
			{'つ$', 'ち', 'った', 'tsu$', 'chi', 'tta'},
			{'ぬ$', 'に', 'んだ', 'nu$', 'ni', 'nda'},
			{'ぶ$', 'び', 'んだ', 'bu$', 'bi', 'nda'},
			{'む$', 'み', 'んだ', 'mu$', 'mi', 'nda'},
			{'る$', 'り', 'った', 'ru$', 'ri', 'tta'},
			{'う$', 'い', 'った', 'u$', 'i', 'tta'},
		}, "ja-headword/godan conjugation failed")
	elseif inflection_type == '1s' then
		if lemma:find('る$') and romaji:find('ru$') then
			-- two alternative stems (-ri and -i) are listed for these verbs
			table.insert(data.inflections, {label = 'stem', form(lemma:gsub('る$', 'り'), romaji:gsub('ru$', 'ri')), form(lemma:gsub('る$', 'い'), romaji:gsub('ru$', 'i'))})
			table.insert(data.inflections, {label = 'past', form(lemma:gsub('る$', 'った'), romaji:gsub('ru$', 'tta'))})
		else
			insert_first({
				{'く$', 'き', 'った', 'iku$', 'iki', 'itta'},
				{'う$', 'い', 'うた', 'ou$', 'oi', 'ōta'},
			}, "ja-headword/godan conjugation special failed")
		end
	elseif inflection_type == '2' then
		insert_first({
			{'る$', '', 'た', 'ru$', '', 'ta'},
		}, "ja-headword/ichidan conjugation failed")
	elseif inflection_type == '3' or inflection_type == 'kuru' or inflection_type == 'suru' then
		if lemma:find('%[来%]%(く%)る$') and romaji:find('kuru$') then
			-- the ruby reading of 来 itself changes (く -> き), so rewrite the markup
			table.insert(data.inflections, {label = 'stem', form(lemma:gsub('%[来%]%(く%)る$', '[来](き)'), romaji:gsub('kuru$', 'ki'))})
			table.insert(data.inflections, {label = 'past', form(lemma:gsub('%[来%]%(く%)る$', '[来](き)た'), romaji:gsub('kuru$', 'kita'))})
		else
			insert_first({
				{'くる$', 'き', 'きた', 'kuru$', 'ki', 'kita'},
				{'する$', 'し', 'した', 'suru$', 'shi', 'shita'},
				{'する%]%]$', 'し]]', 'した]]', 'suru$', 'shi', 'shita'},
				{'ずる$', 'じ', 'じた', 'zuru$', 'ji', 'jita'},
			}, "ja-headword/irregular conjugation failed")
		end
	elseif inflection_type == 'i' or inflection_type == 'い' then
		if lemma:find('い$') and romaji:find('i$') then
			table.insert(data.inflections, {label = 'adverbial', form(lemma:gsub('い$', 'く'), romaji:gsub('i$', 'ku'))})
		else
			track("ja-headword/-i inflection failed")
		end
	elseif inflection_type == 'is' then
		-- "ii" adjectives take their adverbial form from "yoi" (yoku)
		if lemma:find('%[良%]%(い%)い$') and romaji:find('ii$') then
			table.insert(data.inflections, {label = 'adverbial', form(lemma:gsub('%[良%]%(い%)い$', '[良](よ)く'), romaji:gsub('ii$', 'yoku'))})
		elseif lemma:find('いい$') and romaji:find('ii$') then
			table.insert(data.inflections, {label = 'adverbial', form(lemma:gsub('いい$', 'よく'), romaji:gsub('ii$', 'yoku'))})
		else
			track("ja-headword/slightly irregular -i inflection failed")
		end
	elseif inflection_type == 'na' or inflection_type == 'な' then
		table.insert(data.inflections, {label = 'adnominal', form(lemma .. '[[な]]', romaji .. ' na')})
		table.insert(data.inflections, {label = 'adverbial', form(lemma .. '[[に]]', romaji .. ' ni')})
	elseif inflection_type == 'tari' or inflection_type == 'たり' then
		table.insert(data.inflections, {label = 'adnominal', form(lemma .. '[[たる]]', romaji .. ' taru')})
		table.insert(data.inflections, {label = 'adverbial', form(lemma .. '[[と]]', romaji .. ' to'), form(lemma .. '[[として]]', romaji .. ' toshite')})
	end
end
-- Headword handler for verbs: adds the transitivity and conjugation-type labels and
-- categories, the generated inflected forms, and a maintenance category for unusual endings.
--   args: template arguments; reads |tr=, |type= and the first unnamed parameter
--   data: headword data table; data.inflections and data.categories are appended to
pos_functions["verbs"] = function(args, data)
	local insert = table.insert

	-- Label and categories for each recognised (alias-resolved) |tr= value.
	local transitivity_info = {
		["tr"] = {label = "transitive", categories = {"Japanese transitive verbs"}},
		["in"] = {label = "intransitive", categories = {"Japanese intransitive verbs"}},
		["both"] = {
			label = "transitive and intransitive",
			categories = {"Japanese transitive verbs", "Japanese intransitive verbs"},
		},
	}
	local transitivity = args["tr"] or ""
	transitivity = aliases[transitivity] or transitivity
	if transitivity == "" then
		insert(data.categories, "Japanese verbs without transitivity")
	else
		-- a non-empty value that is not listed above adds nothing
		local info = transitivity_info[transitivity]
		if info then
			insert(data.inflections, {label = info.label})
			for _, category in ipairs(info.categories) do
				insert(data.categories, category)
			end
		end
	end

	-- Label and category for each recognised (alias-resolved) |type= value.
	local conjugation_info = {
		["1"] = {label = "godan conjugation", category = "Japanese type 1 verbs"},
		["2"] = {label = "ichidan conjugation", category = "Japanese type 2 verbs"},
		["suru"] = {label = "suru conjugation", category = "Japanese suru verbs"},
		["kuru"] = {label = "kuru conjugation", category = "Japanese kuru verbs"},
		["3"] = {label = "type 3 conjugation", category = "Japanese type 3 verbs"},
		["irr"] = {label = "irregular conjugation", category = "Japanese irregular verbs"},
		["yo"] = {label = "yodan conjugation", category = "Japanese yodan verbs"},
		["ni"] = {label = "nidan conjugation", category = "Japanese nidan verbs"},
		["kami ni"] = {label = "kami nidan conjugation", category = "Japanese kami nidan verbs"},
		["shimo ni"] = {label = "shimo nidan conjugation", category = "Japanese shimo nidan verbs"},
	}
	-- "1s" is labelled and categorised exactly like "1"
	conjugation_info["1s"] = conjugation_info["1"]

	local conjugation = args["type"] or ""
	conjugation = aliases[conjugation] or conjugation
	local info = conjugation_info[conjugation]
	if info then
		insert(data.inflections, {label = info.label})
		insert(data.categories, info.category)
		if conjugation == "1" or conjugation == "1s" then
			if ends_in_iru_eru(args[1] or PAGENAME) then
				insert(data.categories, "Japanese type 1 verbs that end in -iru or -eru")
			end
		elseif conjugation == "irr" then
			-- hidden temporary maintenance category
			-- NOTE(review): the category name mentions "type 3" but it is added for type "irr",
			-- not for type "3" -- confirm this is the intended branch
			if mw.ustring.match(PAGENAME,'する$') then
				insert(data.categories, "Japanese terms using ja-verb with type 3")
			end
		end
	end

	add_inflections(data, conjugation)

	-- >> maintenance category <<
	-- flag pages that do not end in a kana acceptable for a modern verb, unless the verb is
	-- already classified as yodan or one of the nidan types
	local classical_types = {["yo"] = true, ["ni"] = true, ["kami ni"] = true, ["shimo ni"] = true}
	if not classical_types[conjugation] and not mw.ustring.match(PAGENAME, '[うくぐすつぬぶむゆる]$') then
		insert(data.categories, "Japanese verbs without modern conjugations")
	end
end
-- Headword handler for suffixes: adds the label for the conjugation/inflection type given in
-- |type= (no categories are added here) and then the generated inflected forms.
--   args: template arguments; reads |type=
--   data: headword data table; data.inflections is appended to
pos_functions["suffixes"] = function(args, data)
	-- Label shown for each recognised (alias-resolved) |type= value.
	local labels = {
		["1"] = "godan conjugation",
		["1s"] = "godan conjugation",
		["2"] = "ichidan conjugation",
		["3"] = "irregular conjugation",
		["yo"] = "yodan conjugation",
		["ni"] = "nidan conjugation",
		["kami ni"] = "kami nidan conjugation",
		["shimo ni"] = "shimo nidan conjugation",
		["rahen"] = "r-irregular conjugation",
		["sahen"] = "s-irregular conjugation",
		["kahen"] = "k-irregular conjugation",
		["nahen"] = "n-irregular conjugation",
		["i"] = "-i inflection",
		["い"] = "-i inflection",
		["is"] = "-i inflection",
		["na"] = "-na inflection",
		["な"] = "-na inflection",
		["nari"] = "-nari inflection",
		["なり"] = "-nari inflection",
		["tari"] = "-tari inflection",
		["たり"] = "-tari inflection",
		["ka"] = "-ka inflection",
		["か"] = "-ka inflection",
	}

	local conjugation = args["type"] or ""
	conjugation = aliases[conjugation] or conjugation

	-- unrecognised types get no label but are still handed to add_inflections
	local label = labels[conjugation]
	if label then
		table.insert(data.inflections, {label = label})
	end
	add_inflections(data, conjugation)
end
-- Headword handler for auxiliary verbs: records the dedicated category and switches the
-- part-of-speech category of the entry to "verbs". The template arguments are not used.
pos_functions["auxiliary verbs"] = function(args, data)
	table.insert(data.categories, "Japanese auxiliary verbs")
	data.pos_category = "verbs"
end
-- Headword handler for suru verbs: adds the suru-verb category, the transitivity label, the
-- "suru conjugation" label and the inflected forms generated for inflection type '3'.
--   args: template arguments; reads |tr=
--   data: headword data table; data.inflections and data.categories are appended to
pos_functions["suru verbs"] = function(args, data)
	local insert = table.insert
	insert(data.categories, "Japanese suru verbs")

	-- Label for each recognised (alias-resolved) |tr= value. Only the label is added here;
	-- this handler adds no transitive/intransitive categories.
	local transitivity_labels = {
		["tr"] = "transitive",
		["in"] = "intransitive",
		["both"] = "transitive and intransitive",
	}
	local transitivity = args["tr"] or ""
	transitivity = aliases[transitivity] or transitivity

	local label = transitivity_labels[transitivity]
	if label then
		insert(data.inflections, {label = label})
	elseif transitivity == "" then
		insert(data.categories, "Japanese verbs without transitivity")
	end

	insert(data.inflections, {label = "suru conjugation"})
	add_inflections(data, '3')
end
-- Headword handler for adjectives: labels and categorises the entry by the inflection type
-- given in |infl=, then adds the generated inflected forms.
--   args: template arguments; reads |infl=
--   data: headword data table; data.inflections and data.categories are appended to
pos_functions["adjectives"] = function(args, data)
	local i_type = {label = "-i inflection", category = "Japanese い-i adjectives"}
	local na_type = {label = "-na inflection", category = "Japanese な-na adjectives"}
	local nari_type = {label = "-nari inflection", category = "Japanese なり-nari adjectives"}
	local tari_type = {label = "-tari inflection", category = "Japanese たり-tari adjectives"}
	local ka_type = {label = "-ka inflection", category = "Japanese か-ka adjectives"}

	-- every accepted spelling of |infl= mapped to its inflection type
	local inflection_types = {
		["i"] = i_type, ["い"] = i_type, ["is"] = i_type,
		["na"] = na_type, ["な"] = na_type,
		["nari"] = nari_type, ["なり"] = nari_type,
		["tari"] = tari_type, ["たり"] = tari_type,
		["ka"] = ka_type, ["か"] = ka_type,
	}

	local infl = args["infl"] or ""
	local inflection_type = inflection_types[infl]
	if inflection_type then
		table.insert(data.inflections, {label = inflection_type.label})
		table.insert(data.categories, inflection_type.category)
	end
	-- called for every value of |infl=, recognised or not
	add_inflections(data, infl)
end
-- Headword handler for nouns: shows the counter (classifier) given in |count=.
-- A value of "-" marks the noun as uncountable; a missing or empty value adds nothing.
--   args: template arguments; reads |count=
--   data: headword data table; data.inflections is appended to
pos_functions["nouns"] = function(args, data)
	local counter = args["count"]
	if not counter or counter == "" then
		return
	end

	local entry
	if counter == "-" then
		entry = {label = "uncountable"}
	else
		entry = {label = "counter", counter}
	end
	table.insert(data.inflections, entry)
end
-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- The part-of-speech category is taken from the first argument of the module invocation
-- (frame.args[1]); all other parameters are read from the parent (template) frame.
-- Returns the headword produced by Module:headword concatenated with the formatted
-- katakana categories. Raises an error when the part of speech is not given.
function export.show(frame)
	local args = frame:getParent().args
	-- PAGENAME is not declared in this function; the pos_functions handlers read it as well
	PAGENAME = args["pagename"] or mw.title.getCurrentTitle().text
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

	local head = args["head"] or PAGENAME
	if poscat == "suru verbs" then
		-- strip a trailing する (tracking the pages where that happens) and append it as a link
		local base, count = head:gsub("する$", "")
		if count == 1 then
			require("Module:debug").track("ja-headword/suru in pagename")
		end
		head = base .. "[[する]]"
	end

	-- |decl= is used as a fallback when |infl= is missing or empty
	if args["decl"] and (not args["infl"] or args["infl"] == "") then
		args["infl"] = args["decl"]
	end

	local data = {lang = lang, sc = sc, pos_category = poscat, categories = {}, translits = {}, heads = (detect_kana_script(PAGENAME) and {head} or {}), inflections = {}}
	local katakana_category = {}
	local kana = find_kana(args, PAGENAME)

	-- sort out all the kanas and do the romanization business
	format_headword(args, data, PAGENAME, kana)

	-- add certain "inflections" and categories for adjectives, verbs, or nouns
	if pos_functions[poscat] then
		pos_functions[poscat](args, data)
	end

	-- convert every "[kanji](reading)" span in the heads into <ruby> markup whose reading
	-- links to the kana spelling of the whole head
	local ruby_pattern = "%[([^%[%]]+)%]%(([^%(%)]+)%)"
	for i = 1, #data.heads do
		-- kana spelling of the head: ruby spans collapsed to their readings, a trailing
		-- [[する]] dropped, remaining link markup removed
		local link_target = mw.ustring.gsub(data.heads[i], ruby_pattern, "%2")
		link_target = remove_links((mw.ustring.gsub(link_target, '%[%[する%]%]$', '')))
		-- A function is used instead of a replacement string because link_target comes from
		-- template input / the page name: a "%" inside it would otherwise be parsed as part
		-- of a capture reference in the replacement.
		data.heads[i] = mw.ustring.gsub(data.heads[i], ruby_pattern, function(kanji, reading)
			return "<ruby>" .. kanji .. "<rp>(</rp><rt>[[" .. link_target .. "#Japanese|" .. reading .. "]]</rt><rp>)</rp></ruby>"
		end)
	end

	-- Alternative spellings of suru verbs are displayed as the linked base followed by a
	-- linked する; for every other part of speech the spelling is passed through unchanged.
	local function spelling_form(text)
		if data.pos_category == "suru verbs" then
			return "[[" .. text .. "]][[する]]"
		end
		return text
	end

	-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa
	local kyu = args["kyu"] or ""
	local shin = args["shin"] or ""
	if kyu ~= "" then
		table.insert(data.inflections, {label = "[[shinjitai]] kanji"})
		table.insert(data.inflections, {label = "[[kyūjitai]] kanji", spelling_form(kyu)})
	end
	if shin ~= "" then
		table.insert(data.inflections, {label = "[[kyūjitai]] kanji"})
		table.insert(data.inflections, {label = "[[shinjitai]] kanji", spelling_form(shin)})
	end

	-- historical kana spellings
	local hhira = args["hhira"] or ""
	if hhira ~= "" then
		table.insert(data.inflections, {label = "historical hiragana", spelling_form(hhira)})
		-- one category per obsolete/historical kana found in the spelling (plain substring search)
		for _, old_kana in ipairs({"ゐ", "ゑ", "を"}) do
			if string.find(hhira, old_kana, 1, true) then
				table.insert(data.categories, "Japanese terms historically spelled with " .. old_kana)
			end
		end
	end
	local hkata = args["hkata"] or ""
	if hkata ~= "" then
		table.insert(data.inflections, {label = "historical katakana", spelling_form(hkata)})
	end

	-- from here on suru verbs are handled under the "verbs" part-of-speech category;
	-- this must stay after the last spelling_form call above
	if data.pos_category == "suru verbs" then
		data.pos_category = "verbs"
	end

	-- categorize by joyo kanji and number of kanji
	categorize_by_kanji(data, PAGENAME)

	-- generate "Japanese terms spelled with ... read as ..." for single-kanji terms
	singlekanji_term(data, PAGENAME)

	-- add categories for terms with iteration marks (which are not kanji and hence are not categorized by ja-kanjitab)
	extra_categorization(data, PAGENAME, katakana_category)

	-- page names containing both katakana and hiragana, except for proverbs and phrases
	if find(PAGENAME, "[ァ-ヺヽヾ]") and find(PAGENAME, "[ぁ-ゖゞゝ]") and data.pos_category ~= "proverbs" and data.pos_category ~= "phrases" then
		table.insert(data.categories, "Japanese terms spelled with mixed kana")
	end

	-- will only use sortkey if sortkey is different from PAGENAME
	-- when katakana in PAGENAME is converted to hiragana
	local sort_key = m_ja.jsort(kana)
	if sort_key == m_ja.kata_to_hira(PAGENAME) then
		return
			require("Module:headword").full_headword(data) ..
			require("Module:utilities").format_categories(katakana_category, lang)
	else
		-- convert sortkey to katakana version for katakana terms category (should sort by katakana)
		data.sort_key = sort_key
		return
			require("Module:headword").full_headword(data) ..
			require("Module:utilities").format_categories(katakana_category, lang, m_ja.hira_to_kata(sort_key))
	end
end
return export