Module:ja: difference between revisions

Browse history interactively
← Go to previous edit Go to next edit →
Content deleted Content added
Inline
Revision as of 06:08, 5 June 2017

The following documentation is located at Module:ja/documentation. ^[edit]
Useful links: subpage list • links • transclusions • testcases • sandbox
The module does various things related to Japanese.
Testcases

Module:ja/testcases
Functions

hira_to_kata:
{{#invoke:ja|hira_to_kata|おはようございます}} → Lua error at line 907: attempt to concatenate field 'grade1' (a nil value)
kata_to_hira:
{{#invoke:ja|kata_to_hira|アメリカンアパレル}} → Lua error at line 907: attempt to concatenate field 'grade1' (a nil value)
romaji_to_kata:
{{#invoke:ja|romaji_to_kata|bakkurasshu}} → Lua error at line 907: attempt to concatenate field 'grade1' (a nil value)
script:
{{#invoke:ja|script|どうも有難う御座います}} → Lua error at line 907: attempt to concatenate field 'grade1' (a nil value)

{{#invoke:ja|script|どうぞよろしく}} → Lua error at line 907: attempt to concatenate field 'grade1' (a nil value)

{{#invoke:ja|script|アメリカ合衆国}} → Lua error at line 907: attempt to concatenate field 'grade1' (a nil value)
Uses

It is used by
{{ja-verbconj}} and its subtemplates
{{ja-noun}}, {{ja-verb}}, {{ja-adj}}, {{ja-pos}}, and {{ja-verb-suru}} which detect the script, generate romanizations, generate sort keys
Module:category tree/poscatboiler/data/lang-specific/jpx to generate romanizations, sort keys, count morae, perform checks for various Japanese categories
{{ja-readings}} (Module:ja-kanji-readings) to generate romanizations and to convert from hiragana to katakana for on readings in Module:ja/data/jouyou-yomi
It was formerly used by {{ja-new}} (which uses it substitutively); the relevant code is now at Module:ja-new.
local export = {}
local find = mw.ustring.find
local length = mw.ustring.len
local trim = mw.text.trim
local split = mw.text.split
local sub, gsub = mw.ustring.sub, mw.ustring.gsub
local match, gmatch = mw.ustring.match, mw.ustring.gmatch
local to_cp, to_char = mw.ustring.codepoint, mw.ustring.char

local Jpan = require("Module:scripts").getByCode("Jpan")
local lang = require("Module:languages").getByCode("ja")

-- note that arrays loaded by mw.loadData cannot be directly used by gsub
local data = mw.loadData("Module:ja/data")

-- Re-export the kanji lists of the data module so that other modules can
-- read them through this module.  A list that is absent from the data module
-- is simply left out of export.data.
export.data = {}
for _, list_name in ipairs {
	"joyo_kanji",
	"jinmeiyo_kanji",
	"grade1",
	"grade2",
	"grade3",
	"grade4",
	"grade5",
	"grade6",
} do
	export.data[list_name] = data[list_name]
end

-- Converts every hiragana character (ぁ-ゖ) of the input to katakana by adding
-- 96 to its code point; all other characters are left alone.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- Returns the converted string.
function export.hira_to_kata(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local function to_katakana(kana)
		return to_char(to_cp(kana) + 96)
	end

	local converted = gsub(text, '[ぁ-ゖ]', to_katakana)
	return converted
end

-- Converts every katakana character in the range ァ-ヶ of the input to
-- hiragana by subtracting 96 from its code point; other characters are kept.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- Returns the converted string.
function export.kata_to_hira(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local function to_hiragana(kana)
		return to_char(to_cp(kana) - 96)
	end

	local converted = gsub(text, '[ァ-ヶ]', to_hiragana)
	return converted
end

-- Replaces the ideographic space with an ordinary space and shifts every
-- character in the range ！-～ down by 65248 code points.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- Returns the converted string.
function export.fullwidth_to_halfwidth(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local function narrow(wide)
		return to_char(to_cp(wide) - 65248)
	end

	local spaced = gsub(text, '　', ' ')
	local narrowed = gsub(spaced, '[！-～]', narrow)
	return narrowed
end

-- Romanizes kana text.
--   text:          the kana string, or a frame-like table whose args[1] holds it
--   no_diacritics: if truthy, long vowels are left doubled instead of being
--                  written with macrons
--   keep_period:   if truthy, "." markup is kept and "。" is not converted
--   hist:          if truthy, apply the extra rules for historical kana
--                  spellings (づ→du, ぢ→di, を→wo, ...)
-- Returns the romanized string.  As a side effect the result is compared with
-- the output of Module:ja/k2r-old and differences are reported through
-- Module:debug tracking and mw.log.
function export.kana_to_romaji(text, no_diacritics, keep_period, hist)
	if type(text) == "table" then text = text.args[1] end

	local tracking_has_percent = find(text, '%%')
	local text_old = trim(require('Module:ja/k2r-old').kana_to_romaji(text, no_diacritics, keep_period))

	-- avoid tampering with existing latin text: store it away
	-- Every run of latin letters is swapped for a numbered placeholder in a
	-- single pass.  (Replacing word by word over the whole text would let a
	-- short word such as "a" clobber part of a later word such as "ab",
	-- leaving the remainder unescaped.)
	local escape = {}
	local id = 0
	text = gsub(text, "[a-z]+", function(latin)
		local placeholder = "㊟㊕㊕㊟" .. id .. "㊟㊕㊕㊟"
		escape[id] = latin
		id = id + 1
		return placeholder
	end)

	-- special preformatting
	text = gsub(text, 'ヶげつ', 'かげつ')
	text = gsub(text, 'ヶ', 'が')
	text = gsub(text, '(.)ゝ', '%1%1')
	text = gsub(text, '(.)ゞ', function(char) return mw.ustring.toNFC(char .. char .. '゙') end) -- unicode hax

	-- conversions
	text = gsub(text, '％', '㊟㌫') -- at [[見込む]], for example
	text = gsub(text, '%-[はハ]$', '㊟波⒜') -- は as suffix and appearing at the end of string
	text = gsub(text, '%-[はハ] ', '㊟波⒜ ') -- は as suffix and appearing mid-sentence
	text = gsub(text, "%-'''[はハ]'''$", "㊟波⒝")
	text = gsub(text, "%-'''[はハ]''' ", "㊟波⒝ ")

	-- [[Wiktionary:Grease_pit/2017/May#Formatting_for_individual_Japanese_readings]]
	if hist then
		text = gsub(text, 'づ', 'du')
		text = gsub(text, 'ぢ', 'di')
		text = gsub(text, 'を', 'wo')
		text = gsub(text, '([やゆよわゐゑを])', '㊟⒳%1')
	end

	-- everything is converted to katakana first, then looked up character by
	-- character in data.kr; characters without an entry are kept as they are
	text = export.hira_to_kata(text)
	text = gsub(text, '.', function(char) return data.kr[char] or char end)
	text = export.fullwidth_to_halfwidth(text)

	if hist then
		text = gsub(text, 'ou', 'o.u')
		text = gsub(text, '([iu])㊟⒳', '') -- くゐやう kwyau
		text = gsub(text, '㊟⒳', '') -- ゑつ wetsu
	end

	-- markup
	text = gsub(text, '%%', '.') -- ruby "percent sign" syntax
	text = gsub(text, '㊟㌫', '%')
	text = gsub(text, '([ッ¤])%.', '%1') -- 「し を ぼっ.す」「るい%じん%えん」

	-- 「テェェェ」→「テェーー」 (avoid funky romaji effected by the "(テュ→)teユ→tyu" line below)
	text = gsub(text, '(ァ)(ァ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ィ)(ィ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ゥ)(ゥ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ェ)(ェ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)
	text = gsub(text, '(ォ)(ォ+)', function(a,b) return a .. mw.ustring.rep('ー', length(b)) end)

	-- (ゲェ→)geェ→gee (note that this causes things like ウゥ→ū and ギィ→gī)
	text = gsub(text, '[aiueo][ァィゥェォ]', {['aァ']='aa',['iィ']='ii',['uゥ']='uu',['eェ']='ee',['oォ']='oo',})

	-- (クヮ→)kuヮ→kwa
	text = gsub(text, '[u]([ヮ])', {['ヮ']='wa',})

	-- (クァ→)kuァ→kwa, (トァ→)toァ→twa, (ウィ→)uィ→wi
	text = gsub(text, '[uo]([ァィェォ])', {['ァ']='wa',['ィ']='wi',['ェ']='we',['ォ']='wo',})
	-- (ツァ→)cwa→ca
	text = gsub(text, '([fvcsz])w', '%1')

	-- (テュ→)teユ→tyu, (ギェ→)giェ→gye
	text = gsub(text, '[aiueo]([ャュェョ])', {['ャ']='ya',['ュ']='yu',['ェ']='ye',['ョ']='yo',})
	-- (ジュ→)jyu→ju
	text = gsub(text, '([xjq])y', '%1')

	-- (ティ→)teィ→ti (essentially forget about the vowel in between)
	text = gsub(text, '[aiueo]([ァィゥェォ])', {['ァ']='a',['ィ']='i',['ゥ']='u',['ェ']='e',['ォ']='o',})

	-- chouonpu and sokuon
	-- repeated until stable, because one pass can expose a new vowel+ー or
	-- ッ+consonant sequence
	while find(text, '[aiueo]ー') or find(text, 'ッ[ \']*[bcdfghjklmnpqrstvwxyz]') do
		text = gsub(text, '([aiueo])ー', '%1%1')
		text = gsub(text, 'ッ([ \']*)([bcdfghjklmnpqrstvwxyz])', '%2%1%2')
	end
	-- deal with leftover sokuon not used as geminate
	text = gsub(text, 'ッ', 'h')

	-- (ん→)n¤
	text = gsub(text, '¤([aiueoy])', "'%1")
	text = gsub(text, '¤', '')

	-- は
	text = gsub(text, "([^a-z.])ha([^a-z.])", "%1wa%2")
	text = gsub(text, "([^a-z.])ha$", "%1wa")
	text = gsub(text, "^ha([^a-z.])", "wa%1")
	-- へ
	text = gsub(text, "([^a-z.])he([^a-z.])", "%1e%2")
	text = gsub(text, "([^a-z.])he$", "%1e")
	text = gsub(text, "^he([^a-z.])", "e%1")
	-- change only when
	--   ① not flanked by a-z or a period
	--   ② at the end of the string and not preceded by a-z or a period
	--   ③ at the beginning of the string and not followed by a-z or a period
	-- period can be used next to the kana (either side) to force the "dumb" romanization (i.e. "ha", "he")

	text = gsub(text, '㊟波⒜', '-ha')
	text = gsub(text, "㊟波⒝", "-'''ha'''")

	-- fix sh, ch, ts
	text = gsub(text, '([xqc]*)([xqc])', function(geminate,main)
		--「めちゃ」→「mecha」
		--「めっちゃ」→「metcha」
		--「めっっちゃ」→「mettcha」
		local corresp_geminate_form = {['x']='s',['q']='t',['c']='t'}
		local corresp_main = {['x']='sh',['q']='ch',['c']='ts'}
		return (geminate and mw.ustring.rep(corresp_geminate_form[main], length(geminate))) .. corresp_main[main]
		end
	)

	-- macrons
	if not no_diacritics then
		text = gsub(text, 'oo', 'ō')
		text = gsub(text, 'aa', 'ā')
		text = gsub(text, 'ee', 'ē')
		text = gsub(text, 'ou', 'ō')
		text = gsub(text, 'uu', 'ū')
		text = gsub(text, 'ii', 'ī')
	end

	-- remove markup and convert real periods
	if not keep_period then
		text = gsub(text, '%.', '')
		text = gsub(text, '。', '◆.◇')
	end

	-- resolve the ◆/◇ spacing markers: adjacent pairs cancel out, ◆ swallows
	-- the surrounding spaces, ◇ collapses them into a single space
	text = gsub(text, '◇◆', '')
	text = gsub(text, '◆◇', '')
	text = gsub(text, ' *◆ *', '')
	text = gsub(text, ' *◇ *', ' ')

	-- restore latin text
	text = gsub(text, "㊟㊕㊕㊟([0-9]+)㊟㊕㊕㊟", function(id) return escape[tonumber(id)] end)

	-- clean up spaces
	text = trim(text)
	text = gsub(text, ' +', ' ')

	-- uppercase markup
	text = gsub(text, "%^'''", "'''^") -- move ^ to an effective position if placed before bold markup
	text = gsub(text, "%^ ", " ^") -- same but with spaces
	text = gsub(text, '%^(.)', mw.ustring.upper) -- uppercase conversion

	-- clean up spaces again
	text = gsub(text, ' +', ' ')

	-- comparison with old kana_to_romaji() code
	text_old = gsub(text_old, '%(ba%)', ' (ba)') -- avoid flooding the tracking template with na-adjectives. ← this really should be looked at though
	text_old = gsub(text_old, ' ”', '”') -- and spacing around quotation marks
	if text ~= text_old then
		if mw.ustring.lower(text) == mw.ustring.lower(text_old) then
			require('Module:debug').track('ja/k2r diff caps')
		elseif find(text_old, 'ッ') then
			require('Module:debug').track('ja/k2r diff w xtu')
		elseif tracking_has_percent then
			require('Module:debug').track('ja/k2r diff pc')
		else
			require('Module:debug').track('ja/k2r diff')
		end
		mw.log('new]' .. text .. '[')
		mw.log('old]' .. text_old .. '[')
	end

	-- any kana still present means the conversion was incomplete
	if find(text, '[ぁ-ー]') then
		require('Module:debug').track('ja/k2r failure')
	end

	return text
end

-- Strips spaces, hyphens, periods, "&nbsp;" entities and apostrophes.
-- Meant for checking manually entered romaji, so that such separators can be
-- inserted without making the manual romaji look "wrong".
-- f: a string, or a frame-like table whose args[1] holds the string.
function export.rm_spaces_hyphens(f)
	local text = type(f) == 'table' and f.args[1] or f

	-- removed one after another, in this order
	local ignorable = { ' ', '-', '%.', '&nbsp;', '\'' }
	for _, pattern in ipairs(ignorable) do
		text = gsub(text, pattern, '')
	end

	return text
end

-- Converts romaji to katakana.
-- f: a string, or a frame-like table whose args[1] holds the string.
-- The steps run in a fixed order: per-character lookup in data.rd, doubled
-- consonants to ッ + consonant, the ts/sh/ch/n syllables, the remaining
-- consonant(+y/w)+vowel syllables via data.rk, and finally the bare vowels.
function export.romaji_to_kata(f)
	local text = type(f) == 'table' and f.args[1] or f

	text = gsub(text, '.', function (char) return data.rd[char] or char end)

	-- doubled consonants (and "tc", as in "tch") become a sokuon
	local geminates = {
		{ 'kk', 'ッk' }, { 'ss', 'ッs' }, { 'tt', 'ッt' },
		{ 'pp', 'ッp' }, { 'bb', 'ッb' }, { 'dd', 'ッd' },
		{ 'gg', 'ッg' }, { 'jj', 'ッj' }, { 'tc', 'ッc' },
	}
	for _, rule in ipairs(geminates) do
		text = gsub(text, rule[1], rule[2])
	end

	-- syllables that need special treatment
	text = gsub(text, 'tsyu', 'ツュ')
	local syllable_tables = {
		{ 'ts[uoiea]', {['tsu']='ツ',['tso']='ツォ',['tsi']='ツィ',['tse']='ツェ',['tsa']='ツァ'} },
		{ 'sh[uoiea]', {['shu']='シュ',['sho']='ショ',['shi']='シ',['she']='シェ',['sha']='シャ'} },
		{ 'ch[uoiea]', {['chu']='チュ',['cho']='チョ',['chi']='チ',['che']='チェ',['cha']='チャ'} },
		{ "n[uoiea']?", {['nu']='ヌ',['no']='ノ',['ni']='ニ',['ne']='ネ',['na']='ナ',['n']='ン',["n'"]='ン'} },
	}
	for _, rule in ipairs(syllable_tables) do
		text = gsub(text, rule[1], rule[2])
	end

	-- every other consonant(+y/w)+vowel syllable comes from data.rk
	text = gsub(text, '[wvtrpsmlkjhgfdbzy][yw]?[uoiea]', function (char) return data.rk[char] or char end)

	-- whatever vowels are left stand on their own
	local vowels = {
		{ 'u', 'ウ' }, { 'o', 'オ' }, { 'i', 'イ' }, { 'e', 'エ' }, { 'a', 'ア' },
	}
	for _, rule in ipairs(vowels) do
		text = gsub(text, rule[1], rule[2])
	end

	return text
end

-- Determines which kinds of script occur in the input.
-- f: a string (any mix of kanji, kana, latin letters, digits), or a
--    frame-like table whose args[1] holds the string.
-- Returns the detected kinds joined by "+", in the fixed order
-- Hira, Kana, Hani, Romaji, Number, Abbreviation; e.g. given イギリス人 it
-- returns Kana+Hani.  An input matching none of them yields "".
function export.script(f)
	-- both variables are local: they used to be assigned as globals, which
	-- leaked state out of the function
	local text = type(f) == 'table' and f.args[1] or f
	local script = {}

	local detectors = {
		{ '[ぁ-ゖ]', 'Hira' },
		-- TODO: there are two kanas.  This should insert Kata.
		{ '[ァ-ヺー]', 'Kana' },
		-- 一 is unicode 4e00, previously used 丁 is 4e01
		{ '[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]', 'Hani' },
		-- matching %a should have worked but matched the end of every string
		{ '[a-zA-ZāēīōūĀĒĪŌŪａ-ｚＡ-Ｚ]', 'Romaji' },
		{ '[0-9０-９]', 'Number' },
		{ '[〆々]', 'Abbreviation' },
	}

	for _, detector in ipairs(detectors) do
		if match(text, detector[1]) then
			table.insert(script, detector[2])
		end
	end

	return table.concat(script, '+')
end

-- Counts the kanji in a term.
-- f: a string, or a frame-like table whose args[1] holds the string.
-- A kanji followed by the iteration mark 々 counts twice (時々 is treated as
-- 時時).  The ranges used are those of w:Template:CJK_ideographs_in_Unicode:
-- U+3400 - U+4DB5, U+4E00 - U+9FCC, U+F900 - U+FAD9 (both written as byte
-- escapes to avoid normalisation) and U+20000 - U+2FA1F.
function export.count_kanji(f)
	local text = type(f) == 'table' and f.args[1] or f

	-- expand the iteration mark into a copy of the preceding kanji
	local expanded = gsub(text, '([㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟])々', '%1%1')
	-- drop everything that is not a kanji and count what remains
	local kanji_only = gsub(expanded, '[^㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]', '')

	return length(kanji_only)
end

-- Counts the morae of a kana string.
-- text: a string, or a frame-like table whose args[1] holds the string.
-- Returns the number of characters left after the removals below.
--
-- when counting morae, most small hiragana belong to the previous mora,
-- so for purposes of counting them, they can be removed and the characters
-- can be counted to get the number of morae.  The exception is small tsu,
-- so data.nonmora_to_empty maps all small hiragana except small tsu.
function export.count_morae(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	-- convert kata to hira (hira is untouched)
	text = export.kata_to_hira(text)
	-- remove all of the small hiragana such as ょ except small tsu
	-- (characters without an entry in data.nonmora_to_empty are kept)
	text = gsub(text,'.',function (char) return data.nonmora_to_empty[char] or char end)
	-- remove zero-width spaces
	-- NOTE(review): the pattern literal holds a single invisible character;
	-- confirm that it is the character intended here
	text = gsub(text, '‎', '')
	-- return number of characters, which should be the number of morae
	return length(text)
end

-- Builds a hiragana sort key designed for WMF software.
-- text: any mix of kana, as a string or as a frame-like table whose args[1]
--       holds the string.
-- Unlike sort(), this returns only the key itself (no "|sort=" prefix) and
-- replaces the long vowel mark with the vowel it stands for.
function export.jsort(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- passes every character of `str` through `lookup`; characters without
	-- an entry are kept unchanged
	local function map_chars(str, lookup)
		return (gsub(str, '.', function (char) return lookup[char] or char end))
	end

	-- remove western spaces, hyphens, and periods
	-- diff=41967612: also remove caret
	text = gsub(text, '[ %-%.%^]', '')

	text = export.kata_to_hira(text)

	-- A first character with dakuten is replaced by its plain counterpart and
	-- an apostrophe is appended (がす > かす'); with handakuten the same
	-- happens with two apostrophes (ぱす > はす'').  The check is whether the
	-- dakuten/handakuten table turns the first character into "".
	local first = sub(text, 1, 1)
	local mark
	if map_chars(first, data.dakuten) == '' then
		mark = "'"
	elseif map_chars(first, data.handakuten) == '' then
		mark = "''"
	end

	if mark then
		local plain = map_chars(first, data.tenconv)
		local rest = sub(text, 2, length(text))
		text = plain .. rest .. mark
	end

	-- replace the long vowel mark with the vowel that it stands for
	for pattern, replacement in pairs(data.longvowels) do
		text = gsub(text, pattern, replacement)
	end

	return text
end

-- Tells whether the input contains at least one kana character (it does not
-- have to consist of kana only).
-- f: a string, or a frame-like table whose args[1] holds the string.
-- Returns 'yes' if a hiragana (ぁ-ゖ) or katakana (ァ-ヺ) character is found,
-- 'no' otherwise -- which includes the empty string.
function export.is_kana(f)
	local text = type(f) == 'table' and f.args[1] or f
	local found = match(text, '[ぁ-ゖ]') or match(text, '[ァ-ヺ]')
	return found and 'yes' or 'no'
end

-- Builds a sort key with "|sort=" in front, e.g. |sort=はつぐん' when given
-- ばつぐん.
-- f: a string, or a frame-like table whose args[1] holds the string.
-- Returns
--   * "|sort=" .. key .. "'"  when the first character has dakuten,
--   * "|sort=" .. key .. "''" when it has handakuten,
--   * "|sort=" .. hiragana    when the input consists only of katakana,
--   * ""                      in every other case.
function export.sort(f)
	local text = type(f) == 'table' and f.args[1] or f
	local prefix = '|' .. 'sort' .. '='

	-- true when nothing remains after deleting all katakana
	local katakana_only = gsub(text, '[ァ-ヺ]', '') == ''

	text = export.kata_to_hira(text)
	local first = sub(text, 1, 1)

	-- passes the first character through `lookup`, keeping it when it has no entry
	local function map_first(lookup)
		return (gsub(first, '.', function (char) return lookup[char] or char end))
	end

	local mark
	if map_first(data.dakuten) == '' then
		mark = "'"
	elseif map_first(data.handakuten) == '' then
		mark = "''"
	end

	if mark then
		local plain = map_first(data.tenconv)
		local rest = sub(text, 2, length(text))
		return prefix .. plain .. rest .. mark
	end

	if katakana_only then
		return prefix .. text
	end

	return ''
end

-- Returns the "stem" of a verb or -i adjective: the term given in f.args[1]
-- without its final character.
function export.definal(f)
	local term = f.args[1]
	local stem_length = length(term) - 1
	return sub(term, 1, stem_length)
end

-- Generates one link of the form
--   [[Category:Japanese terms spelled with (kanji)]]
-- for every distinct kanji in the title of the current page, in order of
-- first appearance.  This used to be done in Template:ja-kanjitab, where it
-- depended on the editor entering the right kanji.
-- Returns the links concatenated without a separator ("" if the title
-- contains no kanji).
function export.spelled_with_kanji()
	local title = mw.title.getCurrentTitle().text

	-- keep only the kanji of the title
	-- NOTE(review): an earlier comment said that 々 is deliberately left in,
	-- but the character class below does not list it, so it is removed as well
	local kanji_only = gsub(title, '[^㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]', '')

	local seen = {}
	local cats = {}
	for kanji in gmatch(kanji_only, ".") do
		if not seen[kanji] then
			seen[kanji] = true
			cats[#cats + 1] = '[[Category:Japanese terms spelled with ' .. kanji .. ']]'
		end
	end

	return table.concat(cats, '')
end

-- Wraps every run of kanji in `term` in <ruby> markup, taking the readings
-- from `kana`.
-- see also Template:JAruby
-- meant to be called from another module
--   term:         the written form; may contain wikilinks, spaces and the
--                 markup characters . ^ -
--   kana:         the kana reading of the whole term
--   from_ja_link: not used in the body of this function
-- Returns the term with ruby markup inside a <span style="font-size: 1.2em">.
-- How it works: `term` is turned into a pattern in which every kanji run is
-- replaced by the capture (.+); matching that pattern against `kana` yields
-- one reading per kanji run.
function export.add_ruby_backend(term, kana, from_ja_link)
	local pattern = ""
	-- holds the whole segments of markup enclosed in <ruby>...</ruby>
	local ruby_markup = {}
	-- range of kana: '[ぁ-ゖァ-ヺ]'
	-- nonkana: [^ぁ-ゖァ-ヺ]
	-- (fullwidth digits and 々 are treated like kanji here)
	local kanji_pattern = "[々㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟０-９]"

	-- links without pipes will fail
	-- so [[x]] is rewritten as [[x|x]]
	term = gsub(term, '%[%[([^|%]]+)%]%]', '[[%1|%1]]')

	-- remove links from kana
	kana = gsub(kana, '%[%[([^|%]]+)%]%]', '%1')
	kana = gsub(kana, '%[%[[^%]]+|([^%]]+)%]%]', '%1')

	-- build up pattern
	-- escape the magic characters in the term
	-- (piped links are first reduced to their display text)
	pattern = gsub(term, '%[%[[^%]]+|([^%]]+)%]%]', '%1')
	pattern = require("Module:string").pattern_escape(pattern)

	-- leftover brackets become optional spaces in the pattern and are dropped
	-- from the kana; spaces around runs of apostrophes are removed and
	-- repeated spaces collapsed, in both strings
	pattern = gsub(pattern, "[%[%]]+", " *")
	kana = gsub(kana, "[%[%]]+", '')
	pattern = gsub(pattern, " *('+) *", "%1")
	kana = gsub(kana, " *('+) *", "%1")
	pattern = gsub(pattern, " +", " ")
	kana = gsub(kana, " +", " ")

	-- remove periods and caret signs and hyphens
	-- (in the pattern they are already %-escaped at this point)
	pattern = gsub(pattern, '%%[%.%^%-]', '')
	kana = gsub(kana, '[%.%^%-]', '')

	-- in order to make a pattern that will find the ruby,
	-- replace every unbroken string of kanji with a sub-pattern
	pattern = gsub(pattern, kanji_pattern .. '+', '(.+)')
	-- get a pattern like
	-- (.+)ばか(.+)ばか(.+)ばかばかばああか(.+) when given 超ばか猿超ばか猿超ばかばかばああか猿
	-- it turns out we need to keep the spaces sometimes
	-- so that kana don't "leak" in ambiguous cases like 捨すてて撤退 where it's not clear if it's
	-- す, てったい or すて, ったい.  only solution now is to put spaces in the "term" param
	-- if they fall between kana

	-- build up term (e.g. [[歌う|歌った]])
	local replaced = {}
	local count = 0
	term = gsub(term, '%]', '%]') -- escape the "]" character so that it cannot appear, example becomes [[歌う|歌った%]%]
	-- every kanji run is swapped for a numbered placeholder [n]; the run
	-- itself is remembered in replaced[n]
	term = gsub(term, kanji_pattern .. '+', function(text)
		count = count + 1
		table.insert(replaced, text)
		return '[' .. count .. ']'
	end) -- example becomes [[[1]う|[2]った%]%]

	-- remove spaces
	for i,val in ipairs(replaced) do replaced[i] = gsub(val, ' ', '') end

	-- placeholders inside the target part of a link (before the pipe) are
	-- turned back into their kanji, one per pass, so that link targets stay
	-- free of ruby markup
	while match(term, '%[%[[^|]*%[[0-9]+%][^|]*|') do
		term = gsub(term, '(%[%[[^|]*)%[([0-9]+)%]([^|]*|)', function(a,b,c)
			return a .. replaced[tonumber(b)] .. c
		end)
	end -- example becomes [[歌う|[2]った%]%]

	-- apply that pattern to the kana to collect the rubies
	-- if this fails, try it without spaces
	if match(kana, pattern) == nil then kana = gsub(kana, ' ', '') end

	-- one capture per kanji run of the pattern
	-- NOTE(review): if the pattern still does not match, this table is empty
	-- and the concatenation of ruby[i] below raises an error
	local ruby = { match(kana, pattern) }
	-- local ruby = {}
	-- for c in gmatch(kana, pattern) do table.insert(ruby, c) end

	-- find the kanji strings again and combine them with their ruby to make the <ruby> markup
	local kanji_segments = {}
	for c in gmatch(term, '%[([0-9]+)%]') do table.insert(kanji_segments, replaced[tonumber(c)]) end

	for i = 1, #kanji_segments do
		table.insert(ruby_markup, "<ruby>" .. kanji_segments[i] .. "<rp>&nbsp;(</rp><rt>" .. ruby[i] .. "</rt><rp>) </rp></ruby>")
	end

	-- the remaining placeholders are replaced, in order of appearance, by the
	-- ruby markup built above
	count = 0
	term = gsub(term, '%[[0-9]+%]', function()
		count = count + 1
		return ruby_markup[count]
	end)

	-- undo the "]" escaping, then drop leftover "%" and spaces
	term = gsub(term, '%%%]', ']')
	term = gsub(term, '%%', '')
	term = gsub(term, ' ', '')

	--done
	return '<span style="font-size: 1.2em">' .. term .. '</span>'
end

-- replaces the code in Template:ja-readings which accepted kanji readings
-- and displayed them in a consistent format
-- substantial change in function was introduced in https://en.wiktionary.org/w/index.php?diff=46057625
--
-- frame: the frame of the #invoke; the parameters goon, kanon, toon, soon,
--        on, kanyoon, kun and nanori (and 1, used as the kanji when the page
--        title is not a single character) are read from the parent frame.
-- Returns wikitext: one bulleted line per reading class that was given, plus
-- category links, joined by newlines.
--
-- In the new format a parameter is a comma-separated list of readings; each
-- reading may carry a gloss in double quotes and older spellings appended as
-- reading<historical<ancient.  A parameter containing "[[" is treated as the
-- old, pre-linked format.
function export.readings(frame)
	local PAGENAME = mw.title.getCurrentTitle().text
	local args = {}

	-- Convert empty args to nil (by not adding them to the args table).
	for key, arg in pairs(frame:getParent().args) do
		if arg ~= "" then
			args[key] = arg
		end
	end

	local yomi_data = mw.loadData("Module:ja/data/jouyou-yomi").yomi

	local items = {
		goon = args["goon"],
		kanon = args["kanon"],
		toon = args["toon"],
		soon = args["soon"],
		on = args["on"],
		kanyoon = args["kanyoon"],
		kun = args["kun"],
		nanori = args["nanori"],
	}

	-- per class: position of its line in the output, the entry its label
	-- links to, the label text and the name used in category titles
	local labels = {
		goon = {
			index = 1,
			entry = "呉音",
			text = "Goon",
			classification = "on",
		},
		kanon = {
			index = 2,
			entry = "漢音",
			text = "Kan’on",
			classification = "on",
		},
		toon = {
			index = 3,
			entry = "唐音",
			text = "Tōon",
			classification = "on",
		},
		soon = {
			index = 4,
			entry = "宋音",
			text = "Sōon",
			classification = "on",
		},
		on = {
			index = 5,
			entry = "on'yomi",
			text = "On",
			classification = "on",
			unclassified = " (unclassified)",
		},
		kanyoon = {
			index = 6,
			entry = "慣用音",
			text = "Kan’yōon",
			classification = "kan'yōon",
		},
		kun = {
			index = 7,
			entry = "kun'yomi",
			text = "Kun",
			classification = "kun",
		},
		nanori = {
			index = 8,
			entry = "nanori",
			text = "Nanori",
			classification = "nanori",
		},

	}

	local is_old_format = false

	if length(PAGENAME) ~= 1 then
		PAGENAME = trim(args[1] or '')
	end

	local function link(lemma, kana, gloss, pos, tr)
		return require("Module:ja-link").link{ lemma = lemma, kana = kana, gloss = gloss, pos = pos, transliteration = tr }
	end

	local function plain_link(entry)
		return require("Module:links").full_link {
			term = entry,
			lang = lang,
			sc = Jpan,
			tr = "-",
		}
	end

	-- this holds the finished product composed of wikilinks to be displayed
	-- in the Readings section under the Kanji section
	local links = {}

	for class, readings in pairs(items) do
		if readings then
			local label = labels[class]

			local unclassified = ""

			-- The plain "on" line is marked as unclassified when none of the
			-- specific on-reading classes was given.  Empty parameters were
			-- turned into nil above, so absence has to be tested rather than
			-- equality with "" (which could never be true).
			if label.unclassified
				and not (items.goon or items.kanon or items.toon or items.soon or items.kanyoon) then
				unclassified = label.unclassified
			end

			if match(readings, '%[%[') then
				is_old_format = true

				if label.classification == 'on' then
					for reading in gmatch(readings, '[ぁ-ー]+') do
						table.insert(links, '[[Category:Japanese kanji read as ' .. reading .. ']]')
					end
				end

				-- unpiped links consisting only of Japanese-script characters
				-- are re-linked through Module:links; others are kept as is
				readings = mw.ustring.gsub(
					readings,
					"%[%[([^%]|]+)%]%]",
					function(entry)
						if mw.ustring.find(entry, "^[" .. Jpan:getCharacters() .. "]+$") then
							return plain_link(entry)
						else
							return "[[" .. entry .. "]]"
						end
					end
				)
			else
				-- hold glosses for entire line, in case a gloss contains a comma
				-- Each quoted gloss is swapped for a ##n placeholder in one
				-- pass.  A callback is used so that the gloss text is never
				-- interpreted as a pattern: glosses may contain characters
				-- such as ( ) - . that are magic in Lua patterns.
				local glosses = {}
				readings = gsub(readings, ' "([^"]+)"', function(gloss)
					table.insert(glosses, gloss)
					return '##' .. #glosses
				end)

				readings = split(readings, ',%s*')

				for i, reading in ipairs(readings) do
					local reading_hist, reading_oldest = '', ''

					local is_jouyou = false

					local gloss = ''

					-- katakana in a reading is flagged for attention
					if match(reading, '[ァ-ヺ]') then
						table.insert(links, '[[Category:Requests for attention concerning Japanese]]')
					end


					if match(reading, '##') then
						local gloss_id = match(reading, '##(%d+)')
						reading = gsub(reading, '##' .. gloss_id, '')
						gloss = glosses[tonumber(gloss_id)] -- retrieve gloss
					end

					local pos, pos_hist, pos_oldest = { }, { '[[w:Historical kana orthography|historical]]' }, { 'ancient' }

					--[=[
					if match(reading, '%+') then
						reading = gsub(reading, '%+', '')
						table.insert(pos, 'non-[[w:Jōyō kanji|Jōyō]]')
					end
					]=]

					-- reading<historical<ancient
					if match(reading, '.+<.+<.+') then
						reading, reading_hist, reading_oldest = match(reading, '(.+)<(.+)<(.+)')
					elseif match(reading, '.+<.+') then
						reading, reading_hist = match(reading, '(.+)<(.+)')
					end

					local disp, disp_hist, disp_oldest = reading, reading_hist, reading_oldest
					local kanji, kanji_hist, kanji_oldest = reading, reading_hist, reading_oldest

					local concat_sep = ', '

					if yomi_data[PAGENAME] and yomi_data[PAGENAME][reading] then
						is_jouyou = true

						if yomi_data[PAGENAME][reading] == 2 then
							table.insert(pos, '[[w:Jōyō kanji|Jōyō]], uncommon')
						else
							table.insert(pos, '[[w:Jōyō kanji|Jōyō]]')
						end
					end

					if reading ~= '' then
						table.insert(links, '[[Category:Japanese kanji with ' .. label.classification .. ' reading ' .. reading .. ']]')
					end
					if reading_hist ~= '' then
						table.insert(links, '[[Category:Japanese kanji with historical ' .. label.classification .. ' reading ' .. reading_hist .. ']]')
					end
					if reading_oldest ~= '' then
						table.insert(links, '[[Category:Japanese kanji with ancient ' .. label.classification .. ' reading ' .. reading_oldest .. ']]')
					end

					if match(reading, '%.') then
						kanji = gsub(kanji, '^(.+)(%.)', PAGENAME) -- transform 「むす.ぶ」 into 「結ぶ」
						kanji_hist = gsub(kanji_hist, '^(.+)(%.)', PAGENAME)
						kanji_oldest = gsub(kanji_oldest, '^(.+)(%.)', PAGENAME)

						reading = gsub(reading, '^(.+)(%.)', '<u>%1</u>') -- transform 「むす.ぶ」 into 「<u>むす</u>ぶ」
						reading_hist = gsub(reading_hist, '^(.+)(%.)', '<u>%1</u>')
						reading_oldest = gsub(reading_oldest, '^(.+)(%.)', '<u>%1</u>')

						table.insert(pos, 1, plain_link(kanji))
						table.insert(pos_hist, 1, plain_link(kanji_hist))
						table.insert(pos_oldest, 1, plain_link(kanji_oldest))
					end

					local rom_hist, rom_oldest = export.kana_to_romaji(reading_hist, nil, nil, true),  export.kana_to_romaji(reading_oldest, nil, nil, true)

					-- the modern reading (highlighted when it is a Jōyō
					-- reading), followed by the older spellings in superscript
					readings[i] =
						(is_jouyou and '<mark style="background:rgba(224, 255, 255, 0.5);">' or '')
						..
						link(disp, reading, gloss, table.concat(pos, concat_sep))
						..
						(is_jouyou and '</mark>' or '')
						..
						(reading_hist ~= '' and ('<sup>←' .. link(disp_hist, reading_hist, nil, table.concat(pos_hist, concat_sep), rom_hist) .. '</sup>') or '')
						..
						(reading_oldest ~= '' and ('<sup>←' .. link(disp_oldest, reading_oldest, nil, table.concat(pos_oldest, concat_sep), rom_oldest) .. '</sup>') or '')
				end

				readings = table.concat(readings, '; ')
			end

			-- place the line at the position given by label.index, or at the
			-- end when the list is still shorter than that
			if label.index > #links then
				table.insert(links, "* '''[[" .. label.entry .. "|".. label.text .. "]]'''" .. unclassified .. ": " .. readings)
			else
				table.insert(links, label.index, "* '''[[" .. label.entry .. "|".. label.text .. "]]'''" .. unclassified .. ": " .. readings)
			end
		end
	end

	--[==[
	-- determine if this is joyo kanji (常用) or jinmeiyo kanji (人名用) or neither (表外)
	local joyo_kanji_pattern = ('[' .. data.joyo_kanji .. ']')
	local jinmeiyo_kanji_pattern = ('[' .. data.jinmeiyo_kanji .. ']')
	local sortkey = ""
	if match(PAGENAME, joyo_kanji_pattern) then
		sortkey = "Common"
	elseif match(PAGENAME, jinmeiyo_kanji_pattern) then
		sortkey = "Names"
	else
		sortkey = "Uncommon"
	end
	]==]
	-- NOTE: with the introduction of the new {{ja-readings}} formatting the above block of code currently does nothing...

	if is_old_format then
		table.insert(links, '[[Category:Japanese kanji using old ja-readings format]]')
	end

	return table.concat(links, "\n")
end

-- Does the work of Template:ja-kanji: produces the headword display and the
-- categories for a kanji entry.
-- frame: the frame of the #invoke; the parameters grade, rs, style, shin and
--        kyu are read from the parent frame.
-- Returns the wikitext followed by the category links, or nothing when the
-- title of the current page contains no kanji (e.g. on a user page).
function export.kanji(frame)
	local PAGENAME = mw.title.getCurrentTitle().text

	-- only do this if this entry is a kanji page and not some user's page
	if not match(PAGENAME, "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]") then
		return
	end

	local args = frame:getParent().args
	local grade = args["grade"] or ""
	local rs = args["rs"] or ""
	local style = args["style"] or ""
	local shin = args["shin"] or ""
	local kyu = args["kyu"] or ""

	-- categories are sorted by radical/strokes when given, else by the title
	local catsort = (rs ~= "") and rs or PAGENAME

	-- if grade was not specified, determine it now
	if grade == "" then
		if match(PAGENAME, '[' .. data.joyo_kanji .. ']') then
			grade = "c"
		elseif match(PAGENAME, '[' .. data.jinmeiyo_kanji .. ']') then
			grade = "n"
		else
			grade = "uc"
		end
	end

	-- description and category belonging to each recognised grade value;
	-- the digits 7, 8, 9 and 0 are alternative spellings of c, n, uc and r
	local school_grades = { ["1"] = true, ["2"] = true, ["3"] = true, ["4"] = true, ["5"] = true, ["6"] = true }
	local digit_alias = { ["7"] = "c", ["8"] = "n", ["9"] = "uc", ["0"] = "r" }
	local named_grades = {
		c = {
			description = "[[w:Jōyō kanji|common “Jōyō” kanji]]",
			category = "[[Category:Common kanji|" .. catsort .. "]]",
		},
		n = {
			description = "[[w:Jinmeiyō kanji|“Jinmeiyō” kanji used for names]]",
			category = "[[Category:Kanji used for names|" .. catsort .. "]]",
		},
		uc = {
			description = "[[w:Hyōgai kanji|uncommon “Hyōgai” kanji]]",
			category = "[[Category:Uncommon kanji|" .. catsort .. "]]",
		},
		r = {
			description = "[[w:Radical_(Chinese_character)|Radical]]",
			category = "[[Category:CJKV radicals| ]]",
		},
	}

	local grade_info
	if school_grades[grade] then
		grade_info = {
			description = "[[w:Kyōiku kanji|grade " .. grade .. " “Kyōiku” kanji]]",
			category = "[[Category:Grade " .. grade .. " kanji|" .. catsort .. "]]",
		}
	else
		grade_info = named_grades[digit_alias[grade] or grade]
	end

	local categories = {}
	local wikitext = {
		-- display the kanji itself at the top at 275% size
		'<div><span lang="ja" class="Jpan" style="font-size:275%; line-height: 100%;">' .. PAGENAME .. '</span></div>',
		"(''",
	}

	-- display information for the grade; an unknown grade is flagged instead
	if grade_info then
		wikitext[#wikitext + 1] = grade_info.description
	else
		categories[#categories + 1] = "[[Category:Japanese terms needing attention/kanji grade]]"
	end

	-- if style was indicated, mention that and provide link to corresponding kanji
	-- (link to shinjitai if this is kyujitai, link to kyujitai if this is shinjitai)
	local styles = {
		s = { own = "[[shinjitai]]", other = "[[kyūjitai]]", counterpart = kyu },
		ky = { own = "[[kyūjitai]]", other = "[[shinjitai]]", counterpart = shin },
	}
	local style_info = styles[style]
	if style_info then
		wikitext[#wikitext + 1] = ",&nbsp;"
		local counterpart = style_info.counterpart
		if counterpart == "" then
			wikitext[#wikitext + 1] = style_info.own .. " kanji"
		else
			wikitext[#wikitext + 1] = style_info.own .. ' kanji, ' .. style_info.other
				.. ' form <span lang="ja" class="Jpan">[[' .. counterpart .. '#Japanese|' .. counterpart .. ']]</span>'
		end
	end
	wikitext[#wikitext + 1] = "'')"

	-- add categories
	categories[#categories + 1] = "[[Category:Japanese Han characters|" .. catsort .. "]]"
	if grade_info then
		categories[#categories + 1] = grade_info.category
	end

	-- error category
	if rs == "" then
		categories[#categories + 1] = "[[Category:Japanese terms needing attention/radical and strokes]]"
	end

	return table.concat(wikitext, "") .. table.concat(categories, "\n")
end

local grade1_pattern = ('[' .. data.grade1 .. ']')
local grade2_pattern = ('[' .. data.grade2 .. ']')
local grade3_pattern = ('[' .. data.grade3 .. ']')
local grade4_pattern = ('[' .. data.grade4 .. ']')
local grade5_pattern = ('[' .. data.grade5 .. ']')
local grade6_pattern = ('[' .. data.grade6 .. ']')
local secondary_pattern = ('[' .. data.secondary .. ']')
local jinmeiyo_kanji_pattern = ('[' .. data.jinmeiyo_kanji .. ']')
local hyogaiji_pattern = ('[^' .. data.joyo_kanji .. data.jinmeiyo_kanji .. ']')

--- Grade a string against the module-level kanji patterns.
-- @param kanji  Either the text to inspect, or a frame-like table whose
--               `args[1]` holds that text.
-- @return 9 if the text contains any character outside the joyo and jinmeiyo
--         lists; otherwise 8, 7, or 6 down to 1 for the first list (jinmeiyo,
--         secondary, grade 6 .. grade 1) that matches somewhere in the text;
--         `false` when nothing matches.
function export.kanji_grade(kanji)
	if type(kanji) == "table" then
		-- Invoked with a frame: the text is its first positional argument.
		kanji = kanji.args[1]
	end

	-- Ordered from highest to lowest result; the first hit wins.
	local checks = {
		{ 9, hyogaiji_pattern },
		{ 8, jinmeiyo_kanji_pattern },
		{ 7, secondary_pattern },
		{ 6, grade6_pattern },
		{ 5, grade5_pattern },
		{ 4, grade4_pattern },
		{ 3, grade3_pattern },
		{ 2, grade2_pattern },
		{ 1, grade1_pattern },
	}

	for _, check in ipairs(checks) do
		if match(kanji, check[2]) then
			return check[1]
		end
	end

	return false
end

--- Build the wikitext skeleton of a new Japanese entry.
-- All input comes from the parent frame's arguments:
--   1      reading / segmentation of the term (defaults to the page name);
--          "-", " " and "." separate kanji readings, "<...>" marks kun
--          readings, "^" marks capitalisation
--   2      part-of-speech code (key of `pos_table`, defaults to "n")
--   3, 4   definitions for the first and (optional) second POS section
-- plus many named arguments (defs, wp*, swp*, alt*, etymology, pronunciation,
-- syn*/ant*/der*/rel*, descendants, categories, Korean section, ...).
-- @param frame  Scribunto frame object; only `frame:getParent().args` is read.
-- @return the generated wikitext as a single string.
-- Raises an error when the part-of-speech code (argument 2 or `kp`) is unknown.
function export.new(frame)
	local args = frame:getParent().args
	local parts = { "==Japanese==" }
	-- "|sort=..." suffix appended to templates, or false when none is needed.
	local sortkey
	-- Reading type reported by the kanjitab builder; reused by {{ja-pron}}.
	local yomi

	-- Append one piece of wikitext to the output buffer.
	local function emit(piece)
		parts[#parts + 1] = piece
	end

	-- "|<out_name or name>=<value>" when args[name] is present, else "".
	local function named(name, out_name)
		local value = args[name]
		if not value then
			return ""
		end
		return "|" .. (out_name or name) .. "=" .. value
	end

	-- "|<value>" when args[name] is present, else "".
	local function positional(name)
		local value = args[name]
		return value and ("|" .. value) or ""
	end

	-- Concatenation of named(prefix .. i) for i = first .. last.
	local function named_series(prefix, first, last)
		local out = {}
		for i = first, last do
			out[#out + 1] = named(prefix .. i)
		end
		return table.concat(out)
	end

	-- Concatenation of positional(prefix .. i) for i = first .. last.
	local function positional_series(prefix, first, last)
		local out = {}
		for i = first, last do
			out[#out + 1] = positional(prefix .. i)
		end
		return table.concat(out)
	end

	-- tr1..trN, pos1..posN, t1..tN in that order.
	local function glosses(count)
		return named_series("tr", 1, count) .. named_series("pos", 1, count) .. named_series("t", 1, count)
	end

	-- Sort-key suffix; suppressed when an explicit DEFAULTSORT ("defs") is given.
	local function sort_suffix()
		if args["defs"] then
			return ""
		end
		return sortkey or ""
	end

	-- {{wp}} / {{swp}} lines for `name`, `name2` .. `name6`; "y" for the
	-- first one means "link without an explicit title".
	local function sister_links(name)
		local out = ""
		local first = args[name]
		if first == "y" then
			out = "\n{{" .. name .. "|lang=ja}}"
		elseif first then
			out = "\n{{" .. name .. "|lang=ja|" .. first .. "}}"
		end
		for i = 2, 6 do
			local extra = args[name .. i]
			if extra then
				out = out .. "\n{{" .. name .. "|lang=ja|" .. extra .. "}}"
			end
		end
		return out
	end

	-- Section listing up to four {{ja-l}} links taken from class, class2, ...
	-- (the list stops at the first missing number).
	local function other(class, title)
		local code = ""
		local first = args[class]

		if first then
			code = "\n\n===" .. title .. "===\n* {{ja-l|" .. first .. "}}"
			for i = 2, 4 do
				local item = args[class .. i]
				if not item then
					break
				end
				code = code .. "\n* {{ja-l|" .. item .. "}}"
			end
		end

		code = gsub(code, "{{ja%-l|([^%|%}]+)[:：]", "{{ja-r|%1|") -- change something like "{{ja-l|辞典:じてん}}" to "{{ja-r|辞典|じてん}}"
		code = gsub(code, "{{ja%-l|([ぁ-ー ^%%.]+)}}", "{{ja-r|%1}}") -- change something like "{{ja-l|じてん}}" to "{{ja-r|じてん}}"

		return code
	end

	-- Build the {{ja-kanjitab}} call from the raw segmentation string.
	-- Returns the wikitext and the reading type that was used.
	local function make_tab(original, yomi)
		local output_text = ""
		original = gsub(original, " ", "|")
		original = gsub(original, "%.", "|")
		original = gsub(original, "%^", "")
		if match(original, "<") then
			for word in gmatch(original, "<([^>]+)>") do
				output_text = output_text .. "|" .. word
			end
			yomi = "k"
		else
			output_text = gsub(original, ">([1-9])", "|k%1=")
			output_text = match(output_text, "|") and "|" .. output_text or false
		end
		yomi = yomi or "o"
		return "\n{{ja-kanjitab" .. (output_text or "") .. "|yomi=" .. yomi .. (args["yomi"] == "irr" and "" or sortkey or "") .. "}}", yomi
	end

	-- "{{<head><lang>|<term>|tr=..||<gloss><sort>}}" shared by bor/lbor/der.
	local function loan(head, lang, term_param)
		return head .. lang .. positional(term_param) .. named("tr") .. (args["t"] and "||" .. args["t"] or "") .. sort_suffix() .. "}}"
	end

	-- {{pre}} / {{suf}} with two components; a missing second component is
	-- emitted as an empty parameter instead of raising an error.
	local function affix(template, first_param, second_param)
		return "{{" .. template .. "|ja|" .. args[first_param] .. "|" .. (args[second_param] or "") .. sort_suffix() .. glosses(2) .. "}}"
	end

	if args["defs"] then
		emit("\n{{DEFAULTSORT:" .. args["defs"] .. "}}")
	end
	emit(sister_links("wp"))
	emit(sister_links("swp"))

	local pagename = mw.title.getCurrentTitle().text
	-- NOTE: a missing (nil) first argument also counts as "given" for the
	-- `first_given` checks below, while an explicitly empty one does not.
	local first_given = args[1] ~= ""
	local raw = first_given and args[1] or pagename
	raw = gsub(raw, "%-", "|")

	-- Strip the segmentation markup to obtain the plain reading.
	local text = raw
	if match(text, "<") then
		text = gsub(text, "[<>]", "")
	else
		text = gsub(text, "^[^>|]+>[0-9]+([^>|]+)", "%1")
		text = gsub(text, "|[^>|]+>[0-9]+([^>|]+)", "%1")
		-- Keep a "." between segments whose junction would otherwise read as a long vowel.
		text = gsub(text, "([あかがさざただなはばぱまやらわ])|(あ)", "%1.%2")
		text = gsub(text, "([いきぎしじちぢにひびぴみり])|(い)", "%1.%2")
		text = gsub(text, "([うくぐすずつづぬふむゆる])|(う)", "%1.%2")
		text = gsub(text, "([えけげせぜてでねへめれ])|([えい])", "%1.%2")
		text = gsub(text, "([おこごそぞとどのほぼぽもよろ])|([おう])", "%1.%2")
		text = gsub(text, "|", "")
	end

	-- The sort key must be known before the kanjitab is built, because
	-- make_tab appends it to {{ja-kanjitab}}.
	sortkey = export.script(text) == "Kana" and export.sort(text) or false
	if sortkey and sortkey == "|sort=" .. text then
		sortkey = false
	end

	if match(pagename, "[㐀-䶵一-鿌\239\164\128-\239\171\153𠀀-𯨟]") then
		local tab
		tab, yomi = make_tab(raw, args["yomi"])
		emit(tab)
	end

	emit(other("alt", "Alternative forms"))

	if args["d"] or args["e"] or args["we1"] or args["b"] or args["lb"] or args["co1"] or args["et"] or args["pr1"] or args["su1"] then
		emit("\n\n===Etymology===\n")
		-- Only the first matching etymology kind is emitted.
		if args["we1"] then
			emit("{{waei|" .. args["we1"] .. positional("we2") .. sort_suffix() .. "}}")
		elseif args["pr1"] then
			emit(affix("pre", "pr1", "pr2"))
		elseif args["su1"] then
			emit(affix("suf", "su1", "su2"))
		elseif args["co1"] then
			emit("{{com|ja|" .. args["co1"] .. "|" .. (args["co2"] or "") .. positional_series("co", 3, 6) .. glosses(6) .. sort_suffix() .. "}}")
		elseif args["et"] then
			emit("{{cal|ja|etyl lang=" .. (args["el"] or "en") .. "|etyl term=" .. args["et"] .. named("nocap") .. positional_series("ca", 1, 6) .. glosses(6) .. sort_suffix() .. "}}")
		elseif args["b"] then
			emit(loan("{{bor|ja|", args["bl"] or "en", "b"))
		elseif args["lb"] then
			emit(loan("{{lbor|ja|", args["lbl"] or "grc", "lb"))
		else
			emit(args["e"] or ("From " .. loan("{{der|ja|", args["dl"] or "en", "d")))
		end
	end

	if not args["nop"] then
		emit("\n\n===Pronunciation===\n{{ja-pron" .. (first_given and "|" .. gsub(text, "%^", "") or ""))
		local y = args["y"]
		if y == "n" then
			-- reading type explicitly suppressed
		elseif y then
			emit("|y=" .. y)
		elseif yomi then
			emit("|y=" .. (yomi == "irr" and "i" or yomi))
		end
		emit(named("acc") .. named("acc2") .. named("acc3") .. named("acc4") .. named("acc_ref") .. named("acc2_ref") .. named("acc3_ref") .. named("acc4_ref") .. named("dev") .. "}}")
		if args["hmp"] then
			emit("\n* {{hmp|lang=ja|" .. args["hmp"] .. positional_series("hmp", 2, 6) .. sort_suffix() .. "}}")
		end
	end

	-- Each entry: { heading, template suffix, has a dedicated {{ja-*}} template
	-- (otherwise {{ja-pos}} is used), optional second heading, its template suffix }.
	local pos_table = {
		[""] = { "Noun", "noun", true },
		["n"] = { "Noun", "noun", true },
		["s"] = { "Noun", "noun", true, "Verb", "verb-suru" },
		["noun"] = { "Noun", "noun", true },
		["suru"] = { "Noun", "noun", true, "Verb", "verb-suru" },
		["an"] = { "Adjective", "adj", true, "Noun", "noun" },
		["anoun"] = { "Adjective", "adj", true, "Noun", "noun" },
		["v"] = { "Verb", "verb", true },
		["verb"] = { "Verb", "verb", true },
		["vform"] = { "Verb", "verb form", true },
		["verb form"] = { "Verb", "verb form", true },
		["a"] = { "Adjective", "adj", true },
		["adj"] = { "Adjective", "adj", true },
		["adjective"] = { "Adjective", "adj", true },
		["adv"] = { "Adverb", "adverb", false },
		["adverb"] = { "Adverb", "adverb", false },
		["pron"] = { "Pronoun", "pronoun", false },
		["pronoun"] = { "Pronoun", "pronoun", false },
		["pn"] = { "Proper noun", "proper", false },
		["propn"] = { "Proper noun", "proper", false },
		["proper"] = { "Proper noun", "proper", false },
		["proper noun"] = { "Proper noun", "proper", false },
		["ph"] = { "Phrase", "phrase", true },
		["phrase"] = { "Phrase", "phrase", true },
		["interjection"] = { "Interjection", "interjection", false },
		["intj"] = { "Interjection", "interjection", false },
		["conj"] = { "Conjunction", "conjunction", false },
		["part"] = { "Particle", "particle", false },
		["prep"] = { "Preposition", "preposition", false },
		["suf"] = { "Suffix", "suffix", false },
		["suffix"] = { "Suffix", "suffix", false },
		["pref"] = { "Prefix", "prefix", false },
		["prefix"] = { "Prefix", "prefix", false },
		["prov"] = { "Proverb", "proverb", false },
	}

	-- Fetch a pos_table entry, failing with a readable message for unknown codes.
	local function lookup_pos(code)
		local entry = pos_table[code]
		if not entry then
			error('Unrecognized part of speech code "' .. tostring(code) .. '"')
		end
		return entry
	end

	local primary = lookup_pos(args[2] ~= "" and args[2] or "n")
	local text_arg = first_given and "|" .. text or ""
	local stem_arg = first_given and "|" .. sub(text, 1, -2) or ""

	emit("\n\n===" .. primary[1] .. "===\n{{ja-" .. (not primary[3] and "pos|" or "") .. primary[2] ..
		text_arg .. named("kyu") .. named("sin", "shin") .. positional("ak") .. positional("ak2") .. named("ro", "rom") .. named("hh", "hhira") .. named("hk", "hkata"))

	if primary[1] == "Adjective" then
		emit("|infl=" .. (args["infl"] or "na"))
	end

	emit(named("type") .. named("tr") .. "}}")
	emit("\n\n# " .. (args[3] or "{{rfdef|ja}}"))

	if primary[1] == "Adjective" then
		emit("\n\n====Inflection====\n")
		if args["infl"] == "i" or args["infl"] == "い" then
			emit("{{ja-i" .. stem_arg .. "}}")
		else
			emit("{{ja-na" .. text_arg .. "}}")
		end
	end

	if primary[2] == "verb" then
		emit("\n\n====Conjugation====\n{{ja-")

		local function romanize(char)
			return data.kr[char] or char
		end

		-- `final` is the last kana itself; `penul`/`cons` are reduced to the
		-- vowel of the penultimate kana and the consonant of the last one.
		local final = sub(text, -1, -1)
		local penul = export.hira_to_kata(sub(text, -2, -2))
		local cons = export.hira_to_kata(final)
		penul = gsub(penul, ".", romanize)
		cons = gsub(cons, ".", romanize)
		penul = sub(penul, -1, -1)
		cons = sub(cons, 1, 1)
		if cons == "u" then
			cons = ""
		elseif cons == "c" then
			cons = "ts"
		end

		-- Frame arguments are strings, so the type is compared as text.
		if final == "る" and (penul == "i" or penul == "e") and tostring(args["type"]) == "2" then
			emit("ichi")
		else
			emit("go-" .. cons .. "u")
		end

		emit(stem_arg .. "}}")
	end

	if primary[4] and args[4] ~= "" then
		emit("\n\n===" .. primary[4] .. "===\n{{ja-" .. primary[5] .. text_arg ..
			named("type") .. named("tr") .. "}}\n\n# " .. (args[4] or "{{rfdef|ja}}"))

		if primary[4] == "Verb" then
			emit("\n\n====Conjugation====\n{{ja-suru" .. text_arg .. "}}")
		end
	end

	emit(other("syn", "=Synonyms="))
	emit(other("ant", "=Antonyms="))
	emit(other("der", "=Derived terms="))
	emit(other("rel", "=Related terms="))

	if args["dzh"] or args["dko"] or args["dvi"] then
		local lines = {}
		if args["dzh"] then
			lines[#lines + 1] = "* Chinese: {{zh-l|" .. args["dzh"] .. "}}"
		end
		if args["dko"] then
			lines[#lines + 1] = "* Korean: {{ko-l|" .. args["dko"] .. "}}"
		end
		if args["dvi"] then
			lines[#lines + 1] = "* Vietnamese: {{vi-l|" .. args["dvi"] .. "}}"
		end
		emit("\n\n===Descendants===\n" .. table.concat(lines, "\n"))
	end

	emit(other("ana", "Anagrams"))
	emit(other("also", "See also"))

	-- Any accent reference passed to {{ja-pron}} needs a place to be rendered.
	if args["acc_ref"] or args["acc2_ref"] or args["acc3_ref"] or args["acc4_ref"] then
		emit("\n\n===References===\n<references/>")
	end

	if args["cn"] then
		emit("\n\n{{cln|ja|" .. args["cn"] .. positional_series("cn", 2, 6) .. sort_suffix() .. "}}")
	end
	if args["ct"] then
		emit((args["cn"] and "\n" or "\n\n") .. "{{C|ja|" .. args["ct"] .. positional_series("ct", 2, 6) .. sort_suffix() .. "}}")
	end

	if args["k"] then
		local korean = lookup_pos(args["kp"] or "n")
		emit("\n\n----\n\n==Korean==\n{{ko-hanjatab}}\n\n===" .. korean[1] ..
			"===\n{{ko-" .. korean[2] .. "|hangeul=" .. args["k"] .. named("mr") .. named("yl", "y") .. "}}" ..
			"\n\n# {{hanja form of|" .. args["k"] .. "|" .. (args["kd"] or args[3] or "") .. "}}")
	end

	return table.concat(parts)
end

-- Hand the table of public functions back to whoever loads this module.
return export
Module:ja: difference between revisions

Revision as of 06:08, 5 June 2017

Testcases

Functions

Uses

Navigation menu

Search