Module:Jpan-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate text in the Japanese script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Jpan-translit/testcases.

Functions[edit]

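tr(text, lang, sc, options)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang. The optional options argument is passed on unchanged to the kana transliterator loaded from Module:Hrkt-translit.
When the transliteration fails, returns nil.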

local find = mw.ustring.find
local gsub = mw.ustring.gsub
local kana_to_romaji = require("Module:Hrkt-translit")
local match = mw.ustring.match
local upper = string.uupper

-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}

--- Report whether `str` is acceptable romaji.
-- Every character that the `%A` class matches (non-letters such as spaces,
-- hyphens, apostrophes and digits) is ignored; each remaining letter must be
-- a basic Latin letter or one of the macron vowels ĀĪŪĒŌ / āīūēō.
-- @param str candidate transliteration. Anything that is not a string is
--   rejected instead of raising an error (NOTE(review): presumably the kana
--   transliterator can return nil on failure -- confirm against that module).
-- @return true if `str` is a string whose letters are all permitted,
--   false otherwise.
local function is_good_romaji(str)
	if type(str) ~= "string" then
		return false
	end
	-- Drop everything that is not a letter so that only letters are validated.
	local letters = gsub(str, "%A", "")
	-- "A-Za-z", not "A-z": the latter range would also cover the ASCII
	-- punctuation that sits between "Z" and "a".
	return match(letters, "[^A-Za-zĀĪŪĒŌāīūēō]") == nil
end

--- Adjust a romanization according to a part-of-speech name.
-- @param rom romanized text (string).
-- @param pos part-of-speech name:
--   "proper": if `rom` contains no uppercase letter, the first letter of
--     every run of letters is uppercased; otherwise `rom` is left alone.
--   "prefix": `rom` is made to end with exactly one added "-" unless it
--     already ends with "-".
--   "suffix", "counter", "classifier": `rom` is made to start with "-"
--     unless it already does.
--   anything else: `rom` is returned unchanged.
-- @return the adjusted romanization.
local function format_pos_romaji(rom, pos)
	if pos == "proper" then
		-- Respect capitalization that is already present.
		if find(rom, "%u") then return rom end
		return (gsub(rom, "%f[%a]%a", upper))
	elseif pos == "prefix" then
		-- Explicit check instead of gsub("%-?$", "-"): in Lua 5.1 that pattern
		-- matches once on the trailing "-" and again on the empty string at
		-- the end, turning "o-" into "o--".
		if rom:sub(-1) == "-" then return rom end
		return rom .. "-"
	elseif pos == "suffix" or pos == "counter" or pos == "classifier" then
		if rom:sub(1, 1) == "-" then return rom end
		return "-" .. rom
	else
		return rom
	end
end

--- Transliterate `text`, preferring a reading found on the page named `text`.
--
-- The text itself is first run through the kana transliterator; the result is
-- kept as the fallback `rom_title` only if it passes `is_good_romaji`.
-- Then the wikitext of the page whose title is `text` is scanned for
-- {{ja-...}} templates. Each unnamed, link-free template argument is
-- transliterated, and every result that is good romaji becomes a candidate.
-- A template with no such argument contributes `rom_title` as its candidate.
--
-- @param text text to transliterate; also used as the title of the page to read.
-- @param lang language code, forwarded to the kana transliterator.
-- @param sc script code, forwarded to the kana transliterator.
-- @param options forwarded unchanged to the kana transliterator.
-- @return the single candidate all templates agree on; `rom_title` if two
--   candidates differ, if no candidate is found, or if the page cannot be
--   read (which may itself be nil).
function export.tr(text, lang, sc, options)
	local rom_result
	
	local rom_title = kana_to_romaji(text, lang, sc, options)
	if not (rom_title and is_good_romaji(rom_title)) then rom_title = nil end
	
	-- mw.title.new returns nil when the text is not a valid page title, so the
	-- title object must be checked before its content is requested.
	local title = mw.title.new(text)
	local pagetext = title and title:getContent()
	if not pagetext then
		return rom_title
	end
	
	for _, tn in ipairs{"noun", "verb", "verb%-suru", "adj", "phrase", "combining form", "verb form", "see"} do
		-- Template used without any argument: the title reading is its candidate.
		if rom_title and pagetext:match("{{ja%-" .. tn .. "}}") then
			if rom_result and rom_result ~= rom_title then return rom_title end
			rom_result = rom_title
		end
		-- Template used with arguments: `t` is the text from the first "|"
		-- up to and including the first "}" of the closing braces.
		for t in pagetext:gmatch("{{ja%-" .. tn .. "(|..-})}") do
			local no_kana = true
			-- Walk the "|"-separated arguments one by one.
			for tt in t:gmatch"%f[^|]..-%f[|}]" do
				-- Skip arguments matching the "%D.*=" test (name=value form) and
				-- arguments that contain wikilink brackets.
				if not tt:match"%D.*=" and not tt:match"%[%[" and not tt:match"]]" then
					local rom = kana_to_romaji(tt, lang, sc, options)
					if rom and is_good_romaji(rom) then
						no_kana = false
						-- Two different readings: fall back to the title reading.
						if rom_result and rom_result ~= rom then return rom_title end
						rom_result = rom
					end
				end
			end
			if rom_title and no_kana then
				if rom_result and rom_result ~= rom_title then return rom_title end
				rom_result = rom_title
			end
		end
	end
	
	-- {{ja-pos|POS|...}}: same scan, but each candidate is adjusted for POS.
	for t in pagetext:gmatch"{{ja%-pos|(..-})}" do
		-- Split into the part of speech and the remaining "|..." arguments;
		-- `ta` is nil when nothing follows the part of speech.
		local pos, ta = t:match"^(..-)(|..-})$"
		if ta then
			local no_kana = true
			for tt in ta:gmatch"%f[^|]..-%f[|}]" do
				if not tt:match"%D.*=" and not tt:match"%[%[" and not tt:match"]]" then
					local rom = kana_to_romaji(tt, lang, sc, options)
					if rom and is_good_romaji(rom) then
						no_kana = false
						rom = format_pos_romaji(rom, pos)
						if rom_result and rom_result ~= rom then return rom_title end
						rom_result = rom
					end
				end
			end
			if rom_title and no_kana then
				local rom = format_pos_romaji(rom_title, pos)
				if rom_result and rom_result ~= rom then return rom_title end
				rom_result = rom
			end
		elseif rom_title then
			-- Only a part of speech was given; strip the trailing "}" from it.
			local rom = format_pos_romaji(rom_title, t:sub(1, -2))
			if rom_result and rom_result ~= rom then return rom_title end
			rom_result = rom
		end
	end
	
	return rom_result or rom_title
end

-- A hack to bypass [[mod:languages]] bug [[special:diff/72585061]]
-- Wraps the export.tr defined above: every "'" in its result is replaced by
-- the output of a <nowiki>'</nowiki> extension tag. A nil result is passed
-- through as nil.
local f_tr = export.tr
function export.tr(...)
	local rom = f_tr(...)
	-- Plain-text search: only ask the parser for a nowiki tag when there is
	-- actually an apostrophe to replace.
	if rom and rom:find("'", 1, true) then
		local apostrophe = mw.getCurrentFrame():extensionTag("nowiki", "'")
		-- A function replacement is inserted verbatim, so a "%" in the tag
		-- output could never be read as a gsub escape sequence.
		return (rom:gsub("'", function() return apostrophe end))
	end
	return rom
end

-- Hand the table of exported functions to whoever loads this module.
return export