Module:tengwar

From Wiktionary, the free dictionary
Jump to navigation Jump to search
This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Table of functions exposed by this module.
local export = {}

-- Text of the current page's title, read once at load time; convert2 falls
-- back to it when no text argument is supplied.
local PAGENAME = mw.title.getCurrentTitle().text

-- Frame-based entry point: reads its parameters from frame.args and
-- delegates to export.convert.
--   frame.args[1]  language code (required; an error is raised if absent)
--   frame.args[2]  font name, defaults to 'annatar'
--   frame.args[3]  text to convert, defaults to the current page name
-- Returns the converted string produced by export.convert.
function export.convert2(frame)
	-- Only the frame's own arguments are used. The parent frame's arguments
	-- were fetched here before but never read, and that lookup failed with a
	-- nil index whenever the frame had no parent.
	local params = frame.args
	local lang = params[1] or error("Language code has not been specified.")
	local font = params[2] or 'annatar'
	local text = params[3] or PAGENAME
	return export.convert(lang, font, text)
end

-- Convert a romanized text into the character codes of a Tengwar font.
--   lang  'qya' (Quenya), 'sjn' (Sindarin, Tehtar mode) or
--         'sjn2' (Sindarin, mode of Beleriand); anything else raises an error
--   font  passed through to the language-specific converter
--   text  the text to convert; it is lower-cased first
-- Returns the converted string with all helper marks removed.
function export.convert(lang, font, text)
	text = mw.ustring.lower(text)
	text = mw.ustring.gsub(text, 'appendix:[^/]+/', '')	-- remove Appendix:<Language>/
	text = '␂' .. text .. '␃'	-- mark start and end of text
	-- '%.' is the pattern escape for a literal dot. The former '[\.]' relied on
	-- Lua 5.1 tolerating the unknown string escape '\.'.
	text = mw.ustring.gsub(text, '%.', '‡')	-- unused character for syllable separator

	if lang == 'qya' then	-- Quenya
		text = export.conv_qya(font, text)
	elseif lang == 'sjn' then	-- Sindarin, Tehtar mode
		text = export.conv_sjn(font, text)
	elseif lang == 'sjn2' then	-- Sindarin, mode of Beleriand
		text = export.conv_sjn2(font, text)
	else
		error("Language code is not supported.")
	end
	text = mw.ustring.gsub(text, '[‡␂␃]', '')	-- remove syllable separator and start/end marks
	return text
end

-- Re-encode a converted text for the Annatar font (called by the conv_*
-- functions when font == 'annatar').
--   text  string to re-encode
-- Returns the re-encoded string.
-- The substitutions run one after another over the whole text, so each pass
-- sees the output of the previous ones; the order of the table entries matters.
function export.tel2ana(text)
	-- Parallel tables: repltel[i] is searched for (as a pattern) and replaced by replana[i].
	-- NOTE(review): most repltel entries read as empty strings in this copy;
	-- presumably they hold font-specific codepoints that were lost - confirm
	-- against the live module before editing.
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '-', ',', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	-- NOTE(review): '\+' is not a defined Lua string escape; Lua 5.1 reads it as '+'.
	local replana = {'1Í', '1[', '1\'', 'q[', 'q\'', 'a[', 'a\'', 'z[', '2Ì', '2{', 'w{', 's{', 'x{', 'xè', '3[', 'c[', 'cé', '4Ì', '5Ì', '5{', '5"', 't{', 't"', 'hÍ', '7Í', '7\'', 'j´', 'j¸', '9Ï', '\\', '¹', '1', 'q', 'a', 'z', '2', 'w', 's', 'x', '3', 'e', 'd', 'c', '4', 'r', 'f', 'v', '5', 't', 'g', 'b', '6', 'y', 'h', 'n', '7', 'u', 'j', 'm', '8', 'i', 'k', ',', '9', 'o', 'l', 'l', '.', '~', '½', '`', ']', 'E', 'Õ', 'T', 'R', 'Y', 'å', 'U', '\+', 'â', 'ë'}
	
	for i=1, #repltel do
		text = mw.ustring.gsub(text, repltel[i], replana[i])
	end
		
	-- adjust diacritics to the letter width in Annatar
	-- vowdef lists the default vowel codes; the other three tables give, index
	-- by index, the variant used after wide, round and narrow base letters.
	local vowdef = {'E', 'R', 'T', 'Y', 'U', 'Õ'}
	-- '&#x25;' is the HTML character reference for '%' (presumably chosen because
	-- a bare '%' is special in a gsub replacement string - confirm).
	-- '\$' and '\^' are likewise undefined escapes that Lua 5.1 reads as '$' and '^'.
	-- The seventh entry is never read: the loop below stops at #vowdef.
	local vowwide = {'#' ,'\$', '&#x25;', '\^', '&', 'Ô', ''}
	local vowround = {'D' ,'F', 'G', 'H', 'J', 'Ö'}
	local vownarr = {'C' ,'V', 'B', 'N', 'M', '×'}
	
	-- Each pattern captures a base letter from the given set, optionally followed
	-- by one modifier character, and swaps the default vowel code after it for
	-- the matching variant.
	for i=1, #vowdef do
		text = mw.ustring.gsub(text, '([245rwtfsgvxbj][\'"´ÍÌÏ{[]?)'..vowdef[i], '%1'..vowwide[i])
		text = mw.ustring.gsub(text, '([9~`][\'"´ÍÌÏ{[]?)'..vowdef[i], '%1'..vownarr[i])
		text = mw.ustring.gsub(text, '([3ei,.][\'"´ÍÌÏ{[]?)'..vowdef[i], '%1'..vowround[i])
	end
	return text
end

-- Convert a Quenya text (language code 'qya' in export.convert).
--   font  when equal to 'annatar', the result is additionally passed through export.tel2ana
--   text  lower-cased text wrapped in the start/end marks added by export.convert
-- Returns the converted string.
-- All steps are sequential whole-text substitutions; their order matters.
function export.conv_qya(font, text)
	-- merge duplicate transcriptions (replaced by the easier processable one)
	-- dublfind[i] is rewritten to duplrepl[i], in table order.
	local dublfind = {'x', 'z', 'qu', 'mb', 'nd', 'ng', 'nw', 'th', 'j', 'k', 'â', 'ê', 'î', 'ô', 'û'}
	local duplrepl = {'cs', 'ss', 'cw', 'b', 'd', 'g', 'ñw', 'þ', 'y', 'c', 'á', 'é', 'í', 'ó', 'ú'}
	
	for i=1, #dublfind do
		text = mw.ustring.gsub(text, dublfind[i], duplrepl[i])
	end
	
	-- define arrays for conversion
	-- Longer sequences are listed before the single letters they contain, so
	-- they are replaced first.
	-- NOTE(review): every repltel entry reads as an empty string in this copy;
	-- presumably the font codepoints were lost - confirm against the live module.
	local arfind = {'gw', 'ncw', 'ndy', 'nty', 'cc',  'dy', 'hl', 'hr', 'hw', 'hy', 'ld', 'll', 'ly', 'mm', 'mp', 'nc', 'nn', 'nt', 'ñw', 'ny', 'pp', 'ps', 'cs', 'cw', 'rd', 'rr', 'ry', 'ss', 'ts', 'tt', 'ty', 'b', 'c', 'd', 'f', 'g', 'h', 'l', 'm', 'n', 'ñ', 'p', 'r', 's', 't', 'v', 'w', 'y', 'þ', 'ai', 'au', 'eu', 'iu', 'oi', 'ui', 'á', 'é', 'í', 'ó', 'ú', 'a', 'e', 'i', 'o', 'u'}
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	
	-- detect hiatus onsets
	-- NOTE(review): as written here the four replacements below reproduce their
	-- match unchanged ('%1%2'); presumably a marker character belongs between
	-- the two captures - confirm.
	text = mw.ustring.gsub(text, '([äöë])([aeiou])', '%1%2')
	text = mw.ustring.gsub(text, '([aeiou])([äöëaeo])', '%1%2')
	text = mw.ustring.gsub(text, '([ei])(i)', '%1%2')
	text = mw.ustring.gsub(text, '([ou])(u)', '%1%2')
	-- drop the diaeresis once it has been evaluated
	text = mw.ustring.gsub(text, 'ä', 'a')
	text = mw.ustring.gsub(text, 'ë', 'e')
	text = mw.ustring.gsub(text, 'ö', 'o')
	
	-- convert to Telcontar (base font, as it uses own codepoints)
	for i=1, #arfind do
		text = mw.ustring.gsub(text, arfind[i], repltel[i])
	end
	
	-- detect initial vowel
	-- NOTE(review): the character sets in this and the following patterns look
	-- incomplete in this copy - confirm against the live module.
	text = mw.ustring.gsub(text, '([^-])([])', '%1%2')
	
	-- switch consonants, that have another form before vowels
	-- cons2find[i] is replaced by cons2repl[i] when followed by a character of the given set.
	local cons2find = {'', '', '', ''}
	local cons2repl = {'',  '',  '', ''}
	-- before short, long vowels and diphthongs
	for i=1, 2 do
		text = mw.ustring.gsub(text, cons2find[i]..'([])', cons2repl[i]..'%1')
	end
	-- before short vowels only
	for i=3, 4 do
		text = mw.ustring.gsub(text, cons2find[i]..'([])', cons2repl[i]..'%1')
	end
	
	if font == 'annatar' then	-- convert to Annatar
		text = export.tel2ana(text)
	end
	return text
end

-- Convert a Sindarin text in Tehtar mode (language code 'sjn' in export.convert).
--   font  when equal to 'annatar', the result is additionally passed through export.tel2ana
--   text  lower-cased text wrapped in the start/end marks added by export.convert
-- Returns the converted string.
-- All steps are sequential whole-text substitutions; their order matters.
function export.conv_sjn(font, text)
	text = mw.ustring.gsub(text, 'f␃', 'v␃')	-- final f to v
	
	-- merge duplicate transcriptions (replaced by the easier processable one)
	-- dublfind[i] is rewritten to duplrepl[i], in table order.
	local dublfind = {'dh', 'ph', 'th', 'k', 'â', 'ê', 'î', 'ô', 'û', 'ŷ', 'œ', 'ai', 'ae', 'au', 'ei', 'oe', 'ui'}
	local duplrepl = {'ð', 'f', 'þ', 'c', 'á', 'é', 'í', 'ó', 'ú', 'ý', 'e', 'aÿ', 'aj', 'aw', 'eÿ', 'oj', 'uÿ'}
	
	for i=1, #dublfind do
		text = mw.ustring.gsub(text, dublfind[i], duplrepl[i])
	end
	
	-- define arrays for conversion
	-- Longer sequences are listed before the single letters they contain, so
	-- they are replaced first.
	-- NOTE(review): every repltel entry reads as an empty string in this copy;
	-- presumably the font codepoints were lost - confirm against the live module.
	local arfind = {'chw', 'nch',  'nþ', 'ch', 'ð', 'gh', 'gw', 'hw', 'lh', 'll', 'mb', 'mm', 'mp', 'nc', 'nd', 'ng', 'nn', 'nt', 'rh', 'rr', 'ss', 'þ', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'ŋ', 'p', 'r', 's', 't', 'v', 'w', 'á', 'é', 'í', 'ó', 'ú', 'ý', 'ÿ', '\''}
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	
	-- detect initial i glide
	-- (an i that follows a non-letter and precedes a, e, o or u becomes j)
	text = mw.ustring.gsub(text, '([^a-zÿðþ])i([aeou])', '%1j%2')
	-- detect initial ng
	text = mw.ustring.gsub(text, '([^a-zÿðþ])ng([aeou])', '%1ŋ%2')
	-- detect hiatus onsets
	-- NOTE(review): as written here the four replacements below reproduce their
	-- match unchanged ('%1'); presumably a marker character is missing - confirm.
	text = mw.ustring.gsub(text, '([äöë][aeiouy])', '%1')
	text = mw.ustring.gsub(text, '([aeiouy][äöëao])', '%1')
	text = mw.ustring.gsub(text, '([iy][iuy])', '%1')
	-- NOTE(review): Lua patterns have no alternation, so '|' is matched as a
	-- literal character here and this pattern cannot match "ee", "oi" or "uu"
	-- on their own - confirm the intent.
	text = mw.ustring.gsub(text, '([eu]e|oi|[eou]u)', '%1')
	-- drop the diaeresis once it has been evaluated
	text = mw.ustring.gsub(text, 'ä', 'a')
	text = mw.ustring.gsub(text, 'ë', 'e')
	text = mw.ustring.gsub(text, 'ö', 'o')
	-- detect final vowel
	-- NOTE(review): the character sets in this and several following patterns
	-- look incomplete in this copy - confirm against the live module.
	text = mw.ustring.gsub(text, '([^])([aeiouy][^a-zÿðþ])', '%1%2')
	
	-- convert to Telcontar (base font, as it uses own codepoints)
	for i=1, #arfind do
		text = mw.ustring.gsub(text, arfind[i], repltel[i])
	end
	
	-- Short vowels are still plain letters at this point; vowfind[i] maps to vowrepl[i].
	local vowfind  = {'a', 'e', 'i', 'o', 'u', 'y'}
	local vowrepl  = {'', '', '', '', '', ''}
	
	-- move vowel marks to the next consonant
	for i=1, #vowfind do
		text = mw.ustring.gsub(text, '([^])'..vowfind[i]..'([-][]?)', '%1%2'..vowrepl[i])
		text = mw.ustring.gsub(text, '([^])'..vowfind[i]..'([-][]?)', '%1%2'..vowrepl[i])	-- execute twice because of overlap
		-- any vowel letter still left is replaced in place
		text = mw.ustring.gsub(text, vowfind[i], vowrepl[i])
	end
	
	-- switch consonants, that have another form before vowels
	-- cons2find[i] is replaced by cons2repl[i] when followed by the given context.
	local cons2find = {'', '', ''}
	local cons2repl = {'', '', ''}
	-- before short, long vowels and diphthongs
	text = mw.ustring.gsub(text, cons2find[1]..'([]?[-][])', cons2repl[1]..'%1')
	-- carrying vowel mark
	for i=2, 3 do
		text = mw.ustring.gsub(text, cons2find[i]..'([])', cons2repl[i]..'%1')
	end
	
	if font == 'annatar' then	-- convert to Annatar
		text = export.tel2ana(text)
	end
	
	return text
end

-- Convert a Sindarin text in the mode of Beleriand (language code 'sjn2' in export.convert).
--   font  when equal to 'annatar', the result is additionally passed through export.tel2ana
--   text  lower-cased text wrapped in the start/end marks added by export.convert
-- Returns the converted string.
-- All steps are sequential whole-text substitutions; their order matters.
function export.conv_sjn2(font, text)
	text = mw.ustring.gsub(text, 'f␃', 'v␃')	-- final f to v
	
	-- merge duplicate transcriptions (replaced by the easier processable one)
	-- dublfind[i] is rewritten to duplrepl[i], in table order.
	local dublfind = {'dh', 'ph', 'th', 'k', 'â', 'ê', 'î', 'ô', 'û', 'ŷ'}
	local duplrepl = {'ð', 'f', 'þ', 'c', 'á', 'é', 'í', 'ó', 'ú', 'ý'}
	
	for i=1, #dublfind do
		text = mw.ustring.gsub(text, dublfind[i], duplrepl[i])
	end
	
	-- define arrays for conversion
	-- Longer sequences are listed before the single letters they contain, so
	-- they are replaced first. Vowels are entries of their own in this table.
	-- NOTE(review): every repltel entry reads as an empty string in this copy;
	-- presumably the font codepoints were lost - confirm against the live module.
	local arfind = {'chw', 'nch', 'nþ', 'ch', 'ð', 'gh', 'gw', 'hw', 'lh', 'mb', 'mm', 'mp', 'nc', 'nd', 'ng', 'nn', 'nt', 'rh', 'ss', 'þ', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'ŋ', 'p', 'r', 's', 't', 'v', 'w', 'ai', 'ae', 'au', 'ei', 'oe', 'ui', 'a', 'ä', 'e', 'ë', 'i', 'o', 'ö', 'u', 'y', 'œ', 'á', 'é', 'í', 'ó', 'ú', 'ý', '\''}
	local repltel = {'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''}
	
	-- detect initial ng
	-- (ng that follows a non-letter and precedes a, e, o or u becomes ŋ)
	text = mw.ustring.gsub(text, '([^a-zðþ])ng([aeou])', '%1ŋ%2')
	-- detect final au
	-- (aw directly before a non-letter is rewritten to au)
	text = mw.ustring.gsub(text, 'aw([^a-zðþ])', 'au%1')
	
	-- convert to Telcontar (base font, as it uses own codepoints)
	for i=1, #arfind do
		text = mw.ustring.gsub(text, arfind[i], repltel[i])
	end
	
	if font == 'annatar' then	-- convert to Annatar
		text = export.tel2ana(text)
	end
	
	return text
end

-- Hand the table of exported functions back to whoever loads this module.
return export