Module:User:Mzajac/transform

This module lacks a documentation subpage. You may create it.
Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Mzajac, for their own experimentation. Items in this module may be added and removed at Mzajac's discretion; do not rely on this module's stability.
-- Transliterations
--
-- invoke with {{#invoke:User:Mzajac/transform|romanize|[text]|method=[method]}}
--
-- where method=
--     scholarly (DEFAULT): According to [[Wiktionary:Ukrainian transliteration]], following Daniels and 
--         Bright (1996) ''World’s Writing Systems''.
--     alaloc: ALA–LC, 1997 http://www.loc.gov/catdir/cpso/roman.html
--     bgn: BGN/PCGN 1965 http://libraries.ucsd.edu/bib/fed/USBGN_romanization.pdf
--     iso-1968: ISO/R 9:1968, (Ukrainian language-specific) variant 1.
--     iso: ISO 9:1995
--     ungegn: UNGEGN, after the Ukrainian National system, 2012 http://www.eki.ee/wgrs/rom1_uk.pdf.
--     all: every method above, comma-separated, each followed by its short name in parentheses.

-- Bugs
--     [none]

-- To do
--
--    Distinguish all caps from initial caps in context, 
--       e.g., ХАТА = KHATA (not KhATA); Хата = Khata (not KHata); хата = khata
--    Convert only single apostrophes within words, or before soft vowels, to distinguish them from single 
--       quotation marks and wikitext emphasis.
--    Set the order for method="all"
-- 
--   Tables for 
--      uk-Latn-t-uk-Cyrl-m0-iso-1968-v2 ?= uk-Latn-x-british (British Standard)


-- Configuration

-- default romanization method
local methodDefault = "scholarly"

-- characters to be replaced
-- The uppercase І in this class must be CYRILLIC CAPITAL LETTER
-- BYELORUSSIAN-UKRAINIAN I (U+0406), not the look-alike Latin I (U+0049);
-- otherwise Cyrillic І is never matched and Latin I in the input is.
local searchDefault = "[АБВГҐДЕЄЖЗИІЇЙКЛМНОПРСТУФХЦЧШЩЬЮЯЪЁЫѢЭѪѲѴабвгґдеєжзиіїйклмнопрстуфхцчшщьюяъёыѣэѫѳѵ'’ʼ]"

local transform = {}

-- load transliteration tables from a data module
-- * Cyrillic characters as table indexes seem to fail when imported through mw.loadData *
-- * USING LOCAL DATA INSTEAD *
--[[ 
local ttable = mw.loadData('Module:User:Mzajac/transform/uk-Latn-t-uk-Cyrl') 
]]--


-- Ukrainian Romanization tables
--
-- Each entry is keyed by the method name accepted by transform.romanize and holds:
--     shortname  label shown in parentheses when a conversion is labelled
--     name       full name, used in the title attributes
--     lang       language tag written to the lang attribute
--     patterns   sequence of { search = <pattern>, replace = <string or table> }
--                steps, passed in order to mw.ustring.gsub by transform.convert
--
-- The key for uppercase І in every "replace" table is the Cyrillic letter
-- (U+0406), matching searchDefault.
--
-- NOTE(review): the "�" values (U+FFFD, replacement character) look like
-- placeholders for letters that have no romanization in the given system — confirm.
local ttable = {

    ["scholarly"] = {
        ["shortname"] = "Scholarly",
        ["name"] = "Scholarly",
        ["lang"] = "uk-Latn-t-uk-Cyrl-x-scholarly",
        ["patterns"] = {
            [1] = {
                ["search"] = searchDefault,
                ["replace"] = {
                    ["А"] = "A",
                    ["а"] = "a",
                    ["Б"] = "B",
                    ["б"] = "b",
                    ["В"] = "V",
                    ["в"] = "v",
                    ["Г"] = "H",
                    ["г"] = "h",
                    ["Ґ"] = "G",
                    ["ґ"] = "g",
                    ["Д"] = "D",
                    ["д"] = "d",
                    ["Е"] = "E",
                    ["е"] = "e",
                    ["Є"] = "Je",
                    ["є"] = "je",
                    ["Ж"] = "Ž",
                    ["ж"] = "ž",
                    ["З"] = "Z",
                    ["з"] = "z",
                    ["И"] = "Y",
                    ["и"] = "y",
                    ["І"] = "I",
                    ["і"] = "i",
                    ["Ї"] = "Ji",
                    ["ї"] = "ji",
                    ["Й"] = "J",
                    ["й"] = "j",
                    ["К"] = "K",
                    ["к"] = "k",
                    ["Л"] = "L",
                    ["л"] = "l",
                    ["М"] = "M",
                    ["м"] = "m",
                    ["Н"] = "N",
                    ["н"] = "n",
                    ["О"] = "O",
                    ["о"] = "o",
                    ["П"] = "P",
                    ["п"] = "p",
                    ["Р"] = "R",
                    ["р"] = "r",
                    ["С"] = "S",
                    ["с"] = "s",
                    ["Т"] = "T",
                    ["т"] = "t",
                    ["У"] = "U",
                    ["у"] = "u",
                    ["Ф"] = "F",
                    ["ф"] = "f",
                    ["Х"] = "X",
                    ["х"] = "x",
                    ["Ц"] = "C",
                    ["ц"] = "c",
                    ["Ч"] = "Č",
                    ["ч"] = "č",
                    ["Ш"] = "Š",
                    ["ш"] = "š",
                    ["Щ"] = "Šč",
                    ["щ"] = "šč",
                    ["Ь"] = "ʹ",
                    ["ь"] = "ʹ",
                    ["Ю"] = "Ju",
                    ["ю"] = "ju",
                    ["Я"] = "Ja",
                    ["я"] = "ja",
                    ["'"] = "ʺ", -- apostrophe
                    ["’"] = "ʺ", -- right single quotation mark
                    ["ʼ"] = "ʺ", -- modifier letter apostrophe

                    -- Archaic letters
                    ["Ъ"] = "ʺ",
                    ["ъ"] = "ʺ",
                    ["Ё"] = "Ë",
                    ["ё"] = "ë",
                    ["Ы"] = "Y",
                    ["ы"] = "y",
                    ["Ѣ"] = "Ě",
                    ["ѣ"] = "ě",
                    ["Э"] = "È",
                    ["э"] = "è",
                    ["Ѫ"] = "�",
                    ["ѫ"] = "�",
                    ["Ѳ"] = "�",
                    ["ѳ"] = "�",
                    ["Ѵ"] = "�",
                    ["ѵ"] = "�",
                }
            }
        }
    },

    ["ungegn"] = {
        ["shortname"] = "UNGEGN",
        ["name"] = "Ukrainian National/UNGEGN",
        ["lang"] = "uk-Latn-t-uk-Cyrl-m0-ungegn-2012",
        ["patterns"] = {
            -- note 1 [Г]: “gh is used in the romanization of зг (zgh)”
            [1] = {
                ["search"] = "[зЗ][гГ]",
                ["replace"] = {
                    ["ЗГ"] = "ZGH",
                    ["Зг"] = "Zgh",
                    ["зг"] = "zgh",
                }
            },
            -- note 2 [Є, Ї, Й, Ю, Я]: “The second variant is used at the beginning of a word”
            [2] = {
                ["search"] = "([^%a'’ʼ])Є",
                ["replace"] = "%1Ye",
            },
            [3] = {
                ["search"] = "([^%a'’ʼ])є",
                ["replace"] = "%1ye",
            },
            [4] = {
                ["search"] = "([^%a'’ʼ])Ї",
                ["replace"] = "%1Yi",
            },
            [5] = {
                ["search"] = "([^%a'’ʼ])ї",
                ["replace"] = "%1yi",
            },
            [6] = {
                ["search"] = "([^%a'’ʼ])Й",
                ["replace"] = "%1Y",
            },
            [7] = {
                ["search"] = "([^%a'’ʼ])й",
                ["replace"] = "%1y",
            },
            [8] = {
                ["search"] = "([^%a'’ʼ])Ю",
                ["replace"] = "%1Yu",
            },
            [9] = {
                ["search"] = "([^%a'’ʼ])ю",
                ["replace"] = "%1yu",
            },
            [10] = {
                ["search"] = "([^%a'’ʼ])Я",
                ["replace"] = "%1Ya",
            },
            [11] = {
                ["search"] = "([^%a'’ʼ])я",
                ["replace"] = "%1ya",
            },
            -- Default 1-letter replacements
            [12] = {
                ["search"] = searchDefault,
                ["replace"] = {
                    ["А"] = "A",
                    ["а"] = "a",
                    ["Б"] = "B",
                    ["б"] = "b",
                    ["В"] = "V",
                    ["в"] = "v",
                    ["Г"] = "H",
                    ["г"] = "h",
                    ["Ґ"] = "G",
                    ["ґ"] = "g",
                    ["Д"] = "D",
                    ["д"] = "d",
                    ["Е"] = "E",
                    ["е"] = "e",
                    ["Є"] = "Ie",
                    ["є"] = "ie",
                    ["Ж"] = "Zh",
                    ["ж"] = "zh",
                    ["З"] = "Z",
                    ["з"] = "z",
                    ["И"] = "Y",
                    ["и"] = "y",
                    ["І"] = "I",
                    ["і"] = "i",
                    ["Ї"] = "I",
                    ["ї"] = "i",
                    ["Й"] = "I",
                    ["й"] = "i",
                    ["К"] = "K",
                    ["к"] = "k",
                    ["Л"] = "L",
                    ["л"] = "l",
                    ["М"] = "M",
                    ["м"] = "m",
                    ["Н"] = "N",
                    ["н"] = "n",
                    ["О"] = "O",
                    ["о"] = "o",
                    ["П"] = "P",
                    ["п"] = "p",
                    ["Р"] = "R",
                    ["р"] = "r",
                    ["С"] = "S",
                    ["с"] = "s",
                    ["Т"] = "T",
                    ["т"] = "t",
                    ["У"] = "U",
                    ["у"] = "u",
                    ["Ф"] = "F",
                    ["ф"] = "f",
                    ["Х"] = "Kh",
                    ["х"] = "kh",
                    ["Ц"] = "Ts",
                    ["ц"] = "ts",
                    ["Ч"] = "Ch",
                    ["ч"] = "ch",
                    ["Ш"] = "Sh",
                    ["ш"] = "sh",
                    ["Щ"] = "Shch",
                    ["щ"] = "shch",
                    ["Ь"] = "",
                    ["ь"] = "",
                    ["Ю"] = "Iu",
                    ["ю"] = "iu",
                    ["Я"] = "Ia",
                    ["я"] = "ia",
                    ["'"] = "", -- apostrophe
                    ["’"] = "", -- right single quotation mark
                    ["ʼ"] = "", -- modifier letter apostrophe

                    -- Archaic letters (non-standard)
                    ["Ъ"] = "",
                    ["ъ"] = "",
                    ["Ё"] = "Ë",
                    ["ё"] = "ë",
                    ["Ы"] = "Y",
                    ["ы"] = "y",
                    ["Ѣ"] = "Ě",
                    ["ѣ"] = "ě",
                    ["Э"] = "E",
                    ["э"] = "e",
                    ["Ѫ"] = "�",
                    ["ѫ"] = "�",
                    ["Ѳ"] = "�",
                    ["ѳ"] = "�",
                    ["Ѵ"] = "�",
                    ["ѵ"] = "�",
                }
            }
        }
    },

    ["iso-1968"] = {
        ["shortname"] = "ISO 1968",
        ["name"] = "ISO/R 9:1968, Ukrainian variant",
        ["lang"] = "uk-Latn-t-uk-Cyrl-m0-iso-1968",
        ["patterns"] = {
            [1] = {
                ["search"] = searchDefault,
                ["replace"] = {
                    ["А"] = "A",
                    ["а"] = "a",
                    ["Б"] = "B",
                    ["б"] = "b",
                    ["В"] = "V",
                    ["в"] = "v",
                    ["Г"] = "H",
                    ["г"] = "h",
                    ["Ґ"] = "G",
                    ["ґ"] = "g",
                    ["Д"] = "D",
                    ["д"] = "d",
                    ["Е"] = "E",
                    ["е"] = "e",
                    ["Є"] = "Je",
                    ["є"] = "je",
                    ["Ж"] = "Ž",
                    ["ж"] = "ž",
                    ["З"] = "Z",
                    ["з"] = "z",
                    ["И"] = "Y",
                    ["и"] = "y",
                    ["І"] = "I",
                    ["і"] = "i",
                    ["Ї"] = "Ï",
                    ["ї"] = "ï",
                    ["Й"] = "J",
                    ["й"] = "j",
                    ["К"] = "K",
                    ["к"] = "k",
                    ["Л"] = "L",
                    ["л"] = "l",
                    ["М"] = "M",
                    ["м"] = "m",
                    ["Н"] = "N",
                    ["н"] = "n",
                    ["О"] = "O",
                    ["о"] = "o",
                    ["П"] = "P",
                    ["п"] = "p",
                    ["Р"] = "R",
                    ["р"] = "r",
                    ["С"] = "S",
                    ["с"] = "s",
                    ["Т"] = "T",
                    ["т"] = "t",
                    ["У"] = "U",
                    ["у"] = "u",
                    ["Ф"] = "F",
                    ["ф"] = "f",
                    ["Х"] = "Ch",
                    ["х"] = "ch",
                    ["Ц"] = "C",
                    ["ц"] = "c",
                    ["Ч"] = "Č",
                    ["ч"] = "č",
                    ["Ш"] = "Š",
                    ["ш"] = "š",
                    ["Щ"] = "Šč",
                    ["щ"] = "šč",
                    ["Ь"] = "’",
                    ["ь"] = "’",
                    ["Ю"] = "Ju",
                    ["ю"] = "ju",
                    ["Я"] = "Ja",
                    ["я"] = "ja",
                    ["'"] = "", -- apostrophe
                    ["’"] = "", -- right single quotation mark
                    ["ʼ"] = "", -- modifier letter apostrophe

                    -- Archaic letters
                    ["Ъ"] = "Ǎ",
                    ["ъ"] = "ǎ",

                    -- Archaic letters (borrowed from other language columns in ISO/R 9:1968)
                    ["Ё"] = "Ë",
                    ["ё"] = "ë",
                    ["Ы"] = "Y",
                    ["ы"] = "y",
                    ["Ѣ"] = "Ě",
                    ["ѣ"] = "ě",
                    ["Э"] = "Ė",
                    ["э"] = "ė",
                    ["Ѫ"] = "ʺ̣",
                    ["ѫ"] = "ʺ̣",
                    ["Ѳ"] = "Ḟ",
                    ["ѳ"] = "ḟ",
                    ["Ѵ"] = "Ẏ",
                    ["ѵ"] = "ẏ",
                }
            }
        }
    },

    ["iso"] = {
        ["shortname"] = "ISO",
        ["name"] = "ISO 9:1995",
        ["lang"] = "uk-Latn-t-uk-Cyrl-m0-iso-1995",
        ["patterns"] = {
            [1] = {
                ["search"] = searchDefault,
                ["replace"] = {
                    ["А"] = "A",
                    ["а"] = "a",
                    ["Б"] = "B",
                    ["б"] = "b",
                    ["В"] = "V",
                    ["в"] = "v",
                    ["Г"] = "G",
                    ["г"] = "g",
                    ["Ґ"] = "G̀",
                    ["ґ"] = "g̀",
                    ["Д"] = "D",
                    ["д"] = "d",
                    ["Е"] = "E",
                    ["е"] = "e",
                    ["Є"] = "Ê",
                    ["є"] = "ê",
                    ["Ж"] = "Ž",
                    ["ж"] = "ž",
                    ["З"] = "Z",
                    ["з"] = "z",
                    ["И"] = "I",
                    ["и"] = "i",
                    ["І"] = "Ì",
                    ["і"] = "ì",
                    ["Ї"] = "Ï",
                    ["ї"] = "ï",
                    ["Й"] = "J",
                    ["й"] = "j",
                    ["К"] = "K",
                    ["к"] = "k",
                    ["Л"] = "L",
                    ["л"] = "l",
                    ["М"] = "M",
                    ["м"] = "m",
                    ["Н"] = "N",
                    ["н"] = "n",
                    ["О"] = "O",
                    ["о"] = "o",
                    ["П"] = "P",
                    ["п"] = "p",
                    ["Р"] = "R",
                    ["р"] = "r",
                    ["С"] = "S",
                    ["с"] = "s",
                    ["Т"] = "T",
                    ["т"] = "t",
                    ["У"] = "U",
                    ["у"] = "u",
                    ["Ф"] = "F",
                    ["ф"] = "f",
                    ["Х"] = "H",
                    ["х"] = "h",
                    ["Ц"] = "C",
                    ["ц"] = "c",
                    ["Ч"] = "Č",
                    ["ч"] = "č",
                    ["Ш"] = "Š",
                    ["ш"] = "š",
                    ["Щ"] = "Ŝ",
                    ["щ"] = "ŝ",
                    ["Ь"] = "ʹ",
                    ["ь"] = "ʹ",
                    ["Ю"] = "Û",
                    ["ю"] = "û",
                    ["Я"] = "Â",
                    ["я"] = "â",
                    ["'"] = "ˋ", -- apostrophe
                    ["’"] = "ˋ", -- right single quotation mark
                    ["ʼ"] = "ˋ", -- modifier letter apostrophe

                    -- Archaic letters
                    ["Ъ"] = "ʺ",
                    ["ъ"] = "ʺ",
                    ["Ё"] = "Ë",
                    ["ё"] = "ë",
                    ["Ы"] = "Y",
                    ["ы"] = "y",
                    ["Ѣ"] = "Ě",
                    ["ѣ"] = "ě",
                    ["Э"] = "È",
                    ["э"] = "è",
                    ["Ѫ"] = "Ǎ",
                    ["ѫ"] = "ǎ",
                    ["Ѳ"] = "F̀",
                    ["ѳ"] = "f̀",
                    ["Ѵ"] = "Ỳ",
                    ["ѵ"] = "ỳ",
                }
            }
        }
    },

    ["alaloc"] = {
        ["shortname"] = "ALA–LC",
        ["name"] = "US Library of Congress",
        ["lang"] = "uk-Latn-t-uk-Cyrl-m0-alaloc-1997",
        ["patterns"] = {
            [1] = {
                ["search"] = searchDefault,
                ["replace"] = {
                    ["А"] = "A",
                    ["а"] = "a",
                    ["Б"] = "B",
                    ["б"] = "b",
                    ["В"] = "V",
                    ["в"] = "v",
                    ["Г"] = "H",
                    ["г"] = "h",
                    ["Ґ"] = "G",
                    ["ґ"] = "g",
                    ["Д"] = "D",
                    ["д"] = "d",
                    ["Е"] = "E",
                    ["е"] = "e",
                    ["Є"] = "I͡e",
                    ["є"] = "i͡e",
                    ["Ж"] = "Z͡h",
                    ["ж"] = "z͡h",
                    ["З"] = "Z",
                    ["з"] = "z",
                    ["И"] = "Y",
                    ["и"] = "y",
                    ["І"] = "I",
                    ["і"] = "i",
                    ["Ї"] = "Ï",
                    ["ї"] = "ï",
                    ["Й"] = "Ĭ",
                    ["й"] = "ĭ",
                    ["К"] = "K",
                    ["к"] = "k",
                    ["Л"] = "L",
                    ["л"] = "l",
                    ["М"] = "M",
                    ["м"] = "m",
                    ["Н"] = "N",
                    ["н"] = "n",
                    ["О"] = "O",
                    ["о"] = "o",
                    ["П"] = "P",
                    ["п"] = "p",
                    ["Р"] = "R",
                    ["р"] = "r",
                    ["С"] = "S",
                    ["с"] = "s",
                    ["Т"] = "T",
                    ["т"] = "t",
                    ["У"] = "U",
                    ["у"] = "u",
                    ["Ф"] = "F",
                    ["ф"] = "f",
                    ["Х"] = "Kh",
                    ["х"] = "kh",
                    ["Ц"] = "T͡s",
                    ["ц"] = "t͡s",
                    ["Ч"] = "Ch",
                    ["ч"] = "ch",
                    ["Ш"] = "Sh",
                    ["ш"] = "sh",
                    ["Щ"] = "Shch",
                    ["щ"] = "shch",
                    ["Ь"] = "ʹ",
                    ["ь"] = "ʹ",
                    ["Ю"] = "I͡u",
                    ["ю"] = "i͡u",
                    ["Я"] = "I͡a",
                    ["я"] = "i͡a",
                    ["'"] = "", -- apostrophe
                    ["’"] = "", -- right single quotation mark
                    ["ʼ"] = "", -- modifier letter apostrophe

                    -- Archaic letters (non-standard)
                    ["Ъ"] = "",
                    ["ъ"] = "",
                    ["Ё"] = "Ë",
                    ["ё"] = "ë",
                    ["Ы"] = "Y",
                    ["ы"] = "y",
                    ["Ѣ"] = "Ě",
                    ["ѣ"] = "ě",
                    ["Э"] = "E",
                    ["э"] = "e",
                    ["Ѫ"] = "�",
                    ["ѫ"] = "�",
                    ["Ѳ"] = "�",
                    ["ѳ"] = "�",
                    ["Ѵ"] = "�",
                    ["ѵ"] = "�",
                }
            }
        }
    },

    ["bgn"] = {
        ["shortname"] = "BGN/PCGN",
        ["name"] = "US Board on Geographic Names and British Permanent Committee on Geographical Names",
        ["lang"] = "uk-Latn-t-uk-Cyrl-m0-bgn-1965",
        ["patterns"] = {
            -- note 1: “The character sequences зг, кг, сг, тс, and цг may be romanized z·h, k·h, s·h, t·s, and ts·h 
            -- in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter 
            -- sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.”
            [1] = {
                ["search"] = "[зЗкКсСцЦ][гГ]",
                ["replace"] = {
                    ["ЗГ"] = "Z·H",
                    ["Зг"] = "Z·h",
                    ["зг"] = "z·h",
                    ["КГ"] = "K·H",
                    ["Кг"] = "K·h",
                    ["кг"] = "k·h",
                    ["СГ"] = "S·H",
                    ["Сг"] = "S·h",
                    ["сг"] = "s·h",
                    ["ЦГ"] = "TS·H",
                    ["Цг"] = "Ts·h",
                    ["цг"] = "ts·h",
                }
            },
            [2] = {
                ["search"] = "[тТ][сС]",
                ["replace"] = {
                    ["ТС"] = "T·S",
                    ["Тс"] = "T·s",
                    ["тс"] = "t·s",
                }
            },
            -- Default 1-letter replacements
            [3] = {
                ["search"] = searchDefault,
                ["replace"] = {
                    ["А"] = "A",
                    ["а"] = "a",
                    ["Б"] = "B",
                    ["б"] = "b",
                    ["В"] = "V",
                    ["в"] = "v",
                    ["Г"] = "H",
                    ["г"] = "h",
                    ["Ґ"] = "G",
                    ["ґ"] = "g",
                    ["Д"] = "D",
                    ["д"] = "d",
                    ["Е"] = "E",
                    ["е"] = "e",
                    ["Є"] = "Ye",
                    ["є"] = "ye",
                    ["Ж"] = "Zh",
                    ["ж"] = "zh",
                    ["З"] = "Z",
                    ["з"] = "z",
                    ["И"] = "Y",
                    ["и"] = "y",
                    ["І"] = "I",
                    ["і"] = "i",
                    ["Ї"] = "Yi",
                    ["ї"] = "yi",
                    ["Й"] = "Y",
                    ["й"] = "y",
                    ["К"] = "K",
                    ["к"] = "k",
                    ["Л"] = "L",
                    ["л"] = "l",
                    ["М"] = "M",
                    ["м"] = "m",
                    ["Н"] = "N",
                    ["н"] = "n",
                    ["О"] = "O",
                    ["о"] = "o",
                    ["П"] = "P",
                    ["п"] = "p",
                    ["Р"] = "R",
                    ["р"] = "r",
                    ["С"] = "S",
                    ["с"] = "s",
                    ["Т"] = "T",
                    ["т"] = "t",
                    ["У"] = "U",
                    ["у"] = "u",
                    ["Ф"] = "F",
                    ["ф"] = "f",
                    ["Х"] = "Kh",
                    ["х"] = "kh",
                    ["Ц"] = "Ts",
                    ["ц"] = "ts",
                    ["Ч"] = "Ch",
                    ["ч"] = "ch",
                    ["Ш"] = "Sh",
                    ["ш"] = "sh",
                    ["Щ"] = "Shch",
                    ["щ"] = "shch",
                    ["Ь"] = "’",
                    ["ь"] = "’",
                    ["Ю"] = "Yu",
                    ["ю"] = "yu",
                    ["Я"] = "Ya",
                    ["я"] = "ya",
                    ["'"] = "ˮ", -- apostrophe
                    ["’"] = "ˮ", -- right single quotation mark
                    ["ʼ"] = "ˮ", -- modifier letter apostrophe

                    -- Archaic letters (non-standard)
                    ["Ъ"] = "",
                    ["ъ"] = "",
                    ["Ё"] = "Ë",
                    ["ё"] = "ë",
                    ["Ы"] = "Y",
                    ["ы"] = "y",
                    ["Ѣ"] = "Ě",
                    ["ѣ"] = "ě",
                    ["Э"] = "E",
                    ["э"] = "e",
                    ["Ѫ"] = "�",
                    ["ѫ"] = "�",
                    ["Ѳ"] = "�",
                    ["ѳ"] = "�",
                    ["Ѵ"] = "�",
                    ["ѵ"] = "�",
                }
            }
        }
    },

}

-- handle input
--
-- Entry point for {{#invoke:User:Mzajac/transform|romanize|[text]|method=[method]}}.
--
--     frame.args[1]      text to romanize; a missing argument is treated as an
--                        empty string instead of raising a concatenation error
--     frame.args.method  a key of ttable, or "all"; a missing or empty value
--                        falls back to methodDefault
--
-- Returns the markup built by transform.convert for a single method. For
-- method "all" it returns one labelled conversion per entry of ttable, each
-- wrapped in <span>…</span>, joined with ", " and ordered alphabetically by
-- method key. Raises an error for any other method name.
function transform.romanize(frame)
    local inputText = frame.args[1] or ""

    -- "|method=" with nothing after it arrives as an empty string, not nil
    local method = frame.args.method
    if method == nil or method == "" then
        method = methodDefault
    end

    if ttable[method] then -- just do a conversion
        return transform.convert(inputText, method)
    elseif method == "all" then -- loop through all methods
        -- pairs() visits keys in an unspecified order; sort the method keys so
        -- the output is the same on every render
        local methods = {}
        for theMethod in pairs(ttable) do
            methods[#methods + 1] = theMethod
        end
        table.sort(methods)

        local pieces = {}
        for i, theMethod in ipairs(methods) do
            pieces[i] = "<span>" .. transform.convert(inputText, theMethod, true) .. "</span>"
        end
        return table.concat(pieces, ", ")
    else
        error("Transliteration method “" .. method .. "” is not supported")
    end
end

-- do the conversion
--
--     inputText  text to romanize
--     method     key of ttable selecting the romanization system
--     showLabel  when truthy, append the system's short name in parentheses
--
-- Returns a <span> with lang and title attributes around the converted text,
-- optionally followed by " (shortname)"; the short name is wrapped in an
-- <abbr> carrying the full name when the two differ.
function transform.convert(inputText, method, showLabel)
    local system = ttable[method]

    local result = "<span"
        .. " lang='" .. system["lang"] .. "'"
        .. " title='Romanized Ukrainian (" .. system["name"] .. ")'"
        .. ">"

    -- pad with spaces to allow boundary patterns (working around lack of %f pattern)
    local text = " " .. inputText .. " "

    -- run every search/replace step of the system, in table order
    for _, step in ipairs(system["patterns"]) do
        text = (mw.ustring.gsub(text, step["search"], step["replace"]))
    end

    -- un-pad with spaces
    text = mw.ustring.sub(text, 2, mw.ustring.len(text) - 1)

    result = result .. text .. "</span>"

    if showLabel then
        local label = system["shortname"]
        -- Use an abbr element if the short name doesn’t match name
        if label ~= system["name"] then
            label = "<abbr title='" .. system["name"] .. "'>" .. label .. "</abbr>"
        end
        result = result .. " (" .. label .. ")"
    end

    return result
end

return transform
Module:User:Mzajac/transform

Navigation menu

Search