Module:ko-hangul

Definition from Wiktionary, the free dictionary
Jump to: navigation, search
The following documentation is located at Module:ko-hangul/documentation. [edit]
See also: subpages of this module.

This module provides Lua functions for decomposing precomposed Hangul syllables into their constituent jamo. (A whole section of the Unicode spec — §3.12 as of v6.2.0 — is dedicated to this algorithm.) This is useful for transliteration algorithms (see Module:ko-translit), which generally prefer to operate on units smaller than whole syllables, since there are 11,172 precomposed syllables.


local p = {}

-- Jamo index constants.
-- Initial-consonant ("lead") indices, numbered consecutively from 0.
-- p.syllable2Jamo adds these to 0x1100 to obtain the jamo code point.
p.initials = {
	KIYEOK      =  0,
	SSANGKIYEOK =  1,
	NIEUN       =  2,
	TIKEUT      =  3,
	SSANGTIKEUT =  4,
	RIEUL       =  5,
	MIEUM       =  6,
	PIEUP       =  7,
	SSANGPIEUP  =  8,
	SIOS        =  9,
	SSANGSIOS   = 10,
	IEUNG       = 11,
	CIEUC       = 12,
	SSANGCIEUC  = 13,
	CHIEUCH     = 14,
	KHIEUKH     = 15,
	THIEUTH     = 16,
	PHIEUPH     = 17,
	HIEUH       = 18,
}
 
-- Vowel (medial) indices, numbered consecutively from 0.
-- p.syllable2Jamo adds these to 0x1161 to obtain the jamo code point.
p.vowels = {
	A   =  0,
	AE  =  1,
	YA  =  2,
	YAE =  3,
	EO  =  4,
	E   =  5,
	YEO =  6,
	YE  =  7,
	O   =  8,
	WA  =  9,
	WAE = 10,
	OE  = 11,
	YO  = 12,
	U   = 13,
	WEO = 14,
	WE  = 15,
	WI  = 16,
	YU  = 17,
	EU  = 18,
	YI  = 19,
	I   = 20,
}
 
-- Final-consonant ("tail") indices.  p.syllable2Jamo adds these to 0x11A7
-- to obtain the jamo code point.  There is deliberately no entry for 0:
-- a tail index of zero denotes the absence of a final consonant.
p.finals = {
	KIYEOK        =  1, SSANGKIYEOK   =  2, KIYEOK_SIOS   =  3,
	NIEUN         =  4, NIEUN_CIEUC   =  5, NIEUN_HIEUH   =  6,
	TIKEUT        =  7, RIEUL         =  8, RIEUL_KIYEOK  =  9,
	RIEUL_MIEUM   = 10, RIEUL_PIEUP   = 11, RIEUL_SIOS    = 12,
	RIEUL_THIEUTH = 13, RIEUL_PHIEUPH = 14, RIEUL_HIEUH   = 15,
	MIEUM         = 16, PIEUP         = 17, PIEUP_SIOS    = 18,
	SIOS          = 19, SSANGSIOS     = 20, IEUNG         = 21,
	CIEUC         = 22, CHIEUCH       = 23, KHIEUKH       = 24,
	THIEUTH       = 25, PHIEUPH       = 26, HIEUH         = 27,
	-- Index 28 is never produced by p.syllableIndex2JamoIndices, whose tail
	-- index is taken modulo 28; it is kept for callers that refer to it.
	KIYEOK_RIEUL  = 28,
	-- Misspelled names from earlier revisions, retained as aliases so that
	-- existing callers keep working.  Prefer KIYEOK and KIYEOK_SIOS.
	KYEOK         =  1, KYEOK_SIOS    =  3
}
 
-- Returns true if s denotes a single precomposed Hangul syllable
-- (U+AC00 through U+D7A3), and false otherwise.
--   s: either a code point (number) or a UTF-8 encoded string.
-- Values of any other type yield false.
function p.isHangulSyllable(s)
    if type(s) == 'number' then
        return (s >= 0xAC00) and (s <= 0xD7A3)
    elseif type(s) == 'string' then
        -- Every code point in the syllable range is encoded as exactly
        -- three bytes in UTF-8.
        if #s ~= 3 then
            return false
        end
        -- Decode the bytes explicitly rather than comparing raw strings:
        -- a lexicographic comparison also accepts malformed sequences such
        -- as "\235AB", and Lua string ordering follows the current locale.
        local b1, b2, b3 = s:byte(1, 3)
        if b1 < 0xE0 or b1 > 0xEF
            or b2 < 0x80 or b2 > 0xBF
            or b3 < 0x80 or b3 > 0xBF then
            return false
        end
        local codepoint = (b1 - 0xE0) * 0x1000
            + (b2 - 0x80) * 0x40
            + (b3 - 0x80)
        return (codepoint >= 0xAC00) and (codepoint <= 0xD7A3)
    else
        return false
    end
end
 
-- Computes the "syllable index" of a precomposed Hangul syllable as defined
-- in section 3.12 of the Unicode spec: the syllable's code point minus
-- 0xAC00.  For a valid syllable this is an integer in the range [0, 11171].
--   hangulSyllable: the syllable, given either as a single-character string
--                   or directly as its code point.
function p.getSyllableIndex(hangulSyllable)
    local codepoint = hangulSyllable
    if type(codepoint) == 'string' then
        -- Strings are converted to the code point of their first character.
        codepoint = mw.ustring.codepoint(codepoint)
    end
    return codepoint - 0xAC00
end
 
-- Splits the "syllable index" of a precomposed Hangul syllable (as returned
-- by p.getSyllableIndex) into the indices of its three constituent jamo.
-- Returns three values, in this order:
--   the "lead" (initial consonant) index,
--   the vowel index,
--   the "tail" (final consonant) index, where zero denotes the absence of
--   a final consonant.
function p.syllableIndex2JamoIndices(syllableIndex)
    local TAILS_PER_VOWEL = 28      -- tail slots, including "no tail"
    local SYLLABLES_PER_LEAD = 588  -- 21 vowels times 28 tail slots

    local tail = syllableIndex % TAILS_PER_VOWEL
    local offsetWithinLead = syllableIndex % SYLLABLES_PER_LEAD
    local vowel = math.floor(offsetWithinLead / TAILS_PER_VOWEL)
    local lead = math.floor(syllableIndex / SYLLABLES_PER_LEAD)

    return lead, vowel, tail
end
 
-- Convenience wrapper around p.syllableIndex2JamoIndices that accepts the
-- precomposed Hangul syllable itself (anything p.getSyllableIndex accepts)
-- rather than its "syllable index".  Returns the lead, vowel and tail
-- indices.
function p.syllable2JamoIndices(hangulSyllable)
    local syllableIndex = p.getSyllableIndex(hangulSyllable)
    return p.syllableIndex2JamoIndices(syllableIndex)
end
 
-- Decomposes a single precomposed Hangul syllable into a string made up of
-- its constituent jamo: lead, vowel and, when present, tail.  (This function
-- is not really expected to be useful; it mainly demonstrates how the other
-- functions fit together.)
function p.syllable2Jamo(hangulSyllable)
    local lead, vowel, tail = p.syllable2JamoIndices(hangulSyllable)

    local leadJamo = mw.ustring.char(0x1100 + lead)
    local vowelJamo = mw.ustring.char(0x1161 + vowel)

    -- A tail index of zero means the syllable has no final consonant.
    local tailJamo = ''
    if tail ~= 0 then
        tailJamo = mw.ustring.char(0x11A7 + tail)
    end

    return leadJamo .. vowelJamo .. tailJamo
end
 
-- Hand the module table, with its constants and functions, back to the loader.
return p