Module:User:AmazingJus/sco
Jump to navigation
Jump to search
- This module lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of AmazingJus, for his own experimentation. Items in this module may be added and removed at AmazingJus's discretion; do not rely on this module's stability.
local export = {}
local lang = require("Module:languages").getByCode("sco")
local m_IPA = require("Module:IPA")
local gmatch = mw.ustring.gmatch
local gsplit = mw.text.gsplit
local match = mw.ustring.match
local gsubn = mw.ustring.gsub
local len = mw.ustring.len
local lc = mw.ustring.lower
local sub = mw.ustring.sub
-- Single-result wrapper around gsubn(): performs the same substitution but
-- hands back only the resulting string, dropping the match count.
-- The arguments are forwarded to gsubn() unchanged.
local function gsub(term, foo, bar, n)
	-- the extra parentheses truncate the call to exactly one return value
	return (gsubn(term, foo, bar, n))
end
--[[ Dialect abbreviations:
* Insular:
** Orkney: or
** Shetland: sh
* Northern:
** North Northern: nn
** Mid Northern: mn
** South Northern: sn
* Central:
** North East Central: nec
** South East Central: sec
** West Central: wc
** South West Central: swc
* Southern: s
* Ulster:
** Western Ulster: wu
** Central Ulster: cu
** Eastern Ulster: eu
--]]
--[[
TODO:
-- * Consider unstressed vowels (schwa)
-- * Place the morpheme splitting in the main evaluation function
-- * Work on consonant rules
-- * Consider adding unique dialects based on word inputted
-- * Consider unique pronunciation for suffixes
--]]
--[[ DATA STRUCTURES
--]]
-- Multi-letter graphemes recognised by the tokeniser.
-- The order matters: tokenise() walks this list front to back and takes the
-- first entry that matches, so three-character entries are listed before the
-- two-character ones (e.g. "owe" before "ow").
local multigraphs = {
	-- three-character entries: split "vowel_e" spellings, plus "owe"
	"a_e", "e_e", "i_e", "o_e", "owe", "u_e", "y_e",
	-- vowel digraphs
	"aa", "ae", "ai", "au", "aw", "ay",
	"ea", "ee", "ei", "eu", "ew", "ey",
	"ie",
	"oa", "oi", "oo", "ou", "ow", "oy",
	"ui",
	-- consonant digraphs
	"ch", "ck", "kn", "ld", "mb", "nd", "ng",
	"nk", "qu", "sh", "th", "wh", "wr",
}
-- Common morphemes, grouped by role. Each group is a plain list of spellings.
local morphemes = {
	-- particles that are normally unstressed
	unstressed = {
		"ae", "ane", "dae", "hae", "na", "nae", "sae", "tae", "the",
	},
	-- prefixes
	prefixes = {
		"a",
	},
	-- suffixes
	suffixes = {
		"ae", "ie", "fu", "le", "na",
	},
}
--[[ HELPER FUNCTIONS
--]]
-- Resolve every "ˑ" length marker in a word according to the Scottish vowel
-- length rule: each marker is either upgraded to "ː" (long) or removed (short).
-- Takes the marked-up word and returns it with no "ˑ" left.
local function handle_vowel_length(word)
	-- substitutions are applied strictly in this order; the last one strips
	-- whatever markers the first two did not turn into "ː"
	local length_rules = {
		{"ˑ([rvzðʒ])", "ː%1"}, -- long before /r/ and voiced fricatives
		{"ˑ$", "ː"},           -- long at the very end of the string
		{"ˑ", ""},             -- short everywhere else
	}
	for _, rule in ipairs(length_rules) do
		word = gsub(word, rule[1], rule[2])
	end
	return word
end
-- Add a primary stress mark ("ˈ") to a word that does not already have one.
--
-- * A word that already contains "ˈ" is returned unchanged.
-- * A word listed in morphemes.unstressed is returned unchanged.
-- * A word shaped like prefix "a-" + consonant + vowel gets the stress mark
--   right after the initial "a".
-- * Any other word gets the stress mark at its very beginning.
--
-- Takes the word as a string and returns the (possibly) stress-marked string.
local function handle_stress(word)
	-- an explicit stress marker always wins
	if match(word, "ˈ") then
		return word
	end
	-- morphemes.unstressed is a plain list, so it has to be searched by value;
	-- indexing it with the word itself always yields nil and would let every
	-- unstressed particle fall through to the stress rules below
	for _, particle in ipairs(morphemes.unstressed) do
		if particle == word then
			return word
		end
	end
	-- stress after prefix "a-"
	if match(word, "^a[^aeiou][aeiou]") then
		return "aˈ" .. sub(word, 2)
	end
	-- otherwise add stress on the first syllable of a morpheme
	return "ˈ" .. word
end
--[[ MAIN FUNCTIONS
--]]
-- Tokenise a word into a list of graphemes.
--
-- The word is first respelt so that a "vowel + consonant + e" sequence becomes
-- "vowel + _e + consonant"; this lets the split spellings in `multigraphs`
-- ("a_e", "i_e", ...) be matched as one contiguous token. The respelt word is
-- then scanned left to right: at every position the first entry of
-- `multigraphs` that matches is taken, and if none matches the single
-- character at that position is taken instead.
--
-- Takes the word as a string and returns an array of grapheme strings
-- (empty for an empty word).
local function tokenise(word)
	local tokenised = {}
	-- respell vowel + consonant + e as vowel + _e + consonant for easier parsing
	word = gsub(word, "([^aeiou][aeiouy])([^aeiouwy])e([^aeiou])", "%1_e%2%3")
	word = gsub(word, "([^aeiou][aeiouy])([^aeiouwy])e$", "%1_e%2")
	-- the word no longer changes, so its length is computed only once
	local word_len = len(word)
	local i = 1
	while i <= word_len do
		-- look for a multigraph starting at the current position
		local token
		for _, multigraph in ipairs(multigraphs) do
			if sub(word, i, i + len(multigraph) - 1) == multigraph then
				token = multigraph
				break
			end
		end
		-- fall back to a single character ONLY when no multigraph matched, so
		-- that the position right after a multigraph is checked for multigraphs
		-- too (e.g. "sheep" gives sh, ee, p rather than sh, e, e, p)
		if not token then
			token = sub(word, i, i)
		end
		table.insert(tokenised, token)
		i = i + len(token)
	end
	return tokenised
end
-- Generate the (work-in-progress) pronunciation data for an entry.
--
-- The entry is lowercased, split into words on whitespace, and every word is
-- tokenised into graphemes with tokenise().
--
-- Takes the entry text as a string and returns an array with one element per
-- word, each element being that word's array of graphemes.
local function pron(entry)
	-- make text lowercase
	entry = lc(entry)
	local words = {}
	-- split on runs of whitespace; leading or trailing whitespace still yields
	-- an empty piece, which is skipped so no empty word ends up in the result
	for word in gsplit(entry, "%s+") do
		if word ~= "" then
			table.insert(words, tokenise(word))
		end
	end
	return words
end
-- Exported entry point.
-- Accepts either the text itself or a table carrying the text in its
-- `args[1]` field, and returns whatever pron() produces for that text.
function export.toIPA(word)
	local text = word
	-- a table argument is unwrapped to its first positional argument
	if type(text) == "table" then
		text = text.args[1]
	end
	-- process pronunciation
	return pron(text)
end
return export