Module:User:Chernorizets/bg-orthography

From Wiktionary, the free dictionary
Jump to navigation Jump to search

export.is_valid[edit]

function export.is_valid(word, validation_opts)

Checks whether the provided word is valid according to Bulgarian orthographic rules.

Parameters[edit]

  • word: a string representing a Bulgarian word.
    The empty string is considered valid. nil is invalid by default, but that can be overridden via an option.
  • validation_opts: a table of Boolean validation options
    |nil_is_valid=: treats nil as valid input

Errors[edit]

If word is not a string, the function raises the error "Input must be a string!".

Return values[edit]

The function returns two values: result and message:

  • result: true if the word is orthographically valid, false otherwise
  • message: nil if result == true, otherwise identifies the first failing orthographic rule

local export = {}

-- Shorthands for the mw.ustring functions used by the rules further down.
local umatch = mw.ustring.match
local ufind = mw.ustring.find
local ulower = mw.ustring.lower

-- Vowel letters. The lowercase set contains "ѝ", so the uppercase set carries
-- its capital form "Ѝ" as well; without it an upper-cased word would be
-- classified differently from its lower-cased form.
local vowels_lower = "аъоуеиюяѝ"
local vowels_upper = "АЪОУЕИЮЯЍ"

-- The "_c" exports are bracketed character sets meant to be spliced into
-- mw.ustring patterns.
export.vowels_lower_c = "[" .. vowels_lower .. "]"
export.vowels_upper_c = "[" .. vowels_upper .. "]"
export.vowels_c = "[" .. vowels_lower .. vowels_upper .. "]"

-- Consonant letters. Note that the soft sign ь/Ь is part of these sets.
local consonants_lower = "бвгджзйклмнпрстфхцчшщь"
local consonants_upper = "БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ"

export.cons_lower_c = "[" .. consonants_lower .. "]"
export.cons_upper_c = "[" .. consonants_upper .. "]"
export.cons_c = "[" .. consonants_lower .. consonants_upper .. "]"

-- Full letter inventory: vowels followed by consonants, per case.
local alpha_lower = vowels_lower .. consonants_lower
local alpha_upper = vowels_upper .. consonants_upper

export.alpha_lower_c = "[" .. alpha_lower .. "]"
export.alpha_upper_c = "[" .. alpha_upper .. "]"
export.alphabet_c = "[" .. alpha_lower .. alpha_upper .. "]"
-- Negated set: matches any single character that is not one of the letters above.
export.non_bulgarian_c = "[^" .. alpha_lower .. alpha_upper .. "]"

-- Looks up a single validation option.
-- Returns validation_opts[key] when validation_opts is a table, and nil for
-- any other kind of value (including a missing options argument).
local function get_opt(validation_opts, key)
	if type(validation_opts) ~= "table" then
		return nil
	end
	return validation_opts[key]
end

-- Registry of rule checkers keyed by rule name. Each checker receives
-- (word, opts) and returns a truthy value when the word passes the rule.
local orthographic_rules = {}

orthographic_rules["valid letter case"] = function(word, opts)
	-- Accepted shapes, tried in this order: ALL CAPS, all lowercase, Capitalized.
	local accepted_shapes = { "^%u+$", "^%l+$", "^%u%l*$" }
	for _, shape in ipairs(accepted_shapes) do
		local matched = umatch(word, shape)
		if matched then
			return matched
		end
	end
	return nil
end

-- Passes when every soft sign (ь/Ь) in the word sits between a consonant and
-- the letter о, compared case-insensitively. Words without a soft sign pass.
orthographic_rules["correct use of ьЬ"] = function(word, opts)
	-- Fast path: nothing to verify when the word has no soft sign at all.
	if not ufind(word, "[ьЬ]") then
		return true
	end

	local lowered = ulower(word)

	-- export.cons_lower_c lists "ь" itself; strip it so that a soft sign
	-- cannot count as the consonant in front of another one ("ььо").
	local before_soft_sign = mw.ustring.gsub(export.cons_lower_c, "ь", "")

	-- Compare how many soft signs there are with how many of them appear in a
	-- valid consonant + "ьо" sequence. Valid sequences cannot overlap, so the
	-- counts are equal exactly when every soft sign is correctly placed.
	local _, soft_sign_count = mw.ustring.gsub(lowered, "ь", "")
	local _, well_placed_count = mw.ustring.gsub(lowered, before_soft_sign .. "ьо", "")

	return soft_sign_count == well_placed_count
end

-- Passes when the word consists solely of characters from export.alphabet_c,
-- or solely of characters outside it; a blend of the two fails.
orthographic_rules["no alphabet mixing"] = function(word, opts)
	local only_bulgarian = umatch(word, "^" .. export.alphabet_c .. "+$")
	if only_bulgarian then
		return only_bulgarian
	end
	return umatch(word, "^" .. export.non_bulgarian_c .. "+$")
end

-- Returns the names of all registered orthographic rules in ascending order.
-- pairs() does not guarantee any traversal order, so sorting is what makes the
-- "first failing rule" reported by export.is_valid well-defined.
local function sorted_rule_names()
	local names = {}
	for rule_name in pairs(orthographic_rules) do
		names[#names + 1] = rule_name
	end
	table.sort(names)
	return names
end

--[==[
Checks whether the provided word is valid according to Bulgarian orthographic rules.

===Parameters===

* word: a {string} representing a Bulgarian word.
*: The empty string is considered valid. {nil} is invalid by default, but that can be overridden via an option.
* validation_opts: a {table} of Boolean validation options
*: |nil_is_valid=: treats {nil} as valid input

===Errors===

If `word` is neither {nil} nor a {string}, the function raises the error {"Input must be a string!"}.

===Return values===

The function returns two values: `result` and `message`:
* `result`: {true} if the word is orthographically valid, {false} otherwise
* `message`: {nil} if {result == true}, otherwise identifies the first failing orthographic rule
*: Rules are checked in ascending order of their names, so the reported rule is the same on every call.
*: When `word` is {nil} and that is not allowed, `message` is {"no input"}.
]==]
function export.is_valid(word, validation_opts)
	-- Only a genuinely missing word takes this path; false and every other
	-- non-string value fall through to the type check below.
	if word == nil then
		if get_opt(validation_opts, "nil_is_valid") then
			return true, nil
		end
		return false, "no input"
	end

	if type(word) ~= "string" then error("Input must be a string!") end

	if word == "" then return true, nil end

	-- Stop at the first rule the word violates and report its name.
	for _, rule_name in ipairs(sorted_rule_names()) do
		if not orthographic_rules[rule_name](word, validation_opts) then
			return false, rule_name
		end
	end

	return true, nil
end

return export