53 lines
1.5 KiB
Lua
53 lines
1.5 KiB
Lua
|
|
-- TODO: I don't like to have such tiny modules. Either merge into another
|
|
-- module or provide the functionality with another approach.
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- Enum
|
|
|
|
-- Metatable attached to the member tables built by `enum`.
local enum_metatable = {
    -- Render a member as 'Enum:<name>', or 'Enum: no name' when it has no name.
    -- The fallback has to be chosen before concatenating: `..` binds tighter
    -- than `or`, so `'Enum:' .. e.name or ...` would raise on a nil name
    -- instead of ever reaching the fallback.
    __tostring = function (e)
        if e.name == nil then
            return 'Enum: no name'
        end
        return 'Enum:' .. tostring(e.name)
    end,
    -- Allow members on either side of `..` by stringifying both operands.
    __concat = function (a, b) return tostring(a) .. tostring(b) end,
}
|
|
|
|
-- Build an enumeration from a sequence of values.
-- Each value in `t` becomes a key of the returned table; the value stored
-- under that key is a fresh table `{ name = <value> }` carrying
-- `enum_metatable`.
local function enum (t)
    local members = {}
    for _, member_name in ipairs(t) do
        local member = { name = member_name }
        members[member_name] = setmetatable(member, enum_metatable)
    end
    return members
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- Unicode
|
|
|
|
-- Matches one UTF-8 encoded character: a single byte in 1-127 or 192-255
-- followed by any number of continuation bytes (128-191).
local UNICODE_CHAR_PATTERN = '[\01-\127\192-\255][\128-\191]*'

-- Set of two-byte sequences that are skipped when counting string length.
-- The entries are lead byte 204 with trail bytes 128-191 and lead byte 205
-- with trail bytes 128-175, i.e. the encodings of U+0300 through U+036F.
local UNICODE_ZERO_WIDTH_CHARACTERS = {}
do
    local function mark_range (lead_byte, first_trail, last_trail)
        for trail = first_trail, last_trail do
            UNICODE_ZERO_WIDTH_CHARACTERS[lead_byte .. string.char(trail)] = true
        end
    end

    mark_range('\204', 128, 191)
    mark_range('\205', 128, 175)
end
|
|
|
|
|
|
-- Return an iterator that yields successive matches of
-- UNICODE_CHAR_PATTERN in `str`, one per call.
local function iterate_utf8_chars (str)
    -- TODO: Detect invalid codepoints.
    local next_char = str:gmatch(UNICODE_CHAR_PATTERN)
    return next_char
end
|
|
|
|
-- Count the characters of `str` as produced by iterate_utf8_chars, leaving
-- out every character listed in UNICODE_ZERO_WIDTH_CHARACTERS.
local function utf8_string_length (str)
    local total, zero_width = 0, 0
    for char in iterate_utf8_chars(str) do
        total = total + 1
        if UNICODE_ZERO_WIDTH_CHARACTERS[char] then
            zero_width = zero_width + 1
        end
    end
    return total - zero_width
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Values exported by this module.
local TABLE_TYPE = enum { 'EMPTY', 'SEQUENCE', 'STRING_MAP', 'PURE_MAP', 'MIXED', 'SET' }
local DISPLAY = { HIDE = 1, SMALL = 2, INLINE = 3, EXPAND = 4 }

return {
    TABLE_TYPE = TABLE_TYPE,
    DISPLAY = DISPLAY,
    utf8_string_length = utf8_string_length,
}
|