205 lines
5.8 KiB
Lua
205 lines
5.8 KiB
Lua
|
|
-- pretty.string
|
|
-- The string formatting module for pretty.
|
|
|
|
--[=[ Thoughts on displaying strings in useful ways.
|
|
|
|
Thoughts are TODO
|
|
|
|
--]=]
|
|
|
|
local DISPLAY = assert(require((... and select('1', ...):match('.+%.') or '')..'common'), '[pretty]: Could not load vital library: common') . DISPLAY
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- Constants
|
|
|
|
-- Strings with fewer bytes than this are always rendered whole in short form;
-- it is also the budget a cut string (content plus continuation marker) gets.
local NR_CHARS_IN_LONG_STRING = 40

-- Line width aimed for when a string is split into concatenated pieces.
local MAX_HORIZONAL_CHARACTER = 80

-- Quote character placed around short form string literals.
local SHORT_STR_DELIMITER = "'"

-- Marker appended after a string literal that was cut short.
local STRING_CONT_INDICATOR = '...'
|
|
|
|
--------
|
|
|
|
-- Lookup table: byte value (0-255) -> text used for that byte inside a short
-- form string literal.
local CHAR_TO_STR_REPR = {}

do
  -- Baseline: control bytes below 32 become a three digit decimal escape,
  -- every other byte stands for itself.
  for byte = 0, 255 do
    if byte < 32 then
      CHAR_TO_STR_REPR[byte] = string.format('\\%03i', byte)
    else
      CHAR_TO_STR_REPR[byte] = string.char(byte)
    end
  end

  -- Bytes with a dedicated representation replace their baseline entry.
  local dedicated = {
    [7]   = '\\a',
    [8]   = '\\b',
    [9]   = '\\t',
    [10]  = '\\n',
    [11]  = '\\v',
    [12]  = '\\f',
    [13]  = '\\r',
    [92]  = '\\\\',
    [127] = '\\127',
  }
  for byte, repr in pairs(dedicated) do
    CHAR_TO_STR_REPR[byte] = repr
  end

  -- The delimiter itself must be escaped, otherwise it would end the literal.
  CHAR_TO_STR_REPR[SHORT_STR_DELIMITER:byte()] = '\\'..SHORT_STR_DELIMITER
end
|
|
|
|
-- Character class of the bytes that can only be written with an escape
-- sequence: NUL (`%z`, the Lua 5.1 spelling), the control bytes 1-8 and 11-31,
-- and DEL (127). Tab (9) and newline (10) are deliberately left out.
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'

--------------------------------------------------------------------------------
-- Util

local function does_string_require_escaping (str)
  -- Returns true when `str` contains at least one byte matched by
  -- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ, and false otherwise.
  local first_hit = str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
  return first_hit ~= nil
end
|
|
|
|
local escape_string do

  -- gsub callbacks. The three callbacks that emit `\ddd` only ever receive
  -- bytes >= 128, so the escape always has three digits and a digit following
  -- it in the text can never be absorbed into it.
  local ESCAPE_SINGLE_BYTE          = function (char) return CHAR_TO_STR_REPR[char:byte()] end
  local ESCAPE_LONE_BYTE            = function (byte) return '\\' .. byte:byte() end
  local ESCAPE_MALFORMED_CONT_BYTE  = function (a, b) return a..'\\' .. b:byte() end
  local ESCAPE_MALFORMED_START_BYTE = function (a, b) return '\\'..a:byte() .. b end

  function escape_string (str)
    -- Attempts to escape the string, to a format that is both a valid Lua
    -- constant, and legible unicode.
    --
    -- Every byte is first mapped through CHAR_TO_STR_REPR. After that, high
    -- bytes that cannot be part of a UTF-8 sequence in their position are
    -- rewritten as decimal escapes:
    --   * a continuation byte (128-191) not preceded by another high byte,
    --   * a start byte (192-255) not followed by a continuation byte.
    -- Sequence lengths are not validated beyond these neighbour checks.
    --
    -- Returns the escaped string. Raises if `str` is not a string.

    -- Error checking
    assert(type(str) == 'string')

    -- Escape single bytes
    local escaped = str:gsub('.', ESCAPE_SINGLE_BYTE)
    local count

    -- Escape malformed continuation bytes. The two-capture pattern needs a
    -- preceding character, so a continuation byte opening the string is
    -- handled separately. Each pass turns a byte into ASCII text, which may
    -- expose the continuation byte after it; hence the loop.
    escaped = escaped:gsub('^[\128-\191]', ESCAPE_LONE_BYTE)
    repeat escaped, count = escaped:gsub('([^\128-\255])([\128-\191])', ESCAPE_MALFORMED_CONT_BYTE)
    until count == 0

    -- Escape malformed start bytes. Start bytes begin at 192; 191 is still a
    -- continuation byte and must not be matched here, or valid characters
    -- such as "\194\191" would be torn apart. A start byte closing the string
    -- has no following character and is handled separately.
    escaped = escaped:gsub('[\192-\255]$', ESCAPE_LONE_BYTE)
    repeat escaped, count = escaped:gsub('([\192-\255])([^\128-\191])', ESCAPE_MALFORMED_START_BYTE)
    until count == 0

    -- Done, lets return
    return escaped
  end
end
|
|
|
|
local function smallest_secure_longform_string_level (str)
  -- Determines the level a longform string needs to use, to avoid code
  -- injection. For example, if we want to use longform on the string
  -- 'Hello ]] World', we cannot use level-0 as this would result in
  -- '[[Hello ]] World]]', which could be an issue in certain applications.
  --
  -- Returns the smallest level N (number of `=` signs) for which the closing
  -- delimiter `]` .. ('='):rep(N) .. `]` cannot terminate the literal early.
  -- Raises if `str` is not a string.

  -- Error checking
  assert(type(str) == 'string')

  -- The first `]` of the real closing delimiter directly follows the content,
  -- so content ending in `]` or `]==` can pair up with it. Scanning the
  -- content with that bracket appended covers this case as well.
  local subject = str .. ']'

  -- Record every level whose closing delimiter occurs in the subject.
  local used_levels, init = {}, 1
  while true do
    local first, last = subject:find('%]=*%]', init)
    if not first then break end
    used_levels[last - first - 1] = true
    -- Resume ON the closing bracket rather than after it: it may also be the
    -- opening bracket of the next candidate, as in ']=]]'.
    init = last
  end

  -- Pick the lowest level that is free.
  local level = 0
  while used_levels[level] do level = level + 1 end
  return level
end
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
local function format_shortform_string (str, _, l)
  -- Appends `str` to the output buffer `l` as a single quoted literal:
  -- delimiter, escaped content, delimiter. The second parameter is unused.
  local base = #l
  l[base + 1] = SHORT_STR_DELIMITER
  l[base + 2] = escape_string(str)
  l[base + 3] = SHORT_STR_DELIMITER
end
|
|
|
|
local function safe_cut (str, si, ei)
  -- Returns str:sub(si, ei), shortened further when the plain cut would end
  -- in the middle of a backslash escape sequence.
  --
  -- str: an already escaped string.
  -- si:  first byte of the cut, defaults to 1.
  -- ei:  last byte of the cut, defaults to the end of the string.
  --
  -- A backslash followed by fewer than three digits at the very end of the
  -- cut is treated as a truncated decimal escape.

  -- Error checking
  assert(type(str) == 'string')
  assert(type(si) == 'number' or si == nil)
  assert(type(ei) == 'number' or ei == nil)

  -- Calculate. `si` is documented as optional by the assert above, but
  -- string.sub rejects nil for it, so the default is filled in here.
  local cut_str = str:sub(si or 1, ei or -1)

  -- Search for the number of backslashes just before the end of the string.
  -- If that number is even, it's a sequence of escaped backslashes, if not
  -- the last backslash starts an escape that did not fit.
  local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
  local nr_backslashes_before_end = start_of_digits - start_of_backslashes
  if nr_backslashes_before_end % 2 == 1 then
    -- Drop only the dangling backslash and whatever digits follow it. The
    -- backslashes before it form complete pairs and are kept, which also
    -- guarantees that a cut made purely of backslashes does not come back
    -- empty.
    cut_str = cut_str:sub(1, start_of_digits - 2)
  end

  return cut_str
end
|
|
|
|
|
|
local function format_cut_string (str, _, l)
  -- Appends a shortened rendition of `str` to the output buffer `l`: a quoted
  -- literal holding the start of the escaped string, followed by the
  -- continuation indicator. The second parameter is unused.

  -- Room left for content once the indicator is accounted for.
  local content_budget = NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR
  local visible_part   = safe_cut(escape_string(str), 1, content_budget)

  -- Format
  local pieces = {
    SHORT_STR_DELIMITER,
    visible_part,
    SHORT_STR_DELIMITER,
    STRING_CONT_INDICATOR,
  }
  for _, piece in ipairs(pieces) do
    l[#l+1] = piece
  end
end
|
|
|
|
local function format_concatted_string (str, _, l)
  -- Cuts the escaped string up into smaller individual substrings, each
  -- rendered as its own quoted literal and joined with ` ..` plus a newline.
  -- Is uglier compared to longform, but works for strings that need escapes.
  --
  -- str: the string to render.
  -- l:   output buffer; four entries are appended per substring, the very
  --      last of them being an empty string instead of a concatenation.
  -- The second parameter is unused.

  -- TODO: Attempt to cut near whitespace?

  -- Error checking
  assert( type(str) == 'string' )
  assert( type(l)   == 'table' )

  -- Calculate
  local width_without_overhead = MAX_HORIZONAL_CHARACTER - 2*#SHORT_STR_DELIMITER - #' ..'
  local escaped = escape_string(str)

  -- Cut strings
  local sub_strings, str_i = {}, 1
  repeat
    local sub_str = safe_cut(escaped, str_i, str_i + width_without_overhead - 1)
    if #sub_str == 0 then
      -- The cut made no progress. Emit the remainder as one (possibly too
      -- wide) piece rather than looping forever on the same position.
      sub_str = escaped:sub(str_i)
    end
    str_i = str_i + #sub_str
    sub_strings[#sub_strings+1] = sub_str
    -- `str_i` is the index of the next unread byte, so the loop may only stop
    -- once it has moved past the last byte; stopping at `str_i == #escaped`
    -- would drop that final byte.
  until str_i > #escaped

  -- Format them
  for _, sub_str in ipairs(sub_strings) do
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = sub_str
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = ' ..\n'
  end
  -- The last substring is not followed by anything: blank out its
  -- concatenation operator.
  l[#l] = ''
end
|
|
|
|
local function format_longform_string (str, _, l)
  -- Appends `str` to the output buffer `l` as a long bracket literal. The
  -- content is written verbatim; the bracket level is whatever
  -- smallest_secure_longform_string_level reports for it. The second
  -- parameter is unused.

  -- Error checking
  assert(type(str) == 'string')
  assert(type(l)   == 'table')

  -- Both brackets carry the same run of equals signs.
  local equals = ('='):rep(smallest_secure_longform_string_level(str))

  -- Format: opening bracket, a newline, the content, closing bracket.
  l[#l+1] = '[' .. equals .. '['
  l[#l+1] = '\n'
  l[#l+1] = str
  l[#l+1] = ']' .. equals .. ']'
end
|
|
|
|
return function (str, display, l)
  -- pretty.format_string
  --
  -- Picks one of the string formatters and lets it append its output to `l`:
  --   * strings shorter than NR_CHARS_IN_LONG_STRING: short form,
  --   * otherwise, when `display` is below DISPLAY.EXPAND: cut form,
  --   * otherwise, when the string contains bytes that need an escape
  --     sequence: concatenated form,
  --   * otherwise: long form.
  --
  -- str:     the string to format.
  -- display: number compared against DISPLAY.EXPAND.
  -- l:       output buffer table.

  -- TODO: Prefer \ddd style escaping to shorter (\n, \t), when many of the
  -- \ddd already exist in the text.

  -- Error checking
  assert(type(str) == 'string')
  assert(type(display) == 'number' and type(l) == 'table')

  -- Select the formatter first, then hand over in a single place.
  local formatter
  if #str < NR_CHARS_IN_LONG_STRING then
    formatter = format_shortform_string
  elseif display < DISPLAY.EXPAND then
    formatter = format_cut_string
  elseif does_string_require_escaping(str) then
    formatter = format_concatted_string
  else
    formatter = format_longform_string
  end

  return formatter(str, nil, l)
end
|