-- pretty/pstring.lua

-- pretty.string
-- The string formatting module for pretty.
--[=[ Thoughts on displaying strings in useful ways.
TODO
--]=]
--------------------------------------------------------------------------------
-- Constants
-- Strings shorter than this are always rendered in short form; it is also the
-- total width (including the continuation indicator) of a cut-off string.
local NR_CHARS_IN_LONG_STRING = 40
-- Line width used when a string is split into concatenated pieces.
local MAX_HORIZONAL_CHARACTER = 80
-- Quote character placed around short-form string literals.
local SHORT_STR_DELIMITER = '\''
-- Marker appended after a string that has been cut off.
local STRING_CONT_INDICATOR = '...'
--------
-- Maps every byte value (0-255) to the text emitted for that byte inside a
-- quoted string literal.
local CHAR_TO_STR_REPR = {}
do
  -- Control bytes default to a three-digit decimal escape; everything else
  -- defaults to the byte itself.
  for byte = 0, 31 do CHAR_TO_STR_REPR[byte] = ('\\%03i'):format(byte) end
  for byte = 32, 255 do CHAR_TO_STR_REPR[byte] = string.char(byte) end

  -- Control bytes that have a mnemonic escape use it instead.
  local mnemonic_escapes = {
    [7]  = '\\a',
    [8]  = '\\b',
    [9]  = '\\t',
    [10] = '\\n',
    [11] = '\\v',
    [12] = '\\f',
    [13] = '\\r',
  }
  for byte, repr in pairs(mnemonic_escapes) do
    CHAR_TO_STR_REPR[byte] = repr
  end

  -- Backslash and DEL (127) get explicit escapes.
  CHAR_TO_STR_REPR[92]  = '\\\\'
  CHAR_TO_STR_REPR[127] = '\\127'

  -- The delimiter itself must be escaped; assigned last so it always wins.
  CHAR_TO_STR_REPR[SHORT_STR_DELIMITER:byte()] = '\\'..SHORT_STR_DELIMITER
end
-- Lua pattern (a single character class) matching the NUL byte (%z), bytes
-- 1-8, bytes 11-31 and byte 127. Tab (9) and newline (10) are deliberately
-- left out of the class. Used by requires_weird_escape_seq.
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'
--------------------------------------------------------------------------------
-- Util
local function requires_weird_escape_seq (str)
  -- Reports, as a real boolean, whether `str` contains at least one byte
  -- matched by CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ.
  local first_hit = str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
  return first_hit ~= nil
end
local function escape_string (str)
  -- Escapes `str` into text that is both a valid Lua string constant body
  -- and, where possible, legible unicode.
  --
  -- Returns the escaped string (without surrounding delimiters).
  --
  -- TODO: Escape invalid unicode sequences at the very start/end of the
  -- string (a leading continuation byte or a trailing start byte).

  -- Error checking
  assert(type(str) == 'string')

  -- First escape the easy ones: a plain per-byte table lookup.
  local escaped = str:gsub('.', function (char)
    return CHAR_TO_STR_REPR[char:byte()]
  end)

  local count

  -- Escape malformed continuation bytes: a continuation byte (128-191) that
  -- directly follows a byte below 128. Repeated until stable, because the
  -- digits of a freshly written escape can precede yet another
  -- continuation byte.
  repeat
    escaped, count = escaped:gsub('([^\128-\255])([\128-\191])', function (prev, cont)
      return prev..'\\'..cont:byte()
    end)
  until count == 0

  -- Escape malformed start bytes: a start byte (192-255) that is not followed
  -- by a continuation byte. The class starts at 192; 191 is itself a
  -- continuation byte and must not be treated as a start byte, otherwise
  -- valid characters ending in byte 191 would be escaped.
  repeat
    escaped, count = escaped:gsub('([\192-\255])([^\128-\191])', function (lead, following)
      return '\\'..lead:byte()..following
    end)
  until count == 0

  return escaped
end
local function smallest_secure_longform_string_level (str)
  -- Determines the level a longform string needs to use, to avoid code
  -- injection. For example, if we want to use longform on the string
  -- 'Hello ]] World', we cannot use level-0 as this would result in
  -- '[[Hello ]] World]]', which could be an issue in certain applications.
  --
  -- Returns the smallest level n (number of '=' signs) such that the closing
  -- bracket ']' .. ('='):rep(n) .. ']' does not occur anywhere in `str`.

  -- Error checking
  assert(type(str) == 'string')

  -- Record every closing-bracket level present in the string. The scan
  -- resumes ON the final ']' of each match, since that bracket may open the
  -- next closing sequence (e.g. ']]=]' contains both ']]' and ']=]').
  local level_in_use = {}
  local init = 1
  while true do
    local first, last = str:find('%]=*%]', init)
    if not first then break end
    level_in_use[last - first - 1] = true
    init = last
  end

  -- Pick the lowest level that is not in use.
  local level = 0
  while level_in_use[level] do level = level + 1 end
  return level
end
--------------------------------------------------------------------------------
local function format_shortform_string (str, depth, l)
  -- Appends `str` to the output list `l` as a single quoted literal:
  -- delimiter, escaped contents, delimiter. `depth` is not used.
  l[#l+1] = SHORT_STR_DELIMITER
  l[#l+1] = escape_string(str)
  l[#l+1] = SHORT_STR_DELIMITER
end
local function safe_cut (str, si, ei)
  -- Returns str:sub(si, ei), trimmed so that it never ends in the middle of
  -- an escape sequence.
  --
  -- str: an already escaped string.
  -- si:  start index; defaults to 1 when nil.
  -- ei:  end index; defaults to the end of the string when nil.

  -- Error checking
  assert(type(str) == 'string')
  assert(type(si) == 'number' or si == nil)
  assert(type(ei) == 'number' or ei == nil)

  -- Calculate
  local cut_str = str:sub(si or 1, ei)

  -- Search for the number of backslashes just before the end of the string
  -- (optionally followed by one or two digits of an unfinished \ddd escape).
  -- If that number is even, it's a sequence of escaped backslashes; if not,
  -- the last backslash starts an escape sequence that has been cut in half.
  local start_of_backslashes, start_of_digits = cut_str:match('()\\*()%d?%d?$')
  local nr_backslashes_before_end = start_of_digits - start_of_backslashes

  if nr_backslashes_before_end % 2 == 1 then
    -- Drop only the dangling backslash and whatever follows it. The complete
    -- backslash pairs in front of it are kept, so the cut only comes out
    -- empty when the window holds nothing but the broken escape itself.
    cut_str = cut_str:sub(1, start_of_digits - 2)
  end
  return cut_str
end
local function format_cut_string (str, depth, l)
  -- Appends a truncated rendering of `str` to `l`: the escaped text, cut so
  -- that it plus the continuation indicator fits NR_CHARS_IN_LONG_STRING,
  -- wrapped in delimiters and followed by the indicator. `depth` is not used.
  local escaped = escape_string(str)
  local max_visible = NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR
  local visible = safe_cut(escaped, 1, max_visible)

  local pieces = {
    SHORT_STR_DELIMITER,
    visible,
    SHORT_STR_DELIMITER,
    STRING_CONT_INDICATOR,
  }
  for i = 1, #pieces do
    l[#l+1] = pieces[i]
  end
end
local function format_concatted_string (str, depth, l)
  -- Cuts the string up into smaller individual substrings, each concatted
  -- together. Is uglier compared to longform, but is at least idempotent.
  --
  -- Appends to `l`, for every piece: delimiter, piece, delimiter and a
  -- separator (' ..\n' between pieces, '' after the last one).

  -- Error checking
  assert( type(str) == 'string' )
  assert(type(depth) == 'number' and type(l) == 'table')

  -- Calculate
  local width_without_overhead = MAX_HORIZONAL_CHARACTER - 2*#SHORT_STR_DELIMITER - #' ..'
  local escaped = escape_string(str)

  -- Cut strings. `str_i` is the index of the first character not yet emitted,
  -- so we are done only once it has moved PAST the last character; stopping
  -- at `str_i == #escaped` would drop the final character.
  local sub_strings, str_i = {}, 1
  repeat
    local sub_str = safe_cut(escaped, str_i, str_i + width_without_overhead - 1)
    str_i = str_i + #sub_str
    sub_strings[#sub_strings+1] = sub_str
  until str_i > #escaped

  -- Format them
  local nr_sub_strings = #sub_strings
  for i = 1, nr_sub_strings do
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = sub_strings[i]
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = (i < nr_sub_strings) and ' ..\n' or ''
  end
end
local function format_longform_string (str, depth, l)
  -- Appends `str` to `l` as a long-bracket literal: opening bracket, a
  -- newline, the raw string, closing bracket. `depth` is not used.

  -- Error checking
  assert( type(str) == 'string' )
  assert(type(depth) == 'number' and type(l) == 'table')

  -- Calculate
  -- The level is computed for `str .. ']'`, not `str` alone: if the string
  -- ends in ']' followed by n '=' signs, that tail plus the first ']' of a
  -- level-n closing bracket would itself form a closing bracket and end the
  -- literal one character early (e.g. 'foo]' at level 0 gives '[[foo]]]').
  local level_required = smallest_secure_longform_string_level(str .. ']')
  local equal_signs = string.rep('=', level_required)

  -- Format
  l[#l+1] = '['..equal_signs..'['
  -- Lua skips a newline directly after the opening bracket, so emitting one
  -- keeps a leading newline in `str` itself intact.
  l[#l+1] = '\n'
  l[#l+1] = str
  l[#l+1] = ']'..equal_signs..']'
end
return function (str, depth, l)
  -- pretty.format_string
  -- Picks a rendering for `str` and appends it to the output list `l`:
  --   * fewer than NR_CHARS_IN_LONG_STRING bytes: short form;
  --   * otherwise, when depth > 0: cut-off short form;
  --   * otherwise, when it contains bytes needing escapes: concatted pieces;
  --   * otherwise: longform.
  -- TODO: Prefer \ddd style escaping to shorter (\n, \t), when many of the
  --       \ddd already exist in the text.

  -- Error checking
  assert( type(str) == 'string' )
  assert(type(depth) == 'number' and type(l) == 'table')

  -- Select the formatter, then delegate to it.
  local formatter
  if #str < NR_CHARS_IN_LONG_STRING then
    formatter = format_shortform_string
  elseif depth > 0 then
    formatter = format_cut_string
  elseif requires_weird_escape_seq(str) then
    formatter = format_concatted_string
  else
    formatter = format_longform_string
  end
  return formatter(str, depth, l)
end