-- pretty.string
-- The string formatting module for pretty.

--[=[ Thoughts on displaying strings in the useful ways.

TODO

--]=]

--------------------------------------------------------------------------------
-- Constants

-- Strings shorter than this are always rendered as a single quoted literal.
local NR_CHARS_IN_LONG_STRING  = 40
-- Target width of one line of output when a string is split into pieces.
local MAX_HORIZONAL_CHARACTER  = 80
local SHORT_STR_DELIMITER      = '\''
-- Appended after a quoted string that was cut short.
local STRING_CONT_INDICATOR    = '...'

--------

-- Maps a byte value (0-255) to the text used for it inside a quoted literal.
local CHAR_TO_STR_REPR = {}

do
    -- Control characters default to a three digit decimal escape. Always
    -- using three digits keeps the escape unambiguous when a digit follows.
    for i = 0, 31 do
        CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i)
    end
    -- Everything else defaults to itself.
    for i = 32, 255 do
        CHAR_TO_STR_REPR[i] = string.char(i)
    end

    -- Overrides: named escapes, the backslash, DEL and the delimiter.
    CHAR_TO_STR_REPR[7]   = '\\a'
    CHAR_TO_STR_REPR[8]   = '\\b'
    CHAR_TO_STR_REPR[9]   = '\\t'
    CHAR_TO_STR_REPR[10]  = '\\n'
    CHAR_TO_STR_REPR[11]  = '\\v'
    CHAR_TO_STR_REPR[12]  = '\\f'
    CHAR_TO_STR_REPR[13]  = '\\r'
    CHAR_TO_STR_REPR[92]  = '\\\\'
    CHAR_TO_STR_REPR[127] = '\\127'
    CHAR_TO_STR_REPR[SHORT_STR_DELIMITER:byte()] = '\\'..SHORT_STR_DELIMITER
end

-- Bytes that cannot be shown verbatim: NUL, 1-8, 11-31 and 127. Tab (9) and
-- newline (10) are deliberately absent from this set.
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'

--------------------------------------------------------------------------------
-- Util

local function does_string_require_escaping (str)
    -- Returns true when `str` contains at least one byte from
    -- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ, false otherwise.
    return str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ) ~= nil
end

local escape_string
do
    local function escape_single_byte (char)
        return CHAR_TO_STR_REPR[char:byte()]
    end

    local function escape_malformed_cont_byte (before, cont)
        return before .. '\\' .. cont:byte()
    end

    local function escape_malformed_start_byte (start, after)
        return '\\' .. start:byte() .. after
    end

    function escape_string (str)
        -- Attempts to escape the string, to a format that is both a valid Lua
        -- constant, and legible unicode.
        -- Returns the escaped text, without surrounding delimiters.

        -- Error checking
        assert(type(str) == 'string')

        -- Escape single bytes
        local escaped = str:gsub('.', escape_single_byte)
        local count

        -- Escape malformed continuation bytes: a byte in 128-191 directly
        -- after a byte below 128. Repeated because gsub matches cannot
        -- overlap, and each replacement may expose a new pair.
        repeat
            escaped, count = escaped:gsub('([^\128-\255])([\128-\191])', escape_malformed_cont_byte)
        until count == 0

        -- Escape malformed start bytes: a byte in 192-255 that is not followed
        -- by a continuation byte. The class starts at 192, since 191 is itself
        -- a continuation byte; including it would escape well-formed
        -- sequences that merely end in byte 191.
        repeat
            escaped, count = escaped:gsub('([\192-\255])([^\128-\191])', escape_malformed_start_byte)
        until count == 0

        -- Done, lets return
        return escaped
    end
end

local function smallest_secure_longform_string_level (str)
    -- Determines the level a longform string needs to use, to avoid code
    -- injection. For example, if we want to use longform on the string
    -- 'Hello ]] World', we cannot use level-0 as this would result in
    -- '[[Hello ]] World]]', which could be an issue in certain applications.
    -- Returns the lowest level whose closing delimiter neither occurs in
    -- `str`, nor is formed by the end of `str` running into the delimiter.

    -- Error checking
    assert(type(str) == 'string')

    -- The appended ']' stands in for the first character of the closing
    -- delimiter, so text ending in ']' or ']=' is detected as a conflict.
    local haystack = str .. ']'

    -- Record every level whose closing delimiter appears.
    local used_levels, init = {}, 1
    while true do
        local first, last = haystack:find('%]=*%]', init)
        if not first then break end
        used_levels[last - first - 1] = true
        -- Resume at the closing bracket rather than after it: that bracket
        -- may open the next delimiter, as in ']]=]'.
        init = last
    end

    -- Pick the lowest unused level.
    local level = 0
    while used_levels[level] do
        level = level + 1
    end
    return level
end

--------------------------------------------------------------------------------

local function format_shortform_string (str, depth, l)
    -- Appends `str` to `l` as one escaped, quoted literal.
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = escape_string(str)
    l[#l+1] = SHORT_STR_DELIMITER
end

local function safe_cut (str, si, ei)
    -- Returns str:sub(si, ei), shortened where needed so the result does not
    -- end in the middle of an escape sequence. `si` defaults to 1, `ei` to the
    -- end of the string. Expects already escaped text.

    -- Error checking
    assert(type(str) == 'string')
    assert(type(si) == 'number' or si == nil)
    assert(type(ei) == 'number' or ei == nil)

    -- Calculate
    local cut_str = str:sub(si or 1, ei)

    -- Search for the number of backslashes just before the end of the string
    -- (ignoring up to two trailing digits). If that number is even, it's a
    -- sequence of escaped backslashes, if not the last backslash starts an
    -- escape sequence that was cut off.
    local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
    local nr_backslashes_before_end = start_of_digits - start_of_backslashes
    if nr_backslashes_before_end % 2 == 1 then
        -- Drop only the dangling backslash and its digits. The backslashes
        -- before it form complete pairs and must be kept; otherwise a window
        -- holding nothing but backslashes would be cut down to nothing.
        cut_str = cut_str:sub(1, start_of_digits - 2)
    end

    return cut_str
end

local function format_cut_string (str, depth, l)
    -- Appends a shortened, quoted version of `str` to `l`, followed by
    -- STRING_CONT_INDICATOR.

    -- Calculate string
    local escaped = escape_string(str)
    escaped = safe_cut(escaped, 1, NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR)

    -- Format
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = escaped
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = STRING_CONT_INDICATOR
end

local function format_concatted_string (str, depth, l)
    -- Cuts the string up into smaller individual substrings, each Concatted
    -- together. Is uglier compared to longform, but is at least idempotent.

    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Calculate
    local width_without_overhead = MAX_HORIZONAL_CHARACTER - 2*#SHORT_STR_DELIMITER - #' ..'
    local escaped = escape_string(str)

    -- Cut strings
    local sub_strings, str_i = {}, 1
    repeat
        local sub_str = safe_cut(escaped, str_i, str_i + width_without_overhead - 1)
        str_i = str_i + #sub_str
        sub_strings[#sub_strings+1] = sub_str
    -- str_i is the index of the next unread character, so the loop is only
    -- done once it has moved past the last character.
    until str_i > #escaped

    -- Format them
    for _, sub_str in ipairs(sub_strings) do
        l[#l+1] = SHORT_STR_DELIMITER
        l[#l+1] = sub_str
        l[#l+1] = SHORT_STR_DELIMITER
        l[#l+1] = ' ..\n'
    end
    -- Blank out the concatenation operator after the last piece.
    l[#l] = ''
end

local function format_longform_string (str, depth, l)
    -- Appends `str` to `l` verbatim, as a long bracket literal.

    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Calculate
    local level_required = smallest_secure_longform_string_level(str)
    local equal_signs = string.rep('=', level_required)

    -- Format
    l[#l+1] = '['..equal_signs..'['
    l[#l+1] = '\n'
    l[#l+1] = str
    l[#l+1] = ']'..equal_signs..']'
end

return function (str, depth, l)
    -- pretty.format_string
    -- Appends a representation of `str` to the list `l`.
    --   * Strings shorter than NR_CHARS_IN_LONG_STRING: one quoted literal.
    --   * Otherwise, when depth > 0: a cut off quoted literal.
    --   * Otherwise, when escaping is required: concatenated quoted pieces.
    --   * Otherwise: a long bracket literal.

    -- TODO: Prefer \ddd style escaping to shorter (\n, \t), when many of the
    -- \ddd already exist in the text.

    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Do work
    if #str < NR_CHARS_IN_LONG_STRING then
        return format_shortform_string(str, depth, l)
    elseif depth > 0 then
        return format_cut_string(str, depth, l)
    elseif does_string_require_escaping(str) then
        return format_concatted_string(str, depth, l)
    else
        return format_longform_string(str, depth, l)
    end
end