-- pretty.string
-- The string formatting module for pretty.
--
-- The module returns a single function `format_string(str, depth, l)` which
-- appends a Lua-source representation of `str` to the buffer table `l`.

--[=[ Thoughts on displaying strings in the useful ways.

TODO

--]=]

--------------------------------------------------------------------------------
-- Constants

-- Strings shorter than this (in raw bytes) are always shown in short form.
local NR_CHARS_IN_LONG_STRING = 40
-- Maximum width of a single output line when splitting a string into pieces.
local MAX_HORIZONAL_CHARACTER = 80
local SHORT_STR_DELIMITER     = '\''
local STRING_CONT_INDICATOR   = '...'

--------

-- Maps a byte value (0..255) to the text used for it inside a short string.
local CHAR_TO_STR_REPR = {}

do
    -- Control characters default to three-digit decimal escapes. Always using
    -- three digits keeps the escape unambiguous when a digit follows it.
    for i = 0, 31 do
        CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i)
    end
    -- Everything else is emitted as-is, unless overridden below.
    for i = 32, 255 do
        CHAR_TO_STR_REPR[i] = string.char(i)
    end
    CHAR_TO_STR_REPR[7]   = '\\a'
    CHAR_TO_STR_REPR[8]   = '\\b'
    CHAR_TO_STR_REPR[9]   = '\\t'
    CHAR_TO_STR_REPR[10]  = '\\n'
    CHAR_TO_STR_REPR[11]  = '\\v'
    CHAR_TO_STR_REPR[12]  = '\\f'
    CHAR_TO_STR_REPR[13]  = '\\r'
    CHAR_TO_STR_REPR[92]  = '\\\\'
    CHAR_TO_STR_REPR[127] = '\\127'
    CHAR_TO_STR_REPR[SHORT_STR_DELIMITER:byte()] = '\\'..SHORT_STR_DELIMITER
end

-- Bytes that cannot be placed verbatim in a long-form string: NUL, the
-- control characters other than tab (9) and newline (10), and DEL (127).
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'

--------------------------------------------------------------------------------
-- Util

local function requires_weird_escape_seq (str)
    -- Returns true if `str` contains any byte matched by
    -- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ, false otherwise.
    return not not str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
end

local function escape_string (str)
    -- Attempts to escape the string, to a format that is both a valid Lua
    -- constant, and legible unicode.
    -- Returns the escaped text, without surrounding delimiters.

    -- TODO: Escape invalid unicode sequences.

    -- Error checking
    assert(type(str) == 'string')

    -- Do stuff
    local parts = {}
    for i = 1, #str do
        parts[i] = CHAR_TO_STR_REPR[str:byte(i)]
    end
    return table.concat(parts, '')
end

local function smallest_secure_longform_string_level (str)
    -- Determines the level a longform string needs to use, to avoid code
    -- injection. For example, if we want to use longform on the string
    -- 'Hello ]] World', we cannot use level-0 as this would result in
    -- '[[Hello ]] World]]', which could be an issue in certain applications.
    --
    -- Returns the smallest non-negative integer N such that a closing bracket
    -- of level N does not occur in `str` followed by that closing bracket.

    -- Error checking
    assert(type(str) == 'string')

    -- Do stuff
    -- Append a ']' so that a string ending in ']' or ']==' is detected too:
    -- such a tail would merge with the emitted closing bracket and terminate
    -- the literal one character early.
    local probe = str .. ']'
    local used_levels = {}
    local pos = 1
    while true do
        local s, e = probe:find('%]=*%]', pos)
        if not s then break end
        used_levels[e - s - 1] = true   -- number of '=' between the brackets
        -- The closing ']' of this match may open the next candidate
        -- (e.g. ']]=]' contains both ']]' and ']=]'), so resume on it.
        pos = e
    end

    local level = 0
    while used_levels[level] do
        level = level + 1
    end
    return level
end

local function safe_cut (str, si, ei)
    -- Returns str:sub(si, ei), shortened where necessary so that the result
    -- does not end in the middle of an escape sequence. `str` is expected to
    -- be text produced by escape_string. `si` defaults to 1 and `ei` to the
    -- end of the string.

    -- Error checking
    assert(type(str) == 'string')
    assert(type(si) == 'number' or si == nil)
    assert(type(ei) == 'number' or ei == nil)

    -- Calculate
    local cut_str = str:sub(si or 1, ei or -1)

    -- Search for the number of backslashes just before the end of the string
    -- (optionally followed by up to two digits). If that number is even, it's
    -- a sequence of escaped backslashes; if not, the last backslash starts an
    -- escape sequence that was cut off.
    local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
    local nr_backslashes_before_end = start_of_digits - start_of_backslashes
    if nr_backslashes_before_end % 2 == 1 then
        -- Drop only the dangling backslash and the digits after it. The
        -- complete '\\' pairs before it are kept, which guarantees progress
        -- for callers cutting through long runs of backslashes.
        cut_str = cut_str:sub(1, start_of_digits - 2)
    end

    return cut_str
end

--------------------------------------------------------------------------------
-- Formatters. Each appends its output to the buffer table `l`.

local function format_shortform_string (str, depth, l)
    -- Appends the whole string as a single quoted, escaped literal.
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = escape_string(str)
    l[#l+1] = SHORT_STR_DELIMITER
end

local function format_cut_string (str, depth, l)
    -- Appends a quoted prefix of the escaped string, followed by the
    -- continuation indicator. The output is for display, not valid Lua.

    -- Calculate string
    local escaped = escape_string(str)
    local max_len = NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR
    escaped = safe_cut(escaped, 1, max_len)

    -- Format
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = escaped
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = STRING_CONT_INDICATOR
end

local function format_concatted_string (str, depth, l)
    -- Cuts the string up into smaller individual substrings, each concatted
    -- together. Is uglier compared to longform, but is at least idempotent.

    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Calculate
    local width_without_overhead = MAX_HORIZONAL_CHARACTER - 2*#SHORT_STR_DELIMITER - #' ..'
    local escaped = escape_string(str)

    -- Cut strings
    local sub_strings, str_i = {}, 1
    repeat
        local sub_str = safe_cut(escaped, str_i, str_i + width_without_overhead - 1)
        str_i = str_i + #sub_str
        sub_strings[#sub_strings+1] = sub_str
    -- `str_i` is the index of the next unread character, so we are only done
    -- once it has moved past the final character.
    until str_i > #escaped

    -- Format them
    for _, sub_str in ipairs(sub_strings) do
        l[#l+1] = SHORT_STR_DELIMITER
        l[#l+1] = sub_str
        l[#l+1] = SHORT_STR_DELIMITER
        l[#l+1] = ' ..\n'
    end
    -- Blank out the concatenation operator after the final piece.
    l[#l] = ''
end

local function format_longform_string (str, depth, l)
    -- Appends the string verbatim as a long-bracket literal, using the lowest
    -- bracket level that cannot be closed by the contents of the string.

    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Calculate
    local level_required = smallest_secure_longform_string_level(str)
    local equals = string.rep('=', level_required)

    -- Format
    l[#l+1] = '['..equals..'['
    -- Lua skips a newline directly after the opening bracket; emitting one
    -- keeps a leading newline in `str` from being swallowed.
    l[#l+1] = '\n'
    l[#l+1] = str
    l[#l+1] = ']'..equals..']'
end

--------------------------------------------------------------------------------

return function (str, depth, l)
    -- pretty.format_string
    -- Appends a representation of `str` to the buffer table `l`.
    --
    -- str:   the string to format.
    -- depth: a number; when greater than 0, long strings are cut short.
    -- l:     table used as output buffer; pieces are appended to its end.
    --
    -- Strings shorter than NR_CHARS_IN_LONG_STRING use short form. Longer
    -- strings are cut when depth > 0, split into concatenated short strings
    -- when they contain characters that need escape sequences, and are
    -- otherwise written in long-bracket form.

    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Do work
    if #str < NR_CHARS_IN_LONG_STRING then
        return format_shortform_string(str, depth, l)
    elseif depth > 0 then
        return format_cut_string(str, depth, l)
    elseif requires_weird_escape_seq(str) then
        return format_concatted_string(str, depth, l)
    else
        return format_longform_string(str, depth, l)
    end
end