From f3cddec4d012b58e55d4ae402898285581afc2d2 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Fri, 21 Jul 2017 13:15:04 +0200 Subject: [PATCH] Improved handling of strings. Code is much clearer. `cut_strings` option has been removed. --- pretty.lua | 1 - pstring.lua | 110 +++++++++++++++++++++++--------------- test/test_pstring.lua | 119 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 180 insertions(+), 50 deletions(-) diff --git a/pretty.lua b/pretty.lua index c7b448a..a91ad27 100644 --- a/pretty.lua +++ b/pretty.lua @@ -520,7 +520,6 @@ local DEBUG_OPTION_USED = { } local KNOWN_OPTIONS = { _table_addr_comment = { type = 'boolean', default = false, debug = 'debug' }, - cut_strings = { type = 'boolean', default = false }, indent = { type = 'string', default = ' ' }, max_depth = { type = 'number', default = math.huge }, short_builtins = { type = 'boolean', default = false }, -- TODO: Outphase this. Rather automatically use the short versions in places where it would be strange to find the function, like keys, etc. diff --git a/pstring.lua b/pstring.lua index c1ee54b..b244b1e 100644 --- a/pstring.lua +++ b/pstring.lua @@ -12,16 +12,19 @@ TODO -- Constants local NR_CHARS_IN_LONG_STRING = 40 +local SHORT_STR_DELIMITER = '\'' +local STRING_CONT_INDICATOR = '...' 
-local CHAR_TO_STR_REPR = {} + +local CHAR_TO_STR_REPR = {} do for i = 00, 031 do CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i) end for i = 32, 255 do CHAR_TO_STR_REPR[i] = string.char(i) end CHAR_TO_STR_REPR[7] = '\\a' CHAR_TO_STR_REPR[8] = '\\b' - CHAR_TO_STR_REPR[9] = '\t' - CHAR_TO_STR_REPR[10] = '\n' + CHAR_TO_STR_REPR[9] = '\\t' + CHAR_TO_STR_REPR[10] = '\\n' CHAR_TO_STR_REPR[11] = '\\v' CHAR_TO_STR_REPR[12] = '\\f' CHAR_TO_STR_REPR[13] = '\\r' @@ -29,9 +32,15 @@ do CHAR_TO_STR_REPR[127] = '\\127' end +local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]' + -------------------------------------------------------------------------------- -- Util +local function requires_weird_escape_seq (str) + return not not str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ) +end + local function escape_string (str) -- Attempts to escape the string, to a format that is both a valid Lua -- constant, and ledible unicode. @@ -62,53 +71,72 @@ local function smallest_secure_longform_string_level (str) return #levels - 1 end + -------------------------------------------------------------------------------- + +local function format_shortform_string (str, depth, l) + l[#l+1] = SHORT_STR_DELIMITER + l[#l+1] = escape_string(str):gsub(SHORT_STR_DELIMITER, '\\'..SHORT_STR_DELIMITER) + l[#l+1] = SHORT_STR_DELIMITER +end + +local function format_cut_string (str, depth, l) + -- Calculate string + local str = escape_string(str) + :gsub(SHORT_STR_DELIMITER, '\\'..SHORT_STR_DELIMITER) + :sub(1, NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR) + + -- Search for the number of backslashes just before the end of the string. + -- If that number is even, it's a sequence of backslashes, if not it's a + -- broken escape string. 
+ local start_of_backslashes, start_of_digits = str:match '()\\*()%d?%d?$' + local nr_backslashes_before_end = start_of_digits - start_of_backslashes + if nr_backslashes_before_end % 2 == 1 then str = str:sub(1, start_of_backslashes - 1) end + + -- Format + l[#l+1] = SHORT_STR_DELIMITER + l[#l+1] = str + l[#l+1] = SHORT_STR_DELIMITER + l[#l+1] = STRING_CONT_INDICATOR +end + +local function format_concatted_string (str, depth, l) + error '[pretty.string/internal]: format_concatted_string not implemented yet!' +end + +local function format_longform_string (str, depth, l) + + -- Error checking + assert( type(str) == 'string' ) + assert(type(depth) == 'number' and type(l) == 'table') + + -- Calculate + local level_required = smallest_secure_longform_string_level(str) + + -- Format + l[#l+1] = '['..string.rep('=', level_required)..'[' + l[#l+1] = '\n' + l[#l+1] = str + l[#l+1] = ']'..string.rep('=', level_required)..']' +end + return function (str, depth, l) -- pretty.format_string - -- TODO: Add option for escaping unicode characters. - -- TODO: Improve cutstring argument. - -- Error checking assert( type(str) == 'string' ) assert(type(depth) == 'number' and type(l) == 'table') -- Do work - local is_long_string = (str:len() >= NR_CHARS_IN_LONG_STRING) - local newline_or_tab_index = str:find('[\n\t]') - local single_quote_index = str:find('\'') - local double_quote_index = str:find('\"') - - -- ... 
- local chance_of_longform = is_long_string and ((newline_or_tab_index or math.huge) <= NR_CHARS_IN_LONG_STRING) or double_quote_index and single_quote_index - local cut_string_index = l.options.cut_strings and (is_long_string or chance_of_longform) - and math.min(NR_CHARS_IN_LONG_STRING - 3, newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0) - - local longform = chance_of_longform and ((not cut_string_index) or cut_string_index < math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)) - - local escape_newline_and_tab = not longform and newline_or_tab_index - - -- Determine string delimiters - local left, right - if longform then - local level = smallest_secure_longform_string_level(str) - left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']' - if newline_or_tab_index then str = '\n' .. str end - elseif not single_quote_index then - left, right = '\'', '\'' - else - left, right = '\"', '\"' - end - - -- Cut string - if cut_string_index then str = str:sub(1, cut_string_index) end - str = escape_string(str) - -- Escape newline and tab - if escape_newline_and_tab then str = str:gsub('\n', '\\n'):gsub('\t', '\\t') end - - l[#l+1] = left - l[#l+1] = str - l[#l+1] = right + if #str < NR_CHARS_IN_LONG_STRING then + return format_shortform_string(str, depth, l) + elseif depth > 0 then + return format_cut_string (str, depth, l) + elseif requires_weird_escape_seq (str) then + return format_concatted_string(str, depth, l) + else + return format_longform_string(str, depth, l) + end end diff --git a/test/test_pstring.lua b/test/test_pstring.lua index d8aab6c..64f2d98 100644 --- a/test/test_pstring.lua +++ b/test/test_pstring.lua @@ -18,6 +18,7 @@ local function format_test (t) end -------------------------------------------------------------------------------- +-- Shortform Strings format_test { input = 'Hello World', @@ -26,7 +27,7 @@ format_test { format_test { input = 'Hello 
\'World\'', - expect = '\"Hello \'World\'\"', + expect = '\'Hello \\\'World\\\'\'', } format_test { @@ -41,27 +42,27 @@ format_test { format_test { input = '\'Hello\' [[World]]', - expect = '\"\'Hello\' [[World]]\"', + expect = '\'\\\'Hello\\\' [[World]]\'', } format_test { input = '\'Hello\' \"there\" [[World]]', - expect = '[=[\'Hello\' \"there\" [[World]]]=]', + expect = '\'\\\'Hello\\\' \"there\" [[World]]\'', } format_test { input = '\'Hello\' \"there\" [=[World]=]', - expect = '[[\'Hello\' \"there\" [=[World]=]]]', + expect = '\'\\\'Hello\\\' \"there\" [=[World]=]\'', } format_test { input = '\nHello World', - expect = '\'\\nHello World\'', + expect = [['\nHello World']], } format_test { input = '\'\"\n', - expect = '[[\n\'\"\n]]', + expect = [['\'"\n']], } format_test { @@ -71,16 +72,118 @@ format_test { format_test { input = '\\', - expect = '\'\\\\\'', + expect = [['\\']], } format_test { input = '\000', expect = '\'\\000\'', } + format_test { input = '\a\b\v\r\f', - expect = '\'\\a\\b\\v\\r\\f\'', + expect = [['\a\b\v\r\f']], +} + +-------------------------------------------------------------------------------- +-- Cut Strings + +format_test { + name = 'Cut string basics', + input = {'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, consectet\'...\n}', +} + +format_test { + name = 'Cut strings are cut after escaping', + input = {'Lorem\tipsum\tdolor\tsit\tamet,\tconsectetur\tadipiscing\telit.\tNunc\tvestibulum\ttempus\tligula.\tSed\tac\tlobortis\tmi.'}, + expect = '{\n \'Lorem\\tipsum\\tdolor\\tsit\\tamet,\\tcons\'...\n}', +} + +format_test { + name = 'Cut strings are cut after escaping 2', + input = {'Lorem ipsum dolor sit amet, conse\t\t\tctetur adipiscing elit. Nunc vestibulum tempus ligula. 
Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, conse\\t\\t\'...\n}', +} + +format_test { + name = 'Cut strings are not cut in the middle of an escape code', + input = {'Lorem ipsum dolor sit amet, consec\t\t\ttetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}', +} + +format_test { + name = 'Cut strings are not cut in the middle of escaping \\', + input = {'Lorem ipsum dolor sit amet, conse\\\\\\\\ctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, conse\\\\\\\\\'...\n}', +} + +format_test { + name = 'Cut strings are not cut in the middle of decimal escape codes', + input = {'Lorem ipsum dolor sit amet, consect\014etur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, consect\'...\n}', +} + +format_test { + -- NOTE: Not priority functionality. + name = 'Cut strings can shorten decimal escape codes, if nessesary and possible', + input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}', +} + +-------------------------------------------------------------------------------- +-- Concatted Strings + +-- TODO + + +-------------------------------------------------------------------------------- +-- Longform Strings + +local LONG_STRING = [[ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. +Nunc vestibulum tempus ligula. Sed ac lobortis mi. +Morbi eu arcu id nunc cursus auctor. Nulla enim tortor, sodales ut nunc non, +euismod aliquam libero. Aliquam neque est, iaculis in nibh vel, mollis ultricies +ante. Sed egestas et massa sit amet posuere. Integer at suscipit lorem, non +consectetur lacus. Vivamus ac facilisis sem. Proin lacinia ex eu volutpat +interdum. 
+]] + +format_test { + name = 'Longform string basics', + input = LONG_STRING, + expect = '[[\n'..LONG_STRING..']]' +} + +local LONG_STRING_WITH_LEVELS = [=[ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus +ligula. Sed ac lobortis mi. [Morbi eu arcu id nunc cursus auctor. [Nulla enim +tortor, sodales ut nunc non, euismod aliquam libero.]] Aliquam neque est, +iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet +posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis +sem. Proin lacinia ex eu volutpat interdum. +]=] + +format_test { + name = 'Longform string that requires level 1', + input = LONG_STRING_WITH_LEVELS, + expect = '[=[\n'..LONG_STRING_WITH_LEVELS..']=]' +} + +local LONG_STRING_WITH_HIGH_LEVELS = [[ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus +ligula. Sed ac lobortis mi. [=[Morbi eu arcu id nunc cursus auctor. [Nulla enim +tortor, sodales ut nunc non, euismod aliquam libero.]=] Aliquam neque est, +iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet +posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis +sem. Proin lacinia ex eu volutpat interdum. +]] + +format_test { + name = 'Longform string that requires level 0, but not 1', + input = LONG_STRING_WITH_HIGH_LEVELS, + expect = '[[\n'..LONG_STRING_WITH_HIGH_LEVELS..']]' } --------------------------------------------------------------------------------