Improved handling of strings. Code is much clearer. cut_strings option has been removed.

2017-07-21 13:15:04 +02:00 · 2017-07-21 13:15:04 +02:00 · f3cddec4d0
commit f3cddec4d0
parent 39dc9ce84e
3 changed files with 180 additions and 50 deletions
--- a/pretty.lua
+++ b/pretty.lua
@ -520,7 +520,6 @@ local DEBUG_OPTION_USED = {  }
 local KNOWN_OPTIONS = {
    _table_addr_comment = { type = 'boolean', default = false, debug = 'debug' },

-    cut_strings         = { type = 'boolean', default = false },
    indent              = { type = 'string',  default = '    ' },
    max_depth           = { type = 'number',  default = math.huge },
    short_builtins      = { type = 'boolean', default = false }, -- TODO: Outphase this. Rather automatically use the short versions in places where it would be strange to find the function, like keys, etc.
--- a/pstring.lua
+++ b/pstring.lua
@ -12,16 +12,19 @@ TODO
 -- Constants

 local NR_CHARS_IN_LONG_STRING  =  40
+local SHORT_STR_DELIMITER      = '\''   -- Quote character placed before and after shortform and cut strings.
+local STRING_CONT_INDICATOR    = '...'  -- Appended after the closing quote of a cut string.
+
+

 local CHAR_TO_STR_REPR     =  {}
-
 do
 	for i = 00, 031 do  CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i)  end
    for i = 32, 255 do  CHAR_TO_STR_REPR[i] = string.char(i)  end
    CHAR_TO_STR_REPR[7]   = '\\a'
    CHAR_TO_STR_REPR[8]   = '\\b'
-    CHAR_TO_STR_REPR[9]   = '\t'
-    CHAR_TO_STR_REPR[10]  = '\n'
+    CHAR_TO_STR_REPR[9]   = '\\t'
+    CHAR_TO_STR_REPR[10]  = '\\n'
    CHAR_TO_STR_REPR[11]  = '\\v'
    CHAR_TO_STR_REPR[12]  = '\\f'
    CHAR_TO_STR_REPR[13]  = '\\r'
@ -29,9 +32,15 @@ do
    CHAR_TO_STR_REPR[127] = '\\127'
 end

+local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'  -- Pattern class: NUL, bytes 1-8, 11-31 and 127 (decimal escapes); tab (9) and newline (10) are not included.
+
 --------------------------------------------------------------------------------
 -- Util

+local function requires_weird_escape_seq (str)
+	-- True when `str` contains at least one character matched by
+	-- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ, false otherwise.
+	local first_match  =  str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
+	return first_match ~= nil
+end
+
 local function escape_string (str)
 	-- Attempts to escape the string, to a format that is both a valid Lua
 	-- constant, and legible unicode.
@ -62,53 +71,72 @@ local function smallest_secure_longform_string_level (str)
    return #levels - 1
 end

+
 --------------------------------------------------------------------------------

+
+local function format_shortform_string (str, depth, l)
+	-- Appends `str` to `l` as a quoted, escaped, single-line string:
+	-- opening delimiter, escaped contents, closing delimiter.
+	-- `depth` is not used by this formatter.
+	local escaped_delimiter  =  '\\'..SHORT_STR_DELIMITER
+
+	l[#l+1]  =  SHORT_STR_DELIMITER
+	-- gsub also returns a match count; binding to one local keeps only the string.
+	local contents  =  escape_string(str):gsub(SHORT_STR_DELIMITER, escaped_delimiter)
+	l[#l+1]  =  contents
+	l[#l+1]  =  SHORT_STR_DELIMITER
+end
+
+local function format_cut_string (str, depth, l)
+	-- Appends a truncated, escaped, quoted version of `str` to `l`, followed
+	-- by STRING_CONT_INDICATOR. The escaped text plus the indicator is at
+	-- most NR_CHARS_IN_LONG_STRING characters long (delimiters not counted).
+	-- `depth` is not used by this formatter.
+
+	-- Escape first and cut afterwards, so the limit applies to what is printed.
+	local cut  =  escape_string(str)
+	             :gsub(SHORT_STR_DELIMITER, '\\'..SHORT_STR_DELIMITER)
+	             :sub(1, NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR)
+
+	-- Cutting may have split an escape sequence. Look at the run of
+	-- backslashes (optionally followed by one or two digits) at the very end
+	-- of the string. An even run consists solely of escaped backslashes; an
+	-- odd run means the last backslash starts an escape that was cut short.
+	local start_of_backslashes, start_of_digits  =  cut:match '()\\*()%d?%d?$'
+	local nr_backslashes_before_end  =  start_of_digits - start_of_backslashes
+	if nr_backslashes_before_end % 2 == 1 then
+		-- Drop only the dangling backslash and any digits after it; the
+		-- complete backslash pairs in front of it are valid and are kept.
+		cut  =  cut:sub(1, start_of_digits - 2)
+	end
+
+	-- Format
+	l[#l+1]  =  SHORT_STR_DELIMITER
+	l[#l+1]  =  cut
+	l[#l+1]  =  SHORT_STR_DELIMITER
+	l[#l+1]  =  STRING_CONT_INDICATOR
+end
+
+local function format_concatted_string (str, depth, l)
+	-- Placeholder: always raises an error. None of the arguments are used.
+	error('[pretty.string/internal]: format_concatted_string not implemented yet!')
+end
+
+local function format_longform_string (str, depth, l)
+	-- Appends `str` to `l` as a Lua long-bracket string, using the bracket
+	-- level reported by smallest_secure_longform_string_level.
+
+	-- Argument validation
+	assert(type(str) == 'string')
+	assert(type(depth) == 'number' and type(l) == 'table')
+
+	local equals_signs  =  string.rep('=', smallest_secure_longform_string_level(str))
+
+	-- Lua ignores a newline that directly follows the opening long bracket,
+	-- so the extra '\n' does not become part of the string.
+	l[#l+1]  =  '['..equals_signs..'['
+	l[#l+1]  =  '\n'
+	l[#l+1]  =  str
+	l[#l+1]  =  ']'..equals_signs..']'
+end
+
 return function (str, depth, l)
 	-- pretty.format_string

-    -- TODO: Add option for escaping unicode characters.
-    -- TODO: Improve cutstring argument.
-
    -- Error checking
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Do work

-    local is_long_string         =  (str:len() >= NR_CHARS_IN_LONG_STRING)
-    local newline_or_tab_index   =  str:find('[\n\t]')
-    local single_quote_index     =  str:find('\'')
-    local double_quote_index     =  str:find('\"')
-
-    -- ...
-    local chance_of_longform     =  is_long_string and ((newline_or_tab_index or math.huge) <= NR_CHARS_IN_LONG_STRING) or double_quote_index and single_quote_index
-    local cut_string_index       =  l.options.cut_strings and (is_long_string or chance_of_longform)
-                                                          and math.min(NR_CHARS_IN_LONG_STRING - 3, newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)
-
-    local longform   = chance_of_longform and ((not cut_string_index) or cut_string_index < math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0))
-
-    local escape_newline_and_tab  =  not longform and newline_or_tab_index
-
-    -- Determine string delimiters
-    local left, right
-    if longform then
-        local level = smallest_secure_longform_string_level(str)
-        left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']'
-        if newline_or_tab_index then  str = '\n' .. str  end
-    elseif not single_quote_index then
-        left, right = '\'', '\''
+	if #str < NR_CHARS_IN_LONG_STRING then  -- short string: printed in full, quoted and escaped
+		return format_shortform_string(str, depth, l)
+	elseif depth > 0 then  -- long string at depth > 0 (presumably nested in a table, judging by the cut-string tests): truncated
+		return format_cut_string (str, depth, l)
+	elseif requires_weird_escape_seq (str) then  -- long string at depth <= 0 with characters needing escapes: formatter currently raises
+		return format_concatted_string(str, depth, l)
 	else
-        left, right = '\"', '\"'
+		return format_longform_string(str, depth, l)  -- remaining long strings: long-bracket form
 	end
-
-    -- Cut string
-    if cut_string_index then        str = str:sub(1, cut_string_index)  end
-    str = escape_string(str)
-    -- Escape newline and tab
-    if escape_newline_and_tab then  str = str:gsub('\n', '\\n'):gsub('\t', '\\t')  end
-
-    l[#l+1] = left
-    l[#l+1] = str
-    l[#l+1] = right
 end
--- a/test/test_pstring.lua
+++ b/test/test_pstring.lua
@ -18,6 +18,7 @@ local function format_test (t)
 end

 --------------------------------------------------------------------------------
+-- Shortform Strings

 format_test {
    input  = 'Hello World',
@ -26,7 +27,7 @@ format_test {

 format_test {
    input  = 'Hello \'World\'',
-    expect = '\"Hello \'World\'\"',
+    expect = '\'Hello \\\'World\\\'\'',
 }

 format_test {
@ -41,27 +42,27 @@ format_test {

 format_test {
    input  = '\'Hello\' [[World]]',
-    expect = '\"\'Hello\' [[World]]\"',
+    expect = '\'\\\'Hello\\\' [[World]]\'',
 }

 format_test {
    input  = '\'Hello\' \"there\" [[World]]',
-    expect = '[=[\'Hello\' \"there\" [[World]]]=]',
+    expect = '\'\\\'Hello\\\' \"there\" [[World]]\'',
 }

 format_test {
    input  = '\'Hello\' \"there\" [=[World]=]',
-    expect = '[[\'Hello\' \"there\" [=[World]=]]]',
+    expect = '\'\\\'Hello\\\' \"there\" [=[World]=]\'',
 }

 format_test {
    input  = '\nHello World',
-    expect = '\'\\nHello World\'',
+    expect = [['\nHello World']],
 }

 format_test {
    input  = '\'\"\n',
-    expect = '[[\n\'\"\n]]',
+    expect = [['\'"\n']],
 }

 format_test {
@ -71,16 +72,118 @@ format_test {

 format_test {
    input  = '\\',
-    expect = '\'\\\\\'',
+    expect = [['\\']],
 }

 format_test {
    input  = '\000',
    expect = '\'\\000\'',
 }
+
 format_test {
    input  = '\a\b\v\r\f',
-    expect = '\'\\a\\b\\v\\r\\f\'',
+    expect = [['\a\b\v\r\f']],
+}
+
+--------------------------------------------------------------------------------
+-- Cut Strings
+
+format_test {
+    name   = 'Cut string basics',
+    input  = {'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
+    expect = '{\n    \'Lorem ipsum dolor sit amet, consectet\'...\n}',
+}
+
+format_test {
+    name   = 'Cut strings are cut after escaping',
+    input  = {'Lorem\tipsum\tdolor\tsit\tamet,\tconsectetur\tadipiscing\telit.\tNunc\tvestibulum\ttempus\tligula.\tSed\tac\tlobortis\tmi.'},
+    expect = '{\n    \'Lorem\\tipsum\\tdolor\\tsit\\tamet,\\tcons\'...\n}',
+}
+
+format_test {
+    name   = 'Cut strings are cut after escaping 2',
+    input  = {'Lorem ipsum dolor sit amet, conse\t\t\tctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
+    expect = '{\n    \'Lorem ipsum dolor sit amet, conse\\t\\t\'...\n}',
+}
+
+format_test {
+    name   = 'Cut strings are not cut in the middle of an escape code',
+    input  = {'Lorem ipsum dolor sit amet, consec\t\t\ttetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
+    expect = '{\n    \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
+}
+
+format_test {
+    name   = 'Cut strings are not cut in the middle of escaping \\',
+    input  = {'Lorem ipsum dolor sit amet, conse\\\\\\\\ctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
+    expect = '{\n    \'Lorem ipsum dolor sit amet, conse\\\\\\\\\'...\n}',
+}
+
+format_test {
+    name   = 'Cut strings are not cut in the middle of decimal escape codes',
+    input  = {'Lorem ipsum dolor sit amet, consect\014etur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
+    expect = '{\n    \'Lorem ipsum dolor sit amet, consect\'...\n}',
+}
+
+format_test {
+    -- NOTE: Low-priority functionality.
+    name   = 'Cut strings can shorten decimal escape codes, if nessesary and possible',
+    input  = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
+    expect = '{\n    \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
+}
+
+--------------------------------------------------------------------------------
+-- Concatted Strings
+
+-- TODO
+
+
+--------------------------------------------------------------------------------
+-- Longform Strings
+
+local LONG_STRING = [[
+Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+Nunc vestibulum tempus ligula. Sed ac lobortis mi.
+Morbi eu arcu id nunc cursus auctor. Nulla enim tortor, sodales ut nunc non,
+euismod aliquam libero. Aliquam neque est, iaculis in nibh vel, mollis ultricies
+ante. Sed egestas et massa sit amet posuere. Integer at suscipit lorem, non
+consectetur lacus. Vivamus ac facilisis sem. Proin lacinia ex eu volutpat
+interdum.
+]]
+
+format_test {
+    name   = 'Longform string basics',
+    input  = LONG_STRING,
+    expect = '[[\n'..LONG_STRING..']]'
+}
+
+local LONG_STRING_WITH_LEVELS = [=[
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
+ligula. Sed ac lobortis mi. [Morbi eu arcu id nunc cursus auctor. [Nulla enim
+tortor, sodales ut nunc non, euismod aliquam libero.]] Aliquam neque est,
+iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
+posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
+sem. Proin lacinia ex eu volutpat interdum.
+]=]
+
+format_test {
+    name   = 'Longform string that requires level 1',
+    input  = LONG_STRING_WITH_LEVELS,
+    expect = '[=[\n'..LONG_STRING_WITH_LEVELS..']=]'
+}
+
+local LONG_STRING_WITH_HIGH_LEVELS = [[
+Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
+ligula. Sed ac lobortis mi. [=[Morbi eu arcu id nunc cursus auctor. [Nulla enim
+tortor, sodales ut nunc non, euismod aliquam libero.]=] Aliquam neque est,
+iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
+posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
+sem. Proin lacinia ex eu volutpat interdum.
+]]
+
+format_test {
+    name   = 'Longform string that requires level 0, but not 1',
+    input  = LONG_STRING_WITH_HIGH_LEVELS,
+    expect = '[[\n'..LONG_STRING_WITH_HIGH_LEVELS..']]'
 }

 --------------------------------------------------------------------------------