1
0

Improved handling of strings. Code is much clearer. cut_strings option has been removed.

This commit is contained in:
Jon Michael Aanes 2017-07-21 13:15:04 +02:00
parent 39dc9ce84e
commit f3cddec4d0
3 changed files with 180 additions and 50 deletions

View File

@ -520,7 +520,6 @@ local DEBUG_OPTION_USED = { }
local KNOWN_OPTIONS = {
_table_addr_comment = { type = 'boolean', default = false, debug = 'debug' },
cut_strings = { type = 'boolean', default = false },
indent = { type = 'string', default = ' ' },
max_depth = { type = 'number', default = math.huge },
short_builtins = { type = 'boolean', default = false }, -- TODO: Outphase this. Rather automatically use the short versions in places where it would be strange to find the function, like keys, etc.

View File

@ -12,16 +12,19 @@ TODO
-- Constants
local NR_CHARS_IN_LONG_STRING = 40
local SHORT_STR_DELIMITER = '\''
local STRING_CONT_INDICATOR = '...'
local CHAR_TO_STR_REPR = {}
do
for i = 00, 031 do CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i) end
for i = 32, 255 do CHAR_TO_STR_REPR[i] = string.char(i) end
CHAR_TO_STR_REPR[7] = '\\a'
CHAR_TO_STR_REPR[8] = '\\b'
CHAR_TO_STR_REPR[9] = '\t'
CHAR_TO_STR_REPR[10] = '\n'
CHAR_TO_STR_REPR[9] = '\\t'
CHAR_TO_STR_REPR[10] = '\\n'
CHAR_TO_STR_REPR[11] = '\\v'
CHAR_TO_STR_REPR[12] = '\\f'
CHAR_TO_STR_REPR[13] = '\\r'
@ -29,9 +32,15 @@ do
CHAR_TO_STR_REPR[127] = '\\127'
end
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'
--------------------------------------------------------------------------------
-- Util
local function requires_weird_escape_seq (str)
    -- Reports whether `str` contains at least one character matched by
    -- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ. Always returns a real boolean.
    local first_match = str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
    return first_match ~= nil
end
local function escape_string (str)
-- Attempts to escape the string to a format that is both a valid Lua
-- constant and legible unicode.
@ -62,53 +71,72 @@ local function smallest_secure_longform_string_level (str)
return #levels - 1
end
--------------------------------------------------------------------------------
local function format_shortform_string (str, depth, l)
    -- Appends `str` to the output buffer `l` as a single-line literal wrapped
    -- in SHORT_STR_DELIMITER. The text is passed through escape_string, and
    -- every delimiter inside it is prefixed with a backslash.
    -- `depth` is not used by this formatter.
    local quote = SHORT_STR_DELIMITER
    l[#l+1] = quote
    -- Assigning to a single local keeps only gsub's first result (the
    -- string) and discards the substitution count.
    local escaped = escape_string(str):gsub(quote, '\\'..quote)
    l[#l+1] = escaped
    l[#l+1] = quote
end
local function format_cut_string (str, depth, l)
    -- Appends a truncated, quoted rendering of `str` to the output buffer
    -- `l`, followed by STRING_CONT_INDICATOR to show that text was left out.
    -- `depth` is not used by this formatter.

    -- Escape first, then cut: the length budget applies to the escaped text,
    -- leaving room for the continuation indicator.
    local escaped = escape_string(str):gsub(SHORT_STR_DELIMITER, '\\'..SHORT_STR_DELIMITER)
    local cut = escaped:sub(1, NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR)

    -- Look at the run of backslashes just before the end of the cut text,
    -- optionally followed by one or two digits. The two position captures
    -- mark where the run starts and where it stops. An even run consists
    -- only of complete `\\` pairs; an odd run means the final backslash
    -- opens an escape sequence that the cut has broken.
    local start_of_backslashes, start_of_digits = cut:match '()\\*()%d?%d?$'
    local nr_backslashes_before_end = start_of_digits - start_of_backslashes
    if nr_backslashes_before_end % 2 == 1 then
        -- Drop only the dangling backslash and whatever follows it. Any
        -- complete `\\` pairs earlier in the same run are still valid and
        -- are kept.
        cut = cut:sub(1, start_of_digits - 2)
    end

    -- Format
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = cut
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = STRING_CONT_INDICATOR
end
local function format_concatted_string (str, depth, l)
    -- Placeholder for a formatter that is not written yet: every call raises
    -- an error and none of the arguments are inspected.
    local message = '[pretty.string/internal]: format_concatted_string not implemented yet!'
    error(message)
end
local function format_longform_string (str, depth, l)
    -- Appends `str` to the output buffer `l` as a long-bracket literal:
    -- opening bracket, a newline, the raw text, closing bracket.
    -- Raises an assertion error unless `str` is a string, `depth` a number
    -- and `l` a table.
    assert( type(str) == 'string' )
    assert(type(depth) == 'number' and type(l) == 'table')

    -- The bracket level comes from smallest_secure_longform_string_level.
    -- NOTE(review): presumably that helper also covers text ending in ']',
    -- which would otherwise merge with the closing bracket; confirm.
    local equals = string.rep('=', smallest_secure_longform_string_level(str))

    -- Lua discards a newline that directly follows the opening bracket, so
    -- the explicit '\n' keeps a leading newline in `str` from being lost.
    local pieces = { '['..equals..'[', '\n', str, ']'..equals..']' }
    for _, piece in ipairs(pieces) do
        l[#l+1] = piece
    end
end
return function (str, depth, l)
-- pretty.format_string
-- Appends a Lua-literal rendering of `str` to the output buffer `l`.
-- Raises an assertion error unless `str` is a string, `depth` a number and
-- `l` a table.
--
-- NOTE(review): this listing comes from a commit view whose +/- markers were
-- lost. Lines of the previous implementation (the `longform` /
-- `cut_string_index` logic and the trailing `l[#l+1] = left/str/right`
-- writes) appear interleaved with the new dispatch chain further down.
-- Presumably only one of the two versions exists in the real file; confirm
-- against the repository before relying on this text.
-- TODO: Add option for escaping unicode characters.
-- TODO: Improve cutstring argument.
-- Error checking
assert( type(str) == 'string' )
assert(type(depth) == 'number' and type(l) == 'table')
-- Do work
local is_long_string = (str:len() >= NR_CHARS_IN_LONG_STRING)
local newline_or_tab_index = str:find('[\n\t]')
local single_quote_index = str:find('\'')
local double_quote_index = str:find('\"')
-- ...
local chance_of_longform = is_long_string and ((newline_or_tab_index or math.huge) <= NR_CHARS_IN_LONG_STRING) or double_quote_index and single_quote_index
local cut_string_index = l.options.cut_strings and (is_long_string or chance_of_longform)
and math.min(NR_CHARS_IN_LONG_STRING - 3, newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)
local longform = chance_of_longform and ((not cut_string_index) or cut_string_index < math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0))
local escape_newline_and_tab = not longform and newline_or_tab_index
-- Determine string delimiters
local left, right
if longform then
local level = smallest_secure_longform_string_level(str)
left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']'
if newline_or_tab_index then str = '\n' .. str end
elseif not single_quote_index then
left, right = '\'', '\''
-- Dispatch chain: strings shorter than NR_CHARS_IN_LONG_STRING go to the
-- shortform formatter. Longer strings are cut when depth > 0, handed to the
-- concatenation formatter (currently a stub that raises) when they match
-- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ, and otherwise emitted in long-bracket
-- form.
if #str < NR_CHARS_IN_LONG_STRING then
return format_shortform_string(str, depth, l)
elseif depth > 0 then
return format_cut_string (str, depth, l)
elseif requires_weird_escape_seq (str) then
return format_concatted_string(str, depth, l)
else
left, right = '\"', '\"'
return format_longform_string(str, depth, l)
end
-- Cut string
if cut_string_index then str = str:sub(1, cut_string_index) end
str = escape_string(str)
-- Escape newline and tab
if escape_newline_and_tab then str = str:gsub('\n', '\\n'):gsub('\t', '\\t') end
l[#l+1] = left
l[#l+1] = str
l[#l+1] = right
end

View File

@ -18,6 +18,7 @@ local function format_test (t)
end
--------------------------------------------------------------------------------
-- Shortform Strings
format_test {
input = 'Hello World',
@ -26,7 +27,7 @@ format_test {
format_test {
input = 'Hello \'World\'',
expect = '\"Hello \'World\'\"',
expect = '\'Hello \\\'World\\\'\'',
}
format_test {
@ -41,27 +42,27 @@ format_test {
format_test {
input = '\'Hello\' [[World]]',
expect = '\"\'Hello\' [[World]]\"',
expect = '\'\\\'Hello\\\' [[World]]\'',
}
format_test {
input = '\'Hello\' \"there\" [[World]]',
expect = '[=[\'Hello\' \"there\" [[World]]]=]',
expect = '\'\\\'Hello\\\' \"there\" [[World]]\'',
}
format_test {
input = '\'Hello\' \"there\" [=[World]=]',
expect = '[[\'Hello\' \"there\" [=[World]=]]]',
expect = '\'\\\'Hello\\\' \"there\" [=[World]=]\'',
}
format_test {
input = '\nHello World',
expect = '\'\\nHello World\'',
expect = [['\nHello World']],
}
format_test {
input = '\'\"\n',
expect = '[[\n\'\"\n]]',
expect = [['\'"\n']],
}
format_test {
@ -71,16 +72,118 @@ format_test {
format_test {
input = '\\',
expect = '\'\\\\\'',
expect = [['\\']],
}
format_test {
input = '\000',
expect = '\'\\000\'',
}
format_test {
input = '\a\b\v\r\f',
expect = '\'\\a\\b\\v\\r\\f\'',
expect = [['\a\b\v\r\f']],
}
--------------------------------------------------------------------------------
-- Cut Strings
-- Every input below is a string wrapped in a table, presumably so that the
-- string is formatted at depth > 0 and therefore takes the cut-string path.
-- The cut keeps NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR = 37
-- characters of the escaped text and appends '...' after the closing quote.

-- Plain text: exactly 37 characters survive.
format_test {
name = 'Cut string basics',
input = {'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consectet\'...\n}',
}
-- Each tab is expected as the two-character escape \t, and those escaped
-- characters count towards the 37-character budget.
format_test {
name = 'Cut strings are cut after escaping',
input = {'Lorem\tipsum\tdolor\tsit\tamet,\tconsectetur\tadipiscing\telit.\tNunc\tvestibulum\ttempus\tligula.\tSed\tac\tlobortis\tmi.'},
expect = '{\n \'Lorem\\tipsum\\tdolor\\tsit\\tamet,\\tcons\'...\n}',
}
-- The cut falls right after the second complete \t, so nothing extra is dropped.
format_test {
name = 'Cut strings are cut after escaping 2',
input = {'Lorem ipsum dolor sit amet, conse\t\t\tctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, conse\\t\\t\'...\n}',
}
-- The cut falls between the backslash and the 't' of the second \t; the
-- dangling backslash must be removed.
format_test {
name = 'Cut strings are not cut in the middle of an escape code',
input = {'Lorem ipsum dolor sit amet, consec\t\t\ttetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
}
-- Presumably each input backslash is escaped to a pair; the cut then lands
-- on a pair boundary (an even number of backslashes), so all are kept.
format_test {
name = 'Cut strings are not cut in the middle of escaping \\',
input = {'Lorem ipsum dolor sit amet, conse\\\\\\\\ctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, conse\\\\\\\\\'...\n}',
}
-- The cut falls inside the decimal escape for character 14; the partial
-- escape is expected to be dropped entirely.
format_test {
name = 'Cut strings are not cut in the middle of decimal escape codes',
input = {'Lorem ipsum dolor sit amet, consect\014etur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consect\'...\n}',
}
format_test {
-- NOTE: Not priority functionality.
-- NOTE(review): the visible format_cut_string drops a truncated decimal
-- escape instead of shortening it to \14, so this expectation looks
-- aspirational. Confirm whether this case is expected to pass.
name = 'Cut strings can shorten decimal escape codes, if nessesary and possible',
input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
}
--------------------------------------------------------------------------------
-- Concatted Strings
-- TODO
--------------------------------------------------------------------------------
-- Longform Strings
local LONG_STRING = [[
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Nunc vestibulum tempus ligula. Sed ac lobortis mi.
Morbi eu arcu id nunc cursus auctor. Nulla enim tortor, sodales ut nunc non,
euismod aliquam libero. Aliquam neque est, iaculis in nibh vel, mollis ultricies
ante. Sed egestas et massa sit amet posuere. Integer at suscipit lorem, non
consectetur lacus. Vivamus ac facilisis sem. Proin lacinia ex eu volutpat
interdum.
]]
-- A long, multi-line string with no bracket sequences inside it: the
-- expected output is the raw text in level-0 long brackets, with a newline
-- emitted right after the opening bracket.
format_test {
name = 'Longform string basics',
input = LONG_STRING,
expect = '[[\n'..LONG_STRING..']]'
}
local LONG_STRING_WITH_LEVELS = [=[
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
ligula. Sed ac lobortis mi. [Morbi eu arcu id nunc cursus auctor. [Nulla enim
tortor, sodales ut nunc non, euismod aliquam libero.]] Aliquam neque est,
iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
sem. Proin lacinia ex eu volutpat interdum.
]=]
-- The text contains a level-0 closing bracket pair, which would end a
-- level-0 literal early; level-1 brackets are expected instead.
format_test {
name = 'Longform string that requires level 1',
input = LONG_STRING_WITH_LEVELS,
expect = '[=[\n'..LONG_STRING_WITH_LEVELS..']=]'
}
local LONG_STRING_WITH_HIGH_LEVELS = [[
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
ligula. Sed ac lobortis mi. [=[Morbi eu arcu id nunc cursus auctor. [Nulla enim
tortor, sodales ut nunc non, euismod aliquam libero.]=] Aliquam neque est,
iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
sem. Proin lacinia ex eu volutpat interdum.
]]
-- The text contains only level-1 bracket sequences, which cannot terminate
-- a level-0 literal; the lowest level (0) is expected.
format_test {
name = 'Longform string that requires level 0, but not 1',
input = LONG_STRING_WITH_HIGH_LEVELS,
expect = '[[\n'..LONG_STRING_WITH_HIGH_LEVELS..']]'
}
--------------------------------------------------------------------------------