Improved handling of strings. Code is much clearer. cut_strings
option has been removed.
This commit is contained in:
parent
39dc9ce84e
commit
f3cddec4d0
|
@ -520,7 +520,6 @@ local DEBUG_OPTION_USED = { }
|
||||||
local KNOWN_OPTIONS = {
|
local KNOWN_OPTIONS = {
|
||||||
_table_addr_comment = { type = 'boolean', default = false, debug = 'debug' },
|
_table_addr_comment = { type = 'boolean', default = false, debug = 'debug' },
|
||||||
|
|
||||||
cut_strings = { type = 'boolean', default = false },
|
|
||||||
indent = { type = 'string', default = ' ' },
|
indent = { type = 'string', default = ' ' },
|
||||||
max_depth = { type = 'number', default = math.huge },
|
max_depth = { type = 'number', default = math.huge },
|
||||||
short_builtins = { type = 'boolean', default = false }, -- TODO: Outphase this. Rather automatically use the short versions in places where it would be strange to find the function, like keys, etc.
|
short_builtins = { type = 'boolean', default = false }, -- TODO: Outphase this. Rather automatically use the short versions in places where it would be strange to find the function, like keys, etc.
|
||||||
|
|
110
pstring.lua
110
pstring.lua
|
@ -12,16 +12,19 @@ TODO
|
||||||
-- Constants
|
-- Constants
|
||||||
|
|
||||||
local NR_CHARS_IN_LONG_STRING = 40
|
local NR_CHARS_IN_LONG_STRING = 40
|
||||||
|
local SHORT_STR_DELIMITER = '\''
|
||||||
|
local STRING_CONT_INDICATOR = '...'
|
||||||
|
|
||||||
local CHAR_TO_STR_REPR = {}
|
|
||||||
|
|
||||||
|
|
||||||
|
local CHAR_TO_STR_REPR = {}
|
||||||
do
|
do
|
||||||
for i = 00, 031 do CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i) end
|
for i = 00, 031 do CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i) end
|
||||||
for i = 32, 255 do CHAR_TO_STR_REPR[i] = string.char(i) end
|
for i = 32, 255 do CHAR_TO_STR_REPR[i] = string.char(i) end
|
||||||
CHAR_TO_STR_REPR[7] = '\\a'
|
CHAR_TO_STR_REPR[7] = '\\a'
|
||||||
CHAR_TO_STR_REPR[8] = '\\b'
|
CHAR_TO_STR_REPR[8] = '\\b'
|
||||||
CHAR_TO_STR_REPR[9] = '\t'
|
CHAR_TO_STR_REPR[9] = '\\t'
|
||||||
CHAR_TO_STR_REPR[10] = '\n'
|
CHAR_TO_STR_REPR[10] = '\\n'
|
||||||
CHAR_TO_STR_REPR[11] = '\\v'
|
CHAR_TO_STR_REPR[11] = '\\v'
|
||||||
CHAR_TO_STR_REPR[12] = '\\f'
|
CHAR_TO_STR_REPR[12] = '\\f'
|
||||||
CHAR_TO_STR_REPR[13] = '\\r'
|
CHAR_TO_STR_REPR[13] = '\\r'
|
||||||
|
@ -29,9 +32,15 @@ do
|
||||||
CHAR_TO_STR_REPR[127] = '\\127'
|
CHAR_TO_STR_REPR[127] = '\\127'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- Util
|
-- Util
|
||||||
|
|
||||||
|
local function requires_weird_escape_seq (str)
    -- Reports whether `str` contains at least one character matched by
    -- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ. Always returns a real boolean,
    -- never the match index.
    local first_match = str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
    return first_match ~= nil
end
|
||||||
|
|
||||||
local function escape_string (str)
|
local function escape_string (str)
|
||||||
-- Attempts to escape the string, to a format that is both a valid Lua
|
-- Attempts to escape the string, to a format that is both a valid Lua
|
||||||
-- constant, and legible unicode.
|
-- constant, and legible unicode.
|
||||||
|
@ -62,53 +71,72 @@ local function smallest_secure_longform_string_level (str)
|
||||||
return #levels - 1
|
return #levels - 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
local function format_shortform_string (str, depth, l)
    -- Appends `str` to the output list `l` as a short-form literal:
    -- opening delimiter, escaped content, closing delimiter.
    -- Every delimiter occurring in the escaped content is prefixed with a
    -- backslash. `depth` is accepted for signature parity with the other
    -- formatters, but is not used here.
    local delimiter = SHORT_STR_DELIMITER

    l[#l+1] = delimiter

    -- gsub returns (result, count); the local assignment keeps only the
    -- result string.
    local content = escape_string(str):gsub(delimiter, '\\' .. delimiter)
    l[#l+1] = content

    l[#l+1] = delimiter
end
|
||||||
|
|
||||||
|
local function format_cut_string (str, depth, l)
    -- Appends a truncated short-form rendition of `str` to the output list
    -- `l`, followed by STRING_CONT_INDICATOR placed after the closing
    -- delimiter. `depth` is not used here.

    -- Escape first, then cut: the length limit applies to the escaped text,
    -- leaving room for the continuation indicator.
    local max_length = NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR
    local escaped = escape_string(str)
    escaped = escaped:gsub(SHORT_STR_DELIMITER, '\\' .. SHORT_STR_DELIMITER)
    local visible = escaped:sub(1, max_length)

    -- Look at the run of backslashes just before the end of the string
    -- (optionally followed by up to two digits). An even run is a sequence
    -- of escaped backslashes; an odd run means the cut landed in the middle
    -- of an escape sequence, so everything from the start of the run is
    -- dropped.
    local run_start, run_end = visible:match('()\\*()%d?%d?$')
    local backslash_count = run_end - run_start
    if backslash_count % 2 == 1 then
        visible = visible:sub(1, run_start - 1)
    end

    -- Emit
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = visible
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = STRING_CONT_INDICATOR
end
|
||||||
|
|
||||||
|
local function format_concatted_string (str, depth, l)
    -- Placeholder for the concatenated-string format. It is not implemented:
    -- every call raises an error and none of the arguments are inspected.
    local message = '[pretty.string/internal]: format_concatted_string not implemented yet!'
    error(message)
end
|
||||||
|
|
||||||
|
local function format_longform_string (str, depth, l)
    -- Appends `str` to the output list `l` as a long-bracket literal, using
    -- the bracket level reported by smallest_secure_longform_string_level.
    -- The string content itself is emitted unmodified.

    -- Argument validation
    assert(type(str) == 'string')
    assert(type(depth) == 'number' and type(l) == 'table')

    -- The same run of '=' is used in the opening and the closing bracket.
    local equals_run = ('='):rep(smallest_secure_longform_string_level(str))

    l[#l+1] = '[' .. equals_run .. '['
    -- Lua skips a newline that directly follows the opening long bracket,
    -- so emitting one here does not add anything to the literal's value.
    l[#l+1] = '\n'
    l[#l+1] = str
    l[#l+1] = ']' .. equals_run .. ']'
end
|
||||||
|
|
||||||
return function (str, depth, l)
|
return function (str, depth, l)
|
||||||
-- pretty.format_string
|
-- pretty.format_string
|
||||||
|
|
||||||
-- TODO: Add option for escaping unicode characters.
|
|
||||||
-- TODO: Improve cutstring argument.
|
|
||||||
|
|
||||||
-- Error checking
|
-- Error checking
|
||||||
assert( type(str) == 'string' )
|
assert( type(str) == 'string' )
|
||||||
assert(type(depth) == 'number' and type(l) == 'table')
|
assert(type(depth) == 'number' and type(l) == 'table')
|
||||||
|
|
||||||
-- Do work
|
-- Do work
|
||||||
|
|
||||||
local is_long_string = (str:len() >= NR_CHARS_IN_LONG_STRING)
|
if #str < NR_CHARS_IN_LONG_STRING then
|
||||||
local newline_or_tab_index = str:find('[\n\t]')
|
return format_shortform_string(str, depth, l)
|
||||||
local single_quote_index = str:find('\'')
|
elseif depth > 0 then
|
||||||
local double_quote_index = str:find('\"')
|
return format_cut_string (str, depth, l)
|
||||||
|
elseif requires_weird_escape_seq (str) then
|
||||||
-- ...
|
return format_concatted_string(str, depth, l)
|
||||||
local chance_of_longform = is_long_string and ((newline_or_tab_index or math.huge) <= NR_CHARS_IN_LONG_STRING) or double_quote_index and single_quote_index
|
else
|
||||||
local cut_string_index = l.options.cut_strings and (is_long_string or chance_of_longform)
|
return format_longform_string(str, depth, l)
|
||||||
and math.min(NR_CHARS_IN_LONG_STRING - 3, newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)
|
end
|
||||||
|
|
||||||
local longform = chance_of_longform and ((not cut_string_index) or cut_string_index < math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0))
|
|
||||||
|
|
||||||
local escape_newline_and_tab = not longform and newline_or_tab_index
|
|
||||||
|
|
||||||
-- Determine string delimiters
|
|
||||||
local left, right
|
|
||||||
if longform then
|
|
||||||
local level = smallest_secure_longform_string_level(str)
|
|
||||||
left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']'
|
|
||||||
if newline_or_tab_index then str = '\n' .. str end
|
|
||||||
elseif not single_quote_index then
|
|
||||||
left, right = '\'', '\''
|
|
||||||
else
|
|
||||||
left, right = '\"', '\"'
|
|
||||||
end
|
|
||||||
|
|
||||||
-- Cut string
|
|
||||||
if cut_string_index then str = str:sub(1, cut_string_index) end
|
|
||||||
str = escape_string(str)
|
|
||||||
-- Escape newline and tab
|
|
||||||
if escape_newline_and_tab then str = str:gsub('\n', '\\n'):gsub('\t', '\\t') end
|
|
||||||
|
|
||||||
l[#l+1] = left
|
|
||||||
l[#l+1] = str
|
|
||||||
l[#l+1] = right
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -18,6 +18,7 @@ local function format_test (t)
|
||||||
end
|
end
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
-- Shortform Strings
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = 'Hello World',
|
input = 'Hello World',
|
||||||
|
@ -26,7 +27,7 @@ format_test {
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = 'Hello \'World\'',
|
input = 'Hello \'World\'',
|
||||||
expect = '\"Hello \'World\'\"',
|
expect = '\'Hello \\\'World\\\'\'',
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
|
@ -41,27 +42,27 @@ format_test {
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\'Hello\' [[World]]',
|
input = '\'Hello\' [[World]]',
|
||||||
expect = '\"\'Hello\' [[World]]\"',
|
expect = '\'\\\'Hello\\\' [[World]]\'',
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\'Hello\' \"there\" [[World]]',
|
input = '\'Hello\' \"there\" [[World]]',
|
||||||
expect = '[=[\'Hello\' \"there\" [[World]]]=]',
|
expect = '\'\\\'Hello\\\' \"there\" [[World]]\'',
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\'Hello\' \"there\" [=[World]=]',
|
input = '\'Hello\' \"there\" [=[World]=]',
|
||||||
expect = '[[\'Hello\' \"there\" [=[World]=]]]',
|
expect = '\'\\\'Hello\\\' \"there\" [=[World]=]\'',
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\nHello World',
|
input = '\nHello World',
|
||||||
expect = '\'\\nHello World\'',
|
expect = [['\nHello World']],
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\'\"\n',
|
input = '\'\"\n',
|
||||||
expect = '[[\n\'\"\n]]',
|
expect = [['\'"\n']],
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
|
@ -71,16 +72,118 @@ format_test {
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\\',
|
input = '\\',
|
||||||
expect = '\'\\\\\'',
|
expect = [['\\']],
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\000',
|
input = '\000',
|
||||||
expect = '\'\\000\'',
|
expect = '\'\\000\'',
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
input = '\a\b\v\r\f',
|
input = '\a\b\v\r\f',
|
||||||
expect = '\'\\a\\b\\v\\r\\f\'',
|
expect = [['\a\b\v\r\f']],
|
||||||
|
}
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
-- Cut Strings
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut string basics',
|
||||||
|
input = {'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, consectet\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings are cut after escaping',
|
||||||
|
input = {'Lorem\tipsum\tdolor\tsit\tamet,\tconsectetur\tadipiscing\telit.\tNunc\tvestibulum\ttempus\tligula.\tSed\tac\tlobortis\tmi.'},
|
||||||
|
expect = '{\n \'Lorem\\tipsum\\tdolor\\tsit\\tamet,\\tcons\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings are cut after escaping 2',
|
||||||
|
input = {'Lorem ipsum dolor sit amet, conse\t\t\tctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, conse\\t\\t\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings are not cut in the middle of an escape code',
|
||||||
|
input = {'Lorem ipsum dolor sit amet, consec\t\t\ttetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings are not cut in the middle of escaping \\',
|
||||||
|
input = {'Lorem ipsum dolor sit amet, conse\\\\\\\\ctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, conse\\\\\\\\\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings are not cut in the middle of decimal escape codes',
|
||||||
|
input = {'Lorem ipsum dolor sit amet, consect\014etur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, consect\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
-- NOTE: Not priority functionality.
|
||||||
|
name = 'Cut strings can shorten decimal escape codes, if nessesary and possible',
|
||||||
|
input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
-- Concatted Strings
|
||||||
|
|
||||||
|
-- TODO
|
||||||
|
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
-- Longform Strings
|
||||||
|
|
||||||
|
local LONG_STRING = [[
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
||||||
|
Nunc vestibulum tempus ligula. Sed ac lobortis mi.
|
||||||
|
Morbi eu arcu id nunc cursus auctor. Nulla enim tortor, sodales ut nunc non,
|
||||||
|
euismod aliquam libero. Aliquam neque est, iaculis in nibh vel, mollis ultricies
|
||||||
|
ante. Sed egestas et massa sit amet posuere. Integer at suscipit lorem, non
|
||||||
|
consectetur lacus. Vivamus ac facilisis sem. Proin lacinia ex eu volutpat
|
||||||
|
interdum.
|
||||||
|
]]
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Longform string basics',
|
||||||
|
input = LONG_STRING,
|
||||||
|
expect = '[[\n'..LONG_STRING..']]'
|
||||||
|
}
|
||||||
|
|
||||||
|
local LONG_STRING_WITH_LEVELS = [=[
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
|
||||||
|
ligula. Sed ac lobortis mi. [Morbi eu arcu id nunc cursus auctor. [Nulla enim
|
||||||
|
tortor, sodales ut nunc non, euismod aliquam libero.]] Aliquam neque est,
|
||||||
|
iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
|
||||||
|
posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
|
||||||
|
sem. Proin lacinia ex eu volutpat interdum.
|
||||||
|
]=]
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Longform string that requires level 1',
|
||||||
|
input = LONG_STRING_WITH_LEVELS,
|
||||||
|
expect = '[=[\n'..LONG_STRING_WITH_LEVELS..']=]'
|
||||||
|
}
|
||||||
|
|
||||||
|
local LONG_STRING_WITH_HIGH_LEVELS = [[
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
|
||||||
|
ligula. Sed ac lobortis mi. [=[Morbi eu arcu id nunc cursus auctor. [Nulla enim
|
||||||
|
tortor, sodales ut nunc non, euismod aliquam libero.]=] Aliquam neque est,
|
||||||
|
iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
|
||||||
|
posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
|
||||||
|
sem. Proin lacinia ex eu volutpat interdum.
|
||||||
|
]]
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Longform string that requires level 0, but not 1',
|
||||||
|
input = LONG_STRING_WITH_HIGH_LEVELS,
|
||||||
|
expect = '[[\n'..LONG_STRING_WITH_HIGH_LEVELS..']]'
|
||||||
}
|
}
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in New Issue
Block a user