Improved handling of strings. Code is much clearer. cut_strings
option has been removed.
This commit is contained in:
parent
39dc9ce84e
commit
f3cddec4d0
|
@ -520,7 +520,6 @@ local DEBUG_OPTION_USED = { }
|
|||
local KNOWN_OPTIONS = {
|
||||
_table_addr_comment = { type = 'boolean', default = false, debug = 'debug' },
|
||||
|
||||
cut_strings = { type = 'boolean', default = false },
|
||||
indent = { type = 'string', default = ' ' },
|
||||
max_depth = { type = 'number', default = math.huge },
|
||||
short_builtins = { type = 'boolean', default = false }, -- TODO: Outphase this. Rather automatically use the short versions in places where it would be strange to find the function, like keys, etc.
|
||||
|
|
106
pstring.lua
106
pstring.lua
|
@ -12,16 +12,19 @@ TODO
|
|||
-- Constants
|
||||
|
||||
local NR_CHARS_IN_LONG_STRING = 40
|
||||
local SHORT_STR_DELIMITER = '\''
|
||||
local STRING_CONT_INDICATOR = '...'
|
||||
|
||||
|
||||
|
||||
local CHAR_TO_STR_REPR = {}
|
||||
|
||||
do
|
||||
for i = 00, 031 do CHAR_TO_STR_REPR[i] = ('\\%03i'):format(i) end
|
||||
for i = 32, 255 do CHAR_TO_STR_REPR[i] = string.char(i) end
|
||||
CHAR_TO_STR_REPR[7] = '\\a'
|
||||
CHAR_TO_STR_REPR[8] = '\\b'
|
||||
CHAR_TO_STR_REPR[9] = '\t'
|
||||
CHAR_TO_STR_REPR[10] = '\n'
|
||||
CHAR_TO_STR_REPR[9] = '\\t'
|
||||
CHAR_TO_STR_REPR[10] = '\\n'
|
||||
CHAR_TO_STR_REPR[11] = '\\v'
|
||||
CHAR_TO_STR_REPR[12] = '\\f'
|
||||
CHAR_TO_STR_REPR[13] = '\\r'
|
||||
|
@ -29,9 +32,15 @@ do
|
|||
CHAR_TO_STR_REPR[127] = '\\127'
|
||||
end
|
||||
|
||||
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Util
|
||||
|
||||
-- Reports whether `str` contains at least one character matched by the
-- CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ character class.
-- Always returns a real boolean (never a position or nil).
local function requires_weird_escape_seq (str)
    local first_match = str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
    return first_match ~= nil
end
|
||||
|
||||
local function escape_string (str)
|
||||
-- Attempts to escape the string, to a format that is both a valid Lua
|
||||
-- constant, and legible unicode.
|
||||
|
@ -62,53 +71,72 @@ local function smallest_secure_longform_string_level (str)
|
|||
return #levels - 1
|
||||
end
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
-- Appends `str` to the output list `l` as a short string literal. Three
-- entries are added: the opening delimiter, the escaped text (with every
-- delimiter character backslash-escaped), and the closing delimiter.
-- `depth` is accepted but not used here.
local function format_shortform_string (str, depth, l)
    local quote = SHORT_STR_DELIMITER

    l[#l+1] = quote

    -- Assigning to a single local keeps only gsub's first result (the string)
    -- and drops the substitution count.
    local body = escape_string(str):gsub(quote, '\\'..quote)
    l[#l+1] = body

    l[#l+1] = quote
end
|
||||
|
||||
-- Appends a truncated rendering of `str` to the output list `l`. Four entries
-- are added: opening delimiter, the shortened escaped text, closing delimiter,
-- and STRING_CONT_INDICATOR.
-- `depth` is accepted but not used here.
local function format_cut_string (str, depth, l)
    -- Escape before truncating, so the length limit applies to the escaped
    -- text rather than to the raw input.
    local max_length = NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR
    local escaped = escape_string(str)
    local quoted = escaped:gsub(SHORT_STR_DELIMITER, '\\'..SHORT_STR_DELIMITER)
    local cut = quoted:sub(1, max_length)

    -- Find the run of backslashes sitting just before the end of the string
    -- (optionally followed by up to two digits). An even count is a sequence
    -- of escaped backslashes; an odd count means the truncation split an
    -- escape sequence, so everything from the start of that run is dropped.
    local backslashes_at, digits_at = cut:match '()\\*()%d?%d?$'
    local backslash_count = digits_at - backslashes_at
    if backslash_count % 2 == 1 then
        cut = cut:sub(1, backslashes_at - 1)
    end

    -- Emit the pieces.
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = cut
    l[#l+1] = SHORT_STR_DELIMITER
    l[#l+1] = STRING_CONT_INDICATOR
end
|
||||
|
||||
-- Placeholder for rendering a string as several concatenated pieces.
-- Not implemented: every call raises an error, whatever the arguments.
local function format_concatted_string (str, depth, l)
    local message = '[pretty.string/internal]: format_concatted_string not implemented yet!'
    error(message)
end
|
||||
|
||||
-- Appends `str` to the output list `l` as a long-bracket string literal.
-- Four entries are added: the opening bracket, a newline, the unmodified
-- text, and the closing bracket. The bracket level comes from
-- smallest_secure_longform_string_level(str).
-- Raises an assertion error unless `str` is a string, `depth` is a number
-- and `l` is a table.
local function format_longform_string (str, depth, l)
    -- Argument validation
    assert(type(str) == 'string')
    assert(type(depth) == 'number' and type(l) == 'table')

    -- Opening and closing brackets use the same run of '=' characters.
    local level = smallest_secure_longform_string_level(str)
    local equals = string.rep('=', level)

    -- Emit the pieces.
    l[#l+1] = '['..equals..'['
    l[#l+1] = '\n'
    l[#l+1] = str
    l[#l+1] = ']'..equals..']'
end
|
||||
|
||||
return function (str, depth, l)
|
||||
-- pretty.format_string
|
||||
|
||||
-- TODO: Add option for escaping unicode characters.
|
||||
-- TODO: Improve cutstring argument.
|
||||
|
||||
-- Error checking
|
||||
assert( type(str) == 'string' )
|
||||
assert(type(depth) == 'number' and type(l) == 'table')
|
||||
|
||||
-- Do work
|
||||
|
||||
local is_long_string = (str:len() >= NR_CHARS_IN_LONG_STRING)
|
||||
local newline_or_tab_index = str:find('[\n\t]')
|
||||
local single_quote_index = str:find('\'')
|
||||
local double_quote_index = str:find('\"')
|
||||
|
||||
-- ...
|
||||
local chance_of_longform = is_long_string and ((newline_or_tab_index or math.huge) <= NR_CHARS_IN_LONG_STRING) or double_quote_index and single_quote_index
|
||||
local cut_string_index = l.options.cut_strings and (is_long_string or chance_of_longform)
|
||||
and math.min(NR_CHARS_IN_LONG_STRING - 3, newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)
|
||||
|
||||
local longform = chance_of_longform and ((not cut_string_index) or cut_string_index < math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0))
|
||||
|
||||
local escape_newline_and_tab = not longform and newline_or_tab_index
|
||||
|
||||
-- Determine string delimiters
|
||||
local left, right
|
||||
if longform then
|
||||
local level = smallest_secure_longform_string_level(str)
|
||||
left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']'
|
||||
if newline_or_tab_index then str = '\n' .. str end
|
||||
elseif not single_quote_index then
|
||||
left, right = '\'', '\''
|
||||
if #str < NR_CHARS_IN_LONG_STRING then
|
||||
return format_shortform_string(str, depth, l)
|
||||
elseif depth > 0 then
|
||||
return format_cut_string (str, depth, l)
|
||||
elseif requires_weird_escape_seq (str) then
|
||||
return format_concatted_string(str, depth, l)
|
||||
else
|
||||
left, right = '\"', '\"'
|
||||
return format_longform_string(str, depth, l)
|
||||
end
|
||||
|
||||
-- Cut string
|
||||
if cut_string_index then str = str:sub(1, cut_string_index) end
|
||||
str = escape_string(str)
|
||||
-- Escape newline and tab
|
||||
if escape_newline_and_tab then str = str:gsub('\n', '\\n'):gsub('\t', '\\t') end
|
||||
|
||||
l[#l+1] = left
|
||||
l[#l+1] = str
|
||||
l[#l+1] = right
|
||||
end
|
||||
|
|
|
@ -18,6 +18,7 @@ local function format_test (t)
|
|||
end
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Shortform Strings
|
||||
|
||||
format_test {
|
||||
input = 'Hello World',
|
||||
|
@ -26,7 +27,7 @@ format_test {
|
|||
|
||||
format_test {
|
||||
input = 'Hello \'World\'',
|
||||
expect = '\"Hello \'World\'\"',
|
||||
expect = '\'Hello \\\'World\\\'\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
|
@ -41,27 +42,27 @@ format_test {
|
|||
|
||||
format_test {
|
||||
input = '\'Hello\' [[World]]',
|
||||
expect = '\"\'Hello\' [[World]]\"',
|
||||
expect = '\'\\\'Hello\\\' [[World]]\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
input = '\'Hello\' \"there\" [[World]]',
|
||||
expect = '[=[\'Hello\' \"there\" [[World]]]=]',
|
||||
expect = '\'\\\'Hello\\\' \"there\" [[World]]\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
input = '\'Hello\' \"there\" [=[World]=]',
|
||||
expect = '[[\'Hello\' \"there\" [=[World]=]]]',
|
||||
expect = '\'\\\'Hello\\\' \"there\" [=[World]=]\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
input = '\nHello World',
|
||||
expect = '\'\\nHello World\'',
|
||||
expect = [['\nHello World']],
|
||||
}
|
||||
|
||||
format_test {
|
||||
input = '\'\"\n',
|
||||
expect = '[[\n\'\"\n]]',
|
||||
expect = [['\'"\n']],
|
||||
}
|
||||
|
||||
format_test {
|
||||
|
@ -71,16 +72,118 @@ format_test {
|
|||
|
||||
format_test {
|
||||
input = '\\',
|
||||
expect = '\'\\\\\'',
|
||||
expect = [['\\']],
|
||||
}
|
||||
|
||||
format_test {
|
||||
input = '\000',
|
||||
expect = '\'\\000\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
input = '\a\b\v\r\f',
|
||||
expect = '\'\\a\\b\\v\\r\\f\'',
|
||||
expect = [['\a\b\v\r\f']],
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Cut Strings
|
||||
|
||||
format_test {
|
||||
name = 'Cut string basics',
|
||||
input = {'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||
expect = '{\n \'Lorem ipsum dolor sit amet, consectet\'...\n}',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Cut strings are cut after escaping',
|
||||
input = {'Lorem\tipsum\tdolor\tsit\tamet,\tconsectetur\tadipiscing\telit.\tNunc\tvestibulum\ttempus\tligula.\tSed\tac\tlobortis\tmi.'},
|
||||
expect = '{\n \'Lorem\\tipsum\\tdolor\\tsit\\tamet,\\tcons\'...\n}',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Cut strings are cut after escaping 2',
|
||||
input = {'Lorem ipsum dolor sit amet, conse\t\t\tctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||
expect = '{\n \'Lorem ipsum dolor sit amet, conse\\t\\t\'...\n}',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Cut strings are not cut in the middle of an escape code',
|
||||
input = {'Lorem ipsum dolor sit amet, consec\t\t\ttetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Cut strings are not cut in the middle of escaping \\',
|
||||
input = {'Lorem ipsum dolor sit amet, conse\\\\\\\\ctetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||
expect = '{\n \'Lorem ipsum dolor sit amet, conse\\\\\\\\\'...\n}',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Cut strings are not cut in the middle of decimal escape codes',
|
||||
input = {'Lorem ipsum dolor sit amet, consect\014etur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||
expect = '{\n \'Lorem ipsum dolor sit amet, consect\'...\n}',
|
||||
}
|
||||
|
||||
format_test {
|
||||
-- NOTE: Not priority functionality.
|
||||
name = 'Cut strings can shorten decimal escape codes, if nessesary and possible',
|
||||
input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Concatted Strings
|
||||
|
||||
-- TODO
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Longform Strings
|
||||
|
||||
local LONG_STRING = [[
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
||||
Nunc vestibulum tempus ligula. Sed ac lobortis mi.
|
||||
Morbi eu arcu id nunc cursus auctor. Nulla enim tortor, sodales ut nunc non,
|
||||
euismod aliquam libero. Aliquam neque est, iaculis in nibh vel, mollis ultricies
|
||||
ante. Sed egestas et massa sit amet posuere. Integer at suscipit lorem, non
|
||||
consectetur lacus. Vivamus ac facilisis sem. Proin lacinia ex eu volutpat
|
||||
interdum.
|
||||
]]
|
||||
|
||||
format_test {
|
||||
name = 'Longform string basics',
|
||||
input = LONG_STRING,
|
||||
expect = '[[\n'..LONG_STRING..']]'
|
||||
}
|
||||
|
||||
local LONG_STRING_WITH_LEVELS = [=[
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
|
||||
ligula. Sed ac lobortis mi. [Morbi eu arcu id nunc cursus auctor. [Nulla enim
|
||||
tortor, sodales ut nunc non, euismod aliquam libero.]] Aliquam neque est,
|
||||
iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
|
||||
posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
|
||||
sem. Proin lacinia ex eu volutpat interdum.
|
||||
]=]
|
||||
|
||||
format_test {
|
||||
name = 'Longform string that requires level 1',
|
||||
input = LONG_STRING_WITH_LEVELS,
|
||||
expect = '[=[\n'..LONG_STRING_WITH_LEVELS..']=]'
|
||||
}
|
||||
|
||||
local LONG_STRING_WITH_HIGH_LEVELS = [[
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc vestibulum tempus
|
||||
ligula. Sed ac lobortis mi. [=[Morbi eu arcu id nunc cursus auctor. [Nulla enim
|
||||
tortor, sodales ut nunc non, euismod aliquam libero.]=] Aliquam neque est,
|
||||
iaculis in nibh vel, mollis ultricies ante. Sed egestas et massa sit amet
|
||||
posuere. Integer at suscipit lorem, non consectetur lacus. Vivamus ac facilisis
|
||||
sem. Proin lacinia ex eu volutpat interdum.
|
||||
]]
|
||||
|
||||
format_test {
|
||||
name = 'Longform string that requires level 0, but not 1',
|
||||
input = LONG_STRING_WITH_HIGH_LEVELS,
|
||||
expect = '[[\n'..LONG_STRING_WITH_HIGH_LEVELS..']]'
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue
Block a user