1
0
pretty/pretty.lua
Jon Michael Aanes 155c877987 Alternative process for determining short tables
This one is based on the representative width of the table. Not only
does this produce better results, but it's also more futureproof.
2017-04-14 12:19:23 +02:00

520 lines
17 KiB
Lua

-- Load the sibling modules relative to the name pretty.lua itself was required
-- under, so they are found no matter where pretty.lua was loaded from.
--
-- * `number` and `function` are optional: when they cannot be loaded the
--   variables stay nil, which lets the simple fallback formatters defined
--   further down in this file take over.
-- * `analyze_structure` and `table_type` are mandatory: loading fails loudly.
local format_number, format_function, analyze_structure, TABLE_TYPE
do
    -- The chunk's first vararg is the module name when loaded through require.
    -- Everything up to and including its last dot is the package prefix.
    local module_name = ...
    local thispath = type(module_name) == 'string' and module_name:match('.+%.') or ''

    -- Returns the module on success and nil on failure. Using the second
    -- result of pcall directly would hand back the error message, a truthy
    -- string, and disable the fallbacks.
    local function optional_require (name)
        local ok, module = pcall(require, name)
        if ok then
            return module
        end
        return nil
    end

    -- Load number and function formatting
    format_number   = optional_require(thispath..'number')
    format_function = optional_require(thispath..'function')

    -- Load other stuff
    local was_loaded
    was_loaded, analyze_structure = pcall(require, thispath..'analyze_structure')
    assert(was_loaded, '[pretty]: Could not load vital library: analyze_structure')
    was_loaded, TABLE_TYPE = pcall(require, thispath..'table_type')
    assert(was_loaded, '[pretty]: Could not load vital library: table_type')
end
--
-- Message template raised by format_value when a value's type has no entry in
-- TYPE_TO_FORMAT_FUNC. The two %s slots receive type(value) and tostring(value).
local ERROR_UNKNOWN_TYPE = [[
[pretty]: Attempting to format unsupported value of type "%s".
A native formatting of the value is: %s
We are attempting to cover all Lua features, so please report this bug, so we can improve.
]]
-- Set of table types flagged as single-line candidates.
-- NOTE(review): not referenced anywhere else in the visible file.
local SINGLE_LINE_TABLE_TYPES = {
[TABLE_TYPE.SEQUENCE] = true,
[TABLE_TYPE.PURE_MAP] = true,
[TABLE_TYPE.STRING_MAP] = true,
}
-- NOTE(review): the two element limits below are not referenced anywhere else
-- in the visible file; format_map decides by width instead.
local SINGLE_LINE_SEQ_MAX_ELEMENTS = 10
local SINGLE_LINE_MAP_MAX_ELEMENTS = 5
-- Length threshold used by format_string to classify a string as "long".
local NR_CHARS_IN_LONG_STRING = 40
-- format_map renders a table on one line when its measured width
-- (see width_of_strings_in_l) does not exceed this value.
local MAX_WIDTH_FOR_SINGLE_LINE_TABLE = 38
-- Rank of each key type when ordering key/value pairs; lower ranks sort first.
-- The rank is the zero-based position of the type name in the list below.
local KEY_TYPE_SORT_ORDER = {}
for position, type_name in ipairs {
    'number', 'string', 'boolean', 'table', 'userdata', 'thread', 'function',
} do
    KEY_TYPE_SORT_ORDER[type_name] = position - 1
end
-- Rank of each value type when ordering key/value pairs; lower ranks sort
-- first. The rank is the zero-based position of the type name in the list.
local VALUE_TYPE_SORT_ORDER = {}
for position, type_name in ipairs {
    'nil', 'boolean', 'number', 'string', 'table', 'userdata', 'thread', 'function',
} do
    VALUE_TYPE_SORT_ORDER[type_name] = position - 1
end
-- Set of words that may not be used as bare identifiers, consulted by
-- is_identifier. `goto` is reserved from Lua 5.2 onwards; treating it as
-- reserved on Lua 5.1 as well only means such a key is written in bracket
-- form, which is valid everywhere.
local RESERVED_LUA_WORDS = {
    ['and']      = true,
    ['break']    = true,
    ['do']       = true,
    ['else']     = true,
    ['elseif']   = true,
    ['end']      = true,
    ['false']    = true,
    ['for']      = true,
    ['function'] = true,
    ['goto']     = true,
    ['if']       = true,
    ['in']       = true,
    ['local']    = true,
    ['nil']      = true,
    ['not']      = true,
    ['or']       = true,
    ['repeat']   = true,
    ['return']   = true,
    ['then']     = true,
    ['true']     = true,
    ['until']    = true,
    ['while']    = true,
}
-- Lookup from byte value (0..255) to the text escape_string emits for it.
local CHAR_TO_STR_REPR = {}
do
    -- Bytes 0..31 default to a three-digit decimal escape such as \007.
    for byte = 0, 31 do
        CHAR_TO_STR_REPR[byte] = ('\\%03d'):format(byte)
    end
    -- Every other byte defaults to itself.
    for byte = 32, 255 do
        CHAR_TO_STR_REPR[byte] = string.char(byte)
    end
    -- Bytes with a dedicated representation. Tab and newline stay raw here;
    -- format_string escapes them itself when it needs to.
    local overrides = {
        [7]   = '\\a',
        [8]   = '\\b',
        [9]   = '\t',
        [10]  = '\n',
        [11]  = '\\v',
        [12]  = '\\f',
        [13]  = '\\r',
        [92]  = '\\\\',
        [127] = '\\127',
    }
    for byte, representation in pairs(overrides) do
        CHAR_TO_STR_REPR[byte] = representation
    end
end
--------------------------------------------------------------------------------
-- Util
-- Rewrites one run of digits (optionally preceded by a dot) so that plain
-- string comparison orders it numerically.
--   * with a leading dot: the text is formatted as a number with 12 decimals
--   * otherwise: leading zeros are dropped and the digit count, padded to
--     three digits, is put in front of the remaining digits
local function padnum (d)
    local dot, digits = string.match(d, "(%.?)0*(.+)")
    if #dot > 0 then
        return ("%.12f"):format(d)
    end
    return ("%s%03d%s"):format(dot, #digits, digits)
end

-- "Less than" for alphanumeric ordering: digit runs in both operands are
-- normalised with padnum, and the length of the *other* operand is appended
-- to each side before the plain string comparison.
local function alphanum_compare_strings (a, b)
    local normalised_a = tostring(a):gsub("%.?%d+", padnum)
    local normalised_b = tostring(b):gsub("%.?%d+", padnum)
    local left  = normalised_a .. ("%3d"):format(#b)
    local right = normalised_b .. ("%3d"):format(#a)
    return left < right
end
-- Determines the lowest long-bracket level that can safely delimit `str`, to
-- avoid "code" injection. For example, level 0 cannot be used for
-- 'Hello ]] World', as that would produce '[[Hello ]] World]]'.
--
-- @param str  The string that is to be wrapped in long brackets.
-- @return     The smallest level n (number of '=' signs) such that the closing
--             bracket of level n neither occurs inside `str` nor is formed by
--             the end of `str` together with the start of the closing bracket.
local function smallest_secure_longform_string_level (str)
    -- Append the first character of any closing bracket, so that a string
    -- ending in ']' or ']=' is seen as forming a premature closer as well.
    local subject = str .. ']'

    -- used[n] is true when a level-n closing bracket occurs in `subject`.
    local used = {}
    local init = 1
    while true do
        local first, last = subject:find('%]=*%]', init)
        if not first then
            break
        end
        used[last - first - 1] = true
        -- Continue AT the final ']' rather than after it: that character may
        -- also open the next closer, as in ']]=]', which hides ']=]'.
        init = last
    end

    local level = 0
    while used[level] do
        level = level + 1
    end
    return level
end
-- "Less than" comparator for { key, value } pairs, used with table.sort.
--
-- Ordering:
--   1. Pairs with number keys come first, ordered by key.
--   2. Remaining pairs are ordered by value type (VALUE_TYPE_SORT_ORDER).
--   3. With equal value types, string keys are ordered alphanumerically and
--      keys of different types are ordered by KEY_TYPE_SORT_ORDER.
-- Pairs that none of these rules separate are reported as equivalent.
--
-- @param a  { key, value }
-- @param b  { key, value }
-- @return   true when `a` must be placed before `b`, otherwise false.
local function compare_key_value_pairs (a, b)
    -- Get types
    local type_key_a, type_key_b = type(a[1]), type(b[1])
    local type_value_a, type_value_b = type(a[2]), type(b[2])
    local a_has_number_key = (type_key_a == 'number')
    local b_has_number_key = (type_key_b == 'number')

    -- Types missing from the rank tables (e.g. LuaJIT's cdata) are ranked last
    -- instead of raising a nil-comparison error.
    if a_has_number_key ~= b_has_number_key then
        return a_has_number_key
    elseif a_has_number_key then
        return a[1] < b[1]
    elseif type_value_a ~= type_value_b then
        return (VALUE_TYPE_SORT_ORDER[type_value_a] or math.huge)
             < (VALUE_TYPE_SORT_ORDER[type_value_b] or math.huge)
    elseif type_key_a == 'string' and type_key_b == 'string' then
        return alphanum_compare_strings(a[1], b[1])
    elseif type_key_a ~= type_key_b then
        -- Rank by the KEY types; the value types are equal at this point.
        return (KEY_TYPE_SORT_ORDER[type_key_a] or math.huge)
             < (KEY_TYPE_SORT_ORDER[type_key_b] or math.huge)
    end
    return false
end
-- Collects every entry of `t` as a { key, value } pair and returns the pairs
-- as a sequence sorted with compare_key_value_pairs.
-- The order described there is:
--   1. All number keys first, in order
--   2. Then by value type, as defined in VALUE_TYPE_SORT_ORDER at the top.
--   3. Then by key type.
--      3.1. Strings in alphanumeric order
--      3.2. Other weirdness.
local function get_key_value_pairs_in_proper_order (t)
    local ordered, count = {}, 0
    for key, value in pairs(t) do
        count = count + 1
        ordered[count] = { key, value }
    end
    table.sort(ordered, compare_key_value_pairs)
    return ordered
end
-- Adds a { key, nil } pair for every number missing between two neighbouring
-- keys of `key_value_pairs`, then re-sorts the list in place.
-- NOTE: Assumes that all keys are numbers and that the list is already sorted.
local function fill_holes_in_key_value_pairs (key_value_pairs)
    -- Only the pairs present on entry are inspected; the fillers are appended
    -- behind them and moved into position by the final sort.
    local original_count = #key_value_pairs
    for i = 2, original_count do
        local previous_key = key_value_pairs[i-1][1]
        local current_key = key_value_pairs[i][1]
        for missing_key = previous_key + 1, current_key - 1 do
            key_value_pairs[#key_value_pairs+1] = { missing_key, nil }
        end
    end
    table.sort(key_value_pairs, compare_key_value_pairs)
end
-- Tells whether `str` can be written as a bare Lua identifier: a letter or
-- underscore followed by letters, digits or underscores, and not one of
-- RESERVED_LUA_WORDS.
-- Returns nil when the shape does not match, otherwise a boolean.
local function is_identifier(str)
    local has_identifier_shape = str:match('^[_%a][_%w]*$')
    if not has_identifier_shape then
        return nil
    end
    return not RESERVED_LUA_WORDS[str]
end
-- Returns true when every key of `t` is a "nice" string, i.e. a string that
-- follows the rules of Lua identifiers (see is_identifier). An empty table
-- yields true.
local function contains_only_nice_string_keys (t)
    for key in pairs(t) do
        local is_nice = type(key) == 'string' and is_identifier(key)
        if not is_nice then
            return false
        end
    end
    return true
end
-- Returns true when every key of `t` is a "nice" index: an integer-valued
-- number that is at least 1 and at most #t. An empty table yields true.
local function contains_only_nice_number_indexes (t)
    local length = #t
    for key in pairs(t) do
        local within_range = type(key) == 'number' and key >= 1 and key <= length
        if not (within_range and key == math.floor(key)) then
            return false
        end
    end
    return true
end
-- Returns `str` with every byte replaced by its entry in CHAR_TO_STR_REPR.
local function escape_string (str)
    local pieces = {}
    for index = 1, #str do
        local byte = str:byte(index)
        pieces[index] = CHAR_TO_STR_REPR[byte]
    end
    return table.concat(pieces, '')
end
-- Sums the width of the entries l[start_i] .. l[end_i] (defaults: 1 and #l).
-- A string counts with its length; any other entry counts as 1.
local function width_of_strings_in_l (l, start_i, end_i)
    local total = 0
    for index = start_i or 1, end_i or #l do
        local item = l[index]
        if type(item) == 'string' then
            total = total + #item
        else
            total = total + 1
        end
    end
    return total
end
-- Replaces every table entry in l[start_i] .. l[stop_i] (defaults: 1 and #l)
-- with the empty string, in place. Other entries are left untouched.
local function ignore_alignment_info (l, start_i, stop_i)
    local first, last = start_i or 1, stop_i or #l
    for index = first, last do
        local is_marker = (type(l[index]) == 'table')
        if is_marker then
            l[index] = ''
        end
    end
end
-- Turns the alignment markers in l[start_i] .. l[stop_i] (defaults: 1 and #l)
-- into padding, in place. A marker is a table { width, group }; each one is
-- replaced by as many spaces as it takes to reach the largest width found
-- among the markers of the same group within the range.
local function fix_alignment (l, start_i, stop_i)
    local first, last = start_i or 1, stop_i or #l

    -- Pass 1: largest width per group
    local widest = {}
    for index = first, last do
        local item = l[index]
        if type(item) == 'table' then
            local width, group = item[1], item[2]
            widest[group] = math.max(width, widest[group] or 0)
        end
    end

    -- Pass 2: swap each marker for the missing whitespace
    for index = first, last do
        local item = l[index]
        if type(item) == 'table' then
            l[index] = string.rep(' ', widest[item[2]] - item[1])
        end
    end
end
-- Resolves the layout markers in l[start_i] .. l[stop_i] (defaults: 1 and #l),
-- in place:
--   { 'seperator' } becomes replace_with followed by indent_char repeated
--                   `depth` times
--   { 'indent' }    becomes '' and raises the running depth by one
--   { 'unindent' }  becomes '' and lowers the running depth by one
-- Strings and tables with any other first element are left as they are.
local function replace_seperator_info (l, replace_with, indent_char, depth, start_i, stop_i)
    for index = start_i or 1, stop_i or #l do
        local item = l[index]
        if type(item) == 'table' then
            local tag = item[1]
            if tag == 'seperator' then
                l[index] = replace_with .. indent_char:rep(depth)
            elseif tag == 'indent' then
                l[index] = ''
                depth = depth + 1
            elseif tag == 'unindent' then
                l[index] = ''
                depth = depth - 1
            end
        end
    end
end
--------------------------------------------------------------------------------
-- Identifier stuff
-- Tells whether the table `value` has no entries at all.
--
-- @param value  The table to inspect.
-- @return       true when next(value) finds no key, otherwise false.
-- Raises an error, attributed to the caller, when `value` is not a table.
local function is_empty_table (value)
    if type(value) ~= 'table' then
        -- tostring is needed: on Lua 5.1 '%s' only accepts strings and
        -- numbers, so formatting e.g. a boolean would raise a different error.
        error(('[pretty/internal]: Only tables allowed in function pretty.is_empty_table, but was given %s (%s)'):format(tostring(value), type(value)), 2)
    end
    return next(value) == nil
end
-- Classifies the table `value` as one of the TABLE_TYPE constants. The first
-- matching rule wins:
--   * EMPTY:      the table has no entries
--   * SEQUENCE:   all keys are integers in the range 1..#value
--   * STRING_MAP: all keys are strings shaped like Lua identifiers
--   * PURE_MAP:   #value == 0
--   * MIXED:      anything else
local function get_table_type (value)
    if is_empty_table(value) then
        return TABLE_TYPE.EMPTY
    end

    local has_only_sequence_indexes = contains_only_nice_number_indexes(value)
    local has_only_identifier_keys = contains_only_nice_string_keys(value)
    local has_no_sequence_part = (#value == 0)

    if has_only_sequence_indexes then
        return TABLE_TYPE.SEQUENCE
    end
    if has_only_identifier_keys then
        return TABLE_TYPE.STRING_MAP
    end
    if has_no_sequence_part then
        return TABLE_TYPE.PURE_MAP
    end
    return TABLE_TYPE.MIXED
end
--------------------------------------------------------------------------------
-- Formatting stuff
-- Forward declarations: the formatters defined below call format_value before
-- it is assigned, and both functions are given their bodies further down.
local format_table, format_value
-- Ways to format keys
-- Emits `key = value` for a key that is written as a bare identifier.
-- Appends to `l`: the key, an alignment marker { #key, 'key' }, the text
-- ' = ', and then whatever format_value emits for the value.
local function format_key_and_value_string_map (l, key, value, options, depth)
    local last = #l
    l[last + 1] = key
    l[last + 2] = { #key, 'key' }
    l[last + 3] = ' = '
    return format_value(value, options, depth, l)
end
-- Emits `[key] = value` for an arbitrary key.
-- The key is formatted with depth 'max'. The alignment marker that follows
-- carries the width of everything emitted for the key, brackets included.
local function format_key_and_value_arbitr_map (l, key, value, options, depth)
    local key_start = #l + 1
    l[key_start] = '['
    format_value(key, options, 'max', l) -- TODO: Outphase the usage of the "max" depth thingy.
    l[#l + 1] = ']'
    local key_width = width_of_strings_in_l(l, key_start)
    l[#l + 1] = { key_width, 'key' }
    l[#l + 1] = ' = '
    return format_value(value, options, depth, l)
end
-- Pair formatter for sequences: the key is not written at all, only the value
-- is passed on to format_value. `key` is accepted so that the signature
-- matches the other pair formatters.
local function format_key_and_value_sequence (l, key, value, options, depth)
return format_value(value, options, depth, l)
end
-- Maps a table type to the function format_map uses for each key/value pair:
-- sequences (and empty tables) write values only, string maps write bare
-- identifier keys, and every other type writes keys in bracket form.
local TABLE_TYPE_TO_PAIR_FORMAT = {
[TABLE_TYPE.EMPTY] = format_key_and_value_sequence,
[TABLE_TYPE.SEQUENCE] = format_key_and_value_sequence,
[TABLE_TYPE.SET] = format_key_and_value_arbitr_map,
[TABLE_TYPE.MIXED] = format_key_and_value_arbitr_map,
[TABLE_TYPE.STRING_MAP] = format_key_and_value_string_map,
[TABLE_TYPE.PURE_MAP] = format_key_and_value_arbitr_map,
}
-- Formatting tables
-- Emits the table `t` into `l`, including its braces.
--
-- The table is first written with layout markers between its parts. Its
-- width is then measured: up to MAX_WIDTH_FOR_SINGLE_LINE_TABLE it is laid
-- out on a single line, otherwise with one pair per line and aligned keys.
--
-- @param t        The table to format.
-- @param options  Formatting options; `options.indent` is used per depth level.
-- @param depth    Current nesting depth (a number).
-- @param l        The output list; `l.info` holds the per-table analysis.
local function format_map (t, options, depth, l)
    -- NOTE: Assumes that the input table was pre-checked with `is_single_line_table()`
    -- The analysis may lack an entry for `t`; fall back to classifying it here.
    local info = l.info[t]
    local table_type = info and info.type or get_table_type(t) -- FIXME: This is a temp fix
    local key_value_pairs = get_key_value_pairs_in_proper_order(t)
    -- `info` is checked as well: without an analysis entry nothing is known
    -- about holes, and indexing it would raise an error.
    if table_type == TABLE_TYPE.SEQUENCE and info and info.has_holes then
        fill_holes_in_key_value_pairs(key_value_pairs)
    end
    local pair_format_func = TABLE_TYPE_TO_PAIR_FORMAT[table_type]
    local start_of_table_i = #l + 1

    l[#l+1] = '{'
    l[#l+1] = {'indent'}
    l[#l+1] = {'seperator'}
    for _, pair in ipairs(key_value_pairs) do
        pair_format_func(l, pair[1], pair[2], options, depth + 1)
        l[#l+1] = ','
        l[#l+1] = {'seperator'}
    end
    -- Drop the last two entries: after the final pair these are the trailing
    -- ',' and its separator marker.
    if l[#l][1] == 'seperator' then l[#l-1], l[#l] = nil, nil end
    l[#l+1] = {'unindent'}
    l[#l+1] = {'seperator'}
    l[#l+1] = '}'

    local table_width = width_of_strings_in_l(l, start_of_table_i)
    if table_width <= MAX_WIDTH_FOR_SINGLE_LINE_TABLE then
        -- Is short table: separators become single spaces, alignment is dropped
        replace_seperator_info(l, ' ', '', 0, start_of_table_i)
        ignore_alignment_info(l, start_of_table_i)
    else
        -- Is long table: Fix whitespace alignment
        replace_seperator_info(l, '\n', options.indent, depth, start_of_table_i)
        fix_alignment(l, start_of_table_i)
    end
end
-- Emits the table `t` into `l`, or a placeholder for it:
--   * '<marker>' is written first when options.recursion is 'marked' and the
--     analysis assigned the table a marker
--   * '{}' for tables the analysis typed as EMPTY
--   * '{...}' when the numeric depth has reached options.max_depth, when the
--     table was already visited, or when depth is 'max'
--   * otherwise the full table, through format_map
-- Tables that pass the depth/visited check are recorded in l.visited.
function format_table (t, options, depth, l)
    local info = l.info[t] or {}
    --local table_type = get_table_type(t)

    if options.recursion == 'marked' and info.marker then
        l[#l+1] = '<'
        l[#l+1] = info.marker
        l[#l+1] = '>'
    end

    -- Empty or exceeding max-depth?
    if info.type == TABLE_TYPE.EMPTY then
        l[#l+1] = '{}'
        return
    end
    local is_too_deep = (depth ~= 'max' and depth >= options.max_depth)
    if is_too_deep or l.visited[t] then
        l[#l+1] = '{...}'
        return
    end

    l.visited[t] = true
    if depth == 'max' then
        l[#l+1] = '{...}'
        return
    end

    -- Normal table
    format_map(t, options, depth, l)
end
-- Emits the string `str` into `l` as three entries: the opening delimiter,
-- the (possibly cut and escaped) contents, and the closing delimiter.
-- The delimiter is a long bracket, a single quote or a double quote,
-- depending on the length of the string and on which of newline/tab,
-- single quote and double quote it contains. `depth` is not used.
local function format_string (str, options, depth, l)
-- TODO: Add option for escaping unicode characters.
local is_long_string = (str:len() >= NR_CHARS_IN_LONG_STRING)
-- Position of the first newline/tab, single quote and double quote (or nil).
local newline_or_tab_index = str:find('[\n\t]')
local single_quote_index = str:find('\'')
local double_quote_index = str:find('\"')
-- ...
-- Long form is considered when the string is long and has a newline/tab
-- within its first NR_CHARS_IN_LONG_STRING characters, or when it contains
-- both kinds of quotes. (`and` binds tighter than `or` here.)
local chance_of_longform = is_long_string and ((newline_or_tab_index or math.huge) <= NR_CHARS_IN_LONG_STRING) or double_quote_index and single_quote_index
-- Only with options.cut_strings: the string is cut at the first newline/tab
-- or quote, but no later than NR_CHARS_IN_LONG_STRING - 3 characters.
-- NOTE(review): str:sub below is inclusive, so the cut keeps the character at
-- this index, which may be a quote -- confirm this is intended.
local cut_string_index = options.cut_strings and (is_long_string or chance_of_longform)
and math.min(NR_CHARS_IN_LONG_STRING - 3, newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)
-- Long form is used when it was considered and the string is either not cut,
-- or cut before its first newline/tab/quote.
local longform = chance_of_longform and ((not cut_string_index) or cut_string_index < math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0))
local escape_newline_and_tab = not longform and newline_or_tab_index
-- Determine string delimiters
local left, right
if longform then
local level = smallest_secure_longform_string_level(str)
left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']'
-- Presumably relies on Lua skipping a newline directly after the opening
-- long bracket, so the contents start on their own line.
if newline_or_tab_index then str = '\n' .. str end
elseif not single_quote_index then
left, right = '\'', '\''
else
left, right = '\"', '\"'
end
-- Cut string
if cut_string_index then str = str:sub(1, cut_string_index) end
-- NOTE(review): escape_string is applied to long-form strings as well, where
-- backslash escapes are not interpreted by Lua -- confirm this is intended.
str = escape_string(str)
-- Escape newline and tab
if escape_newline_and_tab then str = str:gsub('\n', '\\n'):gsub('\t', '\\t') end
l[#l+1] = left
l[#l+1] = str
l[#l+1] = right
end
-- Very simple number formatting, used when format_number holds no value
-- (number.lua not available): appends tostring(value) to `l`.
format_number = format_number or function (value, _, _, l)
    local text = tostring(value)
    l[#l+1] = text
end
-- Emits a coroutine as '<status> coroutine: <id>', where <status> comes from
-- coroutine.status and <id> is tostring(value) without its first eight
-- characters. `options` and `depth` are not used.
local function format_coroutine (value, options, depth, l)
    local status = coroutine.status(value)
    local identity = tostring(value):sub(9)
    local last = #l
    l[last + 1] = status
    l[last + 2] = ' coroutine: '
    l[last + 3] = identity
end
-- Emits tostring(value) as a single entry of `l`. `options` and `depth` are
-- not used.
local function format_primitive (value, options, depth, l)
    local text = tostring(value)
    l[#l + 1] = text
end
-- Very simple function formatting, used when format_function holds no value
-- (function.lua not available): appends a stub whose comment carries
-- tostring(value) without its first ten characters.
format_function = format_function or function (value, _, _, l)
    local identity = tostring(value):sub(11)
    l[#l+1] = ('function (...) --[[%s]] end'):format(identity)
end
-- Dispatch table used by format_value: type name -> formatter.
-- Every formatter is called as formatter(value, options, depth, l, format_value).
local TYPE_TO_FORMAT_FUNC = {
    -- Values written through tostring
    ['nil']      = format_primitive,
    boolean      = format_primitive,
    userdata     = format_primitive,
    cdata        = format_primitive, -- Luajit exclusive ?
    -- Values with a dedicated formatter
    number       = format_number,
    string       = format_string,
    thread       = format_coroutine,
    table        = format_table,
    -- TODO
    ['function'] = format_function,
}
-- Emits `value` into `l` using the formatter registered for its type in
-- TYPE_TO_FORMAT_FUNC. The second parameter is ignored; formatters always
-- receive l.options. Raises ERROR_UNKNOWN_TYPE, attributed to the caller,
-- for a type without a formatter.
function format_value (value, _, depth, l)
    local value_type = type(value)
    local formatting = TYPE_TO_FORMAT_FUNC[value_type]
    if not formatting then
        error(ERROR_UNKNOWN_TYPE:format(value_type, tostring(value)), 2)
    end
    formatting(value, l.options, depth, l, format_value)
end
--------------------------------------------------------------------------------
-- Every option accepted by pretty_format, mapped to the type its value must
-- have.
local KNOWN_OPTIONS = {
    _all_function_info = 'boolean',
    cut_strings        = 'boolean',
    include_closure    = 'boolean',
    indent             = 'string',
    math_shorthand     = 'boolean',
    max_depth          = 'number',
    more_function_info = 'boolean',
    recursion          = 'string',
    short_builtins     = 'boolean',
}

-- Checks every entry of `options` against KNOWN_OPTIONS.
--
-- @param options  Table of option name -> option value.
-- Raises an error, attributed to the caller, for an option name that is not
-- listed in KNOWN_OPTIONS or for a value of the wrong type.
local function ensure_that_all_options_are_known (options)
    for option_name, option_value in pairs(options) do
        local expected_type = KNOWN_OPTIONS[option_name]
        local actual_type = type(option_value)
        -- Names and values go through tostring: on Lua 5.1 '%s' only accepts
        -- strings and numbers, so e.g. a boolean value would otherwise raise
        -- a formatting error instead of the intended message.
        if not expected_type then
            error(('[pretty]: Unknown option: %s. Was given value %s (%s)'):format(tostring(option_name), tostring(option_value), actual_type), 2)
        elseif actual_type ~= expected_type then
            error(('[pretty]: Bad value given to option %s: %s (%s). Expected value of type %s'):format(tostring(option_name), tostring(option_value), actual_type, expected_type), 2)
        end
    end
end
-- Formats `value` as a string.
--
-- @param value    Any Lua value.
-- @param options  Optional table of options, see KNOWN_OPTIONS. `max_depth`
--                 defaults to math.huge and `indent` to a tab. The table is
--                 not modified.
-- @return         The formatted string.
-- Raises an error when `options` is neither nil nor a table, when it holds an
-- unknown option, or when an option value has the wrong type.
local function pretty_format (value, options)
    if options ~= nil and type(options) ~= 'table' then
        error(('[pretty]: Options must be given as a table, but was given %s (%s)'):format(tostring(options), type(options)), 2)
    end

    -- Work on a copy, so that filling in the defaults does not write into the
    -- table owned by the caller.
    local resolved_options = {}
    for option_name, option_value in pairs(options or {}) do
        resolved_options[option_name] = option_value
    end
    resolved_options.max_depth = resolved_options.max_depth or math.huge
    resolved_options.indent = resolved_options.indent or '\t'
    ensure_that_all_options_are_known(resolved_options)

    local l = { visited = { next_mark = 1 }, options = resolved_options }
    l.info = (type(value) == 'table') and analyze_structure(value) or {}

    format_value(value, nil, 0, l)

    -- If any alignment info still exists, ignore it
    ignore_alignment_info(l)

    return table.concat(l, '')
end

return pretty_format