1
0
pretty/pretty.lua

433 lines
14 KiB
Lua
Raw Normal View History

2016-12-28 23:51:07 +00:00
-- Table classifications returned by `get_table_type`.
local TABLE_TYPE_SEQUENCE = 'SEQUENCE'
local TABLE_TYPE_PURE_MAP = 'PURE MAP'
local TABLE_TYPE_MIXED = 'MIXED TABLE'
local TABLE_TYPE_EMPTY = 'EMPTY TABLE'
-- Largest sequence / pure map that `is_single_line_table` still accepts.
local SINGLE_LINE_SEQ_MAX_ELEMENTS = 10
local SINGLE_LINE_MAP_MAX_ELEMENTS = 5
-- Strings with at least this many characters count as "long" in
-- `format_string`.
local NR_CHARS_IN_LONG_STRING = 40
-- Rank of each Lua type name as used by `compare_key_value_pairs`;
-- a lower rank sorts earlier.
local TYPE_SORT_ORDER = {
	['nil']      = 0,
	boolean      = 1,
	number       = 2,
	string       = 3,
	table        = 4,
	userdata     = 5,
	thread       = 6,
	['function'] = 7,
}
-- Set of words that cannot be used as bare identifiers. `is_identifier`
-- consults it to decide whether a string key may be written as `key = ...`
-- instead of `['key'] = ...`.
-- `goto` is reserved from Lua 5.2 onwards; listing it is harmless on 5.1,
-- where it merely causes such a key to be written in bracket form.
local RESERVED_LUA_WORDS = {
	['and'] = true,
	['break'] = true,
	['do'] = true,
	['else'] = true,
	['elseif'] = true,
	['end'] = true,
	['false'] = true,
	['for'] = true,
	['function'] = true,
	['goto'] = true,
	['if'] = true,
	['in'] = true,
	['local'] = true,
	['nil'] = true,
	['not'] = true,
	['or'] = true,
	['repeat'] = true,
	['return'] = true,
	['then'] = true,
	['true'] = true,
	['until'] = true,
	['while'] = true,
}
-- Lookup table from byte value (0..255) to the text `escape_string` emits
-- for that byte.
local CHAR_TO_STR_REPR = {}
do
	-- Bytes 0..31 default to a three-digit decimal escape: \000 .. \031.
	for byte = 0, 31 do
		CHAR_TO_STR_REPR[byte] = ('\\%03d'):format(byte)
	end
	-- Every other byte defaults to the character itself.
	for byte = 32, 255 do
		CHAR_TO_STR_REPR[byte] = string.char(byte)
	end
	-- Named escapes override the numeric defaults.
	CHAR_TO_STR_REPR[7]   = '\\a'
	CHAR_TO_STR_REPR[8]   = '\\b'
	CHAR_TO_STR_REPR[11]  = '\\v'
	CHAR_TO_STR_REPR[12]  = '\\f'
	CHAR_TO_STR_REPR[13]  = '\\r'
	-- Tab and newline are stored as the raw characters, not as escapes.
	CHAR_TO_STR_REPR[9]   = '\t'
	CHAR_TO_STR_REPR[10]  = '\n'
	-- Backslash is doubled; DEL gets its decimal escape.
	CHAR_TO_STR_REPR[92]  = '\\\\'
	CHAR_TO_STR_REPR[127] = '\\127'
end
--------------------------------------------------------------------------------
-- Util
-- Rewrites one numeric run so that plain string comparison orders it
-- numerically. A run starting with '.' is rendered with "%.12f"; any other
-- run loses its leading zeros and is prefixed with its three-digit length.
local function padnum(d)
	local dot, digits = string.match(d, "(%.?)0*(.+)")
	if dot ~= '' then
		return string.format("%.12f", d)
	end
	return string.format("%s%03d%s", dot, #digits, digits)
end
-- "Less than" comparator that orders embedded numbers by value, so that
-- e.g. "x2" sorts before "x10". Each side is normalised through `padnum`
-- and suffixed with the other side's length before the plain `<` test.
local function alphanum_compare_strings (a, b)
	-- Assigning to a single local keeps only gsub's first result (the string).
	local padded_a = tostring(a):gsub("%.?%d+", padnum)
	local padded_b = tostring(b):gsub("%.?%d+", padnum)
	local key_a = padded_a .. ("%3d"):format(#b)
	local key_b = padded_b .. ("%3d"):format(#a)
	return key_a < key_b
end
-- Returns how many times `substr` matches inside `str`.
-- `substr` is handed to string.gsub unchanged, so it is interpreted as a
-- Lua pattern rather than as plain text.
local function count_occurances_of_substring_in_string (str, substr)
	-- gsub returns (new_string, match_count); keep only the count.
	return (select(2, string.gsub(str, substr, '')))
end
-- Determines the smallest long-bracket level that can safely wrap `str`,
-- to avoid "code" injection. For example, wrapping 'Hello ]] World' at
-- level 0 would give '[[Hello ]] World]]', which terminates early, so
-- level 1 ('[=[ ... ]=]') is required instead.
--
-- A level N is unusable when `str`:
--   * contains the closer for that level (`]`, N times `=`, `]`), or
--   * ends with `]` followed by N `=`, because that tail would combine
--     with the first `]` of the closing delimiter and end the string early.
--
-- @param str  the string to be wrapped
-- @return     the lowest usable level (0 means plain `[[ ... ]]`)
local function smallest_secure_longform_string_level (str)
	local taken = {}
	-- Scan with an explicit cursor instead of gsub: the final `]` of one
	-- closer may also be the first `]` of the next one (as in ']=]]'), and
	-- gsub's non-overlapping matches would miss that.
	local pos = 1
	while true do
		local first, last = str:find('%]=*%]', pos)
		if not first then break end
		taken[last - first - 1] = true
		pos = last
	end
	local trailing_equals = str:match('%](=*)$')
	if trailing_equals then
		taken[#trailing_equals] = true
	end
	-- Walk upwards explicitly; `#` on a table with holes may return any
	-- border, which would not necessarily be the smallest free level.
	local level = 0
	while taken[level] do
		level = level + 1
	end
	return level
end
-- Comparator for table.sort over `{ key, value }` pairs.
--
-- Ordering rules:
--   1. Keys of different types are ordered by TYPE_SORT_ORDER of the key type.
--   2. Two number keys are ordered numerically, so the output no longer
--      depends on the traversal order of the source table.
--   3. Two string keys are ordered by TYPE_SORT_ORDER of their value types,
--      and by `alphanum_compare_strings` on the keys when the value types
--      match.
--   4. Any other pair of same-typed keys is treated as equal.
--
-- @param a  pair `{ key, value }`
-- @param b  pair `{ key, value }`
-- @return   true when `a` must come before `b`
local function compare_key_value_pairs (a, b)
	local key_a, key_b = a[1], b[1]
	local type_key_a, type_key_b = type(key_a), type(key_b)
	if type_key_a ~= type_key_b then
		return TYPE_SORT_ORDER[type_key_a] < TYPE_SORT_ORDER[type_key_b]
	elseif type_key_a == 'number' then
		return key_a < key_b
	elseif type_key_a ~= 'string' then
		-- No meaningful order between e.g. two table keys; a sort
		-- comparator must answer false for "equal" elements.
		return false
	end
	-- Both keys are strings from here on.
	local type_value_a, type_value_b = type(a[2]), type(b[2])
	if type_value_a == type_value_b then
		return alphanum_compare_strings(key_a, key_b)
	end
	return TYPE_SORT_ORDER[type_value_a] < TYPE_SORT_ORDER[type_value_b]
end
-- Collects every entry of `t` into a list of `{ key, value }` pairs and
-- sorts that list with `compare_key_value_pairs`.
--
-- @param t  table to enumerate (it is not modified)
-- @return   a new sequence of `{ key, value }` pairs
local function get_key_value_pairs_in_proper_order (t)
	local ordered, count = {}, 0
	for key, value in pairs(t) do
		count = count + 1
		ordered[count] = { key, value }
	end
	table.sort(ordered, compare_key_value_pairs)
	return ordered
end
-- Counts every entry of `t`, whatever the key type.
--
-- Traversal ends only when `next` yields nil. Testing the key for
-- truthiness instead would stop early at a `false` key, which is a valid
-- table key.
--
-- @param t  table to count
-- @return   number of key/value pairs in `t`
local function nr_elements_in_map (t)
	local count = 0
	-- Raw `next` traversal, so a `__pairs` metamethod is not consulted.
	for _ in next, t do
		count = count + 1
	end
	return count
end
-- Tells whether `str` can be written as a bare Lua name: a letter or
-- underscore followed by letters, digits or underscores, and not listed in
-- RESERVED_LUA_WORDS.
--
-- @param str  candidate string
-- @return     nil when the shape is wrong, otherwise true/false depending
--             on whether the word is reserved
local function is_identifier(str)
	local looks_like_name = str:match('^[_%a][_%w]*$')
	if not looks_like_name then
		return nil
	end
	return not RESERVED_LUA_WORDS[str]
end
-- Reports whether every key of `t` is a "nice" string, i.e. a string that
-- `is_identifier` accepts. An empty table qualifies.
--
-- @param t  table whose keys are inspected
-- @return   true or false
local function contains_only_nice_string_keys (t)
	for key in pairs(t) do
		local nice = type(key) == 'string' and is_identifier(key)
		if not nice then
			return false
		end
	end
	return true
end
-- Translates `str` byte by byte through CHAR_TO_STR_REPR and joins the
-- results.
--
-- @param str  string to translate
-- @return     the translated string
local function escape_string (str)
	local pieces = {}
	for index = 1, #str do
		pieces[index] = CHAR_TO_STR_REPR[str:byte(index)]
	end
	return table.concat(pieces)
end
--------------------------------------------------------------------------------
-- Identifier stuff
-- Set of type names whose values always count as "simple" in
-- `is_short_table` and `is_simple_value`.
local SIMPLE_VALUE_TYPES = {
	['nil'] = true,
	boolean = true,
	number  = true,
	string  = true,
}
-- Reports whether the table `value` has no entries at all.
-- Raises an error when `value` is not a table.
--
-- @param value  table to inspect
-- @return       true when `next` finds nothing, false otherwise
local function is_empty_table (value)
	if type(value) ~= 'table' then
		-- Level 0: raise the bare message, exactly as `assert` would.
		error('[is_empty_table]: Only tables allowed!', 0)
	end
	local first_key = next(value)
	return first_key == nil
end
-- Reports whether `value` is a "short" table: either empty, or holding
-- exactly one entry whose value has a type listed in SIMPLE_VALUE_TYPES.
-- Raises an error when `value` is not a table.
--
-- The emptiness test compares the first key against nil explicitly;
-- `false` is a legal key and must not be mistaken for "no key".
--
-- @param value  table to inspect
-- @return       a truthy value for short tables, false or nil otherwise
local function is_short_table (value)
	assert( type(value) == 'table', '[is_short_table]: Only tables allowed!' )
	local first_key = next(value)
	if first_key == nil then
		return true
	end
	return SIMPLE_VALUE_TYPES[type(value[first_key])]
	   and (next(value, first_key) == nil)
end
-- Reports whether `value` is "simple": nil, a boolean, a number, a string,
-- or a table that `is_short_table` accepts.
-- TODO: Add clause about long strings. (Maybe >7 chars?)
--
-- @param value  any Lua value
-- @return       true for the scalar types, the result of `is_short_table`
--               for tables, false for everything else
local function is_simple_value (value)
	local kind = type(value)
	local listed = SIMPLE_VALUE_TYPES[kind]
	if listed then
		return listed
	end
	if kind ~= 'table' then
		return false
	end
	return is_short_table(value)
end
-- Reports whether `t` has at least one entry whose key or value is
-- rejected by `is_simple_value`.
--
-- @param t  table to scan
-- @return   true as soon as such an entry is found, false otherwise
local function contains_non_simple_key_or_value (t)
	for key, val in pairs(t) do
		-- The value is only examined when the key already passed.
		local both_simple = is_simple_value(key) and is_simple_value(val)
		if not both_simple then
			return true
		end
	end
	return false
end
-- Classifies the table `value`:
--   * TABLE_TYPE_EMPTY:    no entries at all
--   * TABLE_TYPE_SEQUENCE: every key is an integer in the range 1..#value
--   * TABLE_TYPE_PURE_MAP: some key falls outside that range and #value == 0
--   * TABLE_TYPE_MIXED:    some key falls outside that range and #value ~= 0
--
-- @param value  table to classify
-- @return       one of the TABLE_TYPE_* constants
local function get_table_type (value)
	if is_empty_table(value) then
		return TABLE_TYPE_EMPTY
	end
	local length = #value
	for key in pairs(value) do
		local is_index = type(key) == 'number'
			and key >= 1
			and key <= length
			and key == math.floor(key)
		if not is_index then
			-- The first non-index key settles the question: the table is
			-- a map, mixed if it also has an array-like part.
			if length ~= 0 then
				return TABLE_TYPE_MIXED
			end
			return TABLE_TYPE_PURE_MAP
		end
	end
	return TABLE_TYPE_SEQUENCE
end
-- Decides whether `value` may be rendered on a single line. That is the
-- case when all of the following hold:
--   A) it is a sequence or a pure map (per `get_table_type`),
--   B) it has no non-simple key or value,
--   C) a sequence has at most SINGLE_LINE_SEQ_MAX_ELEMENTS elements, a
--      pure map at most SINGLE_LINE_MAP_MAX_ELEMENTS.
--
-- @param value  table to inspect
-- @return       true or false
local function is_single_line_table (value)
	local table_type = get_table_type(value)
	if contains_non_simple_key_or_value(value) then
		return false
	end
	if table_type == TABLE_TYPE_SEQUENCE then
		return #value <= SINGLE_LINE_SEQ_MAX_ELEMENTS
	end
	if table_type == TABLE_TYPE_PURE_MAP then
		return nr_elements_in_map(value) <= SINGLE_LINE_MAP_MAX_ELEMENTS
	end
	return false
end
--------------------------------------------------------------------------------
-- Formatting stuff
-- Forward declarations: the table formatters and `format_value` call each
-- other recursively, so both names must exist before the bodies are defined.
local format_table, format_value
-- Ways to format keys
-- Appends the pieces of one `key = value` entry to the output list `l`,
-- for keys written as bare names.
--
-- The numeric element (the key's length) is a placeholder: callers later
-- replace it with padding, or with an empty string, before concatenating.
--
-- @param l        output list that is appended to
-- @param key      string key, written out unchanged
-- @param value    value, rendered through `format_value`
-- @param options  formatting options, passed through
-- @param depth    depth passed to `format_value` for the value
local function format_key_and_value_string_map (l, key, value, options, depth)
	l[#l+1] = key
	l[#l+1] = #key
	l[#l+1] = ' = '
	l[#l+1] = format_value(value, options, depth)
end
-- Appends the pieces of one `[key] = value` entry to the output list `l`,
-- for keys that cannot be written as bare names.
--
-- The numeric element (length of the rendered key plus the two brackets)
-- is a placeholder: callers later replace it with padding, or with an
-- empty string, before concatenating.
--
-- @param l        output list that is appended to
-- @param key      key of any type, rendered through `format_value` at
--                 depth 'max'
-- @param value    value, rendered through `format_value`
-- @param options  formatting options, passed through
-- @param depth    depth passed to `format_value` for the value
local function format_key_and_value_arbitr_map (l, key, value, options, depth)
	local formatted_key = format_value(key, options, 'max')
	l[#l+1] = '['
	l[#l+1] = formatted_key
	l[#l+1] = ']'
	l[#l+1] = #formatted_key + 2
	l[#l+1] = ' = '
	l[#l+1] = format_value(value, options, depth)
end
-- Formatting tables
-- Renders the sequence `t` on one line as `{ a, b, c }`.
-- NOTE: Assumes that the input table was pre-checked with `is_single_line_table()`
--
-- @param t        sequence to render
-- @param options  formatting options, passed to `format_value`
-- @return         the rendered string
local function format_single_line_sequence (t, options)
	local rendered = {}
	for index = 1, #t do
		rendered[index] = format_value(t[index], options)
	end
	return table.concat({ '{ ', table.concat(rendered, ', '), ' }' })
end
-- Renders the map `t` on one line as `{ k1 = v1, k2 = v2 }`.
-- NOTE: Assumes that the input table was pre-checked with `is_single_line_table()`
--
-- Bare `key = value` syntax is used when every key is a nice string key,
-- `[key] = value` syntax otherwise. Entries appear in the order produced
-- by `get_key_value_pairs_in_proper_order`.
--
-- @param t        map to render
-- @param options  formatting options, passed to the pair formatter
-- @return         the rendered string
local function format_single_line_map (t, options)
	local pair_format_func = contains_only_nice_string_keys(t) and format_key_and_value_string_map or format_key_and_value_arbitr_map
	local key_value_pairs = get_key_value_pairs_in_proper_order(t)
	local l = {'{ '}
	for _, pair in ipairs(key_value_pairs) do
		local top_before = #l
		pair_format_func(l, pair[1], pair[2], options, 'max')
		l[#l+1] = ', '
		-- The pair formatters leave a numeric key-width placeholder in the
		-- list; no alignment happens on a single line, so blank it out.
		for i = top_before, #l do
			if type(l[i]) == 'number' then l[i] = '' end
		end
	end
	-- Drop the separator that follows the last entry.
	if l[#l] == ', ' then l[#l] = nil end
	l[#l+1] = ' }'
	return table.concat(l, '')
end
-- Renders the sequence `t`.
--
--   * '{...}' when a numeric `depth` has reached `options.max_depth`,
--   * the single-line form when `is_single_line_table` allows it,
--   * '{...}' when `depth` is 'max' and the single-line form was refused,
--   * otherwise one element per line, indented with `options.indent`.
--
-- @param t        sequence to render
-- @param options  formatting options (`max_depth`, `indent`)
-- @param depth    current nesting depth (number) or the string 'max'
-- @return         the rendered string
local function format_sequence (t, options, depth)
	local depth_is_max = (depth == 'max')
	if not depth_is_max and depth >= options.max_depth then
		return '{...}'
	end
	if is_single_line_table(t) then
		return format_single_line_sequence(t, options)
	end
	if depth_is_max then
		return '{...}'
	end

	local element_indent = options.indent:rep(depth + 1)
	local l = {'{\n'}
	for _, element in ipairs(t) do
		l[#l+1] = element_indent
		l[#l+1] = format_value(element, options, depth + 1)
		l[#l+1] = ',\n'
	end
	-- The last element gets no trailing comma.
	l[#l] = '\n'
	l[#l+1] = options.indent:rep(depth)
	l[#l+1] = '}'
	return table.concat(l, '')
end
-- Renders the map (or mixed table) `t`.
--
--   * '{...}' when a numeric `depth` has reached `options.max_depth`,
--   * the single-line form when `is_single_line_table` allows it,
--   * '{...}' when `depth` is 'max' and the single-line form was refused,
--   * otherwise one entry per line, indented with `options.indent`, with
--     the ` = ` signs aligned on the widest key.
--
-- @param t        table to render
-- @param options  formatting options (`max_depth`, `indent`)
-- @param depth    current nesting depth (number) or the string 'max'
-- @return         the rendered string
local function format_map (t, options, depth)
	if depth ~= 'max' and depth >= options.max_depth then return '{...}'
	elseif is_single_line_table(t) then return format_single_line_map(t, options)
	elseif depth == 'max' then return '{...}'
	end

	local key_value_pairs = get_key_value_pairs_in_proper_order(t)
	local pair_format_func = contains_only_nice_string_keys(t) and format_key_and_value_string_map or format_key_and_value_arbitr_map

	local l = {'{\n'}
	local top_before = #l
	-- `ipairs`, not `pairs`: the list is sorted and must be walked in order.
	for _, pair in ipairs(key_value_pairs) do
		l[#l+1] = options.indent:rep(depth + 1)
		pair_format_func(l, pair[1], pair[2], options, depth + 1)
		l[#l+1] = ',\n'
	end

	-- The pair formatters leave each key's width in the list as a number;
	-- find the widest one.
	local max_key_len = 0
	for i = top_before, #l do
		if type(l[i]) == 'number' and l[i] > max_key_len then
			max_key_len = l[i]
		end
	end

	-- Replace every width placeholder with the padding that aligns the keys.
	for i = top_before, #l do
		if type(l[i]) == 'number' then
			l[i] = string.rep(' ', max_key_len - l[i])
		end
	end

	-- The last entry gets no trailing comma.
	l[#l] = '\n'
	l[#l+1] = options.indent:rep(depth)
	l[#l+1] = '}'
	return table.concat(l, '')
end
-- Renders any table by dispatching on `get_table_type`: '{}' for an empty
-- table, `format_sequence` for a sequence, `format_map` for everything else.
--
-- @param t        table to render
-- @param options  formatting options, passed through
-- @param depth    current nesting depth (number) or the string 'max'
-- @return         the rendered string
function format_table (t, options, depth)
	local table_type = get_table_type(t)
	if table_type == TABLE_TYPE_EMPTY then
		return '{}'
	end
	local formatter = (table_type == TABLE_TYPE_SEQUENCE) and format_sequence or format_map
	return formatter(t, options, depth)
end
-- Renders the string `str` as Lua source text, choosing between single
-- quotes, double quotes and long-bracket ("longform") notation.
--
-- Longform is considered when the string is long (at least
-- NR_CHARS_IN_LONG_STRING characters) and has a newline or tab within the
-- first NR_CHARS_IN_LONG_STRING characters, or when it contains both kinds
-- of quote. With `options.cut_strings` set, the string is truncated at the
-- first newline, tab or quote, or at NR_CHARS_IN_LONG_STRING - 3
-- characters, whichever comes first.
--
-- A long string without any newline or tab is handled by testing the index
-- for nil before comparing it.
--
-- NOTE(review): `escape_string` is also applied in longform, where Lua does
-- not interpret escape sequences, so doubled backslashes and numeric
-- escapes appear literally in that output -- confirm whether that is
-- intended.
-- TODO: Add option for escaping unicode characters.
--
-- @param str      string to render
-- @param options  formatting options; only `cut_strings` is read here
-- @return         the rendered string including its delimiters
local function format_string (str, options)
	local is_long_string = (str:len() >= NR_CHARS_IN_LONG_STRING)
	local newline_or_tab_index = str:find('[\n\t]')
	local single_quote_index = str:find('\'')
	local double_quote_index = str:find('\"')
	-- Index of the first newline, tab or quote; infinity when there is none.
	local first_special_index = math.min(newline_or_tab_index or 1/0, double_quote_index or 1/0, single_quote_index or 1/0)

	local chance_of_longform =
		(is_long_string and newline_or_tab_index ~= nil and newline_or_tab_index <= NR_CHARS_IN_LONG_STRING)
		or (double_quote_index and single_quote_index)
	local cut_string_index = options.cut_strings and (is_long_string or chance_of_longform)
		and math.min(NR_CHARS_IN_LONG_STRING - 3, first_special_index)
	local longform = chance_of_longform and ((not cut_string_index) or cut_string_index < first_special_index)
	local escape_newline_and_tab = not longform and newline_or_tab_index

	-- Determine string delimiters
	local left, right
	if longform then
		local level = smallest_secure_longform_string_level(str)
		left, right = '['..string.rep('=', level)..'[', ']'..string.rep('=', level)..']'
		if newline_or_tab_index then str = '\n' .. str end
	elseif not single_quote_index then
		left, right = '\'', '\''
	else
		left, right = '\"', '\"'
	end

	-- Cut string
	if cut_string_index then str = str:sub(1, cut_string_index) end
	str = escape_string(str)

	-- Escape newline and tab
	if escape_newline_and_tab then str = str:gsub('\n', '\\n'):gsub('\t', '\\t') end

	return left .. str .. right
end
-- Renders a number. NaN and the two infinities have no literal of their
-- own: they are written as 'nan' / 'inf' / '-inf' when `shorthand` is
-- truthy, and as the expressions '0/0' / '1/0' / '-1/0' otherwise. Every
-- other number goes through `tostring`.
--
-- @param value      number to render
-- @param shorthand  truthy to use the short names for the special values
-- @return           the rendered string
local function format_number (value, shorthand)
	if value ~= value then
		-- Only NaN differs from itself.
		if shorthand then return 'nan' end
		return '0/0'
	end
	if value == 1/0 then
		if shorthand then return 'inf' end
		return '1/0'
	end
	if value == -1/0 then
		if shorthand then return '-inf' end
		return '-1/0'
	end
	return tostring(value)
end
-- Renders any Lua value by dispatching on its type: tables go to
-- `format_table`, strings to `format_string`, numbers to `format_number`
-- (with `options.math_shorthand`), and everything else to `tostring`.
--
-- @param value    value to render
-- @param options  formatting options, passed through
-- @param depth    nesting depth for tables; 'max' is used when omitted
-- @return         the rendered string
function format_value (value, options, depth)
	local value_type = type(value)
	if value_type == 'table' then
		return format_table(value, options, depth or 'max')
	end
	if value_type == 'string' then
		return format_string(value, options)
	end
	if value_type == 'number' then
		return format_number(value, options.math_shorthand)
	end
	return tostring(value)
end
--------------------------------------------------------------------------------
-- Entry point: renders `value` as a string.
--
-- Missing options are filled in on the given table itself: `max_depth`
-- defaults to math.huge and `indent` to a tab character.
--
-- @param value    any Lua value
-- @param options  optional table of formatting options
-- @return         whatever `format_value` returns for depth 0
local function pretty_format (value, options)
	options = options or {}
	if not options.max_depth then
		options.max_depth = math.huge
	end
	if not options.indent then
		options.indent = '\t'
	end
	return format_value(value, options, 0)
end
-- The chunk evaluates to the `pretty_format` function itself.
return pretty_format