338 lines
11 KiB
Lua
338 lines
11 KiB
Lua
|
|
-- pretty.analyze_structure
|
|
-- The data structure analysis module for pretty.
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
local TABLE_TYPE = assert(require((... and select('1', ...):match('.+%.') or '')..'common'), '[pretty]: Could not load vital library: common')
|
|
. TABLE_TYPE
|
|
|
|
-- Set of words reserved by the Lua grammar. A string found in this set can
-- never be used as an identifier. Stored as a set (word -> true) for cheap
-- membership tests.
local RESERVED_LUA_WORDS = {}
for _, word in ipairs {
    'and',      'break',  'do',     'else',   'elseif', 'end',
    'false',    'for',    'function', 'if',   'in',     'local',
    'nil',      'not',    'or',     'repeat', 'return', 'then',
    'true',     'until',  'while',  'goto',
} do
    RESERVED_LUA_WORDS[word] = true
end
|
|
|
|
-- Set of type names (as returned by `type`) which `is_leaf_node` accepts for
-- both keys and values.
local LEAF_VALUE_TYPES = { ['nil'] = true, number = true, string = true, boolean = true }
|
|
|
|
-- Tunable limits. The range of valid values is noted for each.

-- Range: [0, ∞[
-- (Not referenced by any function in this file.)
local SHORT_STRING_MAX_LEN = 7

-- Range: [1, ∞[
-- Fewest elements a boolean-valued table needs before `get_table_info` flags
-- it as a set.
local MINIMUM_NUMBER_OF_SET_ELEMENTS = 2

-- Range: [0, ∞[. Set to 0, to completely disallow holes in sequences.
-- Read by `nr_elements_in_seq`.
local ALLOWED_HOLE_SIZE_IN_SEQUENCE = 1

-- Range: [1, ∞[. High values may result in crashes on specially-crafted input.
-- Upper bound on the number of `tostring` rounds in `super_tostring`.
local RECURSIVE_TOSTRING_TIMEOUT = 10
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
local function is_identifier(s)
    -- Predicate: Is the given string usable as an identifier (a plain name)
    -- in Lua?
    --
    -- A name is a non-empty run of ASCII letters, digits and underscores
    -- which does not start with a digit and is not listed in
    -- `RESERVED_LUA_WORDS`.
    --
    -- The check is purely lexical. Compiling `s..'=0'` instead would also
    -- accept any assignable expression or statement prefix, such as 'a[1]',
    -- 'a, b' or 'x = 1; y', and would depend on `loadstring`, which only
    -- exists in Lua 5.1 / LuaJIT.
    --
    -- Returns: Boolean.

    assert(type(s) == 'string')
    --
    return s:find('^[A-Za-z_][A-Za-z0-9_]*$') ~= nil and not RESERVED_LUA_WORDS[s]
end
|
|
|
|
local function get_key_types (t)
    -- Collects the set of types occurring among the keys of `t`.
    --
    -- Returns a table mapping every encountered type name to `true`, plus a
    -- field `nr_types` holding the number of distinct type names found.

    assert(type(t) == 'table')
    --
    local types, nr_types = {}, 0
    for key in pairs(t) do
        local type_name = type(key)
        if not types[type_name] then
            types[type_name], nr_types = true, nr_types + 1
        end
    end
    --
    types.nr_types = nr_types
    return types
end
|
|
|
|
local function get_value_types (t)
    -- Collects the set of types occurring among the values of `t`.
    --
    -- Returns a table mapping every encountered type name to `true`, plus a
    -- field `nr_types` holding the number of distinct type names found.

    assert(type(t) == 'table')
    --
    local types, nr_types = {}, 0
    for _, value in pairs(t) do
        local type_name = type(value)
        if not types[type_name] then
            types[type_name], nr_types = true, nr_types + 1
        end
    end
    --
    types.nr_types = nr_types
    return types
end
|
|
|
|
local function largest_number_index (t)
    -- Finds the largest key of type number in `t`.
    -- Yields 0 when `t` has no number key above 0.

    assert(type(t) == 'table')
    --
    local largest = 0
    for key in pairs(t) do
        if type(key) == 'number' and key > largest then
            largest = key
        end
    end
    return largest
end
|
|
|
|
local function nr_elements_in_table (t)
    -- Counts every key-value pair stored in the table, walking it with
    -- `next`.

    assert(type(t) == 'table')
    --
    local count = 0
    for _ in next, t do
        count = count + 1
    end
    return count
end
|
|
|
|
local function nr_elements_in_seq (t)
    -- Determines the number of elements in the sequence part of the table.
    -- Allows holes of up to `ALLOWED_HOLE_SIZE_IN_SEQUENCE` consecutive nil
    -- slots before stopping; with the limit at 0 counting stops at the first
    -- nil slot.
    -- Slots are read with `rawget`, so this function works even when the
    -- given table's `__index` metamethod throws errors.
    --
    -- Returns:
    --   * Number: number of non-nil elements counted.
    --   * Boolean: whether at least one hole lies before a counted element.
    --     Nil slots after the last counted element are not holes.

    assert(type(t) == 'table')

    -- Run through the table, from 1 and up. `i` never moves more than
    -- `max_hole + 1` slots past the last element found, so a gap wider than
    -- `max_hole` ends the scan.
    local max_hole = ALLOWED_HOLE_SIZE_IN_SEQUENCE
    local i, last_elem_i, nr_elems, has_holes = 0, 0, 0, false
    while i < last_elem_i + 1 + max_hole do
        i = i + 1
        if rawget(t, i) ~= nil then
            -- Any skipped slot between the previous element and this one is
            -- a hole.
            if i > last_elem_i + 1 then has_holes = true end
            last_elem_i, nr_elems = i, nr_elems + 1
        end
    end
    return nr_elems, has_holes
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
local function contains_only_nice_string_keys (t)
    -- Predicate: Is every key of `t` a string accepted by `is_identifier`?
    -- Vacuously true for a table without keys.

    for key in pairs(t) do
        local is_nice = type(key) == 'string' and is_identifier(key)
        if not is_nice then
            return false
        end
    end
    return true
end
|
|
|
|
local function is_set (t)
    -- Predicate: Does t contain only boolean values (and at least one)?
    --
    -- Yields nil, rather than false, when no value of `t` is a boolean.

    local value_types = get_value_types(t)
    if not value_types.boolean then
        return nil
    end
    return value_types.nr_types == 1
end
|
|
|
|
local function count_childrens_key_count (t)
    -- For every key occurring in any child of `t`, counts how many children
    -- contain that key. Every value of `t` is iterated with `pairs`, so all
    -- values must be tables.
    --
    -- Returns:
    --   * Table: pairs (key, number of children containing the key).
    --   * Number: number of children.

    local key_counts, nr_children = {}, 0
    for _, child in pairs(t) do
        nr_children = nr_children + 1
        for key in pairs(child) do
            local count_so_far = key_counts[key] or 0
            key_counts[key] = count_so_far + 1
        end
    end
    return key_counts, nr_children
end
|
|
|
|
local function is_tabular (t)
    -- Determines if `t` contains sub-tables of identical substructure, that
    -- is whether every key found in any sub-table is found in all of them.
    -- (tabular)
    -- Further determines if at least one key is found in every sub-table.
    -- (pseudo-tabular)
    --
    -- Returns:
    --   * Boolean: tabular.
    --   * Boolean: pseudo-tabular.
    --   * Table: pairs (key, number), where number is the amount of
    --     sub-tables containing the key. Left out on the quick return.

    -- Quick return if table is empty, or holds any value which is not a
    -- table.
    local value_types = get_value_types(t)
    local only_tables = value_types.table and value_types.nr_types == 1
    if not only_tables then
        return false, false
    end

    -- Count, for each key, the number of children holding it.
    local children_keys, nr_children = count_childrens_key_count(t)

    -- A key held by every child is shared. Any other key breaks tabularity.
    local all_shared, at_least_one_shared = true, false
    for _, nr_with_key in pairs(children_keys) do
        if nr_with_key == nr_children then
            at_least_one_shared = true
        else
            all_shared = false
        end
    end

    return all_shared, at_least_one_shared, children_keys
end
|
|
|
|
local function has_uniform_structure (t)
    -- Predicate: Do all keys of `t` share one type, and all values share one
    -- type? An empty table counts as uniform.
    -- TODO: This can probably be more relaxed. Maybe combine string, number and boolean?

    assert(type(t) == 'table')

    -- The first pair handed out by `next` fixes the expected types.
    local first_key, first_value = next(t)
    if first_key == nil then
        return true
    end
    local key_type, value_type = type(first_key), type(first_value)

    -- Every pair must agree with those types.
    for key, value in pairs(t) do
        local matches = type(key) == key_type and type(value) == value_type
        if not matches then
            return false
        end
    end

    return true
end
|
|
|
|
local function is_leaf_node (t)
    -- Predicate: Returns true if every key and every value in the table has
    -- a type listed in `LEAF_VALUE_TYPES` (nil, number, string or boolean).

    assert(type(t) == 'table')

    for key, value in pairs(t) do
        if not (LEAF_VALUE_TYPES[type(key)] and LEAF_VALUE_TYPES[type(value)]) then
            return false
        end
    end
    return true
end
|
|
|
|
local function super_tostring (t)
    -- Normally taking `tostring` of a value returns a string, but Lua does not
    -- enforce that __tostring must return a string, so this function attempts
    -- to find a definitive `tostring` representation, even if metatable
    -- shenanigans has occurred.
    --
    -- `tostring` is applied again to its own result, at most
    -- `RECURSIVE_TOSTRING_TIMEOUT` times, until a string (or nil) comes out.
    --
    -- Returns a string, or nil when no string was found: either a value
    -- repeated itself, or the attempt limit ran out. When `tostring` raises
    -- an error, a string starting with 'error on tostring: ' is returned.

    -- Values already passed to `tostring`. nil and NaN cannot be used as
    -- table keys, so they are never recorded; neither can take part in a
    -- cycle through a __tostring metamethod of a table.
    local seen = {}
    if t ~= nil and t == t then seen[t] = true end

    for _ = 1, RECURSIVE_TOSTRING_TIMEOUT do
        local success, nt = pcall(tostring, t)
        if not success then return 'error on tostring: '..tostring(nt) end
        if type(nt) == 'string' or nt == nil then return tostring(nt) end
        if nt == nt then
            -- Seeing a value twice means we are going in circles.
            if seen[nt] then return nil end
            seen[nt] = true
        end
        t = nt
    end
    return nil
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
local function get_table_info (t)
    -- Gathers structural facts about the single table `t` (children are not
    -- descended into) and classifies it as one of the `TABLE_TYPE` kinds.
    --
    -- Returns a table with the fields:
    --   * string_repr: result of `super_tostring(t)`.
    --   * address: hex digits taken from a 'table: 0x...' string_repr, or
    --     nil when string_repr does not have that form.
    --   * nr_elems, seq_elems, map_elems: total number of elements, elements
    --     in the sequence part, and the remainder.
    --   * has_holes: second result of `nr_elements_in_seq(t)`.
    --   * has_seq, has_map: whether seq_elems / map_elems are above 0.
    --   * is_set: truthy when `is_set(t)` holds and the table has at least
    --     `MINIMUM_NUMBER_OF_SET_ELEMENTS` elements.
    --   * is_uniform, is_leaf_node: results of the matching predicates.
    --   * key_types, value_types: results of `get_key_types` and
    --     `get_value_types`.
    --   * is_tabular, is_pseudo_tabular, child_keys: results of `is_tabular`.
    --   * type: the `TABLE_TYPE` classification.

    -- Computed once and reused below. This is also the first call asserting
    -- that `t` is a table.
    local key_types = get_key_types(t)

    local info = {}
    info.string_repr = super_tostring(t)
    info.address     = info.string_repr and info.string_repr:match '^table: 0x(%x+)$' or nil
    info.nr_elems    = nr_elements_in_table(t)
    info.seq_elems, info.has_holes = nr_elements_in_seq(t)
    info.map_elems   = info.nr_elems - info.seq_elems
    info.has_seq     = info.seq_elems > 0
    info.has_map     = info.map_elems > 0
    info.is_set      = is_set(t) and info.nr_elems >= MINIMUM_NUMBER_OF_SET_ELEMENTS
    info.is_uniform  = has_uniform_structure(t)
    info.is_leaf_node = is_leaf_node(t)
    info.key_types   = key_types
    info.value_types = get_value_types(t)
    info.is_tabular,
    info.is_pseudo_tabular,
    info.child_keys  = is_tabular(t)

    -- Determine type of table. The order of the tests matters: the set test
    -- comes before the mixed and map tests.
    if     not info.has_seq and not info.has_map then info.type = TABLE_TYPE.EMPTY
    elseif info.has_seq and not info.has_map     then info.type = TABLE_TYPE.SEQUENCE
    elseif info.is_set                           then info.type = TABLE_TYPE.SET
    elseif info.has_seq                          then info.type = TABLE_TYPE.MIXED
    elseif contains_only_nice_string_keys(t)     then info.type = TABLE_TYPE.STRING_MAP
    else                                              info.type = TABLE_TYPE.PURE_MAP
    end

    return info
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
local function analyze_structure (root, max_depth, info)
    -- Walks the table structure reachable from `root`, breadth first, and
    -- records a `get_table_info` result for every table encountered, whether
    -- found as a key or as a value.
    --
    -- Arguments:
    --   * root: the value to analyze. Anything but a table is ignored.
    --   * max_depth: Number. Children of tables at this depth (root is at
    --     depth 0) are not visited.
    --   * info: optional table of earlier results to extend. Entries already
    --     present for a table are reused instead of recomputed.
    --
    -- Returns `info`: a table indexed by the encountered tables, each entry
    -- being that table's info, extended with:
    --   * depth: smallest depth at which the table was found.
    --   * marker: a running number, given only to tables which were reached
    --     more than once.
    -- `info.root` holds the root table, unless it was already set.
    --
    -- Raises an error, blamed on the caller, if `root` is a table and
    -- `max_depth` is not a number.

    -- Argument fixing
    info = info or {}

    -- Quick return
    if type(root) ~= 'table' then return info end

    -- Error checking. `tostring` keeps `format` itself from failing on Lua
    -- versions where '%s' does not accept nil or boolean arguments.
    if type(max_depth) ~= 'number' then
        error(('[pretty/internal]: Bad argument #2, expected number, got %s (%s)'):format(tostring(max_depth), type(max_depth)), 2)
    end

    info.root = info.root or root
    local visited, next_mark, depth = {}, 1, { [root] = 0 }
    local queue = { root, bottom = 1, top = 2 }

    -- Appends a table to the queue, remembering the smallest depth at which
    -- it has been found.
    local function enqueue (child, child_depth)
        queue[queue.top], queue.top = child, queue.top + 1
        depth[child] = math.min(depth[child] or math.huge, child_depth)
    end

    while queue.bottom < queue.top do
        local node = queue[queue.bottom]
        queue.bottom = queue.bottom + 1

        -- Who've been visited? Bookkeeping
        visited[node] = (visited[node] or 0) + 1
        info[node]    = info[node] or get_table_info(node)

        -- The second arrival at a node is when it earns its marker.
        if visited[node] == 2 then
            info[node].marker, next_mark = next_mark, next_mark + 1
        end

        -- Visit children, but only on the first arrival and above the depth
        -- limit.
        if visited[node] < 2 and depth[node] < max_depth then
            local child_depth = depth[node] + 1
            for k, v in pairs(node) do
                if type(k) == 'table' then enqueue(k, child_depth) end
                if type(v) == 'table' then enqueue(v, child_depth) end
            end
        end
    end

    -- Use depth collected
    for node, node_depth in pairs(depth) do
        info[node].depth = node_depth
    end

    return info
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
return analyze_structure
|