-- pretty/analyze_structure.lua
-- (342 lines, 11 KiB, Lua; revision dated 2017-07-17 19:33:11 +00:00)

-- pretty.analyze_structure
-- The datastructure analyzing module for pretty.
--------------------------------------------------------------------------------
-- Load the sibling `common` module and keep only its TABLE_TYPE field.
-- The first chunk argument (`...`) is matched against '.+%.' to extract the
-- dotted prefix in front of this module's own name, and 'common' is appended
-- to that prefix. When the chunk received no arguments the bare name 'common'
-- is required instead. The assert aborts loading if `require` yields a falsy
-- value.
-- NOTE(review): presumably `...` is the module name handed over by `require`
-- -- confirm against how this file is loaded.
local TABLE_TYPE = assert(require((... and select('1', ...):match('.+%.') or '')..'common'), '[pretty]: Could not load vital library: common')
. TABLE_TYPE
-- Set of words that `is_identifier` refuses to treat as identifiers.
-- NOTE(review): 'goto' is listed unconditionally; it is presumably only
-- reserved in some Lua versions -- listing it everywhere errs on the safe side.
local RESERVED_LUA_WORDS = {
['and'] = true,
['break'] = true,
['do'] = true,
['else'] = true,
['elseif'] = true,
['end'] = true,
['false'] = true,
['for'] = true,
['function'] = true,
['if'] = true,
['in'] = true,
['local'] = true,
['nil'] = true,
['not'] = true,
['or'] = true,
['repeat'] = true,
['return'] = true,
['then'] = true,
['true'] = true,
['until'] = true,
['while'] = true,
['goto'] = true,
}
-- Set of `type()` names that `is_leaf_node` accepts for both keys and values.
local LEAF_VALUE_TYPES = {
['nil'] = true,
['number'] = true,
['string'] = true,
['boolean'] = true,
}
-- Tuning knobs. The bracket notation gives the range of sensible values.
-- NOTE(review): SHORT_STRING_MAX_LEN is not referenced anywhere in the visible
-- part of this file.
local SHORT_STRING_MAX_LEN = 7 -- Range: [0, ∞[
-- Minimum element count `get_table_info` requires before flagging a table as a set.
local MINIMUM_NUMBER_OF_SET_ELEMENTS = 2 -- Range: [1, ∞[
-- Hole tolerance used by `nr_elements_in_seq` when scanning the sequence part.
local ALLOWED_HOLE_SIZE_IN_SEQUENCE = 1 -- Range: [0, ∞[. Set to 0, to completely disallow holes in sequences.
-- Maximum number of chained `tostring` attempts made by `super_tostring`.
local RECURSIVE_TOSTRING_TIMEOUT = 10 -- Range: [1, ∞[. High values may result in crashes on specially-crafted input.
-- (revision timestamp: 2017-10-22 09:55:05 +00:00)
--------------------------------------------------------------------------------
local function is_identifier(s)
  -- Predicate: Is the given string usable as an identifier in this version of
  -- Lua?
  --
  -- A string qualifies when all of the following hold:
  --  * It is not listed in RESERVED_LUA_WORDS.
  --  * It is shaped like a single name: a letter, underscore or high byte,
  --    followed by letters, digits, underscores or high bytes. This rejects
  --    strings such as ' a', 'a,b', 'a[1]', 'a.b' and 'a=1 b', all of which
  --    compile fine when '=0' is appended, but are not identifiers.
  --  * The running interpreter accepts it as the target of an assignment.
  --    This lets the interpreter decide whether high bytes (e.g. UTF-8) are
  --    permitted in names.
  --
  -- Argument: s, a string. Anything else trips the assertion.
  -- Returns: true or false.
  assert(type(s) == 'string')
  --
  if RESERVED_LUA_WORDS[s] then return false end
  if not s:find('^[%a_\128-\255][%w_\128-\255]*$') then return false end
  -- `loadstring` is absent in newer Lua versions, where `load` compiles
  -- strings instead.
  local compile = loadstring or load
  return not not compile(s..'=0')
end
local function get_key_types (t)
  -- Collects the set of `type()` names occurring among the keys of `t`.
  --
  -- Returns a table mapping each encountered type name to true, plus the
  -- field `nr_types` holding the number of distinct type names found.
  assert(type(t) == 'table')
  --
  local found, distinct = {}, 0
  for key in pairs(t) do
    local type_name = type(key)
    if not found[type_name] then
      found[type_name], distinct = true, distinct + 1
    end
  end
  found.nr_types = distinct
  return found
end
local function get_value_types (t)
  -- Collects the set of `type()` names occurring among the values of `t`.
  --
  -- Returns a table mapping each encountered type name to true, plus the
  -- field `nr_types` holding the number of distinct type names found.
  assert(type(t) == 'table')
  --
  local found, distinct = {}, 0
  for _, value in pairs(t) do
    local type_name = type(value)
    if not found[type_name] then
      found[type_name], distinct = true, distinct + 1
    end
  end
  found.nr_types = distinct
  return found
end
local function largest_number_index (t)
  -- Finds the greatest key of type number in `t`.
  -- Yields 0 when `t` has no number keys, or only non-positive ones.
  assert(type(t) == 'table')
  --
  local largest = 0
  for key in pairs(t) do
    if type(key) == 'number' and key > largest then
      largest = key
    end
  end
  return largest
end
local function nr_elements_in_table (t)
  -- Counts every key-value pair stored in `t`, stepping through the table
  -- with `next` directly.
  assert(type(t) == 'table')
  --
  local total = 0
  for _ in next, t do
    total = total + 1
  end
  return total
end
local function nr_elements_in_seq (t)
  -- Determines the number of elements in the sequence part of the table.
  -- Scanning starts at index 1 and continues as long as the gap since the
  -- last found element is at most `ALLOWED_HOLE_SIZE_IN_SEQUENCE` nils wide.
  -- With that constant at 0, the scan stops at the first nil.
  --
  -- Elements are read with `rawget`, so neither an `__index` metamethod nor
  -- any error it might throw can influence the result.
  --
  -- Returns:
  --  * Number: number of non-nil elements found in the sequence part.
  --  * Boolean: whether a hole lies between two of the counted elements
  --    (or before the first one). Trailing nils past the last element do not
  --    count as a hole.
  assert(type(t) == 'table')
  --
  local last_elem_i, nr_elems, has_holes = 0, 0, false
  local i = 1
  -- The furthest index worth inspecting is the one right after the largest
  -- tolerated hole following the last element found.
  while i <= last_elem_i + 1 + ALLOWED_HOLE_SIZE_IN_SEQUENCE do
    if rawget(t, i) ~= nil then
      -- An element that does not directly follow the previous one means at
      -- least one index was skipped.
      if i > last_elem_i + 1 then has_holes = true end
      last_elem_i, nr_elems = i, nr_elems + 1
    end
    i = i + 1
  end
  return nr_elems, has_holes
end
--------------------------------------------------------------------------------
local function contains_only_nice_string_keys (t)
  -- Predicate: Is every key of `t` a string that `is_identifier` accepts?
  -- An empty table trivially satisfies this.
  for key in pairs(t) do
    local is_nice = type(key) == 'string' and is_identifier(key)
    if not is_nice then return false end
  end
  return true
end
local function is_set (t)
  -- Predicate: Are all values in `t` booleans?
  -- Yields nil (rather than false) when `t` holds no boolean value at all,
  -- and true/false depending on whether boolean is the only value type.
  local kinds = get_value_types(t)
  if kinds.boolean then
    return kinds.nr_types == 1
  end
  return kinds.boolean
end
local function count_childrens_key_count (t)
  -- Tallies, for every key found in any child of `t`, how many children
  -- contain that key. Every value of `t` is iterated with `pairs`.
  --
  -- Returns:
  --  * Table: maps key -> number of children containing it.
  --  * Number: number of children in `t`.
  local occurrences, child_count = {}, 0
  for _, child in pairs(t) do
    child_count = child_count + 1
    for key in pairs(child) do
      local so_far = occurrences[key]
      if so_far then
        occurrences[key] = so_far + 1
      else
        occurrences[key] = 1
      end
    end
  end
  return occurrences, child_count
end
local function is_tabular (t)
  -- Classifies `t` by how much structure its sub-tables have in common.
  --
  -- Returns:
  --  * Boolean: tabular -- every key seen in any child occurs in all children.
  --  * Boolean: pseudo-tabular -- at least one key occurs in all children.
  --  * Table: maps each child key to the number of children containing it.
  -- When `t` is empty, or holds anything but tables, only `false, false` is
  -- returned.
  local kinds = get_value_types(t)
  if not (kinds.table and kinds.nr_types == 1) then
    return false, false
  end
  -- Tally the keys across all children.
  local key_counts, child_total = count_childrens_key_count(t)
  -- A key is shared when every single child carries it.
  local every_key_shared, some_key_shared = true, false
  for _, holders in pairs(key_counts) do
    if holders == child_total then
      some_key_shared = true
    else
      every_key_shared = false
    end
  end
  return every_key_shared, some_key_shared, key_counts
end
local function has_uniform_structure (t)
  -- Predicate: Do all key-value pairs of `t` look alike?
  -- That is the case when every key has the same type, every value has the
  -- same type, and -- if the values are tables -- every value holds the same
  -- number of elements. An empty table counts as uniform.
  --
  -- TODO: This can probably be more relaxed. Maybe combine string, number and boolean?
  assert(type(t) == 'table')
  -- Find the key and value types, using an arbitrary pair as the reference.
  local first_key = next(t)
  if first_key == nil then return true end
  local key_type, value_type = type(first_key), type(t[first_key])
  -- Only table values have an element count to compare; nil disables the check.
  local nr_elems = (value_type == 'table') and nr_elements_in_table(t[first_key]) or nil
  -- Ensure every other key value pair is the same.
  for key, value in pairs(t) do
    if type(key) ~= key_type or type(value) ~= value_type then
      return false
    end
    if nr_elems and nr_elems ~= nr_elements_in_table(value) then
      return false
    end
  end
  return true
end
local function is_leaf_node (t)
  -- Predicate: Are all keys and all values of `t` of a type listed in
  -- LEAF_VALUE_TYPES (nil, number, string or boolean)?
  -- An empty table is a leaf node.
  assert(type(t) == 'table')
  for key, value in pairs(t) do
    local both_leafy = LEAF_VALUE_TYPES[type(key)] and LEAF_VALUE_TYPES[type(value)]
    if not both_leafy then return false end
  end
  return true
end
local function super_tostring (t)
  -- Normally taking `tostring` of a value returns a string, but Lua does not
  -- enforce that __tostring must return a string, so this function attempts
  -- to find a definitive `tostring` representation, even if metatable
  -- shenanigans has occurred.
  --
  -- `tostring` is applied repeatedly to its own result, at most
  -- RECURSIVE_TOSTRING_TIMEOUT times, until a string appears. Values already
  -- encountered are remembered, so a cycle is detected and abandoned.
  --
  -- Returns a string, or nil when no string could be found (cycle or
  -- timeout). If `tostring` throws, the returned string starts with
  -- 'error on tostring: '.
  local seen = {}
  local function remember (value)
    -- nil and NaN cannot be used as table keys; skipping them is harmless, as
    -- `tostring` of either yields a string right away.
    local is_nan = type(value) == 'number' and value ~= value
    if value ~= nil and not is_nan then seen[value] = true end
  end
  remember(t)
  for _ = 1, RECURSIVE_TOSTRING_TIMEOUT do
    local success, nt = pcall(tostring, t)
    if not success then return 'error on tostring: '..tostring(nt) end
    if type(nt) == 'string' or nt == nil then return tostring(nt) end
    if seen[nt] then return nil end
    remember(nt)
    t = nt
  end
  return nil
end
--------------------------------------------------------------------------------
local function get_table_info (t)
  -- Gathers every structural fact about the single table `t` into one
  -- record. Children are not descended into.
  --
  -- Returns a table with the fields:
  --  * string_repr: result of `super_tostring(t)`; string or nil.
  --  * address: hex digits captured from a 'table: 0x...' string_repr,
  --    otherwise nil.
  --  * nr_elems, seq_elems, map_elems: total element count, sequence part
  --    count, and their difference.
  --  * has_holes: second result of `nr_elements_in_seq`.
  --  * has_seq, has_map: whether the respective counts are above zero.
  --  * is_set: truthy if all values are booleans and there are at least
  --    MINIMUM_NUMBER_OF_SET_ELEMENTS elements.
  --  * is_uniform, is_leaf_node: results of the equally named predicates.
  --  * key_types, value_types: type tallies of keys and values.
  --  * is_tabular, is_pseudo_tabular, child_keys: the results of `is_tabular`.
  --  * type: one of the TABLE_TYPE values.
  local info = {}
  info.string_repr = super_tostring(t)
  info.address = info.string_repr and info.string_repr:match '^table: 0x(%x+)$' or nil
  info.nr_elems = nr_elements_in_table(t)
  info.seq_elems, info.has_holes = nr_elements_in_seq(t)
  info.map_elems = info.nr_elems - info.seq_elems
  info.has_seq = info.seq_elems > 0
  info.has_map = info.map_elems > 0
  info.is_set = is_set(t) and info.nr_elems >= MINIMUM_NUMBER_OF_SET_ELEMENTS
  info.is_uniform = has_uniform_structure(t)
  info.is_leaf_node = is_leaf_node(t)
  info.key_types = get_key_types(t)
  info.value_types = get_value_types(t)
  info.is_tabular,
  info.is_pseudo_tabular,
  info.child_keys = is_tabular(t)
  -- Determine type of table. The order of the checks matters: the first
  -- matching category wins.
  if not info.has_seq and not info.has_map then info.type = TABLE_TYPE.EMPTY
  elseif info.has_seq and not info.has_map then info.type = TABLE_TYPE.SEQUENCE
  elseif info.is_set then info.type = TABLE_TYPE.SET
  elseif info.has_seq then info.type = TABLE_TYPE.MIXED
  elseif contains_only_nice_string_keys(t) then info.type = TABLE_TYPE.STRING_MAP
  else info.type = TABLE_TYPE.PURE_MAP
  end
  return info
end
--------------------------------------------------------------------------------
local function analyze_structure (root, max_depth, info)
  -- Walks the table graph reachable from `root` breadth-first, and records a
  -- `get_table_info` record for every table encountered, both keys and
  -- values, down to `max_depth` levels below the root.
  --
  -- Arguments:
  --  * root: the value to analyze. Non-tables are not analyzed.
  --  * max_depth: number; tables at this depth are recorded, but their
  --    children are not visited.
  --  * info: optional table to store the results in. Records already present
  --    for a table are reused rather than recomputed.
  --
  -- Returns `info`, which maps each visited table to its record. Each record
  -- additionally gets a `depth` field, and -- when the table was reached more
  -- than once -- a unique numeric `marker`. `info.root` is set to `root`,
  -- unless already set.
  --
  -- Raises an error, attributed to the caller, if `root` is a table and
  -- `max_depth` is not a number.

  -- Argument fixing
  info = info or {}
  -- Quick return
  if type(root) ~= 'table' then return info end
  -- Error checking. The offending value is passed through `tostring`, since
  -- `%s` does not accept non-string arguments in every Lua version.
  if type(max_depth) ~= 'number' then
    error(('[pretty/internal]: Bad argument #2, expected number, got %s (%s)'):format(tostring(max_depth), type(max_depth)), 2)
  end

  info.root = info.root or root

  local visited, next_mark, depth = {}, 1, { [root] = 0 }
  -- FIFO queue: `bottom` is the next index to read, `top` the next to write.
  local queue = { root, bottom = 1, top = 2 }

  local function enqueue (child, child_depth)
    queue[queue.top], queue.top = child, queue.top + 1
    depth[child] = math.min(depth[child] or math.huge, child_depth)
  end

  while queue.bottom < queue.top do
    local node = queue[queue.bottom]
    queue.bottom = queue.bottom + 1

    -- Who've been visited? Bookkeeping
    visited[node], info[node] = (visited[node] or 0) + 1, info[node] or get_table_info(node)
    -- The second encounter proves the table is referenced more than once, so
    -- it is handed a marker. Later encounters leave the marker alone.
    if visited[node] == 2 then
      info[node].marker, next_mark = next_mark, next_mark + 1
    end

    -- Visit children, but only the first time around, and only when the
    -- depth limit has not been reached.
    if visited[node] < 2 and depth[node] < max_depth then
      for k, v in pairs(node) do
        if type(k) == 'table' then enqueue(k, depth[node] + 1) end
        if type(v) == 'table' then enqueue(v, depth[node] + 1) end
      end
    end
  end

  -- Use depth collected
  for node, node_depth in pairs(depth) do
    info[node].depth = node_depth
  end

  return info
end
--------------------------------------------------------------------------------
-- The module's sole export is the `analyze_structure` function itself.
return analyze_structure