-- pretty.analyze_structure
-- The datastructure analyzing module for pretty.

--------------------------------------------------------------------------------

local TABLE_TYPE  =  assert(require((... and select('1', ...):match('.+%.') or '')..'common'), '[pretty]: Could not load vital library: common')
                  . TABLE_TYPE

-- Set of words that may not be used as identifiers. `goto` is included
-- unconditionally, even though it is only reserved from Lua 5.2 onwards.
local RESERVED_LUA_WORDS = {}
for word in ([[
    and       break     do        else      elseif    end
    false     for       function  if        in        local
    nil       not       or        repeat    return    then
    true      until     while     goto
]]):gmatch '%a+' do
    RESERVED_LUA_WORDS[word] = true
end

-- Type names (as returned by `type`) of values without any substructure.
-- `nil` is a reserved word, so that key needs the bracketed form.
local LEAF_VALUE_TYPES = { ['nil'] = true, number = true, string = true, boolean = true }

-- Range: [0, ∞[
-- NOTE(review): not referenced anywhere in this file; presumably the maximum
-- length of a string considered "short" - confirm or remove.
local SHORT_STRING_MAX_LEN = 7

-- Range: [1, ∞[
-- Minimum number of elements a table must have to be classified as a set.
local MINIMUM_NUMBER_OF_SET_ELEMENTS = 2

-- Range: [0, ∞[. Set to 0, to completely disallow holes in sequences.
local ALLOWED_HOLE_SIZE_IN_SEQUENCE = 1

-- Range: [1, ∞[. High values may result in crashes on specially-crafted input.
-- Upper bound on the number of chained `tostring` attempts.
local RECURSIVE_TOSTRING_TIMEOUT = 10

--------------------------------------------------------------------------------

local function is_identifier(s)
    -- Predicate: Is the given string usable as a plain Lua identifier, e.g.
    -- as an unquoted table key?
    --
    -- The check is purely lexical: a letter or underscore, followed by any
    -- number of letters, digits and underscores, and not a reserved word.
    --
    -- Compiling `s..'=0'` (the previous approach) is not a reliable test: it
    -- accepts any valid assignment target or statement prefix, such as
    -- 'a[1]', 'a, b', 'abc ' or 'a=1;b'. It also depends on `loadstring`,
    -- which is only available in Lua 5.1 / LuaJIT.
    --
    -- NOTE(review): `%a` and `%w` follow the current C locale, and LuaJIT
    -- additionally accepts bytes >= 0x80 in identifiers. Such names are
    -- rejected here (in the default "C" locale) - confirm this is acceptable.
    --
    -- Parameters:
    --  * s: The string to test. Asserted to be a string.
    --
    -- Returns:
    --  * Boolean: true when `s` is an identifier.

    assert(type(s) == 'string')
    --
    return s:find '^[%a_][%w_]*$' ~= nil and not RESERVED_LUA_WORDS[s]
end

local function get_key_types (t)
    -- Collects the types occurring among the keys of `t`.
    --
    -- Returns a table mapping the name of every encountered key type to
    -- `true`, along with the field `nr_types`: the number of distinct types.

    assert(type(t) == 'table')
    --
    local types, nr_types  =  {}, 0
    for key in pairs(t) do
        local type_name = type(key)
        if not types[type_name] then
            types[type_name], nr_types  =  true, nr_types + 1
        end
    end
    types.nr_types = nr_types
    return types
end

local function get_value_types (t)
    -- Collects the types occurring among the values of `t`.
    --
    -- Returns a table mapping the name of every encountered value type to
    -- `true`, along with the field `nr_types`: the number of distinct types.

    assert(type(t) == 'table')
    --
    local types, nr_types  =  {}, 0
    for _, value in pairs(t) do
        local type_name = type(value)
        if not types[type_name] then
            types[type_name], nr_types  =  true, nr_types + 1
        end
    end
    types.nr_types = nr_types
    return types
end

local function largest_number_index (t)
    -- Finds the largest key of type number in `t`.
    -- The result is never below 0: that is what is returned when `t` has no
    -- positive number keys.

    assert(type(t) == 'table')
    --
    local largest = 0
    for key in pairs(t) do
        if type(key) == 'number' and key > largest then
            largest = key
        end
    end
    return largest
end

local function nr_elements_in_table (t)
    -- Counts every key-value pair in the table, by stepping through it with
    -- `next` until no more keys remain.

    assert(type(t) == 'table')
    --
    local count = 0
    local key = next(t)
    while key ~= nil do
        count = count + 1
        key   = next(t, key)
    end
    return count
end

local function nr_elements_in_seq (t)
    -- Determines the number of elements in the sequence part of the table.
    -- Allows holes of up to `ALLOWED_HOLE_SIZE_IN_SEQUENCE` consecutive
    -- missing indices, before stopping.
    -- This function works even when the given table's `__index` metamethod
    -- throws errors.
    --
    -- Returns:
    --  * Number: number of elements
    --  * Boolean: whether the counted sequence contains holes, that is
    --    whether some counted element is preceded by a missing index.
    --    Missing indices after the last counted element are not holes.

    assert(type(t) == 'table')

    -- We don't want to crash if the __index metamethod throws an error, so we
    -- copy the pairs with number keys into a fresh table, which we then operate
    -- on lower down. `rawget` is used so a metatable on the metatable cannot
    -- interfere with the lookup.
    local mt = debug.getmetatable(t)
    if mt and rawget(mt, '__index') then
        local t_prime = {}
        for k, v in pairs(t) do
            if type(k) == 'number' then  t_prime[k] = v  end
        end
        t = t_prime
    end

    -- Now we run though the table, from 1 and up. The furthest index worth
    -- looking at is the one just past the largest allowed hole following
    -- the last element found.
    local i, last_elem_i, nr_elems, has_holes  =  0, 0, 0, false
    while i < last_elem_i + 1 + ALLOWED_HOLE_SIZE_IN_SEQUENCE do
        i  =  i + 1
        if t[i] ~= nil then
            -- Only a gap which is followed by an element is a hole.
            if i > last_elem_i + 1 then  has_holes = true  end
            last_elem_i, nr_elems  =  i, nr_elems + 1
        end
    end
    return nr_elems, has_holes
end

--------------------------------------------------------------------------------

local function contains_only_nice_string_keys (t)
    -- Predicate: Is every key in t a string which could be used as an
    -- identifier, e.g. `foo` or `bar_2`, but not `hello world`?
    -- Trivially true for the empty table.

    for key in pairs(t) do
        local is_nice = type(key) == 'string' and is_identifier(key)
        if not is_nice then  return false  end
    end
    return true
end

local function is_set (t)
    -- Predicate: Does t contain only boolean values?
    -- Gives true when boolean is the one and only value type in t, false when
    -- booleans are mixed with other types, and nil when t holds no boolean
    -- values at all (which includes the empty table).

    local observed = get_value_types(t)
    if not observed.boolean then
        return nil
    end
    return observed.nr_types == 1
end

local function count_childrens_key_count (t)
    -- Tallies, for each key appearing in any child of `t`, the number of
    -- children containing that key. Every value of `t` must be iterable with
    -- `pairs`.
    --
    -- Returns:
    --  * Table: pairs of (key, number of children with that key).
    --  * Number: the number of children in `t`.

    local occurrences, nr_children  =  {}, 0
    for _, child in pairs(t) do
        for key in pairs(child) do
            local so_far = occurrences[key]
            if so_far == nil then
                occurrences[key] = 1
            else
                occurrences[key] = so_far + 1
            end
        end
        nr_children = nr_children + 1
    end
    return occurrences, nr_children
end

local function is_tabular (t)
    -- Determines whether the sub-tables of `t` share structure.
    --
    -- Returns:
    --  * Boolean: (tabular) true when every key found in some sub-table is
    --    found in all of them.
    --  * Boolean: (pseudo-tabular) true when at least one key is found in all
    --    of the sub-tables.
    --  * Table: pairs of (key, number), where number is the amount of
    --    sub-tables containing the key. Absent when `t` is empty or holds
    --    anything besides tables; in that case both booleans are false.
    --
    -- Note that when every sub-table is empty, no keys exist at all, and the
    -- result is thus `true, false`.

    -- Quick return unless table is the one and only value type.
    local value_types = get_value_types(t)
    if value_types.nr_types ~= 1 or not value_types.table then
        return false, false
    end

    -- Count the number of children each key occurs in.
    local children_keys, nr_children = count_childrens_key_count(t)

    -- A key is shared when it occurs in every single child.
    local all_shared, at_least_one_shared  =  true, false
    for _, nr_with_key in pairs(children_keys) do
        if nr_with_key == nr_children then
            at_least_one_shared = true
        else
            all_shared = false
        end
    end

    return all_shared, at_least_one_shared, children_keys
end

local function has_uniform_structure (t)
    -- Predicate: Do all pairs in t have the same key type and the same value
    -- type? When the values are tables, they must further all contain the
    -- same number of elements. The empty table is uniform.
    -- TODO: This can probably be more relaxed. Maybe combine string, number and boolean?

    assert(type(t) == 'table')

    -- The first pair found acts as the reference for the others.
    local first_key, first_value = next(t)
    if first_key == nil then  return true  end

    local key_type, value_type  =  type(first_key), type(first_value)
    local nr_elems
    if value_type == 'table' then
        nr_elems = nr_elements_in_table(first_value)
    end

    -- Compare every pair against the reference.
    for key, value in pairs(t) do
        local same_types  =  type(key) == key_type and type(value) == value_type
        if not same_types then
            return false
        elseif nr_elems and nr_elements_in_table(value) ~= nr_elems then
            return false
        end
    end

    return true
end

local function is_leaf_node (t)
    -- Predicate: Are all keys and all values in the table of a type listed in
    -- `LEAF_VALUE_TYPES`, that is nil, number, string or boolean?
    -- The empty table is a leaf node.

    assert(type(t) == 'table')

    for key, value in pairs(t) do
        local both_are_leaves = LEAF_VALUE_TYPES[type(key)] and LEAF_VALUE_TYPES[type(value)]
        if not both_are_leaves then  return false  end
    end
    return true
end

local function super_tostring (t)
    -- Normally taking `tostring` of a value returns a string, but Lua does not
    -- enforce that __tostring must return a string, so this function attempts
    -- to find a definitive `tostring` representation, even if metatable
    -- shenanigans has occurred.
    -- It does this by repeatedly calling `tostring` on whatever the previous
    -- call gave, for at most `RECURSIVE_TOSTRING_TIMEOUT` rounds.
    --
    -- Is guaranteed to either return a string or a nil, and to never throw:
    --  * When `tostring` throws, a string prefixed with 'error on tostring: '
    --    is returned.
    --  * When `tostring` gives nil, the string 'nil' is returned.
    --  * Nil only occurs if the function cannot find a string: either the
    --    results run in a cycle, or the round limit is reached.

    -- Neither nil nor NaN are usable as table keys, so these are never
    -- recorded in `seen`. This is safe: `tostring` of either gives a string
    -- straight away, so they cannot take part in a cycle.
    local seen = {}
    if t ~= nil and t == t then  seen[t] = true  end

    for _ = 1, RECURSIVE_TOSTRING_TIMEOUT do
        local success, nt  =  pcall(tostring, t)
        if not success then
            -- The error value may itself be an object which cannot be
            -- converted to a string.
            local could_convert, msg  =  pcall(tostring, nt)
            if not could_convert or type(msg) ~= 'string' then
                msg = '<'..type(nt)..'>'
            end
            return 'error on tostring: '..msg
        end
        if type(nt) == 'string' or nt == nil then  return tostring(nt)  end
        if nt == nt then   -- Skip bookkeeping for NaN.
            if seen[nt] then  return nil  end
            seen[nt] = true
        end
        t = nt
    end
    return nil
end

--------------------------------------------------------------------------------

local function get_table_info (t)
    -- Gathers everything the analysis can tell about the single table `t`,
    -- without looking further into its children than their keys.
    --
    -- Returns a table with the fields:
    --  * string_repr: result of `super_tostring(t)`; string or nil.
    --  * address: the hex digits of a representation of the form
    --    'table: 0x...', or nil when the representation looks different.
    --  * nr_elems: total number of pairs.
    --  * seq_elems, has_holes: results of `nr_elements_in_seq(t)`.
    --  * map_elems: number of pairs outside the sequence part.
    --  * has_seq, has_map: whether the respective part is non-empty.
    --  * is_set: truthy when all values are booleans and there are at least
    --    `MINIMUM_NUMBER_OF_SET_ELEMENTS` pairs.
    --  * is_uniform, is_leaf_node: results of the predicates of same name.
    --  * key_types, value_types: types occurring among the keys and values.
    --  * is_tabular, is_pseudo_tabular, child_keys: results of `is_tabular(t)`.
    --  * type: one of the `TABLE_TYPE` constants.

    -- The key types are determined exactly once, as that requires a full
    -- traversal of the table.
    local key_types = get_key_types(t)

    local info = {}
    info.string_repr  = super_tostring(t)
    info.address      = info.string_repr and info.string_repr:match '^table: 0x(%x+)$' or nil
    info.nr_elems     = nr_elements_in_table(t)
    info.seq_elems, info.has_holes  =  nr_elements_in_seq(t)
    info.map_elems    = info.nr_elems - info.seq_elems
    info.has_seq      = info.seq_elems > 0
    info.has_map      = info.map_elems > 0
    info.is_set       = is_set(t) and info.nr_elems >= MINIMUM_NUMBER_OF_SET_ELEMENTS
    info.is_uniform   = has_uniform_structure(t)
    info.is_leaf_node = is_leaf_node(t)
    info.key_types    = key_types
    info.value_types  = get_value_types(t)
    info.is_tabular,
    info.is_pseudo_tabular,
    info.child_keys   = is_tabular(t)

    -- Determine type of table. The order matters: a table with nothing but a
    -- sequence part is a SEQUENCE, even if it also qualifies as a set.
    if not info.has_seq and not info.has_map  then  info.type = TABLE_TYPE.EMPTY
    elseif info.has_seq and not info.has_map  then  info.type = TABLE_TYPE.SEQUENCE
    elseif info.is_set                        then  info.type = TABLE_TYPE.SET
    elseif info.has_seq                       then  info.type = TABLE_TYPE.MIXED
    elseif contains_only_nice_string_keys(t)  then  info.type = TABLE_TYPE.STRING_MAP
    else                                            info.type = TABLE_TYPE.PURE_MAP
    end

    return info
end

--------------------------------------------------------------------------------

local function analyze_structure (root, max_depth, info)
    -- Analyzes the table `root`, and every table reachable from it (both
    -- through keys and values) within `max_depth` levels, breadth first.
    --
    -- Parameters:
    --  * root: The value to analyze. Non-tables are not analyzed.
    --  * max_depth: Number. Children are only looked up for tables at a depth
    --    below this. The root has depth 0.
    --  * info: Optional table to store the results in. It is modified in
    --    place, and analyses already present in it are reused.
    --
    -- Returns `info`, with following added:
    --  * info.root: `root`, unless a root was set in advance.
    --  * info[tbl]: for every table reached, the result of
    --    `get_table_info(tbl)`, with the fields `depth` (least depth it was
    --    reached at) and `marker` (sequential number, only given to tables
    --    reached more than once) added.
    --
    -- Throws an error if `root` is a table, but `max_depth` is not a number.

    -- Argument fixing
    info  =  info or {}

    -- Quick return
    if type(root) ~= 'table' then  return info  end

    -- Error checking
    if type(max_depth) ~= 'number' then
        -- `tostring` is required here: Under Lua 5.1 `%s` only accepts
        -- strings and numbers, and would throw an unrelated error for
        -- values such as nil.
        error(('[pretty/internal]: Bad argument #2, expected number, got %s (%s)'):format(tostring(max_depth), type(max_depth)), 2)
    end

    info.root  =  info.root or root
    local visited, next_mark, depth  =  {}, 1, { [root] = 0 }
    local queue  =  { root, bottom = 1, top = 2 }

    local function enqueue (child, child_depth)
        -- Puts `child` at the end of the queue if it is a table, and records
        -- the least depth it has been found at.
        if type(child) ~= 'table' then  return  end
        queue[queue.top], queue.top  =  child, queue.top + 1
        depth[child]  =  math.min(depth[child] or math.huge, child_depth)
    end

    while queue.bottom < queue.top do
        local node    =  queue[queue.bottom]
        queue.bottom  =  queue.bottom + 1
        -- Who've been visited? Bookkeeping
        visited[node]  =  (visited[node] or 0) + 1
        info[node]     =  info[node] or get_table_info(node)
        -- Tables reached for the second time receive a marker.
        if visited[node] == 2 then
            info[node].marker, next_mark  =  next_mark, next_mark + 1
        end
        -- Visit children, but only the first time around, which also
        -- ensures termination for recursive structures.
        if visited[node] < 2 and depth[node] < max_depth then
            local child_depth = depth[node] + 1
            for k, v in pairs(node) do
                enqueue(k, child_depth)
                enqueue(v, child_depth)
            end
        end
    end

    -- Use depth collected
    for node, node_depth in pairs(depth) do
        info[node].depth = node_depth
    end

    return info
end

--------------------------------------------------------------------------------

return analyze_structure