-- pretty.string
-- The string formatting module for pretty.

--[=[ Thoughts on displaying strings in useful ways.

TODO: Write these thoughts down.

--]=]

local DISPLAY =  assert(require((... and select('1', ...):match('.+%.') or '')..'common'), '[pretty]: Could not load vital library: common') . DISPLAY

--------------------------------------------------------------------------------
-- Constants

-- Size thresholds and fixed tokens used by the formatters in this file.
local NR_CHARS_IN_LONG_STRING  =  40
local MAX_HORIZONAL_CHARACTER  =  80
local SHORT_STR_DELIMITER      = '\''
local STRING_CONT_INDICATOR    = '...'

--------

-- Lookup table: byte value (0..255) -> text used for that byte inside a
-- short (quoted) string.
local CHAR_TO_STR_REPR     =  {}
do
	-- Defaults. Bytes below 32 become three-digit decimal escapes, every
	-- other byte stands for itself.
	for byte = 0, 255 do
		if byte < 32 then
			CHAR_TO_STR_REPR[byte] = string.format('\\%03i', byte)
		else
			CHAR_TO_STR_REPR[byte] = string.char(byte)
		end
	end

	-- Bytes with a dedicated escape sequence, plus the backslash itself
	-- and DEL. None of these keys overlap, so the order of application
	-- does not matter.
	local overrides = {
		[7]   = '\\a',
		[8]   = '\\b',
		[9]   = '\\t',
		[10]  = '\\n',
		[11]  = '\\v',
		[12]  = '\\f',
		[13]  = '\\r',
		[92]  = '\\\\',
		[127] = '\\127',
	}
	for byte, repr in pairs(overrides) do
		CHAR_TO_STR_REPR[byte] = repr
	end

	-- The delimiter must be escaped, or it would terminate the string.
	CHAR_TO_STR_REPR[SHORT_STR_DELIMITER:byte()] = '\\'..SHORT_STR_DELIMITER
end

-- Character set of the bytes that cannot be written verbatim: NUL, the
-- control bytes other than tab (9) and newline (10), and DEL.
local CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ = '[%z\001-\008\011-\031\127]'

--------------------------------------------------------------------------------
-- Util

local function does_string_require_escaping (str)
	-- Reports, as a boolean, whether `str` contains at least one byte from
	-- the CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ set.
	local first_match = str:find(CHARACTERS_THAT_REQUIRE_ESCAPE_SEQ)
	return first_match ~= nil
end

local escape_string do

	-- gsub callbacks. Each returns the replacement text for its match.
	local ESCAPE_SINGLE_BYTE	= function (char)  return CHAR_TO_STR_REPR[char:byte()]  end
	local ESCAPE_MALFORMED_BYTE	= function (char)  return '\\' .. char:byte()  end
	local ESCAPE_MALFORMED_CONT_BYTE	= function (a, b)  return a..'\\' .. b:byte()  end
	local ESCAPE_MALFORMED_START_BYTE	= function (a, b)  return '\\'..a:byte() .. b  end

	function escape_string (str)
		-- Attempts to escape the string, to a format that is both a valid Lua
		-- constant, and legible unicode.
		--
		-- Every byte is first replaced through CHAR_TO_STR_REPR. Bytes above
		-- 127 are then kept verbatim only where they look like part of a
		-- multi-byte sequence (a lead byte 192..255 directly followed by a
		-- continuation byte 128..191, and continuation bytes directly
		-- following another high byte); all other high bytes are replaced by
		-- a decimal escape. This is a heuristic, it does not validate the
		-- length of each sequence.
		--
		-- Returns the escaped string.

		-- Error checking
		assert(type(str) == 'string')

		-- Escape single bytes
		local count
		str = str:gsub('.', ESCAPE_SINGLE_BYTE)

		-- Escape malformed continuation bytes.
		-- A continuation byte at the very start has no preceding character
		-- for the two-capture pattern to match, so handle it explicitly.
		str = str:gsub('^[\128-\191]', ESCAPE_MALFORMED_BYTE)
		-- Repeated, because escaping one continuation byte leaves a digit in
		-- front of the next one, which thereby becomes malformed too.
		repeat	str, count = str:gsub('([^\128-\255])([\128-\191])', ESCAPE_MALFORMED_CONT_BYTE)
		until	count == 0

		-- Escape malformed start bytes.
		-- Lead bytes begin at 192; 191 is still a continuation byte and must
		-- not be included, or valid characters ending in that byte would be
		-- escaped whenever a non-continuation byte follows them.
		-- A lead byte at the very end has no following character for the
		-- two-capture pattern to match, so handle it explicitly.
		str = str:gsub('[\192-\255]$', ESCAPE_MALFORMED_BYTE)
		-- Repeated, because the second capture is consumed by the match and
		-- may itself be a lead byte that needs escaping.
		repeat	str, count = str:gsub('([\192-\255])([^\128-\191])', ESCAPE_MALFORMED_START_BYTE)
		until	count == 0

		-- Done, lets return
		return str
	end
end

local function smallest_secure_longform_string_level (str)
    -- Determines the level a longform string needs to use, to avoid code
    -- injection. For example, if we want to use longform on the string
    -- 'Hello ]] World', we cannot use level-0 as this would result in
    -- '[[Hello ]] World]]', which could be an issue in certain applications.
    --
    -- Returns the smallest non-negative integer `n` such that `str` followed
    -- directly by a level-`n` closing bracket contains no earlier level-`n`
    -- closing bracket.

    -- Error checking
    assert(type(str) == 'string')

    -- Record the level of every closing bracket `]=*]` found in the string.
    -- The search restarts ON the final `]` of each match, since that same
    -- character can open the next closing bracket (as in ']=]]', which holds
    -- both a level-1 and a level-0 closing bracket).
    local used_levels, init = {}, 1
    while true do
        local first, last = str:find('%]=*%]', init)
        if not first then break end
        used_levels[last - first - 1] = true
        init = last
    end

    -- A string ending in `]` followed by k equal signs would complete a
    -- level-k closing bracket together with the first `]` of the delimiter
    -- placed right after it, so that level is unusable too.
    local trailing_equals = str:match('%](=*)$')
    if trailing_equals then
        used_levels[#trailing_equals] = true
    end

    -- Pick the first level not in use.
    local level = 0
    while used_levels[level] do
        level = level + 1
    end
    return level
end


--------------------------------------------------------------------------------

local function format_shortform_string (str, _, l)
	-- Appends `str` to the output list `l` as one quoted string: opening
	-- delimiter, escaped contents, closing delimiter.
	local quote = SHORT_STR_DELIMITER
	table.insert(l, quote)
	local escaped = escape_string(str)
	table.insert(l, escaped)
	table.insert(l, quote)
end

local function safe_cut (str, si, ei)
	-- Cuts the substring `str[si..ei]` out of an already escaped string,
	-- making sure the result does not end in the middle of an escape
	-- sequence.
	--
	-- str: The (escaped) string to cut from.
	-- si:  Positive index of the first character. Defaults to 1.
	-- ei:  Positive index of the last character wanted. Defaults to, and is
	--      clamped to, the length of `str`.
	--
	-- Returns the cut string, and the index in `str` of the last character
	-- that was consumed. The next cut should start one past that index.

	-- Error checking
	assert(type(str) == 'string')
	assert(type(si)  == 'number' or si == nil)
	assert(type(ei)  == 'number' or ei == nil)

	-- Apply defaults. The asserts above allow nil, but the index arithmetic
	-- below needs actual numbers that lie within the string.
	si = si or 1
	if ei == nil or ei > #str then  ei = #str  end

	-- Calculate
	local cut_str = str:sub(si, ei)

	-- Search for the number of backslashes and digits at the end of the string.
	-- If the number of backslashes is even, it's a sequence of backslashes, if
	-- not it's a broken escape string.
	local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
	local nr_backslashes_before_end  =  start_of_digits - start_of_backslashes
	if nr_backslashes_before_end % 2 == 1 then
		-- Drop the broken escape: the last backslash and any digits after it.
		-- `space_left` is the number of characters this frees up.
		local space_left = #cut_str - (start_of_digits - 2)
		cut_str = cut_str:sub(1, start_of_digits - 2)
		ei = ei - space_left

		-- Read the complete escape from the uncut string. `digits` is empty
		-- when the escape is not a decimal one.
		local digits, after_digits = str:match('^\\(%d?%d?%d?)()', si - 1 + start_of_digits - 1)

		-- Lets see if we can't shorten the escape code, to fit within the
		-- cut limit. Leading zeros are optional, so the shortest form needs
		-- a backslash plus the digits that are not leading zeros.
		if space_left >= 1 + 3 - #digits:match '0*' then
			ei = after_digits - 1
			cut_str = cut_str .. ('\\%0'..(space_left-1)..'i'):format(tonumber(digits))
		end
	end

	return cut_str, ei
end


local function format_cut_string (str, _, l)
	-- Appends an abbreviated version of `str` to the output list `l`: the
	-- escaped string cut down to leave room for the continuation indicator,
	-- wrapped in delimiters and followed by that indicator.

	-- Work out the visible part of the string
	local max_length  =  NR_CHARS_IN_LONG_STRING - #STRING_CONT_INDICATOR
	local escaped     =  escape_string(str)
	local visible     =  safe_cut(escaped, 1, max_length)

	-- Emit the pieces in order
	local pieces = { SHORT_STR_DELIMITER, visible, SHORT_STR_DELIMITER, STRING_CONT_INDICATOR }
	for _, piece in ipairs(pieces) do
		l[#l+1]  =  piece
	end
end

local function format_concatted_string (str, _, l)
	-- Cuts the string up into smaller individual substrings, each concatted
	-- together with `..`, one substring per line. Is uglier compared to
	-- longform, but is at least idempotent.
	--
	-- str: The string to format.
	-- l:   The output list the pieces are appended to.

	-- TODO: Attempt to cut near whitespace?

	-- Error checking
	assert( type(str) == 'string' )
	assert( type(l)   == 'table'  )

	-- Calculate
	-- Each line holds: delimiter, substring, delimiter, ' ..'
	local width_without_overhead  =  MAX_HORIZONAL_CHARACTER - 2*#SHORT_STR_DELIMITER - #' ..'
	local escaped  =  escape_string(str)

	-- Cut strings
	local sub_strings, str_i = {}, 1
	repeat
		local sub_str, ei  =  safe_cut(escaped, str_i, str_i + width_without_overhead - 1)
		str_i = ei + 1
		sub_strings[#sub_strings+1] = sub_str
	-- `str_i` is the next character still to be emitted, so we are only
	-- done once it lies PAST the end. Stopping at `str_i == #escaped` would
	-- silently drop the final character.
	until str_i > #escaped

	-- Format them
	for _, sub_str in ipairs(sub_strings) do
		l[#l+1]  =  SHORT_STR_DELIMITER
		l[#l+1]  =  sub_str
		l[#l+1]  =  SHORT_STR_DELIMITER
		l[#l+1]  =  ' ..\n'
	end
	-- The last substring has nothing to be concatted with: blank out the
	-- trailing concat operator.
	l[#l]  =  ''
end

local function format_longform_string (str, _, l)
	-- Appends `str` to the output list `l` as a long bracket string, using
	-- the bracket level reported by smallest_secure_longform_string_level.
	-- The contents are emitted verbatim, preceded by a newline.

	-- Error checking
	assert(type(str) == 'string')
	assert(type(l) == 'table')

	-- Build the run of equal signs shared by both brackets
	local equal_signs  =  ('='):rep(smallest_secure_longform_string_level(str))

	-- Emit: opening bracket, newline, contents, closing bracket
	local pieces = {
		'[' .. equal_signs .. '[',
		'\n',
		str,
		']' .. equal_signs .. ']',
	}
	for _, piece in ipairs(pieces) do
		l[#l+1] = piece
	end
end

return function (str, display, l)
	-- pretty.format_string
	--
	-- Appends a representation of `str` to the output list `l`, picking the
	-- style from the string's length, the `display` level and its contents:
	--   * shorter than NR_CHARS_IN_LONG_STRING: a single quoted string.
	--   * display below DISPLAY.EXPAND:          a cut-off quoted string.
	--   * contains bytes needing escapes:        concatted quoted strings.
	--   * otherwise:                             a long bracket string.

	-- TODO: Prefer \ddd style escaping to shorter (\n, \t), when many of the
	-- \ddd already exist in the text.

	-- Error checking
	assert(type(str) == 'string')
	assert(type(display) == 'number')
	assert(type(l) == 'table')

	-- Select the formatter
	local formatter
	if #str < NR_CHARS_IN_LONG_STRING then
		formatter = format_shortform_string
	elseif display < DISPLAY.EXPAND then
		formatter = format_cut_string
	elseif does_string_require_escaping(str) then
		formatter = format_concatted_string
	else
		formatter = format_longform_string
	end

	-- Do work
	return formatter(str, nil, l)
end