pretty/number.lua


-- pretty.number
-- The number formatting module for pretty.

--[=[ Thoughts on displaying numbers in an informative way.

Numbers are such an intrinsic part of computer science, mathematics, science in
general and even finance industry, that it boggles they mind how different they
each treat numbers. This is one of the reasons that many programming languages
possess a huge number of number types: Integers both signed and unsigned,
rational number, floating point numbers, fixed point numbers, not to mention the
various Computer Algebra Systems (CAS), that exist.

Lua has historically used floating point numbers, which can represent numbers in
a huge range, at the cost of non-uniform precision across that range. How can we
represent numbers in a intuitive and useful way?

1.	Native representation: We could use Lua's native way of representing
	numbers. This is easy, as we just use the `tostring` function.
	This is unsatifactory, if we're aiming for Lua-compatible output, as
	`tostring` will produce `inf` (`1/0`) and `nan` (`0/0`), which are valid
	identifiers, and does not guarentee that these variables are equal to their
	respective values.
	It's also unsatifactory when aiming for precision, as `tostring` truncate
	the output if "close enough".
2.	Unnecessary precision: We can use `string.format` with the format code
	`%.99f`, to get a "precise" representation of the number. Unfortunantly, for
	many "larger" (> 10⁻¹³?) numbers, some of the right digits will be 0, due
	to floating point precision issues. We also run against the fact that π is
	rounded and is only accurate 15 decimal digits. Do we really care about the
	remaining digits, most of which are 0?
3.	As required representation: We use as many digits as necessary for the Lua
	parser to read the exact same number. Involves invoking `string.format` and
	`tonumber` a lot.
	This approach improves on 2. as we not only discard all those 0s and the
	confusing unaccurate digits. Indeed, many "nice" numbers won't even possess
	any decimal digits. Numbers feed directly into the interpreter would look
	identical to the input, barring removal of trailing 0s.
	And with that we achieved precision and a reasonable amount of consiseness.
4.	Fractions: One issue with floating points have always been that the
	precision is sorely limited. Some languages possess fraction types, but
	such systems suffer from pathological cases, where memory usage spikes.
	We can still exploit fractions despite using floating point numbers. When we
	type `13/7` into Lua, we get a number back. This is the "canonical"
	floating point representation of `13/7`, and every time we encounter this
	number, we can use `13/7` to represent it.
	This allows more consise representation of certain numbers, for example
	numbers with repeating digits. It will also highlight patterns that a
	decimal representation might not.
	Unfortunantly it might also hide patterns, and it reduces the usefulness of
	pretty as a calculator.
5.	Reverse engineered number: Remember the concept of "canonical"
	representation from 4.? We can expand that concept to also talk about more
	general expressions like `math.sqrt(2)` or `2^-5`. Discovering those
	expressions from a single number is pretty complex.
	Again, this allows for very consise representations of numbers, and can find
	hidden patterns all over the place. It's much easier to see the relation to
	radians when we see `0.5*math.pi`, rather than `0.78539816339745`.
	But again we lose the calculator aspect.
6.	TODO: Write about `inf` and `nan`.
7.	TODO: Write about rounding.
8.	TODO: Write about unicode.

We a bit of every one of the above methods. They each have their upsides and
their drawbacks. Which approach we use, depend on the number, and what is the
shortest representation.
If the chosen representation is different from the "as required approach", and
we're only pretty printing a single number value, then we also append the
approximate value given by the "as required approach" approach as a comment, in
case somebody was looking for that.
--]=]

--------------------------------------------------------------------------------

local DISPLAY =  assert(require((... and select('1', ...):match('.+%.') or '')..'common'), '[pretty]: Could not load vital library: common') . DISPLAY

-- Constants

local MAXIMUM_INT  = 2^53    -- The maximum double for where all integers can be represented exactly.
local MAXIMUM_ZERO = 10^-7   -- Used when attempting to determine fraction. Anything below is counted as 0.

--------------------------------------------------------------------------------
-- Util

local function calculate_fraction (n)
	-- Returns x and y such that x/y = n. If none could be found, returns nil.
	local a, b = 1, n % 1
	while MAXIMUM_ZERO < b and a <= MAXIMUM_INT do
		local r = math.pow(b, -1)
		a, b = a * r, r % 1
		-- Invariant:  n * a / a = n
	end
	-- Check the values make sense.
	local numerator, denumberator = math.floor(n * a), math.floor(a)
	if numerator / denumberator == n then
		return numerator, denumberator
	end
end

--------------------------------------------------------------------------------

local SPECIAL_NUMBER = {
	-- x = ∞
	{ est   = function (a)   return math.huge                               end,
	  real  = function (a)   return math.huge                               end,
	  repr  = function (a)   return '1/0'                                   end,
	},
	-- x = a/b
	{ est   = calculate_fraction,
	  real  = function (a, b) return b ~= 1 and (a/b)                       end,
	  repr  = function (a, b) return a..'/'..b                              end,
    },
	-- x = 2^a
	{ est   = function (n) return math.log(n)/math.log(2)                   end,
	  real  = function (a) return 2^a                                       end,
	  repr  = function (a) return '2^'..a                                   end,
    },
	-- x = 10^a
	{ est   = function (n) return math.log(n)/math.log(10)                  end,
	  real  = function (a) return 10^a                                      end,
	  repr  = function (a) return '10^'..a                                  end,
    },
	-- x = 1/√a
	{ est   = function (n) return 1/(n^2)                                   end,
	  real  = function (a) return a >= 0 and 1/math.sqrt(a)                 end,
	  repr  = function (a) return ('1/math.sqrt(%.0f)'):format(a)             end,
    },
	-- x = lg a
	{ est   = function (n) return math.exp(n)                               end,
	  real  = function (a) return a >= 0 and math.log(a)                    end,
	  repr  = function (a) return ('math.log(%.0f)'):format(a)                end,
    },
	-- x = ℯ^a
	{ est   = function (n) return math.log(n)                               end,
	  real  = function (a) return math.exp(a)                               end,
	  repr  = function (a) return ('math.exp(%.0f)'):format(a)                end,
    },
	-- x = aπ
	-- TODO: Add support for factions of π.
	{ est   = function (n) return n/math.pi                                 end,
	  real  = function (a) return a*math.pi                                 end,
	  repr  = function (a) return a == 1 and 'math.pi' or a..'*math.pi'     end,
    },
	-- x = sqrt(a)
	{ est   = function (n) return n^2                                       end,
	  real  = function (a) return a >= 0 and math.sqrt(a)                   end,
	  repr  = function (a) return ('math.sqrt(%.0f)'):format(a)               end,
    },
}

--------------------------------------------------------------------------------

local function format_soft_num (n)
	assert(type(n) == 'number')

	if     n ~= n   then  return '0/0'
	elseif n == 0   then  return '0'
	elseif n <  0   then  return '-' .. format_soft_num(-n)
	end

	-- Finding the shortest
	local shortest, length = nil, math.huge
	local function alternative_repr (repr)
		if #repr < length then  shortest, length  =  repr, #repr  end
	end

	-- Maybe it's a "special" number?
	for _, special_number_tests in pairs(SPECIAL_NUMBER) do
		local a = { special_number_tests.est(n) }
		if a[1] then
			for i = 1, #a do  a[i] = math.floor(a[i] + 0.5)  end
			local num  = special_number_tests.real(unpack(a))
			if num == n then
				alternative_repr( special_number_tests.repr(unpack(a)) )
			elseif num then
				local repr = special_number_tests.repr(unpack(a))
				local native_precise = tonumber(('%'..#repr..'f'):format(n))
				if math.abs(num - n) <= math.abs( native_precise - n ) then
					alternative_repr( repr )
				end
			end
		end
	end

	-- Maybe it's a decimal number?
	alternative_repr( tostring(n):gsub('([^e]+)e%+?(%-?[^e]*)', function(a, b) return (a == '1' and '' or a..'*')..'10^'..b end))

	-- Well, this is not a pretty number!
	return shortest
end

local function format_hard_num (n)
	assert(type(n) == 'number')

	-- All the fun special cases
	if     n ~=  n         then  return  '0/0'
	elseif n ==  0         then  return  '0'
	elseif n ==  math.huge then  return  '1/0'
	elseif n == -math.huge then  return '-1/0'
	end

	-- Now for the serious part.
	for i = 0, 99 do
		local repr = ('%.'..i..'f'):format(n)
		local num  = tonumber(repr)
		if num == n then  return  repr  end
	end

	assert(false)
end

return function (value, display, l)
	-- Formats the number nicely. If display is DISPLAY.EXPAND, we have some
	-- space for extra info, we give some tidbits, to help investigation.

	assert(type(value) == 'number')
	assert(type(display) == 'number' and type(l) == 'table')

	-- First format a "soft" version. This number is not guarenteed to accurate.
	-- It's purpose is to give a general idea of the value.
	l[#l+1] = format_soft_num(value)

	-- If we have space for it, format a "hard" value, also. This number is as
	-- short as possible, while evaluating precisely to the value of the number.
	if display == DISPLAY.EXPAND then
		local hard_repr = format_hard_num(value)
		if l[#l] ~= hard_repr then
			l[#l+1] = '  --  Approx: '
			l[#l+1] = hard_repr
		end
		-- TODO: Add extra information. I don't really know what is useful?
		-- Prime factorization is fun, but useless unless people are doing
		-- cryptography or general number theory.
		-- Bit pattern is maybe too lowlevel.
	end
end