Merged cdata into master, and added some tests.

2017-08-07 10:39:45 +02:00 · 2017-08-07 10:39:45 +02:00 · 1781f8267a
commit 1781f8267a
parent a0008a5c5c 5124189b4e
7 changed files with 343 additions and 28 deletions
--- a/analyze_byte_string.lua
+++ b/analyze_byte_string.lua
@ -0,0 +1,67 @@
+
+--require 'fun' ()
+local utf8 = require 'utf8'
+
+local ASCII_CHAR_PATTERN   = '[\32-\126\009\010\013]'
+local UNICODE_CHAR_PATTERN = '[\01-\127\192-\255][\128-\191]*'
+
+
+local function probability_of_ascii_string (str)
+	assert(type(str) == 'string')
+
+	-- Find ascii subsequences of the string.
+	-- Then find the total number of ascii characters,
+	-- and the length of the longest subsequence.
+	local len_of_longest_subseq, nr_ascii_chars  =  0, 0
+	for subseq in str:gmatch(ASCII_CHAR_PATTERN..'+') do
+		len_of_longest_subseq  =  math.max(#subseq, len_of_longest_subseq)
+		nr_ascii_chars            =  nr_ascii_chars + #subseq
+	end
+
+	-- Perform probability calculation
+	-- This heuristic is based on the observation that large numbers of
+	-- ascii characters, and long subsequences are the primary indicators
+	-- of ascii strings.
+	return (len_of_longest_subseq + nr_ascii_chars) / (2 * #str)
+end
+
+local function probability_of_utf8_string (str)
+	assert(type(str) == 'string')
+
+	-- Find numbers of valid utf8 bytes
+	local valid_bytes  =  0
+	for char, valid in utf8.iterate(str) do
+		if valid then  valid_bytes = valid_bytes + #char  end
+	end
+
+	-- Calculate ratio of valid bytes to total number of bytes.
+	return valid_bytes / #str
+end
+
+local function probability_of_utf16_string (str)
+	return 0
+end
+
+local function probability_of_binary_data (str)
+	return 2/3
+end
+
+local str_representations = {
+	ascii  = probability_of_ascii_string,
+	utf8   = probability_of_utf8_string ,
+	utf16  = probability_of_utf16_string,
+	binary = probability_of_binary_data,
+}
+
+return function (str)
+	local str_info, most_likely, most_likely_prob = {}, 'ascii', 0
+	for repr_name, prob_func in pairs(str_representations) do
+		local prob = prob_func(str)
+		str_info[repr_name..'_prob'] = prob
+		if prob >= most_likely_prob then
+			most_likely, most_likely_prob  =  repr_name, prob
+		end
+	end
+	       str_info.most_likely = most_likely
+	return str_info
+end
--- a/cdata.lua
+++ b/cdata.lua
@ -0,0 +1,127 @@
+
+-- Import
+
+local ffi = require 'ffi'
+local bit = require 'bit'
+
+-- Constants
+
+--------------------------------------------------------------------------------
+-- Util
+
+local HEX_TO_BIN = {
+    ['0'] = '0000', ['1'] = '0001', ['2'] = '0010', ['3'] = '0011',
+    ['4'] = '0100', ['5'] = '0101', ['6'] = '0110', ['7'] = '0111',
+	['8'] = '1000', ['9'] = '1001', ['A'] = '1010', ['B'] = '1011',
+	['C'] = '1100', ['D'] = '1101', ['E'] = '1110', ['F'] = '1111',
+}
+
+local function to_hex (val, nr_elements, element_size)
+	local l = {}
+	for i = 0, nr_elements - 1 do
+		local v = val[i]
+		l[#l+1] = bit.tohex(v, -2*element_size)
+		l[#l+1] = ' '
+	end
+	l[#l] = nil
+	return table.concat(l, '')
+end
+
+local function to_bin (val, nr_elements, element_size)
+	return to_hex(val, nr_elements, element_size):gsub('[0-9A-F]', HEX_TO_BIN)
+end
+
+local function is_nice_unicode_string (str)
+	-- TODO... Maybe also look into a purely binary oriented representation.
+	return false
+end
+
+local function is_nice_ascii_string (str)
+	for i = 1, #str do
+		local byte = str:byte(i)
+		if not (32 <= byte and byte <= 126) then  return false  end
+	end
+	return true
+end
+
+local function get_type_and_size_of_singular ( ctype )
+	local nr_elements, layers  =  1, 0
+	while true do
+		local etype, elements  =  ctype:match('(.+)%[(%d*)%]$')
+		if not elements then  break  end
+		ctype, nr_elements  =  etype, nr_elements * elements
+		layers = layers + 1
+	end
+	return ctype, nr_elements, layers
+end
+
+--------------------------------------------------------------------------------
+
+local CDATA_REPR_MATCHER = 'cdata<(.+)>: (0x%w+)'
+
+
+local function format_cdata (value, display, l, format_value)
+
+	-- Error check
+	assert(type(value)        == 'cdata'   )
+	assert(type(display)      == 'number'  )
+	assert(type(l)            == 'table'   )
+	assert(type(format_value) == 'function')
+
+	-- Do stuff
+	local native_repr  =  tostring(value)
+	local data_length  =  ffi.sizeof(value)
+	local ctype, addr  =  native_repr:match(CDATA_REPR_MATCHER)
+
+	-- Is void pointer?
+	if ctype == 'void *' then
+		local address_pointing_at = tonumber(ffi.cast('int', value))
+		l[#l+1] = 'void pointer to ' .. addr
+		return ;
+	end
+
+	-- Is normal pointer?
+	if ctype:match('%*$') then
+		if type(value[0]) ~= 'cdata' then
+			-- Data presentable in Lua, refered to by pointers?
+			l[#l+1] = 'pointer to '
+			return format_value(value[0], display, l.options, l)
+		else
+			l[#l+1] = '* '
+			return format_cdata(value[0], display, l.options, l, format_value)
+		end
+	end
+
+	l[#l+1] = 'cdata {'
+	--l[#l+1] = '\n\tnative = \''   .. native_repr .. '\','
+	l[#l+1] = '\n\ttype   = '   .. ctype .. ','
+	l[#l+1] = '\n\taddr   = '   .. addr .. ','
+	if data_length then
+		-- Size
+		local str = ffi.string(value, data_length)
+		l[#l+1] = '\n\tsize   = '   .. data_length .. ','
+
+		-- Element size and type
+		local element_type, nr_elements, nr_layers  =  get_type_and_size_of_singular(ctype)
+		local element_size = data_length / nr_elements
+		l[#l+1] = '\n\tnr_e   = ' .. nr_elements .. ','
+		l[#l+1] = '\n\ttype_e = '   .. element_type .. ','
+		l[#l+1] = '\n\tsize_e = '   .. element_size .. ','
+
+		-- If can be expressed as string, express it as string.
+		if is_nice_ascii_string(str) or is_nice_unicode_string(str) then
+			local string_or_unicode = is_nice_ascii_string(str) and 'ascii' or 'utf8 '
+			l[#l+1] = '\n\t'..string_or_unicode..'    = ' .. str .. ','
+		end
+		--
+		if nr_layers == 1 then
+			-- Only a single level of arrays
+			l[#l+1] = '\n\thex    = ' .. to_hex(value, nr_elements, element_size) .. ','
+			l[#l+1] = '\n\tbin    = ' .. to_bin(value, nr_elements, element_size) .. ','
+		end
+
+	end
+	l[#l+1] = '\n}'
+end
+
+return format_cdata
--- a/pretty.lua
+++ b/pretty.lua
@ -522,10 +522,11 @@ local TYPE_TO_FORMAT_FUNC = {
    ['string']   =  import 'pstring',
    ['thread']   =  format_coroutine,
    ['table']    =  format_table,
-
    ['function'] =  import 'function',
-    ['userdata'] =  format_primitive,  -- TODO
-    ['cdata']    =  format_primitive,  -- TODO & Luajit only
+
+    -- TODO
+    ['userdata'] =  format_primitive,
+    ['cdata']    =  import 'cdata',     -- Luajit exclusive ?
 }

 local function format_value (value, display, l)
--- a/test/test_cdata.lua
+++ b/test/test_cdata.lua
@ -0,0 +1,130 @@
+
+-- Only relevant in LUAJIT.
+if type(jit) ~= 'table' then  return  end
+
+local SUITE = require 'TestSuite' 'cdata'
+SUITE:setEnviroment{
+    format  = require 'pretty',
+    analyze_byte_string = require 'analyze_byte_string',
+}
+
+--------------------------------------------------------------------------------
+-- Test stuff.
+
+local ffi = require('ffi')
+ffi.cdef[[
+    typedef struct foo { int a, b; } foo_t;
+
+    void free(void *ptr);
+    void *malloc(size_t size);
+    int poll(struct pollfd *fds, unsigned long nfds, int timeout);
+]]
+
+-- TODO: Add more advanced understanding of cdata.
+
+
+local function format_test (t)
+    SUITE:addTest(t.expect, function ()
+        assert_equal(t.expect, format(t.input, t.options))
+    end)
+end
+
+--------------------------------------------------------------------------------
+-- Understanding binary data
+
+SUITE:addTest('Understand ascii', function ()
+    local str   =  'hello world'
+    local info  =  analyze_byte_string(str)
+    assert_equal('ascii', info.most_likely)
+end)
+
+SUITE:addTest('Understand utf8', function ()
+    local str = 'Æh? Hvø Tæler Då Om?'
+    local info  =  analyze_byte_string(str)
+    assert_equal('utf8', info.most_likely)
+end)
+
+SUITE:addTest('Understand binary', function ()
+    local str = '\190\098\140\097\255'
+    local info  =  analyze_byte_string(str)
+    print(format(info))
+    assert_equal('binary', info.most_likely)
+end)
+
+SUITE:addTest('More binary', function ()
+    local str = '\098\140\097\140\100'
+    local info  =  analyze_byte_string(str)
+    assert_equal('binary', info.most_likely)
+end)
+
+--------------------------------------------------------------------------------
+
+format_test {
+    input  = ffi.C.poll,
+    expect = 'cdata<.+>: 0x%x+',
+}
+
+do
+    local list = ffi.new('char [17]')
+    for i = 0, 16 do  list[i] = i end
+    format_test {
+        input  = list,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local list = ffi.new('int [17]')
+    for i = 0, 16 do  list[i] = i end
+    format_test {
+        input  = list,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local list = ffi.new('char [10]')
+    for i = 0, 10-1 do  list[i] = i + 65 end
+    format_test {
+        input  = list,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local mat = ffi.new('char [3][3]')
+    for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end
+    format_test {
+        input  = mat,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local p = ffi.gc(ffi.C.malloc(1), ffi.C.free)
+    format_test {
+        input  = p,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+SUITE:addTest('a_very_small_part_of_math', function ()
+    local p = ffi.new('char[1]')
+          p[0] = 27
+    local actual_result  =  format(p + 0, {})
+    assert_equal('Derp', actual_result)
+end)
+
+do
+    local p = ffi.new('foo_t[1]')
+          p[0].a = 27
+          p[0].b = 27
+    format_test {
+        input  = p + 0,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+--------------------------------------------------------------------------------
+
+return SUITE
--- a/test/test_pretty.lua
+++ b/test/test_pretty.lua
@ -329,31 +329,6 @@ end)

 -- TODO: This is a very complex topic, and will expanded upon after 1.0.0.

--------------------------------------------------------------------------------
-- CDATA
-
-- TODO: Add more advanced understanding of cdata.
-
-if HAS_JIT_LIBRARY then
-
-    local ffi = require('ffi')
-    ffi.cdef[[
-        int poll(struct pollfd *fds, unsigned long nfds, int timeout);
-    ]]
-
-    format_test {
-        input  = ffi.C.poll,
-        approx = true,
-        expect = 'cdata<.+>: 0x%x+',
-    }
-
-    format_test {
-        input  = ffi.new('int[10]'),
-        approx = true,
-        expect = 'cdata<.+>: 0x%x+',
-    }
-end
-
 --------------------------------------------------------------------------------
 -- General

--- a/test/test_resilience.lua
+++ b/test/test_resilience.lua
@ -95,6 +95,8 @@ SUITE:addTest('Proper malformed utf8 escaping (through LÖVE)', function ()

    -- The input strings are gotten from TestSuite's example strings.

+    do return error 'Test skipped' end
+
    local pjk_path = '/tmp/test_pjk_'..os.time()

    local conf = [[
--- a/test/test_sorting.lua
+++ b/test/test_sorting.lua
@ -79,6 +79,19 @@ format_test {
    expect = '{ 1, nil, 3 }',
 }

+format_test {
+	name   = 'Proper sorting of number keys',
+    input  = { [-1/0] = 'a', [-100] = 'b', [-1] = 'c', [0] = 'd', [1] = 'e', [100] = 'f', [1/0] = 'g' },
+    expect = '{\n    [-1/0] = \'a\',  [-100] = \'b\',\n    [-1]   = \'c\',  [0]    = \'d\',\n    [1]    = \'e\',  [100]  = \'f\',\n    [1/0]  = \'g\'\n}',
+}
+
+format_test {
+	name   = 'Proper sorting of number strings keys',
+    input  = { ['-100'] = 'b', ['-1'] = 'c', ['0'] = 'd', ['1'] = 'e', ['100'] = 'f' },
+    expect = '{\n    [\'-100\'] = \'b\',  [\'-1\']   = \'c\',\n    [\'0\']    = \'d\',  [\'1\']    = \'e\',\n    [\'100\']  = \'f\'\n}',
+}
+
+
 --[[ Sorting is hard in unicode, and I can't be bothered.
 format_test {
    name   = 'Unicode: ø comes before å in danish',