Merged cdata into master, and added some tests.

2017-08-07 10:39:45 +02:00 · 2017-08-07 10:39:45 +02:00 · 1781f8267a
commit 1781f8267a
parent a0008a5c5c 5124189b4e
7 changed files with 343 additions and 28 deletions
--- a/analyze_byte_string.lua
+++ b/analyze_byte_string.lua
@ -0,0 +1,67 @@
 --require 'fun' ()
 local utf8 = require 'utf8'
 -- Single "readable ASCII" byte: printable characters (32-126) plus tab,
 -- line feed and carriage return.
 local ASCII_CHAR_PATTERN   = '[\32-\126\009\010\013]'
 -- One lead byte followed by any number of continuation bytes (128-191).
 -- NOTE(review): not referenced by the code visible in this file.
 local UNICODE_CHAR_PATTERN = '[\01-\127\192-\255][\128-\191]*'

 --- Estimate how likely it is that `str` is human-readable ASCII text.
 -- The score is the mean of two ratios: the fraction of bytes that are
 -- readable ASCII, and the length of the longest readable run relative to the
 -- whole string.
 -- @param str  the byte string to inspect (must be a string)
 -- @return a number in the range [0, 1]
 local function probability_of_ascii_string (str)
 	assert(type(str) == 'string')
 	-- An empty string contains no non-ASCII byte. Answer directly, because
 	-- the ratio below would otherwise be 0/0, i.e. NaN.
 	if #str == 0 then  return 1  end
 	-- Find ascii subsequences of the string.
 	-- Then find the total number of ascii characters,
 	-- and the length of the longest subsequence.
 	local len_of_longest_subseq, nr_ascii_chars  =  0, 0
 	for subseq in str:gmatch(ASCII_CHAR_PATTERN..'+') do
 		len_of_longest_subseq  =  math.max(#subseq, len_of_longest_subseq)
 		nr_ascii_chars         =  nr_ascii_chars + #subseq
 	end
 	-- Perform probability calculation
 	-- This heuristic is based on the observation that large numbers of
 	-- ascii characters, and long subsequences are the primary indicators
 	-- of ascii strings.
 	return (len_of_longest_subseq + nr_ascii_chars) / (2 * #str)
 end
 --- Estimate how likely it is that `str` is UTF-8 encoded text.
 -- The score is the fraction of bytes that belong to characters which
 -- `utf8.iterate` reports as valid.
 -- @param str  the byte string to inspect (must be a string)
 -- @return a number in the range [0, 1]
 local function probability_of_utf8_string (str)
 	assert(type(str) == 'string')
 	-- An empty string has no invalid bytes. Answer directly, because the
 	-- ratio below would otherwise be 0/0, i.e. NaN.
 	if #str == 0 then  return 1  end
 	-- Find numbers of valid utf8 bytes
 	-- (`utf8.iterate` comes from the `utf8` module required at the top of the
 	-- file; presumably it yields each character plus a validity flag.)
 	local valid_bytes  =  0
 	for char, valid in utf8.iterate(str) do
 		if valid then  valid_bytes = valid_bytes + #char  end
 	end
 	-- Calculate ratio of valid bytes to total number of bytes.
 	return valid_bytes / #str
 end
 --- Estimate how likely it is that `str` is UTF-16 encoded text.
 -- Detection is not implemented: every input gets the lowest possible score.
 -- @param str  the byte string to inspect (currently ignored)
 -- @return always 0
 local function probability_of_utf16_string (str)
 	local NOT_IMPLEMENTED_SCORE = 0
 	return NOT_IMPLEMENTED_SCORE
 end
 --- Score for treating `str` as opaque binary data.
 -- The score does not depend on the input; it is a fixed baseline that the
 -- other representations have to reach before they are preferred.
 -- @param str  the byte string to inspect (ignored)
 -- @return always 2/3
 local function probability_of_binary_data (str)
 	local BASELINE_SCORE = 2/3
 	return BASELINE_SCORE
 end
 -- Scoring function for every representation the analyzer knows about.
 local str_representations = {
 	ascii  = probability_of_ascii_string,
 	utf8   = probability_of_utf8_string ,
 	utf16  = probability_of_utf16_string,
 	binary = probability_of_binary_data,
 }

 -- Order in which representations are scored. Earlier entries win ties, so
 -- the most specific representation comes first: a pure ASCII string scores
 -- 1 as both ascii and utf8, and must be reported as ascii. Iterating with
 -- `pairs` would leave that choice to the unspecified table traversal order.
 local REPRESENTATION_PRIORITY = { 'ascii', 'utf8', 'utf16', 'binary' }

 --- Analyze a byte string and guess which representation it most likely uses.
 -- @param str  the byte string to analyze
 -- @return a table with one `<name>_prob` field per representation
 --         (`ascii_prob`, `utf8_prob`, `utf16_prob`, `binary_prob`) and a
 --         `most_likely` field naming the highest scoring representation.
 return function (str)
 	local str_info, most_likely, most_likely_prob = {}, 'ascii', 0
 	for _, repr_name in ipairs(REPRESENTATION_PRIORITY) do
 		local prob = str_representations[repr_name](str)
 		str_info[repr_name..'_prob'] = prob
 		-- Strict comparison: on a tie the earlier, higher-priority
 		-- representation is kept.
 		if prob > most_likely_prob then
 			most_likely, most_likely_prob  =  repr_name, prob
 		end
 	end
 	str_info.most_likely = most_likely
 	return str_info
 end
--- a/cdata.lua
+++ b/cdata.lua
@ -0,0 +1,127 @@
 -- Import
 local ffi = require 'ffi'
 local bit = require 'bit'
 -- Constants
 --------------------------------------------------------------------------------
 -- Util
 -- Maps each uppercase hexadecimal digit ('0'-'9', 'A'-'F') to its four-digit
 -- binary spelling, e.g. 'A' -> '1010'.
 local HEX_TO_BIN = {}
 do
 	local HEX_DIGITS = '0123456789ABCDEF'
 	for value = 0, 15 do
 		-- Peel off the bits of `value`, least significant first.
 		local bits, rest = '', value
 		for _ = 1, 4 do
 			bits = ((rest % 2 == 1) and '1' or '0') .. bits
 			rest = (rest - rest % 2) / 2
 		end
 		HEX_TO_BIN[HEX_DIGITS:sub(value + 1, value + 1)] = bits
 	end
 end
 --- Render the elements of an indexable value as space separated hex groups.
 -- @param val           value indexed from 0 to `nr_elements - 1`
 -- @param nr_elements   number of elements to render
 -- @param element_size  size of one element in bytes; each element is
 --                      rendered with two hex digits per byte
 -- @return the hex groups joined by single spaces ('' for zero elements)
 local function to_hex (val, nr_elements, element_size)
 	-- A negative digit count makes `bit.tohex` emit uppercase digits.
 	local nr_digits = -2 * element_size
 	local groups = {}
 	for index = 0, nr_elements - 1 do
 		groups[index + 1] = bit.tohex(val[index], nr_digits)
 	end
 	return table.concat(groups, ' ')
 end
 --- Render the elements of an indexable value as space separated bit groups.
 -- Works by expanding every hex digit produced by `to_hex` into four binary
 -- digits through the `HEX_TO_BIN` lookup table.
 -- @param val           value indexed from 0 to `nr_elements - 1`
 -- @param nr_elements   number of elements to render
 -- @param element_size  size of one element in bytes
 -- @return exactly one value: the binary rendering as a string
 local function to_bin (val, nr_elements, element_size)
 	local hex = to_hex(val, nr_elements, element_size)
 	-- `gsub` also returns the number of substitutions; the parentheses drop
 	-- that count so callers never receive an unexpected second value.
 	return (hex:gsub('[0-9A-F]', HEX_TO_BIN))
 end
 --- Decide whether `str` is presentable as a unicode string.
 -- Not implemented yet: every input is rejected.
 -- TODO: Implement. Maybe also look into a purely binary oriented
 -- representation.
 -- @param str  the byte string to inspect (currently ignored)
 -- @return always false
 local function is_nice_unicode_string (str)
 	local is_presentable = false
 	return is_presentable
 end
 --- Decide whether `str` consists solely of printable ASCII characters.
 -- Printable means the byte range 32 (space) to 126 (tilde); control
 -- characters such as tab and newline do not qualify.
 -- @param str  the byte string to inspect
 -- @return true when no byte falls outside 32-126 (also for the empty
 --         string), false otherwise
 local function is_nice_ascii_string (str)
 	-- Look for the first byte outside the printable range.
 	local first_offender = str:find('[^\32-\126]')
 	return first_offender == nil
 end
 --- Split a C array type name into its element type and element count.
 -- Trailing `[N]` dimensions are stripped one at a time, multiplying the
 -- element count by N for each, e.g. 'char [3][3]' -> 'char', 9, 2.
 -- Dimensions without an explicit size (such as `[]` or `[?]`) are left in
 -- place, since no element count can be derived from them.
 -- @param ctype  C type name as a string
 -- @return the element type without trailing whitespace,
 --         the total number of elements (1 when `ctype` is not an array),
 --         the number of array dimensions that were stripped
 local function get_type_and_size_of_singular ( ctype )
 	local nr_elements, layers  =  1, 0
 	while true do
 		-- `%d+` rather than `%d*`: an empty dimension has no size to
 		-- multiply with.
 		local etype, elements  =  ctype:match('^(.+)%[(%d+)%]$')
 		if not elements then  break  end
 		ctype, nr_elements  =  etype, nr_elements * tonumber(elements)
 		layers = layers + 1
 	end
 	-- The type name is separated from the dimensions by a space
 	-- ('char [3]'); drop it. Parentheses discard gsub's second result.
 	return (ctype:gsub('%s+$', '')), nr_elements, layers
 end
 --------------------------------------------------------------------------------
 -- Splits the native `tostring` form of a cdata object, `cdata<TYPE>: 0xADDR`,
 -- into its type name and its address.
 local CDATA_REPR_MATCHER = 'cdata<(.+)>: (0x%w+)'

 --- Append a human-readable description of a LuaJIT cdata value to `l`.
 -- Void pointers are reported by address, other pointers are followed to the
 -- value they point at, and everything else is rendered as a `cdata { ... }`
 -- block listing type, address, size, element information and (for one
 -- dimensional arrays) hex and binary dumps.
 -- @param value         the cdata value to describe
 -- @param display       a number; passed on unchanged to nested formatting
 -- @param l             output table; text fragments are appended to it
 -- @param format_value  function `(value, display, l)` used to format values
 --                      that are not cdata
 -- @return whatever the nested formatter returns for followed pointers,
 --         otherwise nothing
 local function format_cdata (value, display, l, format_value)
 	-- Error check
 	assert(type(value)        == 'cdata'   )
 	assert(type(display)      == 'number'  )
 	assert(type(l)            == 'table'   )
 	assert(type(format_value) == 'function')

 	local native_repr  =  tostring(value)
 	local ctype, addr  =  native_repr:match(CDATA_REPR_MATCHER)

 	-- Not every cdata stringifies as `cdata<TYPE>: 0xADDR`; LuaJIT prints
 	-- 64 bit integers, complex numbers and ctype objects differently. Fall
 	-- back to the native representation instead of indexing a nil `ctype`.
 	if not ctype then
 		l[#l+1] = native_repr
 		return
 	end

 	-- Is void pointer? It cannot be dereferenced, so only show the address.
 	if ctype == 'void *' then
 		l[#l+1] = 'void pointer to ' .. addr
 		return
 	end

 	-- Is normal pointer?
 	if ctype:match('%*$') then
 		-- A NULL pointer compares equal to nil. Dereferencing it would
 		-- crash the process, so report it instead.
 		if value == nil then
 			l[#l+1] = 'null pointer'
 			return
 		end
 		local target = value[0]
 		if type(target) ~= 'cdata' then
 			-- Data presentable in Lua, referred to by pointers?
 			l[#l+1] = 'pointer to '
 			return format_value(target, display, l)
 		else
 			l[#l+1] = '* '
 			return format_cdata(target, display, l, format_value)
 		end
 	end

 	l[#l+1] = 'cdata {'
 	--l[#l+1] = '\n\tnative = \''   .. native_repr .. '\','
 	l[#l+1] = '\n\ttype   = '   .. ctype .. ','
 	l[#l+1] = '\n\taddr   = '   .. addr .. ','

 	-- `ffi.sizeof` yields nil when the size is unknown (e.g. functions).
 	local data_length  =  ffi.sizeof(value)
 	if data_length then
 		-- Size
 		local str = ffi.string(value, data_length)
 		l[#l+1] = '\n\tsize   = '   .. data_length .. ','

 		-- Element size and type
 		local element_type, nr_elements, nr_layers  =  get_type_and_size_of_singular(ctype)
 		local element_size = data_length / nr_elements
 		l[#l+1] = '\n\tnr_e   = ' .. nr_elements .. ','
 		l[#l+1] = '\n\ttype_e = '   .. element_type .. ','
 		l[#l+1] = '\n\tsize_e = '   .. element_size .. ','

 		-- If can be expressed as string, express it as string.
 		local is_ascii = is_nice_ascii_string(str)
 		if is_ascii or is_nice_unicode_string(str) then
 			local string_or_unicode = is_ascii and 'ascii' or 'utf8 '
 			l[#l+1] = '\n\t'..string_or_unicode..'    = ' .. str .. ','
 		end

 		-- Hex and binary dumps.
 		-- NOTE(review): to_hex passes each element to bit.tohex, so this
 		-- presumably only works for arrays of numeric elements - confirm.
 		if nr_layers == 1 then
 			-- Only a single level of arrays
 			l[#l+1] = '\n\thex    = ' .. to_hex(value, nr_elements, element_size) .. ','
 			l[#l+1] = '\n\tbin    = ' .. to_bin(value, nr_elements, element_size) .. ','
 		end
 	end
 	l[#l+1] = '\n}'
 end
 return format_cdata
--- a/pretty.lua
+++ b/pretty.lua
@ -522,10 +522,11 @@ local TYPE_TO_FORMAT_FUNC = {
    ['string']   =  import 'pstring',
    ['thread']   =  format_coroutine,
    ['table']    =  format_table,
    ['function'] =  import 'function',
-    ['userdata'] =  format_primitive,  -- TODO
+
-    ['cdata']    =  format_primitive,  -- TODO & Luajit only
+    -- TODO
    ['userdata'] =  format_primitive,
    ['cdata']    =  import 'cdata',     -- Luajit exclusive ?
 }
 local function format_value (value, display, l)
--- a/test/test_cdata.lua
+++ b/test/test_cdata.lua
@ -0,0 +1,130 @@
 -- Only relevant in LuaJIT: skip the whole file when the global `jit` table
 -- is missing.
 if type(jit) ~= 'table' then  return  end
 -- Create the 'cdata' test suite.
 local SUITE = require 'TestSuite' 'cdata'
 -- Names handed to the suite; the tests below use `format` and
 -- `analyze_byte_string` as free names (presumably injected into the test
 -- functions' environment by `setEnviroment` - confirm against TestSuite).
 SUITE:setEnviroment{
    format  = require 'pretty',
    analyze_byte_string = require 'analyze_byte_string',
 }
 --------------------------------------------------------------------------------
 -- Test stuff.
 local ffi = require('ffi')
 -- C declarations used by the cases below: a small struct type, the
 -- allocator pair malloc/free, and `poll` as an example of a C function.
 ffi.cdef[[
    typedef struct foo { int a, b; } foo_t;
    void free(void *ptr);
    void *malloc(size_t size);
    int poll(struct pollfd *fds, unsigned long nfds, int timeout);
 ]]
 -- TODO: Add more advanced understanding of cdata.
 --- Register a test case that formats `t.input` and compares the outcome
 -- with `t.expect`.
 -- @param t  table with the fields `input` (value to format), `expect`
 --           (expected result, also used as the test's name) and the
 --           optional `options` (second argument to `format`)
 local function format_test (t)
    -- `format` and `assert_equal` are free names here; presumably the suite
    -- supplies them when it runs the test function.
    local function run_case ()
        local actual = format(t.input, t.options)
        assert_equal(t.expect, actual)
    end
    SUITE:addTest(t.expect, run_case)
 end
 --------------------------------------------------------------------------------
 -- Understanding binary data
 -- Plain printable text must be classified as ascii.
 SUITE:addTest('Understand ascii', function ()
    local str   =  'hello world'
    local info  =  analyze_byte_string(str)
    assert_equal('ascii', info.most_likely)
 end)
 -- Text containing multi-byte characters must be classified as utf8.
 SUITE:addTest('Understand utf8', function ()
    local str = 'Æh? Hvø Tæler Då Om?'
    local info  =  analyze_byte_string(str)
    assert_equal('utf8', info.most_likely)
 end)
 -- Bytes above 127 mixed with a few letters must be classified as binary.
 SUITE:addTest('Understand binary', function ()
    local str = '\190\098\140\097\255'
    local info  =  analyze_byte_string(str)
    assert_equal('binary', info.most_likely)
 end)
 -- Letters separated by stray bytes in the 128-191 range are binary as well.
 SUITE:addTest('More binary', function ()
    local str = '\098\140\097\140\100'
    local info  =  analyze_byte_string(str)
    assert_equal('binary', info.most_likely)
 end)
 --------------------------------------------------------------------------------
 -- Formatting cases for different kinds of cdata.
 -- NOTE(review): every `expect` below is the Lua pattern 'cdata<.+>: 0x%x+',
 -- but `format_test` compares with `assert_equal`; these look like
 -- placeholders carried over from pattern-based tests - confirm the intended
 -- expectations. The cases also all share that same test name.

 -- A C function.
 format_test {
    input  = ffi.C.poll,
    expect = 'cdata<.+>: 0x%x+',
 }
 -- A char array holding the values 0..16.
 do
    local list = ffi.new('char [17]')
    for i = 0, 16 do  list[i] = i end
    format_test {
        input  = list,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 -- An int array holding the values 0..16.
 do
    local list = ffi.new('int [17]')
    for i = 0, 16 do  list[i] = i end
    format_test {
        input  = list,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 -- A char array holding the byte values 65..74, i.e. the letters 'A'..'J'.
 do
    local list = ffi.new('char [10]')
    for i = 0, 10-1 do  list[i] = i + 65 end
    format_test {
        input  = list,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 -- A two dimensional char array; cell (x, y) holds x * 16 + y.
 do
    local mat = ffi.new('char [3][3]')
    for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end
    format_test {
        input  = mat,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 -- The pointer returned by malloc, released through free when collected.
 do
    local p = ffi.gc(ffi.C.malloc(1), ffi.C.free)
    format_test {
        input  = p,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 -- Formats the result of `p + 0` (presumably a pointer to the first
 -- element) for a single-element char array.
 -- NOTE(review): 'Derp' looks like a placeholder expectation - confirm.
 SUITE:addTest('a_very_small_part_of_math', function ()
    local p = ffi.new('char[1]')
          p[0] = 27
    local actual_result  =  format(p + 0, {})
    assert_equal('Derp', actual_result)
 end)
 -- Same `p + 0` construction, with a struct as the element type.
 do
    local p = ffi.new('foo_t[1]')
          p[0].a = 27
          p[0].b = 27
    format_test {
        input  = p + 0,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 --------------------------------------------------------------------------------
 return SUITE
--- a/test/test_pretty.lua
+++ b/test/test_pretty.lua
@ -329,31 +329,6 @@ end)
 -- TODO: This is a very complex topic, and will be expanded upon after 1.0.0.
 --------------------------------------------------------------------------------
 -- CDATA
 -- TODO: Add more advanced understanding of cdata.
 if HAS_JIT_LIBRARY then
    local ffi = require('ffi')
    ffi.cdef[[
        int poll(struct pollfd *fds, unsigned long nfds, int timeout);
    ]]
    format_test {
        input  = ffi.C.poll,
        approx = true,
        expect = 'cdata<.+>: 0x%x+',
    }
    format_test {
        input  = ffi.new('int[10]'),
        approx = true,
        expect = 'cdata<.+>: 0x%x+',
    }
 end
 --------------------------------------------------------------------------------
 -- General
--- a/test/test_resilience.lua
+++ b/test/test_resilience.lua
@ -95,6 +95,8 @@ SUITE:addTest('Proper malformed utf8 escaping (through LÖVE)', function ()
    -- The input strings are gotten from TestSuite's example strings.
    do return error 'Test skipped' end
    local pjk_path = '/tmp/test_pjk_'..os.time()
    local conf = [[
--- a/test/test_sorting.lua
+++ b/test/test_sorting.lua
@ -79,6 +79,19 @@ format_test {
    expect = '{ 1, nil, 3 }',
 }
 -- Number keys, including -1/0 and 1/0 (negative and positive infinity), are
 -- expected to come out in ascending numeric order.
 format_test {
 	name   = 'Proper sorting of number keys',
    input  = { [-1/0] = 'a', [-100] = 'b', [-1] = 'c', [0] = 'd', [1] = 'e', [100] = 'f', [1/0] = 'g' },
    expect = '{\n    [-1/0] = \'a\',  [-100] = \'b\',\n    [-1]   = \'c\',  [0]    = \'d\',\n    [1]    = \'e\',  [100]  = \'f\',\n    [1/0]  = \'g\'\n}',
 }
 -- String keys that spell numbers are expected in numeric order as well
 -- ('-100' before '-1', '1' before '100').
 format_test {
 	name   = 'Proper sorting of number strings keys',
    input  = { ['-100'] = 'b', ['-1'] = 'c', ['0'] = 'd', ['1'] = 'e', ['100'] = 'f' },
    expect = '{\n    [\'-100\'] = \'b\',  [\'-1\']   = \'c\',\n    [\'0\']    = \'d\',  [\'1\']    = \'e\',\n    [\'100\']  = \'f\'\n}',
 }
 --[[ Sorting is hard in unicode, and I can't be bothered.
 format_test {
    name   = 'Unicode: ø comes before å in danish',