From ffbbfef499c25fa382271c89b42401e3423df503 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Tue, 4 Apr 2017 16:19:22 +0200 Subject: [PATCH 1/4] Beginning work on more advanced cdata inspection. --- cdata.lua | 83 ++++++++++++++++++++++++++++++++++++++++++++ pretty.lua | 8 ++++- test/test_pretty.lua | 32 +++++++++++++---- 3 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 cdata.lua diff --git a/cdata.lua b/cdata.lua new file mode 100644 index 0000000..ac12575 --- /dev/null +++ b/cdata.lua @@ -0,0 +1,83 @@ + +-- Import + +local ffi = require 'ffi' + +-- Constants + +-------------------------------------------------------------------------------- +-- Util + +local NUMBER_TO_HEX = { + [00] = '0', [01] = '1', [02] = '2', [03] = '3', [04] = '4', [05] = '5', + [06] = '6', [07] = '7', [08] = '8', [09] = '9', [10] = 'A', [11] = 'B', + [12] = 'C', [13] = 'D', [14] = 'E', [15] = 'F', +} + +local function to_hex (str) + local l = {} + for i = 1, #str do + local v = str:byte(i) + l[#l+1] = NUMBER_TO_HEX[math.floor(v / 16)] + l[#l+1] = NUMBER_TO_HEX[v % 16] + l[#l+1] = ' ' + end + l[#l] = nil + return table.concat(l, '') +end + +local function is_nice_ascii_string (str) + for i = 1, #str do + local byte = str:byte(i) + if not (32 <= byte and byte <= 126) then return false end + end + return true +end + +local function get_type_and_size_of_singular ( ctype ) + local nr_elements = 1 + while true do + local etype, elements = ctype:match('(.+)%[(%d*)%]$') + if not elements then break end + ctype, nr_elements = etype, nr_elements * elements + end + return ctype, nr_elements +end + +-------------------------------------------------------------------------------- + +local CDATA_REPR_MATCHER = 'cdata<(.+)>: (0x%w+)' + +return function (value, options, depth, l) + local native_repr = tostring(value) + local data_length = ffi.sizeof(value) + local ctype, addr = native_repr:match(CDATA_REPR_MATCHER) + + l[#l+1] = 'cdata {' + --l[#l+1] = '\n\tnative = \'' .. native_repr .. '\',' + l[#l+1] = '\n\ttype = ' .. ctype .. ',' + l[#l+1] = '\n\taddr = ' .. addr .. ',' + if data_length then + -- Size + local str = ffi.string(value, data_length) + l[#l+1] = '\n\tsize = ' .. data_length .. ',' + + -- Element size and type + local element_type, nr_elements = get_type_and_size_of_singular(ctype) + local element_size = data_length / nr_elements + l[#l+1] = '\n\tnr_e = ' .. nr_elements .. ',' + l[#l+1] = '\n\ttype_e = ' .. element_type .. ',' + l[#l+1] = '\n\tsize_e = ' .. element_size .. ',' + + -- + if is_nice_ascii_string(str) then + l[#l+1] = '\n\tstr = ' .. str .. ',' + end + l[#l+1] = '\n\tbin = ' .. to_hex(str) .. ',' + + + + + end + l[#l+1] = '\n}' +end diff --git a/pretty.lua b/pretty.lua index 8653637..19d6824 100644 --- a/pretty.lua +++ b/pretty.lua @@ -9,6 +9,7 @@ do -- Load number and function formatting format_number = select(2, pcall(require, thispath..'number')) format_function = select(2, pcall(require, thispath..'function')) + format_cdata = select(2, pcall(require, thispath..'cdata')) -- Load other stuff local was_loaded @@ -480,6 +481,11 @@ if not format_function then end end +if not format_cdata then + -- Very simple cdata formatting, if cdata.lua is not available. + format_cdata = format_primitive +end + local TYPE_TO_FORMAT_FUNC = { ['nil'] = format_primitive, ['boolean'] = format_primitive, @@ -491,7 +497,7 @@ local TYPE_TO_FORMAT_FUNC = { -- TODO ['function'] = format_function, ['userdata'] = format_primitive, - ['cdata'] = format_primitive, -- Luajit exclusive ? + ['cdata'] = format_cdata, -- Luajit exclusive ? } function format_value (value, _, depth, l) diff --git a/test/test_pretty.lua b/test/test_pretty.lua index 7bf2bd6..87968bc 100644 --- a/test/test_pretty.lua +++ b/test/test_pretty.lua @@ -377,15 +377,35 @@ if type(jit) == 'table' then format_test { input = ffi.C.poll, - approx = true, expect = 'cdata<.+>: 0x%x+', } - format_test { - input = ffi.new('int[10]'), - approx = true, - expect = 'cdata<.+>: 0x%x+', - } + do + local list = ffi.new('char [17]') + for i = 0, 16 do list[i] = i end + format_test { + input = list, + expect = 'cdata<.+>: 0x%x+', + } + end + + do + local list = ffi.new('char [10]') + for i = 0, 10-1 do list[i] = i + 65 end + format_test { + input = list, + expect = 'cdata<.+>: 0x%x+', + } + end + + do + local mat = ffi.new('char [3][3]') + for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end + format_test { + input = mat, + expect = 'cdata<.+>: 0x%x+', + } + end end -------------------------------------------------------------------------------- From 0a79a8a77a07c1f5fbe3fdcffe4dac0ee87c4254 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Tue, 4 Apr 2017 20:02:58 +0200 Subject: [PATCH 2/4] Experimentations with pointer types. --- cdata.lua | 62 ++++++++++++++++++++++++++++++++------------ test/test_pretty.lua | 49 +++++++++++++++++++++++++++++----- 2 files changed, 88 insertions(+), 23 deletions(-) diff --git a/cdata.lua b/cdata.lua index ac12575..f4013f7 100644 --- a/cdata.lua +++ b/cdata.lua @@ -2,6 +2,7 @@ -- Import local ffi = require 'ffi' +local bit = require 'bit' -- Constants @@ -14,18 +15,22 @@ local NUMBER_TO_HEX = { [12] = 'C', [13] = 'D', [14] = 'E', [15] = 'F', } -local function to_hex (str) +local function to_hex (val, nr_elements, element_size) local l = {} - for i = 1, #str do - local v = str:byte(i) - l[#l+1] = NUMBER_TO_HEX[math.floor(v / 16)] - l[#l+1] = NUMBER_TO_HEX[v % 16] + for i = 0, nr_elements - 1 do + local v = val[i] + l[#l+1] = bit.tohex(v, -2*element_size) l[#l+1] = ' ' end l[#l] = nil return table.concat(l, '') end +local function is_nice_unicode_string (str) + -- TODO... Maybe also look into a purely binary oriented representation. + return false +end + local function is_nice_ascii_string (str) for i = 1, #str do local byte = str:byte(i) @@ -35,24 +40,45 @@ local function is_nice_ascii_string (str) end local function get_type_and_size_of_singular ( ctype ) - local nr_elements = 1 + local nr_elements, layers = 1, 0 while true do local etype, elements = ctype:match('(.+)%[(%d*)%]$') if not elements then break end ctype, nr_elements = etype, nr_elements * elements + layers = layers + 1 end - return ctype, nr_elements + return ctype, nr_elements, layers end -------------------------------------------------------------------------------- local CDATA_REPR_MATCHER = 'cdata<(.+)>: (0x%w+)' -return function (value, options, depth, l) +local function format_cdata (value, options, depth, l, format_value) + local native_repr = tostring(value) local data_length = ffi.sizeof(value) local ctype, addr = native_repr:match(CDATA_REPR_MATCHER) + -- Is void pointer? + if ctype == 'void *' then + local address_pointing_at = tonumber(ffi.cast('int', value)) + l[#l+1] = 'void pointer to ' .. addr + return ; + end + + -- Is normal pointer? + if ctype:match('%*$') then + if type(value[0]) ~= 'cdata' then + -- Data presentable in Lua, refered to by pointers? + l[#l+1] = 'pointer to ' + return format_value(value[0], options, depth, l) + else + l[#l+1] = '* ' + return format_cdata(value[0], options, depth, l, format_value) + end + end + l[#l+1] = 'cdata {' --l[#l+1] = '\n\tnative = \'' .. native_repr .. '\',' l[#l+1] = '\n\ttype = ' .. ctype .. ',' @@ -63,21 +89,25 @@ return function (value, options, depth, l) l[#l+1] = '\n\tsize = ' .. data_length .. ',' -- Element size and type - local element_type, nr_elements = get_type_and_size_of_singular(ctype) + local element_type, nr_elements, nr_layers = get_type_and_size_of_singular(ctype) local element_size = data_length / nr_elements l[#l+1] = '\n\tnr_e = ' .. nr_elements .. ',' l[#l+1] = '\n\ttype_e = ' .. element_type .. ',' l[#l+1] = '\n\tsize_e = ' .. element_size .. ',' - -- - if is_nice_ascii_string(str) then - l[#l+1] = '\n\tstr = ' .. str .. ',' + -- If can be expressed as string, express it as string. + if is_nice_ascii_string(str) or is_nice_unicode_string(str) then + local string_or_unicode = is_nice_ascii_string(str) and 'ascii' or 'utf8 ' + l[#l+1] = '\n\t'..string_or_unicode..' = ' .. str .. ',' + end + -- + if nr_layers == 1 then + -- Only a single level of arrays + l[#l+1] = '\n\thex = ' .. to_hex(value, nr_elements, element_size) .. ',' end - l[#l+1] = '\n\tbin = ' .. to_hex(str) .. ',' - - - end l[#l+1] = '\n}' end + +return format_cdata diff --git a/test/test_pretty.lua b/test/test_pretty.lua index 87968bc..051eb75 100644 --- a/test/test_pretty.lua +++ b/test/test_pretty.lua @@ -19,15 +19,12 @@ local function format_test (t) if t.longterm then return end if t.adv_getlocal and not HAS_ADV_GETLOCAL then return end SUITE:addTest(t.expect, function () - local input_value = t.input - local input_options = t.options - local expected_result = t.expect - local actual_result = format(input_value, input_options) + local actual_result = format(t.input, t.options) if not t.approx or type(actual_result) ~= 'string' then - assert_equal(expected_result, actual_result) + assert_equal(t.expect, actual_result) else - if not actual_result:match(expected_result) then - error(ASSERT_ERROR_APPROX:format(expected_result, actual_result)) + if not actual_result:match(t.expect) then + error(ASSERT_ERROR_APPROX:format(t.expect, actual_result)) end end end) @@ -372,6 +369,10 @@ if type(jit) == 'table' then local ffi = require('ffi') ffi.cdef[[ + typedef struct foo { int a, b; } foo_t; + + void free(void *ptr); + void *malloc(size_t size); int poll(struct pollfd *fds, unsigned long nfds, int timeout); ]] @@ -389,6 +390,15 @@ if type(jit) == 'table' then } end + do + local list = ffi.new('int [17]') + for i = 0, 16 do list[i] = i end + format_test { + input = list, + expect = 'cdata<.+>: 0x%x+', + } + end + do local list = ffi.new('char [10]') for i = 0, 10-1 do list[i] = i + 65 end @@ -406,6 +416,31 @@ if type(jit) == 'table' then expect = 'cdata<.+>: 0x%x+', } end + + do + local p = ffi.gc(ffi.C.malloc(1), ffi.C.free) + format_test { + input = p, + expect = 'cdata<.+>: 0x%x+', + } + end + + SUITE:addTest('a_very_small_part_of_math', function () + local p = ffi.new('char[1]') + p[0] = 27 + local actual_result = format(p + 0, {}) + assert_equal('Derp', actual_result) + end) + + do + local p = ffi.new('foo_t[1]') + p[0].a = 27 + p[0].b = 27 + format_test { + input = p + 0, + expect = 'cdata<.+>: 0x%x+', + } + end end -------------------------------------------------------------------------------- From df232b144b0478b1fab1ec38737e99a34f323908 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 1 May 2017 15:41:23 +0200 Subject: [PATCH 3/4] Moved cdata tests to own file, and began work on string analysis. --- analyze_byte_string.lua | 54 +++++++++++++++++ test/test_cdata.lua | 130 ++++++++++++++++++++++++++++++++++++++++ test/test_pretty.lua | 83 ------------------------- 3 files changed, 184 insertions(+), 83 deletions(-) create mode 100644 analyze_byte_string.lua create mode 100644 test/test_cdata.lua diff --git a/analyze_byte_string.lua b/analyze_byte_string.lua new file mode 100644 index 0000000..6c7b022 --- /dev/null +++ b/analyze_byte_string.lua @@ -0,0 +1,54 @@ + +require 'fun' () +local utf8 = require 'utf8' + +local ASCII_CHAR_PATTERN = '[\32-\126\009\010\013]' +local UNICODE_CHAR_PATTERN = '[\01-\127\192-\255][\128-\191]*' + +local function ledgible_subsequences_in (str) + return totable(str:gmatch(ASCII_CHAR_PATTERN..'+')) +end + +local function probability_of_ascii_string (str) + local sub_seqs = ledgible_subsequences_in(str) + local nr_characters = #str + local nr_ledgible_characters = foldl(operator.add, map(operator.len, sub_seqs)) + local len_of_longest_subseq = foldl(math.max, map(operator.len, sub_seqs)) + return ((len_of_longest_subseq/nr_ledgible_characters) + (nr_ledgible_characters / nr_characters)) / 2 +end + +local function probability_of_utf8_string (str) + local valid_bytes = 0 + for char, valid in utf8.iterate(str) do + if valid then valid_bytes = valid_bytes + #char end + end + return valid_bytes / #str +end + +local function probability_of_utf16_string (str) + return 0 +end + +local function probability_of_binary_data (str) + return 2/3 +end + +local str_representations = { + ascii = probability_of_ascii_string, + utf8 = probability_of_utf8_string , + utf16 = probability_of_utf16_string, + binary = probability_of_binary_data, +} + +return function (str) + local str_info, most_likely, most_likely_prob = {}, 'ascii', 0 + for repr_name, prob_func in pairs(str_representations) do + local prob = prob_func(str) + str_info[repr_name..'_prob'] = prob + if prob >= most_likely_prob then + most_likely, most_likely_prob = repr_name, prob + end + end + str_info.most_likely = most_likely + return str_info +end diff --git a/test/test_cdata.lua b/test/test_cdata.lua new file mode 100644 index 0000000..26774b8 --- /dev/null +++ b/test/test_cdata.lua @@ -0,0 +1,130 @@ + +-- Only relevant in LUAJIT. +if type(jit) ~= 'table' then return end + +local SUITE = require('TestSuite').new('cdata') +SUITE:setEnviroment{ + format = require('pretty'), + analyze_byte_string = require 'analyze_byte_string', +} + +-------------------------------------------------------------------------------- +-- Test stuff. + +local ffi = require('ffi') +ffi.cdef[[ + typedef struct foo { int a, b; } foo_t; + + void free(void *ptr); + void *malloc(size_t size); + int poll(struct pollfd *fds, unsigned long nfds, int timeout); +]] + +-- TODO: Add more advanced understanding of cdata. + + +local function format_test (t) + SUITE:addTest(t.expect, function () + assert_equal(t.expect, format(t.input, t.options)) + end) +end + +-------------------------------------------------------------------------------- +-- Understanding binary data + +SUITE:addTest('Understand ascii', function () + local str = 'hello world' + local info = analyze_byte_string(str) + assert_equal('ascii', info.most_likely) +end) + +SUITE:addTest('Understand utf8', function () + local str = 'Æh? Hvø Tæler Då Om?' + local info = analyze_byte_string(str) + assert_equal('utf8', info.most_likely) +end) + +SUITE:addTest('Understand binary', function () + local str = '\190\098\140\097\255' + local info = analyze_byte_string(str) + print(format(info)) + assert_equal('binary', info.most_likely) +end) + +SUITE:addTest('More binary', function () + local str = '\098\140\097\140\100' + local info = analyze_byte_string(str) + assert_equal('binary', info.most_likely) +end) + +-------------------------------------------------------------------------------- + +format_test { + input = ffi.C.poll, + expect = 'cdata<.+>: 0x%x+', +} + +do + local list = ffi.new('char [17]') + for i = 0, 16 do list[i] = i end + format_test { + input = list, + expect = 'cdata<.+>: 0x%x+', + } +end + +do + local list = ffi.new('int [17]') + for i = 0, 16 do list[i] = i end + format_test { + input = list, + expect = 'cdata<.+>: 0x%x+', + } +end + +do + local list = ffi.new('char [10]') + for i = 0, 10-1 do list[i] = i + 65 end + format_test { + input = list, + expect = 'cdata<.+>: 0x%x+', + } +end + +do + local mat = ffi.new('char [3][3]') + for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end + format_test { + input = mat, + expect = 'cdata<.+>: 0x%x+', + } +end + +do + local p = ffi.gc(ffi.C.malloc(1), ffi.C.free) + format_test { + input = p, + expect = 'cdata<.+>: 0x%x+', + } +end + +SUITE:addTest('a_very_small_part_of_math', function () + local p = ffi.new('char[1]') + p[0] = 27 + local actual_result = format(p + 0, {}) + assert_equal('Derp', actual_result) +end) + +do + local p = ffi.new('foo_t[1]') + p[0].a = 27 + p[0].b = 27 + format_test { + input = p + 0, + expect = 'cdata<.+>: 0x%x+', + } +end + +-------------------------------------------------------------------------------- + +return SUITE diff --git a/test/test_pretty.lua b/test/test_pretty.lua index 051eb75..8ee9707 100644 --- a/test/test_pretty.lua +++ b/test/test_pretty.lua @@ -360,89 +360,6 @@ format_test { -- TODO: Add more tests for sorting. --------------------------------------------------------------------------------- --- CDATA - --- TODO: Add more advanced understanding of cdata. - -if type(jit) == 'table' then - - local ffi = require('ffi') - ffi.cdef[[ - typedef struct foo { int a, b; } foo_t; - - void free(void *ptr); - void *malloc(size_t size); - int poll(struct pollfd *fds, unsigned long nfds, int timeout); - ]] - - format_test { - input = ffi.C.poll, - expect = 'cdata<.+>: 0x%x+', - } - - do - local list = ffi.new('char [17]') - for i = 0, 16 do list[i] = i end - format_test { - input = list, - expect = 'cdata<.+>: 0x%x+', - } - end - - do - local list = ffi.new('int [17]') - for i = 0, 16 do list[i] = i end - format_test { - input = list, - expect = 'cdata<.+>: 0x%x+', - } - end - - do - local list = ffi.new('char [10]') - for i = 0, 10-1 do list[i] = i + 65 end - format_test { - input = list, - expect = 'cdata<.+>: 0x%x+', - } - end - - do - local mat = ffi.new('char [3][3]') - for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end - format_test { - input = mat, - expect = 'cdata<.+>: 0x%x+', - } - end - - do - local p = ffi.gc(ffi.C.malloc(1), ffi.C.free) - format_test { - input = p, - expect = 'cdata<.+>: 0x%x+', - } - end - - SUITE:addTest('a_very_small_part_of_math', function () - local p = ffi.new('char[1]') - p[0] = 27 - local actual_result = format(p + 0, {}) - assert_equal('Derp', actual_result) - end) - - do - local p = ffi.new('foo_t[1]') - p[0].a = 27 - p[0].b = 27 - format_test { - input = p + 0, - expect = 'cdata<.+>: 0x%x+', - } - end -end - -------------------------------------------------------------------------------- return SUITE From 5124189b4e2c50c5f23fea615181b6350713e583 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 1 May 2017 16:23:57 +0200 Subject: [PATCH 4/4] Added `to_bin` function. --- cdata.lua | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cdata.lua b/cdata.lua index f4013f7..acfc961 100644 --- a/cdata.lua +++ b/cdata.lua @@ -9,10 +9,11 @@ local bit = require 'bit' -------------------------------------------------------------------------------- -- Util -local NUMBER_TO_HEX = { - [00] = '0', [01] = '1', [02] = '2', [03] = '3', [04] = '4', [05] = '5', - [06] = '6', [07] = '7', [08] = '8', [09] = '9', [10] = 'A', [11] = 'B', - [12] = 'C', [13] = 'D', [14] = 'E', [15] = 'F', +local HEX_TO_BIN = { + ['0'] = '0000', ['1'] = '0001', ['2'] = '0010', ['3'] = '0011', + ['4'] = '0100', ['5'] = '0101', ['6'] = '0110', ['7'] = '0111', + ['8'] = '1000', ['9'] = '1001', ['A'] = '1010', ['B'] = '1011', + ['C'] = '1100', ['D'] = '1101', ['E'] = '1110', ['F'] = '1111', } local function to_hex (val, nr_elements, element_size) @@ -26,6 +27,10 @@ local function to_hex (val, nr_elements, element_size) return table.concat(l, '') end +local function to_bin (val, nr_elements, element_size) + return to_hex(val, nr_elements, element_size):gsub('[0-9A-F]', HEX_TO_BIN) +end + local function is_nice_unicode_string (str) -- TODO... Maybe also look into a purely binary oriented representation. return false @@ -104,6 +109,7 @@ local function format_cdata (value, options, depth, l, format_value) if nr_layers == 1 then -- Only a single level of arrays l[#l+1] = '\n\thex = ' .. to_hex(value, nr_elements, element_size) .. ',' + l[#l+1] = '\n\tbin = ' .. to_bin(value, nr_elements, element_size) .. ',' end end