diff --git a/analyze_byte_string.lua b/analyze_byte_string.lua
new file mode 100644
--- /dev/null
+++ b/analyze_byte_string.lua
@@ -0,0 +1,90 @@
+
+--require 'fun' ()
+local utf8 = require 'utf8'
+
+local ASCII_CHAR_PATTERN = '[\32-\126\009\010\013]'
+local UNICODE_CHAR_PATTERN = '[\01-\127\192-\255][\128-\191]*'
+
+
+-- Estimates how likely it is that `str` is plain ascii text.
+-- @param str Byte string to analyze.
+-- @return Number in the range [0, 1]; 0 for the empty string.
+local function probability_of_ascii_string (str)
+	assert(type(str) == 'string')
+
+	-- The empty string carries no evidence, and would divide by zero below.
+	if #str == 0 then return 0 end
+
+	-- Find ascii subsequences of the string.
+	-- Then find the total number of ascii characters,
+	-- and the length of the longest subsequence.
+	local len_of_longest_subseq, nr_ascii_chars = 0, 0
+	for subseq in str:gmatch(ASCII_CHAR_PATTERN..'+') do
+		len_of_longest_subseq = math.max(#subseq, len_of_longest_subseq)
+		nr_ascii_chars = nr_ascii_chars + #subseq
+	end
+
+	-- Perform probability calculation
+	-- This heuristic is based on the observation that large numbers of
+	-- ascii characters, and long subsequences are the primary indicators
+	-- of ascii strings.
+	return (len_of_longest_subseq + nr_ascii_chars) / (2 * #str)
+end
+
+-- Estimates how likely it is that `str` is utf8 encoded text.
+-- @param str Byte string to analyze.
+-- @return Ratio of bytes reported valid by `utf8.iterate` to the total number
+-- of bytes; 0 for the empty string.
+local function probability_of_utf8_string (str)
+	assert(type(str) == 'string')
+
+	-- Avoid 0/0 (nan) for the empty string.
+	if #str == 0 then return 0 end
+
+	-- Find numbers of valid utf8 bytes
+	local valid_bytes = 0
+	for char, valid in utf8.iterate(str) do
+		if valid then valid_bytes = valid_bytes + #char end
+	end
+
+	-- Calculate ratio of valid bytes to total number of bytes.
+	return valid_bytes / #str
+end
+
+-- Placeholder: utf16 detection is not implemented yet, so it never wins.
+local function probability_of_utf16_string (str)
+	return 0
+end
+
+-- Constant baseline score for "none of the above".
+local function probability_of_binary_data (str)
+	return 2/3
+end
+
+-- Ordered by priority. On equal probability the earlier entry wins, which
+-- keeps the result independent of table iteration order.
+local str_representations = {
+	{ 'ascii', probability_of_ascii_string },
+	{ 'utf8', probability_of_utf8_string },
+	{ 'utf16', probability_of_utf16_string },
+	{ 'binary', probability_of_binary_data },
+}
+
+-- Scores `str` against every known representation.
+-- @param str Byte string to analyze.
+-- @return Table with a `<name>_prob` number for each representation, and
+-- `most_likely` naming the highest scoring one.
+return function (str)
+	local str_info, most_likely, most_likely_prob = {}, 'ascii', 0
+	for _, repr in ipairs(str_representations) do
+		local repr_name, prob_func = repr[1], repr[2]
+		local prob = prob_func(str)
+		str_info[repr_name..'_prob'] = prob
+		-- Strict comparison: ties keep the higher priority representation.
+		if prob > most_likely_prob then
+			most_likely, most_likely_prob = repr_name, prob
+		end
+	end
+	str_info.most_likely = most_likely
+	return str_info
+end
diff --git a/cdata.lua b/cdata.lua
new file mode 100644
--- /dev/null
+++ b/cdata.lua
@@ -0,0 +1,154 @@
+
+-- Import
+
+local ffi = require 'ffi'
+local bit = require 'bit'
+
+-- Constants
+
+--------------------------------------------------------------------------------
+-- Util
+
+local HEX_TO_BIN = {
+	['0'] = '0000', ['1'] = '0001', ['2'] = '0010', ['3'] = '0011',
+	['4'] = '0100', ['5'] = '0101', ['6'] = '0110', ['7'] = '0111',
+	['8'] = '1000', ['9'] = '1001', ['A'] = '1010', ['B'] = '1011',
+	['C'] = '1100', ['D'] = '1101', ['E'] = '1110', ['F'] = '1111',
+}
+
+-- Renders the elements of an array-like cdata as space separated hex.
+-- @param val Indexable value; elements 0 .. nr_elements-1 are read.
+-- @param nr_elements Number of elements to render.
+-- @param element_size Size of each element in bytes (two digits per byte).
+-- @return String of upper case hex numbers, separated by single spaces.
+local function to_hex (val, nr_elements, element_size)
+	local l = {}
+	for i = 0, nr_elements - 1 do
+		-- A negative digit count makes bit.tohex emit upper case digits,
+		-- which is what HEX_TO_BIN is keyed on.
+		l[#l+1] = bit.tohex(val[i], -2*element_size)
+	end
+	return table.concat(l, ' ')
+end
+
+-- Renders the elements of an array-like cdata as space separated binary.
+-- Takes the same arguments as `to_hex`.
+local function to_bin (val, nr_elements, element_size)
+	-- Parentheses drop the substitution count that gsub also returns.
+	return (to_hex(val, nr_elements, element_size):gsub('[0-9A-F]', HEX_TO_BIN))
+end
+
+local function is_nice_unicode_string (str)
+	-- TODO... Maybe also look into a purely binary oriented representation.
+	return false
+end
+
+-- Checks whether every byte of `str` is a printable ascii character
+-- (bytes 32 through 126). The empty string counts as nice.
+local function is_nice_ascii_string (str)
+	for i = 1, #str do
+		local byte = str:byte(i)
+		if not (32 <= byte and byte <= 126) then return false end
+	end
+	return true
+end
+
+-- Strips trailing `[N]` array suffixes from a ctype name.
+-- @param ctype Type name, e.g. `char [3][3]`.
+-- @return The remaining element type name, the product of all stripped
+-- dimensions, and the number of suffixes stripped.
+local function get_type_and_size_of_singular ( ctype )
+	local nr_elements, layers = 1, 0
+	while true do
+		-- Require at least one digit: an empty dimension (`[]`) has no
+		-- size to multiply by.
+		local etype, elements = ctype:match('(.+)%[(%d+)%]$')
+		if not elements then break end
+		ctype, nr_elements = etype, nr_elements * tonumber(elements)
+		layers = layers + 1
+	end
+	return ctype, nr_elements, layers
+end
+
+--------------------------------------------------------------------------------
+
+local CDATA_REPR_MATCHER = 'cdata<(.+)>: (0x%w+)'
+
+
+-- Formats a LuaJIT cdata value, appending the pieces to `l`.
+-- @param value The cdata to format.
+-- @param display Number; passed on unchanged when formatting pointees.
+-- @param l Output table; string pieces are appended to it.
+-- @param format_value Formatter used for pointees that are not cdata,
+-- called as `format_value(value, display, l)`.
+local function format_cdata (value, display, l, format_value)
+
+	-- Error check
+	assert(type(value) == 'cdata' )
+	assert(type(display) == 'number' )
+	assert(type(l) == 'table' )
+	assert(type(format_value) == 'function')
+
+	-- Do stuff
+	local native_repr = tostring(value)
+	local data_length = ffi.sizeof(value)
+	local ctype, addr = native_repr:match(CDATA_REPR_MATCHER)
+
+	-- Unrecognized representation: fall back to the native one.
+	if not ctype then
+		l[#l+1] = native_repr
+		return
+	end
+
+	-- Is void pointer?
+	if ctype == 'void *' then
+		l[#l+1] = 'void pointer to ' .. addr
+		return
+	end
+
+	-- Is normal pointer?
+	if ctype:match('%*$') then
+		if type(value[0]) ~= 'cdata' then
+			-- Data presentable in Lua, referred to by pointers?
+			l[#l+1] = 'pointer to '
+			return format_value(value[0], display, l)
+		else
+			l[#l+1] = '* '
+			return format_cdata(value[0], display, l, format_value)
+		end
+	end
+
+	l[#l+1] = 'cdata {'
+	--l[#l+1] = '\n\tnative = \'' .. native_repr .. '\','
+	l[#l+1] = '\n\ttype = ' .. ctype .. ','
+	l[#l+1] = '\n\taddr = ' .. addr .. ','
+	if data_length then
+		-- Size
+		local str = ffi.string(value, data_length)
+		l[#l+1] = '\n\tsize = ' .. data_length .. ','
+
+		-- Element size and type
+		local element_type, nr_elements, nr_layers = get_type_and_size_of_singular(ctype)
+		local element_size = data_length / nr_elements
+		l[#l+1] = '\n\tnr_e = ' .. nr_elements .. ','
+		l[#l+1] = '\n\ttype_e = ' .. element_type .. ','
+		l[#l+1] = '\n\tsize_e = ' .. element_size .. ','
+
+		-- If can be expressed as string, express it as string.
+		local is_ascii = is_nice_ascii_string(str)
+		if is_ascii or is_nice_unicode_string(str) then
+			local string_or_unicode = is_ascii and 'ascii' or 'utf8 '
+			l[#l+1] = '\n\t'..string_or_unicode..' = ' .. str .. ','
+		end
+		--
+		if nr_layers == 1 then
+			-- Only a single level of arrays
+			l[#l+1] = '\n\thex = ' .. to_hex(value, nr_elements, element_size) .. ','
+			l[#l+1] = '\n\tbin = ' .. to_bin(value, nr_elements, element_size) .. ','
+		end
+
+	end
+	l[#l+1] = '\n}'
+end
+
+return format_cdata
diff --git a/pretty.lua b/pretty.lua
index 8bb3a5d..de1d4c6 100644
--- a/pretty.lua
+++ b/pretty.lua
@@ -522,10 +522,11 @@ local TYPE_TO_FORMAT_FUNC = {
 	['string'] = import 'pstring',
 	['thread'] = format_coroutine,
 	['table'] = format_table,
-
 	['function'] = import 'function',
-	['userdata'] = format_primitive, -- TODO
-	['cdata'] = format_primitive, -- TODO & Luajit only
+
+	-- TODO
+	['userdata'] = format_primitive,
+	['cdata'] = import 'cdata', -- Luajit exclusive ?
 }
 
 local function format_value (value, display, l)
diff --git a/test/test_cdata.lua b/test/test_cdata.lua
new file mode 100644
--- /dev/null
+++ b/test/test_cdata.lua
@@ -0,0 +1,130 @@
+
+-- Only relevant in LUAJIT.
+if type(jit) ~= 'table' then return end
+
+local SUITE = require 'TestSuite' 'cdata'
+SUITE:setEnviroment{
+	format = require 'pretty',
+	analyze_byte_string = require 'analyze_byte_string',
+}
+
+--------------------------------------------------------------------------------
+-- Test stuff.
+
+local ffi = require('ffi')
+ffi.cdef[[
+	typedef struct foo { int a, b; } foo_t;
+
+	void free(void *ptr);
+	void *malloc(size_t size);
+	int poll(struct pollfd *fds, unsigned long nfds, int timeout);
+]]
+
+-- TODO: Add more advanced understanding of cdata.
+
+
+local function format_test (t)
+	SUITE:addTest(t.expect, function ()
+		assert_equal(t.expect, format(t.input, t.options))
+	end)
+end
+
+--------------------------------------------------------------------------------
+-- Understanding binary data
+
+SUITE:addTest('Understand ascii', function ()
+	local str = 'hello world'
+	local info = analyze_byte_string(str)
+	assert_equal('ascii', info.most_likely)
+end)
+
+SUITE:addTest('Understand utf8', function ()
+	local str = 'Æh? Hvø Tæler Då Om?'
+	local info = analyze_byte_string(str)
+	assert_equal('utf8', info.most_likely)
+end)
+
+SUITE:addTest('Understand binary', function ()
+	local str = '\190\098\140\097\255'
+	local info = analyze_byte_string(str)
+	assert_equal('binary', info.most_likely)
+end)
+
+SUITE:addTest('More binary', function ()
+	local str = '\098\140\097\140\100'
+	local info = analyze_byte_string(str)
+	assert_equal('binary', info.most_likely)
+end)
+
+--------------------------------------------------------------------------------
+
+format_test {
+	input = ffi.C.poll,
+	expect = 'cdata<.+>: 0x%x+',
+}
+
+do
+	local list = ffi.new('char [17]')
+	for i = 0, 16 do list[i] = i end
+	format_test {
+		input = list,
+		expect = 'cdata<.+>: 0x%x+',
+	}
+end
+
+do
+	local list = ffi.new('int [17]')
+	for i = 0, 16 do list[i] = i end
+	format_test {
+		input = list,
+		expect = 'cdata<.+>: 0x%x+',
+	}
+end
+
+do
+	local list = ffi.new('char [10]')
+	for i = 0, 10-1 do list[i] = i + 65 end
+	format_test {
+		input = list,
+		expect = 'cdata<.+>: 0x%x+',
+	}
+end
+
+do
+	local mat = ffi.new('char [3][3]')
+	for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end
+	format_test {
+		input = mat,
+		expect = 'cdata<.+>: 0x%x+',
+	}
+end
+
+do
+	local p = ffi.gc(ffi.C.malloc(1), ffi.C.free)
+	format_test {
+		input = p,
+		expect = 'cdata<.+>: 0x%x+',
+	}
+end
+
+SUITE:addTest('a_very_small_part_of_math', function ()
+	local p = ffi.new('char[1]')
+	p[0] = 27
+	local actual_result = format(p + 0, {})
+	-- NOTE(review): 'Derp' looks like a placeholder expectation -- confirm.
+	assert_equal('Derp', actual_result)
+end)
+
+do
+	local p = ffi.new('foo_t[1]')
+	p[0].a = 27
+	p[0].b = 27
+	format_test {
+		input = p + 0,
+		expect = 'cdata<.+>: 0x%x+',
+	}
+end
+
+--------------------------------------------------------------------------------
+
+return SUITE
diff --git a/test/test_pretty.lua b/test/test_pretty.lua
index c64d1b5..05d9fc5 100644
--- a/test/test_pretty.lua
+++ b/test/test_pretty.lua
@@ -329,31 +329,6 @@ end)
 
 -- TODO: This is a very complex topic, and will expanded upon after 1.0.0.
 
---------------------------------------------------------------------------------
--- CDATA
-
--- TODO: Add more advanced understanding of cdata.
-
-if HAS_JIT_LIBRARY then
-
-	local ffi = require('ffi')
-	ffi.cdef[[
-		int poll(struct pollfd *fds, unsigned long nfds, int timeout);
-	]]
-
-	format_test {
-		input = ffi.C.poll,
-		approx = true,
-		expect = 'cdata<.+>: 0x%x+',
-	}
-
-	format_test {
-		input = ffi.new('int[10]'),
-		approx = true,
-		expect = 'cdata<.+>: 0x%x+',
-	}
-end
-
 --------------------------------------------------------------------------------
 -- General
 
diff --git a/test/test_resilience.lua b/test/test_resilience.lua
index dcaa828..8396132 100644
--- a/test/test_resilience.lua
+++ b/test/test_resilience.lua
@@ -95,6 +95,8 @@ SUITE:addTest('Proper malformed utf8 escaping (through LÖVE)', function ()
 
 	-- The input strings are gotten from TestSuite's example strings.
 
+	do return error 'Test skipped' end
+
 	local pjk_path = '/tmp/test_pjk_'..os.time()
 
 	local conf = [[
diff --git a/test/test_sorting.lua b/test/test_sorting.lua
index 9cddb8d..625288e 100644
--- a/test/test_sorting.lua
+++ b/test/test_sorting.lua
@@ -79,6 +79,19 @@ format_test {
 	expect = '{ 1, nil, 3 }',
 }
 
+format_test {
+	name = 'Proper sorting of number keys',
+	input = { [-1/0] = 'a', [-100] = 'b', [-1] = 'c', [0] = 'd', [1] = 'e', [100] = 'f', [1/0] = 'g' },
+	expect = '{\n [-1/0] = \'a\', [-100] = \'b\',\n [-1] = \'c\', [0] = \'d\',\n [1] = \'e\', [100] = \'f\',\n [1/0] = \'g\'\n}',
+}
+
+format_test {
+	name = 'Proper sorting of number strings keys',
+	input = { ['-100'] = 'b', ['-1'] = 'c', ['0'] = 'd', ['1'] = 'e', ['100'] = 'f' },
+	expect = '{\n [\'-100\'] = \'b\', [\'-1\'] = \'c\',\n [\'0\'] = \'d\', [\'1\'] = \'e\',\n [\'100\'] = \'f\'\n}',
+}
+
+
 --[[ Sorting is hard in unicode, and I can't be bothered.
 format_test {
 	name = 'Unicode: ø comes before å in danish',