From df232b144b0478b1fab1ec38737e99a34f323908 Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Mon, 1 May 2017 15:41:23 +0200
Subject: [PATCH] Moved cdata tests to own file, and began work on string analysis.

---
 analyze_byte_string.lua |  98 ++++++++++++++++++++++++++++++
 test/test_cdata.lua     | 130 ++++++++++++++++++++++++++++++++++++++++
 test/test_pretty.lua    |  83 -------------------------
 3 files changed, 228 insertions(+), 83 deletions(-)
 create mode 100644 analyze_byte_string.lua
 create mode 100644 test/test_cdata.lua

diff --git a/analyze_byte_string.lua b/analyze_byte_string.lua
new file mode 100644
index 0000000..6c7b022
--- /dev/null
+++ b/analyze_byte_string.lua
@@ -0,0 +1,98 @@
+
+--- Heuristics for guessing how the bytes of a string are meant to be read.
+-- The module's value is a single function, defined at the bottom of the file.
+
+require 'fun' ()
+local utf8 = require 'utf8'
+
+-- Printable ASCII plus tab, line feed and carriage return.
+local ASCII_CHAR_PATTERN = '[\32-\126\009\010\013]'
+-- A non-continuation byte followed by any number of continuation bytes.
+-- Not referenced yet.
+local UNICODE_CHAR_PATTERN = '[\01-\127\192-\255][\128-\191]*'
+
+--- Collects every maximal run of legible ASCII characters in `str`.
+-- A plain loop is used because `string.gmatch` yields only the match, whereas
+-- luafun generators are expected to return their state before any value.
+-- @param str  byte string to scan
+-- @return     array of the matched substrings, in order of appearance
+local function ledgible_subsequences_in (str)
+    local subsequences = {}
+    for subsequence in str:gmatch(ASCII_CHAR_PATTERN..'+') do
+        subsequences[#subsequences + 1] = subsequence
+    end
+    return subsequences
+end
+
+--- Scores how much `str` looks like plain ASCII text.
+-- The score is the mean of two ratios: the longest legible run over all
+-- legible characters, and the legible characters over all characters.
+-- @param str  byte string to score
+-- @return     number in [0, 1]; 0 when `str` has no legible characters
+local function probability_of_ascii_string (str)
+    local sub_seqs = ledgible_subsequences_in(str)
+    -- Nothing legible (this includes the empty string): avoid dividing by zero.
+    if #sub_seqs == 0 then return 0 end
+    local nr_characters = #str
+    -- luafun's foldl takes the initial accumulator as its second argument.
+    local nr_ledgible_characters = foldl(operator.add, 0, map(operator.len, sub_seqs))
+    local len_of_longest_subseq = foldl(math.max, 0, map(operator.len, sub_seqs))
+    return ((len_of_longest_subseq/nr_ledgible_characters) + (nr_ledgible_characters / nr_characters)) / 2
+end
+
+--- Scores how much `str` looks like UTF-8: the share of its bytes that belong
+-- to characters reported as valid.
+-- NOTE(review): depends on `utf8.iterate` yielding `char, valid` pairs. That
+-- function is not in Lua's standard utf8 library; confirm which module
+-- `require 'utf8'` resolves to.
+-- @param str  byte string to score
+-- @return     share of valid bytes; 0 for the empty string
+local function probability_of_utf8_string (str)
+    if #str == 0 then return 0 end -- avoid 0/0
+    local valid_bytes = 0
+    for char, valid in utf8.iterate(str) do
+        if valid then valid_bytes = valid_bytes + #char end
+    end
+    return valid_bytes / #str
+end
+
+--- Placeholder: UTF-16 detection is not implemented, so it never wins.
+local function probability_of_utf16_string (str)
+    return 0
+end
+
+--- Fixed baseline: binary is chosen only when every other score is below 2/3.
+local function probability_of_binary_data (str)
+    return 2/3
+end
+
+-- Scoring function for every representation that is understood.
+local str_representations = {
+    ascii  = probability_of_ascii_string,
+    utf8   = probability_of_utf8_string ,
+    utf16  = probability_of_utf16_string,
+    binary = probability_of_binary_data,
+}
+
+-- Order in which representations are scored. `pairs` visits keys in an
+-- unspecified order, so ties are settled by this list instead: the earliest
+-- entry with the highest score wins.
+local REPRESENTATION_PRIORITY = { 'ascii', 'utf8', 'utf16', 'binary' }
+
+--- Scores `str` against every known representation.
+-- @param str  byte string to analyze
+-- @return     table holding `<name>_prob` for each representation and
+--             `most_likely`, the name of the best-scoring one
+return function (str)
+    local str_info, most_likely, most_likely_prob = {}, 'ascii', 0
+    for _, repr_name in ipairs(REPRESENTATION_PRIORITY) do
+        local prob = str_representations[repr_name](str)
+        str_info[repr_name..'_prob'] = prob
+        -- Strict comparison: an earlier representation keeps a tie.
+        if prob > most_likely_prob then
+            most_likely, most_likely_prob = repr_name, prob
+        end
+    end
+    str_info.most_likely = most_likely
+    return str_info
+end
diff --git a/test/test_cdata.lua b/test/test_cdata.lua
new file mode 100644
index 0000000..26774b8
--- /dev/null
+++ b/test/test_cdata.lua
@@ -0,0 +1,130 @@
+
+-- Only relevant in LUAJIT.
+if type(jit) ~= 'table' then return end
+
+local SUITE = require('TestSuite').new('cdata')
+SUITE:setEnviroment{
+    format = require('pretty'),
+    analyze_byte_string = require 'analyze_byte_string',
+}
+
+--------------------------------------------------------------------------------
+-- Test stuff.
+
+local ffi = require('ffi')
+ffi.cdef[[
+    typedef struct foo { int a, b; } foo_t;
+
+    void free(void *ptr);
+    void *malloc(size_t size);
+    int poll(struct pollfd *fds, unsigned long nfds, int timeout);
+]]
+
+-- TODO: Add more advanced understanding of cdata.
+
+
+local function format_test (t)
+    SUITE:addTest(t.expect, function ()
+        assert_equal(t.expect, format(t.input, t.options))
+    end)
+end
+
+--------------------------------------------------------------------------------
+-- Understanding binary data
+
+SUITE:addTest('Understand ascii', function ()
+    local str = 'hello world'
+    local info = analyze_byte_string(str)
+    assert_equal('ascii', info.most_likely)
+end)
+
+SUITE:addTest('Understand utf8', function ()
+    local str = 'Æh? Hvø Tæler Då Om?'
+    local info = analyze_byte_string(str)
+    assert_equal('utf8', info.most_likely)
+end)
+
+SUITE:addTest('Understand binary', function ()
+    local str = '\190\098\140\097\255'
+    local info = analyze_byte_string(str)
+    print(format(info))
+    assert_equal('binary', info.most_likely)
+end)
+
+SUITE:addTest('More binary', function ()
+    local str = '\098\140\097\140\100'
+    local info = analyze_byte_string(str)
+    assert_equal('binary', info.most_likely)
+end)
+
+--------------------------------------------------------------------------------
+
+format_test {
+    input = ffi.C.poll,
+    expect = 'cdata<.+>: 0x%x+',
+}
+
+do
+    local list = ffi.new('char [17]')
+    for i = 0, 16 do list[i] = i end
+    format_test {
+        input = list,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local list = ffi.new('int [17]')
+    for i = 0, 16 do list[i] = i end
+    format_test {
+        input = list,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local list = ffi.new('char [10]')
+    for i = 0, 10-1 do list[i] = i + 65 end
+    format_test {
+        input = list,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local mat = ffi.new('char [3][3]')
+    for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end
+    format_test {
+        input = mat,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+do
+    local p = ffi.gc(ffi.C.malloc(1), ffi.C.free)
+    format_test {
+        input = p,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+SUITE:addTest('a_very_small_part_of_math', function ()
+    local p = ffi.new('char[1]')
+    p[0] = 27
+    local actual_result = format(p + 0, {})
+    assert_equal('Derp', actual_result)
+end)
+
+do
+    local p = ffi.new('foo_t[1]')
+    p[0].a = 27
+    p[0].b = 27
+    format_test {
+        input = p + 0,
+        expect = 'cdata<.+>: 0x%x+',
+    }
+end
+
+--------------------------------------------------------------------------------
+
+return SUITE
diff --git a/test/test_pretty.lua b/test/test_pretty.lua
index 051eb75..8ee9707 100644
--- a/test/test_pretty.lua
+++ b/test/test_pretty.lua
@@ -360,89 +360,6 @@ format_test {
 
 -- TODO: Add more tests for sorting.
 
---------------------------------------------------------------------------------
--- CDATA
-
--- TODO: Add more advanced understanding of cdata.
-
-if type(jit) == 'table' then
-
-    local ffi = require('ffi')
-    ffi.cdef[[
-        typedef struct foo { int a, b; } foo_t;
-
-        void free(void *ptr);
-        void *malloc(size_t size);
-        int poll(struct pollfd *fds, unsigned long nfds, int timeout);
-    ]]
-
-    format_test {
-        input = ffi.C.poll,
-        expect = 'cdata<.+>: 0x%x+',
-    }
-
-    do
-        local list = ffi.new('char [17]')
-        for i = 0, 16 do list[i] = i end
-        format_test {
-            input = list,
-            expect = 'cdata<.+>: 0x%x+',
-        }
-    end
-
-    do
-        local list = ffi.new('int [17]')
-        for i = 0, 16 do list[i] = i end
-        format_test {
-            input = list,
-            expect = 'cdata<.+>: 0x%x+',
-        }
-    end
-
-    do
-        local list = ffi.new('char [10]')
-        for i = 0, 10-1 do list[i] = i + 65 end
-        format_test {
-            input = list,
-            expect = 'cdata<.+>: 0x%x+',
-        }
-    end
-
-    do
-        local mat = ffi.new('char [3][3]')
-        for x = 0, 2 do for y = 0, 2 do mat[x][y] = x * 16 + y end end
-        format_test {
-            input = mat,
-            expect = 'cdata<.+>: 0x%x+',
-        }
-    end
-
-    do
-        local p = ffi.gc(ffi.C.malloc(1), ffi.C.free)
-        format_test {
-            input = p,
-            expect = 'cdata<.+>: 0x%x+',
-        }
-    end
-
-    SUITE:addTest('a_very_small_part_of_math', function ()
-        local p = ffi.new('char[1]')
-        p[0] = 27
-        local actual_result = format(p + 0, {})
-        assert_equal('Derp', actual_result)
-    end)
-
-    do
-        local p = ffi.new('foo_t[1]')
-        p[0].a = 27
-        p[0].b = 27
-        format_test {
-            input = p + 0,
-            expect = 'cdata<.+>: 0x%x+',
-        }
-    end
-end
-
 --------------------------------------------------------------------------------
 
 return SUITE