1
0

Many bad unicode sequences are now properly escaped

This commit is contained in:
Jon Michael Aanes 2017-07-21 16:44:46 +02:00
parent bfbfe4de56
commit 6f7e767b68
2 changed files with 34 additions and 7 deletions

View File

@ -53,10 +53,20 @@ local function escape_string (str)
-- Error checking -- Error checking
assert(type(str) == 'string') assert(type(str) == 'string')
-- Do stuff -- First escape the easy ones.
local l = {} local str = str:gsub('.', function (char) return CHAR_TO_STR_REPR[char:byte()] end)
for i = 1, #str do l[#l+1] = CHAR_TO_STR_REPR[str:byte(i)] end -- Escape malformed continuation characters
return table.concat(l, '') repeat
local count
str, count = str:gsub('([^\128-\255])([\128-\191])', function(a, b) print(a,b) return a..'\\' .. b:byte() end)
until count == 0
-- Escape malformed start characters
repeat
local count
str, count = str:gsub('([\191-\255])([^\128-\191])', function(a, b) print(a,b) return '\\'..a:byte() .. b end)
until count == 0
-- return
return str
end end
local function smallest_secure_longform_string_level (str) local function smallest_secure_longform_string_level (str)
@ -166,6 +176,9 @@ end
return function (str, depth, l) return function (str, depth, l)
-- pretty.format_string -- pretty.format_string
-- TODO: Prefer \ddd style escaping to shorter (\n, \t), when many of the
-- \ddd already exist in the text.
-- Error checking -- Error checking
assert( type(str) == 'string' ) assert( type(str) == 'string' )
assert(type(depth) == 'number' and type(l) == 'table') assert(type(depth) == 'number' and type(l) == 'table')

View File

@ -224,11 +224,25 @@ format_test {
} }
format_test { format_test {
name = 'Malformed Unicode is escaped', name = 'Single utf8 continuation byte is escaped',
input = '\000\001\003\012\169\003\000\030', input = 'abc\169def',
expect = '\'\\000\\000\\001\\003\\012\\169\\003\\000\\030\'', expect = '\'abc\\169def\'',
} }
format_test {
name = 'Multiple utf8 continuation bytes are escaped',
input = 'abc\169\190\169\169def',
expect = '\'abc\\169\\190\\169\\169def\'',
}
format_test {
name = 'Single start byte utf8 chars is escaped',
input = 'abc\255def',
expect = '\'abc\\255def\'',
}
-- TODO: Add more malformed unicode tests: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
return SUITE return SUITE