Many bad unicode sequences are now properly escaped
This commit is contained in:
parent
bfbfe4de56
commit
6f7e767b68
21
pstring.lua
21
pstring.lua
|
@ -53,10 +53,20 @@ local function escape_string (str)
|
|||
-- Error checking
|
||||
assert(type(str) == 'string')
|
||||
|
||||
-- Do stuff
|
||||
local l = {}
|
||||
for i = 1, #str do l[#l+1] = CHAR_TO_STR_REPR[str:byte(i)] end
|
||||
return table.concat(l, '')
|
||||
-- First escape the easy ones.
|
||||
local str = str:gsub('.', function (char) return CHAR_TO_STR_REPR[char:byte()] end)
|
||||
-- Escape stray continuation bytes (those not preceded by another high byte)
|
||||
repeat
|
||||
local count
|
||||
str, count = str:gsub('([^\128-\255])([\128-\191])', function(a, b) print(a,b) return a..'\\' .. b:byte() end)
|
||||
until count == 0
|
||||
-- Escape start bytes that are not followed by a continuation byte
|
||||
repeat
|
||||
local count
|
||||
str, count = str:gsub('([\191-\255])([^\128-\191])', function(a, b) print(a,b) return '\\'..a:byte() .. b end)
|
||||
until count == 0
|
||||
-- return
|
||||
return str
|
||||
end
|
||||
|
||||
local function smallest_secure_longform_string_level (str)
|
||||
|
@ -166,6 +176,9 @@ end
|
|||
return function (str, depth, l)
|
||||
-- pretty.format_string
|
||||
|
||||
-- TODO: Prefer \ddd-style escapes over the shorter forms (\n, \t) when the
|
||||
-- text already contains many \ddd escapes.
|
||||
|
||||
-- Error checking
|
||||
assert( type(str) == 'string' )
|
||||
assert(type(depth) == 'number' and type(l) == 'table')
|
||||
|
|
|
@ -224,11 +224,25 @@ format_test {
|
|||
}
|
||||
|
||||
format_test {
|
||||
name = 'Malformed Unicode is escaped',
|
||||
input = '\000\001\003\012\169\003\000\030',
|
||||
expect = '\'\\000\\000\\001\\003\\012\\169\\003\\000\\030\'',
|
||||
name = 'Single utf8 continuation byte is escaped',
|
||||
input = 'abc\169def',
|
||||
expect = '\'abc\\169def\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Multiple utf8 continuation bytes are escaped',
|
||||
input = 'abc\169\190\169\169def',
|
||||
expect = '\'abc\\169\\190\\169\\169def\'',
|
||||
}
|
||||
|
||||
format_test {
|
||||
name = 'Single start byte utf8 chars is escaped',
|
||||
input = 'abc\255def',
|
||||
expect = '\'abc\\255def\'',
|
||||
}
|
||||
|
||||
-- TODO: Add more malformed unicode tests: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
return SUITE
|
||||
|
|
Loading…
Reference in New Issue
Block a user