Many bad unicode sequences are now properly escaped
This commit is contained in:
parent
bfbfe4de56
commit
6f7e767b68
21
pstring.lua
21
pstring.lua
|
@ -53,10 +53,20 @@ local function escape_string (str)
|
||||||
-- Error checking
|
-- Error checking
|
||||||
assert(type(str) == 'string')
|
assert(type(str) == 'string')
|
||||||
|
|
||||||
-- Do stuff
|
-- First escape the easy ones.
|
||||||
local l = {}
|
local str = str:gsub('.', function (char) return CHAR_TO_STR_REPR[char:byte()] end)
|
||||||
for i = 1, #str do l[#l+1] = CHAR_TO_STR_REPR[str:byte(i)] end
|
-- Escape stray UTF-8 continuation bytes (128-191) that directly follow an ASCII byte (0-127), as \ddd; repeat until none remain
|
||||||
return table.concat(l, '')
|
repeat
|
||||||
|
local count
|
||||||
|
str, count = str:gsub('([^\128-\255])([\128-\191])', function(a, b) print(a,b) return a..'\\' .. b:byte() end)
|
||||||
|
until count == 0
|
||||||
|
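-- Escape UTF-8 start bytes (matched as 191-255) that are not followed by a continuation byte (128-191); NOTE(review): 191 is itself a continuation byte, confirm the range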
|
||||||
|
repeat
|
||||||
|
local count
|
||||||
|
str, count = str:gsub('([\191-\255])([^\128-\191])', function(a, b) print(a,b) return '\\'..a:byte() .. b end)
|
||||||
|
until count == 0
|
||||||
|
-- return
|
||||||
|
return str
|
||||||
end
|
end
|
||||||
|
|
||||||
local function smallest_secure_longform_string_level (str)
|
local function smallest_secure_longform_string_level (str)
|
||||||
|
@ -166,6 +176,9 @@ end
|
||||||
return function (str, depth, l)
|
return function (str, depth, l)
|
||||||
-- pretty.format_string
|
-- pretty.format_string
|
||||||
|
|
||||||
|
-- TODO: Prefer \ddd-style escapes over the shorter forms (\n, \t) when the
|
||||||
|
-- text already contains many \ddd escapes.
|
||||||
|
|
||||||
-- Error checking
|
-- Error checking
|
||||||
assert( type(str) == 'string' )
|
assert( type(str) == 'string' )
|
||||||
assert(type(depth) == 'number' and type(l) == 'table')
|
assert(type(depth) == 'number' and type(l) == 'table')
|
||||||
|
|
|
@ -224,11 +224,25 @@ format_test {
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
name = 'Malformed Unicode is escaped',
|
name = 'Single utf8 continuation byte is escaped',
|
||||||
input = '\000\001\003\012\169\003\000\030',
|
input = 'abc\169def',
|
||||||
expect = '\'\\000\\000\\001\\003\\012\\169\\003\\000\\030\'',
|
expect = '\'abc\\169def\'',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Multiple utf8 continuation bytes are escaped',
|
||||||
|
input = 'abc\169\190\169\169def',
|
||||||
|
expect = '\'abc\\169\\190\\169\\169def\'',
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Single start byte utf8 chars is escaped',
|
||||||
|
input = 'abc\255def',
|
||||||
|
expect = '\'abc\\255def\'',
|
||||||
|
}
|
||||||
|
|
||||||
|
-- TODO: Add more malformed unicode tests: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
return SUITE
|
return SUITE
|
||||||
|
|
Loading…
Reference in New Issue
Block a user