1
0

Implemented shortening of decimal escaped characters, when at end of cut-strings.

This commit is contained in:
Jon Michael Aanes 2017-10-23 14:40:30 +02:00
parent 856d9df690
commit c5389dfa42
3 changed files with 47 additions and 10 deletions

View File

@ -27,7 +27,6 @@ local UNICODE_ZERO_WIDTH_CHARACTERS = {}
for i = 128, 191 do UNICODE_ZERO_WIDTH_CHARACTERS['\204'..string.char(i)] = true end for i = 128, 191 do UNICODE_ZERO_WIDTH_CHARACTERS['\204'..string.char(i)] = true end
for i = 128, 175 do UNICODE_ZERO_WIDTH_CHARACTERS['\205'..string.char(i)] = true end for i = 128, 175 do UNICODE_ZERO_WIDTH_CHARACTERS['\205'..string.char(i)] = true end
local function iterate_utf8_chars (str) local function iterate_utf8_chars (str)
-- TODO: Detect invalid codepoints. -- TODO: Detect invalid codepoints.
return str:gmatch(UNICODE_CHAR_PATTERN) return str:gmatch(UNICODE_CHAR_PATTERN)

View File

@ -109,14 +109,27 @@ local function safe_cut (str, si, ei)
-- Calculate -- Calculate
local cut_str = str:sub(si, ei) local cut_str = str:sub(si, ei)
-- Search for the number of backslashes just before the send of the string. -- Search for the number of backslashes and digits at the end of the string.
-- If that number is even, it's a sequence of backslashes, if not it's a -- If the number of backslashes is even, it's a sequence of backslashes, if
-- broken escape string. -- not it's a broken escape string.
local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$' local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
local nr_backslashes_before_end = start_of_digits - start_of_backslashes local nr_backslashes_before_end = start_of_digits - start_of_backslashes
if nr_backslashes_before_end % 2 == 1 then cut_str = cut_str:sub(1, start_of_backslashes - 1) end if nr_backslashes_before_end % 2 == 1 then
-- Lets see if we can't shorten the escape code, to fit within the
-- cut limit.
local space_left = #cut_str - (start_of_digits - 2)
cut_str = cut_str:sub(1, start_of_digits - 2)
ei = ei - space_left
return cut_str local digits, after_digits = str:match('^\\(%d?%d?%d?)()', si - 1 + start_of_digits - 1)
if space_left >= 1 + 3 - #digits:match '0*' then
ei = after_digits - 1
cut_str = cut_str .. ('\\%0'..(space_left-1)..'i'):format(digits)
end
end
return cut_str, ei
end end
@ -149,8 +162,8 @@ local function format_concatted_string (str, _, l)
-- Cut strings -- Cut strings
local sub_strings, str_i = {}, 1 local sub_strings, str_i = {}, 1
repeat repeat
local sub_str = safe_cut(str, str_i, str_i + width_without_overhead - 1) local sub_str, ei = safe_cut(str, str_i, str_i + width_without_overhead - 1)
str_i = str_i + #sub_str str_i = ei + 1
sub_strings[#sub_strings+1] = sub_str sub_strings[#sub_strings+1] = sub_str
until str_i >= #str until str_i >= #str

View File

@ -130,6 +130,13 @@ format_test {
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}', expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
} }
format_test {
name = 'Cut strings are not cut in the middle of backslash sequence',
not_idempotent = true,
input = {'Lorem ipsum dolor sit amet, consec\\\\\\\\\\\\\\tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\\\\'...\n}',
}
format_test { format_test {
name = 'Cut strings are not cut in the middle of escaping \\', name = 'Cut strings are not cut in the middle of escaping \\',
not_idempotent = true, not_idempotent = true,
@ -145,13 +152,19 @@ format_test {
} }
format_test { format_test {
-- NOTE: Not priority functionallity. name = 'Cut strings can shorten decimal escape codes, if necessary and possible',
name = 'Cut strings can shorten decimal escape codes, if nessesary and possible',
not_idempotent = true, not_idempotent = true,
input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}', expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
} }
format_test {
name = 'Cut strings can shorten decimal escape codes, if necessary and possible, but will keep them as long as possible',
not_idempotent = true,
input = {'Lorem ipsum dolor sit amet, consec\004tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\04\'...\n}',
}
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Concatted Strings -- Concatted Strings
@ -167,6 +180,18 @@ format_test {
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.\004\002\000Nunc ve' ..]]..'\n'..[['stibulum tempus ligula. Sed ac lobortis mi.']], expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.\004\002\000Nunc ve' ..]]..'\n'..[['stibulum tempus ligula. Sed ac lobortis mi.']],
} }
format_test {
name = 'Concatted string with decimal escape at border',
input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\255\000lum tempus ligula. Sed ac lobortis mi.',
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004' ..]]..'\n'..[['\255\000lum tempus ligula. Sed ac lobortis mi.']],
}
format_test {
name = 'Concatted string with decimal escape at border 2',
input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\002\000lum tempus ligula. Sed ac lobortis mi.',
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\02' ..]]..'\n'..[['\000lum tempus ligula. Sed ac lobortis mi.']],
}
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Longform Strings -- Longform Strings