Implemented shortening of decimal escaped characters when they occur at the end of cut strings.
This commit is contained in:
parent
856d9df690
commit
c5389dfa42
|
@ -27,7 +27,6 @@ local UNICODE_ZERO_WIDTH_CHARACTERS = {}
|
||||||
for i = 128, 191 do UNICODE_ZERO_WIDTH_CHARACTERS['\204'..string.char(i)] = true end
|
for i = 128, 191 do UNICODE_ZERO_WIDTH_CHARACTERS['\204'..string.char(i)] = true end
|
||||||
for i = 128, 175 do UNICODE_ZERO_WIDTH_CHARACTERS['\205'..string.char(i)] = true end
|
for i = 128, 175 do UNICODE_ZERO_WIDTH_CHARACTERS['\205'..string.char(i)] = true end
|
||||||
|
|
||||||
|
|
||||||
local function iterate_utf8_chars (str)
|
local function iterate_utf8_chars (str)
|
||||||
-- TODO: Detect invalid codepoints.
|
-- TODO: Detect invalid codepoints.
|
||||||
return str:gmatch(UNICODE_CHAR_PATTERN)
|
return str:gmatch(UNICODE_CHAR_PATTERN)
|
||||||
|
|
27
pstring.lua
27
pstring.lua
|
@ -109,14 +109,27 @@ local function safe_cut (str, si, ei)
|
||||||
-- Calculate
|
-- Calculate
|
||||||
local cut_str = str:sub(si, ei)
|
local cut_str = str:sub(si, ei)
|
||||||
|
|
||||||
-- Search for the number of backslashes just before the end of the string.
|
-- Search for the number of backslashes and digits at the end of the string.
|
||||||
-- If that number is even, it's a sequence of backslashes, if not it's a
|
-- If the number of backslashes is even, it's a sequence of backslashes, if
|
||||||
-- broken escape string.
|
-- not it's a broken escape string.
|
||||||
local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
|
local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$'
|
||||||
local nr_backslashes_before_end = start_of_digits - start_of_backslashes
|
local nr_backslashes_before_end = start_of_digits - start_of_backslashes
|
||||||
if nr_backslashes_before_end % 2 == 1 then cut_str = cut_str:sub(1, start_of_backslashes - 1) end
|
if nr_backslashes_before_end % 2 == 1 then
|
||||||
|
-- Let's see if we can't shorten the escape code, to fit within the
|
||||||
|
-- cut limit.
|
||||||
|
local space_left = #cut_str - (start_of_digits - 2)
|
||||||
|
cut_str = cut_str:sub(1, start_of_digits - 2)
|
||||||
|
ei = ei - space_left
|
||||||
|
|
||||||
return cut_str
|
local digits, after_digits = str:match('^\\(%d?%d?%d?)()', si - 1 + start_of_digits - 1)
|
||||||
|
|
||||||
|
if space_left >= 1 + 3 - #digits:match '0*' then
|
||||||
|
ei = after_digits - 1
|
||||||
|
cut_str = cut_str .. ('\\%0'..(space_left-1)..'i'):format(digits)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return cut_str, ei
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@ -149,8 +162,8 @@ local function format_concatted_string (str, _, l)
|
||||||
-- Cut strings
|
-- Cut strings
|
||||||
local sub_strings, str_i = {}, 1
|
local sub_strings, str_i = {}, 1
|
||||||
repeat
|
repeat
|
||||||
local sub_str = safe_cut(str, str_i, str_i + width_without_overhead - 1)
|
local sub_str, ei = safe_cut(str, str_i, str_i + width_without_overhead - 1)
|
||||||
str_i = str_i + #sub_str
|
str_i = ei + 1
|
||||||
sub_strings[#sub_strings+1] = sub_str
|
sub_strings[#sub_strings+1] = sub_str
|
||||||
until str_i >= #str
|
until str_i >= #str
|
||||||
|
|
||||||
|
|
|
@ -130,6 +130,13 @@ format_test {
|
||||||
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
|
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings are not cut in the middle of backslash sequence',
|
||||||
|
not_idempotent = true,
|
||||||
|
input = {'Lorem ipsum dolor sit amet, consec\\\\\\\\\\\\\\tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\\\\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
name = 'Cut strings are not cut in the middle of escaping \\',
|
name = 'Cut strings are not cut in the middle of escaping \\',
|
||||||
not_idempotent = true,
|
not_idempotent = true,
|
||||||
|
@ -145,13 +152,19 @@ format_test {
|
||||||
}
|
}
|
||||||
|
|
||||||
format_test {
|
format_test {
|
||||||
-- NOTE: Not priority functionality.
|
name = 'Cut strings can shorten decimal escape codes, if necessary and possible',
|
||||||
name = 'Cut strings can shorten decimal escape codes, if nessesary and possible',
|
|
||||||
not_idempotent = true,
|
not_idempotent = true,
|
||||||
input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
|
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Cut strings can shorten decimal escape codes, if necessary and possible, but will keep them as long as possible',
|
||||||
|
not_idempotent = true,
|
||||||
|
input = {'Lorem ipsum dolor sit amet, consec\004tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'},
|
||||||
|
expect = '{\n \'Lorem ipsum dolor sit amet, consec\\04\'...\n}',
|
||||||
|
}
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- Concatted Strings
|
-- Concatted Strings
|
||||||
|
|
||||||
|
@ -167,6 +180,18 @@ format_test {
|
||||||
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.\004\002\000Nunc ve' ..]]..'\n'..[['stibulum tempus ligula. Sed ac lobortis mi.']],
|
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.\004\002\000Nunc ve' ..]]..'\n'..[['stibulum tempus ligula. Sed ac lobortis mi.']],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Concatted string with decimal escape at border',
|
||||||
|
input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\255\000lum tempus ligula. Sed ac lobortis mi.',
|
||||||
|
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004' ..]]..'\n'..[['\255\000lum tempus ligula. Sed ac lobortis mi.']],
|
||||||
|
}
|
||||||
|
|
||||||
|
format_test {
|
||||||
|
name = 'Concatted string with decimal escape at border 2',
|
||||||
|
input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\002\000lum tempus ligula. Sed ac lobortis mi.',
|
||||||
|
expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\02' ..]]..'\n'..[['\000lum tempus ligula. Sed ac lobortis mi.']],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
-- Longform Strings
|
-- Longform Strings
|
||||||
|
|
Loading…
Reference in New Issue
Block a user