From c5389dfa4237a24085c5db3d3f5866862e2293e5 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 23 Oct 2017 14:40:30 +0200 Subject: [PATCH] Implemented shortening of decimal escaped characters, when at end of cut-strings. --- common.lua | 1 - pstring.lua | 27 ++++++++++++++++++++------- test/test_pstring.lua | 29 +++++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/common.lua b/common.lua index 3f96154..830b0b2 100644 --- a/common.lua +++ b/common.lua @@ -27,7 +27,6 @@ local UNICODE_ZERO_WIDTH_CHARACTERS = {} for i = 128, 191 do UNICODE_ZERO_WIDTH_CHARACTERS['\204'..string.char(i)] = true end for i = 128, 175 do UNICODE_ZERO_WIDTH_CHARACTERS['\205'..string.char(i)] = true end - local function iterate_utf8_chars (str) -- TODO: Detect invalid codepoints. return str:gmatch(UNICODE_CHAR_PATTERN) diff --git a/pstring.lua b/pstring.lua index db6d47c..18f99ac 100644 --- a/pstring.lua +++ b/pstring.lua @@ -109,14 +109,27 @@ local function safe_cut (str, si, ei) -- Calculate local cut_str = str:sub(si, ei) - -- Search for the number of backslashes just before the send of the string. - -- If that number is even, it's a sequence of backslashes, if not it's a - -- broken escape string. + -- Search for the number of backslashes and digits at the end of the string. + -- If the number of backslashes is even, it's a sequence of backslashes, if + -- not it's a broken escape string. local start_of_backslashes, start_of_digits = cut_str:match '()\\*()%d?%d?$' local nr_backslashes_before_end = start_of_digits - start_of_backslashes - if nr_backslashes_before_end % 2 == 1 then cut_str = cut_str:sub(1, start_of_backslashes - 1) end + if nr_backslashes_before_end % 2 == 1 then + -- Lets see if we can't shorten the escape code, to fit within the + -- cut limit.
+ local space_left = #cut_str - (start_of_digits - 2) + cut_str = cut_str:sub(1, start_of_digits - 2) + ei = ei - space_left - return cut_str + local digits, after_digits = str:match('^\\(%d?%d?%d?)()', si - 1 + start_of_digits - 1) + + if space_left >= 1 + 3 - #digits:match '0*' then + ei = after_digits - 1 + cut_str = cut_str .. ('\\%0'..(space_left-1)..'i'):format(digits) + end + end + + return cut_str, ei end @@ -149,8 +162,8 @@ local function format_concatted_string (str, _, l) -- Cut strings local sub_strings, str_i = {}, 1 repeat - local sub_str = safe_cut(str, str_i, str_i + width_without_overhead - 1) - str_i = str_i + #sub_str + local sub_str, ei = safe_cut(str, str_i, str_i + width_without_overhead - 1) + str_i = ei + 1 sub_strings[#sub_strings+1] = sub_str until str_i >= #str diff --git a/test/test_pstring.lua b/test/test_pstring.lua index e7c6bb2..addcaff 100644 --- a/test/test_pstring.lua +++ b/test/test_pstring.lua @@ -130,6 +130,13 @@ format_test { expect = '{\n \'Lorem ipsum dolor sit amet, consec\\t\'...\n}', } +format_test { + name = 'Cut strings are not cut in the middle of backslash sequence', + not_idempotent = true, + input = {'Lorem ipsum dolor sit amet, consec\\\\\\\\\\\\\\tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, consec\\\\\'...\n}', +} + format_test { name = 'Cut strings are not cut in the middle of escaping \\', not_idempotent = true, @@ -145,13 +152,19 @@ format_test { } format_test { - -- NOTE: Not priority functionallity. - name = 'Cut strings can shorten decimal escape codes, if nessesary and possible', + name = 'Cut strings can shorten decimal escape codes, if necessary and possible', not_idempotent = true, input = {'Lorem ipsum dolor sit amet, consec\014tetur adipiscing elit. Nunc vestibulum tempus ligula. 
Sed ac lobortis mi.'}, expect = '{\n \'Lorem ipsum dolor sit amet, consec\\14\'...\n}', } +format_test { + name = 'Cut strings can shorten decimal escape codes, if necessary and possible, but will keep them as long as possible', + not_idempotent = true, + input = {'Lorem ipsum dolor sit amet, consec\004tetur adipiscing elit. Nunc vestibulum tempus ligula. Sed ac lobortis mi.'}, + expect = '{\n \'Lorem ipsum dolor sit amet, consec\\04\'...\n}', +} + -------------------------------------------------------------------------------- -- Concatted Strings @@ -167,6 +180,18 @@ format_test { expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.\004\002\000Nunc ve' ..]]..'\n'..[['stibulum tempus ligula. Sed ac lobortis mi.']], } +format_test { + name = 'Concatted string with decimal escape at border', + input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\255\000lum tempus ligula. Sed ac lobortis mi.', + expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004' ..]]..'\n'..[['\255\000lum tempus ligula. Sed ac lobortis mi.']], +} + +format_test { + name = 'Concatted string with decimal escape at border 2', + input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\002\000lum tempus ligula. Sed ac lobortis mi.', + expect = [['Lorem ipsum dolor sit amet, consectetur adipiscing elit.Nunc vestibu\004\02' ..]]..'\n'..[['\000lum tempus ligula. Sed ac lobortis mi.']], +} + -------------------------------------------------------------------------------- -- Longform Strings