From 994dc1496ea86464a8fdfc730cad12331d359e16 Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Thu, 19 Sep 2024 21:26:27 +0200
Subject: [PATCH] Weird project

---
 README.md          |   1 +
 regex-sampling.tig | 319 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 320 insertions(+)
 create mode 100644 README.md
 create mode 100755 regex-sampling.tig

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..78126b1
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Weird attempt to generate random strings from regex.
diff --git a/regex-sampling.tig b/regex-sampling.tig
new file mode 100755
index 0000000..b381fea
--- /dev/null
+++ b/regex-sampling.tig
@@ -0,0 +1,319 @@
+/* */
+
+let /* Utility */
+    type bool = int
+    var true := 1 = 1
+    var false := 1 = 0
+
+    function read_all_input (): string =
+        /* Reads characters with getchar() until it returns "" and
+         * returns them concatenated in reading order. */
+        let var str := ""
+         in while true
+            do let var char := getchar()
+                in str := concat(str, char)
+                 ; if char = ""
+                   then break
+               end
+          ; str
+        end
+
+    function min (a: int, b: int): int =
+        if a < b
+        then a
+        else b
+
+    function max (a: int, b: int): int =
+        if a > b
+        then a
+        else b
+
+    function mod (a: int, b: int): int =
+        /* Remainder of a / b, computed from integer division. */
+        a - (a / b) * b
+
+    function i2s (i: int): string =
+        /* Formats an integer as a decimal string. */
+
+        /* Negative numbers */
+        if i < 0
+        then concat("-", i2s(-i))
+
+        /* Positive numbers */
+        else let var digit := chr(ord("0") + mod(i, 10))
+              in if i <= 9
+                 then digit
+                 else concat(i2s(i/10), digit)
+             end
+
+    function repeat (s: string, n: int): string =
+        /* Returns s concatenated n times; "" when n <= 0. */
+        if 0 < n
+        then concat(s, repeat(s, n - 1))
+        else ""
+
+    /* Utility: Random *****/
+
+    /* NOTE(review): ^ is not an operator of standard Tiger; presumably the
+     * targeted dialect provides it as exponentiation -- confirm. */
+    var rand_state := 2^63-1
+
+    function random (): int =
+        /* Generates numbers in the range [0, 2^32[ */
+        /* Also prints every new state as debug output. */
+        ( rand_state := mod(134775813 * rand_state + 1, 2^32)
+        ; print("New state: ")
+        ; print(i2s(rand_state))
+        ; print("\n")
+        ; rand_state )
+
+    function add_entropy (c: int) =
+        /* We do not have a lot of entropy in Tiger, so this function
+         * adds a little taken as argument */
+        ( rand_state := rand_state + c
+        ; random ()
+        ; () )
+
+    function rand_in_range (a: int, b: int): int =
+        /* Returns a + mod(random(), b - a + 1), i.e. a value in [a, b] as
+         * long as random() is non-negative, and prints it as debug output.
+         * Requires a <= b: for b = a - 1 the modulus b - a + 1 is zero. */
+        /* TODO: Fix very slight bias towards lower numbers */
+        let var r := a + mod(random(), b - a + 1)
+         in print("Derp: ")
+          ; print(i2s(r))
+          ; print(" in [")
+          ; print(i2s(a))
+          ; print("-")
+          ; print(i2s(b))
+          ; print("]\n")
+          ; r
+        end
+
+    /* Regex ******************************************************************/
+
+    /* A regex node. The meaning of char_first/char_last depends on typ:
+     *  - REGEX_CHAR_RANGE: first and last character code of the range.
+     *  - REGEX_REPEAT: minimum and maximum repetition count of subexp_1;
+     *    char_last = -1 means no upper bound.
+     *  - REGEX_CONCAT: both must be 0; subexp_1 is followed by subexp_2. */
+    type regex_comp = { typ: int
+                      , char_first : int
+                      , char_last : int
+                      , subexp_1 : regex_comp
+                      , subexp_2 : regex_comp }
+    var REGEX_CHAR_RANGE := 0
+    var REGEX_REPEAT := 1
+    var REGEX_CONCAT := 2
+
+    function is_well_formed_regex (r: regex_comp): bool =
+        /* Checks the invariants of the node r itself; subexpressions are
+         * only tested for nil, not visited. */
+
+        /* Char ranges */
+        if r.typ = REGEX_CHAR_RANGE
+        then r.char_first <= r.char_last
+           & 0 <= r.char_first & r.char_last <= 255
+           & r.subexp_1 = nil
+           & r.subexp_2 = nil
+
+        /* Repeating */
+        else if r.typ = REGEX_REPEAT
+        then 0 <= r.char_first
+           & -1 <= r.char_last
+           & (r.char_first <= r.char_last | r.char_last = -1)
+           & r.subexp_1 <> nil
+           & r.subexp_2 = nil
+
+        /* Concatting */
+        else if r.typ = REGEX_CONCAT
+        then r.char_first = 0
+           & r.char_last = 0
+           & r.subexp_1 <> nil
+           & r.subexp_2 <> nil
+
+        /* Unknown regex_comp type */
+        else false
+
+    function format_regex (r: regex_comp): string =
+        /* Renders r in regex syntax; an unknown typ renders as "%&!". */
+
+        /* Char ranges */
+        if r.typ = REGEX_CHAR_RANGE & r.char_first = r.char_last
+        then chr(r.char_first)
+        else if r.typ = REGEX_CHAR_RANGE
+        then concat("[", concat(chr(r.char_first), concat("-", concat(chr(r.char_last), "]"))))
+
+        /* Repeating expressions */
+        else if r.typ = REGEX_REPEAT
+        then let var base_exp_str := concat( "(", concat( format_regex(r.subexp_1), ")"))
+                 var rep_str :=
+                     if r.char_first = 0
+                      & r.char_last = 1
+                     then "?"
+                     else if r.char_first = 0
+                           & r.char_last = -1
+                     then "*"
+                     else if r.char_first = 1
+                           & r.char_last = -1
+                     then "+"
+
+                     else
+                       concat( "{",
+                       concat( i2s(r.char_first),
+                       concat( if r.char_first <> r.char_last
+                               then ","
+                               else "",
+                       concat( if r.char_last = -1 | r.char_first = r.char_last
+                               then ""
+                               else i2s(r.char_last)
+                             , "}"))))
+
+              in concat(base_exp_str, rep_str)
+             end
+
+        /* Concat expressions */
+        else if r.typ = REGEX_CONCAT
+        then let var subexp_1 := format_regex(r.subexp_1)
+                 var subexp_2 := format_regex(r.subexp_2)
+              in concat(subexp_1, subexp_2)
+             end
+
+        /* Unknown range */
+        else "%&!"
+
+    function parse_regex (s: string): regex_comp =
+        /* Builds a one-character REGEX_CHAR_RANGE from ord(s); no further
+         * parsing is done. */
+        regex_comp { typ = REGEX_CHAR_RANGE
+                   , char_first = ord(s)
+                   , char_last = ord(s)
+                   , subexp_1 = nil
+                   , subexp_2 = nil }
+
+    /** Regex Sampling *********/
+
+    type strings = array of string
+    type samples = { num_samples: int, samples: strings }
+    function num_samples(s: samples): int = s.num_samples
+    function get_sample(s: samples, i: int): string = s.samples[i]
+    function rand_sample(s: samples): string =
+        get_sample(s, rand_in_range(0, num_samples(s)-1))
+
+    function gen_samples (r: regex_comp): samples =
+        /* Generates at most 4 random strings for r. An unknown typ yields
+         * an empty sample array with num_samples = -1. */
+
+        /* Char ranges */
+        if r.typ = REGEX_CHAR_RANGE
+        then let var num_samples := min(4, r.char_last - r.char_first + 1)
+                 var samples := strings[num_samples] of ""
+              in for i := 0 to num_samples - 1
+                 do samples[i] := chr(rand_in_range(r.char_first, r.char_last))
+
+               ; samples { num_samples = num_samples
+                         , samples = samples }
+             end
+
+        /* Repeating expressions */
+        else if r.typ = REGEX_REPEAT
+        then let var subsamples := gen_samples(r.subexp_1)
+                 /* Largest repetition count to sample: capped at 8, but
+                  * never below the minimum, so that the range
+                  * [char_first, max_derps] is never empty. */
+                 var max_derps := max(r.char_first,
+                                      if r.char_last = -1
+                                      then 8
+                                      else min(8, r.char_last))
+                 var num_samples := min(4, num_samples(subsamples)
+                                         * (max_derps - r.char_first + 1))
+                 var samples := strings[num_samples] of ""
+              in for i := 0 to num_samples - 1
+                 do ( samples[i] := ""
+                    /* max_derps is the upper bound on purpose: char_last
+                     * is -1 for unbounded repeats and must not be used as
+                     * a count. */
+                    ; for rep_i := 0 to rand_in_range(r.char_first, max_derps) - 1
+                      do samples[i] := concat( samples[i]
+                                             , rand_sample(subsamples))
+                    )
+
+               ; samples { num_samples = num_samples
+                         , samples = samples }
+             end
+
+        /* Concat expressions */
+        else if r.typ = REGEX_CONCAT
+        then let var subsamples_1 := gen_samples(r.subexp_1)
+                 var subsamples_2 := gen_samples(r.subexp_2)
+                 var num_samples := min(4, num_samples(subsamples_1)
+                                         * num_samples(subsamples_2))
+                 var samples := strings[num_samples] of ""
+              in for i := 0 to num_samples - 1
+                 do samples[i] := concat( rand_sample(subsamples_1)
+                                        , rand_sample(subsamples_2) )
+
+               ; samples { num_samples = num_samples
+                         , samples = samples }
+             end
+
+        /* Unknown type */
+        else samples { num_samples = -1
+                     , samples = strings[0] of "" }
+
+    /** Main program ***********/
+
+    /* Read input string */
+    /*
+    var input := read_all_input()
+
+    /* Parse input as regex */
+    var regex := parse_regex(input)
+    */
+    var regex := regex_comp { typ = REGEX_CHAR_RANGE
+                            , char_first = 65
+                            , char_last = 80
+                            , subexp_1 = nil
+                            , subexp_2 = nil }
+
+    var regex := regex_comp { typ = REGEX_REPEAT
+                            , char_first = 3
+                            , char_last = 3
+                            , subexp_1 = regex
+                            , subexp_2 = nil }
+/*
+
+    var regex := regex_comp { typ = REGEX_CONCAT
+                            , char_first = 0
+                            , char_last = 0
+                            , subexp_1 = regex
+                            , subexp_2 = regex_comp { typ =
+                            REGEX_CHAR_RANGE, char_first = ord("0"),
+                            char_last = ord("9"), subexp_1 = nil,
+                            subexp_2 = nil } }
+
+    var regex := regex_comp { typ = REGEX_REPEAT
+                            , char_first = 2
+                            , char_last = 2
+                            , subexp_1 = regex
+                            , subexp_2 = nil }
+*/
+
+in /* Add entropy based on input */
+/*
+    for i := 0 to size(input)-1
+    do add_entropy(ord(substring(input, i, 1)))
+ */
+
+    /* Generate and print examples */
+    let var samples := gen_samples(regex)
+     in print("Examples for ")
+      ; print(format_regex(regex))
+      ; print(":\n")
+      ; for i := 0 to num_samples(samples)-1
+        do ( print(" - ")
+           ; print(get_sample(samples, i))
+           ; print("\n") )
+      ; 0
+    end
+end
+