1
0
tiger-regex-sampling/tiger-regex-sampling.tig

291 lines
9.3 KiB
Plaintext

/*
# Tiger Regex Sampling
Program to generate examples from the given regexes.
*/
let /* Utility */
/* Booleans are represented as plain ints in this program. */
type bool = int
/* Truth constants, defined as the results of comparisons so that they
 * hold whatever values the comparison operators themselves produce. */
var true := 1 = 1
var false := 1 = 0
function read_all_input (): string =
  /* Reads characters with getchar() until it returns the empty string
   * and returns everything read up to that point as a single string. */
  let var collected := ""
      var next := getchar()
   in while next <> ""
      do ( collected := concat(collected, next)
         ; next := getchar() )
    ; collected
  end
function min (a: int, b: int): int =
  /* Returns the smaller of a and b (b when they are equal). */
  if b <= a
  then b
  else a
function max (a: int, b: int): int =
  /* Returns the larger of a and b (b when they are equal). */
  if b >= a
  then b
  else a
function mod (a: int, b: int): int =
  /* Remainder of a divided by b, derived from integer division:
   * whatever is left of a after removing (a / b) whole multiples of b.
   * b must be non-zero, since the computation divides by it.
   * NOTE(review): the sign of the result for negative operands follows
   * the rounding of the `/` operator - confirm for the target compiler. */
  let var quotient := a / b
   in a - b * quotient
  end
function i2s (i: int): string =
  /* Converts an integer to its decimal string representation.
   * Negative values are rendered as "-" followed by the digits of -i;
   * non-negative values are built recursively, one digit at a time. */
  if i < 0
  then concat("-", i2s(-i))
  /* Single digit: mod(i, 10) equals i here, so map it straight to a char */
  else if i < 10
  then chr(ord("0") + i)
  /* Several digits: leading digits first, then the last one */
  else concat(i2s(i / 10), chr(ord("0") + mod(i, 10)))
function repeat (s: string, n: int): string =
  /* Returns s concatenated with itself n times; yields the empty
   * string when n is zero or negative. */
  let var built := ""
   in for k := 1 to n
      do built := concat(built, s)
    ; built
  end
/* Utility: Random *****/
/* Current state of the pseudo-random generator. random() overwrites it
 * on every call and add_entropy() adds its argument to it.
 * NOTE(review): `^` is not part of standard Tiger; this relies on a
 * dialect that provides it - confirm its precedence relative to `-`. */
var rand_state := 2^63-1
function random (): int =
  /* Advances the generator: the new state is
   * (134775813 * rand_state + 1) reduced with mod(..., 2^32).
   * The new state is stored in rand_state, written to the output as a
   * "New state: <n>" trace line, and returned.
   * Intended range of the result is [0, 2^32[. */
  let var next := mod(134775813 * rand_state + 1, 2^32)
   in rand_state := next
    ; print(concat("New state: ", concat(i2s(next), "\n")))
    ; next
  end
function add_entropy (c: int) =
  /* Tiger offers little entropy of its own, so callers can feed some in:
   * c is added to the generator state, then the generator is stepped
   * once. The value produced by that step is discarded. */
  let var mixed := rand_state + c
   in rand_state := mixed
    ; random()
    ; ()
  end
function rand_in_range (a: int, b: int): int =
  /* Draws one value from random() and maps it onto a..b by taking it
   * modulo the span (b - a + 1) and adding a. Also writes a
   * "Derp: <r> in [<a>-<b>]" trace line to the output.
   * The span must be non-zero, because mod() divides by it.
   * TODO: Fix very slight bias towards lower numbers */
  let var span := b - a + 1
      var picked := a + mod(random(), span)
      var bounds := concat(i2s(a), concat("-", i2s(b)))
   in print(concat("Derp: ",
            concat(i2s(picked),
            concat(" in [",
            concat(bounds, "]\n")))))
    ; picked
  end
/* Regex ******************************************************************/
/* One node of a regex syntax tree. `typ` holds one of the REGEX_ tags and
 * decides how the other fields are read:
 *  - REGEX_CHAR_RANGE: char_first / char_last are the inclusive character
 *    codes of the range; both subexpressions are nil.
 *  - REGEX_REPEAT: char_first / char_last are reused as the minimum and
 *    maximum repetition counts (char_last = -1 means no upper bound);
 *    subexp_1 is the repeated expression and subexp_2 is nil.
 *  - REGEX_CONCAT: subexp_1 followed by subexp_2; both char fields are 0. */
type regex_comp = { typ: int
, char_first : int
, char_last : int
, subexp_1 : regex_comp
, subexp_2 : regex_comp }
/* Tags stored in regex_comp.typ to identify the kind of node. */
/* A single character or an inclusive range of characters. */
var REGEX_CHAR_RANGE := 0
/* A subexpression repeated between a minimum and a maximum number of times. */
var REGEX_REPEAT := 1
/* Two subexpressions matched one after the other. */
var REGEX_CONCAT := 2
function is_well_formed_regex (r: regex_comp): bool =
  /* Checks that the fields of the node r are consistent with its type tag.
   * Only r itself is inspected: subexpressions are tested for being
   * nil / non-nil but are not validated recursively. */
  let var lo := r.char_first
      var hi := r.char_last
   in /* Concatenation: two children, both numeric fields zero */
      if r.typ = REGEX_CONCAT
      then r.subexp_1 <> nil
         & r.subexp_2 <> nil
         & lo = 0
         & hi = 0
      /* Repetition: one child, 0 <= min, and max is either -1 or >= min */
      else if r.typ = REGEX_REPEAT
      then r.subexp_1 <> nil
         & r.subexp_2 = nil
         & 0 <= lo
         & -1 <= hi
         & (hi = -1 | lo <= hi)
      /* Char range: no children, 0 <= first <= last <= 255 */
      else if r.typ = REGEX_CHAR_RANGE
      then r.subexp_1 = nil
         & r.subexp_2 = nil
         & 0 <= lo
         & lo <= hi
         & hi <= 255
      /* Unknown regex_comp type */
      else false
  end
function format_regex (r: regex_comp): string =
  /* Renders the regex tree r as text.
   *  - char range:  the character itself when first = last, else "[a-b]"
   *  - repetition:  the parenthesised subexpression followed by "?", "*",
   *                 "+", "{n}", "{n,}" or "{n,m}"
   *  - concat:      both subexpressions side by side
   *  - anything else is rendered as the marker "%&!" */
  if r.typ = REGEX_CHAR_RANGE
  then ( if r.char_first = r.char_last
         then chr(r.char_first)
         else let var lo_chr := chr(r.char_first)
                  var hi_chr := chr(r.char_last)
               in concat("[", concat(lo_chr, concat("-", concat(hi_chr, "]"))))
              end )
  else if r.typ = REGEX_REPEAT
  then let var lo := r.char_first
           var hi := r.char_last
           var inner := concat("(", concat(format_regex(r.subexp_1), ")"))
           var quantifier :=
             /* Common quantifiers have a short form */
             if lo = 0 & hi = 1
             then "?"
             else if lo = 0 & hi = -1
             then "*"
             else if lo = 1 & hi = -1
             then "+"
             /* Exact count: {n} */
             else if lo = hi
             then concat("{", concat(i2s(lo), "}"))
             /* Open-ended: {n,} */
             else if hi = -1
             then concat("{", concat(i2s(lo), ",}"))
             /* Bounded: {n,m} */
             else concat("{", concat(i2s(lo), concat(",", concat(i2s(hi), "}"))))
        in concat(inner, quantifier)
       end
  else if r.typ = REGEX_CONCAT
  then concat(format_regex(r.subexp_1), format_regex(r.subexp_2))
  /* Unknown type */
  else "%&!"
function parse_regex (s: string): regex_comp =
  /* Builds a regex from s. Currently this only produces a char-range
   * node whose first and last character are both ord(s); no regex
   * syntax is interpreted. */
  let var code := ord(s)
   in regex_comp { typ = REGEX_CHAR_RANGE
                 , char_first = code
                 , char_last = code
                 , subexp_1 = nil
                 , subexp_2 = nil }
  end
/** Regex Sampling *********/
/* Array of example strings. */
type strings = array of string
/* A batch of generated examples: the backing array plus the number of
 * entries recorded for it. gen_samples stores -1 as the count (with an
 * empty array) when it is given a node of unknown type. */
type samples = { num_samples: int, samples: strings }
/* Returns the sample count recorded in s. */
function num_samples(s: samples): int = s.num_samples
/* Returns the i-th entry of s's backing array; i is used as the index as-is. */
function get_sample(s: samples, i: int): string = s.samples[i]
function rand_sample(s: samples): string =
  /* Picks one entry of s, using rand_in_range to choose an index
   * between 0 and the recorded sample count minus one. */
  let var pick := rand_in_range(0, s.num_samples - 1)
   in s.samples[pick]
  end
function gen_samples (r: regex_comp): samples =
  /* Generates at most 4 random example strings for the regex tree r.
   *  - char range:  each example is one random character from the range
   *  - repetition:  each example is a random number of randomly chosen
   *                 examples of the subexpression, glued together
   *  - concat:      each example is a random example of the first
   *                 subexpression followed by one of the second
   *  - unknown type: a record with num_samples = -1 and an empty array
   * Locals are named `count` / `result` so they do not shadow the
   * num_samples function or the samples type name. */

  /* Char ranges */
  if r.typ = REGEX_CHAR_RANGE
  then let var count := min(4, r.char_last - r.char_first + 1)
           var result := strings[count] of ""
        in for i := 0 to count - 1
           do result[i] := chr(rand_in_range(r.char_first, r.char_last))
         ; samples { num_samples = count
                   , samples = result }
       end

  /* Repeating expressions */
  else if r.typ = REGEX_REPEAT
  then let var subsamples := gen_samples(r.subexp_1)
           /* Largest repetition count we are willing to generate. An
            * unbounded repeat (char_last = -1) is capped at 8, a bounded
            * one at min(8, char_last); either way the cap is never below
            * the required minimum, so the range min_reps..max_reps is
            * never empty. */
           var max_reps := if r.char_last = -1
                           then max(r.char_first, 8)
                           else max(r.char_first, min(8, r.char_last))
           var count := min(4, num_samples(subsamples)
                               * (max_reps - r.char_first + 1))
           /* Every entry starts out as "" and is extended in place */
           var result := strings[count] of ""
        in for i := 0 to count - 1
           /* The upper bound must be max_reps itself: using char_last
            * here would pass -1 for unbounded repeats, which makes the
            * span handed to rand_in_range zero or negative. */
           do ( for rep_i := 0 to rand_in_range(r.char_first, max_reps) - 1
                do result[i] := concat( result[i]
                                      , rand_sample(subsamples)) )
         ; samples { num_samples = count
                   , samples = result }
       end

  /* Concat expressions */
  else if r.typ = REGEX_CONCAT
  then let var subsamples_1 := gen_samples(r.subexp_1)
           var subsamples_2 := gen_samples(r.subexp_2)
           var count := min(4, num_samples(subsamples_1)
                               * num_samples(subsamples_2))
           var result := strings[count] of ""
        in for i := 0 to count - 1
           do result[i] := concat( rand_sample(subsamples_1)
                                 , rand_sample(subsamples_2) )
         ; samples { num_samples = count
                   , samples = result }
       end

  /* Unknown type */
  else samples { num_samples = -1
               , samples = strings[0] of "" }
/** Main program ***********/
/* Read input string */
/*
var input := read_all_input()
/* Parse input as regex */
var regex := parse_regex(input)
*/
/* Hard-coded regex used while reading and parsing of the input is
 * commented out: a character range over codes 65..80 ('A'..'P' in ASCII). */
var regex := regex_comp { typ = REGEX_CHAR_RANGE
, char_first = 65
, char_last = 80
, subexp_1 = nil
, subexp_2 = nil }
/* Redeclares `regex` as exactly three repetitions (min = max = 3) of the
 * range declared just above; code further down sees this declaration. */
var regex := regex_comp { typ = REGEX_REPEAT
, char_first = 3
, char_last = 3
, subexp_1 = regex
, subexp_2 = nil }
/*
var regex := regex_comp { typ = REGEX_CONCAT
, char_first = 0
, char_last = 0
, subexp_1 = regex
, subexp_2 = regex_comp { typ =
REGEX_CHAR_RANGE, char_first = ord("0"),
char_last = ord("9"), subexp_1 = nil,
subexp_2 = nil } }
var regex := regex_comp { typ = REGEX_REPEAT
, char_first = 2
, char_last = 2
, subexp_1 = regex
, subexp_2 = nil }
*/
in /* Add entropy based on input */
/*
for i := 0 to size(input)-1
do add_entropy(ord(substring(input, i, 1)))
*/
/* Generate and print examples */
let var examples := gen_samples(regex)
    var last := num_samples(examples) - 1
 in /* Header line naming the regex the examples belong to */
    print("Examples for ")
  ; print(format_regex(regex))
  ; print(":\n")
    /* One " - <example>" line per generated sample */
  ; for k := 0 to last
    do print(concat(" - ", concat(get_sample(examples, k), "\n")))
    /* Value of the program */
  ; 0
end
end