Weird project
This commit is contained in:
commit
994dc1496e
286
regex-sampling.tig
Executable file
286
regex-sampling.tig
Executable file
|
@ -0,0 +1,286 @@
|
|||
/* */
|
||||
|
||||
let /* Utility */

/* Truth values are represented as plain integers. */
type bool = int

/* Named truth constants: `true` is the value of a comparison that
 * always holds, `false` the value of one that never holds. */
var true  := 1 = 1
var false := 1 = 0
|
||||
|
||||
function read_all_input (): string =
  /* Collects everything getchar() delivers into a single string and
   * returns it.  Reading stops as soon as getchar() yields the empty
   * string (end of input in the standard Tiger library). */
  let var collected := ""
   in while true
      do let var next := getchar()
          in if next = ""
             then break
             else collected := concat(collected, next)
         end
    ; collected
  end
|
||||
|
||||
function min (a: int, b: int): int =
  /* Returns the smaller of a and b. */
  if b <= a
  then b
  else a
|
||||
|
||||
function max (a: int, b: int): int =
  /* Returns the larger of a and b. */
  if b >= a
  then b
  else a
|
||||
|
||||
function mod (a: int, b: int): int =
  /* Remainder of the integer division a / b, computed as
   * a - (a / b) * b.  The sign of the result for negative operands
   * therefore follows whatever rounding the `/` operator uses. */
  let var quotient := a / b
   in a - quotient * b
  end
|
||||
|
||||
function i2s (i: int): string =
  /* Decimal rendering of i.
   *  - negative values: a "-" followed by the rendering of -i
   *  - 0..9: the single digit character
   *  - larger values: the rendering of i / 10 followed by the last
   *    digit (i - (i / 10) * 10, i.e. the remainder modulo 10) */
  if i < 0
  then concat("-", i2s(-i))
  else if i <= 9
  then chr(ord("0") + i)
  else concat(i2s(i / 10), chr(ord("0") + (i - (i / 10) * 10)))
|
||||
|
||||
function repeat (s: string, n: int): string =
  /* Returns s concatenated with itself n times; the empty string when
   * n is zero or negative. */
  let var built := ""
   in for k := 1 to n
      do built := concat(built, s)
    ; built
  end
|
||||
|
||||
/* Utility: Random *****/
|
||||
|
||||
/* Current state of the pseudo random number generator.
 * NOTE(review): `^` is not an operator of standard Tiger; this relies on
 * the target dialect providing integer exponentiation - confirm. */
var rand_state := 2^63-1

function random (): int =
  /* Advances the generator one step
   *     rand_state := (134775813 * rand_state + 1) mod 2^32
   * logs the new state and returns it.
   * Generates numbers in the range [0, 2^32[ */
  let var modulus := 2^32
      var next := mod(134775813 * rand_state + 1, modulus)
   in /* mod() is built on `/`, so it hands back a negative remainder
       * whenever the product is negative (e.g. after add_entropy pushed
       * the initial state past the largest int).  Fold such values back
       * into [0, modulus[ so that callers never see a negative number. */
      rand_state := if next < 0
                    then next + modulus
                    else next
    ; print("New state: ")
    ; print(i2s(rand_state))
    ; print("\n")
    ; rand_state
  end
|
||||
|
||||
function add_entropy (c: int) =
  /* Tiger offers no real entropy source, so the caller supplies some:
   * c is added to the generator state and the generator is stepped
   * once so that the contribution is mixed in.  Returns nothing. */
  let var stirred := rand_state + c
   in rand_state := stirred
    ; random()
    ; ()
  end
|
||||
|
||||
function rand_in_range (a: int, b: int): int =
  /* Draws one value by reducing random() modulo the width of the
   * range a..b and offsetting it by a, logs the draw together with
   * the requested range, and returns it.
   * TODO: Fix very slight bias towards lower numbers */
  let var width := b - a + 1
      var pick := a + mod(random(), width)
   in print(concat("Derp: ", i2s(pick)))
    ; print(concat(" in [", i2s(a)))
    ; print(concat("-", i2s(b)))
    ; print("]\n")
    ; pick
  end
|
||||
|
||||
/* Regex ******************************************************************/
|
||||
|
||||
/* One node of a regex tree.  `typ` holds one of the REGEX_* tags below
 * and decides how the remaining fields are used:
 *   REGEX_CHAR_RANGE  char_first..char_last are the character codes of
 *                     the range; both sub-expressions are nil
 *   REGEX_REPEAT      char_first / char_last are the minimum / maximum
 *                     repetition count of subexp_1 (char_last = -1 is
 *                     formatted as "no upper limit")
 *   REGEX_CONCAT      subexp_1 followed by subexp_2 */
type regex_comp = { typ        : int,
                    char_first : int,
                    char_last  : int,
                    subexp_1   : regex_comp,
                    subexp_2   : regex_comp }

/* Tags for regex_comp.typ */
var REGEX_CHAR_RANGE := 0
var REGEX_REPEAT     := 1
var REGEX_CONCAT     := 2
|
||||
|
||||
function is_well_formed_regex (r: regex_comp): bool =
  /* Checks that the whole regex tree rooted at r respects the field
   * conventions of its node types.  Returns a non-zero value when it
   * does and 0 otherwise.
   *
   * A nil tree is reported as malformed instead of being dereferenced,
   * and sub-expressions are validated recursively: a tree is only
   * well-formed if every node in it is. */
  if r = nil
  then false

  /* Char ranges: an ordered range of byte values, no children */
  else if r.typ = REGEX_CHAR_RANGE
  then 0 <= r.char_first
     & r.char_first <= r.char_last
     & r.char_last <= 255
     & r.subexp_1 = nil
     & r.subexp_2 = nil

  /* Repeating: non-negative minimum, maximum either -1 or not below
   * the minimum, exactly one (well-formed) child */
  else if r.typ = REGEX_REPEAT
  then 0 <= r.char_first
     & (r.char_last = -1 | r.char_first <= r.char_last)
     & r.subexp_2 = nil
     & is_well_formed_regex(r.subexp_1)

  /* Concatting: counters unused (zero), two well-formed children */
  else if r.typ = REGEX_CONCAT
  then r.char_first = 0
     & r.char_last = 0
     & is_well_formed_regex(r.subexp_1)
     & is_well_formed_regex(r.subexp_2)

  /* Unknown regex_comp type */
  else false
|
||||
|
||||
function format_regex (r: regex_comp): string =
  /* Renders the regex tree r as text:
   *   char range   the character itself when the range holds a single
   *                character, "[a-b]" otherwise
   *   repeat       "(sub)" followed by "?", "*", "+", "{n}", "{n,}"
   *                or "{n,m}" depending on the counters
   *   concat       both sub-expressions back to back
   *   other tags   the marker "%&!" */

  /* Char ranges */
  if r.typ = REGEX_CHAR_RANGE
  then let var first_char := chr(r.char_first)
        in if r.char_first = r.char_last
           then first_char
           else concat("[", concat(first_char,
                  concat("-", concat(chr(r.char_last), "]"))))
       end

  /* Repeating expressions */
  else if r.typ = REGEX_REPEAT
  then let var lo := r.char_first
           var hi := r.char_last
           var inner := format_regex(r.subexp_1)
           var quantifier :=
                 if lo = 0 & hi = 1
                 then "?"
                 else if lo = 0 & hi = -1
                 then "*"
                 else if lo = 1 & hi = -1
                 then "+"
                 /* exact count: {n} */
                 else if lo = hi
                 then concat("{", concat(i2s(lo), "}"))
                 /* open ended: {n,} */
                 else if hi = -1
                 then concat("{", concat(i2s(lo), concat(",", "}")))
                 /* bounded: {n,m} */
                 else concat("{", concat(i2s(lo),
                        concat(",", concat(i2s(hi), "}"))))
        in concat("(", concat(inner, concat(")", quantifier)))
       end

  /* Concat expressions */
  else if r.typ = REGEX_CONCAT
  then concat(format_regex(r.subexp_1), format_regex(r.subexp_2))

  /* Unknown range */
  else "%&!"
|
||||
|
||||
function parse_regex (s: string): regex_comp =
  /* Builds a regex from s.  At present this is only a stand-in: the
   * result is always a single-character range whose character code is
   * ord(s); no actual regex syntax is interpreted. */
  let var code := ord(s)
   in regex_comp { typ        = REGEX_CHAR_RANGE,
                   char_first = code,
                   char_last  = code,
                   subexp_1   = nil,
                   subexp_2   = nil }
  end
|
||||
|
||||
/** Regex Sampling *********/
|
||||
|
||||
/* A set of example strings: `samples` holds the strings and
 * `num_samples` says how many of them there are. */
type strings = array of string
type samples = { num_samples: int, samples: strings }

/* Number of strings recorded in s. */
function num_samples (s: samples): int =
  s.num_samples

/* The i-th string of s (indices start at 0). */
function get_sample (s: samples, i: int): string =
  s.samples[i]

/* One string of s, picked via rand_in_range over the indices
 * 0 .. num_samples(s) - 1. */
function rand_sample (s: samples): string =
  let var last_index := num_samples(s) - 1
      var picked := rand_in_range(0, last_index)
   in get_sample(s, picked)
  end
|
||||
|
||||
function gen_samples (r: regex_comp): samples =
  /* Produces up to 4 random example strings for the regex tree r.
   *   char range   random characters out of the range
   *   repeat       a random number of randomly chosen sub-samples,
   *                glued together
   *   concat       a random sample of the first sub-expression
   *                followed by a random sample of the second
   * For an unknown node type the result has num_samples = -1 and an
   * empty array.
   *
   * Local names deliberately differ from the num_samples() function so
   * that it is never shadowed inside this body. */

  /* Char ranges */
  if r.typ = REGEX_CHAR_RANGE
  then let var count := min(4, r.char_last - r.char_first + 1)
           var pool := strings[count] of ""
        in for i := 0 to count - 1
           do pool[i] := chr(rand_in_range(r.char_first, r.char_last))

         ; samples { num_samples = count
                   , samples = pool }
       end

  /* Repeating expressions */
  else if r.typ = REGEX_REPEAT
  then let var subsamples := gen_samples(r.subexp_1)
           var min_reps := r.char_first
           /* Largest repetition count we are willing to generate:
            * the regex's own maximum capped at 8, or 8 when the
            * regex has no maximum (char_last = -1) - but never less
            * than the required minimum. */
           var max_reps := if r.char_last = -1
                           then max(min_reps, 8)
                           else max(min_reps, min(8, r.char_last))
           var count := min(4, num_samples(subsamples)
                               * (max_reps - min_reps + 1))
           var pool := strings[count] of ""
        in for i := 0 to count - 1
              /* Draw the repetition count from [min_reps, max_reps].
               * The upper bound must be max_reps, not the raw
               * char_last: for unbounded repeats char_last is -1,
               * which would make the range empty or inverted (and,
               * for "*", divide by zero inside rand_in_range). */
           do for rep_i := 1 to rand_in_range(min_reps, max_reps)
              do pool[i] := concat(pool[i], rand_sample(subsamples))

         ; samples { num_samples = count
                   , samples = pool }
       end

  /* Concat expressions */
  else if r.typ = REGEX_CONCAT
  then let var subsamples_1 := gen_samples(r.subexp_1)
           var subsamples_2 := gen_samples(r.subexp_2)
           var count := min(4, num_samples(subsamples_1)
                               * num_samples(subsamples_2))
           var pool := strings[count] of ""
        in for i := 0 to count - 1
           do pool[i] := concat( rand_sample(subsamples_1)
                               , rand_sample(subsamples_2) )

         ; samples { num_samples = count
                   , samples = pool }
       end

  /* Unknown type */
  else samples { num_samples = -1
               , samples = strings[0] of "" }
|
||||
|
||||
/** Main program ***********/
|
||||
|
||||
/* Read the input string and parse it as a regex (currently disabled):
 *
 *   var input := read_all_input()
 *   var regex := parse_regex(input)
 */

/* Hard-coded example, innermost layer: one character out of A..P */
var regex := regex_comp { typ        = REGEX_CHAR_RANGE,
                          char_first = ord("A"),
                          char_last  = ord("P"),
                          subexp_1   = nil,
                          subexp_2   = nil }

/* ... repeated exactly three times.  This declaration shadows the one
 * above, which stays reachable as subexp_1. */
var regex := regex_comp { typ        = REGEX_REPEAT,
                          char_first = 3,
                          char_last  = 3,
                          subexp_1   = regex,
                          subexp_2   = nil }

/* Further layers (currently disabled): append one digit, then repeat
 * the whole thing twice.
 *
 *   var regex := regex_comp { typ = REGEX_CONCAT
 *                           , char_first = 0
 *                           , char_last = 0
 *                           , subexp_1 = regex
 *                           , subexp_2 = regex_comp { typ =
 *                               REGEX_CHAR_RANGE, char_first = ord("0"),
 *                               char_last = ord("9"), subexp_1 = nil,
 *                               subexp_2 = nil } }
 *
 *   var regex := regex_comp { typ = REGEX_REPEAT
 *                           , char_first = 2
 *                           , char_last = 2
 *                           , subexp_1 = regex
 *                           , subexp_2 = nil }
 */
|
||||
|
||||
in /* Add entropy based on input (currently disabled):
    *
    *   for i := 0 to size(input)-1
    *   do add_entropy(ord(substring(input, i, 1)))
    */

   /* Generate the examples, then print a header naming the regex
    * followed by one " - " line per example.  The program's value
    * is 0. */
   let var examples := gen_samples(regex)
    in print("Examples for ")
     ; print(format_regex(regex))
     ; print(":\n")
     ; let var last_index := num_samples(examples) - 1
        in for i := 0 to last_index
           do print(concat(" - ", concat(get_sample(examples, i), "\n")))
       end
     ; 0
   end
end
|
||||
|
Loading…
Reference in New Issue
Block a user