aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md7
-rw-r--r--regex.ml69
-rw-r--r--results2
3 files changed, 72 insertions, 6 deletions
diff --git a/README.md b/README.md
index 98c2b0a..14ff265 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,9 @@ structures, thus I'd like to get the hang of writing small programs fast.
DFA to regex converter.
TODO:
- Reduce regexes that are concatenated with the empty regex
+ - Try adding more simple rules for reducing, for example `(e+a)((a*)(b)) == (a*b)`
- Expand the string_of_regex function, for example omit parenthesis around
- concatenated letters
-- Make some automaton parser \ No newline at end of file
+ concatenated letters (not as easy as I thought)
+- Make some automaton parser
+
+It created a not-so-long regex such that some online regex-to-dfa converter has successfully converted to DFA. Regex can be found in [results](/results). \ No newline at end of file
diff --git a/regex.ml b/regex.ml
index e0b7b35..d09908c 100644
--- a/regex.ml
+++ b/regex.ml
@@ -36,7 +36,35 @@ let basic_regexes states delta =
in let gen_basic state =
List.map big_plus (List.map (get_letters state) states)
in List.map gen_basic states
-
+
+let rec reduce_once regex =
+ match regex with
+ | Empty -> Empty
+ | Epsilon -> Epsilon
+ | Letter l -> Letter l
+ | Concat (Empty, _) -> Empty
+ | Concat (_, Empty) -> Empty
+ | Concat (Epsilon, p) -> reduce_once p
+ | Concat (p, Epsilon) -> reduce_once p
+ | Concat (Star (Letter l1), Plus (Epsilon, Letter l2)) ->
+ if l1 = l2 then Star (Letter l1) else
+ Concat (Star (Letter l1), Plus (Epsilon, Letter l2))
+ | Concat (p1, p2) -> Concat (reduce_once p1, reduce_once p2)
+ | Plus (Empty, p) -> reduce_once p
+ | Plus (p, Empty) -> reduce_once p
+ | Plus (Epsilon, Star p) -> Star (reduce_once p)
+ | Plus (Star p, Epsilon) -> Star (reduce_once p)
+ | Plus (p1, p2) -> Plus (reduce_once p1, reduce_once p2)
+ | Star Empty -> Empty
+ | Star Epsilon -> Epsilon
+ | Star (Plus (Epsilon, p)) -> Star (reduce_once p)
+ | Star (Plus (p, Epsilon)) -> Star (reduce_once p)
+ | Star p -> Star (reduce_once p)
+
+let rec reduce regex =
+ let reduced = reduce_once regex in
+ if reduced = regex then regex else reduce reduced
+
let s = [0;1;2;3;4];;
let delta = [ (0, "a", 0); (0, "b", 1);
(1, "a", 2); (1, "b", 1);
@@ -74,9 +102,42 @@ let rec string_of_regex regex =
| Empty -> "∅"
| Epsilon -> "ε"
| Letter l -> l
- | Plus (phi1, phi2) -> "(" ^ (string_of_regex phi1) ^ ") + (" ^ (string_of_regex phi2) ^ ")"
- | Concat (phi1, phi2) -> (string_of_regex phi1) ^ (string_of_regex phi2)
+ | Plus (Letter l, r) -> l ^ "+" ^ (string_of_regex r)
+ | Plus (r, Letter l) -> (string_of_regex r) ^ "+" ^ l
+ | Plus (Epsilon, r) -> (string_of_regex Epsilon) ^ "+" ^ (string_of_regex r)
+ | Plus (r, Epsilon) -> (string_of_regex r) ^ "+" ^ (string_of_regex Epsilon)
+ | Plus (Empty, r) -> (string_of_regex Empty) ^ "+" ^ (string_of_regex r)
+ | Plus (r, Empty) -> (string_of_regex r) ^ "+" ^ (string_of_regex Empty)
+ | Plus (phi1, phi2) -> (string_of_regex phi1) ^ "+" ^ (string_of_regex phi2)
+ | Concat (Letter l1, Letter l2) -> l1 ^ l2
+ | Concat (Letter l, Epsilon) -> l ^ (string_of_regex Epsilon)
+ | Concat (Letter l, Empty) -> l ^ (string_of_regex Empty)
+ | Concat (Epsilon, Letter l) -> (string_of_regex Epsilon) ^ l
+ | Concat (Empty, Letter l) -> (string_of_regex Empty) ^ l
+ | Concat (Empty, Epsilon) -> (string_of_regex Empty) ^ (string_of_regex Epsilon)
+ | Concat (Epsilon, Empty) -> (string_of_regex Epsilon) ^ (string_of_regex Empty)
+ | Concat (Empty, Empty) -> (string_of_regex Empty) ^ (string_of_regex Empty)
+ | Concat (Epsilon, Epsilon) -> (string_of_regex Epsilon) ^ (string_of_regex Epsilon)
+ | Concat (phi1, phi2) -> "(" ^ (string_of_regex phi1) ^ ")(" ^ (string_of_regex phi2) ^ ")"
+ | Star (Empty) -> (string_of_regex Empty) ^ "*"
+ | Star (Epsilon) -> (string_of_regex Epsilon) ^ "*"
+ | Star (Letter l) -> l ^ "*"
| Star phi -> "(" ^ (string_of_regex phi) ^ ")*"
+
let pp_print_regex fmtr f =
- Format.pp_print_string fmtr (string_of_regex f) \ No newline at end of file
+ Format.pp_print_string fmtr (string_of_regex f)
+
+let basic = basic_regexes (iter 5) delta
+let r1 = increment_phis 5 basic 0
+let r2 = increment_phis 5 r1 1
+let r3 = increment_phis 5 r2 2
+let r4 = increment_phis 5 r3 3
+let r5 = increment_phis 5 r4 4
+
+let ijth i j r = (List.nth (List.nth r i) j)
+let a0 = ijth 0 0 r5
+let a1 = ijth 0 1 r5
+let a2 = ijth 0 2 r5
+let a3 = ijth 0 3 r5
+let res = Plus (Plus (Plus (reduce a0, reduce a1), reduce a2), reduce a3)
diff --git a/results b/results
new file mode 100644
index 0000000..b68d8c4
--- /dev/null
+++ b/results
@@ -0,0 +1,2 @@
+(* Regex for language with no words conatining "baba" as a substring: *)
+ε+a+(ε+a)(a*)+((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(a+(a)(a*)))+(((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))(((((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))*)(((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(a+(a)(a*)))))+b+(ε+a)((a*)(b))+(b+(ε+a)((a*)(b)))(b*)+((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)((a)((a*)(b))+((a)((a*)(b)))(b*)))+(((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))(((((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))*)(b+(b)(b*)+((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)((a)((a*)(b))+((a)((a*)(b)))(b*)))))+(b+(ε+a)((a*)(b)))((b*)(a))+((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(ε+((a)((a*)(b)))((b*)(a))))+(((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))(((((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))*)((b)((b*)(a))+((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(ε+((a)((a*)(b)))((b*)(a))))))+((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b))+(((b+(ε+a)((a*)(b)))((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))(((((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b)))*)(ε+((b)((b*)(a)))(((((a)((a*)(b)))((b*)(a)))*)(b))))