I am trying to learn a bit of clojure by porting a toy NFA regexp matcher. Obviously my main issue is representing and manipulating graphs. I hit a working solution, but my implementation (using gensym
to emulate pointers, basically) leaves me with a yucky taste in my mouth.
Care to suggest improvements, both for representing graphs and for general readability and idioms? (the original imperative solution is much more readable than my current one is).
Cheers!
; This is a port of Matt Might's toy regexp library described in
; http://matt.might.net/articles/implementation-of-nfas-and-regular-expressions-in-java/
; char_transitions is a map that associates a character with a set
; of target state names
; empty_transitions is a set of target state names
(defrecord NfaState [final char_transitions empty_transitions])
; 'entry' and 'exit' are state names
; 'states' is a map that associates state names with states
(defrecord Nfa [entry exit states])
(defn- add-empty-edge [nfa curr_state_name next_state_name]
(let [curr_state (curr_state_name (:states nfa))]
(Nfa. (:entry nfa) (:exit nfa) (assoc (:states nfa) curr_state_name (NfaState. (:final curr_state) (:char_transitions curr_state) (conj (:empty_transitions curr_state) next_state_name))))))
(defn- nfa-matches?
([nfa nfa_state_name string] (nfa-matches? nfa nfa_state_name string #{}))
([nfa nfa_state_name string visited]
(let [nfa_state (nfa_state_name (:states nfa))
new_visited (conj visited nfa_state_name)]
(cond
(contains? visited nfa_state_name) false
(empty? string) (or (:final nfa_state)
(some #(nfa-matches? nfa % string new_visited)
(:empty_transitions nfa_state)))
(not (empty? string)) (or
(some #(nfa-matches? nfa % (.substring string 1)) (get (:char_transitions nfa_state) (.charAt string 0) #{}))
(some #(nfa-matches? nfa % string new_visited) (:empty_transitions nfa_state)))
:else false))))
(defn matches? [nfa string]
"Matches a string against an NFA"
(nfa-matches? nfa (:entry nfa) string))
(defn c [ch]
"Creates an NFA which matches the character 'c'"
(let [in (gensym)
out (gensym)]
(Nfa. in out {in (NfaState. false {ch #{out}} #{}) out (NfaState. true {} #{})})))
(defn e []
"Creates an NFA which matches the empty string"
(let [in (gensym)
out (gensym)]
(Nfa. in out {in (NfaState. false {} #{out}) out (NfaState. true {} #{})})))
(defn rep [nfa]
"Creates an NFA which matches zero or more repetitions of the given NFA"
(add-empty-edge (add-empty-edge nfa (:entry nfa) (:exit nfa)) (:exit nfa) (:entry nfa)))
(defn- update-final [nfastate new_final]
(NfaState. new_final (:char_transitions nfastate) (:empty_transitions nfastate)))
(defn s [first second]
"Creates an NFA which matches a sequence of two provided NFAs"
(add-empty-edge (Nfa. (:entry first) (:exit second) (merge (update-in (:states first) [(:exit first)] update-final false) (update-in (:states second) [(:exit second)] update-final true))) (:exit first) (:entry second)))
(defn ou [first second]
"Creates an NFA which matches either provided NFA"
(let [in (gensym)
out (gensym)]
(add-empty-edge (add-empty-edge (Nfa. in out (assoc (merge (update-in (:states first) [(:exit first)] update-final false) (update-in (:states second) [(:exit second)] update-final false)) in (NfaState. false {} #{(:entry first) (:entry second)}) out (NfaState. true {} #{}))) (:exit first) out) (:exit second) out)))
; sugar
(defn st [string]
"Creates an NFA which matches the provided string"
(if (empty? string)
(e)
(s (c (.charAt string 0)) (st (.substring string 1)))))