Hacker News new | ask | show | jobs
by rntz 1014 days ago
It turns out to be not that hard to just compute the language of the regex, if it is finite, and otherwise note that it is infinite:

    import Prelude hiding (null)
    import Data.Set (Set, toList, fromList, empty, singleton, isSubsetOf, unions, null)

    data Regex = Class [Char]   -- character class
               | Seq [Regex]    -- sequence, ABC
               | Choice [Regex] -- choice, A|B|C
               | Star Regex     -- zero or more, A*
                 deriving (Show)

    -- The language of a regex is either finite or infinite.
    -- We only care about the finite case.
    data Lang = Finite (Set String) | Infinite deriving (Show, Eq)

    zero = Finite empty
    one = Finite (singleton "")

    isEmpty (Finite s) = null s
    isEmpty Infinite = False

    cat :: Lang -> Lang -> Lang
    cat x y | isEmpty x || isEmpty y = zero
    cat (Finite s) (Finite t) = Finite $ fromList [x ++ y | x <- toList s, y <- toList t]
    cat _ _ = Infinite

    subsingleton :: Lang -> Bool
    subsingleton Infinite = False
    subsingleton (Finite s) = isSubsetOf s (fromList [""])

    eval :: Regex -> Lang
    eval (Class chars) = Finite $ fromList [[c] | c <- chars]
    eval (Seq rs) = foldr cat one $ map eval rs
    eval (Choice rs) | any (== Infinite) langs = Infinite
                     | otherwise = Finite $ unions [s | Finite s <- langs]
      where langs = map eval rs
    eval (Star r) | subsingleton (eval r) = one
                  | otherwise = Infinite