My answers to the exercises of chapter 8.
Page 205:
2. While filesystems on Unix are usually sensitive to case (e.g. “G” vs. “g”) in file names, Windows filesystems are not. Add a parameter to the globToRegex and matchesGlob functions that allows control over case sensitive matching.
Note that the solution below ignores ranges in character classes.
module GlobRegex
( globToRegex
, matchesGlob
) where
import Data.Char (isLower,isUpper,toLower,toUpper)
import Data.List (nub)
import Text.Regex.Posix ((=~))
globToRegex :: String -> Bool-> String
globToRegex cs matchCase
= '^' : globToRegex' cs matchCase ++ "$"
globToRegex' :: String -> Bool -> String
globToRegex' "" _ = ""
globToRegex' ('*':cs) matchCase
= ".*" ++ globToRegex' cs matchCase
globToRegex' ('?':cs) matchCase
= '.' : globToRegex' cs matchCase
globToRegex' ('[':'!':c:cs) matchCase
= let c' = if matchCase
then [c]
else nub (c:shiftCase c:[])
in "[^" ++ c' ++ charClass cs matchCase
globToRegex' ('[':c:cs) matchCase
= let c' = if matchCase
then [c]
else nub (c:shiftCase c:[])
in '[' : c' ++ charClass cs matchCase
globToRegex' ('[':_) _ = error "unterminated character class"
globToRegex' (c:cs) matchCase
= escape c matchCase ++ globToRegex' cs matchCase
escape :: Char -> Bool -> String
escape c matchCase
| c `elem` regexChars = '\' : [c]
| matchCase = [c]
| otherwise = '[' : nub (c:shiftCase c:[]) ++ "]"
where regexChars = "\\+()^$.{}]|"
charClass :: String -> Bool -> String
charClass (']':cs) matchCase = ']' : globToRegex' cs matchCase
charClass (c:cs) True = c : charClass cs True
charClass (c:cs) False = c' ++ charClass cs False
where c' = nub (c:shiftCase c:[])
charClass [] _ = error "unterminated character class"
shiftCase :: Char -> Char
shiftCase c | isLower c = toUpper c
| isUpper c = toLower c
| otherwise = c
matchesGlob :: FilePath -> String -> Bool -> Bool
matchesGlob name pat matchCase
= name =~ globToRegex pat matchCase
( globToRegex
, matchesGlob
) where
import Data.Char (isLower,isUpper,toLower,toUpper)
import Data.List (nub)
import Text.Regex.Posix ((=~))
globToRegex :: String -> Bool-> String
globToRegex cs matchCase
= '^' : globToRegex' cs matchCase ++ "$"
globToRegex' :: String -> Bool -> String
globToRegex' "" _ = ""
globToRegex' ('*':cs) matchCase
= ".*" ++ globToRegex' cs matchCase
globToRegex' ('?':cs) matchCase
= '.' : globToRegex' cs matchCase
globToRegex' ('[':'!':c:cs) matchCase
= let c' = if matchCase
then [c]
else nub (c:shiftCase c:[])
in "[^" ++ c' ++ charClass cs matchCase
globToRegex' ('[':c:cs) matchCase
= let c' = if matchCase
then [c]
else nub (c:shiftCase c:[])
in '[' : c' ++ charClass cs matchCase
globToRegex' ('[':_) _ = error "unterminated character class"
globToRegex' (c:cs) matchCase
= escape c matchCase ++ globToRegex' cs matchCase
escape :: Char -> Bool -> String
escape c matchCase
| c `elem` regexChars = '\' : [c]
| matchCase = [c]
| otherwise = '[' : nub (c:shiftCase c:[]) ++ "]"
where regexChars = "\\+()^$.{}]|"
charClass :: String -> Bool -> String
charClass (']':cs) matchCase = ']' : globToRegex' cs matchCase
charClass (c:cs) True = c : charClass cs True
charClass (c:cs) False = c' ++ charClass cs False
where c' = nub (c:shiftCase c:[])
charClass [] _ = error "unterminated character class"
shiftCase :: Char -> Char
shiftCase c | isLower c = toUpper c
| isUpper c = toLower c
| otherwise = c
matchesGlob :: FilePath -> String -> Bool -> Bool
matchesGlob name pat matchCase
= name =~ globToRegex pat matchCase
Page 212:
1. Glob patterns are simple enough to interpret that it's easy to write a matcher directly in Haskell, rather than going through the regexp machinery. Give it a try.
Note: I forgot to implement negated character classes.
module MatchGlob (match) where
import Data.List (tails)
type Glob = String
match :: Glob -> String -> Bool
match "" "" = True
match "" _ = False
match "*" "" = True
match _ "" = False
match (x:xs) z@(y:ys) =
case x of
'?' -> match xs ys
'\' -> matchEscape xs z
'*' -> matchStar xs z
'[' -> matchClass xs z
otherwise -> x == y && match xs ys
matchEscape :: Glob -> String -> Bool
matchEscape [] _ = error "unescaped backslash"
matchEscape _ "" = False
matchEscape (x:xs) (y:ys) = x == y && match xs ys
matchStar :: Glob -> String -> Bool
matchStar x y = or (map (match x) (tails y))
matchClass :: Glob -> String -> Bool
matchClass (x:xs) (y:ys) =
let (xs',zs) = break (== ']') xs
in if zs == ""
then error "unterminated character class"
else y `elem` expand (x:xs') && match (tail zs) ys
matchClass _ _ = error "illegal character class"
expand :: String -> String
expand [] = ""
expand (x:'-':[]) = x:'-':""
expand (x:'-':y:zs) = [x..y] ++ expand zs
expand (x:xs) = x:expand xs
import Data.List (tails)
type Glob = String
match :: Glob -> String -> Bool
match "" "" = True
match "" _ = False
match "*" "" = True
match _ "" = False
match (x:xs) z@(y:ys) =
case x of
'?' -> match xs ys
'\' -> matchEscape xs z
'*' -> matchStar xs z
'[' -> matchClass xs z
otherwise -> x == y && match xs ys
matchEscape :: Glob -> String -> Bool
matchEscape [] _ = error "unescaped backslash"
matchEscape _ "" = False
matchEscape (x:xs) (y:ys) = x == y && match xs ys
matchStar :: Glob -> String -> Bool
matchStar x y = or (map (match x) (tails y))
matchClass :: Glob -> String -> Bool
matchClass (x:xs) (y:ys) =
let (xs',zs) = break (== ']') xs
in if zs == ""
then error "unterminated character class"
else y `elem` expand (x:xs') && match (tail zs) ys
matchClass _ _ = error "illegal character class"
expand :: String -> String
expand [] = ""
expand (x:'-':[]) = x:'-':""
expand (x:'-':y:zs) = [x..y] ++ expand zs
expand (x:xs) = x:expand xs