Split-off and re-export installFileGlob
[cabal.git] / Cabal / src / Distribution / Simple / Glob / Internal.hs
blob4f0b91eca3954ae45e490e9fa00bb567ad6eda20
1 {-# LANGUAGE DeriveFunctor #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE LambdaCase #-}
5 -----------------------------------------------------------------------------
7 -- |
8 -- Module : Distribution.Simple.Glob.Internal
9 -- Copyright : Isaac Jones, Simon Marlow 2003-2004
10 -- License : BSD3
11 -- portions Copyright (c) 2007, Galois Inc.
13 -- Maintainer : cabal-devel@haskell.org
14 -- Portability : portable
16 -- Internal module for simple file globbing.
17 -- Please import "Distribution.Simple.Glob" instead.
18 module Distribution.Simple.Glob.Internal where
20 import Distribution.Compat.Prelude
21 import Prelude ()
23 import Control.Monad (mapM)
25 import Distribution.Parsec
26 import Distribution.Pretty
28 import Distribution.CabalSpecVersion
29 import Distribution.Simple.Utils
30 import Distribution.Verbosity hiding (normal)
32 import Data.List (stripPrefix)
33 import System.Directory
34 import System.FilePath
36 import qualified Distribution.Compat.CharParsing as P
37 import qualified Text.PrettyPrint as Disp
39 --------------------------------------------------------------------------------
41 -- | A filepath specified by globbing.
42 data Glob
43 = -- | @<dirGlob>/<glob>@
44 GlobDir !GlobPieces !Glob
45 | -- | @**/<glob>@, where @**@ denotes recursively traversing
46 -- all directories and matching filenames on <glob>.
47 GlobDirRecursive !GlobPieces
48 | -- | A file glob.
49 GlobFile !GlobPieces
50 | -- | Trailing dir; a glob ending in @/@.
51 GlobDirTrailing
52 deriving (Eq, Show, Generic)
54 instance Binary Glob
55 instance Structured Glob
57 -- | A single directory or file component of a globbed path
58 type GlobPieces = [GlobPiece]
60 -- | A piece of a globbing pattern
61 data GlobPiece
62 = -- | A wildcard @*@
63 WildCard
64 | -- | A literal string @dirABC@
65 Literal String
66 | -- | A union of patterns, e.g. @dir/{a,*.txt,c}/...@
67 Union [GlobPieces]
68 deriving (Eq, Show, Generic)
70 instance Binary GlobPiece
71 instance Structured GlobPiece
73 -------------------------------------------------------------------------------
75 -- * Matching
77 --------------------------------------------------------------------------------
79 -- | Match a 'Glob' against the file system, starting from a
80 -- given root directory. The results are all relative to the given root.
82 -- @since 3.12.0.0
83 matchGlob :: FilePath -> Glob -> IO [FilePath]
84 matchGlob root glob =
85 -- For this function, which is the general globbing one (doesn't care about
86 -- cabal spec, used e.g. for monitoring), we consider all matches.
87 mapMaybe
88 ( \case
89 GlobMatch a -> Just a
90 GlobWarnMultiDot a -> Just a
91 GlobMatchesDirectory a -> Just a
92 GlobMissingDirectory{} -> Nothing
94 <$> runDirFileGlob silent Nothing root glob
96 -- | Match a globbing pattern against a file path component
97 matchGlobPieces :: GlobPieces -> String -> Bool
98 matchGlobPieces = goStart
99 where
100 -- From the man page, glob(7):
101 -- "If a filename starts with a '.', this character must be
102 -- matched explicitly."
104 go, goStart :: [GlobPiece] -> String -> Bool
106 goStart (WildCard : _) ('.' : _) = False
107 goStart (Union globs : rest) cs =
109 (\glob -> goStart (glob ++ rest) cs)
110 globs
111 goStart rest cs = go rest cs
113 go [] "" = True
114 go (Literal lit : rest) cs
115 | Just cs' <- stripPrefix lit cs =
116 go rest cs'
117 | otherwise = False
118 go [WildCard] "" = True
119 go (WildCard : rest) (c : cs) = go rest (c : cs) || go (WildCard : rest) cs
120 go (Union globs : rest) cs = any (\glob -> go (glob ++ rest) cs) globs
121 go [] (_ : _) = False
122 go (_ : _) "" = False
124 -------------------------------------------------------------------------------
126 -- * Parsing & printing
128 --------------------------------------------------------------------------------
129 -- Filepaths with globs may be parsed in the special context is globbing in
130 -- cabal package fields, such as `data-files`. In that case, we restrict the
131 -- globbing syntax to that supported by the cabal spec version in use.
132 -- Otherwise, we parse the globs to the extent of our globbing features
133 -- (wildcards `*`, unions `{a,b,c}`, and directory-recursive wildcards `**`).
135 -- ** Parsing globs in a cabal package
137 parseFileGlob :: CabalSpecVersion -> FilePath -> Either GlobSyntaxError Glob
138 parseFileGlob version filepath = case reverse (splitDirectories filepath) of
139 [] ->
140 Left EmptyGlob
141 (filename : "**" : segments)
142 | allowGlobStar -> do
143 finalSegment <- case splitExtensions filename of
144 ("*", ext)
145 | '*' `elem` ext -> Left StarInExtension
146 | null ext -> Left NoExtensionOnStar
147 | otherwise -> Right (GlobDirRecursive [WildCard, Literal ext])
149 | allowLiteralFilenameGlobStar ->
150 Right (GlobDirRecursive [Literal filename])
151 | otherwise ->
152 Left LiteralFileNameGlobStar
154 foldM addStem finalSegment segments
155 | otherwise -> Left VersionDoesNotSupportGlobStar
156 (filename : segments) -> do
157 pat <- case splitExtensions filename of
158 ("*", ext)
159 | not allowGlob -> Left VersionDoesNotSupportGlob
160 | '*' `elem` ext -> Left StarInExtension
161 | null ext -> Left NoExtensionOnStar
162 | otherwise -> Right (GlobFile [WildCard, Literal ext])
163 (_, ext)
164 | '*' `elem` ext -> Left StarInExtension
165 | '*' `elem` filename -> Left StarInFileName
166 | otherwise -> Right (GlobFile [Literal filename])
168 foldM addStem pat segments
169 where
170 addStem pat seg
171 | '*' `elem` seg = Left StarInDirectory
172 | otherwise = Right (GlobDir [Literal seg] pat)
173 allowGlob = version >= CabalSpecV1_6
174 allowGlobStar = version >= CabalSpecV2_4
175 allowLiteralFilenameGlobStar = version >= CabalSpecV3_8
177 enableMultidot :: CabalSpecVersion -> Bool
178 enableMultidot version
179 | version >= CabalSpecV2_4 = True
180 | otherwise = False
182 -- ** Parsing globs otherwise
184 instance Pretty Glob where
185 pretty (GlobDir glob pathglob) =
186 dispGlobPieces glob
187 Disp.<> Disp.char '/'
188 Disp.<> pretty pathglob
189 pretty (GlobDirRecursive glob) =
190 Disp.text "**/"
191 Disp.<> dispGlobPieces glob
192 pretty (GlobFile glob) = dispGlobPieces glob
193 pretty GlobDirTrailing = Disp.empty
195 instance Parsec Glob where
196 parsec = parsecPath
197 where
198 parsecPath :: CabalParsing m => m Glob
199 parsecPath = do
200 glob <- parsecGlob
201 dirSep *> (GlobDir glob <$> parsecPath <|> pure (GlobDir glob GlobDirTrailing)) <|> pure (GlobFile glob)
202 -- We could support parsing recursive directory search syntax
203 -- @**@ here too, rather than just in 'parseFileGlob'
205 dirSep :: CabalParsing m => m ()
206 dirSep =
207 () <$ P.char '/'
208 <|> P.try
209 ( do
210 _ <- P.char '\\'
211 -- check this isn't an escape code
212 P.notFollowedBy (P.satisfy isGlobEscapedChar)
215 parsecGlob :: CabalParsing m => m GlobPieces
216 parsecGlob = some parsecPiece
217 where
218 parsecPiece = P.choice [literal, wildcard, union]
220 wildcard = WildCard <$ P.char '*'
221 union = Union . toList <$> P.between (P.char '{') (P.char '}') (P.sepByNonEmpty parsecGlob (P.char ','))
222 literal = Literal <$> some litchar
224 litchar = normal <|> escape
226 normal = P.satisfy (\c -> not (isGlobEscapedChar c) && c /= '/' && c /= '\\')
227 escape = P.try $ P.char '\\' >> P.satisfy isGlobEscapedChar
229 --------------------------------------------------------------------------------
230 -- Parse and printing utils
231 --------------------------------------------------------------------------------
233 dispGlobPieces :: GlobPieces -> Disp.Doc
234 dispGlobPieces = Disp.hcat . map dispPiece
235 where
236 dispPiece WildCard = Disp.char '*'
237 dispPiece (Literal str) = Disp.text (escape str)
238 dispPiece (Union globs) =
239 Disp.braces
240 ( Disp.hcat
241 ( Disp.punctuate
242 (Disp.char ',')
243 (map dispGlobPieces globs)
246 escape [] = []
247 escape (c : cs)
248 | isGlobEscapedChar c = '\\' : c : escape cs
249 | otherwise = c : escape cs
251 isGlobEscapedChar :: Char -> Bool
252 isGlobEscapedChar '*' = True
253 isGlobEscapedChar '{' = True
254 isGlobEscapedChar '}' = True
255 isGlobEscapedChar ',' = True
256 isGlobEscapedChar _ = False
258 -- ** Cabal package globbing errors
260 data GlobSyntaxError
261 = StarInDirectory
262 | StarInFileName
263 | StarInExtension
264 | NoExtensionOnStar
265 | EmptyGlob
266 | LiteralFileNameGlobStar
267 | VersionDoesNotSupportGlobStar
268 | VersionDoesNotSupportGlob
269 deriving (Eq, Show)
271 explainGlobSyntaxError :: FilePath -> GlobSyntaxError -> String
272 explainGlobSyntaxError filepath StarInDirectory =
273 "invalid file glob '"
274 ++ filepath
275 ++ "'. A wildcard '**' is only allowed as the final parent"
276 ++ " directory. Stars must not otherwise appear in the parent"
277 ++ " directories."
278 explainGlobSyntaxError filepath StarInExtension =
279 "invalid file glob '"
280 ++ filepath
281 ++ "'. Wildcards '*' are only allowed as the"
282 ++ " file's base name, not in the file extension."
283 explainGlobSyntaxError filepath StarInFileName =
284 "invalid file glob '"
285 ++ filepath
286 ++ "'. Wildcards '*' may only totally replace the"
287 ++ " file's base name, not only parts of it."
288 explainGlobSyntaxError filepath NoExtensionOnStar =
289 "invalid file glob '"
290 ++ filepath
291 ++ "'. If a wildcard '*' is used it must be with an file extension."
292 explainGlobSyntaxError filepath LiteralFileNameGlobStar =
293 "invalid file glob '"
294 ++ filepath
295 ++ "'. Prior to 'cabal-version: 3.8'"
296 ++ " if a wildcard '**' is used as a parent directory, the"
297 ++ " file's base name must be a wildcard '*'."
298 explainGlobSyntaxError _ EmptyGlob =
299 "invalid file glob. A glob cannot be the empty string."
300 explainGlobSyntaxError filepath VersionDoesNotSupportGlobStar =
301 "invalid file glob '"
302 ++ filepath
303 ++ "'. Using the double-star syntax requires 'cabal-version: 2.4'"
304 ++ " or greater. Alternatively, for compatibility with earlier Cabal"
305 ++ " versions, list the included directories explicitly."
306 explainGlobSyntaxError filepath VersionDoesNotSupportGlob =
307 "invalid file glob '"
308 ++ filepath
309 ++ "'. Using star wildcards requires 'cabal-version: >= 1.6'. "
310 ++ "Alternatively if you require compatibility with earlier Cabal "
311 ++ "versions then list all the files explicitly."
313 -- Note throughout that we use splitDirectories, not splitPath. On
314 -- Posix, this makes no difference, but, because Windows accepts both
315 -- slash and backslash as its path separators, if we left in the
316 -- separators from the glob we might not end up properly normalised.
318 data GlobResult a
319 = -- | The glob matched the value supplied.
320 GlobMatch a
321 | -- | The glob did not match the value supplied because the
322 -- cabal-version is too low and the extensions on the file did
323 -- not precisely match the glob's extensions, but rather the
324 -- glob was a proper suffix of the file's extensions; i.e., if
325 -- not for the low cabal-version, it would have matched.
326 GlobWarnMultiDot a
327 | -- | The glob couldn't match because the directory named doesn't
328 -- exist. The directory will be as it appears in the glob (i.e.,
329 -- relative to the directory passed to 'matchDirFileGlob', and,
330 -- for 'data-files', relative to 'data-dir').
331 GlobMissingDirectory a
332 | -- | The glob matched a directory when we were looking for files only.
333 -- It didn't match a file!
335 -- @since 3.12.0.0
336 GlobMatchesDirectory a
337 deriving (Show, Eq, Ord, Functor)
339 -- | Match files against a pre-parsed glob, starting in a directory.
341 -- The 'Version' argument must be the spec version of the package
342 -- description being processed, as globs behave slightly differently
343 -- in different spec versions.
345 -- The 'FilePath' argument is the directory that the glob is relative
346 -- to. It must be a valid directory (and hence it can't be the empty
347 -- string). The returned values will not include this prefix.
348 runDirFileGlob
349 :: Verbosity
350 -> Maybe CabalSpecVersion
351 -- ^ If the glob we are running should care about the cabal spec, and warnings such as 'GlobWarnMultiDot', then this should be the version.
352 -- If you want to run a glob but don't care about any of the cabal-spec restrictions on globs, use 'Nothing'!
353 -> FilePath
354 -> Glob
355 -> IO [GlobResult FilePath]
356 runDirFileGlob verbosity mspec rawRoot pat = do
357 -- The default data-dir is null. Our callers -should- be
358 -- converting that to '.' themselves, but it's a certainty that
359 -- some future call-site will forget and trigger a really
360 -- hard-to-debug failure if we don't check for that here.
361 when (null rawRoot) $
362 warn verbosity $
363 "Null dir passed to runDirFileGlob; interpreting it "
364 ++ "as '.'. This is probably an internal error."
365 let root = if null rawRoot then "." else rawRoot
366 debug verbosity $ "Expanding glob '" ++ show (pretty pat) ++ "' in directory '" ++ root ++ "'."
367 -- This function might be called from the project root with dir as
368 -- ".". Walking the tree starting there involves going into .git/
369 -- and dist-newstyle/, which is a lot of work for no reward, so
370 -- extract the constant prefix from the pattern and start walking
371 -- there, and only walk as much as we need to: recursively if **,
372 -- the whole directory if *, and just the specific file if it's a
373 -- literal.
375 (prefixSegments, variablePattern) = splitConstantPrefix pat
376 joinedPrefix = joinPath prefixSegments
378 -- The glob matching function depends on whether we care about the cabal version or not
379 doesGlobMatch :: GlobPieces -> String -> Maybe (GlobResult ())
380 doesGlobMatch glob str = case mspec of
381 Just spec -> checkNameMatches spec glob str
382 Nothing -> if matchGlobPieces glob str then Just (GlobMatch ()) else Nothing
384 go (GlobFile glob) dir = do
385 entries <- getDirectoryContents (root </> dir)
386 catMaybes
387 <$> mapM
388 ( \s -> do
389 -- When running a glob from a Cabal package description (i.e.
390 -- when a cabal spec version is passed as an argument), we
391 -- disallow matching a @GlobFile@ against a directory, preferring
392 -- @GlobDir dir GlobDirTrailing@ to specify a directory match.
393 isFile <- maybe (return True) (const $ doesFileExist (root </> dir </> s)) mspec
394 let match = (dir </> s <$) <$> doesGlobMatch glob s
395 return $
396 if isFile
397 then match
398 else case match of
399 Just (GlobMatch x) -> Just $ GlobMatchesDirectory x
400 Just (GlobWarnMultiDot x) -> Just $ GlobMatchesDirectory x
401 Just (GlobMatchesDirectory x) -> Just $ GlobMatchesDirectory x
402 Just (GlobMissingDirectory x) -> Just $ GlobMissingDirectory x -- this should never match, unless you are in a file-delete-heavy concurrent setting i guess
403 Nothing -> Nothing
405 entries
406 go (GlobDirRecursive glob) dir = do
407 entries <- getDirectoryContentsRecursive (root </> dir)
408 return $
409 mapMaybe
410 ( \s -> do
411 globMatch <- doesGlobMatch glob (takeFileName s)
412 pure ((dir </> s) <$ globMatch)
414 entries
415 go (GlobDir glob globPath) dir = do
416 entries <- getDirectoryContents (root </> dir)
417 subdirs <-
418 filterM
419 ( \subdir ->
420 doesDirectoryExist
421 (root </> dir </> subdir)
423 $ filter (matchGlobPieces glob) entries
424 concat <$> traverse (\subdir -> go globPath (dir </> subdir)) subdirs
425 go GlobDirTrailing dir = return [GlobMatch dir]
427 directoryExists <- doesDirectoryExist (root </> joinedPrefix)
428 if directoryExists
429 then go variablePattern joinedPrefix
430 else return [GlobMissingDirectory joinedPrefix]
431 where
432 -- \| Extract the (possibly null) constant prefix from the pattern.
433 -- This has the property that, if @(pref, final) = splitConstantPrefix pat@,
434 -- then @pat === foldr GlobDir final pref@.
435 splitConstantPrefix :: Glob -> ([FilePath], Glob)
436 splitConstantPrefix = unfoldr' step
437 where
438 step (GlobDir [Literal seg] pat') = Right (seg, pat')
439 step pat' = Left pat'
441 unfoldr' :: (a -> Either r (b, a)) -> a -> ([b], r)
442 unfoldr' f a = case f a of
443 Left r -> ([], r)
444 Right (b, a') -> case unfoldr' f a' of
445 (bs, r) -> (b : bs, r)
447 -- | Is the root of this relative glob path a directory-recursive wildcard, e.g. @**/*.txt@ ?
448 isRecursiveInRoot :: Glob -> Bool
449 isRecursiveInRoot (GlobDirRecursive _) = True
450 isRecursiveInRoot _ = False
452 -- | Check how the string matches the glob under this cabal version
453 checkNameMatches :: CabalSpecVersion -> GlobPieces -> String -> Maybe (GlobResult ())
454 checkNameMatches spec glob candidate
455 -- Check if glob matches in its general form
456 | matchGlobPieces glob candidate =
457 -- if multidot is supported, then this is a clean match
458 if enableMultidot spec
459 then pure (GlobMatch ())
460 else -- if not, issue a warning saying multidot is needed for the match
462 let (_, candidateExts) = splitExtensions $ takeFileName candidate
463 extractExts :: GlobPieces -> Maybe String
464 extractExts [] = Nothing
465 extractExts [Literal lit]
466 -- Any literal terminating a glob, and which does have an extension,
467 -- returns that extension. Otherwise, recurse until Nothing is returned.
468 | let ext = takeExtensions lit
469 , ext /= "" =
470 Just ext
471 extractExts (_ : x) = extractExts x
472 in case extractExts glob of
473 Just exts
474 | exts == candidateExts ->
475 return (GlobMatch ())
476 | exts `isSuffixOf` candidateExts ->
477 return (GlobWarnMultiDot ())
478 _ -> return (GlobMatch ())
479 | otherwise = empty
481 -- | How/does the glob match the given filepath, according to the cabal version?
482 -- Since this is pure, we don't make a distinction between matching on
483 -- directories or files (i.e. this function won't return 'GlobMatchesDirectory')
484 fileGlobMatches :: CabalSpecVersion -> Glob -> FilePath -> Maybe (GlobResult ())
485 fileGlobMatches version g path = go g (splitDirectories path)
486 where
487 go GlobDirTrailing [] = Just (GlobMatch ())
488 go (GlobFile glob) [file] = checkNameMatches version glob file
489 go (GlobDirRecursive glob) dirs
490 | [] <- reverse dirs =
491 Nothing -- @dir/**/x.txt@ should not match @dir/hello@
492 | file : _ <- reverse dirs =
493 checkNameMatches version glob file
494 go (GlobDir glob globPath) (dir : dirs) = do
495 _ <- checkNameMatches version glob dir -- we only care if dir segment matches
496 go globPath dirs
497 go _ _ = Nothing