Fix default Nix configuration option in generated ~/.cabal/config file (#8878)
[cabal.git] / solver-benchmarks / HackageBenchmark.hs
blobd650f38e56a18ffeb3d43cd9b5db13665eb1b312
1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 {-# LANGUAGE RecordWildCards #-}
3 {-# LANGUAGE TupleSections #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
6 module HackageBenchmark (
7 hackageBenchmarkMain
9 -- Exposed for testing:
10 , CabalResult(..)
11 , isSignificantTimeDifference
12 , combineTrialResults
13 , isSignificantResult
14 , shouldContinueAfterFirstTrial
15 ) where
17 import Control.Concurrent.Async (concurrently)
18 import Control.Monad (forM, replicateM, unless, when)
19 import qualified Data.ByteString as BS
20 import Data.List (nub, unzip4)
21 import Data.Maybe (isJust, catMaybes)
22 import Data.Monoid ((<>))
23 import Data.String (fromString)
24 import Data.Function ((&))
25 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
26 import qualified Data.Vector.Unboxed as V
27 import Options.Applicative
28 import Statistics.Sample (mean, stdDev, geometricMean)
29 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
30 , mannWhitneyUCriticalValue
31 , mannWhitneyUtest)
32 import Statistics.Types (PValue, mkPValue)
33 import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
34 import System.Environment (getEnvironment)
35 import System.Exit (ExitCode(..), exitWith, exitFailure)
36 import System.FilePath ((</>))
37 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
38 , stdout)
39 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
40 , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
41 import Text.Printf (printf)
43 import qualified Data.Map.Strict as Map
45 import Distribution.Package (PackageName, mkPackageName, unPackageName)
47 data Args = Args {
48 argCabal1 :: FilePath
49 , argCabal2 :: FilePath
50 , argCabal1Flags :: [String]
51 , argCabal2Flags :: [String]
52 , argPackages :: [PackageName]
53 , argMinRunTimeDifferenceToRerun :: Double
54 , argPValue :: PValue Double
55 , argTrials :: Int
56 , argConcurrently :: Bool
57 , argPrintTrials :: Bool
58 , argPrintSkippedPackages :: Bool
59 , argTimeoutSeconds :: Int
62 data CabalTrial = CabalTrial NominalDiffTime CabalResult
64 data CabalResult
65 = Solution
66 | NoInstallPlan
67 | BackjumpLimit
68 | Unbuildable
69 | UnbuildableDep
70 | ComponentCycle
71 | ModReexpIssue
72 | PkgNotFound
73 | Timeout
74 | Unknown
75 deriving (Eq, Show)
77 hackageBenchmarkMain :: IO ()
78 hackageBenchmarkMain = do
79 hSetBuffering stdout LineBuffering
80 args@Args {..} <- execParser parserInfo
81 checkArgs args
82 printConfig args
83 pkgs <- getPackages args
84 putStrLn ""
86 let concurrently' :: IO a -> IO b -> IO (a, b)
87 concurrently' | argConcurrently = concurrently
88 | otherwise = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
90 let -- The maximum length of the heading and package names.
91 nameColumnWidth :: Int
92 nameColumnWidth =
93 maximum $ map length $ "package" : map unPackageName pkgs
95 -- create cabal runners
96 runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
97 runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
99 -- When the output contains both trails and summaries, label each row as
100 -- "trial" or "summary".
101 when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
102 putStrLn $
103 printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
104 nameColumnWidth "package" "result1" "result2"
105 "mean1" "mean2" "stddev1" "stddev2" "speedup"
107 speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
108 let printTrial msgType result1 result2 time1 time2 =
109 putStrLn $
110 printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
111 msgType nameColumnWidth (unPackageName pkg)
112 (show result1) (show result2)
113 (diffTimeToDouble time1) (diffTimeToDouble time2)
115 (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
117 if not $
118 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
119 then do
120 when argPrintSkippedPackages $
121 if argPrintTrials
122 then printTrial "trial (skipping)" r1 r2 t1 t2
123 else putStrLn $ printf "%-*s (first run times were too similar)"
124 nameColumnWidth (unPackageName pkg)
125 return Nothing
126 else do
127 when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
128 (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
129 . replicateM (argTrials - 1) $ do
131 (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
132 when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
133 return (t1', t2', r1', r2')
135 let result1 = combineTrialResults rs1
136 result2 = combineTrialResults rs2
137 times1 = V.fromList (map diffTimeToDouble ts1)
138 times2 = V.fromList (map diffTimeToDouble ts2)
139 mean1 = mean times1
140 mean2 = mean times2
141 stddev1 = stdDev times1
142 stddev2 = stdDev times2
143 speedup = mean1 / mean2
145 when argPrintTrials $ putStr $ printf "%-16s " "summary"
146 if isSignificantResult result1 result2
147 || isSignificantTimeDifference argPValue ts1 ts2
148 then putStrLn $
149 printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
150 nameColumnWidth (unPackageName pkg)
151 (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
152 else when (argPrintTrials || argPrintSkippedPackages) $
153 putStrLn $
154 printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
156 -- return speedup value
157 return (Just speedup)
159 -- finally, calculate the geometric mean of speedups
160 printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
162 where
163 checkArgs :: Args -> IO ()
164 checkArgs Args {..} = do
165 let die msg = hPutStrLn stderr msg >> exitFailure
166 unless (argTrials > 0) $ die "--trials must be greater than 0."
167 unless (argMinRunTimeDifferenceToRerun >= 0) $
168 die "--min-run-time-percentage-difference-to-rerun must be non-negative."
169 unless (isSampleLargeEnough argPValue argTrials) $
170 die "p-value is too small for the number of trials."
172 printConfig :: Args -> IO ()
173 printConfig Args {..} = do
174 putStrLn "Comparing:"
175 putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
176 callProcess argCabal1 ["--version"]
177 putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
178 callProcess argCabal2 ["--version"]
179 -- TODO: Print index state.
180 putStrLn "Base package database:"
181 callProcess "ghc-pkg" ["list"]
183 getPackages :: Args -> IO [PackageName]
184 getPackages Args {..} = do
185 pkgs <-
186 if null argPackages
187 then do
188 putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
189 list <- readProcess argCabal1 ["list", "--simple-output"] ""
190 return $ nub [mkPackageName $ head (words line) | line <- lines list]
191 else do
192 putStrLn "Using given package list ..."
193 return argPackages
194 putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
195 return pkgs
197 data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
199 runCabal
200 :: Int -- ^ timeout in seconds
201 -> CabalUnderTest -- ^ cabal under test
202 -> FilePath -- ^ cabal
203 -> [String] -- ^ flags
204 -> IO (PackageName -> IO CabalTrial) -- ^ testing function.
205 runCabal timeoutSeconds cabalUnderTest cabal flags = do
206 tmpDir <- getTemporaryDirectory
208 -- cabal directory for this cabal under test
209 let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
210 CabalUnderTest1 -> "cabal1"
211 CabalUnderTest2 -> "cabal2"
213 putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
214 createDirectoryIfMissing True cabalDir
216 -- shell environment
217 currEnv <- Map.fromList <$> getEnvironment
218 let thisEnv :: [(String, String)]
219 thisEnv = Map.toList $ currEnv
220 & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
221 & Map.insert "CABAL_DIR" cabalDir
223 -- Run cabal update,
224 putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
225 (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal ["update"])
226 { cwd = Just cabalDir
227 , env = Just thisEnv
230 unless (ec == ExitSuccess) $ do
231 putStrLn uout
232 putStrLn uerr
233 exitWith ec
235 -- return an actual runner
236 return $ \pkg -> do
237 ((exitCode, err), time) <- timeEvent $ do
239 let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
240 cabalCmd = unwords $
241 [ cabal
243 , "v2-install"
245 -- These flags prevent a Cabal project or package environment from
246 -- affecting the install plan.
248 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
249 , "--package-env=non-existent-package-env"
251 -- --lib allows solving for packages with libraries or
252 -- executables.
253 , "--lib"
255 , unPackageName pkg
257 , "--dry-run"
259 -- The test doesn't currently handle stdout, so we suppress it
260 -- with silent. nowrap simplifies parsing the errors messages.
261 , "-vsilent+nowrap"
265 ++ flags
267 cmd = (shell (timeout ++ " " ++ cabalCmd))
268 { std_err = CreatePipe
269 , env = Just thisEnv
270 , cwd = Just cabalDir
273 -- TODO: Read stdout and compare the install plans.
274 (_, _, Just errh, ph) <- createProcess cmd
275 err <- BS.hGetContents errh
276 (, err) <$> waitForProcess ph
278 let exhaustiveMsg =
279 "After searching the rest of the dependency tree exhaustively"
280 result
281 | exitCode == ExitSuccess = Solution
282 | exitCode == ExitFailure 124 = Timeout
283 | fromString exhaustiveMsg `BS.isInfixOf` err = NoInstallPlan
284 | fromString "Backjump limit reached" `BS.isInfixOf` err = BackjumpLimit
285 | fromString "none of the components are available to build" `BS.isInfixOf` err = Unbuildable
286 | fromString "Dependency on unbuildable" `BS.isInfixOf` err = UnbuildableDep
287 | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
288 | fromString "Problem with module re-exports" `BS.isInfixOf` err = ModReexpIssue
289 | fromString "There is no package named" `BS.isInfixOf` err = PkgNotFound
290 | otherwise = Unknown
291 return (CabalTrial time result)
293 isSampleLargeEnough :: PValue Double -> Int -> Bool
294 isSampleLargeEnough pvalue trials =
295 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
296 -- used when both sample sizes are less than or equal to 20.
297 trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
299 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
300 isSignificantTimeDifference pvalue xs ys =
301 let toVector = V.fromList . map diffTimeToDouble
302 in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
303 Nothing -> error "not enough data for mannWhitneyUtest"
304 Just Significant -> True
305 Just NotSignificant -> False
307 -- Should we stop after the first trial of this package to save time? This
308 -- function skips the package if the results are uninteresting and the times are
309 -- within --min-run-time-percentage-difference-to-rerun.
310 shouldContinueAfterFirstTrial :: Double
311 -> NominalDiffTime
312 -> NominalDiffTime
313 -> CabalResult
314 -> CabalResult
315 -> Bool
316 shouldContinueAfterFirstTrial 0 _ _ _ _ = True
317 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
318 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
319 isSignificantResult r1 r2
320 || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
322 isSignificantResult :: CabalResult -> CabalResult -> Bool
323 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
325 -- Is this result expected in a benchmark run on all of Hackage?
326 isExpectedResult :: CabalResult -> Bool
327 isExpectedResult Solution = True
328 isExpectedResult NoInstallPlan = True
329 isExpectedResult BackjumpLimit = True
330 isExpectedResult Timeout = True
331 isExpectedResult Unbuildable = True
332 isExpectedResult UnbuildableDep = True
333 isExpectedResult ComponentCycle = True
334 isExpectedResult ModReexpIssue = True
335 isExpectedResult PkgNotFound = False
336 isExpectedResult Unknown = False
338 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
339 -- should be the same. If they aren't the same, we returns Unknown.
340 combineTrialResults :: [CabalResult] -> CabalResult
341 combineTrialResults rs
342 | allEqual rs = head rs
343 | allEqual [r | r <- rs, r /= Timeout] = Timeout
344 | otherwise = Unknown
345 where
346 allEqual :: Eq a => [a] -> Bool
347 allEqual xs = length (nub xs) == 1
349 timeEvent :: IO a -> IO (a, NominalDiffTime)
350 timeEvent task = do
351 start <- getCurrentTime
352 r <- task
353 end <- getCurrentTime
354 return (r, diffUTCTime end start)
356 diffTimeToDouble :: NominalDiffTime -> Double
357 diffTimeToDouble = fromRational . toRational
359 parserInfo :: ParserInfo Args
360 parserInfo = info (argParser <**> helper)
361 ( fullDesc
362 <> progDesc ("Find differences between two cabal commands when solving"
363 ++ " for all packages on Hackage.")
364 <> header "hackage-benchmark" )
366 argParser :: Parser Args
367 argParser = Args
368 <$> strOption
369 ( long "cabal1"
370 <> metavar "PATH"
371 <> help "First cabal executable")
372 <*> strOption
373 ( long "cabal2"
374 <> metavar "PATH"
375 <> help "Second cabal executable")
376 <*> option (words <$> str)
377 ( long "cabal1-flags"
378 <> value []
379 <> metavar "FLAGS"
380 <> help "Extra flags for the first cabal executable")
381 <*> option (words <$> str)
382 ( long "cabal2-flags"
383 <> value []
384 <> metavar "FLAGS"
385 <> help "Extra flags for the second cabal executable")
386 <*> option (map mkPackageName . words <$> str)
387 ( long "packages"
388 <> value []
389 <> metavar "PACKAGES"
390 <> help ("Space separated list of packages to test, or all of Hackage"
391 ++ " if unspecified"))
392 <*> option auto
393 ( long "min-run-time-percentage-difference-to-rerun"
394 <> showDefault
395 <> value 0.0
396 <> metavar "PERCENTAGE"
397 <> help ("Stop testing a package when the difference in run times in"
398 ++ " the first trial are within this percentage, in order to"
399 ++ " save time"))
400 <*> option (mkPValue <$> auto)
401 ( long "pvalue"
402 <> showDefault
403 <> value (mkPValue 0.05)
404 <> metavar "DOUBLE"
405 <> help ("p-value used to determine whether to print the results for"
406 ++ " each package"))
407 <*> option auto
408 ( long "trials"
409 <> showDefault
410 <> value 10
411 <> metavar "N"
412 <> help "Number of trials for each package")
413 <*> switch
414 ( long "concurrently"
415 <> help "Run cabals concurrently")
416 <*> switch
417 ( long "print-trials"
418 <> help "Whether to include the results from individual trials in the output")
419 <*> switch
420 ( long "print-skipped-packages"
421 <> help "Whether to include skipped packages in the output")
422 <*> option auto
423 ( long "timeout"
424 <> showDefault
425 <> value 90
426 <> metavar "SECONDS"
427 <> help "Maximum time to run a cabal command, in seconds")