Merge pull request #5171 from bgamari/master
[cabal.git] / solver-benchmarks / HackageBenchmark.hs
blobabbf33029de0cba42e7e80b72c49753dcc739024
1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 {-# LANGUAGE RecordWildCards #-}
3 {-# LANGUAGE TupleSections #-}
5 module HackageBenchmark (
6 hackageBenchmarkMain
8 -- Exposed for testing:
9 , CabalResult(..)
10 , isSignificantTimeDifference
11 , combineTrialResults
12 , isSignificantResult
13 , shouldContinueAfterFirstTrial
14 ) where
16 import Control.Monad (forM_, replicateM, unless, when)
17 import qualified Data.ByteString as B
18 import Data.List (nub, unzip4)
19 import Data.Maybe (isJust)
20 import Data.Monoid ((<>))
21 import Data.String (fromString)
22 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
23 import qualified Data.Vector.Unboxed as V
24 import Options.Applicative
25 import Statistics.Sample (mean, stdDev)
26 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
27 , mannWhitneyUCriticalValue
28 , mannWhitneyUtest)
29 import Statistics.Types (PValue, mkPValue)
30 import System.Exit (ExitCode(..), exitFailure)
31 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
32 , stdout)
33 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
34 , createProcess, readProcess, shell, waitForProcess )
35 import Text.Printf (printf)
37 import Distribution.Package (PackageName, mkPackageName, unPackageName)
39 data Args = Args {
40 argCabal1 :: FilePath
41 , argCabal2 :: FilePath
42 , argCabal1Flags :: [String]
43 , argCabal2Flags :: [String]
44 , argPackages :: [PackageName]
45 , argMinRunTimeDifferenceToRerun :: Double
46 , argPValue :: PValue Double
47 , argTrials :: Int
48 , argPrintTrials :: Bool
49 , argPrintSkippedPackages :: Bool
50 , argTimeoutSeconds :: Int
53 data CabalTrial = CabalTrial NominalDiffTime CabalResult
55 data CabalResult
56 = Solution
57 | NoInstallPlan
58 | BackjumpLimit
59 | PkgNotFound
60 | Timeout
61 | Unknown
62 deriving (Eq, Show)
64 hackageBenchmarkMain :: IO ()
65 hackageBenchmarkMain = do
66 hSetBuffering stdout LineBuffering
67 args@Args {..} <- execParser parserInfo
68 checkArgs args
69 printConfig args
70 pkgs <- getPackages args
71 putStrLn ""
73 let -- The maximum length of the heading and package names.
74 nameColumnWidth :: Int
75 nameColumnWidth =
76 maximum $ map length $ "package" : map unPackageName pkgs
77 runCabal1 = runCabal argTimeoutSeconds argCabal1 argCabal1Flags
78 runCabal2 = runCabal argTimeoutSeconds argCabal2 argCabal2Flags
80 -- When the output contains both trails and summaries, label each row as
81 -- "trial" or "summary".
82 when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
83 putStrLn $
84 printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
85 nameColumnWidth "package" "result1" "result2"
86 "mean1" "mean2" "stddev1" "stddev2" "speedup"
88 forM_ pkgs $ \pkg -> do
89 let printTrial msgType result1 result2 time1 time2 =
90 putStrLn $
91 printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
92 msgType nameColumnWidth (unPackageName pkg)
93 (show result1) (show result2)
94 (diffTimeToDouble time1) (diffTimeToDouble time2)
96 CabalTrial t1 r1 <- runCabal1 pkg
97 CabalTrial t2 r2 <- runCabal2 pkg
98 if not $
99 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
100 then when argPrintSkippedPackages $
101 if argPrintTrials
102 then printTrial "trial (skipping)" r1 r2 t1 t2
103 else putStrLn $ printf "%-*s (first run times were too similar)"
104 nameColumnWidth (unPackageName pkg)
105 else do
106 when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
107 (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
108 . replicateM (argTrials - 1) $ do
109 CabalTrial t1' r1' <- runCabal1 pkg
110 CabalTrial t2' r2' <- runCabal2 pkg
111 when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
112 return (t1', t2', r1', r2')
114 let result1 = combineTrialResults rs1
115 result2 = combineTrialResults rs2
116 times1 = V.fromList (map diffTimeToDouble ts1)
117 times2 = V.fromList (map diffTimeToDouble ts2)
118 mean1 = mean times1
119 mean2 = mean times2
120 stddev1 = stdDev times1
121 stddev2 = stdDev times2
122 speedup = mean1 / mean2
124 when argPrintTrials $ putStr $ printf "%-16s " "summary"
125 if isSignificantResult result1 result2
126 || isSignificantTimeDifference argPValue ts1 ts2
127 then putStrLn $
128 printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
129 nameColumnWidth (unPackageName pkg)
130 (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
131 else when (argPrintTrials || argPrintSkippedPackages) $
132 putStrLn $
133 printf "%-*s (not significant)" nameColumnWidth (unPackageName pkg)
134 where
135 checkArgs :: Args -> IO ()
136 checkArgs Args {..} = do
137 let die msg = hPutStrLn stderr msg >> exitFailure
138 unless (argTrials > 0) $ die "--trials must be greater than 0."
139 unless (argMinRunTimeDifferenceToRerun >= 0) $
140 die "--min-run-time-percentage-difference-to-rerun must be non-negative."
141 unless (isSampleLargeEnough argPValue argTrials) $
142 die "p-value is too small for the number of trials."
144 printConfig :: Args -> IO ()
145 printConfig Args {..} = do
146 putStrLn "Comparing:"
147 putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
148 callProcess argCabal1 ["--version"]
149 putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
150 callProcess argCabal2 ["--version"]
151 -- TODO: Print index state.
152 putStrLn "Base package database:"
153 callProcess "ghc-pkg" ["list"]
155 getPackages :: Args -> IO [PackageName]
156 getPackages Args {..} = do
157 pkgs <-
158 if null argPackages
159 then do
160 putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
161 list <- readProcess argCabal1 ["list", "--simple-output"] ""
162 return $ nub [mkPackageName $ head (words line) | line <- lines list]
163 else do
164 putStrLn "Using given package list ..."
165 return argPackages
166 putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
167 return pkgs
169 runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
170 runCabal timeoutSeconds cabal flags pkg = do
171 ((exitCode, err), time) <- timeEvent $ do
172 let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
173 cabalCmd =
174 unwords $
175 [cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
176 cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }
178 -- TODO: Read stdout and compare the install plans.
179 (_, _, Just errh, ph) <- createProcess cmd
180 err <- B.hGetContents errh
181 (, err) <$> waitForProcess ph
182 let exhaustiveMsg =
183 "After searching the rest of the dependency tree exhaustively"
184 result
185 | exitCode == ExitSuccess = Solution
186 | exitCode == ExitFailure 124 = Timeout
187 | fromString exhaustiveMsg `B.isInfixOf` err = NoInstallPlan
188 | fromString "Backjump limit reached" `B.isInfixOf` err = BackjumpLimit
189 | fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
190 | otherwise = Unknown
191 return (CabalTrial time result)
193 isSampleLargeEnough :: PValue Double -> Int -> Bool
194 isSampleLargeEnough pvalue trials =
195 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
196 -- used when both sample sizes are less than or equal to 20.
197 trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
199 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
200 isSignificantTimeDifference pvalue xs ys =
201 let toVector = V.fromList . map diffTimeToDouble
202 in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
203 Nothing -> error "not enough data for mannWhitneyUtest"
204 Just Significant -> True
205 Just NotSignificant -> False
207 -- Should we stop after the first trial of this package to save time? This
208 -- function skips the package if the results are uninteresting and the times are
209 -- within --min-run-time-percentage-difference-to-rerun.
210 shouldContinueAfterFirstTrial :: Double
211 -> NominalDiffTime
212 -> NominalDiffTime
213 -> CabalResult
214 -> CabalResult
215 -> Bool
216 shouldContinueAfterFirstTrial 0 _ _ _ _ = True
217 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout = False
218 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2 =
219 isSignificantResult r1 r2
220 || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
222 isSignificantResult :: CabalResult -> CabalResult -> Bool
223 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
225 -- Is this result expected in a benchmark run on all of Hackage?
226 isExpectedResult :: CabalResult -> Bool
227 isExpectedResult Solution = True
228 isExpectedResult NoInstallPlan = True
229 isExpectedResult BackjumpLimit = True
230 isExpectedResult Timeout = True
231 isExpectedResult PkgNotFound = False
232 isExpectedResult Unknown = False
234 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
235 -- should be the same. If they aren't the same, we returns Unknown.
236 combineTrialResults :: [CabalResult] -> CabalResult
237 combineTrialResults rs
238 | allEqual rs = head rs
239 | allEqual [r | r <- rs, r /= Timeout] = Timeout
240 | otherwise = Unknown
241 where
242 allEqual :: Eq a => [a] -> Bool
243 allEqual xs = length (nub xs) == 1
245 timeEvent :: IO a -> IO (a, NominalDiffTime)
246 timeEvent task = do
247 start <- getCurrentTime
248 r <- task
249 end <- getCurrentTime
250 return (r, diffUTCTime end start)
252 diffTimeToDouble :: NominalDiffTime -> Double
253 diffTimeToDouble = fromRational . toRational
255 parserInfo :: ParserInfo Args
256 parserInfo = info (argParser <**> helper)
257 ( fullDesc
258 <> progDesc ("Find differences between two cabal commands when solving"
259 ++ " for all packages on Hackage.")
260 <> header "hackage-benchmark" )
262 argParser :: Parser Args
263 argParser = Args
264 <$> strOption
265 ( long "cabal1"
266 <> metavar "PATH"
267 <> help "First cabal executable")
268 <*> strOption
269 ( long "cabal2"
270 <> metavar "PATH"
271 <> help "Second cabal executable")
272 <*> option (words <$> str)
273 ( long "cabal1-flags"
274 <> value []
275 <> metavar "FLAGS"
276 <> help "Extra flags for the first cabal executable")
277 <*> option (words <$> str)
278 ( long "cabal2-flags"
279 <> value []
280 <> metavar "FLAGS"
281 <> help "Extra flags for the second cabal executable")
282 <*> option (map mkPackageName . words <$> str)
283 ( long "packages"
284 <> value []
285 <> metavar "PACKAGES"
286 <> help ("Space separated list of packages to test, or all of Hackage"
287 ++ " if unspecified"))
288 <*> option auto
289 ( long "min-run-time-percentage-difference-to-rerun"
290 <> showDefault
291 <> value 0.0
292 <> metavar "PERCENTAGE"
293 <> help ("Stop testing a package when the difference in run times in"
294 ++ " the first trial are within this percentage, in order to"
295 ++ " save time"))
296 <*> option (mkPValue <$> auto)
297 ( long "pvalue"
298 <> showDefault
299 <> value (mkPValue 0.05)
300 <> metavar "DOUBLE"
301 <> help ("p-value used to determine whether to print the results for"
302 ++ " each package"))
303 <*> option auto
304 ( long "trials"
305 <> showDefault
306 <> value 10
307 <> metavar "N"
308 <> help "Number of trials for each package")
309 <*> switch
310 ( long "print-trials"
311 <> help "Whether to include the results from individual trials in the output")
312 <*> switch
313 ( long "print-skipped-packages"
314 <> help "Whether to include skipped packages in the output")
315 <*> option auto
316 ( long "timeout"
317 <> showDefault
318 <> value 90
319 <> metavar "SECONDS"
320 <> help "Maximum time to run a cabal command, in seconds")