solver-benchmarks/HackageBenchmark.hs

   1 {-# OPTIONS_GHC -fno-warn-orphans #-}
   2 {-# LANGUAGE RecordWildCards #-}
   3 {-# LANGUAGE TupleSections #-}
   4
   5 module HackageBenchmark (
   6     hackageBenchmarkMain
   7
   8 -- Exposed for testing:
   9   , CabalResult(..)
  10   , isSignificantTimeDifference
  11   , combineTrialResults
  12   , isSignificantResult
  13   , shouldContinueAfterFirstTrial
  14   ) where
  15
  16 import Control.Monad (forM_, replicateM, unless, when)
  17 import qualified Data.ByteString as B
  18 import Data.List (nub, unzip4)
  19 import Data.Maybe (isJust)
  20 import Data.Monoid ((<>))
  21 import Data.String (fromString)
  22 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
  23 import qualified Data.Vector.Unboxed as V
  24 import Options.Applicative
  25 import Statistics.Sample (mean, stdDev)
  26 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
  27                                     , mannWhitneyUCriticalValue
  28                                     , mannWhitneyUtest)
  29 import Statistics.Types (PValue, mkPValue)
  30 import System.Exit (ExitCode(..), exitFailure)
  31 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
  32                  , stdout)
  33 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
  34                       , createProcess, readProcess, shell, waitForProcess )
  35 import Text.Printf (printf)
  36
  37 import Distribution.Package (PackageName, mkPackageName, unPackageName)
  38
  39 data Args = Args {
  40     argCabal1                      :: FilePath
  41   , argCabal2                      :: FilePath
  42   , argCabal1Flags                 :: [String]
  43   , argCabal2Flags                 :: [String]
  44   , argPackages                    :: [PackageName]
  45   , argMinRunTimeDifferenceToRerun :: Double
  46   , argPValue                      :: PValue Double
  47   , argTrials                      :: Int
  48   , argPrintTrials                 :: Bool
  49   , argPrintSkippedPackages        :: Bool
  50   , argTimeoutSeconds              :: Int
  51   }
  52
  53 data CabalTrial = CabalTrial NominalDiffTime CabalResult
  54
  55 data CabalResult
  56   = Solution
  57   | NoInstallPlan
  58   | BackjumpLimit
  59   | PkgNotFound
  60   | Timeout
  61   | Unknown
  62   deriving (Eq, Show)
  63
  64 hackageBenchmarkMain :: IO ()
  65 hackageBenchmarkMain = do
  66   hSetBuffering stdout LineBuffering
  67   args@Args {..} <- execParser parserInfo
  68   checkArgs args
  69   printConfig args
  70   pkgs <- getPackages args
  71   putStrLn ""
  72
  73   let -- The maximum length of the heading and package names.
  74       nameColumnWidth :: Int
  75       nameColumnWidth =
  76           maximum $ map length $ "package" : map unPackageName pkgs
  77       runCabal1 = runCabal argTimeoutSeconds argCabal1 argCabal1Flags
  78       runCabal2 = runCabal argTimeoutSeconds argCabal2 argCabal2Flags
  79
  80   -- When the output contains both trails and summaries, label each row as
  81   -- "trial" or "summary".
  82   when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
  83   putStrLn $
  84       printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
  85              nameColumnWidth "package" "result1" "result2"
  86              "mean1" "mean2" "stddev1" "stddev2" "speedup"
  87
  88   forM_ pkgs $ \pkg -> do
  89     let printTrial msgType result1 result2 time1 time2 =
  90             putStrLn $
  91             printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
  92                    msgType nameColumnWidth (unPackageName pkg)
  93                    (show result1) (show result2)
  94                    (diffTimeToDouble time1) (diffTimeToDouble time2)
  95
  96     CabalTrial t1 r1 <- runCabal1 pkg
  97     CabalTrial t2 r2 <- runCabal2 pkg
  98     if not $
  99        shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
 100     then when argPrintSkippedPackages $
 101          if argPrintTrials
 102          then printTrial "trial (skipping)" r1 r2 t1 t2
 103          else putStrLn $ printf "%-*s (first run times were too similar)"
 104                                 nameColumnWidth (unPackageName pkg)
 105     else do
 106       when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
 107       (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
 108                             . replicateM (argTrials - 1) $ do
 109         CabalTrial t1' r1' <- runCabal1 pkg
 110         CabalTrial t2' r2' <- runCabal2 pkg
 111         when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
 112         return (t1', t2', r1', r2')
 113
 114       let result1 = combineTrialResults rs1
 115           result2 = combineTrialResults rs2
 116           times1 = V.fromList (map diffTimeToDouble ts1)
 117           times2 = V.fromList (map diffTimeToDouble ts2)
 118           mean1 = mean times1
 119           mean2 = mean times2
 120           stddev1 = stdDev times1
 121           stddev2 = stdDev times2
 122           speedup = mean1 / mean2
 123
 124       when argPrintTrials $ putStr $ printf "%-16s " "summary"
 125       if isSignificantResult result1 result2
 126           || isSignificantTimeDifference argPValue ts1 ts2
 127       then putStrLn $
 128            printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
 129                   nameColumnWidth (unPackageName pkg)
 130                   (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
 131       else when (argPrintTrials || argPrintSkippedPackages) $
 132            putStrLn $
 133            printf "%-*s (not significant)" nameColumnWidth (unPackageName pkg)
 134   where
 135     checkArgs :: Args -> IO ()
 136     checkArgs Args {..} = do
 137       let die msg = hPutStrLn stderr msg >> exitFailure
 138       unless (argTrials > 0) $ die "--trials must be greater than 0."
 139       unless (argMinRunTimeDifferenceToRerun >= 0) $
 140           die "--min-run-time-percentage-difference-to-rerun must be non-negative."
 141       unless (isSampleLargeEnough argPValue argTrials) $
 142           die "p-value is too small for the number of trials."
 143
 144     printConfig :: Args -> IO ()
 145     printConfig Args {..} = do
 146       putStrLn "Comparing:"
 147       putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
 148       callProcess argCabal1 ["--version"]
 149       putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
 150       callProcess argCabal2 ["--version"]
 151       -- TODO: Print index state.
 152       putStrLn "Base package database:"
 153       callProcess "ghc-pkg" ["list"]
 154
 155     getPackages :: Args -> IO [PackageName]
 156     getPackages Args {..} = do
 157       pkgs <-
 158           if null argPackages
 159           then do
 160             putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
 161             list <- readProcess argCabal1 ["list", "--simple-output"] ""
 162             return $ nub [mkPackageName $ head (words line) | line <- lines list]
 163           else do
 164             putStrLn "Using given package list ..."
 165             return argPackages
 166       putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
 167       return pkgs
 168
 169 runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
 170 runCabal timeoutSeconds cabal flags pkg = do
 171   ((exitCode, err), time) <- timeEvent $ do
 172     let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
 173         cabalCmd =
 174             unwords $
 175             [cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
 176         cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }
 177
 178     -- TODO: Read stdout and compare the install plans.
 179     (_, _, Just errh, ph) <- createProcess cmd
 180     err <- B.hGetContents errh
 181     (, err) <$> waitForProcess ph
 182   let exhaustiveMsg =
 183           "After searching the rest of the dependency tree exhaustively"
 184       result
 185         | exitCode == ExitSuccess                                  = Solution
 186         | exitCode == ExitFailure 124                              = Timeout
 187         | fromString exhaustiveMsg `B.isInfixOf` err               = NoInstallPlan
 188         | fromString "Backjump limit reached" `B.isInfixOf` err    = BackjumpLimit
 189         | fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
 190         | otherwise                                                = Unknown
 191   return (CabalTrial time result)
 192
 193 isSampleLargeEnough :: PValue Double -> Int -> Bool
 194 isSampleLargeEnough pvalue trials =
 195     -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
 196     -- used when both sample sizes are less than or equal to 20.
 197     trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
 198
 199 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
 200 isSignificantTimeDifference pvalue xs ys =
 201   let toVector = V.fromList . map diffTimeToDouble
 202   in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
 203        Nothing             -> error "not enough data for mannWhitneyUtest"
 204        Just Significant    -> True
 205        Just NotSignificant -> False
 206
 207 -- Should we stop after the first trial of this package to save time? This
 208 -- function skips the package if the results are uninteresting and the times are
 209 -- within --min-run-time-percentage-difference-to-rerun.
 210 shouldContinueAfterFirstTrial :: Double
 211                               -> NominalDiffTime
 212                               -> NominalDiffTime
 213                               -> CabalResult
 214                               -> CabalResult
 215                               -> Bool
 216 shouldContinueAfterFirstTrial 0                            _  _  _       _       = True
 217 shouldContinueAfterFirstTrial _                            _  _  Timeout Timeout = False
 218 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1      r2      =
 219     isSignificantResult r1 r2
 220  || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
 221
 222 isSignificantResult :: CabalResult -> CabalResult -> Bool
 223 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
 224
 225 -- Is this result expected in a benchmark run on all of Hackage?
 226 isExpectedResult :: CabalResult -> Bool
 227 isExpectedResult Solution      = True
 228 isExpectedResult NoInstallPlan = True
 229 isExpectedResult BackjumpLimit = True
 230 isExpectedResult Timeout       = True
 231 isExpectedResult PkgNotFound   = False
 232 isExpectedResult Unknown       = False
 233
 234 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
 235 -- should be the same. If they aren't the same, we returns Unknown.
 236 combineTrialResults :: [CabalResult] -> CabalResult
 237 combineTrialResults rs
 238   | allEqual rs                          = head rs
 239   | allEqual [r | r <- rs, r /= Timeout] = Timeout
 240   | otherwise                            = Unknown
 241   where
 242     allEqual :: Eq a => [a] -> Bool
 243     allEqual xs = length (nub xs) == 1
 244
 245 timeEvent :: IO a -> IO (a, NominalDiffTime)
 246 timeEvent task = do
 247   start <- getCurrentTime
 248   r <- task
 249   end <- getCurrentTime
 250   return (r, diffUTCTime end start)
 251
 252 diffTimeToDouble :: NominalDiffTime -> Double
 253 diffTimeToDouble = fromRational . toRational
 254
 255 parserInfo :: ParserInfo Args
 256 parserInfo = info (argParser <**> helper)
 257      ( fullDesc
 258     <> progDesc ("Find differences between two cabal commands when solving"
 259                    ++ " for all packages on Hackage.")
 260     <> header "hackage-benchmark" )
 261
 262 argParser :: Parser Args
 263 argParser = Args
 264     <$> strOption
 265          ( long "cabal1"
 266         <> metavar "PATH"
 267         <> help "First cabal executable")
 268     <*> strOption
 269          ( long "cabal2"
 270         <> metavar "PATH"
 271         <> help "Second cabal executable")
 272     <*> option (words <$> str)
 273          ( long "cabal1-flags"
 274         <> value []
 275         <> metavar "FLAGS"
 276         <> help "Extra flags for the first cabal executable")
 277     <*> option (words <$> str)
 278          ( long "cabal2-flags"
 279         <> value []
 280         <> metavar "FLAGS"
 281         <> help "Extra flags for the second cabal executable")
 282     <*> option (map mkPackageName . words <$> str)
 283          ( long "packages"
 284         <> value []
 285         <> metavar "PACKAGES"
 286         <> help ("Space separated list of packages to test, or all of Hackage"
 287                    ++ " if unspecified"))
 288     <*> option auto
 289          ( long "min-run-time-percentage-difference-to-rerun"
 290         <> showDefault
 291         <> value 0.0
 292         <> metavar "PERCENTAGE"
 293         <> help ("Stop testing a package when the difference in run times in"
 294                    ++ " the first trial are within this percentage, in order to"
 295                    ++ " save time"))
 296     <*> option (mkPValue <$> auto)
 297          ( long "pvalue"
 298         <> showDefault
 299         <> value (mkPValue 0.05)
 300         <> metavar "DOUBLE"
 301         <> help ("p-value used to determine whether to print the results for"
 302                    ++ " each package"))
 303     <*> option auto
 304          ( long "trials"
 305         <> showDefault
 306         <> value 10
 307         <> metavar "N"
 308         <> help "Number of trials for each package")
 309     <*> switch
 310          ( long "print-trials"
 311         <> help "Whether to include the results from individual trials in the output")
 312     <*> switch
 313          ( long "print-skipped-packages"
 314         <> help "Whether to include skipped packages in the output")
 315     <*> option auto
 316          ( long "timeout"
 317         <> showDefault
 318         <> value 90
 319         <> metavar "SECONDS"
 320         <> help "Maximum time to run a cabal command, in seconds")