solver-benchmarks/HackageBenchmark.hs

   1 {-# OPTIONS_GHC -fno-warn-orphans #-}
   2 {-# LANGUAGE RecordWildCards #-}
   3 {-# LANGUAGE TupleSections #-}
   4 {-# LANGUAGE ScopedTypeVariables #-}
   5
   6 module HackageBenchmark (
   7     hackageBenchmarkMain
   8
   9 -- Exposed for testing:
  10   , CabalResult(..)
  11   , isSignificantTimeDifference
  12   , combineTrialResults
  13   , isSignificantResult
  14   , shouldContinueAfterFirstTrial
  15   ) where
  16
  17 import Control.Concurrent.Async (concurrently)
  18 import Control.Monad (forM, replicateM, unless, when)
  19 import qualified Data.ByteString as BS
  20 import Data.List (nub, unzip4)
  21 import Data.Maybe (isJust, catMaybes)
  22 import Data.Monoid ((<>))
  23 import Data.String (fromString)
  24 import Data.Function ((&))
  25 import Data.Time (NominalDiffTime, diffUTCTime, getCurrentTime)
  26 import qualified Data.Vector.Unboxed as V
  27 import Options.Applicative
  28 import Statistics.Sample (mean, stdDev, geometricMean)
  29 import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
  30                                     , mannWhitneyUCriticalValue
  31                                     , mannWhitneyUtest)
  32 import Statistics.Types (PValue, mkPValue)
  33 import System.Directory (getTemporaryDirectory, createDirectoryIfMissing)
  34 import System.Environment (getEnvironment)
  35 import System.Exit (ExitCode(..), exitWith, exitFailure)
  36 import System.FilePath ((</>))
  37 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
  38                  , stdout)
  39 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
  40                       , createProcess, readProcess, shell, waitForProcess, proc, readCreateProcessWithExitCode )
  41 import Text.Printf (printf)
  42
  43 import qualified Data.Map.Strict as Map
  44
  45 import Distribution.Package (PackageName, mkPackageName, unPackageName)
  46
  47 data Args = Args {
  48     argCabal1                      :: FilePath
  49   , argCabal2                      :: FilePath
  50   , argCabal1Flags                 :: [String]
  51   , argCabal2Flags                 :: [String]
  52   , argPackages                    :: [PackageName]
  53   , argMinRunTimeDifferenceToRerun :: Double
  54   , argPValue                      :: PValue Double
  55   , argTrials                      :: Int
  56   , argConcurrently                :: Bool
  57   , argPrintTrials                 :: Bool
  58   , argPrintSkippedPackages        :: Bool
  59   , argTimeoutSeconds              :: Int
  60   }
  61
  62 data CabalTrial = CabalTrial NominalDiffTime CabalResult
  63
  64 data CabalResult
  65   = Solution
  66   | NoInstallPlan
  67   | BackjumpLimit
  68   | Unbuildable
  69   | UnbuildableDep
  70   | ComponentCycle
  71   | ModReexpIssue
  72   | PkgNotFound
  73   | Timeout
  74   | Unknown
  75   deriving (Eq, Show)
  76
  77 hackageBenchmarkMain :: IO ()
  78 hackageBenchmarkMain = do
  79   hSetBuffering stdout LineBuffering
  80   args@Args {..} <- execParser parserInfo
  81   checkArgs args
  82   printConfig args
  83   pkgs <- getPackages args
  84   putStrLn ""
  85
  86   let concurrently' :: IO a -> IO b -> IO (a, b)
  87       concurrently' | argConcurrently = concurrently
  88                     | otherwise       = \ma mb -> do { a <- ma; b <- mb; return (a, b) }
  89
  90   let -- The maximum length of the heading and package names.
  91       nameColumnWidth :: Int
  92       nameColumnWidth =
  93           maximum $ map length $ "package" : map unPackageName pkgs
  94
  95   -- create cabal runners
  96   runCabal1 <- runCabal argTimeoutSeconds CabalUnderTest1 argCabal1 argCabal1Flags
  97   runCabal2 <- runCabal argTimeoutSeconds CabalUnderTest2 argCabal2 argCabal2Flags
  98
  99   -- When the output contains both trails and summaries, label each row as
 100   -- "trial" or "summary".
 101   when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
 102   putStrLn $
 103       printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
 104              nameColumnWidth "package" "result1" "result2"
 105              "mean1" "mean2" "stddev1" "stddev2" "speedup"
 106
 107   speedups :: [Double] <- fmap catMaybes $ forM pkgs $ \pkg -> do
 108     let printTrial msgType result1 result2 time1 time2 =
 109             putStrLn $
 110             printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
 111                    msgType nameColumnWidth (unPackageName pkg)
 112                    (show result1) (show result2)
 113                    (diffTimeToDouble time1) (diffTimeToDouble time2)
 114
 115     (CabalTrial t1 r1, CabalTrial t2 r2) <- runCabal1 pkg `concurrently'` runCabal2 pkg
 116
 117     if not $
 118        shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
 119     then do
 120       when argPrintSkippedPackages $
 121          if argPrintTrials
 122          then printTrial "trial (skipping)" r1 r2 t1 t2
 123          else putStrLn $ printf "%-*s (first run times were too similar)"
 124                                 nameColumnWidth (unPackageName pkg)
 125       return Nothing
 126     else do
 127       when argPrintTrials $ printTrial "trial" r1 r2 t1 t2
 128       (ts1, ts2, rs1, rs2) <- (unzip4 . ((t1, t2, r1, r2) :) <$>)
 129                             . replicateM (argTrials - 1) $ do
 130
 131         (CabalTrial t1' r1', CabalTrial t2' r2') <- runCabal1 pkg `concurrently'` runCabal2 pkg
 132         when argPrintTrials $ printTrial "trial" r1' r2' t1' t2'
 133         return (t1', t2', r1', r2')
 134
 135       let result1 = combineTrialResults rs1
 136           result2 = combineTrialResults rs2
 137           times1 = V.fromList (map diffTimeToDouble ts1)
 138           times2 = V.fromList (map diffTimeToDouble ts2)
 139           mean1 = mean times1
 140           mean2 = mean times2
 141           stddev1 = stdDev times1
 142           stddev2 = stdDev times2
 143           speedup = mean1 / mean2
 144
 145       when argPrintTrials $ putStr $ printf "%-16s " "summary"
 146       if isSignificantResult result1 result2
 147           || isSignificantTimeDifference argPValue ts1 ts2
 148       then putStrLn $
 149            printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
 150                   nameColumnWidth (unPackageName pkg)
 151                   (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
 152       else when (argPrintTrials || argPrintSkippedPackages) $
 153            putStrLn $
 154            printf "%-*s (not significant, speedup = %10.3f)" nameColumnWidth (unPackageName pkg) speedup
 155
 156       -- return speedup value
 157       return (Just speedup)
 158
 159   -- finally, calculate the geometric mean of speedups
 160   printf "Geometric mean of %d packages' speedups is %10.3f\n" (length speedups) (geometricMean (V.fromList speedups))
 161
 162   where
 163     checkArgs :: Args -> IO ()
 164     checkArgs Args {..} = do
 165       let die msg = hPutStrLn stderr msg >> exitFailure
 166       unless (argTrials > 0) $ die "--trials must be greater than 0."
 167       unless (argMinRunTimeDifferenceToRerun >= 0) $
 168           die "--min-run-time-percentage-difference-to-rerun must be non-negative."
 169       unless (isSampleLargeEnough argPValue argTrials) $
 170           die "p-value is too small for the number of trials."
 171
 172     printConfig :: Args -> IO ()
 173     printConfig Args {..} = do
 174       putStrLn "Comparing:"
 175       putStrLn $ "1: " ++ argCabal1 ++ " " ++ unwords argCabal1Flags
 176       callProcess argCabal1 ["--version"]
 177       putStrLn $ "2: " ++ argCabal2 ++ " " ++ unwords argCabal2Flags
 178       callProcess argCabal2 ["--version"]
 179       -- TODO: Print index state.
 180       putStrLn "Base package database:"
 181       callProcess "ghc-pkg" ["list"]
 182
 183     getPackages :: Args -> IO [PackageName]
 184     getPackages Args {..} = do
 185       pkgs <-
 186           if null argPackages
 187           then do
 188             putStrLn $ "Obtaining the package list (using " ++ argCabal1 ++ ") ..."
 189             list <- readProcess argCabal1 ["list", "--simple-output"] ""
 190             return $ nub [mkPackageName $ head (words line) | line <- lines list]
 191           else do
 192             putStrLn "Using given package list ..."
 193             return argPackages
 194       putStrLn $ "Done, got " ++ show (length pkgs) ++ " packages."
 195       return pkgs
 196
 197 data CabalUnderTest = CabalUnderTest1 | CabalUnderTest2
 198
 199 runCabal
 200     :: Int             -- ^ timeout in seconds
 201     -> CabalUnderTest  -- ^ cabal under test
 202     -> FilePath        -- ^ cabal
 203     -> [String]        -- ^ flags
 204     -> IO (PackageName  -> IO CabalTrial)  -- ^ testing function.
 205 runCabal timeoutSeconds cabalUnderTest cabal flags = do
 206   tmpDir <- getTemporaryDirectory
 207
 208   -- cabal directory for this cabal under test
 209   let cabalDir = tmpDir </> "solver-benchmarks-workdir" </> case cabalUnderTest of
 210           CabalUnderTest1 -> "cabal1"
 211           CabalUnderTest2 -> "cabal2"
 212
 213   putStrLn $ "Cabal directory (for " ++ cabal ++ ") " ++ cabalDir
 214   createDirectoryIfMissing True cabalDir
 215
 216   -- shell environment
 217   currEnv <- Map.fromList <$>  getEnvironment
 218   let thisEnv :: [(String, String)]
 219       thisEnv = Map.toList $ currEnv
 220           & Map.insert "CABAL_CONFIG" (cabalDir </> "config")
 221           & Map.insert "CABAL_DIR"     cabalDir
 222
 223   -- Run cabal update,
 224   putStrLn $ "Running cabal update (using " ++ cabal ++ ") ..."
 225   (ec, uout, uerr) <- readCreateProcessWithExitCode (proc cabal ["update"])
 226       { cwd = Just cabalDir
 227       , env = Just thisEnv
 228       }
 229       ""
 230   unless (ec == ExitSuccess) $ do
 231       putStrLn uout
 232       putStrLn uerr
 233       exitWith ec
 234
 235   -- return an actual runner
 236   return $ \pkg -> do
 237     ((exitCode, err), time) <- timeEvent $ do
 238
 239       let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
 240           cabalCmd = unwords $
 241               [ cabal
 242
 243               , "v2-install"
 244
 245                 -- These flags prevent a Cabal project or package environment from
 246                 -- affecting the install plan.
 247                 --
 248                 -- Note: we are somewhere in /tmp, hopefully there is no cabal.project on upper level
 249               , "--package-env=non-existent-package-env"
 250
 251                 -- --lib allows solving for packages with libraries or
 252                 -- executables.
 253               , "--lib"
 254
 255               , unPackageName pkg
 256
 257               , "--dry-run"
 258
 259                 -- The test doesn't currently handle stdout, so we suppress it
 260                 -- with silent. nowrap simplifies parsing the errors messages.
 261               , "-vsilent+nowrap"
 262
 263               ]
 264
 265                ++ flags
 266
 267           cmd = (shell (timeout ++ " " ++ cabalCmd))
 268               { std_err = CreatePipe
 269               , env = Just thisEnv
 270               , cwd = Just cabalDir
 271               }
 272
 273       -- TODO: Read stdout and compare the install plans.
 274       (_, _, Just errh, ph) <- createProcess cmd
 275       err <- BS.hGetContents errh
 276       (, err) <$> waitForProcess ph
 277
 278     let exhaustiveMsg =
 279             "After searching the rest of the dependency tree exhaustively"
 280         result
 281           | exitCode == ExitSuccess                                                          = Solution
 282           | exitCode == ExitFailure 124                                                      = Timeout
 283           | fromString exhaustiveMsg `BS.isInfixOf` err                                       = NoInstallPlan
 284           | fromString "Backjump limit reached" `BS.isInfixOf` err                            = BackjumpLimit
 285           | fromString "none of the components are available to build" `BS.isInfixOf` err     = Unbuildable
 286           | fromString "Dependency on unbuildable" `BS.isInfixOf` err                         = UnbuildableDep
 287           | fromString "Dependency cycle between the following components" `BS.isInfixOf` err = ComponentCycle
 288           | fromString "Problem with module re-exports" `BS.isInfixOf` err                    = ModReexpIssue
 289           | fromString "There is no package named" `BS.isInfixOf` err                         = PkgNotFound
 290           | otherwise                                                                        = Unknown
 291     return (CabalTrial time result)
 292
 293 isSampleLargeEnough :: PValue Double -> Int -> Bool
 294 isSampleLargeEnough pvalue trials =
 295     -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
 296     -- used when both sample sizes are less than or equal to 20.
 297     trials > 20 || isJust (mannWhitneyUCriticalValue (trials, trials) pvalue)
 298
 299 isSignificantTimeDifference :: PValue Double -> [NominalDiffTime] -> [NominalDiffTime] -> Bool
 300 isSignificantTimeDifference pvalue xs ys =
 301   let toVector = V.fromList . map diffTimeToDouble
 302   in case mannWhitneyUtest SamplesDiffer pvalue (toVector xs) (toVector ys) of
 303        Nothing             -> error "not enough data for mannWhitneyUtest"
 304        Just Significant    -> True
 305        Just NotSignificant -> False
 306
 307 -- Should we stop after the first trial of this package to save time? This
 308 -- function skips the package if the results are uninteresting and the times are
 309 -- within --min-run-time-percentage-difference-to-rerun.
 310 shouldContinueAfterFirstTrial :: Double
 311                               -> NominalDiffTime
 312                               -> NominalDiffTime
 313                               -> CabalResult
 314                               -> CabalResult
 315                               -> Bool
 316 shouldContinueAfterFirstTrial 0                            _  _  _       _       = True
 317 shouldContinueAfterFirstTrial _                            _  _  Timeout Timeout = False
 318 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1      r2      =
 319     isSignificantResult r1 r2
 320  || abs (t1 - t2) / min t1 t2 >= realToFrac (maxRunTimeDifferenceToIgnore / 100)
 321
 322 isSignificantResult :: CabalResult -> CabalResult -> Bool
 323 isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
 324
 325 -- Is this result expected in a benchmark run on all of Hackage?
 326 isExpectedResult :: CabalResult -> Bool
 327 isExpectedResult Solution       = True
 328 isExpectedResult NoInstallPlan  = True
 329 isExpectedResult BackjumpLimit  = True
 330 isExpectedResult Timeout        = True
 331 isExpectedResult Unbuildable    = True
 332 isExpectedResult UnbuildableDep = True
 333 isExpectedResult ComponentCycle = True
 334 isExpectedResult ModReexpIssue  = True
 335 isExpectedResult PkgNotFound    = False
 336 isExpectedResult Unknown        = False
 337
 338 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
 339 -- should be the same. If they aren't the same, we returns Unknown.
 340 combineTrialResults :: [CabalResult] -> CabalResult
 341 combineTrialResults rs
 342   | allEqual rs                          = head rs
 343   | allEqual [r | r <- rs, r /= Timeout] = Timeout
 344   | otherwise                            = Unknown
 345   where
 346     allEqual :: Eq a => [a] -> Bool
 347     allEqual xs = length (nub xs) == 1
 348
 349 timeEvent :: IO a -> IO (a, NominalDiffTime)
 350 timeEvent task = do
 351   start <- getCurrentTime
 352   r <- task
 353   end <- getCurrentTime
 354   return (r, diffUTCTime end start)
 355
 356 diffTimeToDouble :: NominalDiffTime -> Double
 357 diffTimeToDouble = fromRational . toRational
 358
 359 parserInfo :: ParserInfo Args
 360 parserInfo = info (argParser <**> helper)
 361      ( fullDesc
 362     <> progDesc ("Find differences between two cabal commands when solving"
 363                    ++ " for all packages on Hackage.")
 364     <> header "hackage-benchmark" )
 365
 366 argParser :: Parser Args
 367 argParser = Args
 368     <$> strOption
 369          ( long "cabal1"
 370         <> metavar "PATH"
 371         <> help "First cabal executable")
 372     <*> strOption
 373          ( long "cabal2"
 374         <> metavar "PATH"
 375         <> help "Second cabal executable")
 376     <*> option (words <$> str)
 377          ( long "cabal1-flags"
 378         <> value []
 379         <> metavar "FLAGS"
 380         <> help "Extra flags for the first cabal executable")
 381     <*> option (words <$> str)
 382          ( long "cabal2-flags"
 383         <> value []
 384         <> metavar "FLAGS"
 385         <> help "Extra flags for the second cabal executable")
 386     <*> option (map mkPackageName . words <$> str)
 387          ( long "packages"
 388         <> value []
 389         <> metavar "PACKAGES"
 390         <> help ("Space separated list of packages to test, or all of Hackage"
 391                    ++ " if unspecified"))
 392     <*> option auto
 393          ( long "min-run-time-percentage-difference-to-rerun"
 394         <> showDefault
 395         <> value 0.0
 396         <> metavar "PERCENTAGE"
 397         <> help ("Stop testing a package when the difference in run times in"
 398                    ++ " the first trial are within this percentage, in order to"
 399                    ++ " save time"))
 400     <*> option (mkPValue <$> auto)
 401          ( long "pvalue"
 402         <> showDefault
 403         <> value (mkPValue 0.05)
 404         <> metavar "DOUBLE"
 405         <> help ("p-value used to determine whether to print the results for"
 406                    ++ " each package"))
 407     <*> option auto
 408          ( long "trials"
 409         <> showDefault
 410         <> value 10
 411         <> metavar "N"
 412         <> help "Number of trials for each package")
 413     <*> switch
 414          ( long "concurrently"
 415         <> help "Run cabals concurrently")
 416     <*> switch
 417          ( long "print-trials"
 418         <> help "Whether to include the results from individual trials in the output")
 419     <*> switch
 420          ( long "print-skipped-packages"
 421         <> help "Whether to include skipped packages in the output")
 422     <*> option auto
 423          ( long "timeout"
 424         <> showDefault
 425         <> value 90
 426         <> metavar "SECONDS"
 427         <> help "Maximum time to run a cabal command, in seconds")