1 {-# OPTIONS_GHC -fno-warn-orphans #-}
2 {-# LANGUAGE RecordWildCards #-}
3 {-# LANGUAGE TupleSections #-}
5 module HackageBenchmark
(
8 -- Exposed for testing:
10 , isSignificantTimeDifference
13 , shouldContinueAfterFirstTrial
16 import Control
.Monad
(forM_
, replicateM
, unless, when)
17 import qualified Data
.ByteString
as B
18 import Data
.List
(nub, unzip4)
19 import Data
.Maybe (isJust)
20 import Data
.Monoid
((<>))
21 import Data
.String (fromString
)
22 import Data
.Time
(NominalDiffTime
, diffUTCTime
, getCurrentTime
)
23 import qualified Data
.Vector
.Unboxed
as V
24 import Options
.Applicative
25 import Statistics
.Sample
(mean
, stdDev
)
26 import Statistics
.Test
.MannWhitneyU
( PositionTest
(..), TestResult
(..)
27 , mannWhitneyUCriticalValue
29 import Statistics
.Types
(PValue
, mkPValue
)
30 import System
.Exit
(ExitCode(..), exitFailure)
31 import System
.IO ( BufferMode(LineBuffering
), hPutStrLn, hSetBuffering, stderr
33 import System
.Process
( StdStream
(CreatePipe
), CreateProcess
(..), callProcess
34 , createProcess
, readProcess
, shell
, waitForProcess
)
35 import Text
.Printf
(printf
)
37 import Distribution
.Package
(PackageName
, mkPackageName
, unPackageName
)
41 , argCabal2
:: FilePath
42 , argCabal1Flags
:: [String]
43 , argCabal2Flags
:: [String]
44 , argPackages
:: [PackageName
]
45 , argMinRunTimeDifferenceToRerun
:: Double
46 , argPValue
:: PValue
Double
48 , argPrintTrials
:: Bool
49 , argPrintSkippedPackages
:: Bool
50 , argTimeoutSeconds
:: Int
53 data CabalTrial
= CabalTrial NominalDiffTime CabalResult
64 hackageBenchmarkMain
:: IO ()
65 hackageBenchmarkMain
= do
66 hSetBuffering stdout LineBuffering
67 args
@Args
{..} <- execParser parserInfo
70 pkgs
<- getPackages args
73 let -- The maximum length of the heading and package names.
74 nameColumnWidth
:: Int
76 maximum $ map length $ "package" : map unPackageName pkgs
77 runCabal1
= runCabal argTimeoutSeconds argCabal1 argCabal1Flags
78 runCabal2
= runCabal argTimeoutSeconds argCabal2 argCabal2Flags
80 -- When the output contains both trails and summaries, label each row as
81 -- "trial" or "summary".
82 when argPrintTrials
$ putStr $ printf
"%-16s " "trial/summary"
84 printf
"%-*s %-13s %-13s %11s %11s %11s %11s %11s"
85 nameColumnWidth
"package" "result1" "result2"
86 "mean1" "mean2" "stddev1" "stddev2" "speedup"
88 forM_ pkgs
$ \pkg
-> do
89 let printTrial msgType result1 result2 time1 time2
=
91 printf
"%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
92 msgType nameColumnWidth
(unPackageName pkg
)
93 (show result1
) (show result2
)
94 (diffTimeToDouble time1
) (diffTimeToDouble time2
)
96 CabalTrial t1 r1
<- runCabal1 pkg
97 CabalTrial t2 r2
<- runCabal2 pkg
99 shouldContinueAfterFirstTrial argMinRunTimeDifferenceToRerun t1 t2 r1 r2
100 then when argPrintSkippedPackages
$
102 then printTrial
"trial (skipping)" r1 r2 t1 t2
103 else putStrLn $ printf
"%-*s (first run times were too similar)"
104 nameColumnWidth
(unPackageName pkg
)
106 when argPrintTrials
$ printTrial
"trial" r1 r2 t1 t2
107 (ts1
, ts2
, rs1
, rs2
) <- (unzip4 . ((t1
, t2
, r1
, r2
) :) <$>)
108 . replicateM
(argTrials
- 1) $ do
109 CabalTrial t1
' r1
' <- runCabal1 pkg
110 CabalTrial t2
' r2
' <- runCabal2 pkg
111 when argPrintTrials
$ printTrial
"trial" r1
' r2
' t1
' t2
'
112 return (t1
', t2
', r1
', r2
')
114 let result1
= combineTrialResults rs1
115 result2
= combineTrialResults rs2
116 times1
= V
.fromList
(map diffTimeToDouble ts1
)
117 times2
= V
.fromList
(map diffTimeToDouble ts2
)
120 stddev1
= stdDev times1
121 stddev2
= stdDev times2
122 speedup
= mean1
/ mean2
124 when argPrintTrials
$ putStr $ printf
"%-16s " "summary"
125 if isSignificantResult result1 result2
126 || isSignificantTimeDifference argPValue ts1 ts2
128 printf
"%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
129 nameColumnWidth
(unPackageName pkg
)
130 (show result1
) (show result2
) mean1 mean2 stddev1 stddev2 speedup
131 else when (argPrintTrials || argPrintSkippedPackages
) $
133 printf
"%-*s (not significant)" nameColumnWidth
(unPackageName pkg
)
135 checkArgs
:: Args
-> IO ()
136 checkArgs Args
{..} = do
137 let die msg
= hPutStrLn stderr msg
>> exitFailure
138 unless (argTrials
> 0) $ die
"--trials must be greater than 0."
139 unless (argMinRunTimeDifferenceToRerun
>= 0) $
140 die
"--min-run-time-percentage-difference-to-rerun must be non-negative."
141 unless (isSampleLargeEnough argPValue argTrials
) $
142 die
"p-value is too small for the number of trials."
144 printConfig
:: Args
-> IO ()
145 printConfig Args
{..} = do
146 putStrLn "Comparing:"
147 putStrLn $ "1: " ++ argCabal1
++ " " ++ unwords argCabal1Flags
148 callProcess argCabal1
["--version"]
149 putStrLn $ "2: " ++ argCabal2
++ " " ++ unwords argCabal2Flags
150 callProcess argCabal2
["--version"]
151 -- TODO: Print index state.
152 putStrLn "Base package database:"
153 callProcess
"ghc-pkg" ["list"]
155 getPackages
:: Args
-> IO [PackageName
]
156 getPackages Args
{..} = do
160 putStrLn $ "Obtaining the package list (using " ++ argCabal1
++ ") ..."
161 list <- readProcess argCabal1
["list", "--simple-output"] ""
162 return $ nub [mkPackageName
$ head (words line
) | line
<- lines list]
164 putStrLn "Using given package list ..."
166 putStrLn $ "Done, got " ++ show (length pkgs
) ++ " packages."
169 runCabal
:: Int -> FilePath -> [String] -> PackageName
-> IO CabalTrial
170 runCabal timeoutSeconds cabal flags pkg
= do
171 ((exitCode
, err
), time
) <- timeEvent
$ do
172 let timeout
= "timeout --foreground -sINT " ++ show timeoutSeconds
175 [cabal
, "install", unPackageName pkg
, "--dry-run", "-v0"] ++ flags
176 cmd
= (shell
(timeout
++ " " ++ cabalCmd
)) { std_err
= CreatePipe
}
178 -- TODO: Read stdout and compare the install plans.
179 (_
, _
, Just errh
, ph
) <- createProcess cmd
180 err
<- B
.hGetContents errh
181 (, err
) <$> waitForProcess ph
183 "After searching the rest of the dependency tree exhaustively"
185 | exitCode
== ExitSuccess
= Solution
186 | exitCode
== ExitFailure
124 = Timeout
187 | fromString exhaustiveMsg `B
.isInfixOf` err
= NoInstallPlan
188 | fromString
"Backjump limit reached" `B
.isInfixOf` err
= BackjumpLimit
189 | fromString
"There is no package named" `B
.isInfixOf` err
= PkgNotFound
190 |
otherwise = Unknown
191 return (CabalTrial time result
)
193 isSampleLargeEnough
:: PValue
Double -> Int -> Bool
194 isSampleLargeEnough pvalue trials
=
195 -- mannWhitneyUCriticalValue, which can fail with too few samples, is only
196 -- used when both sample sizes are less than or equal to 20.
197 trials
> 20 ||
isJust (mannWhitneyUCriticalValue
(trials
, trials
) pvalue
)
199 isSignificantTimeDifference
:: PValue
Double -> [NominalDiffTime
] -> [NominalDiffTime
] -> Bool
200 isSignificantTimeDifference pvalue xs ys
=
201 let toVector
= V
.fromList
. map diffTimeToDouble
202 in case mannWhitneyUtest SamplesDiffer pvalue
(toVector xs
) (toVector ys
) of
203 Nothing
-> error "not enough data for mannWhitneyUtest"
204 Just Significant
-> True
205 Just NotSignificant
-> False
207 -- Should we stop after the first trial of this package to save time? This
208 -- function skips the package if the results are uninteresting and the times are
209 -- within --min-run-time-percentage-difference-to-rerun.
210 shouldContinueAfterFirstTrial
:: Double
216 shouldContinueAfterFirstTrial
0 _ _ _ _
= True
217 shouldContinueAfterFirstTrial _ _ _ Timeout Timeout
= False
218 shouldContinueAfterFirstTrial maxRunTimeDifferenceToIgnore t1 t2 r1 r2
=
219 isSignificantResult r1 r2
220 ||
abs (t1
- t2
) / min t1 t2
>= realToFrac (maxRunTimeDifferenceToIgnore
/ 100)
222 isSignificantResult
:: CabalResult
-> CabalResult
-> Bool
223 isSignificantResult r1 r2
= r1
/= r2 ||
not (isExpectedResult r1
)
225 -- Is this result expected in a benchmark run on all of Hackage?
226 isExpectedResult
:: CabalResult
-> Bool
227 isExpectedResult Solution
= True
228 isExpectedResult NoInstallPlan
= True
229 isExpectedResult BackjumpLimit
= True
230 isExpectedResult Timeout
= True
231 isExpectedResult PkgNotFound
= False
232 isExpectedResult Unknown
= False
234 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
235 -- should be the same. If they aren't the same, we returns Unknown.
236 combineTrialResults
:: [CabalResult
] -> CabalResult
237 combineTrialResults rs
238 | allEqual rs
= head rs
239 | allEqual
[r | r
<- rs
, r
/= Timeout
] = Timeout
240 |
otherwise = Unknown
242 allEqual
:: Eq a
=> [a
] -> Bool
243 allEqual xs
= length (nub xs
) == 1
245 timeEvent
:: IO a
-> IO (a
, NominalDiffTime
)
247 start
<- getCurrentTime
249 end
<- getCurrentTime
250 return (r
, diffUTCTime end start
)
252 diffTimeToDouble
:: NominalDiffTime
-> Double
253 diffTimeToDouble
= fromRational . toRational
255 parserInfo
:: ParserInfo Args
256 parserInfo
= info
(argParser
<**> helper
)
258 <> progDesc
("Find differences between two cabal commands when solving"
259 ++ " for all packages on Hackage.")
260 <> header
"hackage-benchmark" )
262 argParser
:: Parser Args
267 <> help
"First cabal executable")
271 <> help
"Second cabal executable")
272 <*> option
(words <$> str
)
273 ( long
"cabal1-flags"
276 <> help
"Extra flags for the first cabal executable")
277 <*> option
(words <$> str
)
278 ( long
"cabal2-flags"
281 <> help
"Extra flags for the second cabal executable")
282 <*> option
(map mkPackageName
. words <$> str
)
285 <> metavar
"PACKAGES"
286 <> help
("Space separated list of packages to test, or all of Hackage"
287 ++ " if unspecified"))
289 ( long
"min-run-time-percentage-difference-to-rerun"
292 <> metavar
"PERCENTAGE"
293 <> help
("Stop testing a package when the difference in run times in"
294 ++ " the first trial are within this percentage, in order to"
296 <*> option
(mkPValue
<$> auto
)
299 <> value (mkPValue
0.05)
301 <> help
("p-value used to determine whether to print the results for"
308 <> help
"Number of trials for each package")
310 ( long
"print-trials"
311 <> help
"Whether to include the results from individual trials in the output")
313 ( long
"print-skipped-packages"
314 <> help
"Whether to include skipped packages in the output")
320 <> help
"Maximum time to run a cabal command, in seconds")