benchtests/Makefile

   1 # Copyright (C) 2013-2023 Free Software Foundation, Inc.
   2 # Copyright The GNU Toolchain Authors.
   3 # This file is part of the GNU C Library.
   4
   5 # The GNU C Library is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU Lesser General Public
   7 # License as published by the Free Software Foundation; either
   8 # version 2.1 of the License, or (at your option) any later version.
   9
  10 # The GNU C Library is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 # Lesser General Public License for more details.
  14
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with the GNU C Library; if not, see
  17 # <https://www.gnu.org/licenses/>.
  18
  19 # Makefile for benchmark tests.  The only useful target here is `bench`.
  20 # Add benchmark functions in alphabetical order.
  21
  22 subdir := benchtests
  23
  24 include ../Makeconfig
  25
  26 bench-math := \
  27   acos \
  28   acosh \
  29   asin \
  30   asinh \
  31   atan \
  32   atan2 \
  33   atanh \
  34   cbrt \
  35   cos \
  36   cosf \
  37   cosh \
  38   erf \
  39   erfc \
  40   exp \
  41   exp10 \
  42   exp10f \
  43   exp2 \
  44   exp2f \
  45   expf \
  46   expm1 \
  47   fmax \
  48   fmaxf \
  49   fmin \
  50   fminf \
  51   fmod \
  52   fmodf \
  53   hypot \
  54   hypotf \
  55   ilogb \
  56   ilogbf \
  57   isfinite \
  58   isinf \
  59   isnan \
  60   j0 \
  61   j1 \
  62   lgamma \
  63   log \
  64   log10 \
  65   log1p \
  66   log2 \
  67   log2f \
  68   logb \
  69   logbf \
  70   logf \
  71   modf \
  72   pow \
  73   powf \
  74   rint \
  75   roundeven \
  76   roundevenf \
  77   sin \
  78   sincos \
  79   sincosf \
  80   sinf \
  81   sinh \
  82   sqrt \
  83   tan \
  84   tanh \
  85   tgamma \
  86   trunc \
  87   truncf \
  88   y0 \
  89   y1 \
  90
  91 ifneq (,$(filter yes,$(float96-fcts)))
  92 bench-math += \
  93   cbrtl \
  94 # bench-math
  95 endif
  96
  97 ifneq (,$(filter yes,$(float128-fcts) $(float128-alias-fcts)))
  98 bench-math += \
  99   expf128 \
 100   ilogbf128 \
 101   powf128 \
 102   sinf128 \
 103 # bench-math
 104 endif
 105
 106 bench-pthread := \
 107   pthread-locks \
 108   pthread-mutex-lock \
 109   pthread-mutex-trylock \
 110   pthread-spin-lock \
 111   pthread-spin-trylock \
 112   pthread_once \
 113   thread_create \
 114 # bench-pthread
 115
 116 LDLIBS-bench-pthread-mutex-lock += -lm
 117 LDLIBS-bench-pthread-mutex-trylock += -lm
 118 LDLIBS-bench-pthread-spin-lock += -lm
 119 LDLIBS-bench-pthread-spin-trylock += -lm
 120
 121 bench-string := \
 122   ffs \
 123   ffsll \
 124 # bench-string
 125
 126 # String function benchmarks.
 127 string-benchset := \
 128   bzero \
 129   bzero-large \
 130   bzero-walk \
 131   memccpy \
 132   memchr \
 133   memcmp \
 134   memcmpeq \
 135   memcpy \
 136   memcpy-large \
 137   memcpy-random \
 138   memcpy-walk \
 139   memmem \
 140   memmove \
 141   memmove-large \
 142   memmove-walk \
 143   mempcpy \
 144   memrchr \
 145   memset \
 146   memset-large \
 147   memset-walk \
 148   memset-zero \
 149   memset-zero-large \
 150   memset-zero-walk \
 151   rawmemchr \
 152   stpcpy \
 153   stpcpy_chk \
 154   stpncpy \
 155   strcasecmp \
 156   strcasestr \
 157   strcat \
 158   strchr \
 159   strchrnul \
 160   strcmp \
 161   strcoll \
 162   strcpy \
 163   strcpy_chk \
 164   strcspn \
 165   strlen \
 166   strncasecmp \
 167   strncat \
 168   strncmp \
 169   strncpy \
 170   strnlen \
 171   strpbrk \
 172   strrchr \
 173   strsep \
 174   strspn \
 175   strstr \
 176   strtok \
 177 # string-benchset
 178
 179 # Build and run locale-dependent benchmarks only if we're building natively.
 180 ifeq (no,$(cross-compiling))
 181 wcsmbs-benchset := \
 182   wcpcpy \
 183   wcpncpy \
 184   wcrtomb \
 185   wcscat \
 186   wcschr \
 187   wcschrnul \
 188   wcscmp \
 189   wcscpy \
 190   wcscspn \
 191   wcslen \
 192   wcsncat \
 193   wcsncmp \
 194   wcsncpy \
 195   wcsnlen \
 196   wcspbrk \
 197   wcsrchr \
 198   wcsspn \
 199   wmemchr \
 200   wmemcmp \
 201   wmemset \
 202 # wcsmbs-benchset
 203 else
 204 wcsmbs-benchset :=
 205 endif
 206
 207 string-benchset-all := $(string-benchset) ${wcsmbs-benchset}
 208
 209 ifeq (no,$(cross-compiling))
 210 # We have to generate locales
 211 LOCALES := \
 212   ar_SA.UTF-8 \
 213   cs_CZ.UTF-8 \
 214   da_DK.UTF-8 \
 215   el_GR.UTF-8 \
 216   en_GB.UTF-8 \
 217   en_US.UTF-8 \
 218   es_ES.UTF-8 \
 219   fa_IR.UTF-8 \
 220   fr_FR.UTF-8 \
 221   he_IL.UTF-8 \
 222   hi_IN.UTF-8 \
 223   hu_HU.UTF-8 \
 224   is_IS.UTF-8 \
 225   it_IT.UTF-8 \
 226   ja_JP.UTF-8 \
 227   pl_PL.UTF-8 \
 228   pt_PT.UTF-8 \
 229   ru_RU.UTF-8 \
 230   si_LK.UTF-8 \
 231   sr_RS.UTF-8 \
 232   sv_SE.UTF-8 \
 233   tr_TR.UTF-8 \
 234   vi_VN.UTF-8 \
 235   zh_CN.UTF-8 \
 236 # LOCALES
 237 include ../gen-locales.mk
 238 endif
 239
 240 hash-benchset := \
 241   dl-elf-hash \
 242   dl-new-hash \
 243   nss-hash \
 244 # hash-benchset
 245
 246 stdlib-benchset := \
 247   arc4random \
 248   strtod \
 249   # stdlib-benchset
 250
 251 stdio-common-benchset := sprintf
 252
 253 math-benchset := math-inlines
 254
 255 ifeq (${BENCHSET},)
 256 benchset := $(string-benchset-all) $(stdlib-benchset) $(stdio-common-benchset) \
 257             $(math-benchset) $(hash-benchset)
 258 else
 259 benchset := $(foreach B,$(filter %-benchset,${BENCHSET}), ${${B}})
 260 endif
 261
 262 CFLAGS-bench-ffs.c += -fno-builtin
 263 CFLAGS-bench-ffsll.c += -fno-builtin
 264 CFLAGS-bench-sqrt.c += -fno-builtin
 265 CFLAGS-bench-fmin.c += -fno-builtin
 266 CFLAGS-bench-fminf.c += -fno-builtin
 267 CFLAGS-bench-fmax.c += -fno-builtin
 268 CFLAGS-bench-fmaxf.c += -fno-builtin
 269 CFLAGS-bench-trunc.c += -fno-builtin
 270 CFLAGS-bench-truncf.c += -fno-builtin
 271 CFLAGS-bench-roundeven.c += -fno-builtin
 272 CFLAGS-bench-roundevenf.c += -fno-builtin
 273 CFLAGS-bench-isnan.c += $(config-cflags-signaling-nans)
 274 CFLAGS-bench-isinf.c += $(config-cflags-signaling-nans)
 275 CFLAGS-bench-isfinite.c += $(config-cflags-signaling-nans)
 276
 277 ifeq (${BENCHSET},)
 278 bench-malloc := malloc-thread malloc-simple
 279 else
 280 bench-malloc := $(filter malloc-%,${BENCHSET})
 281 endif
 282
 283 ifeq (${STATIC-BENCHTESTS},yes)
 284 +link-benchtests = $(+link-static-tests)
 285 link-libc-benchtests = $(link-libc-static)
 286 libm-benchtests = $(common-objpfx)math/libm.a
 287 thread-library-benchtests = $(static-thread-library)
 288 else
 289 link-libc-benchtests = $(link-libc)
 290 +link-benchtests = $(+link-tests)
 291 thread-library-benchtests = $(shared-thread-library)
 292 libm-benchtests = $(libm)
 293 endif
 294
 295 $(addprefix $(objpfx)bench-,$(bench-math)): $(libm-benchtests)
 296 $(addprefix $(objpfx)bench-,$(math-benchset)): $(libm-benchtests)
 297 $(addprefix $(objpfx)bench-,$(bench-pthread)): $(thread-library-benchtests)
 298 $(addprefix $(objpfx)bench-,$(bench-malloc)): $(thread-library-benchtests)
 299 $(addprefix $(objpfx)bench-,pthread-locks): $(libm-benchtests)
 300 $(addprefix $(objpfx)bench-,pthread-mutex-locks): $(libm-benchtests)
 301
 302 \f
 303
 304 # Rules to build and execute the benchmarks.  Do not put any benchmark
 305 # parameters beyond this point.
 306
 307 # We don't want the benchmark programs to run in parallel since that could
 308 # affect their performance.
 309 .NOTPARALLEL:
 310
 311 bench-extra-objs = json-lib.o
 312
 313 extra-objs += $(bench-extra-objs)
 314 others-extras = $(bench-extra-objs)
 315
 316 # The default duration: 1 seconds.
 317 ifndef BENCH_DURATION
 318 BENCH_DURATION := 1
 319 endif
 320
 321 CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC
 322
 323 # Use clock_gettime to measure performance of functions.  The default is
 324 # to use the architecture-specific high precision timing instructions.
 325 ifdef USE_CLOCK_GETTIME
 326 CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
 327 else
 328 # On x86 processors, use RDTSCP, instead of RDTSC, to measure performance
 329 # of functions.  All x86 processors since 2010 support RDTSCP instruction.
 330 ifdef USE_RDTSCP
 331 CPPFLAGS-nonlib += -DUSE_RDTSCP
 332 endif
 333 endif
 334
 335 DETAILED_OPT :=
 336
 337 ifdef DETAILED
 338 DETAILED_OPT := -d
 339 endif
 340
 341 bench-deps := bench-skeleton.c bench-timing.h Makefile
 342
 343 run-bench = $(test-wrapper-env) \
 344             $(run-program-env) \
 345             $($*-ENV) $(test-via-rtld-prefix) $${run}
 346
 347 timing-type := $(objpfx)bench-timing-type
 348 extra-objs += bench-timing-type.o
 349
 350 include ../Rules
 351
 352 bench-math += $(bench-libmvec)
 353
 354 ifeq (${BENCHSET},)
 355 bench := $(bench-math) $(bench-pthread) $(bench-string)
 356 else
 357 bench := $(foreach B,$(filter bench-%,${BENCHSET}), ${${B}})
 358 endif
 359
 360 # NB: Use "=" instead of ":=" since sysdeps Makefiles may add more
 361 # benches.
 362 binaries-bench = $(addprefix $(objpfx)bench-,$(bench))
 363 extra-objs += $(addsuffix .o,$(addprefix bench-,$(bench)))
 364 binaries-benchset = $(addprefix $(objpfx)bench-,$(benchset))
 365 extra-objs += $(addsuffix .o,$(addprefix bench-,$(benchset)))
 366 binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
 367 extra-objs += $(addsuffix .o,$(addprefix bench-,$(bench-malloc)))
 368
 369 # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
 370 # for all these modules.
 371 cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c) \
 372                  $(binaries-bench-malloc:=.c) $(timing-type:=.c)
 373 lib := nonlib
 374 include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
 375
 376 bench-clean:
 377         rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
 378         rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
 379         rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc))
 380         rm -f $(timing-type) $(addsuffix .o,$(timing-type))
 381         rm -f $(addprefix $(objpfx),$(bench-extra-objs))
 382
 383 # Validate the passed in BENCHSET
 384 ifneq ($(strip ${BENCHSET}),)
 385 VALIDBENCHSETNAMES := \
 386   bench-math \
 387   bench-pthread \
 388   bench-string \
 389   hash-benchset \
 390   malloc-simple \
 391   malloc-thread \
 392   math-benchset \
 393   stdio-common-benchset \
 394   stdlib-benchset \
 395   string-benchset \
 396   wcsmbs-benchset \
 397 # VALIDBENCHSETNAMES
 398
 399 INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET})
 400 ifneq (${INVALIDBENCHSETNAMES},)
 401 $(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES})
 402 $(info The valid ones are: ${VALIDBENCHSETNAMES})
 403 $(error Invalid BENCHSET value)
 404 endif
 405 endif
 406
 407 bench: bench-build bench-set bench-func bench-malloc
 408
 409 # Target to only build the benchmark without running it.  We generate locales
 410 # only if we're building natively.
 411 ifeq (no,$(cross-compiling))
 412 bench-build: $(gen-locales) $(timing-type) $(binaries-bench) \
 413         $(binaries-benchset) $(binaries-bench-malloc)
 414 else
 415 bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \
 416         $(binaries-bench-malloc)
 417 endif
 418
 419 bench-set: $(binaries-benchset)
 420         for run in $^; do \
 421           echo "Running $${run}"; \
 422           $(run-bench) > $${run}.out; \
 423         done
 424
 425 bench-malloc: $(binaries-bench-malloc)
 426         for run in $^; do \
 427           echo "$${run}"; \
 428           if [ `basename $${run}` = "bench-malloc-thread" ]; then \
 429                 for thr in 1 8 16 32; do \
 430                         echo "Running $${run} $${thr}"; \
 431                         $(run-bench) $${thr} > $${run}-$${thr}.out; \
 432                 done;\
 433           else \
 434                 for thr in 8 16 32 64 128 256 512 1024 2048 4096; do \
 435                   echo "Running $${run} $${thr}"; \
 436                   $(run-bench) $${thr} > $${run}-$${thr}.out; \
 437                 done;\
 438           fi;\
 439         done
 440
 441 # Build and execute the benchmark functions.  This target generates JSON
 442 # formatted bench.out.  Each of the programs produce independent JSON output,
 443 # so one could even execute them individually and process it using any JSON
 444 # capable language or tool.
 445 bench-func: $(binaries-bench)
 446         if [ -n '$^' ] ; then \
 447         { timing_type=$$($(test-wrapper-env) \
 448                          $(run-program-env) \
 449                          $(test-via-rtld-prefix) \
 450                          $(timing-type)); \
 451           echo "{\"timing_type\": \"$${timing_type}\","; \
 452           echo " \"functions\": {"; \
 453           for run in $^; do \
 454             op=$$($(run-bench) $(DETAILED_OPT)); \
 455             ret=$$?; \
 456             case "$${ret}" in \
 457               77) \
 458               echo "UNSUPPORTED $${run}: $${op}" >&2; \
 459                 ;; \
 460               0) \
 461                 echo "Running $${run}" >&2; \
 462                 if [ "$${run}" != "$<" ]; then \
 463                   echo ","; \
 464                 fi; \
 465                 echo "$${op}"; \
 466                 ;; \
 467               *) \
 468                 echo "FAILED $${run}" >&2; \
 469                 ;; \
 470             esac; \
 471           done; \
 472           echo; \
 473           echo " }"; \
 474           echo "}"; \
 475           } > $(objpfx)bench.out-tmp; \
 476           if [ -f $(objpfx)bench.out ]; then \
 477             mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
 478           fi; \
 479           mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out; \
 480           $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \
 481           scripts/benchout.schema.json; \
 482         fi
 483
 484 ifeq ($(bind-now),yes)
 485 link-bench-bind-now = -Wl,-z,now
 486 endif
 487
 488 bench-link-targets = $(timing-type) $(binaries-bench) $(binaries-benchset) \
 489         $(binaries-bench-malloc)
 490
 491 $(bench-link-targets): %: %.o $(objpfx)json-lib.o \
 492         $(link-extra-libs-tests) \
 493   $(sort $(filter $(common-objpfx)lib%,$(link-libc-benchtests))) \
 494   $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
 495         $(+link-benchtests)
 496
 497 $(bench-link-targets): LDFLAGS += $(link-bench-bind-now)
 498
 499 $(objpfx)bench-%.c: %-inputs $(bench-deps)
 500         { if [ -n "$($*-INCLUDE)" ]; then \
 501           cat $($*-INCLUDE); \
 502         fi; \
 503         $(PYTHON) scripts/bench.py $(patsubst %-inputs,%,$<); } > $@-tmp
 504         mv -f $@-tmp $@