benchtests/Makefile

   1 # Copyright (C) 2013-2024 Free Software Foundation, Inc.
   2 # Copyright The GNU Toolchain Authors.
   3 # This file is part of the GNU C Library.
   4
   5 # The GNU C Library is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU Lesser General Public
   7 # License as published by the Free Software Foundation; either
   8 # version 2.1 of the License, or (at your option) any later version.
   9
  10 # The GNU C Library is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 # Lesser General Public License for more details.
  14
  15 # You should have received a copy of the GNU Lesser General Public
  16 # License along with the GNU C Library; if not, see
  17 # <https://www.gnu.org/licenses/>.
  18
  19 # Makefile for benchmark tests.  The only useful target here is `bench`.
  20 # Add benchmark functions in alphabetical order.
  21
  22 subdir := benchtests
  23
  24 include ../Makeconfig
  25
  26 bench-math := \
  27   acos \
  28   acosh \
  29   asin \
  30   asinh \
  31   atan \
  32   atan2 \
  33   atanh \
  34   cbrt \
  35   ceil \
  36   ceilf \
  37   cos \
  38   cosf \
  39   cosh \
  40   erf \
  41   erfc \
  42   exp \
  43   exp10 \
  44   exp10f \
  45   exp2 \
  46   exp2f \
  47   expf \
  48   expm1 \
  49   floor \
  50   floorf \
  51   fmax \
  52   fmaxf \
  53   fmin \
  54   fminf \
  55   fmod \
  56   fmodf \
  57   hypot \
  58   hypotf \
  59   ilogb \
  60   ilogbf \
  61   isfinite \
  62   isinf \
  63   isnan \
  64   j0 \
  65   j1 \
  66   lgamma \
  67   llrint \
  68   llrintf \
  69   log \
  70   log10 \
  71   log1p \
  72   log2 \
  73   log2f \
  74   logb \
  75   logbf \
  76   logf \
  77   lrint \
  78   lrintf \
  79   modf \
  80   nearbyint \
  81   nearbyintf \
  82   pow \
  83   powf \
  84   rint \
  85   rintf \
  86   roundeven \
  87   roundevenf \
  88   sin \
  89   sincos \
  90   sincosf \
  91   sinf \
  92   sinh \
  93   sqrt \
  94   tan \
  95   tanh \
  96   tgamma \
  97   trunc \
  98   truncf \
  99   y0 \
 100   y1 \
 101   # bench-math
 102
 103 ifneq (,$(filter yes,$(float96-fcts)))
 104 bench-math += \
 105   cbrtl \
 106   # bench-math
 107 endif
 108
 109 ifneq (,$(filter yes,$(float128-fcts) $(float128-alias-fcts)))
 110 bench-math += \
 111   expf128 \
 112   ilogbf128 \
 113   powf128 \
 114   sinf128 \
 115   # bench-math
 116 endif
 117
 118 bench-pthread := \
 119   pthread-locks \
 120   pthread-mutex-lock \
 121   pthread-mutex-trylock \
 122   pthread-spin-lock \
 123   pthread-spin-trylock \
 124   pthread_once \
 125   thread_create \
 126   # bench-pthread
 127
 128 LDLIBS-bench-pthread-mutex-lock += -lm
 129 LDLIBS-bench-pthread-mutex-trylock += -lm
 130 LDLIBS-bench-pthread-spin-lock += -lm
 131 LDLIBS-bench-pthread-spin-trylock += -lm
 132
 133 bench-string := \
 134   ffs \
 135   ffsll \
 136   # bench-string
 137
 138 # String function benchmarks.
 139 string-benchset := \
 140   bzero \
 141   bzero-large \
 142   bzero-walk \
 143   memccpy \
 144   memchr \
 145   memcmp \
 146   memcmpeq \
 147   memcpy \
 148   memcpy-large \
 149   memcpy-random \
 150   memcpy-walk \
 151   memmem \
 152   memmove \
 153   memmove-large \
 154   memmove-walk \
 155   mempcpy \
 156   memrchr \
 157   memset \
 158   memset-large \
 159   memset-walk \
 160   memset-zero \
 161   memset-zero-large \
 162   memset-zero-walk \
 163   rawmemchr \
 164   stpcpy \
 165   stpcpy_chk \
 166   stpncpy \
 167   strcasecmp \
 168   strcasestr \
 169   strcat \
 170   strchr \
 171   strchrnul \
 172   strcmp \
 173   strcoll \
 174   strcpy \
 175   strcpy_chk \
 176   strcspn \
 177   strlen \
 178   strncasecmp \
 179   strncat \
 180   strncmp \
 181   strncpy \
 182   strnlen \
 183   strpbrk \
 184   strrchr \
 185   strsep \
 186   strspn \
 187   strstr \
 188   strtok \
 189   # string-benchset
 190
 191 # Build and run locale-dependent benchmarks only if we're building natively.
 192 ifeq (no,$(cross-compiling))
 193 wcsmbs-benchset := \
 194   wcpcpy \
 195   wcpncpy \
 196   wcrtomb \
 197   wcscat \
 198   wcschr \
 199   wcschrnul \
 200   wcscmp \
 201   wcscpy \
 202   wcscspn \
 203   wcslen \
 204   wcsncat \
 205   wcsncmp \
 206   wcsncpy \
 207   wcsnlen \
 208   wcspbrk \
 209   wcsrchr \
 210   wcsspn \
 211   wmemchr \
 212   wmemcmp \
 213   wmemset \
 214   # wcsmbs-benchset
 215 else
 216 wcsmbs-benchset :=
 217 endif
 218
 219 string-benchset-all := $(string-benchset) ${wcsmbs-benchset}
 220
 221 ifeq (no,$(cross-compiling))
 222 # We have to generate locales
 223 LOCALES := \
 224   ar_SA.UTF-8 \
 225   cs_CZ.UTF-8 \
 226   da_DK.UTF-8 \
 227   el_GR.UTF-8 \
 228   en_GB.UTF-8 \
 229   en_US.UTF-8 \
 230   es_ES.UTF-8 \
 231   fa_IR.UTF-8 \
 232   fr_FR.UTF-8 \
 233   he_IL.UTF-8 \
 234   hi_IN.UTF-8 \
 235   hu_HU.UTF-8 \
 236   is_IS.UTF-8 \
 237   it_IT.UTF-8 \
 238   ja_JP.UTF-8 \
 239   pl_PL.UTF-8 \
 240   pt_PT.UTF-8 \
 241   ru_RU.UTF-8 \
 242   si_LK.UTF-8 \
 243   sr_RS.UTF-8 \
 244   sv_SE.UTF-8 \
 245   tr_TR.UTF-8 \
 246   vi_VN.UTF-8 \
 247   zh_CN.UTF-8 \
 248   # LOCALES
 249 include ../gen-locales.mk
 250 endif
 251
 252 hash-benchset := \
 253   dl-elf-hash \
 254   dl-new-hash \
 255   nss-hash \
 256   # hash-benchset
 257
 258 stdlib-benchset := \
 259   arc4random \
 260   strtod \
 261   # stdlib-benchset
 262
 263 stdio-common-benchset := sprintf
 264
 265 math-benchset := math-inlines
 266
 267 ifeq (${BENCHSET},)
 268 benchset := \
 269   $(hash-benchset) \
 270   $(math-benchset) \
 271   $(stdio-common-benchset) \
 272   $(stdlib-benchset) \
 273   $(string-benchset-all) \
 274   # benchset
 275 else
 276 benchset := $(foreach B,$(filter %-benchset,${BENCHSET}), ${${B}})
 277 endif
 278
 279 CFLAGS-bench-ffs.c += -fno-builtin
 280 CFLAGS-bench-ffsll.c += -fno-builtin
 281 CFLAGS-bench-sqrt.c += -fno-builtin
 282 CFLAGS-bench-fmin.c += -fno-builtin
 283 CFLAGS-bench-fminf.c += -fno-builtin
 284 CFLAGS-bench-fmax.c += -fno-builtin
 285 CFLAGS-bench-fmaxf.c += -fno-builtin
 286 CFLAGS-bench-trunc.c += -fno-builtin
 287 CFLAGS-bench-truncf.c += -fno-builtin
 288 CFLAGS-bench-roundeven.c += -fno-builtin
 289 CFLAGS-bench-roundevenf.c += -fno-builtin
 290 CFLAGS-bench-isnan.c += $(config-cflags-signaling-nans)
 291 CFLAGS-bench-isinf.c += $(config-cflags-signaling-nans)
 292 CFLAGS-bench-isfinite.c += $(config-cflags-signaling-nans)
 293
 294 ifeq (${BENCHSET},)
 295 bench-malloc := \
 296   malloc-simple \
 297   malloc-thread \
 298   # bench-malloc
 299 else
 300 bench-malloc := $(filter malloc-%,${BENCHSET})
 301 endif
 302
 303 ifeq (${STATIC-BENCHTESTS},yes)
 304 +link-benchtests = $(+link-static-tests)
 305 link-libc-benchtests = $(link-libc-static)
 306 libm-benchtests = $(common-objpfx)math/libm.a
 307 thread-library-benchtests = $(static-thread-library)
 308 else
 309 link-libc-benchtests = $(link-libc)
 310 +link-benchtests = $(+link-tests)
 311 thread-library-benchtests = $(shared-thread-library)
 312 libm-benchtests = $(libm)
 313 endif
 314
 315 $(addprefix $(objpfx)bench-,$(bench-math)): $(libm-benchtests)
 316 $(addprefix $(objpfx)bench-,$(math-benchset)): $(libm-benchtests)
 317 $(addprefix $(objpfx)bench-,$(bench-pthread)): $(thread-library-benchtests)
 318 $(addprefix $(objpfx)bench-,$(bench-malloc)): $(thread-library-benchtests)
 319 $(addprefix $(objpfx)bench-,pthread-locks): $(libm-benchtests)
 320 $(addprefix $(objpfx)bench-,pthread-mutex-locks): $(libm-benchtests)
 321
 322 \f
 323
 324 # Rules to build and execute the benchmarks.  Do not put any benchmark
 325 # parameters beyond this point.
 326
 327 # We don't want the benchmark programs to run in parallel since that could
 328 # affect their performance.
 329 .NOTPARALLEL:
 330
 331 bench-extra-objs = json-lib.o
 332
 333 extra-objs += $(bench-extra-objs)
 334 others-extras = $(bench-extra-objs)
 335
 336 # The default duration: 1 seconds.
 337 ifndef BENCH_DURATION
 338 BENCH_DURATION := 1
 339 endif
 340
 341 CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC
 342
 343 # Use clock_gettime to measure performance of functions.  The default is
 344 # to use the architecture-specific high precision timing instructions.
 345 ifdef USE_CLOCK_GETTIME
 346 CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
 347 else
 348 # On x86 processors, use RDTSCP, instead of RDTSC, to measure performance
 349 # of functions.  All x86 processors since 2010 support RDTSCP instruction.
 350 ifdef USE_RDTSCP
 351 CPPFLAGS-nonlib += -DUSE_RDTSCP
 352 endif
 353 endif
 354
 355 DETAILED_OPT :=
 356
 357 ifdef DETAILED
 358 DETAILED_OPT := -d
 359 endif
 360
 361 bench-deps := bench-skeleton.c bench-timing.h Makefile
 362
 363 run-bench = $(test-wrapper-env) \
 364             $(run-program-env) \
 365             $($*-ENV) $(test-via-rtld-prefix) $${run}
 366
 367 timing-type := $(objpfx)bench-timing-type
 368 extra-objs += bench-timing-type.o
 369
 370 include ../Rules
 371
 372 bench-math += $(bench-libmvec)
 373
 374 ifeq (${BENCHSET},)
 375 bench := \
 376   $(bench-math) \
 377   $(bench-pthread) \
 378   $(bench-string) \
 379   # bench
 380 else
 381 bench := $(foreach B,$(filter bench-%,${BENCHSET}), ${${B}})
 382 endif
 383
 384 # NB: Use "=" instead of ":=" since sysdeps Makefiles may add more
 385 # benches.
 386 binaries-bench = $(addprefix $(objpfx)bench-,$(bench))
 387 extra-objs += $(addsuffix .o,$(addprefix bench-,$(bench)))
 388 binaries-benchset = $(addprefix $(objpfx)bench-,$(benchset))
 389 extra-objs += $(addsuffix .o,$(addprefix bench-,$(benchset)))
 390 binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
 391 extra-objs += $(addsuffix .o,$(addprefix bench-,$(bench-malloc)))
 392
 393 # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
 394 # for all these modules.
 395 cpp-srcs-left := \
 396   $(binaries-bench-malloc:=.c) \
 397   $(binaries-bench:=.c) \
 398   $(binaries-benchset:=.c) \
 399   $(timing-type:=.c) \
 400   # cpp-srcs-left
 401 lib := nonlib
 402 include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
 403
 404 bench-clean:
 405         rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
 406         rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
 407         rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc))
 408         rm -f $(timing-type) $(addsuffix .o,$(timing-type))
 409         rm -f $(addprefix $(objpfx),$(bench-extra-objs))
 410
 411 # Validate the passed in BENCHSET
 412 ifneq ($(strip ${BENCHSET}),)
 413 VALIDBENCHSETNAMES := \
 414   bench-math \
 415   bench-pthread \
 416   bench-string \
 417   hash-benchset \
 418   malloc-simple \
 419   malloc-thread \
 420   math-benchset \
 421   stdio-common-benchset \
 422   stdlib-benchset \
 423   string-benchset \
 424   wcsmbs-benchset \
 425   # VALIDBENCHSETNAMES
 426
 427 INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET})
 428 ifneq (${INVALIDBENCHSETNAMES},)
 429 $(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES})
 430 $(info The valid ones are: ${VALIDBENCHSETNAMES})
 431 $(error Invalid BENCHSET value)
 432 endif
 433 endif
 434
 435 bench: bench-build bench-set bench-func bench-malloc
 436
 437 # Target to only build the benchmark without running it.  We generate locales
 438 # only if we're building natively.
 439 ifeq (no,$(cross-compiling))
 440 bench-build: $(gen-locales) $(timing-type) $(binaries-bench) \
 441         $(binaries-benchset) $(binaries-bench-malloc)
 442 else
 443 bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \
 444         $(binaries-bench-malloc)
 445 endif
 446
 447 bench-set: $(binaries-benchset)
 448         for run in $^; do \
 449           echo "Running $${run}"; \
 450           $(run-bench) > $${run}.out; \
 451         done
 452
 453 bench-malloc: $(binaries-bench-malloc)
 454         for run in $^; do \
 455           echo "$${run}"; \
 456           if [ `basename $${run}` = "bench-malloc-thread" ]; then \
 457                 for thr in 1 8 16 32; do \
 458                         echo "Running $${run} $${thr}"; \
 459                         $(run-bench) $${thr} > $${run}-$${thr}.out; \
 460                 done;\
 461           else \
 462                 for thr in 8 16 32 64 128 256 512 1024 2048 4096; do \
 463                   echo "Running $${run} $${thr}"; \
 464                   $(run-bench) $${thr} > $${run}-$${thr}.out; \
 465                 done;\
 466           fi;\
 467         done
 468
 469 # Build and execute the benchmark functions.  This target generates JSON
 470 # formatted bench.out.  Each of the programs produce independent JSON output,
 471 # so one could even execute them individually and process it using any JSON
 472 # capable language or tool.
 473 bench-func: $(binaries-bench)
 474         if [ -n '$^' ] ; then \
 475         { timing_type=$$($(test-wrapper-env) \
 476                          $(run-program-env) \
 477                          $(test-via-rtld-prefix) \
 478                          $(timing-type)); \
 479           echo "{\"timing_type\": \"$${timing_type}\","; \
 480           echo " \"functions\": {"; \
 481           for run in $^; do \
 482             op=$$($(run-bench) $(DETAILED_OPT)); \
 483             ret=$$?; \
 484             case "$${ret}" in \
 485               77) \
 486               echo "UNSUPPORTED $${run}: $${op}" >&2; \
 487                 ;; \
 488               0) \
 489                 echo "Running $${run}" >&2; \
 490                 if [ "$${run}" != "$<" ]; then \
 491                   echo ","; \
 492                 fi; \
 493                 echo "$${op}"; \
 494                 ;; \
 495               *) \
 496                 echo "FAILED $${run}" >&2; \
 497                 ;; \
 498             esac; \
 499           done; \
 500           echo; \
 501           echo " }"; \
 502           echo "}"; \
 503           } > $(objpfx)bench.out-tmp; \
 504           if [ -f $(objpfx)bench.out ]; then \
 505             mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
 506           fi; \
 507           mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out; \
 508           $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \
 509           scripts/benchout.schema.json; \
 510         fi
 511
 512 ifeq ($(bind-now),yes)
 513 link-bench-bind-now = -Wl,-z,now
 514 endif
 515
 516 bench-link-targets = $(timing-type) $(binaries-bench) $(binaries-benchset) \
 517         $(binaries-bench-malloc)
 518
 519 $(bench-link-targets): %: %.o $(objpfx)json-lib.o \
 520         $(link-extra-libs-tests) \
 521   $(sort $(filter $(common-objpfx)lib%,$(link-libc-benchtests))) \
 522   $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
 523         $(+link-benchtests)
 524
 525 $(bench-link-targets): LDFLAGS += $(link-bench-bind-now)
 526
 527 $(objpfx)bench-%.c: %-inputs $(bench-deps)
 528         { if [ -n "$($*-INCLUDE)" ]; then \
 529           cat $($*-INCLUDE); \
 530         fi; \
 531         $(PYTHON) scripts/bench.py $(patsubst %-inputs,%,$<); } > $@-tmp
 532         mv -f $@-tmp $@