1 /*******************************************
3 * Interbench - Interactivity benchmark
5 * Author: Con Kolivas <kernel@kolivas.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 *******************************************/
24 #define _FILE_OFFSET_BITS 64 /* Large file support */
25 #define INTERBENCH_VERSION "0.30"
37 #include <semaphore.h>
42 #include <sys/utsname.h>
44 #include <sys/resource.h>
45 #include <sys/types.h>
49 #include <sys/sysctl.h>
50 #include <sys/vmmeter.h>
51 #include "interbench.h"
53 #define MAX_UNAME_LENGTH 100
54 #define MAX_LOG_LENGTH ((MAX_UNAME_LENGTH) + 4)
55 #define MIN_BLK_SIZE 1024
56 #define DEFAULT_RESERVE 64
57 #define MB (1024 * 1024) /* 2^20 bytes */
59 #define MAX_MEM_IN_MB (1024 * 64) /* 64 GB */
62 unsigned long loops_per_ms
;
63 unsigned long ram
, swap
;
68 unsigned long custom_run
;
69 unsigned long custom_interval
;
70 unsigned long cpu_load
;
71 char logfilename
[MAX_LOG_LENGTH
];
73 char unamer
[MAX_UNAME_LENGTH
];
82 /* Pipes main to/from load and bench processes */
83 static int m2l
[2], l2m
[2], m2b
[2], b2m
[2];
85 /* Which member of benchmarks is used when not benchmarking */
86 #define NOT_BENCHING (THREADS)
87 #define CUSTOM (THREADS - 1)
90 * To add another load or a benchmark you need to increment the value of
91 * THREADS, add a function prototype for your function and add an entry to
92 * the threadlist. To specify whether the function is a benchmark or a load
93 * set the benchmark and/or load flag as appropriate. The basic requirements
94 * of a new load can be seen by using emulate_none as a template.
97 void emulate_none(struct thread
*th
);
98 void emulate_audio(struct thread
*th
);
99 void emulate_video(struct thread
*th
);
100 void emulate_x(struct thread
*th
);
101 void emulate_game(struct thread
*th
);
102 void emulate_burn(struct thread
*th
);
103 void emulate_write(struct thread
*th
);
104 void emulate_read(struct thread
*th
);
105 void emulate_ring(struct thread
*th
);
106 void emulate_compile(struct thread
*th
);
107 void emulate_memload(struct thread
*th
);
108 void emulate_hackbench(struct thread
*th
);
109 void emulate_custom(struct thread
*th
);
111 struct thread threadlist
[THREADS
] = {
112 {.label
= "None", .name
= emulate_none
, .load
= 1, .rtload
= 1},
113 {.label
= "Audio", .name
= emulate_audio
, .bench
= 1, .rtbench
= 1},
114 {.label
= "Video", .name
= emulate_video
, .bench
= 1, .rtbench
= 1, .load
= 1, .rtload
= 1},
115 {.label
= "X", .name
= emulate_x
, .bench
= 1, .load
= 1, .rtload
= 1},
116 {.label
= "Gaming", .name
= emulate_game
, .nodeadlines
= 1, .bench
= 1},
117 {.label
= "Burn", .name
= emulate_burn
, .load
= 1, .rtload
= 1},
118 {.label
= "Write", .name
= emulate_write
, .load
= 1, .rtload
= 1},
119 {.label
= "Read", .name
= emulate_read
, .load
= 1, .rtload
= 1},
120 {.label
= "Ring", .name
= emulate_ring
, .load
= 0, .rtload
= 0}, /* No useful data from this */
121 {.label
= "Compile", .name
= emulate_compile
, .load
= 1, .rtload
= 1},
122 {.label
= "Memload", .name
= emulate_memload
, .load
= 1, .rtload
= 1},
123 {.label
= "Hack", .name
= emulate_hackbench
, .load
= 0, .rtload
= 0}, /* This is causing signal headaches */
124 {.label
= "Custom", .name
= emulate_custom
}, /* Leave custom as last entry */
127 void init_sem(sem_t
*sem
);
128 void init_all_sems(struct sems
*s
);
129 void initialise_thread(int i
);
130 void start_thread(struct thread
*th
);
131 void stop_thread(struct thread
*th
);
133 void terminal_error(const char *name
)
135 fprintf(stderr
, "\n");
/*
 * Fatal-error exit path used while a stream is still open: close the
 * stream first so the descriptor is not leaked, then bail out through
 * terminal_error() with the caller-supplied name.
 */
void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == -1)
		terminal_error("fclose");
	terminal_error(name);
}
147 unsigned long long get_nsecs(struct timespec
*myts
)
149 if (clock_gettime(CLOCK_REALTIME
, myts
))
150 terminal_error("clock_gettime");
151 return (myts
->tv_sec
* 1000000000 + myts
->tv_nsec
);
154 unsigned long get_usecs(struct timespec
*myts
)
156 if (clock_gettime(CLOCK_REALTIME
, myts
))
157 terminal_error("clock_gettime");
158 return (myts
->tv_sec
* 1000000 + myts
->tv_nsec
/ 1000 );
161 void set_fifo(int prio
)
163 struct sched_param sp
;
165 memset(&sp
, 0, sizeof(sp
));
166 sp
.sched_priority
= prio
;
167 if (sched_setscheduler(0, SCHED_FIFO
, &sp
) == -1) {
169 terminal_error("sched_setscheduler");
177 mlockflags
= MCL_CURRENT
| MCL_FUTURE
;
179 mlockall(mlockflags
); /* Is not critical if this fails */
/* Release any mlockall() memory locks; failure here is fatal. */
void set_munlock(void)
{
	if (munlockall() == -1)
		terminal_error("munlockall");
}
/*
 * Put the given pthread on SCHED_FIFO real time scheduling at
 * priority "prio".
 *
 * Bug fix: pthread_setschedparam() does not return -1 and set errno
 * like sched_setscheduler(); it returns 0 on success and a positive
 * error number on failure, so the old "== -1" test silently ignored
 * every failure.  Test for non-zero instead.
 */
void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (pthread_setschedparam(pthread, SCHED_FIFO, &sp) != 0)
		terminal_error("pthread_setschedparam");
}
/*
 * Drop the calling process back to ordinary SCHED_OTHER scheduling.
 * Failure is only warned about, not fatal, since we may simply not
 * have been running real time in the first place.
 */
void set_normal(void)
{
	struct sched_param sp = { .sched_priority = 0 };

	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}
/* Set the nice value of the current process, dying on failure. */
void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}
218 struct sched_param sp
;
219 memset(&sp
, 0, sizeof(sp
));
220 sp
.sched_priority
= 99;
221 if (sched_setscheduler(0, SCHED_FIFO
, &sp
) == -1) {
223 terminal_error("sched_setscheduler");
226 if (sched_getscheduler(0) != SCHED_FIFO
)
/*
 * Return the given pthread to ordinary SCHED_OTHER scheduling.
 *
 * Bug fix: pthread_setschedparam() returns 0 on success and a
 * positive error number on failure — it never returns -1 — so the
 * old "== -1" comparison could never detect an error.  Test for
 * non-zero instead.
 */
void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (pthread_setschedparam(pthread, SCHED_OTHER, &sp) != 0)
		terminal_error("pthread_setschedparam");
}
244 void sync_flush(void)
246 if ((fflush(NULL
)) == EOF
)
247 terminal_error("fflush");
253 unsigned long compute_allocable_mem(void)
255 unsigned long total
= ud
.ram
+ ud
.swap
;
256 unsigned long usage
= ud
.ram
* 110 / 100 ;
258 /* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
259 if (total
- DEFAULT_RESERVE
< usage
)
260 usage
= total
- DEFAULT_RESERVE
;
261 usage
/= 1024; /* to megabytes */
/*
 * Spin for "loops" iterations of a do-nothing loop.
 *
 * The empty __asm__ with a "memory" clobber is the magic that stops
 * the compiler optimising the loop away entirely; without it,
 * emulating a fixed cpu load with this loop would not work.
 */
void burn_loops(unsigned long loops)
{
	for (unsigned long i = 0; i < loops; i++)
		__asm__ __volatile__("" : : : "memory");
}
280 /* Use this many usecs of cpu time */
281 void burn_usecs(unsigned long usecs
)
283 unsigned long ms_loops
;
285 ms_loops
= ud
.loops_per_ms
/ 1000 * usecs
;
286 burn_loops(ms_loops
);
289 void microsleep(unsigned long long usecs
)
291 struct timespec req
, rem
;
293 rem
.tv_sec
= rem
.tv_nsec
= 0;
295 req
.tv_sec
= usecs
/ 1000000;
296 req
.tv_nsec
= (usecs
- (req
.tv_sec
* 1000000)) * 1000;
298 if ((nanosleep(&req
, &rem
)) == -1) {
299 if (errno
== EINTR
) {
300 if (rem
.tv_sec
|| rem
.tv_nsec
) {
301 req
.tv_sec
= rem
.tv_sec
;
302 req
.tv_nsec
= rem
.tv_nsec
;
307 terminal_error("nanosleep");
314 * Yes, sem_post and sem_wait shouldn't return -1 but they do so we must
317 inline void post_sem(sem_t
*s
)
320 if ((sem_post(s
)) == -1) {
323 terminal_error("sem_post");
327 inline void wait_sem(sem_t
*s
)
330 if ((sem_wait(s
)) == -1) {
333 terminal_error("sem_wait");
337 inline int trywait_sem(sem_t
*s
)
342 if ((ret
= sem_trywait(s
)) == -1) {
346 terminal_error("sem_trywait");
/*
 * Wrapper around read(2): retries transparently when interrupted by a
 * signal (EINTR) and treats any other failure as fatal.  Returns the
 * number of bytes actually read (which may be short, or 0 at EOF).
 */
inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

	while ((retval = read(fd, buf, count)) == -1) {
		if (errno == EINTR)
			continue;	/* signal interrupted us; just retry */
		terminal_error("read");
	}
	return retval;
}
/*
 * Wrapper around write(2): retries transparently when interrupted by
 * a signal (EINTR) and treats any other failure as fatal.  Returns
 * the number of bytes actually written.
 *
 * Bug fix: the data pointer itself must be passed to write(), not
 * its address.  "write(fd, &buf, count)" wrote "count" bytes of
 * stack garbage starting at the local pointer variable instead of
 * the caller's buffer (compare the correct call in Read()).
 */
inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

	while ((retval = write(fd, buf, count)) == -1) {
		if (errno == EINTR)
			continue;	/* signal interrupted us; just retry */
		terminal_error("write");
	}
	return retval;
}
379 unsigned long periodic_schedule(struct thread
*th
, unsigned long run_usecs
,
380 unsigned long interval_usecs
, unsigned long long deadline
)
382 unsigned long long latency
, missed_latency
;
383 unsigned long long current_time
;
384 struct tk_thread
*tk
;
385 struct data_table
*tb
;
386 struct timespec myts
;
392 current_time
= get_usecs(&myts
);
393 if (current_time
> deadline
+ tk
->slept_interval
)
394 latency
= current_time
- deadline
- tk
->slept_interval
;
396 /* calculate the latency for missed frames */
399 current_time
= get_usecs(&myts
);
400 if (interval_usecs
&& current_time
> deadline
+ interval_usecs
) {
401 /* We missed the deadline even before we consumed cpu */
402 unsigned long intervals
;
404 deadline
+= interval_usecs
;
405 intervals
= (current_time
- deadline
) /
408 tb
->missed_deadlines
+= intervals
;
409 missed_latency
= intervals
* interval_usecs
;
410 deadline
+= intervals
* interval_usecs
;
411 tb
->missed_burns
+= intervals
;
415 burn_usecs(run_usecs
);
416 current_time
= get_usecs(&myts
);
417 tb
->achieved_burns
++;
420 * If we meet the deadline we move the deadline forward, otherwise
421 * we consider it a missed deadline and dropped frame etc.
423 deadline
+= interval_usecs
;
424 if (deadline
>= current_time
) {
427 if (interval_usecs
) {
428 unsigned long intervals
= (current_time
- deadline
) /
431 tb
->missed_deadlines
+= intervals
;
432 missed_latency
= intervals
* interval_usecs
;
433 deadline
+= intervals
* interval_usecs
;
435 tb
->missed_burns
+= intervals
;
437 deadline
= current_time
;
442 tk
->sleep_interval
= deadline
- current_time
;
444 post_sem(&tk
->sem
.start
);
445 wait_sem(&tk
->sem
.complete
);
448 * Must add missed_latency to total here as this function may not be
449 * called again and the missed latency can be lost
451 latency
+= missed_latency
;
452 if (latency
> tb
->max_latency
)
453 tb
->max_latency
= latency
;
454 tb
->total_latency
+= latency
;
455 tb
->sum_latency_squared
+= latency
* latency
;
461 void initialise_thread_data(struct data_table
*tb
)
465 tb
->sum_latency_squared
=
467 tb
->missed_deadlines
=
/* pthread_create() with fatal error checking. */
void create_pthread(pthread_t * thread, pthread_attr_t * attr,
	void * (*start_routine)(void *), void *arg)
{
	int ret = pthread_create(thread, attr, start_routine, arg);

	if (ret)
		terminal_error("pthread_create");
}
/* pthread_join() with fatal error checking. */
void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return) != 0)
		terminal_error("pthread_join");
}
485 void emulate_none(struct thread
*th
)
487 sem_t
*s
= &th
->sem
.stop
;
491 #define AUDIO_INTERVAL (50000)
492 #define AUDIO_RUN (AUDIO_INTERVAL / 20)
493 /* We emulate audio by using 5% cpu and waking every 50ms */
494 void emulate_audio(struct thread
*th
)
496 unsigned long long deadline
;
497 sem_t
*s
= &th
->sem
.stop
;
498 struct timespec myts
;
500 th
->decasecond_deadlines
= 1000000 / AUDIO_INTERVAL
* 10;
501 deadline
= get_usecs(&myts
);
504 deadline
= periodic_schedule(th
, AUDIO_RUN
, AUDIO_INTERVAL
,
511 /* We emulate video by using 40% cpu and waking for 60fps */
512 #define VIDEO_INTERVAL (1000000 / 60)
513 #define VIDEO_RUN (VIDEO_INTERVAL * 40 / 100)
514 void emulate_video(struct thread
*th
)
516 unsigned long long deadline
;
517 sem_t
*s
= &th
->sem
.stop
;
518 struct timespec myts
;
520 th
->decasecond_deadlines
= 1000000 / VIDEO_INTERVAL
* 10;
521 deadline
= get_usecs(&myts
);
524 deadline
= periodic_schedule(th
, VIDEO_RUN
, VIDEO_INTERVAL
,
532 * We emulate X by running for a variable percentage of cpu from 0-100%
535 void emulate_x(struct thread
*th
)
537 unsigned long long deadline
;
538 sem_t
*s
= &th
->sem
.stop
;
539 struct timespec myts
;
541 th
->decasecond_deadlines
= 100;
542 deadline
= get_usecs(&myts
);
546 for (i
= 0 ; i
<= 100 ; i
++) {
548 deadline
= periodic_schedule(th
, i
* 1000, j
* 1000,
550 deadline
+= i
* 1000;
558 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
559 * completed) we can do in that time. Deadlines are meaningless with
560 * unlocked frame rates. We do not use periodic schedule for
561 * this load because it never wants to sleep.
563 #define GAME_INTERVAL (100000)
564 #define GAME_RUN (GAME_INTERVAL)
565 void emulate_game(struct thread
*th
)
567 unsigned long long deadline
, current_time
, latency
;
568 sem_t
*s
= &th
->sem
.stop
;
569 struct timespec myts
;
570 struct data_table
*tb
;
573 th
->decasecond_deadlines
= 1000000 / GAME_INTERVAL
* 10;
576 deadline
= get_usecs(&myts
) + GAME_INTERVAL
;
577 burn_usecs(GAME_RUN
);
578 current_time
= get_usecs(&myts
);
579 /* use usecs instead of simple count for game burn statistics */
580 tb
->achieved_burns
+= GAME_RUN
;
581 if (current_time
> deadline
) {
582 latency
= current_time
- deadline
;
583 tb
->missed_burns
+= latency
;
586 if (latency
> tb
->max_latency
)
587 tb
->max_latency
= latency
;
588 tb
->total_latency
+= latency
;
589 tb
->sum_latency_squared
+= latency
* latency
;
596 void *burn_thread(void *t
)
603 s
= &th
->sem
.stopchild
;
606 burn_loops(ud
.loops_per_ms
);
607 if (!trywait_sem(s
)) {
615 /* Have ud.cpu_load threads burn cpu continuously */
616 void emulate_burn(struct thread
*th
)
618 sem_t
*s
= &th
->sem
.stop
;
621 pthread_t burnthreads
[ud
.cpu_load
];
624 for (i
= 0 ; i
< ud
.cpu_load
; i
++)
625 create_pthread(&burnthreads
[i
], NULL
, burn_thread
,
628 post_sem(&th
->sem
.stopchild
);
629 for (i
= 0 ; i
< ud
.cpu_load
; i
++)
630 join_pthread(burnthreads
[i
], NULL
);
633 /* Write a file the size of ram continuously */
634 void emulate_write(struct thread
*th
)
636 sem_t
*s
= &th
->sem
.stop
;
638 char *name
= "interbench.write";
643 if (!(fp
= fopen(name
, "w")))
644 terminal_error("fopen");
645 if (stat(name
, &statbuf
) == -1)
646 terminal_fileopen_error(fp
, "stat");
647 if (statbuf
.st_blksize
< MIN_BLK_SIZE
)
648 statbuf
.st_blksize
= MIN_BLK_SIZE
;
649 mem
= ud
.ram
/ (statbuf
.st_blksize
/ 1024); /* kilobytes to blocks */
650 if (!(buf
= calloc(1, statbuf
.st_blksize
)))
651 terminal_fileopen_error(fp
, "calloc");
652 if (fclose(fp
) == -1)
653 terminal_error("fclose");
658 if (!(fp
= fopen(name
, "w")))
659 terminal_error("fopen");
660 if (stat(name
, &statbuf
) == -1)
661 terminal_fileopen_error(fp
, "stat");
662 for (i
= 0 ; i
< mem
; i
++) {
663 if (fwrite(buf
, statbuf
.st_blksize
, 1, fp
) != 1)
664 terminal_fileopen_error(fp
, "fwrite");
668 if (fclose(fp
) == -1)
669 terminal_error("fclose");
673 if (fclose(fp
) == -1)
674 terminal_error("fclose");
675 if (remove(name
) == -1)
676 terminal_error("remove");
680 /* Read a file the size of ram continuously */
681 void emulate_read(struct thread
*th
)
683 sem_t
*s
= &th
->sem
.stop
;
684 char *name
= "interbench.read";
690 if ((tmp
= open(name
, O_RDONLY
)) == -1)
691 terminal_error("open");
692 if (stat(name
, &statbuf
) == -1)
693 terminal_error("stat");
694 bsize
= statbuf
.st_blksize
;
695 if (!(buf
= malloc(bsize
)))
696 terminal_error("malloc");
702 * We have to read the whole file before quitting the load
703 * to prevent the data being cached for the next read. This
704 * is also the reason the file is the size of physical ram.
706 while ((rd
= Read(tmp
, buf
, bsize
)) > 0);
709 if (lseek(tmp
, (off_t
)0, SEEK_SET
) == -1)
710 terminal_error("lseek");
714 #define RINGTHREADS 4
716 struct thread ringthreads
[RINGTHREADS
];
718 void *ring_thread(void *t
)
725 th
= &ringthreads
[i
];
728 if (post_to
== RINGTHREADS
)
735 post_sem(&ringthreads
[post_to
].sem
.start
);
736 if (!trywait_sem(&s
->stop
))
740 post_sem(&ringthreads
[post_to
].sem
.start
);
741 post_sem(&s
->complete
);
745 /* Create a ring of 4 processes that wake each other up in a circle */
746 void emulate_ring(struct thread
*th
)
748 sem_t
*s
= &th
->sem
.stop
;
751 for (i
= 0 ; i
< RINGTHREADS
; i
++) {
752 init_all_sems(&ringthreads
[i
].sem
);
753 create_pthread(&ringthreads
[i
].pthread
, NULL
,
754 ring_thread
, (void*)(long) i
);
757 wait_sem(&ringthreads
[0].sem
.ready
);
758 post_sem(&ringthreads
[0].sem
.start
);
760 for (i
= 0 ; i
< RINGTHREADS
; i
++)
761 post_sem(&ringthreads
[i
].sem
.stop
);
762 for (i
= 0 ; i
< RINGTHREADS
; i
++) {
763 wait_sem(&ringthreads
[i
].sem
.complete
);
764 join_pthread(ringthreads
[i
].pthread
, NULL
);
768 /* We emulate a compile by running burn, write and read threads simultaneously */
769 void emulate_compile(struct thread
*th
)
771 sem_t
*s
= &th
->sem
.stop
;
772 unsigned long i
, threads
[3];
774 bzero(threads
, 3 * sizeof(threads
[0]));
776 for (i
= 0 ; i
< THREADS
; i
++) {
777 if (strcmp(threadlist
[i
].label
, "Burn") == 0)
779 if (strcmp(threadlist
[i
].label
, "Write") == 0)
781 if (strcmp(threadlist
[i
].label
, "Read") == 0)
784 for (i
= 0 ; i
< 3 ; i
++) {
786 fprintf(stderr
, "Can't find all threads for compile load\n");
790 for (i
= 0 ; i
< 3 ; i
++) {
791 initialise_thread(threads
[i
]);
792 start_thread(&threadlist
[threads
[i
]]);
795 for (i
= 0 ; i
< 3 ; i
++)
796 stop_thread(&threadlist
[threads
[i
]]);
799 int *grab_and_touch (char *block
[], int i
)
801 block
[i
] = (char *) malloc(MB
);
804 return (memset(block
[i
], 1, MB
));
807 /* We emulate a memory load by allocating and torturing 110% of available ram */
808 void emulate_memload(struct thread
*th
)
810 sem_t
*s
= &th
->sem
.stop
;
811 unsigned long touchable_mem
, i
;
812 char *mem_block
[MAX_MEM_IN_MB
];
815 touchable_mem
= compute_allocable_mem();
816 /* loop until we're killed, frobbing memory in various perverted ways */
818 for (i
= 0; i
< touchable_mem
; i
++) {
819 success
= grab_and_touch(mem_block
, i
);
827 for (i
= 0; i
< touchable_mem
; i
++) {
828 memcpy(mem_block
[i
], mem_block
[(i
+ touchable_mem
/ 2) %
833 for (i
= 0; i
< touchable_mem
; i
++) {
840 for (i
= 0; i
< touchable_mem
; i
++)
846 struct thread hackthread
;
848 void emulate_hackbench(struct thread
*th
)
850 sem_t
*s
= &th
->sem
.stop
;
852 init_all_sems(&hackthread
.sem
);
853 create_pthread(&hackthread
.pthread
, NULL
, hackbench_thread
, (void *) 0);
857 post_sem(&hackthread
.sem
.stop
);
858 wait_sem(&hackthread
.sem
.complete
);
860 join_pthread(hackthread
.pthread
, NULL
);
863 #define CUSTOM_INTERVAL (ud.custom_interval)
864 #define CUSTOM_RUN (ud.custom_run)
865 void emulate_custom(struct thread
*th
)
867 unsigned long long deadline
;
868 sem_t
*s
= &th
->sem
.stop
;
869 struct timespec myts
;
871 th
->decasecond_deadlines
= 1000000 / CUSTOM_INTERVAL
* 10;
872 deadline
= get_usecs(&myts
);
875 deadline
= periodic_schedule(th
, CUSTOM_RUN
, CUSTOM_INTERVAL
,
882 void *timekeeping_thread(void *t
)
885 struct tk_thread
*tk
;
887 struct timespec myts
;
892 s
= &th
->tkthread
.sem
;
894 * If this timekeeping thread is that of a benchmarked thread we run
895 * even higher priority than the benched thread is if running real
896 * time. Otherwise, the load timekeeping thread, which does not need
897 * accurate accounting remains SCHED_NORMAL;
899 if (th
->dt
!= &th
->benchmarks
[NOT_BENCHING
])
901 /* These values must be changed at the appropriate places or race */
902 tk
->sleep_interval
= tk
->slept_interval
= 0;
906 unsigned long start_time
, now
;
908 if (!trywait_sem(&s
->stop
))
911 tk
->slept_interval
= 0;
912 start_time
= get_usecs(&myts
);
913 if (!trywait_sem(&s
->stop
))
915 if (tk
->sleep_interval
) {
916 unsigned long diff
= 0;
917 microsleep(tk
->sleep_interval
);
918 now
= get_usecs(&myts
);
919 /* now should always be > start_time but... */
920 if (now
> start_time
) {
921 diff
= now
- start_time
;
922 if (diff
> tk
->sleep_interval
)
923 tk
->slept_interval
= diff
-
927 tk
->sleep_interval
= 0;
928 post_sem(&s
->complete
);
935 * All the sleep functions such as nanosleep can only guarantee that they
936 * sleep for _at least_ the time requested. We work around this by having
937 * a high priority real time thread that accounts for the extra time slept
938 * in nanosleep. This allows wakeup latency of the tested thread to be
939 * accurate and reflect true scheduling delays.
941 void *emulation_thread(void *t
)
944 struct tk_thread
*tk
;
945 struct sems
*s
, *tks
;
954 /* Start the timekeeping thread */
955 create_pthread(&th
->tk_pthread
, NULL
, timekeeping_thread
,
957 /* Wait for timekeeping thread to be ready */
958 wait_sem(&tks
->ready
);
960 /* Tell main we're ready to start*/
963 /* Wait for signal from main to start thread */
966 /* Start the actual function being benched/or running as load */
969 /* Stop the timekeeping thread */
970 post_sem(&tks
->stop
);
971 post_sem(&tks
->start
);
972 join_pthread(th
->tk_pthread
, NULL
);
974 /* Tell main we've finished */
975 post_sem(&s
->complete
);
980 * In an unoptimised loop we try to benchmark how many meaningless loops
981 * per second we can perform on this hardware to fairly accurately
982 * reproduce certain percentage cpu usage
984 void calibrate_loop(void)
986 unsigned long long start_time
, loops_per_msec
, run_time
= 0;
988 struct timespec myts
;
990 loops_per_msec
= 100000;
992 /* Calibrate to within 1% accuracy */
993 while (run_time
> 1010000 || run_time
< 990000) {
994 loops
= loops_per_msec
;
995 start_time
= get_nsecs(&myts
);
997 run_time
= get_nsecs(&myts
) - start_time
;
998 loops_per_msec
= (1000000 * loops_per_msec
/ run_time
? :
1002 /* Rechecking after a pause increases reproducibility */
1004 loops
= loops_per_msec
;
1005 start_time
= get_nsecs(&myts
);
1007 run_time
= get_nsecs(&myts
) - start_time
;
1009 /* Tolerate 5% difference on checking */
1010 if (run_time
> 1050000 || run_time
< 950000)
1013 ud
.loops_per_ms
= loops_per_msec
;
1016 void log_output(const char *format
, ...) __attribute__ ((format(printf
, 1, 2)));
1018 /* Output to console +/- logfile */
1019 void log_output(const char *format
, ...)
1023 va_start(ap
, format
);
1024 if (vprintf(format
, ap
) == -1)
1025 terminal_error("vprintf");
1028 va_start(ap
, format
);
1029 if (vfprintf(ud
.logfile
, format
, ap
) == -1)
1030 terminal_error("vpfrintf");
1036 /* Calculate statistics and output them */
1037 void show_latencies(struct thread
*th
)
1039 struct data_table
*tbj
;
1040 struct tk_thread
*tk
;
1041 double average_latency
, deadlines_met
, samples_met
, sd
, max_latency
;
1042 long double variance
= 0;
1047 if (tbj
->nr_samples
> 1) {
1048 average_latency
= tbj
->total_latency
/ tbj
->nr_samples
;
1049 variance
= (tbj
->sum_latency_squared
- (average_latency
*
1050 average_latency
) / tbj
->nr_samples
) / (tbj
->nr_samples
- 1);
1051 sd
= sqrt((double)variance
);
1053 average_latency
= tbj
->total_latency
;
1058 * Landing on the boundary of a deadline can make loaded runs appear
1059 * to do more work than unloaded due to tiny duration differences.
1061 if (tbj
->achieved_burns
> 0)
1062 samples_met
= (double)tbj
->achieved_burns
/
1063 (double)(tbj
->achieved_burns
+ tbj
->missed_burns
) * 100;
1066 max_latency
= tbj
->max_latency
;
1067 /* When benchmarking rt we represent the data in us */
1069 average_latency
/= 1000;
1071 max_latency
/= 1000;
1073 if (tbj
->deadlines_met
== 0)
1076 deadlines_met
= (double)tbj
->deadlines_met
/
1077 (double)(tbj
->missed_deadlines
+ tbj
->deadlines_met
) * 100;
1079 /* Messy nonsense to format the output nicely */
1080 if (average_latency
>= 100)
1081 log_output("%7.0f +/- ", average_latency
);
1083 log_output("%7.3g +/- ", average_latency
);
1085 log_output("%-9.0f", sd
);
1087 log_output("%-9.3g", sd
);
1088 if (max_latency
>= 100)
1089 log_output("%7.0f\t", max_latency
);
1091 log_output("%7.3g\t", max_latency
);
1092 log_output("\t%4.3g", samples_met
);
1093 if (!th
->nodeadlines
)
1094 log_output("\t%11.3g", deadlines_met
);
1099 void create_read_file(void)
1103 char *name
= "interbench.read";
1105 struct stat statbuf
;
1106 unsigned long mem
, bsize
;
1109 if ((tmp
= open(name
, O_RDONLY
)) == -1) {
1110 if (errno
!= ENOENT
)
1111 terminal_error("open");
1114 if (stat(name
, &statbuf
) == -1)
1115 terminal_error("stat");
1116 if (statbuf
.st_blksize
< MIN_BLK_SIZE
)
1117 statbuf
.st_blksize
= MIN_BLK_SIZE
;
1118 bsize
= statbuf
.st_blksize
;
1119 if (statbuf
.st_size
/ 1024 / bsize
== ud
.ram
/ bsize
)
1121 if (remove(name
) == -1)
1122 terminal_error("remove");
1124 fprintf(stderr
,"Creating file for read load...\n");
1125 if (!(fp
= fopen(name
, "w")))
1126 terminal_error("fopen");
1127 if (stat(name
, &statbuf
) == -1)
1128 terminal_fileopen_error(fp
, "stat");
1129 if (statbuf
.st_blksize
< MIN_BLK_SIZE
)
1130 statbuf
.st_blksize
= MIN_BLK_SIZE
;
1131 bsize
= statbuf
.st_blksize
;
1132 if (!(buf
= calloc(1, bsize
)))
1133 terminal_fileopen_error(fp
, "calloc");
1134 mem
= ud
.ram
/ (bsize
/ 1024); /* kilobytes to blocks */
1136 for (i
= 0 ; i
< mem
; i
++) {
1137 if (fwrite(buf
, bsize
, 1, fp
) != 1)
1138 terminal_fileopen_error(fp
, "fwrite");
1140 if (fclose(fp
) == -1)
1141 terminal_error("fclose");
1148 size_t vms_size
= sizeof(vms
);
1150 if (sysctlbyname("vm.vmstats", &vms
, &vms_size
, NULL
, 0))
1151 terminal_error("sysctlbyname: vm.vmstats");
1153 ud
.ram
= vms
.v_page_count
* vms
.v_page_size
;
1154 ud
.ram
/= 1024; /* linux size is in kB */
1155 ud
.swap
= ud
.ram
; /* XXX: swap doesn't have to be the same as RAM */
1157 if( !ud
.ram
|| !ud
.swap
) {
1159 fprintf(stderr
, "\nCould not get memory or swap size. ");
1160 fprintf(stderr
, "Will not perform mem_load\n");
1161 for (i
= 0 ; i
< THREADS
; i
++) {
1162 if (strcmp(threadlist
[i
].label
, "Memload") == 0) {
1163 threadlist
[i
].load
= 0;
1164 threadlist
[i
].rtload
= 0;
1170 void get_logfilename(void)
1175 int year
, month
, day
, hours
, minutes
;
1178 if (uname(&buf
) == -1)
1179 terminal_error("uname");
1180 if (!(mytm
= localtime(&t
)))
1181 terminal_error("localtime");
1182 year
= mytm
->tm_year
+ 1900;
1183 month
= mytm
->tm_mon
+ 1;
1184 day
= mytm
->tm_mday
;
1185 hours
= mytm
->tm_hour
;
1186 minutes
= mytm
->tm_min
;
1187 strncpy(ud
.unamer
, buf
.release
, MAX_UNAME_LENGTH
);
1189 sprintf(ud
.datestamp
, "%2d%02d%02d%02d%02d",
1190 year
, month
, day
, hours
, minutes
);
1191 snprintf(ud
.logfilename
, MAX_LOG_LENGTH
, "%s.log", ud
.unamer
);
1194 void start_thread(struct thread
*th
)
1196 post_sem(&th
->sem
.start
);
1199 void stop_thread(struct thread
*th
)
1201 post_sem(&th
->sem
.stop
);
1202 wait_sem(&th
->sem
.complete
);
1204 /* Kill the thread */
1205 join_pthread(th
->pthread
, NULL
);
/* Initialise one unnamed, process-shared=0 semaphore to a count of 0. */
void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0) != 0)
		terminal_error("sem_init");
}
1214 void init_all_sems(struct sems
*s
)
1216 /* Initialise the semaphores */
1217 init_sem(&s
->ready
);
1218 init_sem(&s
->start
);
1220 init_sem(&s
->complete
);
1221 init_sem(&s
->stopchild
);
1224 void initialise_thread(int i
)
1226 struct thread
*th
= &threadlist
[i
];
1228 init_all_sems(&th
->sem
);
1229 /* Create the threads. Yes, the (long) cast is fugly but it's safe*/
1230 create_pthread(&th
->pthread
, NULL
, emulation_thread
, (void*)(long)i
);
1232 wait_sem(&th
->sem
.ready
);
1234 * We set this pointer generically to NOT_BENCHING and set it to the
1235 * benchmarked array entry only on benched threads.
1237 th
->dt
= &th
->benchmarks
[NOT_BENCHING
];
1238 initialise_thread_data(th
->dt
);
1242 /* A pseudo-semaphore for processes using a pipe */
1243 void wait_on(int pype
)
1245 int retval
, buf
= 0;
1247 retval
= Read(pype
, &buf
, sizeof(buf
));
1249 fprintf(stderr
, "\nread returned 0\n");
1254 void wakeup_with(int pype
)
1256 int retval
, buf
= 1;
1258 retval
= Write(pype
, &buf
, sizeof(buf
));
1260 fprintf(stderr
, "\nwrite returned 0\n");
1265 void run_loadchild(int j
)
1268 thj
= &threadlist
[j
];
1270 set_nice(ud
.load_nice
);
1271 initialise_thread(j
);
1273 /* Tell main we're ready */
1274 wakeup_with(l2m
[1]);
1276 /* Main tells us we're ready */
1280 /* Tell main we received the start and are running */
1281 wakeup_with(l2m
[1]);
1283 /* Main tells us to stop */
1287 /* Tell main we've finished */
1288 wakeup_with(l2m
[1]);
1292 void run_benchchild(int i
, int j
)
1296 thi
= &threadlist
[i
];
1298 set_nice(ud
.bench_nice
);
1301 initialise_thread(i
);
1302 /* Point the data table to the appropriate load being tested */
1303 thi
->dt
= &thi
->benchmarks
[j
];
1304 initialise_thread_data(thi
->dt
);
1306 set_thread_fifo(thi
->pthread
, 95);
1308 /* Tell main we're ready */
1309 wakeup_with(b2m
[1]);
1311 /* Main tells us we're ready */
1315 /* Tell main we have started */
1316 wakeup_with(b2m
[1]);
1318 /* Main tells us to stop */
1323 set_thread_normal(thi
->pthread
);
1326 show_latencies(thi
);
1328 /* Tell main we've finished */
1329 wakeup_with(b2m
[1]);
1333 void bench(int i
, int j
)
1335 pid_t bench_pid
, load_pid
;
1337 if ((load_pid
= fork()) == -1)
1338 terminal_error("fork");
1342 /* Wait for load process to be ready */
1345 if ((bench_pid
= fork()) == -1)
1346 terminal_error("fork");
1348 run_benchchild(i
, j
);
1350 /* Wait for bench process to be ready */
1354 * We want to be higher priority than everything to signal them to
1355 * stop and we lock our memory if we can as well
1360 /* Wakeup the load process */
1361 wakeup_with(m2l
[1]);
1362 /* Load tells it has received the first message and is running */
1365 /* After a small delay, wake up the benched process */
1367 wakeup_with(m2b
[1]);
1369 /* Bench tells it has received the first message and is running */
1371 microsleep(ud
.duration
* 1000000);
1373 /* Tell the benched process to stop its threads and output results */
1374 wakeup_with(m2b
[1]);
1376 /* Tell the load process to stop its threads */
1377 wakeup_with(m2l
[1]);
1379 /* Return to SCHED_NORMAL */
1383 /* Wait for load and bench processes to terminate */
/* pipe(2) with fatal error checking; pype receives the two fds. */
void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}
1394 void init_pipes(void)
1404 /* Affinity commented out till working on all architectures */
1405 fprintf(stderr
, "interbench v " INTERBENCH_VERSION
" by Con Kolivas\n");
1406 fprintf(stderr
, "interbench [-l <int>] [-L <int>] [-t <int] [-B <int>] [-N <int>]\n");
1407 fprintf(stderr
, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
1408 fprintf(stderr
, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
1409 fprintf(stderr
, "\t[-h]\n\n");
1410 fprintf(stderr
, " -l\tUse <int> loops per sec (default: use saved benchmark)\n");
1411 fprintf(stderr
, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
1412 fprintf(stderr
, " -t\tSeconds to run each benchmark (default: 30)\n");
1413 fprintf(stderr
, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
1414 fprintf(stderr
, " -N\tNice the load thread to <int> (default: 0)\n");
1415 //fprintf(stderr, " -u\tImitate uniprocessor\n");
1416 fprintf(stderr
, " -b\tBenchmark loops_per_ms even if it is already known\n");
1417 fprintf(stderr
, " -c\tOutput to console only (default: use console and logfile)\n");
1418 fprintf(stderr
, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
1419 fprintf(stderr
, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
1420 fprintf(stderr
, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
1421 fprintf(stderr
, " -m\tAdd <comment> to the log file as a separate line\n");
1422 fprintf(stderr
, " -w\tAdd <load type> to the list of loads to be tested against\n");
1423 fprintf(stderr
, " -x\tExclude <load type> from the list of loads to be tested against\n");
1424 fprintf(stderr
, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
1425 fprintf(stderr
, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
1426 fprintf(stderr
, " -h\tShow this help\n");
1427 fprintf(stderr
, "\nIf run without parameters interbench will run a standard benchmark\n\n");
/*
 * SIGCHLD handler: reap finished children.  A child that exited normally
 * with status 0 is the expected case; any other exit (non-zero status or
 * death by signal) is reported and aborts the whole benchmark run.
 *
 * Fix: loop on waitpid().  POSIX does not queue SIGCHLD, so one delivery
 * may have to reap several children; the old single-shot wait could leave
 * zombies when children exited near-simultaneously.
 *
 * NOTE(review): fprintf()/exit() are not async-signal-safe; tolerated here
 * only because this path terminates the program anyway — confirm.
 */
void deadchild(int crap)
{
	pid_t retval;
	int status;

	crap = 0;	/* silence unused-parameter warning */

	/* WNOHANG: never block inside the signal handler */
	while ((retval = waitpid(-1, &status, WNOHANG)) != 0) {
		if (retval == -1) {
			if (errno == ECHILD)
				return;	/* nothing left to reap */
			terminal_error("waitpid");
		}
		if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
			continue;	/* clean exit; keep reaping */
		fprintf(stderr, "\nChild terminated abnormally ");
		if (WIFSIGNALED(status))
			fprintf(stderr, "with signal %d", WTERMSIG(status));
		fprintf(stderr, "\n");
		exit(1);
	}
}
1453 int load_index(const char* loadname
)
1457 for (i
= 0 ; i
< THREADS
; i
++)
1458 if (strcasecmp(loadname
, threadlist
[i
].label
) == 0)
/*
 * Test whether bit `index` of `mask` is set.  Returns 1 if set, 0 if not.
 *
 * Fixes: `static inline` — a plain C99 `inline` with no external definition
 * gives an undefined reference whenever the call is not inlined (e.g. -O0).
 * `1U` — left-shifting a signed 1 into bit 31 is undefined behaviour.
 */
static inline int bit_is_on(const unsigned int mask, int index)
{
	return (mask & (1U << index)) != 0;
}
/*
 * Set bit `index` in *mask, leaving all other bits untouched.
 *
 * Fixes: `static inline` — a plain C99 `inline` with no external definition
 * gives an undefined reference whenever the call is not inlined (e.g. -O0).
 * `1U` — left-shifting a signed 1 into bit 31 is undefined behaviour.
 */
static inline void set_bit_on(unsigned int *mask, int index)
{
	*mask |= (1U << index);
}
1473 int main(int argc
, char **argv
)
1475 unsigned long custom_cpu
= 0;
1476 int q
, i
, j
, affinity
, benchmark
= 0;
1477 unsigned int selected_loads
= 0;
1478 unsigned int excluded_loads
= 0;
1479 unsigned int selected_benches
= 0;
1480 unsigned int excluded_benches
= 0;
1483 * This file stores the loops_per_ms to be reused in a filename that
1486 char *fname
= "interbench.loops_per_ms";
1487 char *comment
= NULL
;
1489 feenableexcept(FE_DIVBYZERO
| FE_INVALID
| FE_OVERFLOW
);
1490 if (signal(SIGCHLD
, deadchild
) == SIG_ERR
)
1491 terminal_error("signal");
1494 while ((q
= getopt(argc
, argv
, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
1500 ud
.loops_per_ms
= atoi(optarg
);
1503 ud
.duration
= atoi(optarg
);
1506 ud
.cpu_load
= atoi(optarg
);
1509 ud
.bench_nice
= atoi(optarg
);
1512 ud
.load_nice
= atoi(optarg
);
1527 custom_cpu
= (unsigned long)atol(optarg
);
1530 ud
.custom_interval
= atol(optarg
);
1536 i
= load_index(optarg
);
1538 fprintf(stderr
, "Unknown load \"%s\"\n", optarg
);
1541 set_bit_on(&selected_loads
, i
);
1544 i
= load_index(optarg
);
1546 fprintf(stderr
, "Unknown load \"%s\"\n", optarg
);
1549 set_bit_on(&excluded_loads
, i
);
1552 i
= load_index(optarg
);
1554 fprintf(stderr
, "Unknown bench \"%s\"\n", optarg
);
1557 set_bit_on(&selected_benches
, i
);
1560 i
= load_index(optarg
);
1562 fprintf(stderr
, "Unknown bench \"%s\"\n", optarg
);
1565 set_bit_on(&excluded_benches
, i
);
1574 /* default is all loads */
1575 if (selected_loads
== 0)
1576 selected_loads
= (unsigned int)-1;
1577 selected_loads
&= ~excluded_loads
;
1578 /* default is all benches */
1579 if (selected_benches
== 0)
1580 selected_benches
= (unsigned int)-1;
1581 selected_benches
&= ~excluded_benches
;
1584 fprintf(stderr
, "Unable to get SCHED_FIFO (real time scheduling).\n");
1585 fprintf(stderr
, "You either need to run this as root user or have support for real time RLIMITS.\n");
1587 fprintf(stderr
, "Real time tests were requested, aborting.\n");
1590 fprintf(stderr
, "Results will be unreliable.\n");
1593 fprintf(stderr
, "Invalid cpu load\n");
1597 if ((custom_cpu
&& !ud
.custom_interval
) ||
1598 (ud
.custom_interval
&& !custom_cpu
) ||
1600 fprintf(stderr
, "Invalid custom values, aborting.\n");
1604 if (custom_cpu
&& ud
.custom_interval
) {
1605 ud
.custom_run
= ud
.custom_interval
* custom_cpu
/ 100;
1606 threadlist
[CUSTOM
].bench
= 1;
1607 threadlist
[CUSTOM
].load
= 1;
1608 threadlist
[CUSTOM
].rtbench
= 1;
1609 threadlist
[CUSTOM
].rtload
= 1;
1612 /*FIXME Affinity commented out till working on all architectures */
1615 #ifdef CPU_SET /* Current glibc expects cpu_set_t */
1619 CPU_SET(0, &cpumask
);
1620 #else /* Old glibc expects unsigned long */
1621 unsigned long cpumask
= 1;
1623 if (sched_setaffinity(0, sizeof(cpumask
), &cpumask
) == -1) {
1625 terminal_error("sched_setaffinity");
1626 fprintf(stderr
, "could not set cpu affinity\n");
1631 /* Make benchmark a multiple of 10 seconds for proper range of X loads */
1632 if (ud
.duration
% 10)
1633 ud
.duration
+= 10 - ud
.duration
% 10;
1636 ud
.loops_per_ms
= 0;
1638 * Try to get loops_per_ms from command line first, file second, and
1639 * benchmark if not available.
1641 if (!ud
.loops_per_ms
) {
1644 if ((fp
= fopen(fname
, "r"))) {
1645 fscanf(fp
, "%lu", &ud
.loops_per_ms
);
1646 if (fclose(fp
) == -1)
1647 terminal_error("fclose");
1648 if (ud
.loops_per_ms
) {
1650 "%lu loops_per_ms read from file interbench.loops_per_ms\n",
1655 if (errno
!= ENOENT
)
1656 terminal_error("fopen");
1658 fprintf(stderr
, "loops_per_ms unknown; benchmarking...\n");
1661 * To get as accurate a loop as possible we time it running
1662 * SCHED_FIFO if we can
1668 fprintf(stderr
, "loops_per_ms specified from command line\n");
1670 if (!(fp
= fopen(fname
, "w"))) {
1671 if (errno
!= EACCES
) /* No write access is not terminal */
1672 terminal_error("fopen");
1673 fprintf(stderr
, "Unable to write to file interbench.loops_per_ms\n");
1676 fprintf(fp
, "%lu", ud
.loops_per_ms
);
1677 fprintf(stderr
, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
1679 if (fclose(fp
) == -1)
1680 terminal_error("fclose");
1688 if (ud
.log
&& !(ud
.logfile
= fopen(ud
.logfilename
, "a"))) {
1689 if (errno
!= EACCES
)
1690 terminal_error("fopen");
1691 fprintf(stderr
, "Unable to write to logfile\n");
1695 log_output("Using %lu loops per ms, running every load for %d seconds\n",
1696 ud
.loops_per_ms
, ud
.duration
);
1697 log_output("Benchmarking kernel %s at datestamp %s\n",
1698 ud
.unamer
, ud
.datestamp
);
1700 log_output("Comment: %s\n", comment
);
1703 for (i
= 0 ; i
< THREADS
; i
++)
1704 threadlist
[i
].threadno
= i
;
1706 for (i
= 0 ; i
< THREADS
; i
++) {
1707 struct thread
*thi
= &threadlist
[i
];
1711 benchme
= &threadlist
[i
].rtbench
;
1713 benchme
= &threadlist
[i
].bench
;
1715 if (!*benchme
|| !bit_is_on(selected_benches
, i
))
1718 log_output("--- Benchmarking simulated cpu of %s ", threadlist
[i
].label
);
1720 log_output("real time ");
1721 else if (ud
.bench_nice
)
1722 log_output("nice %d ", ud
.bench_nice
);
1723 log_output("in the presence of simulated ");
1725 log_output("nice %d ", ud
.load_nice
);
1726 log_output("---\n");
1730 log_output("\tLatency +/- SD (us)");
1732 log_output("\tLatency +/- SD (ms)");
1733 log_output(" Max Latency ");
1734 log_output(" %% Desired CPU");
1735 if (!thi
->nodeadlines
)
1736 log_output(" %% Deadlines Met");
1739 for (j
= 0 ; j
< THREADS
; j
++) {
1740 struct thread
*thj
= &threadlist
[j
];
1742 if (j
== i
|| !bit_is_on(selected_loads
, j
) ||
1743 (!threadlist
[j
].load
&& !ud
.do_rt
) ||
1744 (!threadlist
[j
].rtload
&& ud
.do_rt
))
1746 log_output("%s\t", thj
->label
);