1 /*******************************************
3 * Interbench - Interactivity benchmark
5 * Author: Con Kolivas <kernel@kolivas.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 *******************************************/
24 #define _FILE_OFFSET_BITS 64 /* Large file support */
25 #define INTERBENCH_VERSION "0.30"
37 #include <semaphore.h>
42 #include <sys/utsname.h>
44 #include <sys/resource.h>
45 #include <sys/types.h>
49 #include <sys/sysctl.h>
50 #include <sys/vmmeter.h>
51 #include "interbench.h"
53 #define MAX_UNAME_LENGTH 100
54 #define MAX_LOG_LENGTH ((MAX_UNAME_LENGTH) + 4)
55 #define MIN_BLK_SIZE 1024
56 #define DEFAULT_RESERVE 64
57 #define MB (1024 * 1024) /* 2^20 bytes */
59 #define MAX_MEM_IN_MB (1024 * 64) /* 64 GB */
62 unsigned long loops_per_ms
;
63 unsigned long ram
, swap
;
68 unsigned long custom_run
;
69 unsigned long custom_interval
;
70 unsigned long cpu_load
;
71 char logfilename
[MAX_LOG_LENGTH
];
73 char unamer
[MAX_UNAME_LENGTH
];
82 /* Pipes main to/from load and bench processes */
83 static int m2l
[2], l2m
[2], m2b
[2], b2m
[2];
85 /* Which member of benchmarks is used when not benchmarking */
86 #define NOT_BENCHING (THREADS)
87 #define CUSTOM (THREADS - 1)
90 * To add another load or a benchmark you need to increment the value of
91 * THREADS, add a function prototype for your function and add an entry to
92 * the threadlist. To specify whether the function is a benchmark or a load
93 * set the benchmark and/or load flag as appropriate. The basic requirements
94 * of a new load can be seen by using emulate_none as a template.
97 void emulate_none(struct thread
*th
);
98 void emulate_audio(struct thread
*th
);
99 void emulate_video(struct thread
*th
);
100 void emulate_x(struct thread
*th
);
101 void emulate_game(struct thread
*th
);
102 void emulate_burn(struct thread
*th
);
103 void emulate_write(struct thread
*th
);
104 void emulate_read(struct thread
*th
);
105 void emulate_ring(struct thread
*th
);
106 void emulate_compile(struct thread
*th
);
107 void emulate_memload(struct thread
*th
);
108 void emulate_hackbench(struct thread
*th
);
109 void emulate_custom(struct thread
*th
);
111 struct thread threadlist
[THREADS
] = {
112 {.label
= "None", .name
= emulate_none
, .load
= 1, .rtload
= 1},
113 {.label
= "Audio", .name
= emulate_audio
, .bench
= 1, .rtbench
= 1},
114 {.label
= "Video", .name
= emulate_video
, .bench
= 1, .rtbench
= 1, .load
= 1, .rtload
= 1},
115 {.label
= "X", .name
= emulate_x
, .bench
= 1, .load
= 1, .rtload
= 1},
116 {.label
= "Gaming", .name
= emulate_game
, .nodeadlines
= 1, .bench
= 1},
117 {.label
= "Burn", .name
= emulate_burn
, .load
= 1, .rtload
= 1},
118 {.label
= "Write", .name
= emulate_write
, .load
= 1, .rtload
= 1},
119 {.label
= "Read", .name
= emulate_read
, .load
= 1, .rtload
= 1},
120 {.label
= "Ring", .name
= emulate_ring
, .load
= 0, .rtload
= 0}, /* No useful data from this */
121 {.label
= "Compile", .name
= emulate_compile
, .load
= 1, .rtload
= 1},
122 {.label
= "Memload", .name
= emulate_memload
, .load
= 1, .rtload
= 1},
123 {.label
= "Hack", .name
= emulate_hackbench
, .load
= 0, .rtload
= 0}, /* This is causing signal headaches */
124 {.label
= "Custom", .name
= emulate_custom
}, /* Leave custom as last entry */
127 void init_sem(sem_t
*sem
);
128 void init_all_sems(struct sems
*s
);
129 void initialise_thread(int i
);
130 void start_thread(struct thread
*th
);
131 void stop_thread(struct thread
*th
);
133 void terminal_error(const char *name
)
135 fprintf(stderr
, "\n");
/*
 * Fatal-error exit path used while a stream is still open: close the
 * stream first so the descriptor is not leaked, then bail out through
 * terminal_error() with the caller-supplied name.
 */
void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == -1)
		terminal_error("fclose");
	terminal_error(name);
}
147 unsigned long long get_nsecs(struct timespec
*myts
)
149 if (clock_gettime(CLOCK_REALTIME
, myts
))
150 terminal_error("clock_gettime");
151 return (myts
->tv_sec
* 1000000000 + myts
->tv_nsec
);
154 unsigned long get_usecs(struct timespec
*myts
)
156 if (clock_gettime(CLOCK_REALTIME
, myts
))
157 terminal_error("clock_gettime");
158 return (myts
->tv_sec
* 1000000 + myts
->tv_nsec
/ 1000 );
161 void set_fifo(int prio
)
163 struct sched_param sp
;
165 memset(&sp
, 0, sizeof(sp
));
166 sp
.sched_priority
= prio
;
167 if (sched_setscheduler(0, SCHED_FIFO
, &sp
) == -1) {
169 terminal_error("sched_setscheduler");
177 mlockflags
= MCL_CURRENT
| MCL_FUTURE
;
179 mlockall(mlockflags
); /* Is not critical if this fails */
/* Release any mlockall() memory locks; failure here is fatal. */
void set_munlock(void)
{
	if (munlockall() == -1)
		terminal_error("munlockall");
}
/*
 * Put the given pthread on SCHED_FIFO real time scheduling at
 * priority "prio".
 *
 * Bug fix: pthread_setschedparam() does not return -1 and set errno
 * like sched_setscheduler(); it returns 0 on success and a positive
 * error number on failure, so the old "== -1" test silently ignored
 * every failure.  Test for non-zero instead.
 */
void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (pthread_setschedparam(pthread, SCHED_FIFO, &sp) != 0)
		terminal_error("pthread_setschedparam");
}
/*
 * Drop the calling process back to ordinary SCHED_OTHER scheduling.
 * Failure is only warned about, not fatal, since we may simply not
 * have been running real time in the first place.
 */
void set_normal(void)
{
	struct sched_param sp = { .sched_priority = 0 };

	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}
/* Set the nice value of the current process, dying on failure. */
void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}
218 struct sched_param sp
;
219 memset(&sp
, 0, sizeof(sp
));
220 sp
.sched_priority
= 99;
221 if (sched_setscheduler(0, SCHED_FIFO
, &sp
) == -1) {
223 terminal_error("sched_setscheduler");
226 if (sched_getscheduler(0) != SCHED_FIFO
)
/*
 * Return the given pthread to ordinary SCHED_OTHER scheduling.
 *
 * Bug fix: pthread_setschedparam() returns 0 on success and a
 * positive error number on failure — it never returns -1 — so the
 * old "== -1" comparison could never detect an error.  Test for
 * non-zero instead.
 */
void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (pthread_setschedparam(pthread, SCHED_OTHER, &sp) != 0)
		terminal_error("pthread_setschedparam");
}
244 void sync_flush(void)
246 if ((fflush(NULL
)) == EOF
)
247 terminal_error("fflush");
253 unsigned long compute_allocable_mem(void)
255 unsigned long total
= ud
.ram
+ ud
.swap
;
256 unsigned long usage
= ud
.ram
* 110 / 100 ;
258 /* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
259 if (total
- DEFAULT_RESERVE
< usage
)
260 usage
= total
- DEFAULT_RESERVE
;
261 usage
/= 1024; /* to megabytes */
/*
 * Spin for "loops" iterations of a do-nothing loop.
 *
 * The empty __asm__ with a "memory" clobber is the magic that stops
 * the compiler optimising the loop away entirely; without it,
 * emulating a fixed cpu load with this loop would not work.
 */
void burn_loops(unsigned long loops)
{
	for (unsigned long i = 0; i < loops; i++)
		__asm__ __volatile__("" : : : "memory");
}
280 /* Use this many usecs of cpu time */
281 void burn_usecs(unsigned long usecs
)
283 unsigned long ms_loops
;
285 ms_loops
= ud
.loops_per_ms
/ 1000 * usecs
;
286 burn_loops(ms_loops
);
289 void microsleep(unsigned long long usecs
)
291 struct timespec req
, rem
;
293 rem
.tv_sec
= rem
.tv_nsec
= 0;
295 req
.tv_sec
= usecs
/ 1000000;
296 req
.tv_nsec
= (usecs
- (req
.tv_sec
* 1000000)) * 1000;
298 if ((nanosleep(&req
, &rem
)) == -1) {
299 if (errno
== EINTR
) {
300 if (rem
.tv_sec
|| rem
.tv_nsec
) {
301 req
.tv_sec
= rem
.tv_sec
;
302 req
.tv_nsec
= rem
.tv_nsec
;
307 terminal_error("nanosleep");
314 * Yes, sem_post and sem_wait shouldn't return -1 but they do so we must
317 inline void post_sem(sem_t
*s
)
320 if ((sem_post(s
)) == -1) {
323 terminal_error("sem_post");
327 inline void wait_sem(sem_t
*s
)
330 if ((sem_wait(s
)) == -1) {
333 terminal_error("sem_wait");
337 inline int trywait_sem(sem_t
*s
)
342 if ((ret
= sem_trywait(s
)) == -1) {
346 terminal_error("sem_trywait");
/*
 * Wrapper around read(2): retries transparently when interrupted by a
 * signal (EINTR) and treats any other failure as fatal.  Returns the
 * number of bytes actually read (which may be short, or 0 at EOF).
 */
inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

	while ((retval = read(fd, buf, count)) == -1) {
		if (errno == EINTR)
			continue;	/* signal interrupted us; just retry */
		terminal_error("read");
	}
	return retval;
}
/*
 * Wrapper around write(2): retries transparently when interrupted by
 * a signal (EINTR) and treats any other failure as fatal.  Returns
 * the number of bytes actually written.
 *
 * Bug fix: the data pointer itself must be passed to write(), not
 * its address.  "write(fd, &buf, count)" wrote "count" bytes of
 * stack garbage starting at the local pointer variable instead of
 * the caller's buffer (compare the correct call in Read()).
 */
inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

	while ((retval = write(fd, buf, count)) == -1) {
		if (errno == EINTR)
			continue;	/* signal interrupted us; just retry */
		terminal_error("write");
	}
	return retval;
}
379 unsigned long periodic_schedule(struct thread
*th
, unsigned long run_usecs
,
380 unsigned long interval_usecs
, unsigned long long deadline
)
382 unsigned long long latency
, missed_latency
;
383 unsigned long long current_time
;
384 struct tk_thread
*tk
;
385 struct data_table
*tb
;
386 struct timespec myts
;
392 current_time
= get_usecs(&myts
);
393 if (current_time
> deadline
+ tk
->slept_interval
)
394 latency
= current_time
- deadline
- tk
->slept_interval
;
396 /* calculate the latency for missed frames */
399 current_time
= get_usecs(&myts
);
400 if (interval_usecs
&& current_time
> deadline
+ interval_usecs
) {
401 /* We missed the deadline even before we consumed cpu */
402 unsigned long intervals
;
404 deadline
+= interval_usecs
;
405 intervals
= (current_time
- deadline
) /
408 tb
->missed_deadlines
+= intervals
;
409 missed_latency
= intervals
* interval_usecs
;
410 deadline
+= intervals
* interval_usecs
;
411 tb
->missed_burns
+= intervals
;
415 burn_usecs(run_usecs
);
416 current_time
= get_usecs(&myts
);
417 tb
->achieved_burns
++;
420 * If we meet the deadline we move the deadline forward, otherwise
421 * we consider it a missed deadline and dropped frame etc.
423 deadline
+= interval_usecs
;
424 if (deadline
>= current_time
) {
427 if (interval_usecs
) {
428 unsigned long intervals
= (current_time
- deadline
) /
431 tb
->missed_deadlines
+= intervals
;
432 missed_latency
= intervals
* interval_usecs
;
433 deadline
+= intervals
* interval_usecs
;
435 tb
->missed_burns
+= intervals
;
437 deadline
= current_time
;
442 tk
->sleep_interval
= deadline
- current_time
;
444 post_sem(&tk
->sem
.start
);
445 wait_sem(&tk
->sem
.complete
);
448 * Must add missed_latency to total here as this function may not be
449 * called again and the missed latency can be lost
451 latency
+= missed_latency
;
452 if (latency
> tb
->max_latency
)
453 tb
->max_latency
= latency
;
454 tb
->total_latency
+= latency
;
455 tb
->sum_latency_squared
+= latency
* latency
;
461 void initialise_thread_data(struct data_table
*tb
)
465 tb
->sum_latency_squared
=
467 tb
->missed_deadlines
=
/* pthread_create() with fatal error checking. */
void create_pthread(pthread_t * thread, pthread_attr_t * attr,
	void * (*start_routine)(void *), void *arg)
{
	int ret = pthread_create(thread, attr, start_routine, arg);

	if (ret)
		terminal_error("pthread_create");
}
/* pthread_join() with fatal error checking. */
void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return) != 0)
		terminal_error("pthread_join");
}
485 void emulate_none(struct thread
*th
)
487 sem_t
*s
= &th
->sem
.stop
;
491 #define AUDIO_INTERVAL (50000)
492 #define AUDIO_RUN (AUDIO_INTERVAL / 20)
493 /* We emulate audio by using 5% cpu and waking every 50ms */
494 void emulate_audio(struct thread
*th
)
496 unsigned long long deadline
;
497 sem_t
*s
= &th
->sem
.stop
;
498 struct timespec myts
;
500 th
->decasecond_deadlines
= 1000000 / AUDIO_INTERVAL
* 10;
501 deadline
= get_usecs(&myts
);
504 deadline
= periodic_schedule(th
, AUDIO_RUN
, AUDIO_INTERVAL
,
511 /* We emulate video by using 40% cpu and waking for 60fps */
512 #define VIDEO_INTERVAL (1000000 / 60)
513 #define VIDEO_RUN (VIDEO_INTERVAL * 40 / 100)
514 void emulate_video(struct thread
*th
)
516 unsigned long long deadline
;
517 sem_t
*s
= &th
->sem
.stop
;
518 struct timespec myts
;
520 th
->decasecond_deadlines
= 1000000 / VIDEO_INTERVAL
* 10;
521 deadline
= get_usecs(&myts
);
524 deadline
= periodic_schedule(th
, VIDEO_RUN
, VIDEO_INTERVAL
,
532 * We emulate X by running for a variable percentage of cpu from 0-100%
535 void emulate_x(struct thread
*th
)
537 unsigned long long deadline
;
538 sem_t
*s
= &th
->sem
.stop
;
539 struct timespec myts
;
541 th
->decasecond_deadlines
= 100;
542 deadline
= get_usecs(&myts
);
546 for (i
= 0 ; i
<= 100 ; i
++) {
548 deadline
= periodic_schedule(th
, i
* 1000, j
* 1000,
550 deadline
+= i
* 1000;
558 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
559 * completed) we can do in that time. Deadlines are meaningless with
560 * unlocked frame rates. We do not use periodic schedule for
561 * this load because it never wants to sleep.
563 #define GAME_INTERVAL (100000)
564 #define GAME_RUN (GAME_INTERVAL)
565 void emulate_game(struct thread
*th
)
567 unsigned long long deadline
, current_time
, latency
;
568 sem_t
*s
= &th
->sem
.stop
;
569 struct timespec myts
;
570 struct data_table
*tb
;
573 th
->decasecond_deadlines
= 1000000 / GAME_INTERVAL
* 10;
576 deadline
= get_usecs(&myts
) + GAME_INTERVAL
;
577 burn_usecs(GAME_RUN
);
578 current_time
= get_usecs(&myts
);
579 /* use usecs instead of simple count for game burn statistics */
580 tb
->achieved_burns
+= GAME_RUN
;
581 if (current_time
> deadline
) {
582 latency
= current_time
- deadline
;
583 tb
->missed_burns
+= latency
;
586 if (latency
> tb
->max_latency
)
587 tb
->max_latency
= latency
;
588 tb
->total_latency
+= latency
;
589 tb
->sum_latency_squared
+= latency
* latency
;
596 void *burn_thread(void *t
)
603 s
= &th
->sem
.stopchild
;
606 burn_loops(ud
.loops_per_ms
);
607 if (!trywait_sem(s
)) {
615 /* Have ud.cpu_load threads burn cpu continuously */
616 void emulate_burn(struct thread
*th
)
618 sem_t
*s
= &th
->sem
.stop
;
621 pthread_t burnthreads
[ud
.cpu_load
];
624 for (i
= 0 ; i
< ud
.cpu_load
; i
++)
625 create_pthread(&burnthreads
[i
], NULL
, burn_thread
,
628 post_sem(&th
->sem
.stopchild
);
629 for (i
= 0 ; i
< ud
.cpu_load
; i
++)
630 join_pthread(burnthreads
[i
], NULL
);
633 /* Write a file the size of ram continuously */
634 void emulate_write(struct thread
*th
)
636 sem_t
*s
= &th
->sem
.stop
;
638 char *name
= "interbench.write";
643 if (!(fp
= fopen(name
, "w")))
644 terminal_error("fopen");
645 if (stat(name
, &statbuf
) == -1)
646 terminal_fileopen_error(fp
, "stat");
647 if (statbuf
.st_blksize
< MIN_BLK_SIZE
)
648 statbuf
.st_blksize
= MIN_BLK_SIZE
;
649 mem
= ud
.ram
/ (statbuf
.st_blksize
/ 1024); /* kilobytes to blocks */
650 if (!(buf
= calloc(1, statbuf
.st_blksize
)))
651 terminal_fileopen_error(fp
, "calloc");
652 if (fclose(fp
) == -1)
653 terminal_error("fclose");
658 if (!(fp
= fopen(name
, "w")))
659 terminal_error("fopen");
660 if (stat(name
, &statbuf
) == -1)
661 terminal_fileopen_error(fp
, "stat");
662 for (i
= 0 ; i
< mem
; i
++) {
663 if (fwrite(buf
, statbuf
.st_blksize
, 1, fp
) != 1)
664 terminal_fileopen_error(fp
, "fwrite");
668 if (fclose(fp
) == -1)
669 terminal_error("fclose");
673 if (fclose(fp
) == -1)
674 terminal_error("fclose");
675 if (remove(name
) == -1)
676 terminal_error("remove");
680 /* Read a file the size of ram continuously */
681 void emulate_read(struct thread
*th
)
683 sem_t
*s
= &th
->sem
.stop
;
684 char *name
= "interbench.read";
690 if ((tmp
= open(name
, O_RDONLY
)) == -1)
691 terminal_error("open");
692 if (stat(name
, &statbuf
) == -1)
693 terminal_error("stat");
694 bsize
= statbuf
.st_blksize
;
695 if (!(buf
= malloc(bsize
)))
696 terminal_error("malloc");
702 * We have to read the whole file before quitting the load
703 * to prevent the data being cached for the next read. This
704 * is also the reason the file is the size of physical ram.
706 while ((rd
= Read(tmp
, buf
, bsize
)) > 0);
709 if (lseek(tmp
, (off_t
)0, SEEK_SET
) == -1)
710 terminal_error("lseek");
714 #define RINGTHREADS 4
716 struct thread ringthreads
[RINGTHREADS
];
718 void *ring_thread(void *t
)
725 th
= &ringthreads
[i
];
728 if (post_to
== RINGTHREADS
)
735 post_sem(&ringthreads
[post_to
].sem
.start
);
736 if (!trywait_sem(&s
->stop
))
740 post_sem(&ringthreads
[post_to
].sem
.start
);
741 post_sem(&s
->complete
);
745 /* Create a ring of 4 processes that wake each other up in a circle */
746 void emulate_ring(struct thread
*th
)
748 sem_t
*s
= &th
->sem
.stop
;
751 for (i
= 0 ; i
< RINGTHREADS
; i
++) {
752 init_all_sems(&ringthreads
[i
].sem
);
753 create_pthread(&ringthreads
[i
].pthread
, NULL
,
754 ring_thread
, (void*)(long) i
);
757 wait_sem(&ringthreads
[0].sem
.ready
);
758 post_sem(&ringthreads
[0].sem
.start
);
760 for (i
= 0 ; i
< RINGTHREADS
; i
++)
761 post_sem(&ringthreads
[i
].sem
.stop
);
762 for (i
= 0 ; i
< RINGTHREADS
; i
++) {
763 wait_sem(&ringthreads
[i
].sem
.complete
);
764 join_pthread(ringthreads
[i
].pthread
, NULL
);
768 /* We emulate a compile by running burn, write and read threads simultaneously */
769 void emulate_compile(struct thread
*th
)
771 sem_t
*s
= &th
->sem
.stop
;
772 unsigned long i
, threads
[3];
774 bzero(threads
, 3 * sizeof(threads
[0]));
776 for (i
= 0 ; i
< THREADS
; i
++) {
777 if (strcmp(threadlist
[i
].label
, "Burn") == 0)
779 if (strcmp(threadlist
[i
].label
, "Write") == 0)
781 if (strcmp(threadlist
[i
].label
, "Read") == 0)
784 for (i
= 0 ; i
< 3 ; i
++) {
786 fprintf(stderr
, "Can't find all threads for compile load\n");
790 for (i
= 0 ; i
< 3 ; i
++) {
791 initialise_thread(threads
[i
]);
792 start_thread(&threadlist
[threads
[i
]]);
795 for (i
= 0 ; i
< 3 ; i
++)
796 stop_thread(&threadlist
[threads
[i
]]);
799 int *grab_and_touch (char *block
[], int i
)
801 block
[i
] = (char *) malloc(MB
);
804 return (memset(block
[i
], 1, MB
));
807 /* We emulate a memory load by allocating and torturing 110% of available ram */
808 void emulate_memload(struct thread
*th
)
810 sem_t
*s
= &th
->sem
.stop
;
811 unsigned long touchable_mem
, i
;
812 char *mem_block
[MAX_MEM_IN_MB
];
815 touchable_mem
= compute_allocable_mem();
816 /* loop until we're killed, frobbing memory in various perverted ways */
818 for (i
= 0; i
< touchable_mem
; i
++) {
819 success
= grab_and_touch(mem_block
, i
);
827 for (i
= 0; i
< touchable_mem
; i
++) {
828 memcpy(mem_block
[i
], mem_block
[(i
+ touchable_mem
/ 2) %
833 for (i
= 0; i
< touchable_mem
; i
++) {
840 for (i
= 0; i
< touchable_mem
; i
++)
846 struct thread hackthread
;
848 void emulate_hackbench(struct thread
*th
)
850 sem_t
*s
= &th
->sem
.stop
;
852 init_all_sems(&hackthread
.sem
);
853 create_pthread(&hackthread
.pthread
, NULL
, hackbench_thread
, (void *) 0);
857 post_sem(&hackthread
.sem
.stop
);
858 wait_sem(&hackthread
.sem
.complete
);
860 join_pthread(hackthread
.pthread
, NULL
);
863 #define CUSTOM_INTERVAL (ud.custom_interval)
864 #define CUSTOM_RUN (ud.custom_run)
865 void emulate_custom(struct thread
*th
)
867 unsigned long long deadline
;
868 sem_t
*s
= &th
->sem
.stop
;
869 struct timespec myts
;
871 th
->decasecond_deadlines
= 1000000 / CUSTOM_INTERVAL
* 10;
872 deadline
= get_usecs(&myts
);
875 deadline
= periodic_schedule(th
, CUSTOM_RUN
, CUSTOM_INTERVAL
,
882 void *timekeeping_thread(void *t
)
885 struct tk_thread
*tk
;
887 struct timespec myts
;
892 s
= &th
->tkthread
.sem
;
894 * If this timekeeping thread is that of a benchmarked thread we run
895 * even higher priority than the benched thread is if running real
896 * time. Otherwise, the load timekeeping thread, which does not need
897 * accurate accounting remains SCHED_NORMAL;
899 if (th
->dt
!= &th
->benchmarks
[NOT_BENCHING
])
901 /* These values must be changed at the appropriate places or race */
902 tk
->sleep_interval
= tk
->slept_interval
= 0;
906 unsigned long start_time
, now
;
908 if (!trywait_sem(&s
->stop
))
911 tk
->slept_interval
= 0;
912 start_time
= get_usecs(&myts
);
913 if (!trywait_sem(&s
->stop
))
915 if (tk
->sleep_interval
) {
916 unsigned long diff
= 0;
917 microsleep(tk
->sleep_interval
);
918 now
= get_usecs(&myts
);
919 /* now should always be > start_time but... */
920 if (now
> start_time
) {
921 diff
= now
- start_time
;
922 if (diff
> tk
->sleep_interval
)
923 tk
->slept_interval
= diff
-
927 tk
->sleep_interval
= 0;
928 post_sem(&s
->complete
);
935 * All the sleep functions such as nanosleep can only guarantee that they
936 * sleep for _at least_ the time requested. We work around this by having
937 * a high priority real time thread that accounts for the extra time slept
938 * in nanosleep. This allows wakeup latency of the tested thread to be
939 * accurate and reflect true scheduling delays.
941 void *emulation_thread(void *t
)
944 struct tk_thread
*tk
;
945 struct sems
*s
, *tks
;
954 /* Start the timekeeping thread */
955 create_pthread(&th
->tk_pthread
, NULL
, timekeeping_thread
,
957 /* Wait for timekeeping thread to be ready */
958 wait_sem(&tks
->ready
);
960 /* Tell main we're ready to start*/
963 /* Wait for signal from main to start thread */
966 /* Start the actual function being benched/or running as load */
969 /* Stop the timekeeping thread */
970 post_sem(&tks
->stop
);
971 post_sem(&tks
->start
);
972 join_pthread(th
->tk_pthread
, NULL
);
974 /* Tell main we've finished */
975 post_sem(&s
->complete
);
980 * In an unoptimised loop we try to benchmark how many meaningless loops
981 * per second we can perform on this hardware to fairly accurately
982 * reproduce certain percentage cpu usage
984 void calibrate_loop(void)
986 unsigned long long start_time
, loops_per_msec
, run_time
= 0;
988 struct timespec myts
;
990 loops_per_msec
= 100000;
992 /* Calibrate to within 1% accuracy */
993 while (run_time
> 1010000 || run_time
< 990000) {
994 loops
= loops_per_msec
;
995 start_time
= get_nsecs(&myts
);
997 run_time
= get_nsecs(&myts
) - start_time
;
998 loops_per_msec
= (1000000 * loops_per_msec
/ run_time
? :
1002 /* Rechecking after a pause increases reproducibility */
1004 loops
= loops_per_msec
;
1005 start_time
= get_nsecs(&myts
);
1007 run_time
= get_nsecs(&myts
) - start_time
;
1009 /* Tolerate 5% difference on checking */
1010 if (run_time
> 1050000 || run_time
< 950000)
1013 ud
.loops_per_ms
= loops_per_msec
;
1016 void log_output(const char *format
, ...) __attribute__ ((format(printf
, 1, 2)));
1018 /* Output to console +/- logfile */
1019 void log_output(const char *format
, ...)
1023 va_start(ap
, format
);
1024 if (vprintf(format
, ap
) == -1)
1025 terminal_error("vprintf");
1028 va_start(ap
, format
);
1029 if (vfprintf(ud
.logfile
, format
, ap
) == -1)
1030 terminal_error("vpfrintf");
1036 /* Calculate statistics and output them */
1037 void show_latencies(struct thread
*th
)
1039 struct data_table
*tbj
;
1040 struct tk_thread
*tk
;
1041 double average_latency
, deadlines_met
, samples_met
, sd
, max_latency
;
1042 long double variance
= 0;
1047 if (tbj
->nr_samples
> 1) {
1048 average_latency
= tbj
->total_latency
/ tbj
->nr_samples
;
1049 variance
= (tbj
->sum_latency_squared
- (average_latency
*
1050 average_latency
) / tbj
->nr_samples
) / (tbj
->nr_samples
- 1);
1051 sd
= sqrt((double)variance
);
1053 average_latency
= tbj
->total_latency
;
1058 * Landing on the boundary of a deadline can make loaded runs appear
1059 * to do more work than unloaded due to tiny duration differences.
1061 if (tbj
->achieved_burns
> 0)
1062 samples_met
= (double)tbj
->achieved_burns
/
1063 (double)(tbj
->achieved_burns
+ tbj
->missed_burns
) * 100;
1066 max_latency
= tbj
->max_latency
;
1067 /* When benchmarking rt we represent the data in us */
1069 average_latency
/= 1000;
1071 max_latency
/= 1000;
1073 if (tbj
->deadlines_met
== 0)
1076 deadlines_met
= (double)tbj
->deadlines_met
/
1077 (double)(tbj
->missed_deadlines
+ tbj
->deadlines_met
) * 100;
1079 /* Messy nonsense to format the output nicely */
1080 if (average_latency
>= 100)
1081 log_output("%7.0f +/- ", average_latency
);
1083 log_output("%7.3g +/- ", average_latency
);
1085 log_output("%-9.0f", sd
);
1087 log_output("%-9.3g", sd
);
1088 if (max_latency
>= 100)
1089 log_output("%7.0f\t", max_latency
);
1091 log_output("%7.3g\t", max_latency
);
1092 log_output("\t%4.3g", samples_met
);
1093 if (!th
->nodeadlines
)
1094 log_output("\t%11.3g", deadlines_met
);
1099 void create_read_file(void)
1103 char *name
= "interbench.read";
1105 struct stat statbuf
;
1106 unsigned long mem
, bsize
;
1109 if ((tmp
= open(name
, O_RDONLY
)) == -1) {
1110 if (errno
!= ENOENT
)
1111 terminal_error("open");
1114 if (stat(name
, &statbuf
) == -1)
1115 terminal_error("stat");
1116 if (statbuf
.st_blksize
< MIN_BLK_SIZE
)
1117 statbuf
.st_blksize
= MIN_BLK_SIZE
;
1118 bsize
= statbuf
.st_blksize
;
1119 if (statbuf
.st_size
/ 1024 / bsize
== ud
.ram
/ bsize
)
1121 if (remove(name
) == -1)
1122 terminal_error("remove");
1124 fprintf(stderr
,"Creating file for read load...\n");
1125 if (!(fp
= fopen(name
, "w")))
1126 terminal_error("fopen");
1127 if (stat(name
, &statbuf
) == -1)
1128 terminal_fileopen_error(fp
, "stat");
1129 if (statbuf
.st_blksize
< MIN_BLK_SIZE
)
1130 statbuf
.st_blksize
= MIN_BLK_SIZE
;
1131 bsize
= statbuf
.st_blksize
;
1132 if (!(buf
= calloc(1, bsize
)))
1133 terminal_fileopen_error(fp
, "calloc");
1134 mem
= ud
.ram
/ (bsize
/ 1024); /* kilobytes to blocks */
1136 for (i
= 0 ; i
< mem
; i
++) {
1137 if (fwrite(buf
, bsize
, 1, fp
) != 1)
1138 terminal_fileopen_error(fp
, "fwrite");
1140 if (fclose(fp
) == -1)
1141 terminal_error("fclose");
1148 size_t vms_size
= sizeof(vms
);
1150 if (sysctlbyname("vm.vmstats", &vms
, &vms_size
, NULL
, 0))
1151 terminal_error("sysctlbyname: vm.vmstats");
1153 ud
.ram
= vms
.v_page_count
* vms
.v_page_size
;
1154 ud
.ram
/= 1024; /* linux size is in kB */
1155 ud
.swap
= ud
.ram
; /* XXX: swap doesn't have to be the same as RAM */
1157 if( !ud
.ram
|| !ud
.swap
) {
1159 fprintf(stderr
, "\nCould not get memory or swap size. ");
1160 fprintf(stderr
, "Will not perform mem_load\n");
1161 for (i
= 0 ; i
< THREADS
; i
++) {
1162 if (strcmp(threadlist
[i
].label
, "Memload") == 0) {
1163 threadlist
[i
].load
= 0;
1164 threadlist
[i
].rtload
= 0;
1170 void get_logfilename(void)
1175 int year
, month
, day
, hours
, minutes
;
1178 if (uname(&buf
) == -1)
1179 terminal_error("uname");
1180 if (!(mytm
= localtime(&t
)))
1181 terminal_error("localtime");
1182 year
= mytm
->tm_year
+ 1900;
1183 month
= mytm
->tm_mon
+ 1;
1184 day
= mytm
->tm_mday
;
1185 hours
= mytm
->tm_hour
;
1186 minutes
= mytm
->tm_min
;
1187 strncpy(ud
.unamer
, buf
.release
, MAX_UNAME_LENGTH
);
1189 sprintf(ud
.datestamp
, "%2d%02d%02d%02d%02d",
1190 year
, month
, day
, hours
, minutes
);
1191 snprintf(ud
.logfilename
, MAX_LOG_LENGTH
, "%s.log", ud
.unamer
);
1194 void start_thread(struct thread
*th
)
1196 post_sem(&th
->sem
.start
);
1199 void stop_thread(struct thread
*th
)
1201 post_sem(&th
->sem
.stop
);
1202 wait_sem(&th
->sem
.complete
);
1204 /* Kill the thread */
1205 join_pthread(th
->pthread
, NULL
);
/* Initialise one unnamed, process-shared=0 semaphore to a count of 0. */
void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0) != 0)
		terminal_error("sem_init");
}
1214 void init_all_sems(struct sems
*s
)
1216 /* Initialise the semaphores */
1217 init_sem(&s
->ready
);
1218 init_sem(&s
->start
);
1220 init_sem(&s
->complete
);
1221 init_sem(&s
->stopchild
);
1224 void initialise_thread(int i
)
1226 struct thread
*th
= &threadlist
[i
];
1228 init_all_sems(&th
->sem
);
1229 /* Create the threads. Yes, the (long) cast is fugly but it's safe*/
1230 create_pthread(&th
->pthread
, NULL
, emulation_thread
, (void*)(long)i
);
1232 wait_sem(&th
->sem
.ready
);
1234 * We set this pointer generically to NOT_BENCHING and set it to the
1235 * benchmarked array entry only on benched threads.
1237 th
->dt
= &th
->benchmarks
[NOT_BENCHING
];
1238 initialise_thread_data(th
->dt
);
1242 /* A pseudo-semaphore for processes using a pipe */
1243 void wait_on(int pype
)
1245 int retval
, buf
= 0;
1247 retval
= Read(pype
, &buf
, sizeof(buf
));
1249 fprintf(stderr
, "\nread returned 0\n");
1254 void wakeup_with(int pype
)
1256 int retval
, buf
= 1;
1258 retval
= Write(pype
, &buf
, sizeof(buf
));
1260 fprintf(stderr
, "\nwrite returned 0\n");
1265 void run_loadchild(int j
)
1268 thj
= &threadlist
[j
];
1270 set_nice(ud
.load_nice
);
1271 initialise_thread(j
);
1273 /* Tell main we're ready */
1274 wakeup_with(l2m
[1]);
1276 /* Main tells us we're ready */
1280 /* Tell main we received the start and are running */
1281 wakeup_with(l2m
[1]);
1283 /* Main tells us to stop */
1287 /* Tell main we've finished */
1288 wakeup_with(l2m
[1]);
1292 void run_benchchild(int i
, int j
)
1296 thi
= &threadlist
[i
];
1298 set_nice(ud
.bench_nice
);
1301 initialise_thread(i
);
1302 /* Point the data table to the appropriate load being tested */
1303 thi
->dt
= &thi
->benchmarks
[j
];
1304 initialise_thread_data(thi
->dt
);
1306 set_thread_fifo(thi
->pthread
, 95);
1308 /* Tell main we're ready */
1309 wakeup_with(b2m
[1]);
1311 /* Main tells us we're ready */
1315 /* Tell main we have started */
1316 wakeup_with(b2m
[1]);
1318 /* Main tells us to stop */
1323 set_thread_normal(thi
->pthread
);
1326 show_latencies(thi
);
1328 /* Tell main we've finished */
1329 wakeup_with(b2m
[1]);
1333 void bench(int i
, int j
)
1335 pid_t bench_pid
, load_pid
;
1337 if ((load_pid
= fork()) == -1)
1338 terminal_error("fork");
1342 /* Wait for load process to be ready */
1345 if ((bench_pid
= fork()) == -1)
1346 terminal_error("fork");
1348 run_benchchild(i
, j
);
1350 /* Wait for bench process to be ready */
1354 * We want to be higher priority than everything to signal them to
1355 * stop and we lock our memory if we can as well
1360 /* Wakeup the load process */
1361 wakeup_with(m2l
[1]);
1362 /* Load tells it has received the first message and is running */
1365 /* After a small delay, wake up the benched process */
1367 wakeup_with(m2b
[1]);
1369 /* Bench tells it has received the first message and is running */
1371 microsleep(ud
.duration
* 1000000);
1373 /* Tell the benched process to stop its threads and output results */
1374 wakeup_with(m2b
[1]);
1376 /* Tell the load process to stop its threads */
1377 wakeup_with(m2l
[1]);
1379 /* Return to SCHED_NORMAL */
1383 /* Wait for load and bench processes to terminate */
/* pipe(2) with fatal error checking; pype receives the two fds. */
void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}
1394 void init_pipes(void)
1404 /* Affinity commented out till working on all architectures */
1405 fprintf(stderr
, "interbench v " INTERBENCH_VERSION
" by Con Kolivas\n");
1406 fprintf(stderr
, "interbench [-l <int>] [-L <int>] [-t <int] [-B <int>] [-N <int>]\n");
1407 fprintf(stderr
, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
1408 fprintf(stderr
, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
1409 fprintf(stderr
, "\t[-h]\n\n");
1410 fprintf(stderr
, " -l\tUse <int> loops per sec (default: use saved benchmark)\n");
1411 fprintf(stderr
, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
1412 fprintf(stderr
, " -t\tSeconds to run each benchmark (default: 30)\n");
1413 fprintf(stderr
, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
1414 fprintf(stderr
, " -N\tNice the load thread to <int> (default: 0)\n");
1415 //fprintf(stderr, " -u\tImitate uniprocessor\n");
1416 fprintf(stderr
, " -b\tBenchmark loops_per_ms even if it is already known\n");
1417 fprintf(stderr
, " -c\tOutput to console only (default: use console and logfile)\n");
1418 fprintf(stderr
, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
1419 fprintf(stderr
, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
1420 fprintf(stderr
, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
1421 fprintf(stderr
, " -m\tAdd <comment> to the log file as a separate line\n");
1422 fprintf(stderr
, " -w\tAdd <load type> to the list of loads to be tested against\n");
1423 fprintf(stderr
, " -x\tExclude <load type> from the list of loads to be tested against\n");
1424 fprintf(stderr
, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
1425 fprintf(stderr
, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
1426 fprintf(stderr
, " -h\tShow this help\n");
1427 fprintf(stderr
, "\nIf run without parameters interbench will run a standard benchmark\n\n");
/*
 * SIGCHLD handler: reap finished children.  A child that exited normally
 * with status 0 is the expected case; any other exit (non-zero status or
 * death by signal) is reported and aborts the whole benchmark run.
 *
 * Fix: loop on waitpid().  POSIX does not queue SIGCHLD, so one delivery
 * may have to reap several children; the old single-shot wait could leave
 * zombies when children exited near-simultaneously.
 *
 * NOTE(review): fprintf()/exit() are not async-signal-safe; tolerated here
 * only because this path terminates the program anyway — confirm.
 */
void deadchild(int crap)
{
	pid_t retval;
	int status;

	crap = 0;	/* silence unused-parameter warning */

	/* WNOHANG: never block inside the signal handler */
	while ((retval = waitpid(-1, &status, WNOHANG)) != 0) {
		if (retval == -1) {
			if (errno == ECHILD)
				return;	/* nothing left to reap */
			terminal_error("waitpid");
		}
		if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
			continue;	/* clean exit; keep reaping */
		fprintf(stderr, "\nChild terminated abnormally ");
		if (WIFSIGNALED(status))
			fprintf(stderr, "with signal %d", WTERMSIG(status));
		fprintf(stderr, "\n");
		exit(1);
	}
}
1453 int load_index(const char* loadname
)
1457 for (i
= 0 ; i
< THREADS
; i
++)
1458 if (strcasecmp(loadname
, threadlist
[i
].label
) == 0)
/*
 * Test whether bit `index` of `mask` is set.  Returns 1 if set, 0 if not.
 *
 * Fixes: `static inline` — a plain C99 `inline` with no external definition
 * gives an undefined reference whenever the call is not inlined (e.g. -O0).
 * `1U` — left-shifting a signed 1 into bit 31 is undefined behaviour.
 */
static inline int bit_is_on(const unsigned int mask, int index)
{
	return (mask & (1U << index)) != 0;
}
/*
 * Set bit `index` in *mask, leaving all other bits untouched.
 *
 * Fixes: `static inline` — a plain C99 `inline` with no external definition
 * gives an undefined reference whenever the call is not inlined (e.g. -O0).
 * `1U` — left-shifting a signed 1 into bit 31 is undefined behaviour.
 */
static inline void set_bit_on(unsigned int *mask, int index)
{
	*mask |= (1U << index);
}
1473 int main(int argc
, char **argv
)
1475 unsigned long custom_cpu
= 0;
1476 int q
, i
, j
, affinity
, benchmark
= 0;
1477 unsigned int selected_loads
= 0;
1478 unsigned int excluded_loads
= 0;
1479 unsigned int selected_benches
= 0;
1480 unsigned int excluded_benches
= 0;
1483 * This file stores the loops_per_ms to be reused in a filename that
1486 char *fname
= "interbench.loops_per_ms";
1487 char *comment
= NULL
;
1489 feenableexcept(FE_DIVBYZERO
| FE_INVALID
| FE_OVERFLOW
);
1490 if (signal(SIGCHLD
, deadchild
) == SIG_ERR
)
1491 terminal_error("signal");
1494 while ((q
= getopt(argc
, argv
, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
1500 ud
.loops_per_ms
= atoi(optarg
);
1503 ud
.duration
= atoi(optarg
);
1506 ud
.cpu_load
= atoi(optarg
);
1509 ud
.bench_nice
= atoi(optarg
);
1512 ud
.load_nice
= atoi(optarg
);
1527 custom_cpu
= (unsigned long)atol(optarg
);
1530 ud
.custom_interval
= atol(optarg
);
1536 i
= load_index(optarg
);
1538 fprintf(stderr
, "Unknown load \"%s\"\n", optarg
);
1541 set_bit_on(&selected_loads
, i
);
1544 i
= load_index(optarg
);
1546 fprintf(stderr
, "Unknown load \"%s\"\n", optarg
);
1549 set_bit_on(&excluded_loads
, i
);
1552 i
= load_index(optarg
);
1554 fprintf(stderr
, "Unknown bench \"%s\"\n", optarg
);
1557 set_bit_on(&selected_benches
, i
);
1560 i
= load_index(optarg
);
1562 fprintf(stderr
, "Unknown bench \"%s\"\n", optarg
);
1565 set_bit_on(&excluded_benches
, i
);
1574 /* default is all loads */
1575 if (selected_loads
== 0)
1576 selected_loads
= (unsigned int)-1;
1577 selected_loads
&= ~excluded_loads
;
1578 /* default is all benches */
1579 if (selected_benches
== 0)
1580 selected_benches
= (unsigned int)-1;
1581 selected_benches
&= ~excluded_benches
;
1584 fprintf(stderr
, "Unable to get SCHED_FIFO (real time scheduling).\n");
1585 fprintf(stderr
, "You either need to run this as root user or have support for real time RLIMITS.\n");
1587 fprintf(stderr
, "Real time tests were requested, aborting.\n");
1590 fprintf(stderr
, "Results will be unreliable.\n");
1593 fprintf(stderr
, "Invalid cpu load\n");
1597 if ((custom_cpu
&& !ud
.custom_interval
) ||
1598 (ud
.custom_interval
&& !custom_cpu
) ||
1600 fprintf(stderr
, "Invalid custom values, aborting.\n");
1604 if (custom_cpu
&& ud
.custom_interval
) {
1605 ud
.custom_run
= ud
.custom_interval
* custom_cpu
/ 100;
1606 threadlist
[CUSTOM
].bench
= 1;
1607 threadlist
[CUSTOM
].load
= 1;
1608 threadlist
[CUSTOM
].rtbench
= 1;
1609 threadlist
[CUSTOM
].rtload
= 1;
1612 /*FIXME Affinity commented out till working on all architectures */
1615 #ifdef CPU_SET /* Current glibc expects cpu_set_t */
1619 CPU_SET(0, &cpumask
);
1620 #else /* Old glibc expects unsigned long */
1621 unsigned long cpumask
= 1;
1623 if (sched_setaffinity(0, sizeof(cpumask
), &cpumask
) == -1) {
1625 terminal_error("sched_setaffinity");
1626 fprintf(stderr
, "could not set cpu affinity\n");
1631 /* Make benchmark a multiple of 10 seconds for proper range of X loads */
1632 if (ud
.duration
% 10)
1633 ud
.duration
+= 10 - ud
.duration
% 10;
1636 ud
.loops_per_ms
= 0;
1638 * Try to get loops_per_ms from command line first, file second, and
1639 * benchmark if not available.
1641 if (!ud
.loops_per_ms
) {
1644 if ((fp
= fopen(fname
, "r"))) {
1645 fscanf(fp
, "%lu", &ud
.loops_per_ms
);
1646 if (fclose(fp
) == -1)
1647 terminal_error("fclose");
1648 if (ud
.loops_per_ms
) {
1650 "%lu loops_per_ms read from file interbench.loops_per_ms\n",
1655 if (errno
!= ENOENT
)
1656 terminal_error("fopen");
1658 fprintf(stderr
, "loops_per_ms unknown; benchmarking...\n");
1661 * To get as accurate a loop as possible we time it running
1662 * SCHED_FIFO if we can
1668 fprintf(stderr
, "loops_per_ms specified from command line\n");
1670 if (!(fp
= fopen(fname
, "w"))) {
1671 if (errno
!= EACCES
) /* No write access is not terminal */
1672 terminal_error("fopen");
1673 fprintf(stderr
, "Unable to write to file interbench.loops_per_ms\n");
1676 fprintf(fp
, "%lu", ud
.loops_per_ms
);
1677 fprintf(stderr
, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
1679 if (fclose(fp
) == -1)
1680 terminal_error("fclose");
1688 if (ud
.log
&& !(ud
.logfile
= fopen(ud
.logfilename
, "a"))) {
1689 if (errno
!= EACCES
)
1690 terminal_error("fopen");
1691 fprintf(stderr
, "Unable to write to logfile\n");
1695 log_output("Using %lu loops per ms, running every load for %d seconds\n",
1696 ud
.loops_per_ms
, ud
.duration
);
1697 log_output("Benchmarking kernel %s at datestamp %s\n",
1698 ud
.unamer
, ud
.datestamp
);
1700 log_output("Comment: %s\n", comment
);
1703 for (i
= 0 ; i
< THREADS
; i
++)
1704 threadlist
[i
].threadno
= i
;
1706 for (i
= 0 ; i
< THREADS
; i
++) {
1707 struct thread
*thi
= &threadlist
[i
];
1711 benchme
= &threadlist
[i
].rtbench
;
1713 benchme
= &threadlist
[i
].bench
;
1715 if (!*benchme
|| !bit_is_on(selected_benches
, i
))
1718 log_output("--- Benchmarking simulated cpu of %s ", threadlist
[i
].label
);
1720 log_output("real time ");
1721 else if (ud
.bench_nice
)
1722 log_output("nice %d ", ud
.bench_nice
);
1723 log_output("in the presence of simulated ");
1725 log_output("nice %d ", ud
.load_nice
);
1726 log_output("---\n");
1730 log_output("\tLatency +/- SD (us)");
1732 log_output("\tLatency +/- SD (ms)");
1733 log_output(" Max Latency ");
1734 log_output(" %% Desired CPU");
1735 if (!thi
->nodeadlines
)
1736 log_output(" %% Deadlines Met");
1739 for (j
= 0 ; j
< THREADS
; j
++) {
1740 struct thread
*thj
= &threadlist
[j
];
1742 if (j
== i
|| !bit_is_on(selected_loads
, j
) ||
1743 (!threadlist
[j
].load
&& !ud
.do_rt
) ||
1744 (!threadlist
[j
].rtload
&& ud
.do_rt
))
1746 log_output("%s\t", thj
->label
);