example-clients/simdtests.cpp

   1 /*
   2  *  simdtests.cpp -- test accuracy and performance of simd optimizations
   3  *
   4  *  Copyright (C) 2017 Andreas Mueller.
   5  *
   6  *  This program is free software; you can redistribute it and/or modify
   7  *  it under the terms of the GNU General Public License as published by
   8  *  the Free Software Foundation; either version 2 of the License, or
   9  *  (at your option) any later version.
  10  *
  11  *  This program is distributed in the hope that it will be useful,
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  *  GNU General Public License for more details.
  15  *
  16  *  You should have received a copy of the GNU General Public License
  17  *  along with this program; if not, write to the Free Software
  18  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19  */
  20
  21 /* We must include all headers memops.c includes to avoid trouble with
  22  * our namespace game below.
  23  */
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <math.h>
  27 #include <memory.h>
  28 #include <stdlib.h>
  29 #include <stdint.h>
  30 #include <limits.h>
  31 #ifdef __linux__
  32 #include <endian.h>
  33 #endif
  34 #include "memops.h"
  35
  36 #if defined (__SSE2__) && !defined (__sun__)
  37 #include <emmintrin.h>
  38 #ifdef __SSE4_1__
  39 #include <smmintrin.h>
  40 #endif
  41 #endif
  42
  43 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
  44 #include <arm_neon.h>
  45 #endif
  46
  47 // our additional headers
  48 #include <time.h>
  49
  50 /* Dirty: include mempos.c twice the second time with SIMD disabled
  51  * so we can compare aceelerated non accelerated
  52  */
  53 namespace accelerated {
  54 #include "../common/memops.c"
  55 }
  56
  57 namespace origerated {
  58 #ifdef __SSE2__
  59 #undef __SSE2__
  60 #endif
  61
  62 #ifdef __ARM_NEON__
  63 #undef __ARM_NEON__
  64 #endif
  65
  66 #ifdef __ARM_NEON
  67 #undef __ARM_NEON
  68 #endif
  69
  70 #include "../common/memops.c"
  71 }
  72
  73 // define conversion function types
  74 typedef void (*t_jack_to_integer)(
  75         char *dst,
  76         jack_default_audio_sample_t *src,
  77         unsigned long nsamples,
  78         unsigned long dst_skip,
  79         dither_state_t *state);
  80
  81 typedef void (*t_integer_to_jack)(
  82         jack_default_audio_sample_t *dst,
  83         char *src,
  84         unsigned long nsamples,
  85         unsigned long src_skip);
  86
  87 // define/setup test case data
  88 typedef struct test_case_data {
  89         uint32_t frame_size;
  90         uint32_t sample_size;
  91         bool reverse;
  92         t_jack_to_integer jack_to_integer_accel;
  93         t_jack_to_integer jack_to_integer_orig;
  94         t_integer_to_jack integer_to_jack_accel;
  95         t_integer_to_jack integer_to_jack_orig;
  96         dither_state_t *ditherstate;
  97         const char *name;
  98 } test_case_data_t;
  99
 100 test_case_data_t test_cases[] = {
 101         {
 102                 4,
 103                 3,
 104                 true,
 105                 accelerated::sample_move_d32u24_sSs,
 106                 origerated::sample_move_d32u24_sSs,
 107                 accelerated::sample_move_dS_s32u24s,
 108                 origerated::sample_move_dS_s32u24s,
 109                 NULL,
 110                 "32u24s" },
 111         {
 112                 4,
 113                 3,
 114                 false,
 115                 accelerated::sample_move_d32u24_sS,
 116                 origerated::sample_move_d32u24_sS,
 117                 accelerated::sample_move_dS_s32u24,
 118                 origerated::sample_move_dS_s32u24,
 119                 NULL,
 120                 "32u24" },
 121         {
 122                 3,
 123                 3,
 124                 true,
 125                 accelerated::sample_move_d24_sSs,
 126                 origerated::sample_move_d24_sSs,
 127                 accelerated::sample_move_dS_s24s,
 128                 origerated::sample_move_dS_s24s,
 129                 NULL,
 130                 "24s" },
 131         {
 132                 3,
 133                 3,
 134                 false,
 135                 accelerated::sample_move_d24_sS,
 136                 origerated::sample_move_d24_sS,
 137                 accelerated::sample_move_dS_s24,
 138                 origerated::sample_move_dS_s24,
 139                 NULL,
 140                 "24" },
 141         {
 142                 2,
 143                 2,
 144                 true,
 145                 accelerated::sample_move_d16_sSs,
 146                 origerated::sample_move_d16_sSs,
 147                 accelerated::sample_move_dS_s16s,
 148                 origerated::sample_move_dS_s16s,
 149                 NULL,
 150                 "16s" },
 151         {
 152                 2,
 153                 2,
 154                 false,
 155                 accelerated::sample_move_d16_sS,
 156                 origerated::sample_move_d16_sS,
 157                 accelerated::sample_move_dS_s16,
 158                 origerated::sample_move_dS_s16,
 159                 NULL,
 160                 "16" },
 161 };
 162
 163 // we need to repeat for better accuracy at time measurement
 164 const uint32_t retry_per_case = 1000;
 165
 166 // setup test buffers
 167 #define TESTBUFF_SIZE 1024
 168 jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
 169 // integer buffers: max 4 bytes per value / * 2 for stereo
 170 char integerbuffer_accel[TESTBUFF_SIZE*4*2];
 171 char integerbuffer_orig[TESTBUFF_SIZE*4*2];
 172 // float buffers
 173 jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
 174 jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
 175
 176 // comparing unsigned makes life easier
 177 uint32_t extract_integer(
 178         char* buff,
 179         uint32_t offset,
 180         uint32_t frame_size,
 181         uint32_t sample_size,
 182         bool big_endian)
 183 {
 184         uint32_t retval = 0;
 185         unsigned char* curr;
 186         uint32_t mult = 1;
 187         if(big_endian) {
 188                 curr = (unsigned char*)buff + offset + sample_size-1;
 189                 for(uint32_t i=0; i<sample_size; i++) {
 190                         retval += *(curr--) * mult;
 191                         mult*=256;
 192                 }
 193         }
 194         else {
 195                 curr = (unsigned char*)buff + offset + frame_size-sample_size;
 196                 for(uint32_t i=0; i<sample_size; i++) {
 197                         retval += *(curr++) * mult;
 198                         mult*=256;
 199                 }
 200         }
 201         return retval;
 202 }
 203
 204 int main(int argc, char *argv[])
 205 {
 206 //      parse_arguments(argc, argv);
 207         uint32_t maxerr_displayed = 10;
 208
 209         // fill jackbuffer
 210         for(int i=0; i<TESTBUFF_SIZE; i++) {
 211                 // ramp
 212                 jack_default_audio_sample_t value =
 213                         ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
 214                 // force clipping
 215                 value *= 1.02;
 216                 jackbuffer_source[i] = value;
 217         }
 218
 219         for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
 220                 // test mono/stereo
 221                 for(uint32_t channels=1; channels<=2; channels++) {
 222                         //////////////////////////////////////////////////////////////////////////////
 223                         // jackfloat -> integer
 224
 225                         // clean target buffers
 226                         memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
 227                         memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
 228                         // accel
 229                         clock_t time_to_integer_accel = clock();
 230                         for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 231                         {
 232                                 test_cases[testcase].jack_to_integer_accel(
 233                                         integerbuffer_accel,
 234                                         jackbuffer_source,
 235                                         TESTBUFF_SIZE,
 236                                         test_cases[testcase].frame_size*channels,
 237                                         test_cases[testcase].ditherstate);
 238                         }
 239                         float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
 240                         // orig
 241                         clock_t time_to_integer_orig = clock();
 242                         for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 243                         {
 244                                 test_cases[testcase].jack_to_integer_orig(
 245                                         integerbuffer_orig,
 246                                         jackbuffer_source,
 247                                         TESTBUFF_SIZE,
 248                                         test_cases[testcase].frame_size*channels,
 249                                         test_cases[testcase].ditherstate);
 250                         }
 251                         float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
 252                         // output performance results
 253                         printf(
 254                                 "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
 255                                 test_cases[testcase].name,
 256                                 channels,
 257                                 timediff_to_integer_orig,
 258                                 timediff_to_integer_accel,
 259                                 (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
 260                         uint32_t int_deviation_max = 0;
 261                         uint32_t int_error_count = 0;
 262                         // output error (avoid spam -> limit error lines per test case)
 263                         for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
 264                                 uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
 265                                 // compare both results
 266                                 uint32_t intval_accel=extract_integer(
 267                                         integerbuffer_accel,
 268                                         sample_offset,
 269                                         test_cases[testcase].frame_size,
 270                                         test_cases[testcase].sample_size,
 271 #if __BYTE_ORDER == __BIG_ENDIAN
 272                                         !test_cases[testcase].reverse);
 273 #else
 274                                         test_cases[testcase].reverse);
 275 #endif
 276                                 uint32_t intval_orig=extract_integer(
 277                                         integerbuffer_orig,
 278                                         sample_offset,
 279                                         test_cases[testcase].frame_size,
 280                                         test_cases[testcase].sample_size,
 281 #if __BYTE_ORDER == __BIG_ENDIAN
 282                                         !test_cases[testcase].reverse);
 283 #else
 284                                         test_cases[testcase].reverse);
 285 #endif
 286                                 if(intval_accel != intval_orig) {
 287                                         if(int_error_count<maxerr_displayed) {
 288                                                 printf("Value error sample %u:", sample);
 289                                                 printf(" Orig 0x");
 290                                                 char formatstr[10];
 291                                                 sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
 292                                                 printf(formatstr, intval_orig);
 293                                                 printf(" Accel 0x");
 294                                                 printf(formatstr, intval_accel);
 295                                                 printf("\n");
 296                                         }
 297                                         int_error_count++;
 298                                         uint32_t int_deviation;
 299                                         if(intval_accel > intval_orig)
 300                                                 int_deviation = intval_accel-intval_orig;
 301                                         else
 302                                                 int_deviation = intval_orig-intval_accel;
 303                                         if(int_deviation > int_deviation_max)
 304                                                 int_deviation_max = int_deviation;
 305                                 }
 306                         }
 307                         printf(
 308                                 "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
 309                                 test_cases[testcase].name,
 310                                 channels,
 311                                 int_error_count,
 312                                 int_deviation_max);
 313
 314                         //////////////////////////////////////////////////////////////////////////////
 315                         // integer -> jackfloat
 316
 317                         // clean target buffers
 318                         memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
 319                         memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
 320                         // accel
 321                         clock_t time_to_float_accel = clock();
 322                         for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 323                         {
 324                                 test_cases[testcase].integer_to_jack_accel(
 325                                         jackfloatbuffer_accel,
 326                                         integerbuffer_orig,
 327                                         TESTBUFF_SIZE,
 328                                         test_cases[testcase].frame_size*channels);
 329                         }
 330                         float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
 331                         // orig
 332                         clock_t time_to_float_orig = clock();
 333                         for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
 334                         {
 335                                 test_cases[testcase].integer_to_jack_orig(
 336                                         jackfloatbuffer_orig,
 337                                         integerbuffer_orig,
 338                                         TESTBUFF_SIZE,
 339                                         test_cases[testcase].frame_size*channels);
 340                         }
 341                         float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
 342                         // output performance results
 343                         printf(
 344                                 "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
 345                                 test_cases[testcase].name,
 346                                 channels,
 347                                 timediff_to_float_orig,
 348                                 timediff_to_float_accel,
 349                                 (timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
 350                         jack_default_audio_sample_t float_deviation_max = 0.0;
 351                         uint32_t float_error_count = 0;
 352                         // output error (avoid spam -> limit error lines per test case)
 353                         for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
 354                                 // For easier estimation/readability we scale floats back to integer
 355                                 jack_default_audio_sample_t sample_scaling;
 356                                 switch(test_cases[testcase].sample_size) {
 357                                         case 2:
 358                                                 sample_scaling = SAMPLE_16BIT_SCALING;
 359                                                 break;
 360                                         default:
 361                                                 sample_scaling = SAMPLE_24BIT_SCALING;
 362                                                 break;
 363                                 }
 364                                 jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
 365                                 jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
 366                                 // compare both results
 367                                 jack_default_audio_sample_t float_deviation;
 368                                 if(floatval_accel > floatval_orig)
 369                                         float_deviation = floatval_accel-floatval_orig;
 370                                 else
 371                                         float_deviation = floatval_orig-floatval_accel;
 372                                 if(float_deviation > float_deviation_max)
 373                                         float_deviation_max = float_deviation;
 374                                 // deviation > half bit => error
 375                                 if(float_deviation > 0.5) {
 376                                         if(float_error_count<maxerr_displayed) {
 377                                                 printf("Value error sample %u:", sample);
 378                                                 printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
 379                                         }
 380                                         float_error_count++;
 381                                 }
 382                         }
 383                         printf(
 384                                 "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
 385                                 test_cases[testcase].name,
 386                                 channels,
 387                                 float_error_count,
 388                                 float_deviation_max);
 389
 390                         printf("\n");
 391                 }
 392         }
 393         return 0;
 394 }