Add zalsa configure flag, enabled by default if possible
[jack2.git] / example-clients / simdtests.cpp
blobdc50be625cfb0afa13be7d611f6296ebb1583303
1 /*
2 * simdtests.c -- test accuracy and performance of simd optimizations
4 * Copyright (C) 2017 Andreas Mueller.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* We must include all headers memops.c includes to avoid trouble with
 * our namespace game below.
 */
24 #include <stdio.h>
25 #include <string.h>
26 #include <math.h>
27 #include <memory.h>
28 #include <stdlib.h>
29 #include <stdint.h>
30 #include <limits.h>
31 #ifdef __linux__
32 #include <endian.h>
33 #endif
34 #include "memops.h"
36 #if defined (__SSE2__) && !defined (__sun__)
37 #include <emmintrin.h>
38 #ifdef __SSE4_1__
39 #include <smmintrin.h>
40 #endif
41 #endif
43 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
44 #include <arm_neon.h>
45 #endif
47 // our additional headers
48 #include <time.h>
/* Dirty: include memops.c twice, the second time with SIMD disabled,
 * so we can compare accelerated and non-accelerated results.
 */
53 namespace accelerated {
54 #include "../common/memops.c"
57 namespace origerated {
58 #ifdef __SSE2__
59 #undef __SSE2__
60 #endif
62 #ifdef __ARM_NEON__
63 #undef __ARM_NEON__
64 #endif
66 #ifdef __ARM_NEON
67 #undef __ARM_NEON
68 #endif
70 #include "../common/memops.c"
73 // define conversion function types
74 typedef void (*t_jack_to_integer)(
75 char *dst,
76 jack_default_audio_sample_t *src,
77 unsigned long nsamples,
78 unsigned long dst_skip,
79 dither_state_t *state);
81 typedef void (*t_integer_to_jack)(
82 jack_default_audio_sample_t *dst,
83 char *src,
84 unsigned long nsamples,
85 unsigned long src_skip);
87 // define/setup test case data
88 typedef struct test_case_data {
89 uint32_t frame_size;
90 uint32_t sample_size;
91 bool reverse;
92 t_jack_to_integer jack_to_integer_accel;
93 t_jack_to_integer jack_to_integer_orig;
94 t_integer_to_jack integer_to_jack_accel;
95 t_integer_to_jack integer_to_jack_orig;
96 dither_state_t *ditherstate;
97 const char *name;
98 } test_case_data_t;
100 test_case_data_t test_cases[] = {
104 true,
105 accelerated::sample_move_d32u24_sSs,
106 origerated::sample_move_d32u24_sSs,
107 accelerated::sample_move_dS_s32u24s,
108 origerated::sample_move_dS_s32u24s,
109 NULL,
110 "32u24s" },
114 false,
115 accelerated::sample_move_d32u24_sS,
116 origerated::sample_move_d32u24_sS,
117 accelerated::sample_move_dS_s32u24,
118 origerated::sample_move_dS_s32u24,
119 NULL,
120 "32u24" },
124 true,
125 accelerated::sample_move_d24_sSs,
126 origerated::sample_move_d24_sSs,
127 accelerated::sample_move_dS_s24s,
128 origerated::sample_move_dS_s24s,
129 NULL,
130 "24s" },
134 false,
135 accelerated::sample_move_d24_sS,
136 origerated::sample_move_d24_sS,
137 accelerated::sample_move_dS_s24,
138 origerated::sample_move_dS_s24,
139 NULL,
140 "24" },
144 true,
145 accelerated::sample_move_d16_sSs,
146 origerated::sample_move_d16_sSs,
147 accelerated::sample_move_dS_s16s,
148 origerated::sample_move_dS_s16s,
149 NULL,
150 "16s" },
154 false,
155 accelerated::sample_move_d16_sS,
156 origerated::sample_move_d16_sS,
157 accelerated::sample_move_dS_s16,
158 origerated::sample_move_dS_s16,
159 NULL,
160 "16" },
163 // we need to repeat for better accuracy at time measurement
164 const uint32_t retry_per_case = 1000;
166 // setup test buffers
167 #define TESTBUFF_SIZE 1024
168 jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE];
169 // integer buffers: max 4 bytes per value / * 2 for stereo
170 char integerbuffer_accel[TESTBUFF_SIZE*4*2];
171 char integerbuffer_orig[TESTBUFF_SIZE*4*2];
172 // float buffers
173 jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE];
174 jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE];
176 // comparing unsigned makes life easier
/*
 * Assemble the bytes of one sample into an unsigned 32 bit value
 * (comparing unsigned values makes life easier for the caller).
 *
 * buff         raw byte buffer
 * offset       byte offset of the frame inside buff
 * frame_size   size of the frame in bytes; only used when big_endian is false
 * sample_size  number of bytes combined into the result
 * big_endian   true:  the value occupies the first sample_size bytes of the
 *                     frame, most significant byte first
 *              false: the value occupies the last sample_size bytes of the
 *                     frame, least significant byte first
 *
 * Returns the assembled value.
 */
uint32_t extract_integer(
	char* buff,
	uint32_t offset,
	uint32_t frame_size,
	uint32_t sample_size,
	bool big_endian)
{
	const unsigned char *frame = (const unsigned char*)buff + offset;
	// always start at the least significant byte and walk towards the most
	// significant one; only start position and direction depend on byte order
	const unsigned char *cursor =
		big_endian ? frame + sample_size - 1 : frame + frame_size - sample_size;
	const int step = big_endian ? -1 : 1;

	uint32_t value = 0;
	uint32_t weight = 1;
	for(uint32_t remaining = sample_size; remaining > 0; remaining--) {
		value += *cursor * weight;
		cursor += step;
		weight *= 256;
	}
	return value;
}
204 int main(int argc, char *argv[])
206 // parse_arguments(argc, argv);
207 uint32_t maxerr_displayed = 10;
209 // fill jackbuffer
210 for(int i=0; i<TESTBUFF_SIZE; i++) {
211 // ramp
212 jack_default_audio_sample_t value =
213 ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2);
214 // force clipping
215 value *= 1.02;
216 jackbuffer_source[i] = value;
219 for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) {
220 // test mono/stereo
221 for(uint32_t channels=1; channels<=2; channels++) {
222 //////////////////////////////////////////////////////////////////////////////
223 // jackfloat -> integer
225 // clean target buffers
226 memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel));
227 memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig));
228 // accel
229 clock_t time_to_integer_accel = clock();
230 for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
232 test_cases[testcase].jack_to_integer_accel(
233 integerbuffer_accel,
234 jackbuffer_source,
235 TESTBUFF_SIZE,
236 test_cases[testcase].frame_size*channels,
237 test_cases[testcase].ditherstate);
239 float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC;
240 // orig
241 clock_t time_to_integer_orig = clock();
242 for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
244 test_cases[testcase].jack_to_integer_orig(
245 integerbuffer_orig,
246 jackbuffer_source,
247 TESTBUFF_SIZE,
248 test_cases[testcase].frame_size*channels,
249 test_cases[testcase].ditherstate);
251 float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC;
252 // output performance results
253 printf(
254 "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
255 test_cases[testcase].name,
256 channels,
257 timediff_to_integer_orig,
258 timediff_to_integer_accel,
259 (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0);
260 uint32_t int_deviation_max = 0;
261 uint32_t int_error_count = 0;
262 // output error (avoid spam -> limit error lines per test case)
263 for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
264 uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels;
265 // compare both results
266 uint32_t intval_accel=extract_integer(
267 integerbuffer_accel,
268 sample_offset,
269 test_cases[testcase].frame_size,
270 test_cases[testcase].sample_size,
271 #if __BYTE_ORDER == __BIG_ENDIAN
272 !test_cases[testcase].reverse);
273 #else
274 test_cases[testcase].reverse);
275 #endif
276 uint32_t intval_orig=extract_integer(
277 integerbuffer_orig,
278 sample_offset,
279 test_cases[testcase].frame_size,
280 test_cases[testcase].sample_size,
281 #if __BYTE_ORDER == __BIG_ENDIAN
282 !test_cases[testcase].reverse);
283 #else
284 test_cases[testcase].reverse);
285 #endif
286 if(intval_accel != intval_orig) {
287 if(int_error_count<maxerr_displayed) {
288 printf("Value error sample %u:", sample);
289 printf(" Orig 0x");
290 char formatstr[10];
291 sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2);
292 printf(formatstr, intval_orig);
293 printf(" Accel 0x");
294 printf(formatstr, intval_accel);
295 printf("\n");
297 int_error_count++;
298 uint32_t int_deviation;
299 if(intval_accel > intval_orig)
300 int_deviation = intval_accel-intval_orig;
301 else
302 int_deviation = intval_orig-intval_accel;
303 if(int_deviation > int_deviation_max)
304 int_deviation_max = int_deviation;
307 printf(
308 "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
309 test_cases[testcase].name,
310 channels,
311 int_error_count,
312 int_deviation_max);
314 //////////////////////////////////////////////////////////////////////////////
315 // integer -> jackfloat
317 // clean target buffers
318 memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel));
319 memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig));
320 // accel
321 clock_t time_to_float_accel = clock();
322 for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
324 test_cases[testcase].integer_to_jack_accel(
325 jackfloatbuffer_accel,
326 integerbuffer_orig,
327 TESTBUFF_SIZE,
328 test_cases[testcase].frame_size*channels);
330 float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC;
331 // orig
332 clock_t time_to_float_orig = clock();
333 for(uint32_t repetition=0; repetition<retry_per_case; repetition++)
335 test_cases[testcase].integer_to_jack_orig(
336 jackfloatbuffer_orig,
337 integerbuffer_orig,
338 TESTBUFF_SIZE,
339 test_cases[testcase].frame_size*channels);
341 float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC;
342 // output performance results
343 printf(
344 "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
345 test_cases[testcase].name,
346 channels,
347 timediff_to_float_orig,
348 timediff_to_float_accel,
349 (timediff_to_float_orig/timediff_to_float_accel-1)*100.0);
350 jack_default_audio_sample_t float_deviation_max = 0.0;
351 uint32_t float_error_count = 0;
352 // output error (avoid spam -> limit error lines per test case)
353 for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) {
354 // For easier estimation/readability we scale floats back to integer
355 jack_default_audio_sample_t sample_scaling;
356 switch(test_cases[testcase].sample_size) {
357 case 2:
358 sample_scaling = SAMPLE_16BIT_SCALING;
359 break;
360 default:
361 sample_scaling = SAMPLE_24BIT_SCALING;
362 break;
364 jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling;
365 jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling;
366 // compare both results
367 jack_default_audio_sample_t float_deviation;
368 if(floatval_accel > floatval_orig)
369 float_deviation = floatval_accel-floatval_orig;
370 else
371 float_deviation = floatval_orig-floatval_accel;
372 if(float_deviation > float_deviation_max)
373 float_deviation_max = float_deviation;
374 // deviation > half bit => error
375 if(float_deviation > 0.5) {
376 if(float_error_count<maxerr_displayed) {
377 printf("Value error sample %u:", sample);
378 printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel);
380 float_error_count++;
383 printf(
384 "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
385 test_cases[testcase].name,
386 channels,
387 float_error_count,
388 float_deviation_max);
390 printf("\n");
393 return 0;