2 * simdtests.c -- test accuracy and performance of simd optimizations
4 * Copyright (C) 2017 Andreas Mueller.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 /* We must include all headers that memops.c includes to avoid trouble with
22 * our namespace game below.
36 #if defined (__SSE2__) && !defined (__sun__)
37 #include <emmintrin.h>
39 #include <smmintrin.h>
43 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
47 // our additional headers
50 /* Dirty: include memops.c twice, the second time with SIMD disabled,
51 * so we can compare the accelerated and non-accelerated implementations
53 namespace accelerated
{
54 #include "../common/memops.c"
57 namespace origerated
{
70 #include "../common/memops.c"
73 // define conversion function types
74 typedef void (*t_jack_to_integer
)(
76 jack_default_audio_sample_t
*src
,
77 unsigned long nsamples
,
78 unsigned long dst_skip
,
79 dither_state_t
*state
);
81 typedef void (*t_integer_to_jack
)(
82 jack_default_audio_sample_t
*dst
,
84 unsigned long nsamples
,
85 unsigned long src_skip
);
87 // define/setup test case data
88 typedef struct test_case_data
{
92 t_jack_to_integer jack_to_integer_accel
;
93 t_jack_to_integer jack_to_integer_orig
;
94 t_integer_to_jack integer_to_jack_accel
;
95 t_integer_to_jack integer_to_jack_orig
;
96 dither_state_t
*ditherstate
;
100 test_case_data_t test_cases
[] = {
105 accelerated::sample_move_d32u24_sSs
,
106 origerated::sample_move_d32u24_sSs
,
107 accelerated::sample_move_dS_s32u24s
,
108 origerated::sample_move_dS_s32u24s
,
115 accelerated::sample_move_d32u24_sS
,
116 origerated::sample_move_d32u24_sS
,
117 accelerated::sample_move_dS_s32u24
,
118 origerated::sample_move_dS_s32u24
,
125 accelerated::sample_move_d24_sSs
,
126 origerated::sample_move_d24_sSs
,
127 accelerated::sample_move_dS_s24s
,
128 origerated::sample_move_dS_s24s
,
135 accelerated::sample_move_d24_sS
,
136 origerated::sample_move_d24_sS
,
137 accelerated::sample_move_dS_s24
,
138 origerated::sample_move_dS_s24
,
145 accelerated::sample_move_d16_sSs
,
146 origerated::sample_move_d16_sSs
,
147 accelerated::sample_move_dS_s16s
,
148 origerated::sample_move_dS_s16s
,
155 accelerated::sample_move_d16_sS
,
156 origerated::sample_move_d16_sS
,
157 accelerated::sample_move_dS_s16
,
158 origerated::sample_move_dS_s16
,
163 // each conversion is repeated this many times to improve the accuracy of the time measurement
164 const uint32_t retry_per_case
= 1000;
166 // setup test buffers
// Number of samples in every test buffer; also the loop bound used when the
// results of the two implementations are compared sample by sample.
167 #define TESTBUFF_SIZE 1024
// Float source data; main() fills it with a linear ramp from -1.0 up to just
// below +1.0 before running the test cases.
// NOTE(review): presumably the input of the float -> integer conversions;
// the call arguments are not visible here - confirm.
168 jack_default_audio_sample_t jackbuffer_source
[TESTBUFF_SIZE
];
169 // integer buffers: at most 4 bytes per value, times 2 for stereo
// Both are zeroed with memset() before each float -> integer run; one is meant
// for the accelerated implementation, the other for the original one.
170 char integerbuffer_accel
[TESTBUFF_SIZE
*4*2];
171 char integerbuffer_orig
[TESTBUFF_SIZE
*4*2];
// Float destination buffers of the integer -> float conversions (accelerated
// and original); zeroed with memset() before each run and compared afterwards.
173 jack_default_audio_sample_t jackfloatbuffer_accel
[TESTBUFF_SIZE
];
174 jack_default_audio_sample_t jackfloatbuffer_orig
[TESTBUFF_SIZE
];
176 // comparing unsigned makes life easier
177 uint32_t extract_integer(
181 uint32_t sample_size
,
188 curr
= (unsigned char*)buff
+ offset
+ sample_size
-1;
189 for(uint32_t i
=0; i
<sample_size
; i
++) {
190 retval
+= *(curr
--) * mult
;
195 curr
= (unsigned char*)buff
+ offset
+ frame_size
-sample_size
;
196 for(uint32_t i
=0; i
<sample_size
; i
++) {
197 retval
+= *(curr
++) * mult
;
204 int main(int argc
, char *argv
[])
206 // parse_arguments(argc, argv);
207 uint32_t maxerr_displayed
= 10;
210 for(int i
=0; i
<TESTBUFF_SIZE
; i
++) {
212 jack_default_audio_sample_t value
=
213 ((jack_default_audio_sample_t
)((i
% TESTBUFF_SIZE
) - TESTBUFF_SIZE
/2)) / (TESTBUFF_SIZE
/2);
216 jackbuffer_source
[i
] = value
;
219 for(uint32_t testcase
=0; testcase
<sizeof(test_cases
)/sizeof(test_case_data_t
); testcase
++) {
221 for(uint32_t channels
=1; channels
<=2; channels
++) {
222 //////////////////////////////////////////////////////////////////////////////
223 // jackfloat -> integer
225 // clean target buffers
226 memset(integerbuffer_accel
, 0, sizeof(integerbuffer_accel
));
227 memset(integerbuffer_orig
, 0, sizeof(integerbuffer_orig
));
229 clock_t time_to_integer_accel
= clock();
230 for(uint32_t repetition
=0; repetition
<retry_per_case
; repetition
++)
232 test_cases
[testcase
].jack_to_integer_accel(
236 test_cases
[testcase
].frame_size
*channels
,
237 test_cases
[testcase
].ditherstate
);
239 float timediff_to_integer_accel
= ((float)(clock() - time_to_integer_accel
)) / CLOCKS_PER_SEC
;
241 clock_t time_to_integer_orig
= clock();
242 for(uint32_t repetition
=0; repetition
<retry_per_case
; repetition
++)
244 test_cases
[testcase
].jack_to_integer_orig(
248 test_cases
[testcase
].frame_size
*channels
,
249 test_cases
[testcase
].ditherstate
);
251 float timediff_to_integer_orig
= ((float)(clock() - time_to_integer_orig
)) / CLOCKS_PER_SEC
;
252 // output performance results
254 "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
255 test_cases
[testcase
].name
,
257 timediff_to_integer_orig
,
258 timediff_to_integer_accel
,
259 (timediff_to_integer_orig
/timediff_to_integer_accel
-1)*100.0);
260 uint32_t int_deviation_max
= 0;
261 uint32_t int_error_count
= 0;
262 // output error (avoid spam -> limit error lines per test case)
263 for(uint32_t sample
=0; sample
<TESTBUFF_SIZE
; sample
++) {
264 uint32_t sample_offset
= sample
*test_cases
[testcase
].frame_size
*channels
;
265 // compare both results
266 uint32_t intval_accel
=extract_integer(
269 test_cases
[testcase
].frame_size
,
270 test_cases
[testcase
].sample_size
,
271 #if __BYTE_ORDER == __BIG_ENDIAN
272 !test_cases
[testcase
].reverse
);
274 test_cases
[testcase
].reverse
);
276 uint32_t intval_orig
=extract_integer(
279 test_cases
[testcase
].frame_size
,
280 test_cases
[testcase
].sample_size
,
281 #if __BYTE_ORDER == __BIG_ENDIAN
282 !test_cases
[testcase
].reverse
);
284 test_cases
[testcase
].reverse
);
286 if(intval_accel
!= intval_orig
) {
287 if(int_error_count
<maxerr_displayed
) {
288 printf("Value error sample %u:", sample
);
291 sprintf(formatstr
, "%%0%uX", test_cases
[testcase
].sample_size
*2);
292 printf(formatstr
, intval_orig
);
294 printf(formatstr
, intval_accel
);
298 uint32_t int_deviation
;
299 if(intval_accel
> intval_orig
)
300 int_deviation
= intval_accel
-intval_orig
;
302 int_deviation
= intval_orig
-intval_accel
;
303 if(int_deviation
> int_deviation_max
)
304 int_deviation_max
= int_deviation
;
308 "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n",
309 test_cases
[testcase
].name
,
314 //////////////////////////////////////////////////////////////////////////////
315 // integer -> jackfloat
317 // clean target buffers
318 memset(jackfloatbuffer_accel
, 0, sizeof(jackfloatbuffer_accel
));
319 memset(jackfloatbuffer_orig
, 0, sizeof(jackfloatbuffer_orig
));
321 clock_t time_to_float_accel
= clock();
322 for(uint32_t repetition
=0; repetition
<retry_per_case
; repetition
++)
324 test_cases
[testcase
].integer_to_jack_accel(
325 jackfloatbuffer_accel
,
328 test_cases
[testcase
].frame_size
*channels
);
330 float timediff_to_float_accel
= ((float)(clock() - time_to_float_accel
)) / CLOCKS_PER_SEC
;
332 clock_t time_to_float_orig
= clock();
333 for(uint32_t repetition
=0; repetition
<retry_per_case
; repetition
++)
335 test_cases
[testcase
].integer_to_jack_orig(
336 jackfloatbuffer_orig
,
339 test_cases
[testcase
].frame_size
*channels
);
341 float timediff_to_float_orig
= ((float)(clock() - time_to_float_orig
)) / CLOCKS_PER_SEC
;
342 // output performance results
344 "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n",
345 test_cases
[testcase
].name
,
347 timediff_to_float_orig
,
348 timediff_to_float_accel
,
349 (timediff_to_float_orig
/timediff_to_float_accel
-1)*100.0);
350 jack_default_audio_sample_t float_deviation_max
= 0.0;
351 uint32_t float_error_count
= 0;
352 // output error (avoid spam -> limit error lines per test case)
353 for(uint32_t sample
=0; sample
<TESTBUFF_SIZE
; sample
++) {
354 // For easier estimation/readability we scale floats back to integer
355 jack_default_audio_sample_t sample_scaling
;
356 switch(test_cases
[testcase
].sample_size
) {
358 sample_scaling
= SAMPLE_16BIT_SCALING
;
361 sample_scaling
= SAMPLE_24BIT_SCALING
;
364 jack_default_audio_sample_t floatval_accel
= jackfloatbuffer_accel
[sample
] * sample_scaling
;
365 jack_default_audio_sample_t floatval_orig
= jackfloatbuffer_orig
[sample
] * sample_scaling
;
366 // compare both results
367 jack_default_audio_sample_t float_deviation
;
368 if(floatval_accel
> floatval_orig
)
369 float_deviation
= floatval_accel
-floatval_orig
;
371 float_deviation
= floatval_orig
-floatval_accel
;
372 if(float_deviation
> float_deviation_max
)
373 float_deviation_max
= float_deviation
;
374 // deviation > half bit => error
375 if(float_deviation
> 0.5) {
376 if(float_error_count
<maxerr_displayed
) {
377 printf("Value error sample %u:", sample
);
378 printf(" Orig %8.1f Accel %8.1f\n", floatval_orig
, floatval_accel
);
384 "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n",
385 test_cases
[testcase
].name
,
388 float_deviation_max
);