common/memops.c

   1 /*
   2     Copyright (C) 2000 Paul Davis
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 #define _ISOC9X_SOURCE  1
  21 #define _ISOC99_SOURCE  1
  22
  23 #define __USE_ISOC9X    1
  24 #define __USE_ISOC99    1
  25
  26 #include <stdio.h>
  27 #include <string.h>
  28 #include <math.h>
  29 #include <memory.h>
  30 #include <stdlib.h>
  31 #include <stdint.h>
  32 #include <limits.h>
  33 #ifdef __linux__
  34 #include <endian.h>
  35 #endif
  36 #include "memops.h"
  37
  38 #if defined (__SSE2__) && !defined (__sun__)
  39 #include <emmintrin.h>
  40 #ifdef __SSE4_1__
  41 #include <smmintrin.h>
  42 #endif
  43 #endif
  44
  45 /* Notes about these *_SCALING values.
  46
  47    the MAX_<N>BIT values are floating point. when multiplied by
  48    a full-scale normalized floating point sample value (-1.0..+1.0)
  49    they should give the maximum value representable with an integer
  50    sample type of N bits. Note that this is asymmetric. Sample ranges
  51    for signed integer, 2's complement values are -(2^(N-1)) to +(2^(N-1)-1)
  52
  53    Complications
  54    -------------
  55    If we use +2^(N-1) for the scaling factors, we run into a problem:
  56
  57    if we start with a normalized float value of -1.0, scaling
  58    to 24 bits would give -8388608 (-2^23), which is ideal.
  59    But with +1.0, we get +8388608, which is technically out of range.
  60
  61    We never multiply a full range normalized value by this constant,
  62    but we could multiply it by a positive value that is close enough to +1.0
  63    to produce a value > +(2^(N-1)-1).
  64
  65    There is no way around this paradox without wasting CPU cycles to determine
  66    which scaling factor to use (i.e. determine if it's negative or not,
  67    use the right factor).
  68
  69    So, for now (October 2008) we use 2^(N-1)-1 as the scaling factor.
  70 */
  71
  72 #define SAMPLE_24BIT_SCALING  8388607.0f
  73 #define SAMPLE_16BIT_SCALING  32767.0f
  74
  75 /* these are just values to use if the floating point value was out of range
  76
  77    advice from Fons Adriaensen: make the limits symmetrical
  78  */
  79
  80 #define SAMPLE_24BIT_MAX  8388607
  81 #define SAMPLE_24BIT_MIN  -8388607
  82 #define SAMPLE_24BIT_MAX_F  8388607.0f
  83 #define SAMPLE_24BIT_MIN_F  -8388607.0f
  84
  85 #define SAMPLE_16BIT_MAX  32767
  86 #define SAMPLE_16BIT_MIN  -32767
  87 #define SAMPLE_16BIT_MAX_F  32767.0f
  88 #define SAMPLE_16BIT_MIN_F  -32767.0f
  89
  90 /* these mark the outer edges of the range considered "within" range
  91    for a floating point sample value. values outside (and on the boundaries)
  92    of this range will be clipped before conversion; values within this
  93    range will be scaled to appropriate values for the target sample
  94    type.
  95 */
  96
  97 #define NORMALIZED_FLOAT_MIN -1.0f
  98 #define NORMALIZED_FLOAT_MAX  1.0f
  99
 100 /* define this in case we end up on a platform that is missing
 101    the real lrintf functions
 102 */
 103
 104 #define f_round(f) lrintf(f)
 105
 106 #define float_16(s, d)\
 107         if ((s) <= NORMALIZED_FLOAT_MIN) {\
 108                 (d) = SAMPLE_16BIT_MIN;\
 109         } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
 110                 (d) = SAMPLE_16BIT_MAX;\
 111         } else {\
 112                 (d) = f_round ((s) * SAMPLE_16BIT_SCALING);\
 113         }
 114
 115 /* call this when "s" has already been scaled (e.g. when dithering)
 116  */
 117
 118 #define float_16_scaled(s, d)\
 119         if ((s) <= SAMPLE_16BIT_MIN_F) {\
 120                 (d) = SAMPLE_16BIT_MIN_F;\
 121         } else if ((s) >= SAMPLE_16BIT_MAX_F) { \
 122                 (d) = SAMPLE_16BIT_MAX;\
 123         } else {\
 124                 (d) = f_round ((s));\
 125         }
 126
 127 #define float_24u32(s, d) \
 128         if ((s) <= NORMALIZED_FLOAT_MIN) {\
 129                 (d) = SAMPLE_24BIT_MIN << 8;\
 130         } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
 131                 (d) = SAMPLE_24BIT_MAX << 8;\
 132         } else {\
 133                 (d) = f_round ((s) * SAMPLE_24BIT_SCALING) << 8;\
 134         }
 135
 136 /* call this when "s" has already been scaled (e.g. when dithering)
 137  */
 138
 139 #define float_24u32_scaled(s, d)\
 140         if ((s) <= SAMPLE_24BIT_MIN_F) {\
 141                 (d) = SAMPLE_24BIT_MIN << 8;\
 142         } else if ((s) >= SAMPLE_24BIT_MAX_F) { \
 143                 (d) = SAMPLE_24BIT_MAX << 8;            \
 144         } else {\
 145                 (d) = f_round ((s)) << 8; \
 146         }
 147
 148 #define float_24(s, d) \
 149         if ((s) <= NORMALIZED_FLOAT_MIN) {\
 150                 (d) = SAMPLE_24BIT_MIN;\
 151         } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
 152                 (d) = SAMPLE_24BIT_MAX;\
 153         } else {\
 154                 (d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
 155         }
 156
 157 /* call this when "s" has already been scaled (e.g. when dithering)
 158  */
 159
 160 #define float_24_scaled(s, d)\
 161         if ((s) <= SAMPLE_24BIT_MIN_F) {\
 162                 (d) = SAMPLE_24BIT_MIN;\
 163         } else if ((s) >= SAMPLE_24BIT_MAX_F) { \
 164                 (d) = SAMPLE_24BIT_MAX;         \
 165         } else {\
 166                 (d) = f_round ((s)); \
 167         }
 168
 169
 170 #if defined (__SSE2__) && !defined (__sun__)
 171
 172 /* generates same as _mm_set_ps(1.f, 1.f, 1f., 1f) but faster  */
 173 static inline __m128 gen_one(void)
 174 {
 175     volatile __m128i x;
 176     __m128i ones = _mm_cmpeq_epi32(x, x);
 177     return (__m128)_mm_slli_epi32 (_mm_srli_epi32(ones, 25), 23);
 178 }
 179
 180 static inline __m128 clip(__m128 s, __m128 min, __m128 max)
 181 {
 182     return _mm_min_ps(max, _mm_max_ps(s, min));
 183 }
 184
 185 static inline __m128i float_24_sse(__m128 s)
 186 {
 187     const __m128 upper_bound = gen_one(); /* NORMALIZED_FLOAT_MAX */
 188     const __m128 lower_bound = _mm_sub_ps(_mm_setzero_ps(), upper_bound);
 189
 190     __m128 clipped = clip(s, lower_bound, upper_bound);
 191     __m128 scaled = _mm_mul_ps(clipped, _mm_set1_ps(SAMPLE_24BIT_SCALING));
 192     return _mm_cvtps_epi32(scaled);
 193 }
 194 #endif
 195
 196 /* Linear Congruential noise generator. From the music-dsp list
 197  * less random than rand(), but good enough and 10x faster
 198  */
 199 static unsigned int seed = 22222;
 200
 201 inline unsigned int fast_rand() {
 202         seed = (seed * 96314165) + 907633515;
 203         return seed;
 204 }
 205
 206 /* functions for native float sample data */
 207
 208 void sample_move_floatLE_sSs (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) {
 209         while (nsamples--) {
 210                 *dst = *((float *) src);
 211                 dst++;
 212                 src += src_skip;
 213         }
 214 }
 215
 216 void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) {
 217         while (nsamples--) {
 218                 *((float *) dst) = *src;
 219                 dst += dst_skip;
 220                 src++;
 221         }
 222 }
 223
 224 /* NOTES on function naming:
 225
 226    foo_bar_d<TYPE>_s<TYPE>
 227
 228    the "d<TYPE>" component defines the destination type for the operation
 229    the "s<TYPE>" component defines the source type for the operation
 230
 231    TYPE can be one of:
 232
 233    S      - sample is a jack_default_audio_sample_t, currently (October 2008) a 32 bit floating point value
 234    Ss     - like S but reverse endian from the host CPU
 235    32u24  - sample is a signed 32 bit integer value, but data is in upper 24 bits only
 236    32u24s - like 32u24 but reverse endian from the host CPU
 237    24     - sample is a signed 24 bit integer value
 238    24s    - like 24 but reverse endian from the host CPU
 239    16     - sample is a signed 16 bit integer value
 240    16s    - like 16 but reverse endian from the host CPU
 241
 242    For obvious reasons, the reverse endian versions only show as source types.
 243
 244    This covers all known sample formats at 16 bits or larger.
 245 */
 246
 247 /* functions for native integer sample data */
 248
 249 void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 250 {
 251         int32_t z;
 252
 253         while (nsamples--) {
 254
 255                 float_24u32 (*src, z);
 256
 257 #if __BYTE_ORDER == __LITTLE_ENDIAN
 258                 dst[0]=(char)(z>>24);
 259                 dst[1]=(char)(z>>16);
 260                 dst[2]=(char)(z>>8);
 261                 dst[3]=(char)(z);
 262 #elif __BYTE_ORDER == __BIG_ENDIAN
 263                 dst[0]=(char)(z);
 264                 dst[1]=(char)(z>>8);
 265                 dst[2]=(char)(z>>16);
 266                 dst[3]=(char)(z>>24);
 267 #endif
 268                 dst += dst_skip;
 269                 src++;
 270         }
 271 }
 272
 273 void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 274 {
 275 #if defined (__SSE2__) && !defined (__sun__)
 276         __m128 int_max = _mm_set1_ps(SAMPLE_24BIT_MAX_F);
 277         __m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
 278         __m128 factor = int_max;
 279
 280         unsigned long unrolled = nsamples / 4;
 281         nsamples = nsamples & 3;
 282
 283         while (unrolled--) {
 284                 __m128 in = _mm_load_ps(src);
 285                 __m128 scaled = _mm_mul_ps(in, factor);
 286                 __m128 clipped = clip(scaled, int_min, int_max);
 287
 288                 __m128i y = _mm_cvttps_epi32(clipped);
 289                 __m128i shifted = _mm_slli_epi32(y, 8);
 290
 291 #ifdef __SSE4_1__
 292                 *(int32_t*)dst              = _mm_extract_epi32(shifted, 0);
 293                 *(int32_t*)(dst+dst_skip)   = _mm_extract_epi32(shifted, 1);
 294                 *(int32_t*)(dst+2*dst_skip) = _mm_extract_epi32(shifted, 2);
 295                 *(int32_t*)(dst+3*dst_skip) = _mm_extract_epi32(shifted, 3);
 296 #else
 297                 __m128i shuffled1 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(0, 3, 2, 1));
 298                 __m128i shuffled2 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(1, 0, 3, 2));
 299                 __m128i shuffled3 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(2, 1, 0, 3));
 300
 301                 _mm_store_ss((float*)dst, (__m128)shifted);
 302
 303                 _mm_store_ss((float*)(dst+dst_skip), (__m128)shuffled1);
 304                 _mm_store_ss((float*)(dst+2*dst_skip), (__m128)shuffled2);
 305                 _mm_store_ss((float*)(dst+3*dst_skip), (__m128)shuffled3);
 306 #endif
 307                 dst += 4*dst_skip;
 308
 309                 src+= 4;
 310         }
 311
 312         while (nsamples--) {
 313                 __m128 in = _mm_load_ss(src);
 314                 __m128 scaled = _mm_mul_ss(in, factor);
 315                 __m128 clipped = _mm_min_ss(int_max, _mm_max_ss(scaled, int_min));
 316
 317                 int y = _mm_cvttss_si32(clipped);
 318                 *((int *) dst) = y<<8;
 319
 320                 dst += dst_skip;
 321                 src++;
 322         }
 323
 324 #else
 325         while (nsamples--) {
 326                 float_24u32 (*src, *((int32_t*) dst));
 327                 dst += dst_skip;
 328                 src++;
 329         }
 330 #endif
 331 }
 332
 333 void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 334 {
 335         /* ALERT: signed sign-extension portability !!! */
 336
 337         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
 338
 339         while (nsamples--) {
 340                 int x;
 341 #if __BYTE_ORDER == __LITTLE_ENDIAN
 342                 x = (unsigned char)(src[0]);
 343                 x <<= 8;
 344                 x |= (unsigned char)(src[1]);
 345                 x <<= 8;
 346                 x |= (unsigned char)(src[2]);
 347                 x <<= 8;
 348                 x |= (unsigned char)(src[3]);
 349 #elif __BYTE_ORDER == __BIG_ENDIAN
 350                 x = (unsigned char)(src[3]);
 351                 x <<= 8;
 352                 x |= (unsigned char)(src[2]);
 353                 x <<= 8;
 354                 x |= (unsigned char)(src[1]);
 355                 x <<= 8;
 356                 x |= (unsigned char)(src[0]);
 357 #endif
 358                 *dst = (x >> 8) * scaling;
 359                 dst++;
 360                 src += src_skip;
 361         }
 362 }
 363
 364 void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 365 {
 366 #if defined (__SSE2__) && !defined (__sun__)
 367         unsigned long unrolled = nsamples / 4;
 368         static float inv_sample_max_24bit = 1.0 / SAMPLE_24BIT_SCALING;
 369         __m128 factor = _mm_set1_ps(inv_sample_max_24bit);
 370         while (unrolled--)
 371         {
 372                 int i1 = *((int *) src);
 373                 src+= src_skip;
 374                 int i2 = *((int *) src);
 375                 src+= src_skip;
 376                 int i3 = *((int *) src);
 377                 src+= src_skip;
 378                 int i4 = *((int *) src);
 379                 src+= src_skip;
 380
 381                 __m128i src = _mm_set_epi32(i4, i3, i2, i1);
 382                 __m128i shifted = _mm_srai_epi32(src, 8);
 383
 384                 __m128 as_float = _mm_cvtepi32_ps(shifted);
 385                 __m128 divided = _mm_mul_ps(as_float, factor);
 386
 387                 _mm_storeu_ps(dst, divided);
 388
 389                 dst += 4;
 390         }
 391         nsamples = nsamples & 3;
 392 #endif
 393
 394         /* ALERT: signed sign-extension portability !!! */
 395
 396         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
 397         while (nsamples--) {
 398                 *dst = (*((int *) src) >> 8) * scaling;
 399                 dst++;
 400                 src += src_skip;
 401         }
 402 }
 403
 404 void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 405 {
 406         int32_t z;
 407
 408         while (nsamples--) {
 409                 float_24 (*src, z);
 410 #if __BYTE_ORDER == __LITTLE_ENDIAN
 411                 dst[0]=(char)(z>>16);
 412                 dst[1]=(char)(z>>8);
 413                 dst[2]=(char)(z);
 414 #elif __BYTE_ORDER == __BIG_ENDIAN
 415                 dst[0]=(char)(z);
 416                 dst[1]=(char)(z>>8);
 417                 dst[2]=(char)(z>>16);
 418 #endif
 419                 dst += dst_skip;
 420                 src++;
 421         }
 422 }
 423
 424 void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 425 {
 426 #if defined (__SSE2__) && !defined (__sun__)
 427         _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
 428         while (nsamples >= 4) {
 429                 int i;
 430                 int32_t z[4];
 431                 __m128 samples = _mm_loadu_ps(src);
 432                 __m128i converted = float_24_sse(samples);
 433
 434 #ifdef __SSE4_1__
 435                 z[0] = _mm_extract_epi32(converted, 0);
 436                 z[1] = _mm_extract_epi32(converted, 1);
 437                 z[2] = _mm_extract_epi32(converted, 2);
 438                 z[3] = _mm_extract_epi32(converted, 3);
 439 #else
 440                 __m128i shuffled1 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(0, 3, 2, 1));
 441                 __m128i shuffled2 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(1, 0, 3, 2));
 442                 __m128i shuffled3 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(2, 1, 0, 3));
 443
 444                 _mm_store_ss((float*)z, (__m128)converted);
 445                 _mm_store_ss((float*)z+1, (__m128)shuffled1);
 446                 _mm_store_ss((float*)z+2, (__m128)shuffled2);
 447                 _mm_store_ss((float*)z+3, (__m128)shuffled3);
 448
 449                 for (i = 0; i != 4; ++i) {
 450                         memcpy (dst, z+i, 3);
 451                         dst += dst_skip;
 452                 }
 453 #endif
 454
 455                 nsamples -= 4;
 456                 src += 4;
 457         }
 458 #endif
 459
 460     int32_t z;
 461
 462         while (nsamples--) {
 463                 float_24 (*src, z);
 464 #if __BYTE_ORDER == __LITTLE_ENDIAN
 465                 memcpy (dst, &z, 3);
 466 #elif __BYTE_ORDER == __BIG_ENDIAN
 467                 memcpy (dst, (char *)&z + 1, 3);
 468 #endif
 469                 dst += dst_skip;
 470                 src++;
 471         }
 472 }
 473
 474 void sample_move_dS_s24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 475 {
 476         /* ALERT: signed sign-extension portability !!! */
 477
 478         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
 479         while (nsamples--) {
 480                 int x;
 481 #if __BYTE_ORDER == __LITTLE_ENDIAN
 482                 x = (unsigned char)(src[0]);
 483                 x <<= 8;
 484                 x |= (unsigned char)(src[1]);
 485                 x <<= 8;
 486                 x |= (unsigned char)(src[2]);
 487                 /* correct sign bit and the rest of the top byte */
 488                 if (src[0] & 0x80) {
 489                         x |= 0xff << 24;
 490                 }
 491 #elif __BYTE_ORDER == __BIG_ENDIAN
 492                 x = (unsigned char)(src[2]);
 493                 x <<= 8;
 494                 x |= (unsigned char)(src[1]);
 495                 x <<= 8;
 496                 x |= (unsigned char)(src[0]);
 497                 /* correct sign bit and the rest of the top byte */
 498                 if (src[2] & 0x80) {
 499                         x |= 0xff << 24;
 500                 }
 501 #endif
 502                 *dst = x * scaling;
 503                 dst++;
 504                 src += src_skip;
 505         }
 506 }
 507
 508 void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 509 {
 510         const jack_default_audio_sample_t scaling = 1.f/SAMPLE_24BIT_SCALING;
 511
 512 #if defined (__SSE2__) && !defined (__sun__)
 513         const __m128 scaling_block = _mm_set_ps1(scaling);
 514         while (nsamples >= 4) {
 515                 int x0, x1, x2, x3;
 516
 517                 memcpy((char*)&x0 + 1, src, 3);
 518                 memcpy((char*)&x1 + 1, src+src_skip, 3);
 519                 memcpy((char*)&x2 + 1, src+2*src_skip, 3);
 520                 memcpy((char*)&x3 + 1, src+3*src_skip, 3);
 521                 src += 4 * src_skip;
 522
 523                 const __m128i block_i = _mm_set_epi32(x3, x2, x1, x0);
 524                 const __m128i shifted = _mm_srai_epi32(block_i, 8);
 525                 const __m128 converted = _mm_cvtepi32_ps (shifted);
 526                 const __m128 scaled = _mm_mul_ps(converted, scaling_block);
 527                 _mm_storeu_ps(dst, scaled);
 528                 dst += 4;
 529                 nsamples -= 4;
 530         }
 531 #endif
 532
 533         while (nsamples--) {
 534                 int x;
 535 #if __BYTE_ORDER == __LITTLE_ENDIAN
 536                 memcpy((char*)&x + 1, src, 3);
 537 #elif __BYTE_ORDER == __BIG_ENDIAN
 538                 memcpy(&x, src, 3);
 539 #endif
 540                 x >>= 8;
 541                 *dst = x * scaling;
 542                 dst++;
 543                 src += src_skip;
 544         }
 545 }
 546
 547
 548 void sample_move_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 549 {
 550         int16_t tmp;
 551
 552         while (nsamples--) {
 553                 // float_16 (*src, tmp);
 554
 555                 if (*src <= NORMALIZED_FLOAT_MIN) {
 556                         tmp = SAMPLE_16BIT_MIN;
 557                 } else if (*src >= NORMALIZED_FLOAT_MAX) {
 558                         tmp = SAMPLE_16BIT_MAX;
 559                 } else {
 560                         tmp = (int16_t) f_round (*src * SAMPLE_16BIT_SCALING);
 561                 }
 562
 563 #if __BYTE_ORDER == __LITTLE_ENDIAN
 564                 dst[0]=(char)(tmp>>8);
 565                 dst[1]=(char)(tmp);
 566 #elif __BYTE_ORDER == __BIG_ENDIAN
 567                 dst[0]=(char)(tmp);
 568                 dst[1]=(char)(tmp>>8);
 569 #endif
 570                 dst += dst_skip;
 571                 src++;
 572         }
 573 }
 574
 575 void sample_move_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 576 {
 577         while (nsamples--) {
 578                 float_16 (*src, *((int16_t*) dst));
 579                 dst += dst_skip;
 580                 src++;
 581         }
 582 }
 583
 584 void sample_move_dither_rect_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 585 {
 586         jack_default_audio_sample_t val;
 587         int16_t      tmp;
 588
 589         while (nsamples--) {
 590                 val = (*src * SAMPLE_16BIT_SCALING) + fast_rand() / (float) UINT_MAX - 0.5f;
 591                 float_16_scaled (val, tmp);
 592 #if __BYTE_ORDER == __LITTLE_ENDIAN
 593                 dst[0]=(char)(tmp>>8);
 594                 dst[1]=(char)(tmp);
 595 #elif __BYTE_ORDER == __BIG_ENDIAN
 596                 dst[0]=(char)(tmp);
 597                 dst[1]=(char)(tmp>>8);
 598 #endif
 599                 dst += dst_skip;
 600                 src++;
 601         }
 602 }
 603
 604 void sample_move_dither_rect_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 605 {
 606         jack_default_audio_sample_t val;
 607
 608         while (nsamples--) {
 609                 val = (*src * SAMPLE_16BIT_SCALING) + fast_rand() / (float)UINT_MAX - 0.5f;
 610                 float_16_scaled (val, *((int16_t*) dst));
 611                 dst += dst_skip;
 612                 src++;
 613         }
 614 }
 615
 616 void sample_move_dither_tri_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 617 {
 618         jack_default_audio_sample_t val;
 619         int16_t      tmp;
 620
 621         while (nsamples--) {
 622                 val = (*src * SAMPLE_16BIT_SCALING) + ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
 623                 float_16_scaled (val, tmp);
 624
 625 #if __BYTE_ORDER == __LITTLE_ENDIAN
 626                 dst[0]=(char)(tmp>>8);
 627                 dst[1]=(char)(tmp);
 628 #elif __BYTE_ORDER == __BIG_ENDIAN
 629                 dst[0]=(char)(tmp);
 630                 dst[1]=(char)(tmp>>8);
 631 #endif
 632                 dst += dst_skip;
 633                 src++;
 634         }
 635 }
 636
 637 void sample_move_dither_tri_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 638 {
 639         jack_default_audio_sample_t val;
 640
 641         while (nsamples--) {
 642                 val = (*src * SAMPLE_16BIT_SCALING) + ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
 643                 float_16_scaled (val, *((int16_t*) dst));
 644                 dst += dst_skip;
 645                 src++;
 646         }
 647 }
 648
 649 void sample_move_dither_shaped_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 650 {
 651         jack_default_audio_sample_t     x;
 652         jack_default_audio_sample_t     xe; /* the innput sample - filtered error */
 653         jack_default_audio_sample_t     xp; /* x' */
 654         float        r;
 655         float        rm1 = state->rm1;
 656         unsigned int idx = state->idx;
 657         int16_t      tmp;
 658
 659         while (nsamples--) {
 660                 x = *src * SAMPLE_16BIT_SCALING;
 661                 r = ((float)fast_rand() + (float)fast_rand())  / (float)UINT_MAX - 1.0f;
 662                 /* Filter the error with Lipshitz's minimally audible FIR:
 663                    [2.033 -2.165 1.959 -1.590 0.6149] */
 664                 xe = x
 665                      - state->e[idx] * 2.033f
 666                      + state->e[(idx - 1) & DITHER_BUF_MASK] * 2.165f
 667                      - state->e[(idx - 2) & DITHER_BUF_MASK] * 1.959f
 668                      + state->e[(idx - 3) & DITHER_BUF_MASK] * 1.590f
 669                      - state->e[(idx - 4) & DITHER_BUF_MASK] * 0.6149f;
 670                 xp = xe + r - rm1;
 671                 rm1 = r;
 672
 673                 float_16_scaled (xp, tmp);
 674
 675                 /* Intrinsic z^-1 delay */
 676                 idx = (idx + 1) & DITHER_BUF_MASK;
 677                 state->e[idx] = xp - xe;
 678
 679 #if __BYTE_ORDER == __LITTLE_ENDIAN
 680                 dst[0]=(char)(tmp>>8);
 681                 dst[1]=(char)(tmp);
 682 #elif __BYTE_ORDER == __BIG_ENDIAN
 683                 dst[0]=(char)(tmp);
 684                 dst[1]=(char)(tmp>>8);
 685 #endif
 686                 dst += dst_skip;
 687                 src++;
 688         }
 689         state->rm1 = rm1;
 690         state->idx = idx;
 691 }
 692
 693 void sample_move_dither_shaped_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 694 {
 695         jack_default_audio_sample_t     x;
 696         jack_default_audio_sample_t     xe; /* the innput sample - filtered error */
 697         jack_default_audio_sample_t     xp; /* x' */
 698         float        r;
 699         float        rm1 = state->rm1;
 700         unsigned int idx = state->idx;
 701
 702         while (nsamples--) {
 703                 x = *src * SAMPLE_16BIT_SCALING;
 704                 r = ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
 705                 /* Filter the error with Lipshitz's minimally audible FIR:
 706                    [2.033 -2.165 1.959 -1.590 0.6149] */
 707                 xe = x
 708                      - state->e[idx] * 2.033f
 709                      + state->e[(idx - 1) & DITHER_BUF_MASK] * 2.165f
 710                      - state->e[(idx - 2) & DITHER_BUF_MASK] * 1.959f
 711                      + state->e[(idx - 3) & DITHER_BUF_MASK] * 1.590f
 712                      - state->e[(idx - 4) & DITHER_BUF_MASK] * 0.6149f;
 713                 xp = xe + r - rm1;
 714                 rm1 = r;
 715
 716                 float_16_scaled (xp, *((int16_t*) dst));
 717
 718                 /* Intrinsic z^-1 delay */
 719                 idx = (idx + 1) & DITHER_BUF_MASK;
 720                 state->e[idx] = *((int16_t*) dst) - xe;
 721
 722                 dst += dst_skip;
 723                 src++;
 724         }
 725         state->rm1 = rm1;
 726         state->idx = idx;
 727 }
 728
 729 void sample_move_dS_s16s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 730 {
 731         short z;
 732         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
 733
 734         /* ALERT: signed sign-extension portability !!! */
 735         while (nsamples--) {
 736 #if __BYTE_ORDER == __LITTLE_ENDIAN
 737                 z = (unsigned char)(src[0]);
 738                 z <<= 8;
 739                 z |= (unsigned char)(src[1]);
 740 #elif __BYTE_ORDER == __BIG_ENDIAN
 741                 z = (unsigned char)(src[1]);
 742                 z <<= 8;
 743                 z |= (unsigned char)(src[0]);
 744 #endif
 745                 *dst = z * scaling;
 746                 dst++;
 747                 src += src_skip;
 748         }
 749 }
 750
 751 void sample_move_dS_s16 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 752 {
 753         /* ALERT: signed sign-extension portability !!! */
 754         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
 755         while (nsamples--) {
 756                 *dst = (*((short *) src)) * scaling;
 757                 dst++;
 758                 src += src_skip;
 759         }
 760 }
 761
 762 void memset_interleave (char *dst, char val, unsigned long bytes,
 763                         unsigned long unit_bytes,
 764                         unsigned long skip_bytes)
 765 {
 766         switch (unit_bytes) {
 767         case 1:
 768                 while (bytes--) {
 769                         *dst = val;
 770                         dst += skip_bytes;
 771                 }
 772                 break;
 773         case 2:
 774                 while (bytes) {
 775                         *((short *) dst) = (short) val;
 776                         dst += skip_bytes;
 777                         bytes -= 2;
 778                 }
 779                 break;
 780         case 4:
 781                 while (bytes) {
 782                         *((int *) dst) = (int) val;
 783                         dst += skip_bytes;
 784                         bytes -= 4;
 785                 }
 786                 break;
 787         default:
 788                 while (bytes) {
 789                         memset(dst, val, unit_bytes);
 790                         dst += skip_bytes;
 791                         bytes -= unit_bytes;
 792                 }
 793                 break;
 794         }
 795 }
 796
 797 /* COPY FUNCTIONS: used to move data from an input channel to an
 798    output channel. Note that we assume that the skip distance
 799    is the same for both channels. This is completely fine
 800    unless the input and output were on different audio interfaces that
 801    were interleaved differently. We don't try to handle that.
 802 */
 803
 804 void
 805 memcpy_fake (char *dst, char *src, unsigned long src_bytes, unsigned long foo, unsigned long bar)
 806 {
 807         memcpy (dst, src, src_bytes);
 808 }
 809
 810 void
 811 memcpy_interleave_d16_s16 (char *dst, char *src, unsigned long src_bytes,
 812                            unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
 813 {
 814         while (src_bytes) {
 815                 *((short *) dst) = *((short *) src);
 816                 dst += dst_skip_bytes;
 817                 src += src_skip_bytes;
 818                 src_bytes -= 2;
 819         }
 820 }
 821
 822 void
 823 memcpy_interleave_d24_s24 (char *dst, char *src, unsigned long src_bytes,
 824                            unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
 825 {
 826         while (src_bytes) {
 827                 memcpy(dst, src, 3);
 828                 dst += dst_skip_bytes;
 829                 src += src_skip_bytes;
 830                 src_bytes -= 3;
 831         }
 832 }
 833
 834 void
 835 memcpy_interleave_d32_s32 (char *dst, char *src, unsigned long src_bytes,
 836                            unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
 837 {
 838         while (src_bytes) {
 839                 *((int *) dst) = *((int *) src);
 840                 dst += dst_skip_bytes;
 841                 src += src_skip_bytes;
 842                 src_bytes -= 4;
 843         }
 844 }
 845