common/memops.c

   1 /*
   2     Copyright (C) 2000 Paul Davis
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 #define _ISOC9X_SOURCE  1
  21 #define _ISOC99_SOURCE  1
  22
  23 #define __USE_ISOC9X    1
  24 #define __USE_ISOC99    1
  25
  26 #include <stdio.h>
  27 #include <string.h>
  28 #include <math.h>
  29 #include <memory.h>
  30 #include <stdlib.h>
  31 #include <stdint.h>
  32 #include <limits.h>
  33 #ifdef __linux__
  34 #include <endian.h>
  35 #endif
  36 #include "memops.h"
  37
  38 #if defined (__SSE2__) && !defined (__sun__)
  39 #include <emmintrin.h>
  40 #endif
  41
  42 /* Notes about these *_SCALING values.
  43
  44    the MAX_<N>BIT values are floating point. when multiplied by
  45    a full-scale normalized floating point sample value (-1.0..+1.0)
  46    they should give the maximum value representable with an integer
  47    sample type of N bits. Note that this is asymmetric. Sample ranges
  48    for signed integer, 2's complement values are -(2^(N-1)) to +(2^(N-1)-1)
  49
  50    Complications
  51    -------------
  52    If we use +2^(N-1) for the scaling factors, we run into a problem:
  53
  54    if we start with a normalized float value of -1.0, scaling
  55    to 24 bits would give -8388608 (-2^23), which is ideal.
  56    But with +1.0, we get +8388608, which is technically out of range.
  57
  58    We never multiply a full range normalized value by this constant,
  59    but we could multiply it by a positive value that is close enough to +1.0
  60    to produce a value > +(2^(N-1)-1).
  61
  62    There is no way around this paradox without wasting CPU cycles to determine
  63    which scaling factor to use (i.e. determine if it's negative or not,
  64    then use the right factor).
  65
  66    So, for now (October 2008) we use 2^(N-1)-1 as the scaling factor.
  67 */
  68
  69 #define SAMPLE_24BIT_SCALING  8388607.0f
  70 #define SAMPLE_16BIT_SCALING  32767.0f
  71
  72 /* these are just values to use if the floating point value was out of range
  73
  74    advice from Fons Adriaensen: make the limits symmetrical
  75  */
  76
  77 #define SAMPLE_24BIT_MAX  8388607
  78 #define SAMPLE_24BIT_MIN  -8388607
  79 #define SAMPLE_24BIT_MAX_F  8388607.0f
  80 #define SAMPLE_24BIT_MIN_F  -8388607.0f
  81
  82 #define SAMPLE_16BIT_MAX  32767
  83 #define SAMPLE_16BIT_MIN  -32767
  84 #define SAMPLE_16BIT_MAX_F  32767.0f
  85 #define SAMPLE_16BIT_MIN_F  -32767.0f
  86
  87 /* these mark the outer edges of the range considered "within" range
  88    for a floating point sample value. values outside (and on the boundaries)
  89    of this range will be clipped before conversion; values within this
  90    range will be scaled to appropriate values for the target sample
  91    type.
  92 */
  93
  94 #define NORMALIZED_FLOAT_MIN -1.0f
  95 #define NORMALIZED_FLOAT_MAX  1.0f
  96
  97 /* define this in case we end up on a platform that is missing
  98    the real lrintf functions
  99 */
 100
 101 #define f_round(f) lrintf(f)
 102
 103 #define float_16(s, d)\
 104         if ((s) <= NORMALIZED_FLOAT_MIN) {\
 105                 (d) = SAMPLE_16BIT_MIN;\
 106         } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
 107                 (d) = SAMPLE_16BIT_MAX;\
 108         } else {\
 109                 (d) = f_round ((s) * SAMPLE_16BIT_SCALING);\
 110         }
 111
 112 /* call this when "s" has already been scaled (e.g. when dithering)
 113  */
 114
 115 #define float_16_scaled(s, d)\
 116         if ((s) <= SAMPLE_16BIT_MIN_F) {\
 117                 (d) = SAMPLE_16BIT_MIN_F;\
 118         } else if ((s) >= SAMPLE_16BIT_MAX_F) { \
 119                 (d) = SAMPLE_16BIT_MAX;\
 120         } else {\
 121                 (d) = f_round ((s));\
 122         }
 123
 124 #define float_24u32(s, d) \
 125         if ((s) <= NORMALIZED_FLOAT_MIN) {\
 126                 (d) = SAMPLE_24BIT_MIN << 8;\
 127         } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
 128                 (d) = SAMPLE_24BIT_MAX << 8;\
 129         } else {\
 130                 (d) = f_round ((s) * SAMPLE_24BIT_SCALING) << 8;\
 131         }
 132
 133 /* call this when "s" has already been scaled (e.g. when dithering)
 134  */
 135
 136 #define float_24u32_scaled(s, d)\
 137         if ((s) <= SAMPLE_24BIT_MIN_F) {\
 138                 (d) = SAMPLE_24BIT_MIN << 8;\
 139         } else if ((s) >= SAMPLE_24BIT_MAX_F) { \
 140                 (d) = SAMPLE_24BIT_MAX << 8;            \
 141         } else {\
 142                 (d) = f_round ((s)) << 8; \
 143         }
 144
 145 #define float_24(s, d) \
 146         if ((s) <= NORMALIZED_FLOAT_MIN) {\
 147                 (d) = SAMPLE_24BIT_MIN;\
 148         } else if ((s) >= NORMALIZED_FLOAT_MAX) {\
 149                 (d) = SAMPLE_24BIT_MAX;\
 150         } else {\
 151                 (d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
 152         }
 153
 154 /* call this when "s" has already been scaled (e.g. when dithering)
 155  */
 156
 157 #define float_24_scaled(s, d)\
 158         if ((s) <= SAMPLE_24BIT_MIN_F) {\
 159                 (d) = SAMPLE_24BIT_MIN;\
 160         } else if ((s) >= SAMPLE_24BIT_MAX_F) { \
 161                 (d) = SAMPLE_24BIT_MAX;         \
 162         } else {\
 163                 (d) = f_round ((s)); \
 164         }
 165
 166
 167 #if defined (__SSE2__) && !defined (__sun__)
 168
 169 /* generates same as _mm_set_ps(1.f, 1.f, 1f., 1f) but faster  */
 170 static inline __m128 gen_one(void)
 171 {
 172     volatile __m128i x;
 173     __m128i ones = _mm_cmpeq_epi32(x, x);
 174     return (__m128)_mm_slli_epi32 (_mm_srli_epi32(ones, 25), 23);
 175 }
 176
 177 static inline __m128 clip(__m128 s, __m128 min, __m128 max)
 178 {
 179     return _mm_min_ps(max, _mm_max_ps(s, min));
 180 }
 181
 182 static inline __m128i float_24_sse(__m128 s)
 183 {
 184     const __m128 upper_bound = gen_one(); /* NORMALIZED_FLOAT_MAX */
 185     const __m128 lower_bound = _mm_sub_ps(_mm_setzero_ps(), upper_bound);
 186
 187     __m128 clipped = clip(s, lower_bound, upper_bound);
 188     __m128 scaled = _mm_mul_ps(clipped, _mm_set1_ps(SAMPLE_24BIT_SCALING));
 189     return _mm_cvtps_epi32(scaled);
 190 }
 191 #endif
 192
 193 /* Linear Congruential noise generator. From the music-dsp list
 194  * less random than rand(), but good enough and 10x faster
 195  */
 196 static unsigned int seed = 22222;
 197
 198 inline unsigned int fast_rand() {
 199         seed = (seed * 96314165) + 907633515;
 200         return seed;
 201 }
 202
 203 /* functions for native float sample data */
 204
 205 void sample_move_floatLE_sSs (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) {
 206         while (nsamples--) {
 207                 *dst = *((float *) src);
 208                 dst++;
 209                 src += src_skip;
 210         }
 211 }
 212
 213 void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) {
 214         while (nsamples--) {
 215                 *((float *) dst) = *src;
 216                 dst += dst_skip;
 217                 src++;
 218         }
 219 }
 220
 221 /* NOTES on function naming:
 222
 223    foo_bar_d<TYPE>_s<TYPE>
 224
 225    the "d<TYPE>" component defines the destination type for the operation
 226    the "s<TYPE>" component defines the source type for the operation
 227
 228    TYPE can be one of:
 229
 230    S      - sample is a jack_default_audio_sample_t, currently (October 2008) a 32 bit floating point value
 231    Ss     - like S but reverse endian from the host CPU
 232    32u24  - sample is a signed 32 bit integer value, but data is in upper 24 bits only
 233    32u24s - like 32u24 but reverse endian from the host CPU
 234    24     - sample is a signed 24 bit integer value
 235    24s    - like 24 but reverse endian from the host CPU
 236    16     - sample is a signed 16 bit integer value
 237    16s    - like 16 but reverse endian from the host CPU
 238
 239    For obvious reasons, the reverse endian versions only show as source types.
 240
 241    This covers all known sample formats at 16 bits or larger.
 242 */
 243
 244 /* functions for native integer sample data */
 245
 246 void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 247 {
 248         int32_t z;
 249
 250         while (nsamples--) {
 251
 252                 float_24u32 (*src, z);
 253
 254 #if __BYTE_ORDER == __LITTLE_ENDIAN
 255                 dst[0]=(char)(z>>24);
 256                 dst[1]=(char)(z>>16);
 257                 dst[2]=(char)(z>>8);
 258                 dst[3]=(char)(z);
 259 #elif __BYTE_ORDER == __BIG_ENDIAN
 260                 dst[0]=(char)(z);
 261                 dst[1]=(char)(z>>8);
 262                 dst[2]=(char)(z>>16);
 263                 dst[3]=(char)(z>>24);
 264 #endif
 265                 dst += dst_skip;
 266                 src++;
 267         }
 268 }
 269
 270 void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 271 {
 272 #if defined (__SSE2__) && !defined (__sun__)
 273         __m128 int_max = _mm_set1_ps(SAMPLE_24BIT_MAX_F);
 274         __m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
 275         __m128 factor = int_max;
 276
 277         unsigned long unrolled = nsamples / 4;
 278         nsamples = nsamples & 3;
 279
 280         while (unrolled--) {
 281                 __m128 in = _mm_load_ps(src);
 282                 __m128 scaled = _mm_mul_ps(in, factor);
 283                 __m128 clipped = clip(scaled, int_min, int_max);
 284
 285                 __m128i y = _mm_cvttps_epi32(clipped);
 286                 __m128i shifted = _mm_slli_epi32(y, 8);
 287
 288                 __m128i shuffled1 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(0, 3, 2, 1));
 289                 __m128i shuffled2 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(1, 0, 3, 2));
 290                 __m128i shuffled3 = _mm_shuffle_epi32(shifted, _MM_SHUFFLE(2, 1, 0, 3));
 291
 292                 _mm_store_ss((float*)dst, (__m128)shifted);
 293
 294                 _mm_store_ss((float*)(dst+dst_skip), (__m128)shuffled1);
 295                 _mm_store_ss((float*)(dst+2*dst_skip), (__m128)shuffled2);
 296                 _mm_store_ss((float*)(dst+3*dst_skip), (__m128)shuffled3);
 297                 dst += 4*dst_skip;
 298
 299                 src+= 4;
 300         }
 301
 302         while (nsamples--) {
 303                 __m128 in = _mm_load_ss(src);
 304                 __m128 scaled = _mm_mul_ss(in, factor);
 305                 __m128 clipped = _mm_min_ss(int_max, _mm_max_ss(scaled, int_min));
 306
 307                 int y = _mm_cvttss_si32(clipped);
 308                 *((int *) dst) = y<<8;
 309
 310                 dst += dst_skip;
 311                 src++;
 312         }
 313
 314 #else
 315         while (nsamples--) {
 316                 float_24u32 (*src, *((int32_t*) dst));
 317                 dst += dst_skip;
 318                 src++;
 319         }
 320 #endif
 321 }
 322
 323 void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 324 {
 325         /* ALERT: signed sign-extension portability !!! */
 326
 327         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
 328
 329         while (nsamples--) {
 330                 int x;
 331 #if __BYTE_ORDER == __LITTLE_ENDIAN
 332                 x = (unsigned char)(src[0]);
 333                 x <<= 8;
 334                 x |= (unsigned char)(src[1]);
 335                 x <<= 8;
 336                 x |= (unsigned char)(src[2]);
 337                 x <<= 8;
 338                 x |= (unsigned char)(src[3]);
 339 #elif __BYTE_ORDER == __BIG_ENDIAN
 340                 x = (unsigned char)(src[3]);
 341                 x <<= 8;
 342                 x |= (unsigned char)(src[2]);
 343                 x <<= 8;
 344                 x |= (unsigned char)(src[1]);
 345                 x <<= 8;
 346                 x |= (unsigned char)(src[0]);
 347 #endif
 348                 *dst = (x >> 8) * scaling;
 349                 dst++;
 350                 src += src_skip;
 351         }
 352 }
 353
 354 void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 355 {
 356 #if defined (__SSE2__) && !defined (__sun__)
 357         unsigned long unrolled = nsamples / 4;
 358         static float inv_sample_max_24bit = 1.0 / SAMPLE_24BIT_SCALING;
 359         __m128 factor = _mm_set1_ps(inv_sample_max_24bit);
 360         while (unrolled--)
 361         {
 362                 int i1 = *((int *) src);
 363                 src+= src_skip;
 364                 int i2 = *((int *) src);
 365                 src+= src_skip;
 366                 int i3 = *((int *) src);
 367                 src+= src_skip;
 368                 int i4 = *((int *) src);
 369                 src+= src_skip;
 370
 371                 __m128i src = _mm_set_epi32(i4, i3, i2, i1);
 372                 __m128i shifted = _mm_srai_epi32(src, 8);
 373
 374                 __m128 as_float = _mm_cvtepi32_ps(shifted);
 375                 __m128 divided = _mm_mul_ps(as_float, factor);
 376
 377                 _mm_storeu_ps(dst, divided);
 378
 379                 dst += 4;
 380         }
 381         nsamples = nsamples & 3;
 382 #endif
 383
 384         /* ALERT: signed sign-extension portability !!! */
 385
 386         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
 387         while (nsamples--) {
 388                 *dst = (*((int *) src) >> 8) * scaling;
 389                 dst++;
 390                 src += src_skip;
 391         }
 392 }
 393
 394 void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 395 {
 396         int32_t z;
 397
 398         while (nsamples--) {
 399                 float_24 (*src, z);
 400 #if __BYTE_ORDER == __LITTLE_ENDIAN
 401                 dst[0]=(char)(z>>16);
 402                 dst[1]=(char)(z>>8);
 403                 dst[2]=(char)(z);
 404 #elif __BYTE_ORDER == __BIG_ENDIAN
 405                 dst[0]=(char)(z);
 406                 dst[1]=(char)(z>>8);
 407                 dst[2]=(char)(z>>16);
 408 #endif
 409                 dst += dst_skip;
 410                 src++;
 411         }
 412 }
 413
 414 void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 415 {
 416 #if defined (__SSE2__) && !defined (__sun__)
 417         _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
 418         while (nsamples >= 4) {
 419                 int i;
 420                 int32_t z[4];
 421                 __m128 samples = _mm_loadu_ps(src);
 422                 __m128i converted = float_24_sse(samples);
 423
 424                 __m128i shuffled1 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(0, 3, 2, 1));
 425                 __m128i shuffled2 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(1, 0, 3, 2));
 426                 __m128i shuffled3 = _mm_shuffle_epi32(converted, _MM_SHUFFLE(2, 1, 0, 3));
 427
 428                 _mm_store_ss((float*)z, (__m128)converted);
 429                 _mm_store_ss((float*)z+1, (__m128)shuffled1);
 430                 _mm_store_ss((float*)z+2, (__m128)shuffled2);
 431                 _mm_store_ss((float*)z+3, (__m128)shuffled3);
 432
 433                 for (i = 0; i != 4; ++i) {
 434 #if __BYTE_ORDER == __LITTLE_ENDIAN
 435                         memcpy (dst, z+i, 3);
 436 #elif __BYTE_ORDER == __BIG_ENDIAN
 437                         memcpy (dst, (float*)((char *)&z + 1)+i, 3);
 438 #endif
 439                         dst += dst_skip;
 440                 }
 441                 nsamples -= 4;
 442                 src += 4;
 443         }
 444 #endif
 445
 446     int32_t z;
 447
 448         while (nsamples--) {
 449                 float_24 (*src, z);
 450 #if __BYTE_ORDER == __LITTLE_ENDIAN
 451                 memcpy (dst, &z, 3);
 452 #elif __BYTE_ORDER == __BIG_ENDIAN
 453                 memcpy (dst, (char *)&z + 1, 3);
 454 #endif
 455                 dst += dst_skip;
 456                 src++;
 457         }
 458 }
 459
 460 void sample_move_dS_s24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 461 {
 462         /* ALERT: signed sign-extension portability !!! */
 463
 464         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING;
 465         while (nsamples--) {
 466                 int x;
 467 #if __BYTE_ORDER == __LITTLE_ENDIAN
 468                 x = (unsigned char)(src[0]);
 469                 x <<= 8;
 470                 x |= (unsigned char)(src[1]);
 471                 x <<= 8;
 472                 x |= (unsigned char)(src[2]);
 473                 /* correct sign bit and the rest of the top byte */
 474                 if (src[0] & 0x80) {
 475                         x |= 0xff << 24;
 476                 }
 477 #elif __BYTE_ORDER == __BIG_ENDIAN
 478                 x = (unsigned char)(src[2]);
 479                 x <<= 8;
 480                 x |= (unsigned char)(src[1]);
 481                 x <<= 8;
 482                 x |= (unsigned char)(src[0]);
 483                 /* correct sign bit and the rest of the top byte */
 484                 if (src[0] & 0x80) {
 485                         x |= 0xff << 24;
 486                 }
 487 #endif
 488                 *dst = x * scaling;
 489                 dst++;
 490                 src += src_skip;
 491         }
 492 }
 493
 494 void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 495 {
 496         const jack_default_audio_sample_t scaling = 1.f/SAMPLE_24BIT_SCALING;
 497
 498 #if defined (__SSE2__) && !defined (__sun__)
 499         const __m128 scaling_block = _mm_set_ps1(scaling);
 500         while (nsamples >= 4) {
 501                 int x0, x1, x2, x3;
 502
 503 #if __BYTE_ORDER == __LITTLE_ENDIAN
 504                 memcpy((char*)&x0 + 1, src, 3);
 505                 memcpy((char*)&x1 + 1, src+src_skip, 3);
 506                 memcpy((char*)&x2 + 1, src+2*src_skip, 3);
 507                 memcpy((char*)&x3 + 1, src+3*src_skip, 3);
 508 #elif __BYTE_ORDER == __BIG_ENDIAN
 509                 memcpy(&x0, src, 3);
 510                 memcpy(&x1, src+src_skip, 3);
 511                 memcpy(&x2, src+2*src_skip, 3);
 512                 memcpy(&x3, src+3*src_skip, 3);
 513 #endif
 514                 src += 4 * src_skip;
 515
 516                 const __m128i block_i = _mm_set_epi32(x3, x2, x1, x0);
 517                 const __m128i shifted = _mm_srai_epi32(block_i, 8);
 518                 const __m128 converted = _mm_cvtepi32_ps (shifted);
 519                 const __m128 scaled = _mm_mul_ps(converted, scaling_block);
 520                 _mm_storeu_ps(dst, scaled);
 521                 dst += 4;
 522                 nsamples -= 4;
 523         }
 524 #endif
 525
 526         while (nsamples--) {
 527                 int x;
 528 #if __BYTE_ORDER == __LITTLE_ENDIAN
 529                 memcpy((char*)&x + 1, src, 3);
 530 #elif __BYTE_ORDER == __BIG_ENDIAN
 531                 memcpy(&x, src, 3);
 532 #endif
 533                 x >>= 8;
 534                 *dst = x * scaling;
 535                 dst++;
 536                 src += src_skip;
 537         }
 538 }
 539
 540
 541 void sample_move_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 542 {
 543         int16_t tmp;
 544
 545         while (nsamples--) {
 546                 // float_16 (*src, tmp);
 547
 548                 if (*src <= NORMALIZED_FLOAT_MIN) {
 549                         tmp = SAMPLE_16BIT_MIN;
 550                 } else if (*src >= NORMALIZED_FLOAT_MAX) {
 551                         tmp = SAMPLE_16BIT_MAX;
 552                 } else {
 553                         tmp = (int16_t) f_round (*src * SAMPLE_16BIT_SCALING);
 554                 }
 555
 556 #if __BYTE_ORDER == __LITTLE_ENDIAN
 557                 dst[0]=(char)(tmp>>8);
 558                 dst[1]=(char)(tmp);
 559 #elif __BYTE_ORDER == __BIG_ENDIAN
 560                 dst[0]=(char)(tmp);
 561                 dst[1]=(char)(tmp>>8);
 562 #endif
 563                 dst += dst_skip;
 564                 src++;
 565         }
 566 }
 567
 568 void sample_move_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 569 {
 570         while (nsamples--) {
 571                 float_16 (*src, *((int16_t*) dst));
 572                 dst += dst_skip;
 573                 src++;
 574         }
 575 }
 576
 577 void sample_move_dither_rect_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 578 {
 579         jack_default_audio_sample_t val;
 580         int16_t      tmp;
 581
 582         while (nsamples--) {
 583                 val = (*src * SAMPLE_16BIT_SCALING) + fast_rand() / (float) UINT_MAX - 0.5f;
 584                 float_16_scaled (val, tmp);
 585 #if __BYTE_ORDER == __LITTLE_ENDIAN
 586                 dst[0]=(char)(tmp>>8);
 587                 dst[1]=(char)(tmp);
 588 #elif __BYTE_ORDER == __BIG_ENDIAN
 589                 dst[0]=(char)(tmp);
 590                 dst[1]=(char)(tmp>>8);
 591 #endif
 592                 dst += dst_skip;
 593                 src++;
 594         }
 595 }
 596
 597 void sample_move_dither_rect_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 598 {
 599         jack_default_audio_sample_t val;
 600
 601         while (nsamples--) {
 602                 val = (*src * SAMPLE_16BIT_SCALING) + fast_rand() / (float)UINT_MAX - 0.5f;
 603                 float_16_scaled (val, *((int16_t*) dst));
 604                 dst += dst_skip;
 605                 src++;
 606         }
 607 }
 608
 609 void sample_move_dither_tri_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 610 {
 611         jack_default_audio_sample_t val;
 612         int16_t      tmp;
 613
 614         while (nsamples--) {
 615                 val = (*src * SAMPLE_16BIT_SCALING) + ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
 616                 float_16_scaled (val, tmp);
 617
 618 #if __BYTE_ORDER == __LITTLE_ENDIAN
 619                 dst[0]=(char)(tmp>>8);
 620                 dst[1]=(char)(tmp);
 621 #elif __BYTE_ORDER == __BIG_ENDIAN
 622                 dst[0]=(char)(tmp);
 623                 dst[1]=(char)(tmp>>8);
 624 #endif
 625                 dst += dst_skip;
 626                 src++;
 627         }
 628 }
 629
 630 void sample_move_dither_tri_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 631 {
 632         jack_default_audio_sample_t val;
 633
 634         while (nsamples--) {
 635                 val = (*src * SAMPLE_16BIT_SCALING) + ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
 636                 float_16_scaled (val, *((int16_t*) dst));
 637                 dst += dst_skip;
 638                 src++;
 639         }
 640 }
 641
 642 void sample_move_dither_shaped_d16_sSs (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 643 {
 644         jack_default_audio_sample_t     x;
 645         jack_default_audio_sample_t     xe; /* the innput sample - filtered error */
 646         jack_default_audio_sample_t     xp; /* x' */
 647         float        r;
 648         float        rm1 = state->rm1;
 649         unsigned int idx = state->idx;
 650         int16_t      tmp;
 651
 652         while (nsamples--) {
 653                 x = *src * SAMPLE_16BIT_SCALING;
 654                 r = ((float)fast_rand() + (float)fast_rand())  / (float)UINT_MAX - 1.0f;
 655                 /* Filter the error with Lipshitz's minimally audible FIR:
 656                    [2.033 -2.165 1.959 -1.590 0.6149] */
 657                 xe = x
 658                      - state->e[idx] * 2.033f
 659                      + state->e[(idx - 1) & DITHER_BUF_MASK] * 2.165f
 660                      - state->e[(idx - 2) & DITHER_BUF_MASK] * 1.959f
 661                      + state->e[(idx - 3) & DITHER_BUF_MASK] * 1.590f
 662                      - state->e[(idx - 4) & DITHER_BUF_MASK] * 0.6149f;
 663                 xp = xe + r - rm1;
 664                 rm1 = r;
 665
 666                 float_16_scaled (xp, tmp);
 667
 668                 /* Intrinsic z^-1 delay */
 669                 idx = (idx + 1) & DITHER_BUF_MASK;
 670                 state->e[idx] = xp - xe;
 671
 672 #if __BYTE_ORDER == __LITTLE_ENDIAN
 673                 dst[0]=(char)(tmp>>8);
 674                 dst[1]=(char)(tmp);
 675 #elif __BYTE_ORDER == __BIG_ENDIAN
 676                 dst[0]=(char)(tmp);
 677                 dst[1]=(char)(tmp>>8);
 678 #endif
 679                 dst += dst_skip;
 680                 src++;
 681         }
 682         state->rm1 = rm1;
 683         state->idx = idx;
 684 }
 685
 686 void sample_move_dither_shaped_d16_sS (char *dst,  jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
 687 {
 688         jack_default_audio_sample_t     x;
 689         jack_default_audio_sample_t     xe; /* the innput sample - filtered error */
 690         jack_default_audio_sample_t     xp; /* x' */
 691         float        r;
 692         float        rm1 = state->rm1;
 693         unsigned int idx = state->idx;
 694
 695         while (nsamples--) {
 696                 x = *src * SAMPLE_16BIT_SCALING;
 697                 r = ((float)fast_rand() + (float)fast_rand()) / (float)UINT_MAX - 1.0f;
 698                 /* Filter the error with Lipshitz's minimally audible FIR:
 699                    [2.033 -2.165 1.959 -1.590 0.6149] */
 700                 xe = x
 701                      - state->e[idx] * 2.033f
 702                      + state->e[(idx - 1) & DITHER_BUF_MASK] * 2.165f
 703                      - state->e[(idx - 2) & DITHER_BUF_MASK] * 1.959f
 704                      + state->e[(idx - 3) & DITHER_BUF_MASK] * 1.590f
 705                      - state->e[(idx - 4) & DITHER_BUF_MASK] * 0.6149f;
 706                 xp = xe + r - rm1;
 707                 rm1 = r;
 708
 709                 float_16_scaled (xp, *((int16_t*) dst));
 710
 711                 /* Intrinsic z^-1 delay */
 712                 idx = (idx + 1) & DITHER_BUF_MASK;
 713                 state->e[idx] = *((int16_t*) dst) - xe;
 714
 715                 dst += dst_skip;
 716                 src++;
 717         }
 718         state->rm1 = rm1;
 719         state->idx = idx;
 720 }
 721
 722 void sample_move_dS_s16s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 723 {
 724         short z;
 725         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
 726
 727         /* ALERT: signed sign-extension portability !!! */
 728         while (nsamples--) {
 729 #if __BYTE_ORDER == __LITTLE_ENDIAN
 730                 z = (unsigned char)(src[0]);
 731                 z <<= 8;
 732                 z |= (unsigned char)(src[1]);
 733 #elif __BYTE_ORDER == __BIG_ENDIAN
 734                 z = (unsigned char)(src[1]);
 735                 z <<= 8;
 736                 z |= (unsigned char)(src[0]);
 737 #endif
 738                 *dst = z * scaling;
 739                 dst++;
 740                 src += src_skip;
 741         }
 742 }
 743
 744 void sample_move_dS_s16 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 745 {
 746         /* ALERT: signed sign-extension portability !!! */
 747         const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING;
 748         while (nsamples--) {
 749                 *dst = (*((short *) src)) * scaling;
 750                 dst++;
 751                 src += src_skip;
 752         }
 753 }
 754
 755 void memset_interleave (char *dst, char val, unsigned long bytes,
 756                         unsigned long unit_bytes,
 757                         unsigned long skip_bytes)
 758 {
 759         switch (unit_bytes) {
 760         case 1:
 761                 while (bytes--) {
 762                         *dst = val;
 763                         dst += skip_bytes;
 764                 }
 765                 break;
 766         case 2:
 767                 while (bytes) {
 768                         *((short *) dst) = (short) val;
 769                         dst += skip_bytes;
 770                         bytes -= 2;
 771                 }
 772                 break;
 773         case 4:
 774                 while (bytes) {
 775                         *((int *) dst) = (int) val;
 776                         dst += skip_bytes;
 777                         bytes -= 4;
 778                 }
 779                 break;
 780         default:
 781                 while (bytes) {
 782                         memset(dst, val, unit_bytes);
 783                         dst += skip_bytes;
 784                         bytes -= unit_bytes;
 785                 }
 786                 break;
 787         }
 788 }
 789
 790 /* COPY FUNCTIONS: used to move data from an input channel to an
 791    output channel. Note that we assume that the skip distance
 792    is the same for both channels. This is completely fine
 793    unless the input and output were on different audio interfaces that
 794    were interleaved differently. We don't try to handle that.
 795 */
 796
 797 void
 798 memcpy_fake (char *dst, char *src, unsigned long src_bytes, unsigned long foo, unsigned long bar)
 799 {
 800         memcpy (dst, src, src_bytes);
 801 }
 802
 803 void
 804 memcpy_interleave_d16_s16 (char *dst, char *src, unsigned long src_bytes,
 805                            unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
 806 {
 807         while (src_bytes) {
 808                 *((short *) dst) = *((short *) src);
 809                 dst += dst_skip_bytes;
 810                 src += src_skip_bytes;
 811                 src_bytes -= 2;
 812         }
 813 }
 814
 815 void
 816 memcpy_interleave_d24_s24 (char *dst, char *src, unsigned long src_bytes,
 817                            unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
 818 {
 819         while (src_bytes) {
 820                 memcpy(dst, src, 3);
 821                 dst += dst_skip_bytes;
 822                 src += src_skip_bytes;
 823                 src_bytes -= 3;
 824         }
 825 }
 826
 827 void
 828 memcpy_interleave_d32_s32 (char *dst, char *src, unsigned long src_bytes,
 829                            unsigned long dst_skip_bytes, unsigned long src_skip_bytes)
 830 {
 831         while (src_bytes) {
 832                 *((int *) dst) = *((int *) src);
 833                 dst += dst_skip_bytes;
 834                 src += src_skip_bytes;
 835                 src_bytes -= 4;
 836         }
 837 }
 838