third_party/qcms/src/transform_util.c

   1 //  qcms
   2 //  Copyright (C) 2009 Mozilla Foundation
   3 //
   4 // Permission is hereby granted, free of charge, to any person obtaining
   5 // a copy of this software and associated documentation files (the "Software"),
   6 // to deal in the Software without restriction, including without limitation
   7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 // and/or sell copies of the Software, and to permit persons to whom the Software
   9 // is furnished to do so, subject to the following conditions:
  10 //
  11 // The above copyright notice and this permission notice shall be included in
  12 // all copies or substantial portions of the Software.
  13 //
  14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  16 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  18 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  19 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21
  22 #define _ISOC99_SOURCE  /* for INFINITY */
  23
  24 #include <math.h>
  25 #include <assert.h>
  26 #include <string.h> //memcpy
  27 #include "qcmsint.h"
  28 #include "transform_util.h"
  29 #include "matrix.h"
  30
  31 #if !defined(INFINITY)
  32 #define INFINITY HUGE_VAL
  33 #endif
  34
  35 #define PARAMETRIC_CURVE_TYPE 0x70617261 //'para'
  36
  37 /* value must be a value between 0 and 1 */
  38 //XXX: is the above a good restriction to have?
  39 // the output range of this function is 0..1
  40 float lut_interp_linear(double input_value, uint16_t *table, size_t length)
  41 {
  42         int upper, lower;
  43         float value;
  44         input_value = input_value * (length - 1); // scale to length of the array
  45         upper = ceil(input_value);
  46         lower = floor(input_value);
  47         //XXX: can we be more performant here?
  48         value = table[upper]*(1. - (upper - input_value)) + table[lower]*(upper - input_value);
  49         /* scale the value */
  50         return value * (1.f/65535.f);
  51 }
  52
  53 /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
  54 uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
  55 {
  56         /* Start scaling input_value to the length of the array: 65535*(length-1).
  57          * We'll divide out the 65535 next */
  58         uintptr_t value = (input_value * (length - 1));
  59         uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
  60         uint32_t lower = value / 65535;           /* equivalent to floor(value/65535) */
  61         /* interp is the distance from upper to value scaled to 0..65535 */
  62         uint32_t interp = value % 65535;
  63
  64         value = (table[upper]*(interp) + table[lower]*(65535 - interp))/65535; // 0..65535*65535
  65
  66         return value;
  67 }
  68
  69 /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
  70  * and returns a uint8_t value representing a range from 0..1 */
  71 static
  72 uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
  73 {
  74         /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
  75          * We'll divide out the PRECACHE_OUTPUT_MAX next */
  76         uintptr_t value = (input_value * (length - 1));
  77
  78         /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
  79         uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
  80         /* equivalent to floor(value/PRECACHE_OUTPUT_MAX) */
  81         uint32_t lower = value / PRECACHE_OUTPUT_MAX;
  82         /* interp is the distance from upper to value scaled to 0..PRECACHE_OUTPUT_MAX */
  83         uint32_t interp = value % PRECACHE_OUTPUT_MAX;
  84
  85         /* the table values range from 0..65535 */
  86         value = (table[upper]*(interp) + table[lower]*(PRECACHE_OUTPUT_MAX - interp)); // 0..(65535*PRECACHE_OUTPUT_MAX)
  87
  88         /* round and scale */
  89         value += (PRECACHE_OUTPUT_MAX*65535/255)/2;
  90         value /= (PRECACHE_OUTPUT_MAX*65535/255); // scale to 0..255
  91         return value;
  92 }
  93
  94 /* value must be a value between 0 and 1 */
  95 //XXX: is the above a good restriction to have?
  96 float lut_interp_linear_float(float value, float *table, size_t length)
  97 {
  98         int upper, lower;
  99         value = value * (length - 1);
 100         upper = ceil(value);
 101         lower = floor(value);
 102         //XXX: can we be more performant here?
 103         value = table[upper]*(1. - (upper - value)) + table[lower]*(upper - value);
 104         /* scale the value */
 105         return value;
 106 }
 107
#if 0
/* Disabled sketch of an alternative lut_interp_linear16; the surrounding
 * #if 0 keeps it out of the build, so it is here for reference only. */
/* if we use a different representation, i.e. one that goes from 0 to 0x1000, we can be more efficient
 * because we can avoid the divisions and use shifts instead */
/* same as above but takes and returns a uint16_t value representing a range from 0..1 */
uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
{
        uint32_t value = (input_value * (length - 1));
        uint32_t upper = (value + 4095) / 4096; /* equivalent to ceil(value/4096) */
        uint32_t lower = value / 4096;           /* equivalent to floor(value/4096) */
        uint32_t interp = value % 4096;

        value = (table[upper]*(interp) + table[lower]*(4096 - interp))/4096; // 0..4096*4096

        return value;
}
#endif
 124
 125 void compute_curve_gamma_table_type1(float gamma_table[256], uint16_t gamma)
 126 {
 127         unsigned int i;
 128         float gamma_float = u8Fixed8Number_to_float(gamma);
 129         for (i = 0; i < 256; i++) {
 130                 // 0..1^(0..255 + 255/256) will always be between 0 and 1
 131                 gamma_table[i] = pow(i/255., gamma_float);
 132         }
 133 }
 134
 135 void compute_curve_gamma_table_type2(float gamma_table[256], uint16_t *table, size_t length)
 136 {
 137         unsigned int i;
 138         for (i = 0; i < 256; i++) {
 139                 gamma_table[i] = lut_interp_linear(i/255., table, length);
 140         }
 141 }
 142
 143 void compute_curve_gamma_table_type_parametric(float gamma_table[256], float parameter[7], int count)
 144 {
 145         size_t X;
 146         float interval;
 147         float a, b, c, e, f;
 148         float y = parameter[0];
 149         if (count == 0) {
 150                 a = 1;
 151                 b = 0;
 152                 c = 0;
 153                 e = 0;
 154                 f = 0;
 155                 interval = -INFINITY;
 156         } else if(count == 1) {
 157                 a = parameter[1];
 158                 b = parameter[2];
 159                 c = 0;
 160                 e = 0;
 161                 f = 0;
 162                 interval = -1 * parameter[2] / parameter[1];
 163         } else if(count == 2) {
 164                 a = parameter[1];
 165                 b = parameter[2];
 166                 c = 0;
 167                 e = parameter[3];
 168                 f = parameter[3];
 169                 interval = -1 * parameter[2] / parameter[1];
 170         } else if(count == 3) {
 171                 a = parameter[1];
 172                 b = parameter[2];
 173                 c = parameter[3];
 174                 e = -c;
 175                 f = 0;
 176                 interval = parameter[4];
 177         } else if(count == 4) {
 178                 a = parameter[1];
 179                 b = parameter[2];
 180                 c = parameter[3];
 181                 e = parameter[5] - c;
 182                 f = parameter[6];
 183                 interval = parameter[4];
 184         } else {
 185                 assert(0 && "invalid parametric function type.");
 186                 a = 1;
 187                 b = 0;
 188                 c = 0;
 189                 e = 0;
 190                 f = 0;
 191                 interval = -INFINITY;
 192         }
 193         for (X = 0; X < 256; X++) {
 194                 if (X >= interval) {
 195                         // XXX The equations are not exactly as definied in the spec but are
 196                         //     algebraic equivilent.
 197                         // TODO Should division by 255 be for the whole expression.
 198                         gamma_table[X] = clamp_float(pow(a * X / 255. + b, y) + c + e);
 199                 } else {
 200                         gamma_table[X] = clamp_float(c * X / 255. + f);
 201                 }
 202         }
 203 }
 204
 205 void compute_curve_gamma_table_type0(float gamma_table[256])
 206 {
 207         unsigned int i;
 208         for (i = 0; i < 256; i++) {
 209                 gamma_table[i] = i/255.;
 210         }
 211 }
 212
 213 float clamp_float(float a)
 214 {
 215         /* One would naturally write this function as the following:
 216         if (a > 1.)
 217                 return 1.;
 218         else if (a < 0)
 219                 return 0;
 220         else
 221                 return a;
 222
 223         However, that version will let NaNs pass through which is undesirable
 224         for most consumers.
 225         */
 226
 227         if (a > 1.)
 228                 return 1.;
 229         else if (a >= 0)
 230                 return a;
 231         else // a < 0 or a is NaN
 232                 return 0;
 233 }
 234
 235 unsigned char clamp_u8(float v)
 236 {
 237         if (v > 255.)
 238                 return 255;
 239         else if (v < 0)
 240                 return 0;
 241         else
 242                 return floor(v+.5);
 243 }
 244
 245 float u8Fixed8Number_to_float(uint16_t x)
 246 {
 247         // 0x0000 = 0.
 248         // 0x0100 = 1.
 249         // 0xffff = 255  + 255/256
 250         return x/256.;
 251 }
 252
 253 /* The SSE2 code uses min & max which let NaNs pass through.
 254    We want to try to prevent that here by ensuring that
 255    gamma table is within expected values. */
 256 void validate_gamma_table(float gamma_table[256])
 257 {
 258         int i;
 259         for (i = 0; i < 256; i++) {
 260                 // Note: we check that the gamma is not in range
 261                 // instead of out of range so that we catch NaNs
 262                 if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
 263                         gamma_table[i] = 0.f;
 264                 }
 265         }
 266 }
 267
 268 float *build_input_gamma_table(struct curveType *TRC)
 269 {
 270         float *gamma_table;
 271
 272         if (!TRC) return NULL;
 273         gamma_table = malloc(sizeof(float)*256);
 274         if (gamma_table) {
 275                 if (TRC->type == PARAMETRIC_CURVE_TYPE) {
 276                         compute_curve_gamma_table_type_parametric(gamma_table, TRC->parameter, TRC->count);
 277                 } else {
 278                         if (TRC->count == 0) {
 279                                 compute_curve_gamma_table_type0(gamma_table);
 280                         } else if (TRC->count == 1) {
 281                                 compute_curve_gamma_table_type1(gamma_table, TRC->data[0]);
 282                         } else {
 283                                 compute_curve_gamma_table_type2(gamma_table, TRC->data, TRC->count);
 284                         }
 285                 }
 286         }
 287
 288         validate_gamma_table(gamma_table);
 289
 290         return gamma_table;
 291 }
 292
 293 struct matrix build_colorant_matrix(qcms_profile *p)
 294 {
 295         struct matrix result;
 296         result.m[0][0] = s15Fixed16Number_to_float(p->redColorant.X);
 297         result.m[0][1] = s15Fixed16Number_to_float(p->greenColorant.X);
 298         result.m[0][2] = s15Fixed16Number_to_float(p->blueColorant.X);
 299         result.m[1][0] = s15Fixed16Number_to_float(p->redColorant.Y);
 300         result.m[1][1] = s15Fixed16Number_to_float(p->greenColorant.Y);
 301         result.m[1][2] = s15Fixed16Number_to_float(p->blueColorant.Y);
 302         result.m[2][0] = s15Fixed16Number_to_float(p->redColorant.Z);
 303         result.m[2][1] = s15Fixed16Number_to_float(p->greenColorant.Z);
 304         result.m[2][2] = s15Fixed16Number_to_float(p->blueColorant.Z);
 305         result.invalid = false;
 306         return result;
 307 }
 308
 309 /* The following code is copied nearly directly from lcms.
 310  * I think it could be much better. For example, Argyll seems to have better code in
 311  * icmTable_lookup_bwd and icmTable_setup_bwd. However, for now this is a quick way
 312  * to a working solution and allows for easy comparing with lcms. */
 313 uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int length)
 314 {
 315         int l = 1;
 316         int r = 0x10000;
 317         int x = 0, res;       // 'int' Give spacing for negative values
 318         int NumZeroes, NumPoles;
 319         int cell0, cell1;
 320         double val2;
 321         double y0, y1, x0, x1;
 322         double a, b, f;
 323
 324         // July/27 2001 - Expanded to handle degenerated curves with an arbitrary
 325         // number of elements containing 0 at the begining of the table (Zeroes)
 326         // and another arbitrary number of poles (FFFFh) at the end.
 327         // First the zero and pole extents are computed, then value is compared.
 328
 329         NumZeroes = 0;
 330         while (LutTable[NumZeroes] == 0 && NumZeroes < length-1)
 331             NumZeroes++;
 332
 333         // There are no zeros at the beginning and we are trying to find a zero, so
 334         // return anything. It seems zero would be the less destructive choice
 335         /* I'm not sure that this makes sense, but oh well... */
 336         if (NumZeroes == 0 && Value == 0)
 337             return 0;
 338
 339         NumPoles = 0;
 340         while (LutTable[length-1- NumPoles] == 0xFFFF && NumPoles < length-1)
 341             NumPoles++;
 342
 343         // Does the curve belong to this case?
 344         if (NumZeroes > 1 || NumPoles > 1)
 345         {
 346                 int a, b, sample;
 347
 348                 // Identify if value fall downto 0 or FFFF zone
 349                 if (Value == 0) return 0;
 350                 // if (Value == 0xFFFF) return 0xFFFF;
 351                 sample = (length-1) * ((double) Value * (1./65535.));
 352                 if (LutTable[sample] == 0xffff)
 353                     return 0xffff;
 354
 355                 // else restrict to valid zone
 356
 357                 a = ((NumZeroes-1) * 0xFFFF) / (length-1);
 358                 b = ((length-1 - NumPoles) * 0xFFFF) / (length-1);
 359
 360                 l = a - 1;
 361                 r = b + 1;
 362
 363                 // Ensure a valid binary search range
 364
 365                 if (l < 1)
 366                     l = 1;
 367                 if (r > 0x10000)
 368                     r = 0x10000;
 369
 370                 // If the search range is inverted due to degeneracy,
 371                 // deem LutTable non-invertible in this search range.
 372                 // Refer to https://bugzil.la/1132467
 373
 374                 if (r <= l)
 375                     return 0;
 376         }
 377
 378         // For input 0, return that to maintain black level. Note the binary search
 379         // does not. For example, it inverts the standard sRGB gamma curve to 7 at
 380         // the origin, causing a black level error.
 381
 382         if (Value == 0 && NumZeroes) {
 383             return 0;
 384         }
 385
 386         // Seems not a degenerated case... apply binary search
 387
 388         while (r > l) {
 389
 390                 x = (l + r) / 2;
 391
 392                 res = (int) lut_interp_linear16((uint16_fract_t) (x-1), LutTable, length);
 393
 394                 if (res == Value) {
 395
 396                     // Found exact match.
 397
 398                     return (uint16_fract_t) (x - 1);
 399                 }
 400
 401                 if (res > Value) r = x - 1;
 402                 else l = x + 1;
 403         }
 404
 405         // Not found, should we interpolate?
 406
 407         // Get surrounding nodes
 408
 409         assert(x >= 1);
 410
 411         val2 = (length-1) * ((double) (x - 1) / 65535.0);
 412
 413         cell0 = (int) floor(val2);
 414         cell1 = (int) ceil(val2);
 415
 416         assert(cell0 >= 0);
 417         assert(cell1 >= 0);
 418         assert(cell0 < length);
 419         assert(cell1 < length);
 420
 421         if (cell0 == cell1) return (uint16_fract_t) x;
 422
 423         y0 = LutTable[cell0] ;
 424         x0 = (65535.0 * cell0) / (length-1);
 425
 426         y1 = LutTable[cell1] ;
 427         x1 = (65535.0 * cell1) / (length-1);
 428
 429         a = (y1 - y0) / (x1 - x0);
 430         b = y0 - a * x0;
 431
 432         if (fabs(a) < 0.01) return (uint16_fract_t) x;
 433
 434         f = ((Value - b) / a);
 435
 436         if (f < 0.0) return (uint16_fract_t) 0;
 437         if (f >= 65535.0) return (uint16_fract_t) 0xFFFF;
 438
 439         return (uint16_fract_t) floor(f + 0.5);
 440 }
 441
 442 /*
 443  The number of entries needed to invert a lookup table should not
 444  necessarily be the same as the original number of entries.  This is
 445  especially true of lookup tables that have a small number of entries.
 446
 447  For example:
 448  Using a table like:
 449     {0, 3104, 14263, 34802, 65535}
 450  invert_lut will produce an inverse of:
 451     {3, 34459, 47529, 56801, 65535}
 452  which has an maximum error of about 9855 (pixel difference of ~38.346)
 453
 454  For now, we punt the decision of output size to the caller. */
 455 static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
 456 {
 457         int i;
 458         /* for now we invert the lut by creating a lut of size out_length
 459          * and attempting to lookup a value for each entry using lut_inverse_interp16 */
 460         uint16_t *output = malloc(sizeof(uint16_t)*out_length);
 461         if (!output)
 462                 return NULL;
 463
 464         for (i = 0; i < out_length; i++) {
 465                 double x = ((double) i * 65535.) / (double) (out_length - 1);
 466                 uint16_fract_t input = floor(x + .5);
 467                 output[i] = lut_inverse_interp16(input, table, length);
 468         }
 469         return output;
 470 }
 471
 472 static void compute_precache_pow(uint8_t *output, float gamma)
 473 {
 474         uint32_t v = 0;
 475         for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
 476                 //XXX: don't do integer/float conversion... and round?
 477                 output[v] = 255. * pow(v/(double)PRECACHE_OUTPUT_MAX, gamma);
 478         }
 479 }
 480
 481 void compute_precache_lut(uint8_t *output, uint16_t *table, int length)
 482 {
 483         uint32_t v = 0;
 484         for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
 485                 output[v] = lut_interp_linear_precache_output(v, table, length);
 486         }
 487 }
 488
 489 void compute_precache_linear(uint8_t *output)
 490 {
 491         uint32_t v = 0;
 492         for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
 493                 //XXX: round?
 494                 output[v] = v / (PRECACHE_OUTPUT_SIZE/256);
 495         }
 496 }
 497
 498 qcms_bool compute_precache(struct curveType *trc, uint8_t *output)
 499 {
 500
 501         if (trc->type == PARAMETRIC_CURVE_TYPE) {
 502                         float gamma_table[256];
 503                         uint16_t gamma_table_uint[256];
 504                         uint16_t i;
 505                         uint16_t *inverted;
 506                         int inverted_size = 256;
 507
 508                         compute_curve_gamma_table_type_parametric(gamma_table, trc->parameter, trc->count);
 509                         for(i = 0; i < 256; i++) {
 510                                 gamma_table_uint[i] = (uint16_t)(gamma_table[i] * 65535);
 511                         }
 512
 513                         //XXX: the choice of a minimum of 256 here is not backed by any theory,
 514                         //     measurement or data, howeve r it is what lcms uses.
 515                         //     the maximum number we would need is 65535 because that's the
 516                         //     accuracy used for computing the pre cache table
 517                         if (inverted_size < 256)
 518                                 inverted_size = 256;
 519
 520                         inverted = invert_lut(gamma_table_uint, 256, inverted_size);
 521                         if (!inverted)
 522                                 return false;
 523                         compute_precache_lut(output, inverted, inverted_size);
 524                         free(inverted);
 525         } else {
 526                 if (trc->count == 0) {
 527                         compute_precache_linear(output);
 528                 } else if (trc->count == 1) {
 529                         compute_precache_pow(output, 1./u8Fixed8Number_to_float(trc->data[0]));
 530                 } else {
 531                         uint16_t *inverted;
 532                         int inverted_size = trc->count;
 533                         //XXX: the choice of a minimum of 256 here is not backed by any theory,
 534                         //     measurement or data, howeve r it is what lcms uses.
 535                         //     the maximum number we would need is 65535 because that's the
 536                         //     accuracy used for computing the pre cache table
 537                         if (inverted_size < 256)
 538                                 inverted_size = 256;
 539
 540                         inverted = invert_lut(trc->data, trc->count, inverted_size);
 541                         if (!inverted)
 542                                 return false;
 543                         compute_precache_lut(output, inverted, inverted_size);
 544                         free(inverted);
 545                 }
 546         }
 547         return true;
 548 }
 549
 550
 551 static uint16_t *build_linear_table(int length)
 552 {
 553         int i;
 554         uint16_t *output = malloc(sizeof(uint16_t)*length);
 555         if (!output)
 556                 return NULL;
 557
 558         for (i = 0; i < length; i++) {
 559                 double x = ((double) i * 65535.) / (double) (length - 1);
 560                 uint16_fract_t input = floor(x + .5);
 561                 output[i] = input;
 562         }
 563         return output;
 564 }
 565
 566 static uint16_t *build_pow_table(float gamma, int length)
 567 {
 568         int i;
 569         uint16_t *output = malloc(sizeof(uint16_t)*length);
 570         if (!output)
 571                 return NULL;
 572
 573         for (i = 0; i < length; i++) {
 574                 uint16_fract_t result;
 575                 double x = ((double) i) / (double) (length - 1);
 576                 x = pow(x, gamma);                //XXX turn this conversion into a function
 577                 result = floor(x*65535. + .5);
 578                 output[i] = result;
 579         }
 580         return output;
 581 }
 582
 583 void build_output_lut(struct curveType *trc,
 584                 uint16_t **output_gamma_lut, size_t *output_gamma_lut_length)
 585 {
 586         if (trc->type == PARAMETRIC_CURVE_TYPE) {
 587                 float gamma_table[256];
 588                 uint16_t i;
 589                 uint16_t *output = malloc(sizeof(uint16_t)*256);
 590
 591                 if (!output) {
 592                         *output_gamma_lut = NULL;
 593                         return;
 594                 }
 595
 596                 compute_curve_gamma_table_type_parametric(gamma_table, trc->parameter, trc->count);
 597                 *output_gamma_lut_length = 256;
 598                 for(i = 0; i < 256; i++) {
 599                         output[i] = (uint16_t)(gamma_table[i] * 65535);
 600                 }
 601                 *output_gamma_lut = output;
 602         } else {
 603                 if (trc->count == 0) {
 604                         *output_gamma_lut = build_linear_table(4096);
 605                         *output_gamma_lut_length = 4096;
 606                 } else if (trc->count == 1) {
 607                         float gamma = 1./u8Fixed8Number_to_float(trc->data[0]);
 608                         *output_gamma_lut = build_pow_table(gamma, 4096);
 609                         *output_gamma_lut_length = 4096;
 610                 } else {
 611                         //XXX: the choice of a minimum of 256 here is not backed by any theory,
 612                         //     measurement or data, however it is what lcms uses.
 613                         *output_gamma_lut_length = trc->count;
 614                         if (*output_gamma_lut_length < 256)
 615                                 *output_gamma_lut_length = 256;
 616
 617                         *output_gamma_lut = invert_lut(trc->data, trc->count, *output_gamma_lut_length);
 618                 }
 619         }
 620
 621 }