[gecko.git] / gfx / qcms / transform.c
/* vim: set ts=8 sw=8 noexpandtab: */
// qcms
// Copyright (C) 2009 Mozilla Corporation
// Copyright (C) 1998-2007 Marti Maria
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include <string.h> //memcpy
#include "qcmsint.h"
#include "chain.h"
#include "matrix.h"
#include "transform_util.h"

/* for MSVC, GCC, Intel, and Sun compilers */
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
#define X86
#endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */

/**
 * AltiVec detection for PowerPC CPUs.
 * If we have a method of runtime detection, use it. Otherwise statically
 * choose the AltiVec path if the compiler was told to build with AltiVec
 * support.
 */
#if (defined(__POWERPC__) || defined(__powerpc__))
#if defined(__linux__)
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <elf.h>
#include <linux/auxvec.h>
#include <asm/cputable.h>
#include <link.h>

static inline qcms_bool have_altivec() {
	static int available = -1;
	int new_avail = 0;
	ElfW(auxv_t) auxv;
	ssize_t count;
	int fd, i;

	if (available != -1)
		return (available != 0 ? true : false);

	fd = open("/proc/self/auxv", O_RDONLY);
	if (fd < 0)
		goto out;
	do {
		count = read(fd, &auxv, sizeof(auxv));
		if (count < 0)
			goto out_close;

		if (auxv.a_type == AT_HWCAP) {
			new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC);
			goto out_close;
		}
	} while (auxv.a_type != AT_NULL);

out_close:
	close(fd);
out:
	available = new_avail;
	return (available != 0 ? true : false);
}
#elif defined(__APPLE__) && defined(__MACH__)
#include <sys/sysctl.h>

/**
 * Adapted from ffmpeg's AltiVec detection code;
 * the same code also appears on Apple's AltiVec pages.
 */
static inline qcms_bool have_altivec() {
	int sels[2] = {CTL_HW, HW_VECTORUNIT};
	static int available = -1;
	size_t len = sizeof(available);
	int err;

	if (available != -1)
		return (available != 0 ? true : false);

	err = sysctl(sels, 2, &available, &len, NULL, 0);

	if (err == 0)
		if (available != 0)
			return true;

	return false;
}
#elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__)
#define have_altivec() true
#else
#define have_altivec() false
#endif
#endif // (defined(__POWERPC__) || defined(__powerpc__))

// Build a white point / primary chromaticities transfer matrix from RGB to CIE XYZ.
// This is just an approximation: I am not handling all the non-linear
// aspects of the RGB to XYZ process, and I am assuming that the gamma
// correction has a transitive property in the transformation chain.
//
// The algorithm:
//
//            - First I build the absolute conversion matrix using
//              primaries in XYZ. This matrix is then inverted.
//            - Then I evaluate the source white point across this matrix,
//              obtaining the coefficients of the transformation.
//            - Then I apply these coefficients to the original matrix.
static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
{
	struct matrix primaries;
	struct matrix primaries_invert;
	struct matrix result;
	struct vector white_point;
	struct vector coefs;

	double xn, yn;
	double xr, yr;
	double xg, yg;
	double xb, yb;

	xn = white.x;
	yn = white.y;

	if (yn == 0.0)
		return matrix_invalid();

	xr = primrs.red.x;
	yr = primrs.red.y;
	xg = primrs.green.x;
	yg = primrs.green.y;
	xb = primrs.blue.x;
	yb = primrs.blue.y;

	primaries.m[0][0] = xr;
	primaries.m[0][1] = xg;
	primaries.m[0][2] = xb;

	primaries.m[1][0] = yr;
	primaries.m[1][1] = yg;
	primaries.m[1][2] = yb;

	primaries.m[2][0] = 1 - xr - yr;
	primaries.m[2][1] = 1 - xg - yg;
	primaries.m[2][2] = 1 - xb - yb;
	primaries.invalid = false;

	white_point.v[0] = xn/yn;
	white_point.v[1] = 1.;
	white_point.v[2] = (1.0-xn-yn)/yn;

	primaries_invert = matrix_invert(primaries);

	coefs = matrix_eval(primaries_invert, white_point);

	result.m[0][0] = coefs.v[0]*xr;
	result.m[0][1] = coefs.v[1]*xg;
	result.m[0][2] = coefs.v[2]*xb;

	result.m[1][0] = coefs.v[0]*yr;
	result.m[1][1] = coefs.v[1]*yg;
	result.m[1][2] = coefs.v[2]*yb;

	result.m[2][0] = coefs.v[0]*(1.-xr-yr);
	result.m[2][1] = coefs.v[1]*(1.-xg-yg);
	result.m[2][2] = coefs.v[2]*(1.-xb-yb);
	result.invalid = primaries_invert.invalid;

	return result;
}
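/* Illustrative example (values not taken from this file): for the sRGB
 * primaries (R 0.640,0.330; G 0.300,0.600; B 0.150,0.060) and a D65 white
 * point (0.3127, 0.3290), the matrix built here is approximately
 *
 *     0.4124  0.3576  0.1805
 *     0.2126  0.7152  0.0722
 *     0.0193  0.1192  0.9505
 *
 * i.e. the familiar sRGB-to-XYZ (D65) matrix, before any chromatic
 * adaptation to D50 is applied below. */
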
struct CIE_XYZ {
	double X;
	double Y;
	double Z;
};

/* CIE Illuminant D50 */
static const struct CIE_XYZ D50_XYZ = {
	0.9642,
	1.0000,
	0.8249
};

/* from lcms: xyY2XYZ()
 * corresponds to argyll: icmYxy2XYZ() */
static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
{
	struct CIE_XYZ dest;
	dest.X = (source.x / source.y) * source.Y;
	dest.Y = source.Y;
	dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
	return dest;
}

/* from lcms: ComputeChromaticAdaption */
// Compute chromatic adaption matrix using chad as cone matrix
static struct matrix
compute_chromatic_adaption(struct CIE_XYZ source_white_point,
                           struct CIE_XYZ dest_white_point,
                           struct matrix chad)
{
	struct matrix chad_inv;
	struct vector cone_source_XYZ, cone_source_rgb;
	struct vector cone_dest_XYZ, cone_dest_rgb;
	struct matrix cone, tmp;

	tmp = chad;
	chad_inv = matrix_invert(tmp);

	cone_source_XYZ.v[0] = source_white_point.X;
	cone_source_XYZ.v[1] = source_white_point.Y;
	cone_source_XYZ.v[2] = source_white_point.Z;

	cone_dest_XYZ.v[0] = dest_white_point.X;
	cone_dest_XYZ.v[1] = dest_white_point.Y;
	cone_dest_XYZ.v[2] = dest_white_point.Z;

	cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
	cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);

	cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
	cone.m[0][1] = 0;
	cone.m[0][2] = 0;
	cone.m[1][0] = 0;
	cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
	cone.m[1][2] = 0;
	cone.m[2][0] = 0;
	cone.m[2][1] = 0;
	cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
	cone.invalid = false;

	// Normalize
	return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
}
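/* This is a von Kries style adaptation: both white points are mapped into the
 * cone response space defined by 'chad', the per-channel ratios dest/source
 * form the diagonal 'cone' matrix, and chad^-1 * cone * chad brings the
 * scaling back, so colors that matched the source white now match the
 * destination white. */
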
/* from lcms: cmsAdaptionMatrix */
// Returns the final chromatic adaptation from illuminant FromIll to illuminant ToIll
// Bradford is assumed
static struct matrix
adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
{
	struct matrix lam_rigg = {{ // Bradford matrix
		{  0.8951,  0.2664, -0.1614 },
		{ -0.7502,  1.7135,  0.0367 },
		{  0.0389, -0.0685,  1.0296 }
	}};
	return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
}

/* from lcms: cmsAdaptMatrixToD50 */
static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
{
	struct CIE_XYZ Dn;
	struct matrix Bradford;

	if (source_white_pt.y == 0.0)
		return matrix_invalid();

	Dn = xyY2XYZ(source_white_pt);

	Bradford = adaption_matrix(Dn, D50_XYZ);
	return matrix_multiply(Bradford, r);
}

qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
{
	struct matrix colorants;
	colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
	colorants = adapt_matrix_to_D50(colorants, white_point);

	if (colorants.invalid)
		return false;

	/* note: there's a transpose type of operation going on here */
	profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
	profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
	profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);

	profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
	profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
	profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);

	profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
	profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
	profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);

	return true;
}

qcms_bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
{
	*colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
	*colorants = adapt_matrix_to_D50(*colorants, white_point);

	return (colorants->invalid ? true : false);
}

#if 0
static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	int i;
	float (*mat)[4] = transform->matrix;
	for (i=0; i<length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		float out_device_r = pow(out_linear_r, transform->out_gamma_r);
		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
		float out_device_b = pow(out_linear_b, transform->out_gamma_b);

		dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r);
		dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g);
		dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b);
		dest += RGB_OUTPUT_COMPONENTS;
	}
}
#endif

static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	for (i = 0; i < length; i++) {
		float out_device_r, out_device_g, out_device_b;
		unsigned char device = *src++;

		float linear = transform->input_gamma_table_gray[device];

		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);

		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
		dest += RGB_OUTPUT_COMPONENTS;
	}
}

/* Alpha is not corrected.
   A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
   RGB Is?" Tech Memo 17 (December 14, 1998).
   See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
*/

static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	for (i = 0; i < length; i++) {
		float out_device_r, out_device_g, out_device_b;
		unsigned char device = *src++;
		unsigned char alpha = *src++;

		float linear = transform->input_gamma_table_gray[device];

		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);

		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
		dest[OUTPUT_A_INDEX] = alpha;
		dest += RGBA_OUTPUT_COMPONENTS;
	}
}

static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	for (i = 0; i < length; i++) {
		unsigned char device = *src++;
		uint16_t gray;

		float linear = transform->input_gamma_table_gray[device];

		/* we could round here... */
		gray = linear * PRECACHE_OUTPUT_MAX;

		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
		dest += RGB_OUTPUT_COMPONENTS;
	}
}

static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	for (i = 0; i < length; i++) {
		unsigned char device = *src++;
		unsigned char alpha = *src++;
		uint16_t gray;

		float linear = transform->input_gamma_table_gray[device];

		/* we could round here... */
		gray = linear * PRECACHE_OUTPUT_MAX;

		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
		dest[OUTPUT_A_INDEX] = alpha;
		dest += RGBA_OUTPUT_COMPONENTS;
	}
}

static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;
		uint16_t r, g, b;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		out_linear_r = clamp_float(out_linear_r);
		out_linear_g = clamp_float(out_linear_g);
		out_linear_b = clamp_float(out_linear_b);

		/* we could round here... */
		r = out_linear_r * PRECACHE_OUTPUT_MAX;
		g = out_linear_g * PRECACHE_OUTPUT_MAX;
		b = out_linear_b * PRECACHE_OUTPUT_MAX;

		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
		dest += RGB_OUTPUT_COMPONENTS;
	}
}

static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;
		unsigned char alpha = *src++;
		uint16_t r, g, b;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		out_linear_r = clamp_float(out_linear_r);
		out_linear_g = clamp_float(out_linear_g);
		out_linear_b = clamp_float(out_linear_b);

		/* we could round here... */
		r = out_linear_r * PRECACHE_OUTPUT_MAX;
		g = out_linear_g * PRECACHE_OUTPUT_MAX;
		b = out_linear_b * PRECACHE_OUTPUT_MAX;

		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
		dest[OUTPUT_A_INDEX] = alpha;
		dest += RGBA_OUTPUT_COMPONENTS;
	}
}

// Not used
/*
static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
	unsigned int i;
	int xy_len = 1;
	int x_len = transform->grid_size;
	int len = x_len * x_len;
	float* r_table = transform->r_clut;
	float* g_table = transform->g_clut;
	float* b_table = transform->b_clut;

	for (i = 0; i < length; i++) {
		unsigned char in_r = *src++;
		unsigned char in_g = *src++;
		unsigned char in_b = *src++;
		float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;

		int x = floorf(linear_r * (transform->grid_size-1));
		int y = floorf(linear_g * (transform->grid_size-1));
		int z = floorf(linear_b * (transform->grid_size-1));
		int x_n = ceilf(linear_r * (transform->grid_size-1));
		int y_n = ceilf(linear_g * (transform->grid_size-1));
		int z_n = ceilf(linear_b * (transform->grid_size-1));
		float x_d = linear_r * (transform->grid_size-1) - x;
		float y_d = linear_g * (transform->grid_size-1) - y;
		float z_d = linear_b * (transform->grid_size-1) - z;

		float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
		float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
		float r_y1 = lerp(r_x1, r_x2, y_d);
		float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
		float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
		float r_y2 = lerp(r_x3, r_x4, y_d);
		float clut_r = lerp(r_y1, r_y2, z_d);

		float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
		float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
		float g_y1 = lerp(g_x1, g_x2, y_d);
		float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
		float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
		float g_y2 = lerp(g_x3, g_x4, y_d);
		float clut_g = lerp(g_y1, g_y2, z_d);

		float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
		float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
		float b_y1 = lerp(b_x1, b_x2, y_d);
		float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
		float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
		float b_y2 = lerp(b_x3, b_x4, y_d);
		float clut_b = lerp(b_y1, b_y2, z_d);

		*dest++ = clamp_u8(clut_r*255.0f);
		*dest++ = clamp_u8(clut_g*255.0f);
		*dest++ = clamp_u8(clut_b*255.0f);
	}
}
*/

/* Integer ceiling division for non-negative operands; used below to pick the
 * upper CLUT grid index that pairs with the truncating division used for the
 * lower index. */
static int int_div_ceil(int value, int div) {
	return ((value + div - 1) / div);
}

// Using lcms' tetra interpolation algorithm.
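// The fractional position (rx, ry, rz) inside the sampled grid cell is
// ordered, that ordering selects one of the six tetrahedra that partition the
// cell, and the per-channel deltas c1..c3 taken along that tetrahedron's
// edges give the result as c0 + c1*rx + c2*ry + c3*rz, so each channel needs
// values from only four lattice points instead of the eight a trilinear
// lookup would read.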
static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
	unsigned int i;
	int xy_len = 1;
	int x_len = transform->grid_size;
	int len = x_len * x_len;
	float* r_table = transform->r_clut;
	float* g_table = transform->g_clut;
	float* b_table = transform->b_clut;
	float c0_r, c1_r, c2_r, c3_r;
	float c0_g, c1_g, c2_g, c3_g;
	float c0_b, c1_b, c2_b, c3_b;
	float clut_r, clut_g, clut_b;
	for (i = 0; i < length; i++) {
		unsigned char in_r = *src++;
		unsigned char in_g = *src++;
		unsigned char in_b = *src++;
		unsigned char in_a = *src++;
		float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;

		int x = in_r * (transform->grid_size-1) / 255;
		int y = in_g * (transform->grid_size-1) / 255;
		int z = in_b * (transform->grid_size-1) / 255;
		int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
		int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
		int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
		float rx = linear_r * (transform->grid_size-1) - x;
		float ry = linear_g * (transform->grid_size-1) - y;
		float rz = linear_b * (transform->grid_size-1) - z;

		c0_r = CLU(r_table, x, y, z);
		c0_g = CLU(g_table, x, y, z);
		c0_b = CLU(b_table, x, y, z);

		if( rx >= ry ) {
			if (ry >= rz) { //rx >= ry && ry >= rz
				c1_r = CLU(r_table, x_n, y, z) - c0_r;
				c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
				c1_g = CLU(g_table, x_n, y, z) - c0_g;
				c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
				c1_b = CLU(b_table, x_n, y, z) - c0_b;
				c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
			} else {
				if (rx >= rz) { //rx >= rz && rz >= ry
					c1_r = CLU(r_table, x_n, y, z) - c0_r;
					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
					c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
					c1_g = CLU(g_table, x_n, y, z) - c0_g;
					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
					c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
					c1_b = CLU(b_table, x_n, y, z) - c0_b;
					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
					c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
				} else { //rz > rx && rx >= ry
					c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
					c3_r = CLU(r_table, x, y, z_n) - c0_r;
					c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
					c3_g = CLU(g_table, x, y, z_n) - c0_g;
					c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
					c3_b = CLU(b_table, x, y, z_n) - c0_b;
				}
			}
		} else {
			if (rx >= rz) { //ry > rx && rx >= rz
				c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
				c2_r = CLU(r_table, x, y_n, z) - c0_r;
				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
				c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
				c2_g = CLU(g_table, x, y_n, z) - c0_g;
				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
				c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
				c2_b = CLU(b_table, x, y_n, z) - c0_b;
				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
			} else {
				if (ry >= rz) { //ry >= rz && rz > rx
					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
					c2_r = CLU(r_table, x, y_n, z) - c0_r;
					c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
					c2_g = CLU(g_table, x, y_n, z) - c0_g;
					c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
					c2_b = CLU(b_table, x, y_n, z) - c0_b;
					c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
				} else { //rz > ry && ry > rx
					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
					c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
					c3_r = CLU(r_table, x, y, z_n) - c0_r;
					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
					c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
					c3_g = CLU(g_table, x, y, z_n) - c0_g;
					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
					c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
					c3_b = CLU(b_table, x, y, z_n) - c0_b;
				}
			}
		}

		clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;

		dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
		dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
		dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
		dest[OUTPUT_A_INDEX] = in_a;
		dest += RGBA_OUTPUT_COMPONENTS;
	}
}

// Using lcms' tetra interpolation code.
static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
	unsigned int i;
	int xy_len = 1;
	int x_len = transform->grid_size;
	int len = x_len * x_len;
	float* r_table = transform->r_clut;
	float* g_table = transform->g_clut;
	float* b_table = transform->b_clut;
	float c0_r, c1_r, c2_r, c3_r;
	float c0_g, c1_g, c2_g, c3_g;
	float c0_b, c1_b, c2_b, c3_b;
	float clut_r, clut_g, clut_b;
	for (i = 0; i < length; i++) {
		unsigned char in_r = *src++;
		unsigned char in_g = *src++;
		unsigned char in_b = *src++;
		float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;

		int x = in_r * (transform->grid_size-1) / 255;
		int y = in_g * (transform->grid_size-1) / 255;
		int z = in_b * (transform->grid_size-1) / 255;
		int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
		int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
		int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
		float rx = linear_r * (transform->grid_size-1) - x;
		float ry = linear_g * (transform->grid_size-1) - y;
		float rz = linear_b * (transform->grid_size-1) - z;

		c0_r = CLU(r_table, x, y, z);
		c0_g = CLU(g_table, x, y, z);
		c0_b = CLU(b_table, x, y, z);

		if( rx >= ry ) {
			if (ry >= rz) { //rx >= ry && ry >= rz
				c1_r = CLU(r_table, x_n, y, z) - c0_r;
				c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
				c1_g = CLU(g_table, x_n, y, z) - c0_g;
				c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
				c1_b = CLU(b_table, x_n, y, z) - c0_b;
				c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
			} else {
				if (rx >= rz) { //rx >= rz && rz >= ry
					c1_r = CLU(r_table, x_n, y, z) - c0_r;
					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
					c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
					c1_g = CLU(g_table, x_n, y, z) - c0_g;
					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
					c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
					c1_b = CLU(b_table, x_n, y, z) - c0_b;
					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
					c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
				} else { //rz > rx && rx >= ry
					c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
					c3_r = CLU(r_table, x, y, z_n) - c0_r;
					c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
					c3_g = CLU(g_table, x, y, z_n) - c0_g;
					c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
					c3_b = CLU(b_table, x, y, z_n) - c0_b;
				}
			}
		} else {
			if (rx >= rz) { //ry > rx && rx >= rz
				c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
				c2_r = CLU(r_table, x, y_n, z) - c0_r;
				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
				c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
				c2_g = CLU(g_table, x, y_n, z) - c0_g;
				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
				c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
				c2_b = CLU(b_table, x, y_n, z) - c0_b;
				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
			} else {
				if (ry >= rz) { //ry >= rz && rz > rx
					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
					c2_r = CLU(r_table, x, y_n, z) - c0_r;
					c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
					c2_g = CLU(g_table, x, y_n, z) - c0_g;
					c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
					c2_b = CLU(b_table, x, y_n, z) - c0_b;
					c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
				} else { //rz > ry && ry > rx
					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
					c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
					c3_r = CLU(r_table, x, y, z_n) - c0_r;
					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
					c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
					c3_g = CLU(g_table, x, y, z_n) - c0_g;
					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
					c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
					c3_b = CLU(b_table, x, y, z_n) - c0_b;
				}
			}
		}

		clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;

		dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
		dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
		dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
		dest += RGB_OUTPUT_COMPONENTS;
	}
}

static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;
		float out_device_r, out_device_g, out_device_b;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		out_linear_r = clamp_float(out_linear_r);
		out_linear_g = clamp_float(out_linear_g);
		out_linear_b = clamp_float(out_linear_b);

		out_device_r = lut_interp_linear(out_linear_r,
				transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
		out_device_g = lut_interp_linear(out_linear_g,
				transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
		out_device_b = lut_interp_linear(out_linear_b,
				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);

		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
		dest += RGB_OUTPUT_COMPONENTS;
	}
}

static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	unsigned int i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;
		unsigned char alpha = *src++;
		float out_device_r, out_device_g, out_device_b;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		out_linear_r = clamp_float(out_linear_r);
		out_linear_g = clamp_float(out_linear_g);
		out_linear_b = clamp_float(out_linear_b);

		out_device_r = lut_interp_linear(out_linear_r,
				transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
		out_device_g = lut_interp_linear(out_linear_g,
				transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
		out_device_b = lut_interp_linear(out_linear_b,
				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);

		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
		dest[OUTPUT_A_INDEX] = alpha;
		dest += RGBA_OUTPUT_COMPONENTS;
	}
}

#if 0
static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	int i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		*dest++ = clamp_u8(out_linear_r*255);
		*dest++ = clamp_u8(out_linear_g*255);
		*dest++ = clamp_u8(out_linear_b*255);
	}
}
#endif

/* If users create and destroy objects on different threads, even if the same
 * objects aren't used on different threads at the same time, we can still run
 * into trouble with refcounts if they aren't atomic.
 *
 * This can lead to us prematurely deleting the precache if threads get unlucky
 * and write the wrong value to the ref count.
 */
static struct precache_output *precache_reference(struct precache_output *p)
{
	qcms_atomic_increment(p->ref_count);
	return p;
}

static struct precache_output *precache_create()
{
	struct precache_output *p = malloc(sizeof(struct precache_output));
	if (p)
		p->ref_count = 1;
	return p;
}

void precache_release(struct precache_output *p)
{
	if (qcms_atomic_decrement(p->ref_count) == 0) {
		free(p);
	}
}

#ifdef HAVE_POSIX_MEMALIGN
static qcms_transform *transform_alloc(void)
{
	qcms_transform *t;

	void *allocated_memory;
	if (!posix_memalign(&allocated_memory, 16, sizeof(qcms_transform))) {
		/* Do a memset to initialise all bits to zero */
		memset(allocated_memory, 0, sizeof(qcms_transform));
		t = allocated_memory;
		return t;
	} else {
		return NULL;
	}
}
static void transform_free(qcms_transform *t)
{
	free(t);
}
#else
static qcms_transform *transform_alloc(void)
{
	/* transform needs to be aligned on a 16-byte boundary */
	char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
	/* make room for a pointer to the block returned by calloc */
	void *transform_start = original_block + sizeof(void*);
	/* align transform_start */
	qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);

	/* store a pointer to the block returned by calloc so that we can free it later */
	void **(original_block_ptr) = (void**)transform_aligned;
	if (!original_block)
		return NULL;
	original_block_ptr--;
	*original_block_ptr = original_block;

	return transform_aligned;
}
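/* Layout of the manually aligned allocation above:
 *
 *   original_block                            transform_aligned
 *   |                                         |
 *   v                                         v
 *   [ calloc'd block ... padding ... void* ][ qcms_transform, 16-byte aligned ]
 *                                    ^
 *                                    original_block is stored in the pointer
 *                                    slot just before the aligned transform,
 *                                    and read back by transform_free() below.
 */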
static void transform_free(qcms_transform *t)
{
	/* get at the pointer to the unaligned block returned by calloc */
	void **p = (void**)t;
	p--;
	free(*p);
}
#endif

void qcms_transform_release(qcms_transform *t)
{
	/* ensure we only free the gamma tables once even if there are
	 * multiple references to the same data */

	if (t->output_table_r)
		precache_release(t->output_table_r);
	if (t->output_table_g)
		precache_release(t->output_table_g);
	if (t->output_table_b)
		precache_release(t->output_table_b);

	free(t->input_gamma_table_r);
	if (t->input_gamma_table_g != t->input_gamma_table_r)
		free(t->input_gamma_table_g);
	if (t->input_gamma_table_g != t->input_gamma_table_r &&
	    t->input_gamma_table_g != t->input_gamma_table_b)
		free(t->input_gamma_table_b);

	free(t->input_gamma_table_gray);

	free(t->output_gamma_lut_r);
	free(t->output_gamma_lut_g);
	free(t->output_gamma_lut_b);

	transform_free(t);
}

#ifdef X86
// Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
// mozilla/jpeg)
// -------------------------------------------------------------------------
#if defined(_M_IX86) && defined(_MSC_VER)
#define HAS_CPUID
/* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
   register - I'm not sure if that ever happens on Windows, but cpuid isn't
   on the critical path, so we just preserve the register to be safe and to be
   consistent with the non-Windows version. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
	uint32_t a_, b_, c_, d_;
	__asm {
		xchg ebx, esi
		mov eax, fxn
		cpuid
		mov a_, eax
		mov b_, ebx
		mov c_, ecx
		mov d_, edx
		xchg ebx, esi
	}
	*a = a_;
	*b = b_;
	*c = c_;
	*d = d_;
}
#elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
#define HAS_CPUID
/* Get us a CPUID function. We can't use ebx because it's the PIC register on
   some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

	uint32_t a_, b_, c_, d_;
	__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
	                      : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
	*a = a_;
	*b = b_;
	*c = c_;
	*d = d_;
}
#endif

// -------------------------Runtime SSEx Detection-----------------------------

/* MMX is always supported per
 * Gecko v1.9.1 minimum CPU requirements */
#define SSE1_EDX_MASK (1UL << 25)
#define SSE2_EDX_MASK (1UL << 26)
#define SSE3_ECX_MASK (1UL <<  0)

static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* we know at build time that 64-bit CPUs always have SSE2;
	 * this tells the compiler that non-SSE2 branches will never be
	 * taken (i.e. it is OK to optimize away the SSE1 and non-SIMD code) */
	return 2;
#elif defined(HAS_CPUID)
	static int sse_version = -1;
	uint32_t a, b, c, d;
	uint32_t function = 0x00000001;

	if (sse_version == -1) {
		sse_version = 0;
		cpuid(function, &a, &b, &c, &d);
		if (c & SSE3_ECX_MASK)
			sse_version = 3;
		else if (d & SSE2_EDX_MASK)
			sse_version = 2;
		else if (d & SSE1_EDX_MASK)
			sse_version = 1;
	}

	return sse_version;
#else
	return 0;
#endif
}
#endif
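/* Note: on x86-64 builds the helper above constant-folds to 2 (SSE2 is
 * architecturally guaranteed); on 32-bit builds with CPUID it probes once,
 * caches the result in a function-local static, and reports 3, 2, 1 or 0
 * for SSE3, SSE2, SSE1 or no SSE respectively. */
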
static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
						{-0.7502f, 1.7135f, 0.0367f},
						{ 0.0389f,-0.0685f, 1.0296f}},
						false};

static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
						     { 0.4323053f, 0.5183603f, 0.0492912f},
						     {-0.0085287f, 0.0400428f, 0.9684867f}},
						     false};

// See ICCv4 E.3
struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
	float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
	          (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0]);
	float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
	          (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1]);
	float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
	          (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2]);
	struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
	return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
}
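/* This scales the given white point (X, Y, Z) onto the D50 white point
 * (0.96422, 1.0, 0.82521) with a per-channel, von Kries style scaling in
 * Bradford space and then maps the result back out of Bradford space; it is
 * the same construction as compute_chromatic_adaption() above with D50 fixed
 * as the destination. */
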
void qcms_profile_precache_output_transform(qcms_profile *profile)
{
	/* we only support precaching on rgb profiles */
	if (profile->color_space != RGB_SIGNATURE)
		return;

	if (qcms_supports_iccv4) {
		/* don't precache since we will use the B2A LUT */
		if (profile->B2A0)
			return;

		/* don't precache since we will use the mBA LUT */
		if (profile->mBA)
			return;
	}

	/* don't precache if we do not have the TRC curves */
	if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
		return;

	if (!profile->output_table_r) {
		profile->output_table_r = precache_create();
		if (profile->output_table_r &&
		    !compute_precache(profile->redTRC, profile->output_table_r->data)) {
			precache_release(profile->output_table_r);
			profile->output_table_r = NULL;
		}
	}
	if (!profile->output_table_g) {
		profile->output_table_g = precache_create();
		if (profile->output_table_g &&
		    !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
			precache_release(profile->output_table_g);
			profile->output_table_g = NULL;
		}
	}
	if (!profile->output_table_b) {
		profile->output_table_b = precache_create();
		if (profile->output_table_b &&
		    !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
			precache_release(profile->output_table_b);
			profile->output_table_b = NULL;
		}
	}
}

/* Replace the current transformation with a LUT transformation using a given number of sample points */
qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
		int samples, qcms_data_type in_type)
{
	/* The range between which 2 consecutive sample points can be used to interpolate */
	uint16_t x,y,z;
	uint32_t l;
	uint32_t lutSize = 3 * samples * samples * samples;
	float* src = NULL;
	float* dest = NULL;
	float* lut = NULL;

	src = malloc(lutSize*sizeof(float));
	dest = malloc(lutSize*sizeof(float));

	if (src && dest) {
		/* Prepare a list of points we want to sample */
		l = 0;
		for (x = 0; x < samples; x++) {
			for (y = 0; y < samples; y++) {
				for (z = 0; z < samples; z++) {
					src[l++] = x / (float)(samples-1);
					src[l++] = y / (float)(samples-1);
					src[l++] = z / (float)(samples-1);
				}
			}
		}

		lut = qcms_chain_transform(in, out, src, dest, lutSize);
		if (lut) {
			transform->r_clut = &lut[0];
			transform->g_clut = &lut[1];
			transform->b_clut = &lut[2];
			transform->grid_size = samples;
			if (in_type == QCMS_DATA_RGBA_8) {
				transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
			} else {
				transform->transform_fn = qcms_transform_data_tetra_clut;
			}
		}
	}

	//XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be freed.
	if (src && lut != src) {
		free(src);
	}
	if (dest && lut != dest) {
		free(dest);
	}

	if (lut == NULL) {
		return NULL;
	}
	return transform;
}
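/* Layout note: the returned lut holds samples^3 grid points as interleaved
 * R,G,B floats, so r_clut, g_clut and b_clut are the same buffer offset by
 * 0, 1 and 2; the per-channel stride is handled inside the CLU() lookups
 * used by the tetra routines above. */
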
#define NO_MEM_TRANSFORM NULL

qcms_transform* qcms_transform_create(
		qcms_profile *in, qcms_data_type in_type,
		qcms_profile *out, qcms_data_type out_type,
		qcms_intent intent)
{
	bool precache = false;

	qcms_transform *transform = transform_alloc();
	if (!transform) {
		return NULL;
	}
	if (out_type != QCMS_DATA_RGB_8 &&
	    out_type != QCMS_DATA_RGBA_8) {
		assert(0 && "output type");
		transform_free(transform);
		return NULL;
	}

	if (out->output_table_r &&
	    out->output_table_g &&
	    out->output_table_b) {
		precache = true;
	}

	// This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)
	if (qcms_supports_iccv4 &&
	    (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8) &&
	    (in->A2B0 || out->B2A0 || in->mAB || out->mAB))
	{
		// Precache the transformation to a CLUT 33x33x33 in size.
		// 33 is used by many profiles and works well in practice.
		// This evenly divides 256 into blocks of 8x8x8.
		// TODO: For transforming small data sets of about 200x200 or less,
		// precaching should be avoided.
		qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
		if (!result) {
			assert(0 && "precacheLUT failed");
			transform_free(transform);
			return NULL;
		}
		return result;
	}

	if (precache) {
		transform->output_table_r = precache_reference(out->output_table_r);
		transform->output_table_g = precache_reference(out->output_table_g);
		transform->output_table_b = precache_reference(out->output_table_b);
	} else {
		if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}
		build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
		build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
		build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
		if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}
	}

	if (in->color_space == RGB_SIGNATURE) {
		struct matrix in_matrix, out_matrix, result;

		if (in_type != QCMS_DATA_RGB_8 &&
		    in_type != QCMS_DATA_RGBA_8) {
			assert(0 && "input type");
			transform_free(transform);
			return NULL;
		}
		if (precache) {
#ifdef X86
			if (sse_version_available() >= 2) {
				if (in_type == QCMS_DATA_RGB_8)
					transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
				else
					transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;

#if !(defined(_MSC_VER) && defined(_M_AMD64))
			/* The Microsoft compiler for x64 doesn't support MMX.
			 * The SSE1 code uses MMX, so we disable it on x64. */
			} else
			if (sse_version_available() >= 1) {
				if (in_type == QCMS_DATA_RGB_8)
					transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
				else
					transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
#endif
			} else
#endif
#if (defined(__POWERPC__) || defined(__powerpc__))
			if (have_altivec()) {
				if (in_type == QCMS_DATA_RGB_8)
					transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
				else
					transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
			} else
#endif
			{
				if (in_type == QCMS_DATA_RGB_8)
					transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
				else
					transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
			}
		} else {
			if (in_type == QCMS_DATA_RGB_8)
				transform->transform_fn = qcms_transform_data_rgb_out_lut;
			else
				transform->transform_fn = qcms_transform_data_rgba_out_lut;
		}

		//XXX: avoid duplicating tables if we can
		transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
		transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
		transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
		if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}

		/* build combined colorant matrix */
		in_matrix = build_colorant_matrix(in);
		out_matrix = build_colorant_matrix(out);
		out_matrix = matrix_invert(out_matrix);
		if (out_matrix.invalid) {
			qcms_transform_release(transform);
			return NULL;
		}
		result = matrix_multiply(out_matrix, in_matrix);

		/* store the results in column major mode
		 * this makes doing the multiplication with sse easier */
		transform->matrix[0][0] = result.m[0][0];
		transform->matrix[1][0] = result.m[0][1];
		transform->matrix[2][0] = result.m[0][2];
		transform->matrix[0][1] = result.m[1][0];
		transform->matrix[1][1] = result.m[1][1];
		transform->matrix[2][1] = result.m[1][2];
		transform->matrix[0][2] = result.m[2][0];
		transform->matrix[1][2] = result.m[2][1];
		transform->matrix[2][2] = result.m[2][2];

	} else if (in->color_space == GRAY_SIGNATURE) {
		if (in_type != QCMS_DATA_GRAY_8 &&
		    in_type != QCMS_DATA_GRAYA_8) {
			assert(0 && "input type");
			transform_free(transform);
			return NULL;
		}

		transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
		if (!transform->input_gamma_table_gray) {
			qcms_transform_release(transform);
			return NO_MEM_TRANSFORM;
		}

		if (precache) {
			if (in_type == QCMS_DATA_GRAY_8) {
				transform->transform_fn = qcms_transform_data_gray_out_precache;
			} else {
				transform->transform_fn = qcms_transform_data_graya_out_precache;
			}
		} else {
			if (in_type == QCMS_DATA_GRAY_8) {
				transform->transform_fn = qcms_transform_data_gray_out_lut;
			} else {
				transform->transform_fn = qcms_transform_data_graya_out_lut;
			}
		}
	} else {
		assert(0 && "unexpected colorspace");
		transform_free(transform);
		return NULL;
	}
	return transform;
}
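/* Typical usage of the API above (illustrative sketch; the intent constant
 * and data-type enums come from qcms.h, the variable names are placeholders):
 *
 *   qcms_transform *t = qcms_transform_create(in_profile, QCMS_DATA_RGBA_8,
 *                                             out_profile, QCMS_DATA_RGBA_8,
 *                                             QCMS_INTENT_PERCEPTUAL);
 *   if (t) {
 *           qcms_transform_data(t, src_pixels, dst_pixels, pixel_count);
 *           qcms_transform_release(t);
 *   }
 */
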
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
/* we need this to avoid crashes when gcc assumes the stack is 128-bit aligned */
__attribute__((__force_align_arg_pointer__))
#endif
void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
{
	transform->transform_fn(transform, src, dest, length);
}

qcms_bool qcms_supports_iccv4;
void qcms_enable_iccv4()
{
	qcms_supports_iccv4 = true;
}