Bug 1508381 - remove now-unnecessary TASKCLUSTER_* variables r=tomprince
[gecko.git] / gfx / qcms / transform.c
blob349a5a96a48e8e19c668770ae7e82c49388eb813
1 /* vim: set ts=8 sw=8 noexpandtab: */
2 // qcms
3 // Copyright (C) 2009 Mozilla Corporation
4 // Copyright (C) 1998-2007 Marti Maria
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining
7 // a copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the Software
11 // is furnished to do so, subject to the following conditions:
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include <stdlib.h>
25 #include <math.h>
26 #include <assert.h>
27 #include <string.h> //memcpy
28 #include "qcmsint.h"
29 #include "chain.h"
30 #include "matrix.h"
31 #include "transform_util.h"
/* x86 / x86-64 target detection for MSVC, GCC, Intel, and Sun compilers */
#if defined(_M_IX86) || defined(_M_AMD64) || \
    defined(__i386__) || defined(__i386) || \
    defined(__x86_64__) || defined(__x86_64)
#define X86
#endif /* any of the x86 / x86-64 target macros above */
/**
 * AltiVec detection for PowerPC CPUs.
 * Where a runtime detection method exists (Linux auxv, Mac OS X sysctl) it
 * is used. Otherwise the AltiVec path is chosen statically, depending on
 * whether the compiler was told to build with AltiVec support.
 */
#if (defined(__POWERPC__) || defined(__powerpc__))
#if defined(__linux__)
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <elf.h>
#include <linux/auxvec.h>
#include <asm/cputable.h>
#include <link.h>

/*
 * Scans /proc/self/auxv for the AT_HWCAP entry and tests its
 * PPC_FEATURE_HAS_ALTIVEC bit. The result is cached in a function-local
 * static after the first call; every failure path caches "not available".
 */
static inline qcms_bool have_altivec() {
	static int available = -1;
	int new_avail = 0;
	ElfW(auxv_t) auxv;
	ssize_t count;
	int fd;

	if (available != -1)
		return (available != 0 ? true : false);

	fd = open("/proc/self/auxv", O_RDONLY);
	if (fd < 0)
		goto out;
	do {
		count = read(fd, &auxv, sizeof(auxv));
		/* Stop on error, EOF, or a truncated record: in each of those
		 * cases auxv does not hold a freshly read entry, so inspecting
		 * it (or looping on it) would use stale or unset data. */
		if (count != (ssize_t)sizeof(auxv))
			goto out_close;

		if (auxv.a_type == AT_HWCAP) {
			new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC);
			goto out_close;
		}
	} while (auxv.a_type != AT_NULL);

out_close:
	close(fd);
out:
	available = new_avail;
	return (available != 0 ? true : false);
}
#elif defined(__APPLE__) && defined(__MACH__)
#include <sys/sysctl.h>

/**
 * Adapted from the ffmpeg AltiVec detection code.
 * The same code also appears on Apple's AltiVec pages.
 *
 * Queries the CTL_HW / HW_VECTORUNIT sysctl; the value is written straight
 * into the function-local static, which doubles as the cache.
 */
static inline qcms_bool have_altivec() {
	int sels[2] = {CTL_HW, HW_VECTORUNIT};
	static int available = -1;
	size_t len = sizeof(available);
	int err;

	if (available != -1)
		return (available != 0 ? true : false);

	err = sysctl(sels, 2, &available, &len, NULL, 0);

	if (err == 0)
		if (available != 0)
			return true;

	return false;
}
#elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__)
#define have_altivec() true
#else
#define have_altivec() false
#endif
#endif // (defined(__POWERPC__) || defined(__powerpc__))
115 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
116 // This is just an approximation, I am not handling all the non-linear
117 // aspects of the RGB to XYZ process, and assumming that the gamma correction
118 // has transitive property in the tranformation chain.
120 // the alghoritm:
122 // - First I build the absolute conversion matrix using
123 // primaries in XYZ. This matrix is next inverted
124 // - Then I eval the source white point across this matrix
125 // obtaining the coeficients of the transformation
126 // - Then, I apply these coeficients to the original matrix
127 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
129 struct matrix primaries;
130 struct matrix primaries_invert;
131 struct matrix result;
132 struct vector white_point;
133 struct vector coefs;
135 double xn, yn;
136 double xr, yr;
137 double xg, yg;
138 double xb, yb;
140 xn = white.x;
141 yn = white.y;
143 if (yn == 0.0)
144 return matrix_invalid();
146 xr = primrs.red.x;
147 yr = primrs.red.y;
148 xg = primrs.green.x;
149 yg = primrs.green.y;
150 xb = primrs.blue.x;
151 yb = primrs.blue.y;
153 primaries.m[0][0] = xr;
154 primaries.m[0][1] = xg;
155 primaries.m[0][2] = xb;
157 primaries.m[1][0] = yr;
158 primaries.m[1][1] = yg;
159 primaries.m[1][2] = yb;
161 primaries.m[2][0] = 1 - xr - yr;
162 primaries.m[2][1] = 1 - xg - yg;
163 primaries.m[2][2] = 1 - xb - yb;
164 primaries.invalid = false;
166 white_point.v[0] = xn/yn;
167 white_point.v[1] = 1.;
168 white_point.v[2] = (1.0-xn-yn)/yn;
170 primaries_invert = matrix_invert(primaries);
171 if (primaries_invert.invalid) {
172 return matrix_invalid();
175 coefs = matrix_eval(primaries_invert, white_point);
177 result.m[0][0] = coefs.v[0]*xr;
178 result.m[0][1] = coefs.v[1]*xg;
179 result.m[0][2] = coefs.v[2]*xb;
181 result.m[1][0] = coefs.v[0]*yr;
182 result.m[1][1] = coefs.v[1]*yg;
183 result.m[1][2] = coefs.v[2]*yb;
185 result.m[2][0] = coefs.v[0]*(1.-xr-yr);
186 result.m[2][1] = coefs.v[1]*(1.-xg-yg);
187 result.m[2][2] = coefs.v[2]*(1.-xb-yb);
188 result.invalid = primaries_invert.invalid;
190 return result;
/* A CIE XYZ tristimulus value. */
struct CIE_XYZ {
	double X, Y, Z;
};

/* CIE Illuminant D50 */
static const struct CIE_XYZ D50_XYZ = { 0.9642, 1.0000, 0.8249 };
206 /* from lcms: xyY2XYZ()
207 * corresponds to argyll: icmYxy2XYZ() */
208 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
210 struct CIE_XYZ dest;
211 dest.X = (source.x / source.y) * source.Y;
212 dest.Y = source.Y;
213 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
214 return dest;
217 /* from lcms: ComputeChromaticAdaption */
218 // Compute chromatic adaption matrix using chad as cone matrix
219 static struct matrix
220 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
221 struct CIE_XYZ dest_white_point,
222 struct matrix chad)
224 struct matrix chad_inv;
225 struct vector cone_source_XYZ, cone_source_rgb;
226 struct vector cone_dest_XYZ, cone_dest_rgb;
227 struct matrix cone, tmp;
229 tmp = chad;
230 chad_inv = matrix_invert(tmp);
231 if (chad_inv.invalid) {
232 return matrix_invalid();
235 cone_source_XYZ.v[0] = source_white_point.X;
236 cone_source_XYZ.v[1] = source_white_point.Y;
237 cone_source_XYZ.v[2] = source_white_point.Z;
239 cone_dest_XYZ.v[0] = dest_white_point.X;
240 cone_dest_XYZ.v[1] = dest_white_point.Y;
241 cone_dest_XYZ.v[2] = dest_white_point.Z;
243 cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
244 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);
246 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
247 cone.m[0][1] = 0;
248 cone.m[0][2] = 0;
249 cone.m[1][0] = 0;
250 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
251 cone.m[1][2] = 0;
252 cone.m[2][0] = 0;
253 cone.m[2][1] = 0;
254 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
255 cone.invalid = false;
257 // Normalize
258 return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
261 /* from lcms: cmsAdaptionMatrix */
262 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
263 // Bradford is assumed
264 static struct matrix
265 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
267 struct matrix lam_rigg = {{ // Bradford matrix
268 { 0.8951f, 0.2664f, -0.1614f },
269 { -0.7502f, 1.7135f, 0.0367f },
270 { 0.0389f, -0.0685f, 1.0296f }
272 return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
275 /* from lcms: cmsAdaptMatrixToD50 */
276 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
278 struct CIE_XYZ Dn;
279 struct matrix Bradford;
281 if (source_white_pt.y == 0.0) {
282 return matrix_invalid();
285 Dn = xyY2XYZ(source_white_pt);
287 Bradford = adaption_matrix(Dn, D50_XYZ);
288 if (Bradford.invalid) {
289 return matrix_invalid();
291 return matrix_multiply(Bradford, r);
294 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
296 struct matrix colorants;
297 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
298 colorants = adapt_matrix_to_D50(colorants, white_point);
300 if (colorants.invalid)
301 return false;
303 /* note: there's a transpose type of operation going on here */
304 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
305 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
306 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
308 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
309 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
310 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
312 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
313 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
314 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
316 return true;
319 qcms_bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
321 *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
322 *colorants = adapt_matrix_to_D50(*colorants, white_point);
324 return (colorants->invalid ? true : false);
#if 0
/* Disabled: matrix transform of RGB pixels with a pow()-based output gamma
 * instead of an output lookup table. */
static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	int i;
	float (*mat)[4] = transform->matrix;
	for (i=0; i<length; i++) {
		float linear_r, linear_g, linear_b;
		float out_linear_r, out_linear_g, out_linear_b;
		float out_device_r, out_device_g, out_device_b;
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		linear_r = transform->input_gamma_table_r[device_r];
		linear_g = transform->input_gamma_table_g[device_g];
		linear_b = transform->input_gamma_table_b[device_b];

		out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		out_device_r = pow(out_linear_r, transform->out_gamma_r);
		out_device_g = pow(out_linear_g, transform->out_gamma_g);
		out_device_b = pow(out_linear_b, transform->out_gamma_b);

		dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r);
		dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g);
		dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b);
		dest += RGB_OUTPUT_COMPONENTS;
	}
}
#endif
357 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
359 unsigned int i;
360 for (i = 0; i < length; i++) {
361 float out_device_r, out_device_g, out_device_b;
362 unsigned char device = *src++;
364 float linear = transform->input_gamma_table_gray[device];
366 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
367 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
368 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
370 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
371 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
372 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
373 dest += RGB_OUTPUT_COMPONENTS;
377 /* Alpha is not corrected.
378 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
379 RGB Is?" Tech Memo 17 (December 14, 1998).
380 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
383 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
385 unsigned int i;
386 for (i = 0; i < length; i++) {
387 float out_device_r, out_device_g, out_device_b;
388 unsigned char device = *src++;
389 unsigned char alpha = *src++;
391 float linear = transform->input_gamma_table_gray[device];
393 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
394 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
395 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
397 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
398 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
399 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
400 dest[OUTPUT_A_INDEX] = alpha;
401 dest += RGBA_OUTPUT_COMPONENTS;
406 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
408 unsigned int i;
409 for (i = 0; i < length; i++) {
410 unsigned char device = *src++;
411 uint16_t gray;
413 float linear = transform->input_gamma_table_gray[device];
415 /* we could round here... */
416 gray = linear * PRECACHE_OUTPUT_MAX;
418 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
419 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
420 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
421 dest += RGB_OUTPUT_COMPONENTS;
425 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
427 unsigned int i;
428 for (i = 0; i < length; i++) {
429 unsigned char device = *src++;
430 unsigned char alpha = *src++;
431 uint16_t gray;
433 float linear = transform->input_gamma_table_gray[device];
435 /* we could round here... */
436 gray = linear * PRECACHE_OUTPUT_MAX;
438 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
439 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
440 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
441 dest[OUTPUT_A_INDEX] = alpha;
442 dest += RGBA_OUTPUT_COMPONENTS;
446 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
448 unsigned int i;
449 float (*mat)[4] = transform->matrix;
450 for (i = 0; i < length; i++) {
451 unsigned char device_r = *src++;
452 unsigned char device_g = *src++;
453 unsigned char device_b = *src++;
454 uint16_t r, g, b;
456 float linear_r = transform->input_gamma_table_r[device_r];
457 float linear_g = transform->input_gamma_table_g[device_g];
458 float linear_b = transform->input_gamma_table_b[device_b];
460 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
461 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
462 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
464 out_linear_r = clamp_float(out_linear_r);
465 out_linear_g = clamp_float(out_linear_g);
466 out_linear_b = clamp_float(out_linear_b);
468 /* we could round here... */
469 r = out_linear_r * PRECACHE_OUTPUT_MAX;
470 g = out_linear_g * PRECACHE_OUTPUT_MAX;
471 b = out_linear_b * PRECACHE_OUTPUT_MAX;
473 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
474 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
475 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
476 dest += RGB_OUTPUT_COMPONENTS;
480 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
482 unsigned int i;
483 float (*mat)[4] = transform->matrix;
484 for (i = 0; i < length; i++) {
485 unsigned char device_r = *src++;
486 unsigned char device_g = *src++;
487 unsigned char device_b = *src++;
488 unsigned char alpha = *src++;
489 uint16_t r, g, b;
491 float linear_r = transform->input_gamma_table_r[device_r];
492 float linear_g = transform->input_gamma_table_g[device_g];
493 float linear_b = transform->input_gamma_table_b[device_b];
495 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
496 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
497 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
499 out_linear_r = clamp_float(out_linear_r);
500 out_linear_g = clamp_float(out_linear_g);
501 out_linear_b = clamp_float(out_linear_b);
503 /* we could round here... */
504 r = out_linear_r * PRECACHE_OUTPUT_MAX;
505 g = out_linear_g * PRECACHE_OUTPUT_MAX;
506 b = out_linear_b * PRECACHE_OUTPUT_MAX;
508 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
509 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
510 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
511 dest[OUTPUT_A_INDEX] = alpha;
512 dest += RGBA_OUTPUT_COMPONENTS;
516 // Not used
// Trilinear interpolation of RGB pixels through the r/g/b CLUT tables:
// the eight surrounding grid points are blended with lerp() along x, then
// y, then z. Output bytes are written sequentially through *dest++ rather
// than through the OUTPUT_*_INDEX offsets the other transforms use.
// NOTE(review): upstream line numbers 517 and 570-573 are missing from this
// extract, so this function may originally sit inside a block comment whose
// delimiters (and the closing braces) were dropped. Confirm against the
// upstream file before treating it as live code.
518 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
519 unsigned int i;
// NOTE(review): xy_len, x_len and len are not referenced by name in this
// function; presumably the CLU() macro (defined elsewhere) reads them.
520 int xy_len = 1;
521 int x_len = transform->grid_size;
522 int len = x_len * x_len;
523 float* r_table = transform->r_clut;
524 float* g_table = transform->g_clut;
525 float* b_table = transform->b_clut;
527 for (i = 0; i < length; i++) {
528 unsigned char in_r = *src++;
529 unsigned char in_g = *src++;
530 unsigned char in_b = *src++;
531 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
// Lower (x, y, z) and upper (x_n, y_n, z_n) grid coordinates around the sample.
533 int x = floorf(linear_r * (transform->grid_size-1));
534 int y = floorf(linear_g * (transform->grid_size-1));
535 int z = floorf(linear_b * (transform->grid_size-1));
536 int x_n = ceilf(linear_r * (transform->grid_size-1));
537 int y_n = ceilf(linear_g * (transform->grid_size-1));
538 int z_n = ceilf(linear_b * (transform->grid_size-1));
// Fractional position of the sample above the lower grid coordinate.
539 float x_d = linear_r * (transform->grid_size-1) - x;
540 float y_d = linear_g * (transform->grid_size-1) - y;
541 float z_d = linear_b * (transform->grid_size-1) - z;
// Red output channel.
543 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
544 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
545 float r_y1 = lerp(r_x1, r_x2, y_d);
546 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
547 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
548 float r_y2 = lerp(r_x3, r_x4, y_d);
549 float clut_r = lerp(r_y1, r_y2, z_d);
// Green output channel.
551 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
552 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
553 float g_y1 = lerp(g_x1, g_x2, y_d);
554 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
555 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
556 float g_y2 = lerp(g_x3, g_x4, y_d);
557 float clut_g = lerp(g_y1, g_y2, z_d);
// Blue output channel.
559 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
560 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
561 float b_y1 = lerp(b_x1, b_x2, y_d);
562 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
563 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
564 float b_y2 = lerp(b_x3, b_x4, y_d);
565 float clut_b = lerp(b_y1, b_y2, z_d);
567 *dest++ = clamp_u8(clut_r*255.0f);
568 *dest++ = clamp_u8(clut_g*255.0f);
569 *dest++ = clamp_u8(clut_b*255.0f);
/* Integer division rounding up, computed as (value + div - 1) / div.
 * Intended for non-negative value and positive div. */
static int int_div_ceil(int value, int div) {
	int numerator = value + div - 1;

	return numerator / div;
}
578 // Using lcms' tetra interpolation algorithm.
579 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
580 unsigned int i;
581 int xy_len = 1;
582 int x_len = transform->grid_size;
583 int len = x_len * x_len;
584 float* r_table = transform->r_clut;
585 float* g_table = transform->g_clut;
586 float* b_table = transform->b_clut;
587 float c0_r, c1_r, c2_r, c3_r;
588 float c0_g, c1_g, c2_g, c3_g;
589 float c0_b, c1_b, c2_b, c3_b;
590 float clut_r, clut_g, clut_b;
591 for (i = 0; i < length; i++) {
592 unsigned char in_r = *src++;
593 unsigned char in_g = *src++;
594 unsigned char in_b = *src++;
595 unsigned char in_a = *src++;
596 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
598 int x = in_r * (transform->grid_size-1) / 255;
599 int y = in_g * (transform->grid_size-1) / 255;
600 int z = in_b * (transform->grid_size-1) / 255;
601 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
602 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
603 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
604 float rx = linear_r * (transform->grid_size-1) - x;
605 float ry = linear_g * (transform->grid_size-1) - y;
606 float rz = linear_b * (transform->grid_size-1) - z;
608 c0_r = CLU(r_table, x, y, z);
609 c0_g = CLU(g_table, x, y, z);
610 c0_b = CLU(b_table, x, y, z);
612 if( rx >= ry ) {
613 if (ry >= rz) { //rx >= ry && ry >= rz
614 c1_r = CLU(r_table, x_n, y, z) - c0_r;
615 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
616 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
617 c1_g = CLU(g_table, x_n, y, z) - c0_g;
618 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
619 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
620 c1_b = CLU(b_table, x_n, y, z) - c0_b;
621 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
622 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
623 } else {
624 if (rx >= rz) { //rx >= rz && rz >= ry
625 c1_r = CLU(r_table, x_n, y, z) - c0_r;
626 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
627 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
628 c1_g = CLU(g_table, x_n, y, z) - c0_g;
629 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
630 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
631 c1_b = CLU(b_table, x_n, y, z) - c0_b;
632 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
633 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
634 } else { //rz > rx && rx >= ry
635 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
636 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
637 c3_r = CLU(r_table, x, y, z_n) - c0_r;
638 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
639 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
640 c3_g = CLU(g_table, x, y, z_n) - c0_g;
641 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
642 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
643 c3_b = CLU(b_table, x, y, z_n) - c0_b;
646 } else {
647 if (rx >= rz) { //ry > rx && rx >= rz
648 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
649 c2_r = CLU(r_table, x, y_n, z) - c0_r;
650 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
651 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
652 c2_g = CLU(g_table, x, y_n, z) - c0_g;
653 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
654 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
655 c2_b = CLU(b_table, x, y_n, z) - c0_b;
656 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
657 } else {
658 if (ry >= rz) { //ry >= rz && rz > rx
659 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
660 c2_r = CLU(r_table, x, y_n, z) - c0_r;
661 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
662 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
663 c2_g = CLU(g_table, x, y_n, z) - c0_g;
664 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
665 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
666 c2_b = CLU(b_table, x, y_n, z) - c0_b;
667 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
668 } else { //rz > ry && ry > rx
669 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
670 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
671 c3_r = CLU(r_table, x, y, z_n) - c0_r;
672 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
673 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
674 c3_g = CLU(g_table, x, y, z_n) - c0_g;
675 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
676 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
677 c3_b = CLU(b_table, x, y, z_n) - c0_b;
682 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
683 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
684 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
686 dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
687 dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
688 dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
689 dest[OUTPUT_A_INDEX] = in_a;
690 dest += RGBA_OUTPUT_COMPONENTS;
694 // Using lcms' tetra interpolation code.
695 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
696 unsigned int i;
697 int xy_len = 1;
698 int x_len = transform->grid_size;
699 int len = x_len * x_len;
700 float* r_table = transform->r_clut;
701 float* g_table = transform->g_clut;
702 float* b_table = transform->b_clut;
703 float c0_r, c1_r, c2_r, c3_r;
704 float c0_g, c1_g, c2_g, c3_g;
705 float c0_b, c1_b, c2_b, c3_b;
706 float clut_r, clut_g, clut_b;
707 for (i = 0; i < length; i++) {
708 unsigned char in_r = *src++;
709 unsigned char in_g = *src++;
710 unsigned char in_b = *src++;
711 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
713 int x = in_r * (transform->grid_size-1) / 255;
714 int y = in_g * (transform->grid_size-1) / 255;
715 int z = in_b * (transform->grid_size-1) / 255;
716 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
717 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
718 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
719 float rx = linear_r * (transform->grid_size-1) - x;
720 float ry = linear_g * (transform->grid_size-1) - y;
721 float rz = linear_b * (transform->grid_size-1) - z;
723 c0_r = CLU(r_table, x, y, z);
724 c0_g = CLU(g_table, x, y, z);
725 c0_b = CLU(b_table, x, y, z);
727 if( rx >= ry ) {
728 if (ry >= rz) { //rx >= ry && ry >= rz
729 c1_r = CLU(r_table, x_n, y, z) - c0_r;
730 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
731 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
732 c1_g = CLU(g_table, x_n, y, z) - c0_g;
733 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
734 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
735 c1_b = CLU(b_table, x_n, y, z) - c0_b;
736 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
737 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
738 } else {
739 if (rx >= rz) { //rx >= rz && rz >= ry
740 c1_r = CLU(r_table, x_n, y, z) - c0_r;
741 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
742 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
743 c1_g = CLU(g_table, x_n, y, z) - c0_g;
744 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
745 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
746 c1_b = CLU(b_table, x_n, y, z) - c0_b;
747 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
748 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
749 } else { //rz > rx && rx >= ry
750 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
751 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
752 c3_r = CLU(r_table, x, y, z_n) - c0_r;
753 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
754 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
755 c3_g = CLU(g_table, x, y, z_n) - c0_g;
756 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
757 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
758 c3_b = CLU(b_table, x, y, z_n) - c0_b;
761 } else {
762 if (rx >= rz) { //ry > rx && rx >= rz
763 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
764 c2_r = CLU(r_table, x, y_n, z) - c0_r;
765 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
766 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
767 c2_g = CLU(g_table, x, y_n, z) - c0_g;
768 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
769 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
770 c2_b = CLU(b_table, x, y_n, z) - c0_b;
771 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
772 } else {
773 if (ry >= rz) { //ry >= rz && rz > rx
774 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
775 c2_r = CLU(r_table, x, y_n, z) - c0_r;
776 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
777 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
778 c2_g = CLU(g_table, x, y_n, z) - c0_g;
779 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
780 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
781 c2_b = CLU(b_table, x, y_n, z) - c0_b;
782 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
783 } else { //rz > ry && ry > rx
784 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
785 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
786 c3_r = CLU(r_table, x, y, z_n) - c0_r;
787 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
788 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
789 c3_g = CLU(g_table, x, y, z_n) - c0_g;
790 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
791 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
792 c3_b = CLU(b_table, x, y, z_n) - c0_b;
797 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
798 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
799 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
801 dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
802 dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
803 dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
804 dest += RGB_OUTPUT_COMPONENTS;
808 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
810 unsigned int i;
811 float (*mat)[4] = transform->matrix;
812 for (i = 0; i < length; i++) {
813 unsigned char device_r = *src++;
814 unsigned char device_g = *src++;
815 unsigned char device_b = *src++;
816 float out_device_r, out_device_g, out_device_b;
818 float linear_r = transform->input_gamma_table_r[device_r];
819 float linear_g = transform->input_gamma_table_g[device_g];
820 float linear_b = transform->input_gamma_table_b[device_b];
822 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
823 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
824 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
826 out_linear_r = clamp_float(out_linear_r);
827 out_linear_g = clamp_float(out_linear_g);
828 out_linear_b = clamp_float(out_linear_b);
830 out_device_r = lut_interp_linear(out_linear_r,
831 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
832 out_device_g = lut_interp_linear(out_linear_g,
833 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
834 out_device_b = lut_interp_linear(out_linear_b,
835 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
837 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
838 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
839 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
840 dest += RGB_OUTPUT_COMPONENTS;
844 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
846 unsigned int i;
847 float (*mat)[4] = transform->matrix;
848 for (i = 0; i < length; i++) {
849 unsigned char device_r = *src++;
850 unsigned char device_g = *src++;
851 unsigned char device_b = *src++;
852 unsigned char alpha = *src++;
853 float out_device_r, out_device_g, out_device_b;
855 float linear_r = transform->input_gamma_table_r[device_r];
856 float linear_g = transform->input_gamma_table_g[device_g];
857 float linear_b = transform->input_gamma_table_b[device_b];
859 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
860 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
861 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
863 out_linear_r = clamp_float(out_linear_r);
864 out_linear_g = clamp_float(out_linear_g);
865 out_linear_b = clamp_float(out_linear_b);
867 out_device_r = lut_interp_linear(out_linear_r,
868 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
869 out_device_g = lut_interp_linear(out_linear_g,
870 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
871 out_device_b = lut_interp_linear(out_linear_b,
872 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
874 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
875 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
876 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
877 dest[OUTPUT_A_INDEX] = alpha;
878 dest += RGBA_OUTPUT_COMPONENTS;
#if 0
/*
 * Disabled variant: transform `length` packed 8-bit RGB pixels, writing the
 * matrix result directly (scaled by 255 and clamped with clamp_u8()) with
 * no output gamma step.
 *
 * The counter is a size_t so the comparison against `length` is not a
 * signed/unsigned comparison and cannot wrap early.
 */
static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	size_t i;
	float (*mat)[4] = transform->matrix;

	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		*dest++ = clamp_u8(out_linear_r*255);
		*dest++ = clamp_u8(out_linear_g*255);
		*dest++ = clamp_u8(out_linear_b*255);
	}
}
#endif
908 * If users create and destroy objects on different threads, even if the same
909 * objects aren't used on different threads at the same time, we can still run
910 * in to trouble with refcounts if they aren't atomic.
912 * This can lead to us prematurely deleting the precache if threads get unlucky
913 * and write the wrong value to the ref count.
915 static struct precache_output *precache_reference(struct precache_output *p)
917 qcms_atomic_increment(p->ref_count);
918 return p;
921 static struct precache_output *precache_create()
923 struct precache_output *p = malloc(sizeof(struct precache_output));
924 if (p)
925 p->ref_count = 1;
926 return p;
929 void precache_release(struct precache_output *p)
931 if (qcms_atomic_decrement(p->ref_count) == 0) {
932 free(p);
936 #ifdef HAVE_POSIX_MEMALIGN
937 static qcms_transform *transform_alloc(void)
939 qcms_transform *t;
941 void *allocated_memory;
942 if (!posix_memalign(&allocated_memory, 16, sizeof(qcms_transform))) {
943 /* Doing a memset to initialise all bits to 'zero'*/
944 memset(allocated_memory, 0, sizeof(qcms_transform));
945 t = allocated_memory;
946 return t;
947 } else {
948 return NULL;
951 static void transform_free(qcms_transform *t)
953 free(t);
955 #else
956 static qcms_transform *transform_alloc(void)
958 /* transform needs to be aligned on a 16byte boundrary */
959 char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
960 /* make room for a pointer to the block returned by calloc */
961 void *transform_start = original_block + sizeof(void*);
962 /* align transform_start */
963 qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
965 /* store a pointer to the block returned by calloc so that we can free it later */
966 void **(original_block_ptr) = (void**)transform_aligned;
967 if (!original_block)
968 return NULL;
969 original_block_ptr--;
970 *original_block_ptr = original_block;
972 return transform_aligned;
974 static void transform_free(qcms_transform *t)
976 /* get at the pointer to the unaligned block returned by calloc */
977 void **p = (void**)t;
978 p--;
979 free(*p);
981 #endif
983 void qcms_transform_release(qcms_transform *t)
985 /* ensure we only free the gamma tables once even if there are
986 * multiple references to the same data */
988 if (t->output_table_r)
989 precache_release(t->output_table_r);
990 if (t->output_table_g)
991 precache_release(t->output_table_g);
992 if (t->output_table_b)
993 precache_release(t->output_table_b);
995 free(t->input_gamma_table_r);
996 if (t->input_gamma_table_g != t->input_gamma_table_r)
997 free(t->input_gamma_table_g);
998 if (t->input_gamma_table_g != t->input_gamma_table_r &&
999 t->input_gamma_table_g != t->input_gamma_table_b)
1000 free(t->input_gamma_table_b);
1002 free(t->input_gamma_table_gray);
1004 free(t->output_gamma_lut_r);
1005 free(t->output_gamma_lut_g);
1006 free(t->output_gamma_lut_b);
1008 /* r_clut points to beginning of buffer allocated in qcms_transform_precacheLUT_float */
1009 if (t->r_clut)
1010 free(t->r_clut);
1012 transform_free(t);
1015 #ifdef X86
1016 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
1017 // mozilla/jpeg)
1018 // -------------------------------------------------------------------------
1019 #if defined(_M_IX86) && defined(_MSC_VER)
1020 #define HAS_CPUID
1021 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
1022 register - I'm not sure if that ever happens on windows, but cpuid isn't
1023 on the critical path so we just preserve the register to be safe and to be
1024 consistent with the non-windows version. */
1025 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
1026 uint32_t a_, b_, c_, d_;
1027 __asm {
1028 xchg ebx, esi
1029 mov eax, fxn
1030 cpuid
1031 mov a_, eax
1032 mov b_, ebx
1033 mov c_, ecx
1034 mov d_, edx
1035 xchg ebx, esi
1037 *a = a_;
1038 *b = b_;
1039 *c = c_;
1040 *d = d_;
1042 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
1043 #define HAS_CPUID
/* CPUID wrapper for 32-bit GCC / Sun compilers.
 *
 * Runs CPUID for leaf `fxn` and stores the resulting EAX/EBX/ECX/EDX values
 * through `a`/`b`/`c`/`d`.
 *
 * EBX cannot be named as an output because it is the PIC register on some
 * platforms. It is exchanged with ESI before and after the instruction, so
 * the EBX result is read back from ESI and the original EBX is restored. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
	uint32_t out_eax, out_ebx, out_ecx, out_edx;

	__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
			      : "=a" (out_eax), "=S" (out_ebx), "=c" (out_ecx), "=d" (out_edx) : "a" (fxn));

	*a = out_eax;
	*b = out_ebx;
	*c = out_ecx;
	*d = out_edx;
}
1056 #endif
1058 // -------------------------Runtime SSEx Detection-----------------------------
1060 /* MMX is always supported per
1061 * Gecko v1.9.1 minimum CPU requirements */
1062 #define SSE1_EDX_MASK (1UL << 25)
1063 #define SSE2_EDX_MASK (1UL << 26)
1064 #define SSE3_ECX_MASK (1UL << 0)
/*
 * Report the highest SSE level usable on this CPU: 0 (none), 1, 2 or 3.
 *
 * - 64-bit x86 builds return 2 unconditionally.
 * - Builds that have a cpuid() helper query CPUID leaf 1 once and cache the
 *   answer in a function-local static.
 * - Every other build returns 0.
 */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* 64-bit CPUs are known at build time to always have SSE2. Returning
	 * a constant tells the compiler that the non-SSE2 branches are never
	 * taken (i.e. it is OK to optimize away the SSE1 and non-SIMD code). */
	return 2;
#elif defined(HAS_CPUID)
	static int sse_version = -1;
	uint32_t eax_bits, ebx_bits, ecx_bits, edx_bits;
	uint32_t leaf = 0x00000001;

	/* already probed on an earlier call */
	if (sse_version != -1) {
		return sse_version;
	}

	sse_version = 0;
	cpuid(leaf, &eax_bits, &ebx_bits, &ecx_bits, &edx_bits);
	if (ecx_bits & SSE3_ECX_MASK) {
		sse_version = 3;
	} else if (edx_bits & SSE2_EDX_MASK) {
		sse_version = 2;
	} else if (edx_bits & SSE1_EDX_MASK) {
		sse_version = 1;
	}

	return sse_version;
#else
	return 0;
#endif
}
1094 #endif
1096 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
1097 {-0.7502f, 1.7135f, 0.0367f},
1098 { 0.0389f,-0.0685f, 1.0296f}},
1099 false};
1101 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
1102 { 0.4323053f, 0.5183603f, 0.0492912f},
1103 {-0.0085287f, 0.0400428f, 0.9684867f}},
1104 false};
1106 // See ICCv4 E.3
1107 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1108 float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
1109 (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] );
1110 float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
1111 (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] );
1112 float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
1113 (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] );
1114 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1115 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
1118 void qcms_profile_precache_output_transform(qcms_profile *profile)
1120 /* we only support precaching on rgb profiles */
1121 if (profile->color_space != RGB_SIGNATURE)
1122 return;
1124 if (qcms_supports_iccv4) {
1125 /* don't precache since we will use the B2A LUT */
1126 if (profile->B2A0)
1127 return;
1129 /* don't precache since we will use the mBA LUT */
1130 if (profile->mBA)
1131 return;
1134 /* don't precache if we do not have the TRC curves */
1135 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1136 return;
1138 if (!profile->output_table_r) {
1139 profile->output_table_r = precache_create();
1140 if (profile->output_table_r &&
1141 !compute_precache(profile->redTRC, profile->output_table_r->data)) {
1142 precache_release(profile->output_table_r);
1143 profile->output_table_r = NULL;
1146 if (!profile->output_table_g) {
1147 profile->output_table_g = precache_create();
1148 if (profile->output_table_g &&
1149 !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
1150 precache_release(profile->output_table_g);
1151 profile->output_table_g = NULL;
1154 if (!profile->output_table_b) {
1155 profile->output_table_b = precache_create();
1156 if (profile->output_table_b &&
1157 !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
1158 precache_release(profile->output_table_b);
1159 profile->output_table_b = NULL;
1164 /* Replace the current transformation with a LUT transformation using a given number of sample points */
1165 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
1166 int samples, qcms_data_type in_type)
1168 /* The range between which 2 consecutive sample points can be used to interpolate */
1169 uint16_t x,y,z;
1170 uint32_t l;
1171 uint32_t lutSize = 3 * samples * samples * samples;
1172 float* src = NULL;
1173 float* dest = NULL;
1174 float* lut = NULL;
1176 src = malloc(lutSize*sizeof(float));
1177 dest = malloc(lutSize*sizeof(float));
1179 if (src && dest) {
1180 /* Prepare a list of points we want to sample */
1181 l = 0;
1182 for (x = 0; x < samples; x++) {
1183 for (y = 0; y < samples; y++) {
1184 for (z = 0; z < samples; z++) {
1185 src[l++] = x / (float)(samples-1);
1186 src[l++] = y / (float)(samples-1);
1187 src[l++] = z / (float)(samples-1);
1192 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1193 if (lut) {
1194 transform->r_clut = &lut[0];
1195 transform->g_clut = &lut[1];
1196 transform->b_clut = &lut[2];
1197 transform->grid_size = samples;
1198 if (in_type == QCMS_DATA_RGBA_8) {
1199 transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1200 } else {
1201 transform->transform_fn = qcms_transform_data_tetra_clut;
1207 //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
1208 // It will be stored in r_clut, which will be cleaned up in qcms_transform_release.
1209 if (src && lut != src) {
1210 free(src);
1212 if (dest && lut != dest) {
1213 free(dest);
1216 if (lut == NULL) {
1217 return NULL;
1219 return transform;
1222 #define NO_MEM_TRANSFORM NULL
1224 qcms_transform* qcms_transform_create(
1225 qcms_profile *in, qcms_data_type in_type,
1226 qcms_profile *out, qcms_data_type out_type,
1227 qcms_intent intent)
1229 bool precache = false;
1231 qcms_transform *transform = transform_alloc();
1232 if (!transform) {
1233 return NULL;
1235 if (out_type != QCMS_DATA_RGB_8 &&
1236 out_type != QCMS_DATA_RGBA_8) {
1237 assert(0 && "output type");
1238 qcms_transform_release(transform);
1239 return NULL;
1242 if (out->output_table_r &&
1243 out->output_table_g &&
1244 out->output_table_b) {
1245 precache = true;
1248 // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)
1249 if (qcms_supports_iccv4 &&
1250 (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8) &&
1251 (in->A2B0 || out->B2A0 || in->mAB || out->mAB))
1253 // Precache the transformation to a CLUT 33x33x33 in size.
1254 // 33 is used by many profiles and works well in pratice.
1255 // This evenly divides 256 into blocks of 8x8x8.
1256 // TODO For transforming small data sets of about 200x200 or less
1257 // precaching should be avoided.
1258 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
1259 if (!result) {
1260 assert(0 && "precacheLUT failed");
1261 qcms_transform_release(transform);
1262 return NULL;
1264 return result;
1267 if (precache) {
1268 transform->output_table_r = precache_reference(out->output_table_r);
1269 transform->output_table_g = precache_reference(out->output_table_g);
1270 transform->output_table_b = precache_reference(out->output_table_b);
1271 } else {
1272 if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1273 qcms_transform_release(transform);
1274 return NO_MEM_TRANSFORM;
1276 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
1277 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1278 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
1279 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
1280 qcms_transform_release(transform);
1281 return NO_MEM_TRANSFORM;
1285 if (in->color_space == RGB_SIGNATURE) {
1286 struct matrix in_matrix, out_matrix, result;
1288 if (in_type != QCMS_DATA_RGB_8 &&
1289 in_type != QCMS_DATA_RGBA_8){
1290 assert(0 && "input type");
1291 qcms_transform_release(transform);
1292 return NULL;
1294 if (precache) {
1295 #ifdef X86
1296 if (sse_version_available() >= 2) {
1297 if (in_type == QCMS_DATA_RGB_8)
1298 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1299 else
1300 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1302 #if !(defined(_MSC_VER) && defined(_M_AMD64))
1303 /* Microsoft Compiler for x64 doesn't support MMX.
1304 * SSE code uses MMX so that we disable on x64 */
1305 } else
1306 if (sse_version_available() >= 1) {
1307 if (in_type == QCMS_DATA_RGB_8)
1308 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
1309 else
1310 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
1311 #endif
1312 } else
1313 #endif
1314 #if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
1315 if (have_altivec()) {
1316 if (in_type == QCMS_DATA_RGB_8)
1317 transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
1318 else
1319 transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
1320 } else
1321 #endif
1323 if (in_type == QCMS_DATA_RGB_8)
1324 transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
1325 else
1326 transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
1328 } else {
1329 if (in_type == QCMS_DATA_RGB_8)
1330 transform->transform_fn = qcms_transform_data_rgb_out_lut;
1331 else
1332 transform->transform_fn = qcms_transform_data_rgba_out_lut;
1335 //XXX: avoid duplicating tables if we can
1336 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
1337 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
1338 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
1339 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
1340 qcms_transform_release(transform);
1341 return NO_MEM_TRANSFORM;
1345 /* build combined colorant matrix */
1346 in_matrix = build_colorant_matrix(in);
1347 out_matrix = build_colorant_matrix(out);
1348 out_matrix = matrix_invert(out_matrix);
1349 if (out_matrix.invalid) {
1350 qcms_transform_release(transform);
1351 return NULL;
1353 result = matrix_multiply(out_matrix, in_matrix);
1355 /* check for NaN values in the matrix and bail if we find any */
1356 for (unsigned i = 0 ; i < 3 ; ++i) {
1357 for (unsigned j = 0 ; j < 3 ; ++j) {
1358 if (result.m[i][j] != result.m[i][j]) {
1359 qcms_transform_release(transform);
1360 return NULL;
1365 /* store the results in column major mode
1366 * this makes doing the multiplication with sse easier */
1367 transform->matrix[0][0] = result.m[0][0];
1368 transform->matrix[1][0] = result.m[0][1];
1369 transform->matrix[2][0] = result.m[0][2];
1370 transform->matrix[0][1] = result.m[1][0];
1371 transform->matrix[1][1] = result.m[1][1];
1372 transform->matrix[2][1] = result.m[1][2];
1373 transform->matrix[0][2] = result.m[2][0];
1374 transform->matrix[1][2] = result.m[2][1];
1375 transform->matrix[2][2] = result.m[2][2];
1377 } else if (in->color_space == GRAY_SIGNATURE) {
1378 if (in_type != QCMS_DATA_GRAY_8 &&
1379 in_type != QCMS_DATA_GRAYA_8){
1380 assert(0 && "input type");
1381 qcms_transform_release(transform);
1382 return NULL;
1385 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
1386 if (!transform->input_gamma_table_gray) {
1387 qcms_transform_release(transform);
1388 return NO_MEM_TRANSFORM;
1391 if (precache) {
1392 if (in_type == QCMS_DATA_GRAY_8) {
1393 transform->transform_fn = qcms_transform_data_gray_out_precache;
1394 } else {
1395 transform->transform_fn = qcms_transform_data_graya_out_precache;
1397 } else {
1398 if (in_type == QCMS_DATA_GRAY_8) {
1399 transform->transform_fn = qcms_transform_data_gray_out_lut;
1400 } else {
1401 transform->transform_fn = qcms_transform_data_graya_out_lut;
1404 } else {
1405 assert(0 && "unexpected colorspace");
1406 qcms_transform_release(transform);
1407 return NULL;
1409 return transform;
1412 #if defined(__GNUC__) && defined(__i386__)
1413 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1414 __attribute__((__force_align_arg_pointer__))
1415 #endif
1416 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1418 transform->transform_fn(transform, src, dest, length);
1421 qcms_bool qcms_supports_iccv4;
1422 void qcms_enable_iccv4()
1424 qcms_supports_iccv4 = true;