Bug 1584957 [wpt PR 19397] - Update interfaces/css-animation-worklet.idl, a=testonly
[gecko.git] / gfx / qcms / transform.cpp
blob6a5206b04ed1cdb987f4679ebe7ee6d2368405d1
1 /* vim: set ts=8 sw=8 noexpandtab: */
2 // qcms
3 // Copyright (C) 2009 Mozilla Corporation
4 // Copyright (C) 1998-2007 Marti Maria
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining
7 // a copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the Software
11 // is furnished to do so, subject to the following conditions:
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include <stdlib.h>
25 #include <math.h>
26 #include <assert.h>
27 #include <string.h> //memcpy
28 #include "qcmsint.h"
29 #include "chain.h"
30 #include "matrix.h"
31 #include "transform_util.h"
33 /* for MSVC, GCC, Intel, and Sun compilers */
34 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
35 #define X86
36 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
/**
 * AltiVec detection for PowerPC CPUs.
 * When a method of runtime detection is available, use it.
 * Otherwise statically choose the AltiVec path if the compiler
 * was told to build with AltiVec support.
 */
44 #if (defined(__POWERPC__) || defined(__powerpc__))
45 #if defined(__linux__)
46 #include <unistd.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <elf.h>
50 #include <linux/auxvec.h>
51 #include <asm/cputable.h>
52 #include <link.h>
54 static inline bool have_altivec() {
55 static int available = -1;
56 int new_avail = 0;
57 ElfW(auxv_t) auxv;
58 ssize_t count;
59 int fd, i;
61 if (available != -1)
62 return (available != 0 ? true : false);
64 fd = open("/proc/self/auxv", O_RDONLY);
65 if (fd < 0)
66 goto out;
67 do {
68 count = read(fd, &auxv, sizeof(auxv));
69 if (count < 0)
70 goto out_close;
72 if (auxv.a_type == AT_HWCAP) {
73 new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC);
74 goto out_close;
76 } while (auxv.a_type != AT_NULL);
78 out_close:
79 close(fd);
80 out:
81 available = new_avail;
82 return (available != 0 ? true : false);
84 #elif defined(__APPLE__) && defined(__MACH__)
85 #include <sys/sysctl.h>
/**
 * Borrowed from ffmpeg's AltiVec detection code.
 * This code also appears on Apple's AltiVec pages.
 */
91 static inline bool have_altivec() {
92 int sels[2] = {CTL_HW, HW_VECTORUNIT};
93 static int available = -1;
94 size_t len = sizeof(available);
95 int err;
97 if (available != -1)
98 return (available != 0 ? true : false);
100 err = sysctl(sels, 2, &available, &len, NULL, 0);
102 if (err == 0)
103 if (available != 0)
104 return true;
106 return false;
108 #elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__)
109 #define have_altivec() true
110 #else
111 #define have_altivec() false
112 #endif
113 #endif // (defined(__POWERPC__) || defined(__powerpc__))
// Build a white point, primary chromas transfer matrix from RGB to CIE XYZ.
// This is just an approximation: I am not handling all the non-linear
// aspects of the RGB to XYZ process, and I am assuming that the gamma
// correction has the transitive property in the transformation chain.
//
// The algorithm:
//
// - First I build the absolute conversion matrix using
//   primaries in XYZ. This matrix is next inverted.
// - Then I evaluate the source white point across this matrix,
//   obtaining the coefficients of the transformation.
// - Then I apply these coefficients to the original matrix.
127 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
129 struct matrix primaries;
130 struct matrix primaries_invert;
131 struct matrix result;
132 struct vector white_point;
133 struct vector coefs;
135 double xn, yn;
136 double xr, yr;
137 double xg, yg;
138 double xb, yb;
140 xn = white.x;
141 yn = white.y;
143 if (yn == 0.0)
144 return matrix_invalid();
146 xr = primrs.red.x;
147 yr = primrs.red.y;
148 xg = primrs.green.x;
149 yg = primrs.green.y;
150 xb = primrs.blue.x;
151 yb = primrs.blue.y;
153 primaries.m[0][0] = xr;
154 primaries.m[0][1] = xg;
155 primaries.m[0][2] = xb;
157 primaries.m[1][0] = yr;
158 primaries.m[1][1] = yg;
159 primaries.m[1][2] = yb;
161 primaries.m[2][0] = 1 - xr - yr;
162 primaries.m[2][1] = 1 - xg - yg;
163 primaries.m[2][2] = 1 - xb - yb;
164 primaries.invalid = false;
166 white_point.v[0] = xn/yn;
167 white_point.v[1] = 1.;
168 white_point.v[2] = (1.0-xn-yn)/yn;
170 primaries_invert = matrix_invert(primaries);
171 if (primaries_invert.invalid) {
172 return matrix_invalid();
175 coefs = matrix_eval(primaries_invert, white_point);
177 result.m[0][0] = coefs.v[0]*xr;
178 result.m[0][1] = coefs.v[1]*xg;
179 result.m[0][2] = coefs.v[2]*xb;
181 result.m[1][0] = coefs.v[0]*yr;
182 result.m[1][1] = coefs.v[1]*yg;
183 result.m[1][2] = coefs.v[2]*yb;
185 result.m[2][0] = coefs.v[0]*(1.-xr-yr);
186 result.m[2][1] = coefs.v[1]*(1.-xg-yg);
187 result.m[2][2] = coefs.v[2]*(1.-xb-yb);
188 result.invalid = primaries_invert.invalid;
190 return result;
/* A CIE XYZ triple; each component is stored as a double. */
struct CIE_XYZ {
	double X, Y, Z;
};
199 /* CIE Illuminant D50 */
200 static const struct CIE_XYZ D50_XYZ = {
201 0.9642,
202 1.0000,
203 0.8249
206 /* from lcms: xyY2XYZ()
207 * corresponds to argyll: icmYxy2XYZ() */
208 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
210 struct CIE_XYZ dest;
211 dest.X = (source.x / source.y) * source.Y;
212 dest.Y = source.Y;
213 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
214 return dest;
217 /* from lcms: ComputeChromaticAdaption */
218 // Compute chromatic adaption matrix using chad as cone matrix
219 static struct matrix
220 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
221 struct CIE_XYZ dest_white_point,
222 struct matrix chad)
224 struct matrix chad_inv;
225 struct vector cone_source_XYZ, cone_source_rgb;
226 struct vector cone_dest_XYZ, cone_dest_rgb;
227 struct matrix cone, tmp;
229 tmp = chad;
230 chad_inv = matrix_invert(tmp);
231 if (chad_inv.invalid) {
232 return matrix_invalid();
235 cone_source_XYZ.v[0] = source_white_point.X;
236 cone_source_XYZ.v[1] = source_white_point.Y;
237 cone_source_XYZ.v[2] = source_white_point.Z;
239 cone_dest_XYZ.v[0] = dest_white_point.X;
240 cone_dest_XYZ.v[1] = dest_white_point.Y;
241 cone_dest_XYZ.v[2] = dest_white_point.Z;
243 cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
244 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);
246 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
247 cone.m[0][1] = 0;
248 cone.m[0][2] = 0;
249 cone.m[1][0] = 0;
250 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
251 cone.m[1][2] = 0;
252 cone.m[2][0] = 0;
253 cone.m[2][1] = 0;
254 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
255 cone.invalid = false;
257 // Normalize
258 return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
/* from lcms: cmsAdaptionMatrix */
// Returns the final chromatic adaptation from illuminant FromIll to illuminant ToIll.
// Bradford is assumed.
264 static struct matrix
265 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
267 struct matrix lam_rigg = {{ // Bradford matrix
268 { 0.8951f, 0.2664f, -0.1614f },
269 { -0.7502f, 1.7135f, 0.0367f },
270 { 0.0389f, -0.0685f, 1.0296f }
272 return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
275 /* from lcms: cmsAdaptMatrixToD50 */
276 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
278 struct CIE_XYZ Dn;
279 struct matrix Bradford;
281 if (source_white_pt.y == 0.0) {
282 return matrix_invalid();
285 Dn = xyY2XYZ(source_white_pt);
287 Bradford = adaption_matrix(Dn, D50_XYZ);
288 if (Bradford.invalid) {
289 return matrix_invalid();
291 return matrix_multiply(Bradford, r);
294 bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
296 struct matrix colorants;
297 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
298 colorants = adapt_matrix_to_D50(colorants, white_point);
300 if (colorants.invalid)
301 return false;
303 /* note: there's a transpose type of operation going on here */
304 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
305 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
306 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
308 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
309 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
310 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
312 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
313 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
314 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
316 return true;
319 bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
321 *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
322 *colorants = adapt_matrix_to_D50(*colorants, white_point);
324 return (colorants->invalid ? true : false);
// Disabled (#if 0) variant of the RGB transform: each pixel goes through the
// input gamma tables, then the transform matrix, then pow() with the
// per-channel out_gamma_* exponents, and is finally scaled by 255 and
// clamped to a byte. Reads three source bytes per pixel.
// NOTE(review): `i` is an int compared against a size_t length; fix that
// before re-enabling this code.
327 #if 0
328 static void qcms_transform_data_rgb_out_pow(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
330 int i;
331 const float (*mat)[4] = transform->matrix;
332 for (i=0; i<length; i++) {
333 unsigned char device_r = *src++;
334 unsigned char device_g = *src++;
335 unsigned char device_b = *src++;
// device -> linear via the per-channel input tables
337 float linear_r = transform->input_gamma_table_r[device_r];
338 float linear_g = transform->input_gamma_table_g[device_g];
339 float linear_b = transform->input_gamma_table_b[device_b];
// matrix is applied with mat[input channel][output channel] indexing
341 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
342 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
343 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
345 float out_device_r = pow(out_linear_r, transform->out_gamma_r);
346 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
347 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
349 dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r);
350 dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g);
351 dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b);
352 dest += RGB_OUTPUT_COMPONENTS;
355 #endif
/* Alpha is not corrected.
   A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
   RGB Is?" Tech Memo 17 (December 14, 1998).
   See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
*/
363 template <size_t kRIndex, size_t kGIndex, size_t kBIndex,
364 size_t kInAIndex = NO_A_INDEX, size_t kOutAIndex = kInAIndex>
365 static void qcms_transform_data_gray_template_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
367 const unsigned int components = A_INDEX_COMPONENTS(kOutAIndex);
368 unsigned int i;
369 for (i = 0; i < length; i++) {
370 float out_device_r, out_device_g, out_device_b;
371 unsigned char device = *src++;
372 unsigned char alpha = 0xFF;
373 if (kInAIndex != NO_A_INDEX) {
374 alpha = *src++;
377 float linear = transform->input_gamma_table_gray[device];
379 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
380 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
381 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
383 dest[kRIndex] = clamp_u8(out_device_r*255);
384 dest[kGIndex] = clamp_u8(out_device_g*255);
385 dest[kBIndex] = clamp_u8(out_device_b*255);
386 if (kOutAIndex != NO_A_INDEX) {
387 dest[kOutAIndex] = alpha;
389 dest += components;
393 static void qcms_transform_data_gray_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
395 qcms_transform_data_gray_template_lut<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
398 static void qcms_transform_data_gray_rgba_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
400 qcms_transform_data_gray_template_lut<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, NO_A_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
403 static void qcms_transform_data_gray_bgra_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
405 qcms_transform_data_gray_template_lut<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, NO_A_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
408 static void qcms_transform_data_graya_rgba_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
410 qcms_transform_data_gray_template_lut<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
413 static void qcms_transform_data_graya_bgra_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
415 qcms_transform_data_gray_template_lut<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
418 template <size_t kRIndex, size_t kGIndex, size_t kBIndex,
419 size_t kInAIndex = NO_A_INDEX, size_t kOutAIndex = kInAIndex>
420 static void qcms_transform_data_gray_template_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
422 const unsigned int components = A_INDEX_COMPONENTS(kOutAIndex);
423 unsigned int i;
424 for (i = 0; i < length; i++) {
425 unsigned char device = *src++;
426 unsigned char alpha = 0xFF;
427 if (kInAIndex != NO_A_INDEX) {
428 alpha = *src++;
430 uint16_t gray;
432 float linear = transform->input_gamma_table_gray[device];
434 /* we could round here... */
435 gray = linear * PRECACHE_OUTPUT_MAX;
437 dest[kRIndex] = transform->output_table_r->data[gray];
438 dest[kGIndex] = transform->output_table_g->data[gray];
439 dest[kBIndex] = transform->output_table_b->data[gray];
440 if (kOutAIndex != NO_A_INDEX) {
441 dest[kOutAIndex] = alpha;
443 dest += components;
447 static void qcms_transform_data_gray_out_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
449 qcms_transform_data_gray_template_precache<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
452 static void qcms_transform_data_gray_rgba_out_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
454 qcms_transform_data_gray_template_precache<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, NO_A_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
457 static void qcms_transform_data_gray_bgra_out_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
459 qcms_transform_data_gray_template_precache<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, NO_A_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
462 static void qcms_transform_data_graya_rgba_out_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
464 qcms_transform_data_gray_template_precache<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
467 static void qcms_transform_data_graya_bgra_out_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
469 qcms_transform_data_gray_template_precache<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
472 template <size_t kRIndex, size_t kGIndex, size_t kBIndex, size_t kAIndex = NO_A_INDEX>
473 static void qcms_transform_data_template_lut_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
475 const unsigned int components = A_INDEX_COMPONENTS(kAIndex);
476 unsigned int i;
477 const float (*mat)[4] = transform->matrix;
478 for (i = 0; i < length; i++) {
479 unsigned char device_r = src[kRIndex];
480 unsigned char device_g = src[kGIndex];
481 unsigned char device_b = src[kBIndex];
482 unsigned char alpha;
483 if (kAIndex != NO_A_INDEX) {
484 alpha = src[kAIndex];
486 src += components;
487 uint16_t r, g, b;
489 float linear_r = transform->input_gamma_table_r[device_r];
490 float linear_g = transform->input_gamma_table_g[device_g];
491 float linear_b = transform->input_gamma_table_b[device_b];
493 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
494 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
495 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
497 out_linear_r = clamp_float(out_linear_r);
498 out_linear_g = clamp_float(out_linear_g);
499 out_linear_b = clamp_float(out_linear_b);
501 /* we could round here... */
502 r = out_linear_r * PRECACHE_OUTPUT_MAX;
503 g = out_linear_g * PRECACHE_OUTPUT_MAX;
504 b = out_linear_b * PRECACHE_OUTPUT_MAX;
506 dest[kRIndex] = transform->output_table_r->data[r];
507 dest[kGIndex] = transform->output_table_g->data[g];
508 dest[kBIndex] = transform->output_table_b->data[b];
509 if (kAIndex != NO_A_INDEX) {
510 dest[kAIndex] = alpha;
512 dest += components;
516 void qcms_transform_data_rgb_out_lut_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
518 qcms_transform_data_template_lut_precache<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
521 void qcms_transform_data_rgba_out_lut_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
523 qcms_transform_data_template_lut_precache<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
526 void qcms_transform_data_bgra_out_lut_precache(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
528 qcms_transform_data_template_lut_precache<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
531 // Not used
// Transforms `length` 3-byte RGB pixels through the transform's r/g/b CLUTs
// using trilinear interpolation: for every output channel the eight grid
// points surrounding the input are blended along x, then y, then z.
// Marked "Not used" by the comment preceding it.
// NOTE(review): xy_len, x_len and len are not referenced directly in this
// body; presumably the CLU() macro (defined outside this view) reads them -
// confirm before renaming or removing them.
533 static void qcms_transform_data_clut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {
534 unsigned int i;
535 int xy_len = 1;
536 int x_len = transform->grid_size;
537 int len = x_len * x_len;
538 const float* r_table = transform->r_clut;
539 const float* g_table = transform->g_clut;
540 const float* b_table = transform->b_clut;
542 for (i = 0; i < length; i++) {
543 unsigned char in_r = *src++;
544 unsigned char in_g = *src++;
545 unsigned char in_b = *src++;
546 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
// x/y/z: lower grid index per axis; x_n/y_n/z_n: upper grid index;
// x_d/y_d/z_d: fractional position between the two.
548 int x = floorf(linear_r * (transform->grid_size-1));
549 int y = floorf(linear_g * (transform->grid_size-1));
550 int z = floorf(linear_b * (transform->grid_size-1));
551 int x_n = ceilf(linear_r * (transform->grid_size-1));
552 int y_n = ceilf(linear_g * (transform->grid_size-1));
553 int z_n = ceilf(linear_b * (transform->grid_size-1));
554 float x_d = linear_r * (transform->grid_size-1) - x;
555 float y_d = linear_g * (transform->grid_size-1) - y;
556 float z_d = linear_b * (transform->grid_size-1) - z;
// red output channel
558 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
559 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
560 float r_y1 = lerp(r_x1, r_x2, y_d);
561 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
562 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
563 float r_y2 = lerp(r_x3, r_x4, y_d);
564 float clut_r = lerp(r_y1, r_y2, z_d);
// green output channel
566 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
567 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
568 float g_y1 = lerp(g_x1, g_x2, y_d);
569 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
570 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
571 float g_y2 = lerp(g_x3, g_x4, y_d);
572 float clut_g = lerp(g_y1, g_y2, z_d);
// blue output channel
574 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
575 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
576 float b_y1 = lerp(b_x1, b_x2, y_d);
577 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
578 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
579 float b_y2 = lerp(b_x3, b_x4, y_d);
580 float clut_b = lerp(b_y1, b_y2, z_d);
// scale back to bytes; three output bytes per pixel
582 *dest++ = clamp_u8(clut_r*255.0f);
583 *dest++ = clamp_u8(clut_g*255.0f);
584 *dest++ = clamp_u8(clut_b*255.0f);
/*
 * Divides value by div, rounding the quotient up.
 * Intended for a non-negative value and a positive div.
 */
static int int_div_ceil(int value, int div) {
	const int biased = value + div - 1;

	return biased / div;
}
593 // Using lcms' tetra interpolation algorithm.
594 template <size_t kRIndex, size_t kGIndex, size_t kBIndex, size_t kAIndex = NO_A_INDEX>
595 static void qcms_transform_data_tetra_clut_template(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {
596 const unsigned int components = A_INDEX_COMPONENTS(kAIndex);
597 unsigned int i;
598 int xy_len = 1;
599 int x_len = transform->grid_size;
600 int len = x_len * x_len;
601 float* r_table = transform->r_clut;
602 float* g_table = transform->g_clut;
603 float* b_table = transform->b_clut;
604 float c0_r, c1_r, c2_r, c3_r;
605 float c0_g, c1_g, c2_g, c3_g;
606 float c0_b, c1_b, c2_b, c3_b;
607 float clut_r, clut_g, clut_b;
608 for (i = 0; i < length; i++) {
609 unsigned char in_r = src[kRIndex];
610 unsigned char in_g = src[kGIndex];
611 unsigned char in_b = src[kBIndex];
612 unsigned char in_a;
613 if (kAIndex != NO_A_INDEX) {
614 in_a = src[kAIndex];
616 src += components;
617 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
619 int x = in_r * (transform->grid_size-1) / 255;
620 int y = in_g * (transform->grid_size-1) / 255;
621 int z = in_b * (transform->grid_size-1) / 255;
622 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
623 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
624 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
625 float rx = linear_r * (transform->grid_size-1) - x;
626 float ry = linear_g * (transform->grid_size-1) - y;
627 float rz = linear_b * (transform->grid_size-1) - z;
629 c0_r = CLU(r_table, x, y, z);
630 c0_g = CLU(g_table, x, y, z);
631 c0_b = CLU(b_table, x, y, z);
633 if( rx >= ry ) {
634 if (ry >= rz) { //rx >= ry && ry >= rz
635 c1_r = CLU(r_table, x_n, y, z) - c0_r;
636 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
637 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
638 c1_g = CLU(g_table, x_n, y, z) - c0_g;
639 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
640 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
641 c1_b = CLU(b_table, x_n, y, z) - c0_b;
642 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
643 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
644 } else {
645 if (rx >= rz) { //rx >= rz && rz >= ry
646 c1_r = CLU(r_table, x_n, y, z) - c0_r;
647 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
648 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
649 c1_g = CLU(g_table, x_n, y, z) - c0_g;
650 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
651 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
652 c1_b = CLU(b_table, x_n, y, z) - c0_b;
653 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
654 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
655 } else { //rz > rx && rx >= ry
656 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
657 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
658 c3_r = CLU(r_table, x, y, z_n) - c0_r;
659 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
660 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
661 c3_g = CLU(g_table, x, y, z_n) - c0_g;
662 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
663 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
664 c3_b = CLU(b_table, x, y, z_n) - c0_b;
667 } else {
668 if (rx >= rz) { //ry > rx && rx >= rz
669 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
670 c2_r = CLU(r_table, x, y_n, z) - c0_r;
671 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
672 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
673 c2_g = CLU(g_table, x, y_n, z) - c0_g;
674 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
675 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
676 c2_b = CLU(b_table, x, y_n, z) - c0_b;
677 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
678 } else {
679 if (ry >= rz) { //ry >= rz && rz > rx
680 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
681 c2_r = CLU(r_table, x, y_n, z) - c0_r;
682 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
683 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
684 c2_g = CLU(g_table, x, y_n, z) - c0_g;
685 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
686 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
687 c2_b = CLU(b_table, x, y_n, z) - c0_b;
688 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
689 } else { //rz > ry && ry > rx
690 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
691 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
692 c3_r = CLU(r_table, x, y, z_n) - c0_r;
693 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
694 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
695 c3_g = CLU(g_table, x, y, z_n) - c0_g;
696 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
697 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
698 c3_b = CLU(b_table, x, y, z_n) - c0_b;
703 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
704 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
705 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
707 dest[kRIndex] = clamp_u8(clut_r*255.0f);
708 dest[kGIndex] = clamp_u8(clut_g*255.0f);
709 dest[kBIndex] = clamp_u8(clut_b*255.0f);
710 if (kAIndex != NO_A_INDEX) {
711 dest[kAIndex] = in_a;
713 dest += components;
717 static void qcms_transform_data_tetra_clut_rgb(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {
718 qcms_transform_data_tetra_clut_template<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
721 static void qcms_transform_data_tetra_clut_rgba(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {
722 qcms_transform_data_tetra_clut_template<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
725 static void qcms_transform_data_tetra_clut_bgra(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {
726 qcms_transform_data_tetra_clut_template<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
729 template <size_t kRIndex, size_t kGIndex, size_t kBIndex, size_t kAIndex = NO_A_INDEX>
730 static void qcms_transform_data_template_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
732 const unsigned int components = A_INDEX_COMPONENTS(kAIndex);
733 unsigned int i;
734 const float (*mat)[4] = transform->matrix;
735 for (i = 0; i < length; i++) {
736 unsigned char device_r = src[kRIndex];
737 unsigned char device_g = src[kGIndex];
738 unsigned char device_b = src[kBIndex];
739 unsigned char alpha;
740 if (kAIndex != NO_A_INDEX) {
741 alpha = src[kAIndex];
743 src += components;
744 float out_device_r, out_device_g, out_device_b;
746 float linear_r = transform->input_gamma_table_r[device_r];
747 float linear_g = transform->input_gamma_table_g[device_g];
748 float linear_b = transform->input_gamma_table_b[device_b];
750 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
751 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
752 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
754 out_linear_r = clamp_float(out_linear_r);
755 out_linear_g = clamp_float(out_linear_g);
756 out_linear_b = clamp_float(out_linear_b);
758 out_device_r = lut_interp_linear(out_linear_r,
759 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
760 out_device_g = lut_interp_linear(out_linear_g,
761 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
762 out_device_b = lut_interp_linear(out_linear_b,
763 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
765 dest[kRIndex] = clamp_u8(out_device_r*255);
766 dest[kGIndex] = clamp_u8(out_device_g*255);
767 dest[kBIndex] = clamp_u8(out_device_b*255);
768 if (kAIndex != NO_A_INDEX) {
769 dest[kAIndex] = alpha;
771 dest += components;
775 void qcms_transform_data_rgb_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
777 qcms_transform_data_template_lut<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX>(transform, src, dest, length);
780 void qcms_transform_data_rgba_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
782 qcms_transform_data_template_lut<RGBA_R_INDEX, RGBA_G_INDEX, RGBA_B_INDEX, RGBA_A_INDEX>(transform, src, dest, length);
785 void qcms_transform_data_bgra_out_lut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
787 qcms_transform_data_template_lut<BGRA_R_INDEX, BGRA_G_INDEX, BGRA_B_INDEX, BGRA_A_INDEX>(transform, src, dest, length);
// Disabled (#if 0) variant of the RGB transform with no output gamma step:
// each pixel goes through the input gamma tables and the transform matrix,
// and the linear result is scaled by 255 and clamped to a byte. Reads and
// writes three bytes per pixel.
// NOTE(review): `i` is an int compared against a size_t length; fix that
// before re-enabling this code.
790 #if 0
791 static void qcms_transform_data_rgb_out_linear(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length)
793 int i;
794 const float (*mat)[4] = transform->matrix;
795 for (i = 0; i < length; i++) {
796 unsigned char device_r = *src++;
797 unsigned char device_g = *src++;
798 unsigned char device_b = *src++;
// device -> linear via the per-channel input tables
800 float linear_r = transform->input_gamma_table_r[device_r];
801 float linear_g = transform->input_gamma_table_g[device_g];
802 float linear_b = transform->input_gamma_table_b[device_b];
// matrix is applied with mat[input channel][output channel] indexing
804 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
805 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
806 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
808 *dest++ = clamp_u8(out_linear_r*255);
809 *dest++ = clamp_u8(out_linear_g*255);
810 *dest++ = clamp_u8(out_linear_b*255);
813 #endif
/*
 * If users create and destroy objects on different threads, even if the same
 * objects aren't used on different threads at the same time, we can still run
 * into trouble with refcounts if they aren't atomic.
 *
 * This can lead to us prematurely deleting the precache if threads get unlucky
 * and write the wrong value to the ref count.
 */
823 static struct precache_output *precache_reference(struct precache_output *p)
825 qcms_atomic_increment(p->ref_count);
826 return p;
829 static struct precache_output *precache_create()
831 struct precache_output *p = (struct precache_output*)malloc(sizeof(struct precache_output));
832 if (p)
833 p->ref_count = 1;
834 return p;
837 void precache_release(struct precache_output *p)
839 if (qcms_atomic_decrement(p->ref_count) == 0) {
840 free(p);
844 #ifdef HAVE_POSIX_MEMALIGN
845 static qcms_transform *transform_alloc(void)
847 qcms_transform *t;
849 void *allocated_memory;
850 if (!posix_memalign(&allocated_memory, 16, sizeof(qcms_transform))) {
851 /* Doing a memset to initialise all bits to 'zero'*/
852 memset(allocated_memory, 0, sizeof(qcms_transform));
853 t = (qcms_transform*)allocated_memory;
854 return t;
855 } else {
856 return NULL;
859 static void transform_free(qcms_transform *t)
861 free(t);
863 #else
864 static qcms_transform *transform_alloc(void)
866 /* transform needs to be aligned on a 16byte boundrary */
867 char *original_block = (char *)calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
868 /* make room for a pointer to the block returned by calloc */
869 void *transform_start = original_block + sizeof(void*);
870 /* align transform_start */
871 qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
873 /* store a pointer to the block returned by calloc so that we can free it later */
874 void **(original_block_ptr) = (void**)transform_aligned;
875 if (!original_block)
876 return NULL;
877 original_block_ptr--;
878 *original_block_ptr = original_block;
880 return transform_aligned;
882 static void transform_free(qcms_transform *t)
884 /* get at the pointer to the unaligned block returned by calloc */
885 void **p = (void**)t;
886 p--;
887 free(*p);
889 #endif
891 void qcms_transform_release(qcms_transform *t)
893 /* ensure we only free the gamma tables once even if there are
894 * multiple references to the same data */
896 if (t->output_table_r)
897 precache_release(t->output_table_r);
898 if (t->output_table_g)
899 precache_release(t->output_table_g);
900 if (t->output_table_b)
901 precache_release(t->output_table_b);
903 free(t->input_gamma_table_r);
904 if (t->input_gamma_table_g != t->input_gamma_table_r)
905 free(t->input_gamma_table_g);
906 if (t->input_gamma_table_g != t->input_gamma_table_r &&
907 t->input_gamma_table_g != t->input_gamma_table_b)
908 free(t->input_gamma_table_b);
910 free(t->input_gamma_table_gray);
912 free(t->output_gamma_lut_r);
913 free(t->output_gamma_lut_g);
914 free(t->output_gamma_lut_b);
916 /* r_clut points to beginning of buffer allocated in qcms_transform_precacheLUT_float */
917 if (t->r_clut)
918 free(t->r_clut);
920 transform_free(t);
923 #ifdef X86
924 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
925 // mozilla/jpeg)
926 // -------------------------------------------------------------------------
#if defined(_M_IX86) && defined(_MSC_VER)
#define HAS_CPUID
/* Runs CPUID for leaf `fxn` and stores EAX/EBX/ECX/EDX in *a/*b/*c/*d.
 * Avoid clobbering EBX because sometimes it's the PIC register - I'm not sure
 * if that ever happens on windows, but cpuid isn't on the critical path so we
 * just preserve the register to be safe and to be consistent with the
 * non-windows version. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
	uint32_t a_, b_, c_, d_;
	__asm {
		xchg   ebx, esi
		mov    eax, fxn
		cpuid
		mov    a_, eax
		mov    b_, ebx
		mov    c_, ecx
		mov    d_, edx
		xchg   ebx, esi
	}
	*a = a_;
	*b = b_;
	*c = c_;
	*d = d_;
}
#elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
#define HAS_CPUID
/* Runs CPUID for leaf `fxn` and stores EAX/EBX/ECX/EDX in *a/*b/*c/*d.
 * We can't use ebx because it's the PIC register on some platforms, so we
 * use ESI instead and save ebx to avoid clobbering it: ebx and esi are
 * swapped around the instruction, so the EBX result is read back from ESI. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

	uint32_t a_, b_, c_, d_;
	__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
	                      : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
	*a = a_;
	*b = b_;
	*c = c_;
	*d = d_;
}
#endif
// -------------------------Runtime SSEx Detection-----------------------------

/* MMX is always supported per
 *  Gecko v1.9.1 minimum CPU requirements */
#define SSE1_EDX_MASK (1UL << 25)
#define SSE2_EDX_MASK (1UL << 26)
#define SSE3_ECX_MASK (1UL <<  0)

/* Returns the highest SSE level this build may use: 3, 2, 1, or 0 for none.
 * Where a cpuid() helper exists the CPU is queried once and the answer is
 * cached in a function-local static for later calls. */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* we know at build time that 64-bit CPUs always have SSE2
	 * this tells the compiler that non-SSE2 branches will never be
	 * taken (i.e. OK to optimize away the SSE1 and non-SIMD code) */
	return 2;
#elif defined(HAS_CPUID)
	static int sse_version = -1;
	if (sse_version != -1)
		return sse_version;

	/* CPUID leaf 1 reports the feature flags in ECX and EDX */
	uint32_t a, b, c, d;
	cpuid(0x00000001, &a, &b, &c, &d);

	int detected = 0;
	if (c & SSE3_ECX_MASK)
		detected = 3;
	else if (d & SSE2_EDX_MASK)
		detected = 2;
	else if (d & SSE1_EDX_MASK)
		detected = 1;

	sse_version = detected;
	return sse_version;
#else
	return 0;
#endif
}
1002 #endif
1004 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
1005 {-0.7502f, 1.7135f, 0.0367f},
1006 { 0.0389f,-0.0685f, 1.0296f}},
1007 false};
1009 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
1010 { 0.4323053f, 0.5183603f, 0.0492912f},
1011 {-0.0085287f, 0.0400428f, 0.9684867f}},
1012 false};
1014 // See ICCv4 E.3
1015 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1016 float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
1017 (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] );
1018 float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
1019 (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] );
1020 float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
1021 (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] );
1022 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1023 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
1026 void qcms_profile_precache_output_transform(qcms_profile *profile)
1028 /* we only support precaching on rgb profiles */
1029 if (profile->color_space != RGB_SIGNATURE)
1030 return;
1032 if (qcms_supports_iccv4) {
1033 /* don't precache since we will use the B2A LUT */
1034 if (profile->B2A0)
1035 return;
1037 /* don't precache since we will use the mBA LUT */
1038 if (profile->mBA)
1039 return;
1042 /* don't precache if we do not have the TRC curves */
1043 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1044 return;
1046 if (!profile->output_table_r) {
1047 profile->output_table_r = precache_create();
1048 if (profile->output_table_r &&
1049 !compute_precache(profile->redTRC, profile->output_table_r->data)) {
1050 precache_release(profile->output_table_r);
1051 profile->output_table_r = NULL;
1054 if (!profile->output_table_g) {
1055 profile->output_table_g = precache_create();
1056 if (profile->output_table_g &&
1057 !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
1058 precache_release(profile->output_table_g);
1059 profile->output_table_g = NULL;
1062 if (!profile->output_table_b) {
1063 profile->output_table_b = precache_create();
1064 if (profile->output_table_b &&
1065 !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
1066 precache_release(profile->output_table_b);
1067 profile->output_table_b = NULL;
1072 /* Replace the current transformation with a LUT transformation using a given number of sample points */
1073 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
1074 int samples, qcms_data_type in_type)
1076 /* The range between which 2 consecutive sample points can be used to interpolate */
1077 uint16_t x,y,z;
1078 uint32_t l;
1079 uint32_t lutSize = 3 * samples * samples * samples;
1080 float* src = NULL;
1081 float* dest = NULL;
1082 float* lut = NULL;
1084 src = (float*)malloc(lutSize*sizeof(float));
1085 dest = (float*)malloc(lutSize*sizeof(float));
1087 if (src && dest) {
1088 /* Prepare a list of points we want to sample */
1089 l = 0;
1090 for (x = 0; x < samples; x++) {
1091 for (y = 0; y < samples; y++) {
1092 for (z = 0; z < samples; z++) {
1093 src[l++] = x / (float)(samples-1);
1094 src[l++] = y / (float)(samples-1);
1095 src[l++] = z / (float)(samples-1);
1100 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1101 if (lut) {
1102 transform->r_clut = &lut[0];
1103 transform->g_clut = &lut[1];
1104 transform->b_clut = &lut[2];
1105 transform->grid_size = samples;
1106 if (in_type == QCMS_DATA_RGBA_8) {
1107 transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1108 } else if (in_type == QCMS_DATA_BGRA_8) {
1109 transform->transform_fn = qcms_transform_data_tetra_clut_bgra;
1110 } else if (in_type == QCMS_DATA_RGB_8) {
1111 transform->transform_fn = qcms_transform_data_tetra_clut_rgb;
1113 assert(transform->transform_fn);
1118 //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
1119 // It will be stored in r_clut, which will be cleaned up in qcms_transform_release.
1120 if (src && lut != src) {
1121 free(src);
1123 if (dest && lut != dest) {
1124 free(dest);
1127 if (lut == NULL) {
1128 return NULL;
1130 return transform;
1133 #define NO_MEM_TRANSFORM NULL
1135 qcms_transform* qcms_transform_create(
1136 qcms_profile *in, qcms_data_type in_type,
1137 qcms_profile *out, qcms_data_type out_type,
1138 qcms_intent intent)
1140 // Ensure the requested input and output types make sense.
1141 bool match = false;
1142 if (in_type == QCMS_DATA_RGB_8) {
1143 match = out_type == QCMS_DATA_RGB_8;
1144 } else if (in_type == QCMS_DATA_RGBA_8) {
1145 match = out_type == QCMS_DATA_RGBA_8;
1146 } else if (in_type == QCMS_DATA_BGRA_8) {
1147 match = out_type == QCMS_DATA_BGRA_8;
1148 } else if (in_type == QCMS_DATA_GRAY_8) {
1149 match = out_type == QCMS_DATA_RGB_8 || out_type == QCMS_DATA_RGBA_8 || out_type == QCMS_DATA_BGRA_8;
1150 } else if (in_type == QCMS_DATA_GRAYA_8) {
1151 match = out_type == QCMS_DATA_RGBA_8 || out_type == QCMS_DATA_BGRA_8;
1153 if (!match) {
1154 assert(0 && "input/output type");
1155 return NULL;
1158 qcms_transform *transform = transform_alloc();
1159 if (!transform) {
1160 return NULL;
1163 bool precache = false;
1164 if (out->output_table_r &&
1165 out->output_table_g &&
1166 out->output_table_b) {
1167 precache = true;
1170 // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)
1171 if (qcms_supports_iccv4 &&
1172 (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8 || in_type == QCMS_DATA_BGRA_8) &&
1173 (in->A2B0 || out->B2A0 || in->mAB || out->mAB))
1175 // Precache the transformation to a CLUT 33x33x33 in size.
1176 // 33 is used by many profiles and works well in pratice.
1177 // This evenly divides 256 into blocks of 8x8x8.
1178 // TODO For transforming small data sets of about 200x200 or less
1179 // precaching should be avoided.
1180 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
1181 if (!result) {
1182 assert(0 && "precacheLUT failed");
1183 qcms_transform_release(transform);
1184 return NULL;
1186 return result;
1189 if (precache) {
1190 transform->output_table_r = precache_reference(out->output_table_r);
1191 transform->output_table_g = precache_reference(out->output_table_g);
1192 transform->output_table_b = precache_reference(out->output_table_b);
1193 } else {
1194 if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1195 qcms_transform_release(transform);
1196 return NO_MEM_TRANSFORM;
1198 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
1199 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1200 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
1201 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
1202 qcms_transform_release(transform);
1203 return NO_MEM_TRANSFORM;
1207 if (in->color_space == RGB_SIGNATURE) {
1208 struct matrix in_matrix, out_matrix, result;
1209 if (precache) {
1210 #ifdef X86
1211 if (sse_version_available() >= 2) {
1212 if (in_type == QCMS_DATA_RGB_8) {
1213 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1214 } else if (in_type == QCMS_DATA_RGBA_8) {
1215 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1216 } else if (in_type == QCMS_DATA_BGRA_8) {
1217 transform->transform_fn = qcms_transform_data_bgra_out_lut_sse2;
1220 #if !(defined(_MSC_VER) && defined(_M_AMD64))
1221 /* Microsoft Compiler for x64 doesn't support MMX.
1222 * SSE code uses MMX so that we disable on x64 */
1223 } else
1224 if (sse_version_available() >= 1) {
1225 if (in_type == QCMS_DATA_RGB_8) {
1226 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
1227 } else if (in_type == QCMS_DATA_RGBA_8) {
1228 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
1229 } else if (in_type == QCMS_DATA_BGRA_8) {
1230 transform->transform_fn = qcms_transform_data_bgra_out_lut_sse1;
1232 #endif
1233 } else
1234 #endif
1235 #if defined(__arm__) || defined(__aarch64__)
1236 if (qcms_supports_neon) {
1237 if (in_type == QCMS_DATA_RGB_8) {
1238 transform->transform_fn = qcms_transform_data_rgb_out_lut_neon;
1239 } else if (in_type == QCMS_DATA_RGBA_8) {
1240 transform->transform_fn = qcms_transform_data_rgba_out_lut_neon;
1241 } else if (in_type == QCMS_DATA_BGRA_8) {
1242 transform->transform_fn = qcms_transform_data_bgra_out_lut_neon;
1244 } else
1245 #endif
1246 #if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
1247 if (have_altivec()) {
1248 if (in_type == QCMS_DATA_RGB_8) {
1249 transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
1250 } else if (in_type == QCMS_DATA_RGBA_8) {
1251 transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
1252 } else if (in_type == QCMS_DATA_BGRA_8) {
1253 transform->transform_fn = qcms_transform_data_bgra_out_lut_altivec;
1255 } else
1256 #endif
1258 if (in_type == QCMS_DATA_RGB_8) {
1259 transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
1260 } else if (in_type == QCMS_DATA_RGBA_8) {
1261 transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
1262 } else if (in_type == QCMS_DATA_BGRA_8) {
1263 transform->transform_fn = qcms_transform_data_bgra_out_lut_precache;
1266 } else {
1267 if (in_type == QCMS_DATA_RGB_8) {
1268 transform->transform_fn = qcms_transform_data_rgb_out_lut;
1269 } else if (in_type == QCMS_DATA_RGBA_8) {
1270 transform->transform_fn = qcms_transform_data_rgba_out_lut;
1271 } else if (in_type == QCMS_DATA_BGRA_8) {
1272 transform->transform_fn = qcms_transform_data_bgra_out_lut;
1276 //XXX: avoid duplicating tables if we can
1277 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
1278 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
1279 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
1280 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
1281 qcms_transform_release(transform);
1282 return NO_MEM_TRANSFORM;
1286 /* build combined colorant matrix */
1287 in_matrix = build_colorant_matrix(in);
1288 out_matrix = build_colorant_matrix(out);
1289 out_matrix = matrix_invert(out_matrix);
1290 if (out_matrix.invalid) {
1291 qcms_transform_release(transform);
1292 return NULL;
1294 result = matrix_multiply(out_matrix, in_matrix);
1296 /* check for NaN values in the matrix and bail if we find any */
1297 for (unsigned i = 0 ; i < 3 ; ++i) {
1298 for (unsigned j = 0 ; j < 3 ; ++j) {
1299 if (result.m[i][j] != result.m[i][j]) {
1300 qcms_transform_release(transform);
1301 return NULL;
1306 /* store the results in column major mode
1307 * this makes doing the multiplication with sse easier */
1308 transform->matrix[0][0] = result.m[0][0];
1309 transform->matrix[1][0] = result.m[0][1];
1310 transform->matrix[2][0] = result.m[0][2];
1311 transform->matrix[0][1] = result.m[1][0];
1312 transform->matrix[1][1] = result.m[1][1];
1313 transform->matrix[2][1] = result.m[1][2];
1314 transform->matrix[0][2] = result.m[2][0];
1315 transform->matrix[1][2] = result.m[2][1];
1316 transform->matrix[2][2] = result.m[2][2];
1318 } else if (in->color_space == GRAY_SIGNATURE) {
1319 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
1320 if (!transform->input_gamma_table_gray) {
1321 qcms_transform_release(transform);
1322 return NO_MEM_TRANSFORM;
1325 if (precache) {
1326 if (out_type == QCMS_DATA_RGB_8) {
1327 transform->transform_fn = qcms_transform_data_gray_out_precache;
1328 } else if (out_type == QCMS_DATA_RGBA_8) {
1329 if (in_type == QCMS_DATA_GRAY_8) {
1330 transform->transform_fn = qcms_transform_data_gray_rgba_out_precache;
1331 } else {
1332 transform->transform_fn = qcms_transform_data_graya_rgba_out_precache;
1334 } else if (out_type == QCMS_DATA_BGRA_8) {
1335 if (in_type == QCMS_DATA_GRAY_8) {
1336 transform->transform_fn = qcms_transform_data_gray_bgra_out_precache;
1337 } else {
1338 transform->transform_fn = qcms_transform_data_graya_bgra_out_precache;
1341 } else {
1342 if (out_type == QCMS_DATA_RGB_8) {
1343 transform->transform_fn = qcms_transform_data_gray_out_lut;
1344 } else if (out_type == QCMS_DATA_RGBA_8) {
1345 if (in_type == QCMS_DATA_GRAY_8) {
1346 transform->transform_fn = qcms_transform_data_gray_rgba_out_lut;
1347 } else {
1348 transform->transform_fn = qcms_transform_data_graya_rgba_out_lut;
1350 } else if (out_type == QCMS_DATA_BGRA_8) {
1351 if (in_type == QCMS_DATA_GRAY_8) {
1352 transform->transform_fn = qcms_transform_data_gray_bgra_out_lut;
1353 } else {
1354 transform->transform_fn = qcms_transform_data_graya_bgra_out_lut;
1358 } else {
1359 assert(0 && "unexpected colorspace");
1360 qcms_transform_release(transform);
1361 return NULL;
1363 assert(transform->transform_fn);
1364 return transform;
1367 #if defined(__GNUC__) && defined(__i386__)
1368 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1369 __attribute__((__force_align_arg_pointer__))
1370 #endif
1371 void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length)
1373 transform->transform_fn(transform, (const unsigned char*)src, (unsigned char*)dest, length);
/* Global switch for the ICC v4 code paths; false until
 * qcms_enable_iccv4() is called. */
bool qcms_supports_iccv4;

/* Turns ICC v4 support on. There is no way to turn it off again. */
void qcms_enable_iccv4()
{
	qcms_supports_iccv4 = true;
}
#if defined(__arm__) || defined(__aarch64__)
/* Global switch for the NEON routines; only exists on ARM builds and is
 * false until qcms_enable_neon() is called. */
bool qcms_supports_neon;
#endif

/* Turns the NEON routines on. On non-ARM builds this is a no-op. */
void qcms_enable_neon()
{
#if defined(__arm__) || defined(__aarch64__)
	qcms_supports_neon = true;
#endif
}