1 /* vim: set ts=8 sw=8 noexpandtab: */
12 /* used as a lookup table for the output transformation.
13 * we refcount them so we only need to have one around per output
14 * profile, instead of duplicating them per transform */
/* Precomputed 8-bit output lookup table shared between transforms. */
15 struct precache_output
18 /* We previously used a count of 65536 here but that seems like more
19 * precision than we actually need. By reducing the size we can
20 * improve startup performance and reduce memory usage. ColorSync on
21 * 10.5 uses 4097 which is perhaps because they use a fixed point
22 * representation where 1. is represented by 0x1000. */
23 #define PRECACHE_OUTPUT_SIZE 8192
/* Largest valid index into data[]: PRECACHE_OUTPUT_SIZE - 1. */
24 #define PRECACHE_OUTPUT_MAX (PRECACHE_OUTPUT_SIZE-1)
/* The table itself: PRECACHE_OUTPUT_SIZE entries of one byte each. */
25 uint8_t data
[PRECACHE_OUTPUT_SIZE
];
/* ALIGN asks for 16-byte alignment of the object it annotates, e.g.
 *     float ALIGN matrix[3][4];
 * Microsoft compilers spell this __declspec(align(16)); every other compiler
 * is given the GCC-style attribute. Exactly one spelling must be active,
 * otherwise the second #define silently replaces the first. */
#ifdef _MSC_VER
#define ALIGN __declspec(align(16))
#else
#define ALIGN __attribute__(( aligned (16) ))
#endif
/* Forward declaration so the callback type below can take a pointer to the
 * transform before the struct is defined. */
34 struct _qcms_transform
;
/* Pointer to a conversion routine. It receives the transform read-only, a
 * read-only source byte buffer, a writable destination byte buffer and a
 * length, and returns nothing.
 * NOTE(review): the unit of `length` (pixels vs. bytes) is not visible here;
 * confirm against the implementations. */
36 typedef void (*transform_fn_t
)(const struct _qcms_transform
*transform
, const unsigned char *src
, unsigned char *dest
, size_t length
);
/* Per-transform state: a matrix, per-channel input/output tables and the
 * function pointer stored alongside them. */
38 struct _qcms_transform
{
/* 3x4 float matrix; ALIGN requests 16-byte alignment. */
39 float ALIGN matrix
[3][4];
/* Input gamma tables, one float table per colour channel. */
40 float *input_gamma_table_r
;
41 float *input_gamma_table_g
;
42 float *input_gamma_table_b
;
/* Input CLUT tables per channel, followed by a 16-bit length.
 * NOTE(review): presumably one length shared by all three tables; confirm. */
44 float *input_clut_table_r
;
45 float *input_clut_table_g
;
46 float *input_clut_table_b
;
47 uint16_t input_clut_table_length
;
/* Output CLUT tables per channel, laid out like the input CLUT group. */
52 float *output_clut_table_r
;
53 float *output_clut_table_g
;
54 float *output_clut_table_b
;
55 uint16_t output_clut_table_length
;
/* Input gamma table for the gray channel. */
57 float *input_gamma_table_gray
;
/* Output gamma lookup tables holding 16-bit entries, per colour channel. */
65 uint16_t *output_gamma_lut_r
;
66 uint16_t *output_gamma_lut_g
;
67 uint16_t *output_gamma_lut_b
;
/* Gray counterpart of the output gamma lookup tables. */
69 uint16_t *output_gamma_lut_gray
;
/* size_t lengths named after each output gamma LUT above.
 * NOTE(review): presumably entry counts rather than byte sizes; confirm. */
71 size_t output_gamma_lut_r_length
;
72 size_t output_gamma_lut_g_length
;
73 size_t output_gamma_lut_b_length
;
75 size_t output_gamma_lut_gray_length
;
/* Pointers to precache_output tables, one per colour channel. */
77 struct precache_output
*output_table_r
;
78 struct precache_output
*output_table_g
;
79 struct precache_output
*output_table_b
;
/* Function pointer of type transform_fn_t kept with the transform;
 * presumably the routine that performs the conversion. */
81 transform_fn_t transform_fn
;
/* Forward declaration so the callback type below can refer to the struct. */
89 struct qcms_modular_transform
;
/* Pointer to one modular transform stage. Unlike transform_fn_t it takes a
 * non-const transform and works on float buffers: `src` and `dest` are both
 * plain float pointers, followed by a size_t length.
 * NOTE(review): the unit of `length` is not visible here; confirm. */
91 typedef void (*transform_module_fn_t
)(struct qcms_modular_transform
*transform
, float *src
, float *dest
, size_t length
);
/* One node of a modular transform: its tables, the function pointer stored
 * with them and a pointer to another node of the same type. */
93 struct qcms_modular_transform
{
/* Input CLUT tables per channel, followed by a 16-bit length. */
97 float *input_clut_table_r
;
98 float *input_clut_table_g
;
99 float *input_clut_table_b
;
100 uint16_t input_clut_table_length
;
/* Output CLUT tables per channel, followed by a 16-bit length. */
105 float *output_clut_table_r
;
106 float *output_clut_table_g
;
107 float *output_clut_table_b
;
108 uint16_t output_clut_table_length
;
/* Output gamma lookup tables holding 16-bit entries, per colour channel. */
110 uint16_t *output_gamma_lut_r
;
111 uint16_t *output_gamma_lut_g
;
112 uint16_t *output_gamma_lut_b
;
/* size_t lengths named after each output gamma LUT above. */
114 size_t output_gamma_lut_r_length
;
115 size_t output_gamma_lut_g_length
;
116 size_t output_gamma_lut_b_length
;
/* Function pointer of type transform_module_fn_t stored with this node. */
118 transform_module_fn_t transform_module_fn
;
/* Pointer to another qcms_modular_transform; presumably the next stage in a
 * chain (confirm how the list is terminated). */
119 struct qcms_modular_transform
*next_transform
;
/* Fixed-width aliases for the number types used by the profile structures.
 * s15Fixed16Number is stored in an int32_t; the conversion helpers in this
 * file treat it as a value scaled by 65536, i.e. 16 fractional bits. */
122 typedef int32_t s15Fixed16Number
;
/* Unsigned 16-bit value. */
123 typedef uint16_t uInt16Number
;
/* Unsigned 8-bit value. */
124 typedef uint8_t uInt8Number
;
/* Channel counts, each stored in a single byte. */
140 uint8_t num_in_channels
;
141 uint8_t num_out_channels
;
142 // 16 is the upperbound, actual is 0..num_in_channels.
143 uint8_t num_grid_points
[16];
/* Twelve s15Fixed16Number values named eRC.
 * NOTE(review): the names suggest a 3x4 matrix with row R and column C;
 * confirm against the code that fills them in. */
145 s15Fixed16Number e00
;
146 s15Fixed16Number e01
;
147 s15Fixed16Number e02
;
148 s15Fixed16Number e03
;
149 s15Fixed16Number e10
;
150 s15Fixed16Number e11
;
151 s15Fixed16Number e12
;
152 s15Fixed16Number e13
;
153 s15Fixed16Number e20
;
154 s15Fixed16Number e21
;
155 s15Fixed16Number e22
;
156 s15Fixed16Number e23
;
158 // reversed elements (for mBA)
/* Three arrays of ten curveType pointers each: the a, b and m curves. */
162 struct curveType
*a_curves
[10];
163 struct curveType
*b_curves
[10];
164 struct curveType
*m_curves
[10];
/* Array of floats declared without a size.
 * NOTE(review): presumably a trailing flexible array member sized at
 * allocation time; confirm it is the last member. */
165 float clut_table_data
[];
168 /* should lut8Type and lut16Type be different types? */
/* Description of a lookup-table element: channel counts, grid size, nine
 * fixed-point values and the input/output table entry counts. */
169 struct lutType
{ // used by lut8Type/lut16Type (mft2) only
/* Channel counts and CLUT grid size, one byte each. */
170 uint8_t num_input_channels
;
171 uint8_t num_output_channels
;
172 uint8_t num_clut_grid_points
;
/* Nine s15Fixed16Number values named eRC.
 * NOTE(review): the names suggest a 3x3 matrix with row R and column C;
 * confirm against the code that fills them in. */
174 s15Fixed16Number e00
;
175 s15Fixed16Number e01
;
176 s15Fixed16Number e02
;
177 s15Fixed16Number e10
;
178 s15Fixed16Number e11
;
179 s15Fixed16Number e12
;
180 s15Fixed16Number e20
;
181 s15Fixed16Number e21
;
182 s15Fixed16Number e22
;
/* 16-bit entry counts for the input and output tables. */
184 uint16_t num_input_table_entries
;
185 uint16_t num_output_table_entries
;
194 /* this is from an initial idea of having the struct correspond to the data in
195 * the file. I decided that it wasn't a good idea.
209 }; // I guess we need to pack this?
/* Colour-space signatures as 32-bit four-character codes; each byte is one
 * ASCII character: "RGB ", "GRAY", "XYZ " and "Lab " respectively (the
 * three-letter names are padded with a trailing space, 0x20). */
212 #define RGB_SIGNATURE 0x52474220
213 #define GRAY_SIGNATURE 0x47524159
214 #define XYZ_SIGNATURE 0x58595A20
215 #define LAB_SIGNATURE 0x4C616220
/* In-memory colour profile: colour space, rendering intent, colorants,
 * tone curves, LUT pointers, an adaptation matrix and per-channel
 * precache_output pointers. */
217 struct _qcms_profile
{
/* 32-bit colour-space value; presumably compared against the *_SIGNATURE
 * constants (confirm at the use sites). */
219 uint32_t color_space
;
221 qcms_intent rendering_intent
;
/* Colorants stored by value. Note the member order is red, blue, green,
 * not red, green, blue. */
222 struct XYZNumber redColorant
;
223 struct XYZNumber blueColorant
;
224 struct XYZNumber greenColorant
;
/* Tone curves held by pointer, in the same red, blue, green order,
 * followed by the gray curve. */
225 struct curveType
*redTRC
;
226 struct curveType
*blueTRC
;
227 struct curveType
*greenTRC
;
228 struct curveType
*grayTRC
;
/* Pointers to lutType tables (A2B0, B2A0) and lutmABType tables (mAB, mBA).
 * NOTE(review): presumably NULL when the profile lacks the corresponding
 * element; confirm in the parser. */
229 struct lutType
*A2B0
;
230 struct lutType
*B2A0
;
231 struct lutmABType
*mAB
;
232 struct lutmABType
*mBA
;
/* Matrix stored by value. */
233 struct matrix chromaticAdaption
;
/* Pointers to precache_output tables, one per colour channel. */
235 struct precache_output
*output_table_r
;
236 struct precache_output
*output_table_g
;
237 struct precache_output
*output_table_b
;
/* Maps the `inline` keyword onto `_inline`.
 * NOTE(review): `_inline` is a Microsoft-specific spelling, so this is
 * presumably meant to apply only to that compiler; confirm the guard. */
241 #define inline _inline
244 /* produces the nearest float to 'a' with a maximum error
245 * of 1/1024 which happens for large values like 0x40000040 */
246 static inline float s15Fixed16Number_to_float(s15Fixed16Number a
)
248 return ((int32_t)a
)/65536.f
;
251 static inline s15Fixed16Number
double_to_s15Fixed16Number(double v
)
253 return (int32_t)(v
*65536);
256 static inline float uInt8Number_to_float(uInt8Number a
)
258 return ((int32_t)a
)/255.f
;
261 static inline float uInt16Number_to_float(uInt16Number a
)
263 return ((int32_t)a
)/65535.f
;
/* Takes a pointer to a precache_output table and returns nothing.
 * NOTE(review): presumably drops one reference and frees the table once no
 * references remain; confirm in the implementation. */
267 void precache_release(struct precache_output
*p
);
/* Both functions receive a white point and a triple of primaries by value and
 * return a bool. Their first parameter is the destination: a profile for
 * set_rgb_colorants(), a matrix for get_rgb_colorants().
 * NOTE(review): presumably `true` means success; confirm. */
268 bool set_rgb_colorants(qcms_profile
*profile
, qcms_CIE_xyY white_point
, qcms_CIE_xyYTRIPLE primaries
);
269 bool get_rgb_colorants(struct matrix
*colorants
, qcms_CIE_xyY white_point
, qcms_CIE_xyYTRIPLE primaries
);
/* Transform entry points with no instruction-set suffix, one each for the
 * rgb, rgba and bgra layouts named in the identifiers. Each takes the
 * transform read-only and a read-only source byte buffer. */
271 void qcms_transform_data_rgb_out_lut(const qcms_transform
*transform
,
272 const unsigned char *src
,
275 void qcms_transform_data_rgba_out_lut(const qcms_transform
*transform
,
276 const unsigned char *src
,
279 void qcms_transform_data_bgra_out_lut(const qcms_transform
*transform
,
280 const unsigned char *src
,
/* "_precache" variants of the rgb, rgba and bgra entry points.
 * NOTE(review): presumably these use the precache_output tables instead of
 * the output gamma LUTs; confirm in the implementation. */
284 void qcms_transform_data_rgb_out_lut_precache(const qcms_transform
*transform
,
285 const unsigned char *src
,
288 void qcms_transform_data_rgba_out_lut_precache(const qcms_transform
*transform
,
289 const unsigned char *src
,
292 void qcms_transform_data_bgra_out_lut_precache(const qcms_transform
*transform
,
293 const unsigned char *src
,
/* "_avx" variants of the rgb, rgba and bgra entry points; presumably
 * implemented with AVX instructions and only to be called when the CPU
 * supports them (confirm at the dispatch site). */
297 void qcms_transform_data_rgb_out_lut_avx(const qcms_transform
*transform
,
298 const unsigned char *src
,
301 void qcms_transform_data_rgba_out_lut_avx(const qcms_transform
*transform
,
302 const unsigned char *src
,
305 void qcms_transform_data_bgra_out_lut_avx(const qcms_transform
*transform
,
306 const unsigned char *src
,
/* "_sse2" variants of the rgb, rgba and bgra entry points; presumably
 * implemented with SSE2 instructions (confirm at the dispatch site). */
309 void qcms_transform_data_rgb_out_lut_sse2(const qcms_transform
*transform
,
310 const unsigned char *src
,
313 void qcms_transform_data_rgba_out_lut_sse2(const qcms_transform
*transform
,
314 const unsigned char *src
,
317 void qcms_transform_data_bgra_out_lut_sse2(const qcms_transform
*transform
,
318 const unsigned char *src
,
/* "_sse1" variants of the rgb, rgba and bgra entry points; presumably
 * implemented with first-generation SSE instructions (confirm at the
 * dispatch site). */
321 void qcms_transform_data_rgb_out_lut_sse1(const qcms_transform
*transform
,
322 const unsigned char *src
,
325 void qcms_transform_data_rgba_out_lut_sse1(const qcms_transform
*transform
,
326 const unsigned char *src
,
329 void qcms_transform_data_bgra_out_lut_sse1(const qcms_transform
*transform
,
330 const unsigned char *src
,
/* "_altivec" variants of the rgb, rgba and bgra entry points; presumably
 * implemented with AltiVec instructions (confirm at the dispatch site). */
334 void qcms_transform_data_rgb_out_lut_altivec(const qcms_transform
*transform
,
335 const unsigned char *src
,
338 void qcms_transform_data_rgba_out_lut_altivec(const qcms_transform
*transform
,
339 const unsigned char *src
,
342 void qcms_transform_data_bgra_out_lut_altivec(const qcms_transform
*transform
,
343 const unsigned char *src
,
/* "_neon" variants of the rgb, rgba and bgra entry points; presumably
 * implemented with NEON instructions and gated on qcms_supports_neon
 * (confirm at the dispatch site). */
347 void qcms_transform_data_rgb_out_lut_neon(const qcms_transform
*transform
,
348 const unsigned char *src
,
351 void qcms_transform_data_rgba_out_lut_neon(const qcms_transform
*transform
,
352 const unsigned char *src
,
355 void qcms_transform_data_bgra_out_lut_neon(const qcms_transform
*transform
,
356 const unsigned char *src
,
/* Boolean flags defined in another translation unit.
 * NOTE(review): going by the names, one enables ICC v4 profile support and
 * the other two record whether NEON / AVX code paths may be used; confirm
 * where they are set. */
360 extern bool qcms_supports_iccv4
;
361 extern bool qcms_supports_neon
;
362 extern bool qcms_supports_avx
;
/* Atomic counter helpers.
 *
 * qcms_atomic_increment(x) and qcms_atomic_decrement(x) atomically add or
 * subtract one from the lvalue x and evaluate to the updated value. Pass the
 * object itself, not its address: the macros take the address themselves.
 * The argument is parenthesized so any lvalue expression expands safely. */
#ifdef _MSC_VER

/* The interlocked intrinsics are declared by hand here and then marked as
 * compiler intrinsics. */
long __cdecl _InterlockedIncrement(long volatile *);
long __cdecl _InterlockedDecrement(long volatile *);
#pragma intrinsic(_InterlockedIncrement)
#pragma intrinsic(_InterlockedDecrement)

#define qcms_atomic_increment(x) _InterlockedIncrement((long volatile *)&(x))
#define qcms_atomic_decrement(x) _InterlockedDecrement((long volatile *)&(x))

#else

/* GCC-style builtins; both return the value after the update. */
#define qcms_atomic_increment(x) __sync_add_and_fetch(&(x), 1)
#define qcms_atomic_decrement(x) __sync_sub_and_fetch(&(x), 1)

#endif
/* Number of components per pixel, without and with an alpha channel. */
#define RGB_COMPONENTS 3
#define RGBA_COMPONENTS 4

/* Component positions within an RGBA pixel. */
#define RGBA_R_INDEX 0
#define RGBA_G_INDEX 1
#define RGBA_B_INDEX 2
#define RGBA_A_INDEX 3

/* Component positions within a BGRA pixel: red and blue swap places. */
#define BGRA_B_INDEX 0
#define BGRA_G_INDEX 1
#define BGRA_R_INDEX 2
#define BGRA_A_INDEX 3

/* Sentinel alpha index meaning "this layout has no alpha component". */
#define NO_A_INDEX 0xFF

/* Components per pixel for a layout identified by its alpha index:
 * RGB_COMPONENTS for NO_A_INDEX, RGBA_COMPONENTS for anything else. */
#define A_INDEX_COMPONENTS(kAIndex) ((kAIndex) == NO_A_INDEX ? RGB_COMPONENTS : RGBA_COMPONENTS)

/* PRECACHE_OUTPUT_SIZE as a float. The whole replacement list is
 * parenthesized so the cast cannot be split from its operand by the
 * surrounding expression (e.g. `sizeof FLOATSCALE`). */
#define FLOATSCALE ((float)(PRECACHE_OUTPUT_SIZE))
/* (PRECACHE_OUTPUT_SIZE - 1) / PRECACHE_OUTPUT_SIZE as a float, i.e. the
 * value just below 1.0 on that scale. */
#define CLAMPMAXVAL ( ((float) (PRECACHE_OUTPUT_SIZE - 1)) / PRECACHE_OUTPUT_SIZE )