/*
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains code for merged upsampling/color conversion.
 *
 * This file combines functions from jdsample.c and jdcolor.c;
 * read those files first to understand what's going on.
 *
 * When the chroma components are to be upsampled by simple replication
 * (ie, box filtering), we can save some work in color conversion by
 * calculating all the output pixels corresponding to a pair of chroma
 * samples at one time.  In the conversion equations
 *	R = Y           + K1 * Cr
 *	G = Y + K2 * Cb + K3 * Cr
 *	B = Y + K4 * Cb
 * only the Y term varies among the group of pixels corresponding to a pair
 * of chroma samples, so the rest of the terms can be calculated just once.
 * At typical sampling ratios, this eliminates half or three-quarters of the
 * multiplications needed for color conversion.
 *
 * This file currently provides implementations for the following cases:
 *	YCbCr => RGB color conversion only.
 *	Sampling ratios of 2h1v or 2h2v.
 *	No scaling needed at upsample time.
 *	Corner-aligned (non-CCIR601) sampling alignment.
 * Other special cases could be added, but in most applications these are
 * the only common cases.  (For uncommon cases we fall back on the more
 * general code in jdsample.c and jdcolor.c.)
 */
35 #define JPEG_INTERNALS
39 #ifdef UPSAMPLE_MERGING_SUPPORTED
#ifdef HAVE_MMX_INTEL_MNEMONICS
/*
 * Packed coefficients for the MMX code path.  Each quadword holds four
 * signed 16-bit lanes; the trailing comment names the lanes from the most
 * significant to the least significant.  Non-zero lanes are the YCC->RGB
 * factors scaled by 2^14 (0x59BA = 1.40200 * 16384, 0x7168 = 1.77200 * 16384,
 * 0xD24B = -0.71414 * 16384, 0xE9FA = -0.34414 * 16384).
 */
__int64 const1 = 0x59BA0000D24B59BA;	/* Cr_r Cr_b Cr_g Cr_r */
__int64 const2 = 0x00007168E9FA0000;	/* Cb_r Cb_b Cb_g Cb_r */
__int64 const5 = 0x0000D24B59BA0000;	/* Cr_b Cr_g Cr_r Cr_b */
__int64 const6 = 0x7168E9FA00007168;	/* Cb_b Cb_g Cb_r Cb_b */

/* constants for factors (One_Half/fix(x)) << 2 */
__int64 const05 = 0x0001000000000001;	/* Cr_r Cr_b Cr_g Cr_r */
__int64 const15 = 0x00000001FFFA0000;	/* Cb_r Cb_b Cb_g Cb_r */
__int64 const45 = 0x0000000000010000;	/* Cr_b Cr_g Cr_r Cr_b */
__int64 const55 = 0x0001FFFA00000001;	/* Cb_b Cb_g Cb_r Cb_b */
#endif /* HAVE_MMX_INTEL_MNEMONICS */
55 /* Private subobject */
58 struct jpeg_upsampler pub
; /* public fields */
60 /* Pointer to routine to do actual upsampling/conversion of one row group */
61 JMETHOD(void, upmethod
, (j_decompress_ptr cinfo
,
62 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
63 JSAMPARRAY output_buf
));
65 /* Private state for YCC->RGB conversion */
66 int * Cr_r_tab
; /* => table for Cr to R conversion */
67 int * Cb_b_tab
; /* => table for Cb to B conversion */
68 INT32
* Cr_g_tab
; /* => table for Cr to G conversion */
69 INT32
* Cb_g_tab
; /* => table for Cb to G conversion */
71 /* For 2:1 vertical sampling, we produce two output rows at a time.
72 * We need a "spare" row buffer to hold the second output row if the
73 * application provides just a one-row buffer; we also use the spare
74 * to discard the dummy last row if the image height is odd.
77 boolean spare_full
; /* T if spare buffer is occupied */
79 JDIMENSION out_row_width
; /* samples per output row */
80 JDIMENSION rows_to_go
; /* counts rows remaining in image */
83 typedef my_upsampler
* my_upsample_ptr
;
85 #define SCALEBITS 16 /* speediest right-shift on some machines */
86 #define ONE_HALF ((INT32) 1 << (SCALEBITS-1))
87 #define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
91 * Initialize tables for YCC->RGB colorspace conversion.
92 * This is taken directly from jdcolor.c; see that file for more info.
96 build_ycc_rgb_table (j_decompress_ptr cinfo
)
98 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
103 upsample
->Cr_r_tab
= (int *)
104 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
105 (MAXJSAMPLE
+1) * SIZEOF(int));
106 upsample
->Cb_b_tab
= (int *)
107 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
108 (MAXJSAMPLE
+1) * SIZEOF(int));
109 upsample
->Cr_g_tab
= (INT32
*)
110 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
111 (MAXJSAMPLE
+1) * SIZEOF(INT32
));
112 upsample
->Cb_g_tab
= (INT32
*)
113 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
114 (MAXJSAMPLE
+1) * SIZEOF(INT32
));
116 for (i
= 0, x
= -CENTERJSAMPLE
; i
<= MAXJSAMPLE
; i
++, x
++) {
117 /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
118 /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
119 /* Cr=>R value is nearest int to 1.40200 * x */
120 upsample
->Cr_r_tab
[i
] = (int)
121 RIGHT_SHIFT(FIX(1.40200) * x
+ ONE_HALF
, SCALEBITS
);
122 /* Cb=>B value is nearest int to 1.77200 * x */
123 upsample
->Cb_b_tab
[i
] = (int)
124 RIGHT_SHIFT(FIX(1.77200) * x
+ ONE_HALF
, SCALEBITS
);
125 /* Cr=>G value is scaled-up -0.71414 * x */
126 upsample
->Cr_g_tab
[i
] = (- FIX(0.71414)) * x
;
127 /* Cb=>G value is scaled-up -0.34414 * x */
128 /* We also add in ONE_HALF so that need not do it in inner loop */
129 upsample
->Cb_g_tab
[i
] = (- FIX(0.34414)) * x
+ ONE_HALF
;
135 * Initialize for an upsampling pass.
139 start_pass_merged_upsample (j_decompress_ptr cinfo
)
141 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
143 /* Mark the spare buffer empty */
144 upsample
->spare_full
= FALSE
;
145 /* Initialize total-height counter for detecting bottom of image */
146 upsample
->rows_to_go
= cinfo
->output_height
;
151 * Control routine to do upsampling (and color conversion).
153 * The control routine just handles the row buffering considerations.
157 merged_2v_upsample (j_decompress_ptr cinfo
,
158 JSAMPIMAGE input_buf
, JDIMENSION
*in_row_group_ctr
,
159 JDIMENSION in_row_groups_avail
,
160 JSAMPARRAY output_buf
, JDIMENSION
*out_row_ctr
,
161 JDIMENSION out_rows_avail
)
162 /* 2:1 vertical sampling case: may need a spare row. */
164 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
165 JSAMPROW work_ptrs
[2];
166 JDIMENSION num_rows
; /* number of rows returned to caller */
168 if (upsample
->spare_full
) {
169 /* If we have a spare row saved from a previous cycle, just return it. */
170 jcopy_sample_rows(& upsample
->spare_row
, 0, output_buf
+ *out_row_ctr
, 0,
171 1, upsample
->out_row_width
);
173 upsample
->spare_full
= FALSE
;
175 /* Figure number of rows to return to caller. */
177 /* Not more than the distance to the end of the image. */
178 if (num_rows
> upsample
->rows_to_go
)
179 num_rows
= upsample
->rows_to_go
;
180 /* And not more than what the client can accept: */
181 out_rows_avail
-= *out_row_ctr
;
182 if (num_rows
> out_rows_avail
)
183 num_rows
= out_rows_avail
;
184 /* Create output pointer array for upsampler. */
185 work_ptrs
[0] = output_buf
[*out_row_ctr
];
187 work_ptrs
[1] = output_buf
[*out_row_ctr
+ 1];
189 work_ptrs
[1] = upsample
->spare_row
;
190 upsample
->spare_full
= TRUE
;
192 /* Now do the upsampling. */
193 (*upsample
->upmethod
) (cinfo
, input_buf
, *in_row_group_ctr
, work_ptrs
);
197 *out_row_ctr
+= num_rows
;
198 upsample
->rows_to_go
-= num_rows
;
199 /* When the buffer is emptied, declare this input row group consumed */
200 if (! upsample
->spare_full
)
201 (*in_row_group_ctr
)++;
206 merged_1v_upsample (j_decompress_ptr cinfo
,
207 JSAMPIMAGE input_buf
, JDIMENSION
*in_row_group_ctr
,
208 JDIMENSION in_row_groups_avail
,
209 JSAMPARRAY output_buf
, JDIMENSION
*out_row_ctr
,
210 JDIMENSION out_rows_avail
)
211 /* 1:1 vertical sampling case: much easier, never need a spare row. */
213 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
215 /* Just do the upsampling. */
216 (*upsample
->upmethod
) (cinfo
, input_buf
, *in_row_group_ctr
,
217 output_buf
+ *out_row_ctr
);
220 (*in_row_group_ctr
)++;
225 * These are the routines invoked by the control routines to do
226 * the actual upsampling/conversion. One row group is processed per call.
228 * Note: since we may be writing directly into application-supplied buffers,
229 * we have to be honest about the output width; we can't assume the buffer
230 * has been rounded up to an even width.
235 * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
239 h2v1_merged_upsample (j_decompress_ptr cinfo
,
240 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
241 JSAMPARRAY output_buf
)
245 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
246 register int y
, cred
, cgreen
, cblue
;
248 register JSAMPROW outptr
;
249 JSAMPROW inptr0
, inptr1
, inptr2
;
251 /* copy these pointers into registers if possible */
252 register JSAMPLE
* range_limit
= cinfo
->sample_range_limit
;
253 int * Crrtab
= upsample
->Cr_r_tab
;
254 int * Cbbtab
= upsample
->Cb_b_tab
;
255 INT32
* Crgtab
= upsample
->Cr_g_tab
;
256 INT32
* Cbgtab
= upsample
->Cb_g_tab
;
259 inptr0
= input_buf
[0][in_row_group_ctr
];
260 inptr1
= input_buf
[1][in_row_group_ctr
];
261 inptr2
= input_buf
[2][in_row_group_ctr
];
262 outptr
= output_buf
[0];
263 /* Loop for each pair of output pixels */
264 for (col
= cinfo
->output_width
>> 1; col
> 0; col
--) {
265 /* Do the chroma part of the calculation */
266 cb
= GETJSAMPLE(*inptr1
++);
267 cr
= GETJSAMPLE(*inptr2
++);
269 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
271 /* Fetch 2 Y values and emit 2 pixels */
272 y
= GETJSAMPLE(*inptr0
++);
273 outptr
[RGB_RED
] = range_limit
[y
+ cred
];
274 outptr
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
275 outptr
[RGB_BLUE
] = range_limit
[y
+ cblue
];
276 outptr
+= RGB_PIXELSIZE
;
277 y
= GETJSAMPLE(*inptr0
++);
278 outptr
[RGB_RED
] = range_limit
[y
+ cred
];
279 outptr
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
280 outptr
[RGB_BLUE
] = range_limit
[y
+ cblue
];
281 outptr
+= RGB_PIXELSIZE
;
283 /* If image width is odd, do the last output column separately */
284 if (cinfo
->output_width
& 1) {
285 cb
= GETJSAMPLE(*inptr1
);
286 cr
= GETJSAMPLE(*inptr2
);
288 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
290 y
= GETJSAMPLE(*inptr0
);
291 outptr
[RGB_RED
] = range_limit
[y
+ cred
];
292 outptr
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
293 outptr
[RGB_BLUE
] = range_limit
[y
+ cblue
];
299 * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
302 #ifdef HAVE_MMX_INTEL_MNEMONICS
303 __inline
METHODDEF(void)
304 h2v2_merged_upsample_orig (j_decompress_ptr cinfo
,
305 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
306 JSAMPARRAY output_buf
);
307 __inline
METHODDEF(void)
308 h2v2_merged_upsample_mmx (j_decompress_ptr cinfo
,
309 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
310 JSAMPARRAY output_buf
);
314 h2v2_merged_upsample (j_decompress_ptr cinfo
,
315 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
316 JSAMPARRAY output_buf
);
318 #ifdef HAVE_MMX_INTEL_MNEMONICS
320 h2v2_merged_upsample (j_decompress_ptr cinfo
,
321 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
322 JSAMPARRAY output_buf
)
324 if (MMXAvailable
&& (cinfo
->image_width
>= 8))
325 h2v2_merged_upsample_mmx (cinfo
, input_buf
, in_row_group_ctr
, output_buf
);
327 h2v2_merged_upsample_orig (cinfo
, input_buf
, in_row_group_ctr
, output_buf
);
331 __inline
METHODDEF(void)
332 h2v2_merged_upsample_orig (j_decompress_ptr cinfo
,
333 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
334 JSAMPARRAY output_buf
)
337 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
338 register int y
, cred
, cgreen
, cblue
;
340 register JSAMPROW outptr0
, outptr1
;
341 JSAMPROW inptr00
, inptr01
, inptr1
, inptr2
;
343 /* copy these pointers into registers if possible */
344 register JSAMPLE
* range_limit
= cinfo
->sample_range_limit
;
345 int * Crrtab
= upsample
->Cr_r_tab
;
346 int * Cbbtab
= upsample
->Cb_b_tab
;
347 INT32
* Crgtab
= upsample
->Cr_g_tab
;
348 INT32
* Cbgtab
= upsample
->Cb_g_tab
;
351 inptr00
= input_buf
[0][in_row_group_ctr
*2];
352 inptr01
= input_buf
[0][in_row_group_ctr
*2 + 1];
353 inptr1
= input_buf
[1][in_row_group_ctr
];
354 inptr2
= input_buf
[2][in_row_group_ctr
];
355 outptr0
= output_buf
[0];
356 outptr1
= output_buf
[1];
357 /* Loop for each group of output pixels */
358 for (col
= cinfo
->output_width
>> 1; col
> 0; col
--) {
359 /* Do the chroma part of the calculation */
360 cb
= GETJSAMPLE(*inptr1
++);
361 cr
= GETJSAMPLE(*inptr2
++);
363 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
365 /* Fetch 4 Y values and emit 4 pixels */
366 y
= GETJSAMPLE(*inptr00
++);
367 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
368 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
369 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
370 outptr0
+= RGB_PIXELSIZE
;
371 y
= GETJSAMPLE(*inptr00
++);
372 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
373 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
374 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
375 outptr0
+= RGB_PIXELSIZE
;
376 y
= GETJSAMPLE(*inptr01
++);
377 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
378 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
379 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
380 outptr1
+= RGB_PIXELSIZE
;
381 y
= GETJSAMPLE(*inptr01
++);
382 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
383 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
384 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
385 outptr1
+= RGB_PIXELSIZE
;
387 /* If image width is odd, do the last output column separately */
388 if (cinfo
->output_width
& 1) {
389 cb
= GETJSAMPLE(*inptr1
);
390 cr
= GETJSAMPLE(*inptr2
);
392 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
394 y
= GETJSAMPLE(*inptr00
);
395 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
396 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
397 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
398 y
= GETJSAMPLE(*inptr01
);
399 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
400 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
401 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
406 * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
/*
 * MMX variant of the 2h2v merged upsample/color-convert routine.
 *
 * Takes the same arguments as h2v2_merged_upsample_orig.  The visible code
 * splits the output row into cols_asm = output_width >> 3 groups of eight
 * columns, which the inline-assembly section handles, and
 * diff = output_width - 8 times cols_asm leftover columns, which the scalar
 * code at the end handles.  After the assembly section the C pointers are
 * advanced by cols_asm_copy times 4 chroma samples, 8 luma samples and
 * 24 output bytes.
 *
 * NOTE(review): the 24-byte advance presumably assumes 3 bytes per output
 * pixel (RGB_PIXELSIZE == 3) -- confirm.
 * NOTE(review): this listing appears to be missing lines (no asm prologue,
 * loop label or loop control is visible), so the assembly section cannot be
 * checked from here.
 */
408 __inline
METHODDEF(void)
409 h2v2_merged_upsample_mmx (j_decompress_ptr cinfo
,
410 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
411 JSAMPARRAY output_buf
)
414 __int64 const128
= 0x0080008000800080;
415 __int64 empty
= 0x0000000000000000;
416 __int64 davemask
= 0x0000FFFFFFFF0000;
417 ////////////////////////////////
419 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
420 register int y
, cred
, cgreen
, cblue
;
422 register JSAMPROW outptr0
, outptr1
;
423 JSAMPROW inptr00
, inptr01
, inptr1
, inptr2
;
425 /* copy these pointers into registers if possible */
426 register JSAMPLE
* range_limit
= cinfo
->sample_range_limit
;
427 int * Crrtab
= upsample
->Cr_r_tab
;
428 int * Cbbtab
= upsample
->Cb_b_tab
;
429 INT32
* Crgtab
= upsample
->Cr_g_tab
;
430 INT32
* Cbgtab
= upsample
->Cb_g_tab
;
435 register int width
= cinfo
->image_width
;
436 int cols
= cinfo
->output_width
;
437 int cols_asm
= (cols
>> 3);
438 int diff
= cols
- (cols_asm
<<3);
439 int cols_asm_copy
= cols_asm
;
441 ///////////////////////////////////////
443 inptr00
= input_buf
[0][in_row_group_ctr
*2];
444 inptr01
= input_buf
[0][in_row_group_ctr
*2 + 1];
445 inptr1
= input_buf
[1][in_row_group_ctr
];
446 inptr2
= input_buf
[2][in_row_group_ctr
];
447 outptr0
= output_buf
[0];
448 outptr1
= output_buf
[1];
449 /* Loop for each group of output pixels */
468 movd mm0
, [ebx
] ; Cr7 Cr6
.....Cr1 Cr0
472 punpcklbw mm0
, mm0
; Cr3 Cr3 Cr2 Cr2 Cr1 Cr1 Cr0 Cr0
476 punpcklwd mm0
, mm0
; Cr1 Cr1 Cr1 Cr1 Cr0 Cr0 Cr0 Cr0
480 punpcklbw mm0
, mm6
; Cr0 Cr0 Cr0 Cr0
482 psubsw mm0
, mm7
; Cr0
- 128:Cr0
-128:Cr0
-128:Cr0
-128
484 movd mm1
, [ecx
] ; Cb7 Cb6
...... Cb1 Cb0
486 psllw mm0
, 2 ; left shift by
2 bits
488 punpcklbw mm1
, mm1
; Cb3 Cb3 Cb2 Cb2 Cb1 Cb1 Cb0 Cb0
490 paddsw mm0
, const05
; add (one_half
/fix(x
)) << 2
492 punpcklwd mm1
, mm1
; Cb1 Cb1 Cb1 Cb1 Cb0 Cb0 Cb0 Cb0
496 pmulhw mm0
, const1
; multiply
by (fix(x
) >> 1)
498 punpcklbw mm1
, mm6
; Cb0 Cb0 Cb0 Cb0
500 punpckhbw mm4
, mm6
; Cr1 Cr1 Cr1 Cr1
502 psubsw mm1
, mm7
; Cb0
- 128:Cb0
-128:Cb0
-128:Cb0
-128
504 punpckhbw mm5
, mm6
; Cb1 Cb1 Cb1 Cb1
506 psllw mm1
, 2 ; left shift by
2 bits
508 paddsw mm1
, const15
; add (one_half
/fix(x
)) << 2
510 psubsw mm4
, mm7
; Cr1
- 128:Cr1
-128:Cr1
-128:Cr1
-128
512 psubsw mm5
, mm7
; Cb1
- 128:Cb1
-128:Cb1
-128:Cb1
-128
514 pmulhw mm1
, const2
; multiply
by (fix(x
) >> 1)
516 psllw mm4
, 2 ; left shift by
2 bits
518 psllw mm5
, 2 ; left shift by
2 bits
520 paddsw mm4
, const45
; add (one_half
/fix(x
)) << 2
522 movd mm7
, [esi
] ; Y13 Y12 Y9 Y8 Y5 Y4 Y1 Y0
524 pmulhw mm4
, const5
; multiply
by (fix(x
) >> 1)
528 punpcklbw mm7
, mm7
; Y5 Y5 Y4 Y4 Y1 Y1 Y0 Y0
530 paddsw mm5
, const55
; add (one_half
/fix(x
)) << 2
532 paddsw mm0
, mm1
; cred0 cbl0 cgr0 cred0
536 pmulhw mm5
, const6
; multiply
by (fix(x
) >> 1)
538 movq mm2
, mm0
; cred0 cbl0 cgr0 cred0
540 punpcklwd mm7
, mm6
; Y5 Y4 Y1 Y1 Y1 Y0 Y0 Y0
542 pand mm2
, davemask
; 0 cbl0 cgr0
0
544 psrlq mm1
, 16 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1
546 psrlq mm2
, 16 ; 0 0 cbl0 cgr0
548 punpcklbw mm7
, empty
; Y1 Y0 Y0 Y0
550 paddsw mm4
, mm5
; cbl1 cgr1 cred1 cbl1
552 movq mm3
, mm4
; cbl1 cgr1 cred1 cbl1
554 pand mm3
, davemask
; 0 cgr1 cred1
0
556 paddsw mm7
, mm0
; r1 b0 g0 r0
558 psllq mm3
, 16 ; cgr1 cred1
0 0
560 movq mm6
, mm1
; 0 0 Y5 Y5 Y4 Y4 Y1 Y1
562 por mm2
, mm3
; cgr1 cred1 cbl0 cgr0
564 punpcklbw mm6
, empty
; Y4 Y4 Y1 Y1
566 movd mm3
, [eax
] ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2
568 paddsw mm6
, mm2
; g4 r4 b1 g1
570 packuswb mm7
, mm6
; g4 r4 b1 g1 r1 b0 g0 r0
572 movq mm6
, mm3
; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2
574 punpcklbw mm3
, mm3
; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2
576 movq
[edi
], mm7
; move to memory g4 r4 b1 g1 r1 b0 g0 r0
578 movq mm5
, mm3
; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2
580 punpcklwd mm3
, mm6
; X X X X Y3 Y2 Y2 Y2
582 punpcklbw mm3
, empty
; Y3 Y2 Y2 Y2
584 psrlq mm5
, 16 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3
586 paddsw mm3
, mm0
; r3 b2 g2 r2
588 movq mm6
, mm5
; 0 0 Y7 Y7 Y6 Y6 Y3 Y3
590 movq mm0
, mm1
; 0 0 Y5 Y5 Y4 Y4 Y1 Y1
592 punpckldq mm6
, mm6
; X X X X Y6 Y6 Y3 Y3
594 punpcklbw mm6
, empty
; Y6 Y6 Y3 Y3
596 psrlq mm1
, 24 ; 0 0 0 0 0 Y5 Y5 Y4
598 paddsw mm6
, mm2
; g6 r6 b3 g3
600 packuswb mm3
, mm6
; g6 r6 b3 g3 r3 b2 g2 r2
602 movq mm2
, mm5
; 0 0 Y7 Y7 Y6 Y6 Y3 Y3
604 psrlq mm0
, 32 ; 0 0 0 0 0 0 Y5 Y5
606 movq
[edx
], mm3
; move to memory g6 r6 b3 g3 r3 b2 g2 r2
608 punpcklwd mm1
, mm0
; X X X X Y5 Y5 Y5 Y4
610 psrlq mm5
, 24 ; 0 0 0 0 0 Y7 Y7 Y6
612 movd mm0
, [ebx
] ; Cr9 Cr8
.....Cr3 Cr2
614 psrlq mm2
, 32 ; 0 0 0 0 0 0 Y7 Y7
618 punpcklbw mm1
, empty
; Y5 Y5 Y5 Y4
620 punpcklwd mm5
, mm2
; X X X X Y7 Y7 Y7 Y6
622 paddsw mm1
, mm4
; b5 g5 r5 b4
624 punpcklbw mm5
, empty
; Y7 Y7 Y7 Y6
626 pxor mm6
, mm6
; clear mm6 registr
628 punpcklbw mm0
, mm0
; X X X X Cr3 Cr3 Cr2 Cr2
630 paddsw mm5
, mm4
; b7 g7 r7 b6
632 punpcklwd mm0
, mm0
; Cr3 Cr3 Cr3 Cr3 Cr2 Cr2 Cr2 Cr2
636 movd mm3
, [ecx
] ; Cb9 Cb8
...... Cb3 Cb2
638 punpcklbw mm0
, mm6
; Cr2 Cr2 Cr2 Cr2
642 psubsw mm0
, const128
; Cr2
- 128:Cr2
-128:Cr2
-128:Cr2
-128
644 punpcklbw mm3
, mm3
; X X X X Cb3 Cb3 Cb2 Cb2
646 psllw mm0
, 2 ; left shift by
2 bits
648 paddsw mm0
, const05
; add (one_half
/fix(x
)) << 2
650 punpcklwd mm3
, mm3
; Cb3 Cb3 Cb3 Cb3 Cb2 Cb2 Cb2 Cb2
654 pmulhw mm0
, const1
; multiply
by (fix(x
) >> 1)
656 punpcklbw mm3
, mm6
; Cb2 Cb2 Cb2 Cb2
658 psubsw mm3
, const128
; Cb0
- 128:Cb0
-128:Cb0
-128:Cb0
-128
660 punpckhbw mm4
, mm6
; Cr3 Cr3 Cr3 Cr3
662 psllw mm3
, 2 ; left shift by
2 bits
664 paddsw mm3
, const15
; add (one_half
/fix(x
)) << 2
666 punpckhbw mm7
, mm6
; Cb3 Cb3 Cb3 Cb3
668 pmulhw mm3
, const2
; multiply
by (fix(x
) >> 1)
670 psubsw mm7
, const128
; Cb3
- 128:Cb3
-128:Cb3
-128:Cb3
-128
672 paddsw mm0
, mm3
; cred2 cbl2 cgr2 cred2
674 psllw mm7
, 2 ; left shift by
2 bits
676 psubsw mm4
, const128
; Cr3
- 128:Cr3
-128:Cr3
-128:Cr3
-128
678 movd mm3
, [esi
+4] ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8
680 psllw mm4
, 2 ; left shift by
2 bits
682 paddsw mm7
, const55
; add (one_half
/fix(x
)) << 2
684 movq mm6
, mm3
; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8
690 punpcklbw mm3
, mm3
; Y13 Y13 Y12 Y12 Y9 Y9 Y8 Y8
694 paddsw mm4
, const45
; add (one_half
/fix(x
)) << 2
696 punpcklwd mm3
, mm6
; X X X X Y9 Y8 Y8 Y8
698 pmulhw mm4
, const5
; multiply
by (fix(x
) >> 1)
700 pmulhw mm7
, const6
; multiply
by (fix(x
) >> 1)
702 punpcklbw mm3
, empty
; Y9 Y8 Y8 Y8
704 paddsw mm4
, mm7
; cbl3 cgr3 cred3 cbl3
706 paddsw mm3
, mm0
; r9 b8 g8 r8
710 packuswb mm1
, mm3
; r9 b8 g8 r8 b5 g5 r5 b4
712 movd mm3
, [eax
+4] ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10
716 psrlq mm6
, 8 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9
720 movq
[edi
+8], mm1
; move to memory r9 b8 g8 r8 b5 g5 r5 b4
724 movq mm7
, mm3
; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10
726 punpcklbw mm3
, mm3
; X X X X Y11 Y11 Y10 Y10
730 punpcklwd mm3
, mm7
; X X X X Y11 Y10 Y10 Y10
732 punpcklbw mm3
, mm1
; Y11 Y10 Y10 Y10
734 psrlq mm7
, 8 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11
736 paddsw mm3
, mm0
; r11 b10 g10 r10
738 movq mm0
, mm7
; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11
740 packuswb mm5
, mm3
; r11 b10 g10 r10 b7 g7 r7 b6
742 punpcklbw mm7
, mm7
; X X X X Y14 Y14 Y11 Y11
744 movq
[edx
+8], mm5
; move to memory r11 b10 g10 r10 b7 g7 r7 b6
746 movq mm3
, mm6
; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9
748 punpcklbw mm6
, mm6
; X X X X Y12 Y12 Y9 Y9
750 punpcklbw mm7
, mm1
; Y14 Y14 Y11 Y11
752 punpcklbw mm6
, mm1
; Y12 Y12 Y9 Y9
754 paddsw mm7
, mm2
; g14 r14 b11 g11
756 paddsw mm6
, mm2
; g12 r12 b9 g9
758 psrlq mm3
, 8 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12
760 movq mm1
, mm3
; 0 0 Y21 Y20 Y17 Y16 Y13 Y12
762 punpcklbw mm3
, mm3
; X X X X Y13 Y13 Y12 Y12
766 psrlq mm3
, 16 ; X X X X X X Y13 Y13 modified on
09/24
768 punpcklwd mm1
, mm3
; X X X X Y13 Y13 Y13 Y12
772 psrlq mm0
, 8 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14
774 punpcklbw mm1
, empty
; Y13 Y13 Y13 Y12
776 movq mm5
, mm0
; 0 0 Y23 Y22 Y19 Y18 Y15 Y14
778 punpcklbw mm0
, mm0
; X X X X Y15 Y15 Y14 Y14
780 paddsw mm1
, mm4
; b13 g13 r13 b12
782 psrlq mm0
, 16 ; X X X X X X Y15 Y15
786 punpcklwd mm5
, mm0
; X X X X Y15 Y15 Y15 Y14
788 packuswb mm6
, mm1
; b13 g13 r13 b12 g12 r12 b9 g9
792 punpcklbw mm5
, empty
; Y15 Y15 Y15 Y14
796 paddsw mm5
, mm4
; b15 g15 r15 b14
798 movq
[edi
-8], mm6
; move to memory b13 g13 r13 b12 g12 r12 b9 g9
800 packuswb mm7
, mm5
; b15 g15 r15 b14 g14 r14 b11 g11
804 movq
[edx
-8], mm7
; move to memory b15 g15 r15 b14 g14 r14 b11 g11
815 inptr1
+= (cols_asm_copy
<<2);
817 inptr2
+= (cols_asm_copy
<<2);
819 inptr00
+= (cols_asm_copy
<<3);
821 inptr01
+= (cols_asm_copy
<<3);
823 outptr0
+= cols_asm_copy
*24;
825 outptr1
+= cols_asm_copy
*24;
827 //for (col = cinfo->output_width >> 1; col > 0; col--) {
828 /* Do the chroma part of the calculation */
829 /*cb = GETJSAMPLE(*inptr1++);
830 cr = GETJSAMPLE(*inptr2++);
832 cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
833 cblue = Cbbtab[cb];*/
834 /* Fetch 4 Y values and emit 4 pixels */
835 /*y = GETJSAMPLE(*inptr00++);
836 outptr0[RGB_RED] = range_limit[y + cred];
837 outptr0[RGB_GREEN] = range_limit[y + cgreen];
838 outptr0[RGB_BLUE] = range_limit[y + cblue];
839 outptr0 += RGB_PIXELSIZE;
840 y = GETJSAMPLE(*inptr00++);
841 outptr0[RGB_RED] = range_limit[y + cred];
842 outptr0[RGB_GREEN] = range_limit[y + cgreen];
843 outptr0[RGB_BLUE] = range_limit[y + cblue];
844 outptr0 += RGB_PIXELSIZE;
845 y = GETJSAMPLE(*inptr01++);
846 outptr1[RGB_RED] = range_limit[y + cred];
847 outptr1[RGB_GREEN] = range_limit[y + cgreen];
848 outptr1[RGB_BLUE] = range_limit[y + cblue];
849 outptr1 += RGB_PIXELSIZE;
850 y = GETJSAMPLE(*inptr01++);
851 outptr1[RGB_RED] = range_limit[y + cred];
852 outptr1[RGB_GREEN] = range_limit[y + cgreen];
853 outptr1[RGB_BLUE] = range_limit[y + cblue];
854 outptr1 += RGB_PIXELSIZE;
858 for (col
= diff
>> 1; col
> 0; col
--) {
859 /* Do the chroma part of the calculation */
860 cb
= GETJSAMPLE(*inptr1
++);
861 cr
= GETJSAMPLE(*inptr2
++);
863 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
865 /* Fetch 4 Y values and emit 4 pixels */
866 y
= GETJSAMPLE(*inptr00
++);
867 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
868 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
869 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
870 outptr0
+= RGB_PIXELSIZE
;
871 y
= GETJSAMPLE(*inptr00
++);
872 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
873 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
874 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
875 outptr0
+= RGB_PIXELSIZE
;
876 y
= GETJSAMPLE(*inptr01
++);
877 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
878 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
879 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
880 outptr1
+= RGB_PIXELSIZE
;
881 y
= GETJSAMPLE(*inptr01
++);
882 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
883 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
884 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
885 outptr1
+= RGB_PIXELSIZE
;
889 /* If image width is odd, do the last output column separately */
890 //if (cinfo->output_width & 1) {
/* NOTE(review): single leftover column.  No assignment to cred or cblue is
 * visible in this function's scalar code -- confirm against the full source. */
892 cb
= GETJSAMPLE(*inptr1
);
893 cr
= GETJSAMPLE(*inptr2
);
895 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
897 y
= GETJSAMPLE(*inptr00
);
898 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
899 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
900 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
901 y
= GETJSAMPLE(*inptr01
);
902 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
903 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
904 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
911 h2v2_merged_upsample (j_decompress_ptr cinfo
,
912 JSAMPIMAGE input_buf
, JDIMENSION in_row_group_ctr
,
913 JSAMPARRAY output_buf
)
915 my_upsample_ptr upsample
= (my_upsample_ptr
) cinfo
->upsample
;
916 register int y
, cred
, cgreen
, cblue
;
918 register JSAMPROW outptr0
, outptr1
;
919 JSAMPROW inptr00
, inptr01
, inptr1
, inptr2
;
921 /* copy these pointers into registers if possible */
922 register JSAMPLE
* range_limit
= cinfo
->sample_range_limit
;
923 int * Crrtab
= upsample
->Cr_r_tab
;
924 int * Cbbtab
= upsample
->Cb_b_tab
;
925 INT32
* Crgtab
= upsample
->Cr_g_tab
;
926 INT32
* Cbgtab
= upsample
->Cb_g_tab
;
929 inptr00
= input_buf
[0][in_row_group_ctr
*2];
930 inptr01
= input_buf
[0][in_row_group_ctr
*2 + 1];
931 inptr1
= input_buf
[1][in_row_group_ctr
];
932 inptr2
= input_buf
[2][in_row_group_ctr
];
933 outptr0
= output_buf
[0];
934 outptr1
= output_buf
[1];
935 /* Loop for each group of output pixels */
936 for (col
= cinfo
->output_width
>> 1; col
> 0; col
--) {
937 /* Do the chroma part of the calculation */
938 cb
= GETJSAMPLE(*inptr1
++);
939 cr
= GETJSAMPLE(*inptr2
++);
941 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
943 /* Fetch 4 Y values and emit 4 pixels */
944 y
= GETJSAMPLE(*inptr00
++);
945 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
946 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
947 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
948 outptr0
+= RGB_PIXELSIZE
;
949 y
= GETJSAMPLE(*inptr00
++);
950 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
951 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
952 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
953 outptr0
+= RGB_PIXELSIZE
;
954 y
= GETJSAMPLE(*inptr01
++);
955 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
956 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
957 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
958 outptr1
+= RGB_PIXELSIZE
;
959 y
= GETJSAMPLE(*inptr01
++);
960 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
961 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
962 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
963 outptr1
+= RGB_PIXELSIZE
;
965 /* If image width is odd, do the last output column separately */
966 if (cinfo
->output_width
& 1) {
967 cb
= GETJSAMPLE(*inptr1
);
968 cr
= GETJSAMPLE(*inptr2
);
970 cgreen
= (int) RIGHT_SHIFT(Cbgtab
[cb
] + Crgtab
[cr
], SCALEBITS
);
972 y
= GETJSAMPLE(*inptr00
);
973 outptr0
[RGB_RED
] = range_limit
[y
+ cred
];
974 outptr0
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
975 outptr0
[RGB_BLUE
] = range_limit
[y
+ cblue
];
976 y
= GETJSAMPLE(*inptr01
);
977 outptr1
[RGB_RED
] = range_limit
[y
+ cred
];
978 outptr1
[RGB_GREEN
] = range_limit
[y
+ cgreen
];
979 outptr1
[RGB_BLUE
] = range_limit
[y
+ cblue
];
986 * Module initialization routine for merged upsampling/color conversion.
988 * NB: this is called under the conditions determined by use_merged_upsample()
989 * in jdmaster.c. That routine MUST correspond to the actual capabilities
990 * of this module; no safety checks are made here.
994 jinit_merged_upsampler (j_decompress_ptr cinfo
)
996 my_upsample_ptr upsample
;
998 upsample
= (my_upsample_ptr
)
999 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
1000 SIZEOF(my_upsampler
));
1001 cinfo
->upsample
= (struct jpeg_upsampler
*) upsample
;
1002 upsample
->pub
.start_pass
= start_pass_merged_upsample
;
1003 upsample
->pub
.need_context_rows
= FALSE
;
1005 upsample
->out_row_width
= cinfo
->output_width
* cinfo
->out_color_components
;
1007 if (cinfo
->max_v_samp_factor
== 2) {
1008 upsample
->pub
.upsample
= merged_2v_upsample
;
1009 upsample
->upmethod
= h2v2_merged_upsample
;
1010 /* Allocate a spare row buffer */
1011 upsample
->spare_row
= (JSAMPROW
)
1012 (*cinfo
->mem
->alloc_large
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
1013 (size_t) (upsample
->out_row_width
* SIZEOF(JSAMPLE
)));
1015 upsample
->pub
.upsample
= merged_1v_upsample
;
1016 upsample
->upmethod
= h2v1_merged_upsample
;
1017 /* No spare row needed */
1018 upsample
->spare_row
= NULL
;
1021 build_ycc_rgb_table(cinfo
);
1024 #endif /* UPSAMPLE_MERGING_SUPPORTED */