2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
27 #include "Rasterizer.h"
28 #include "SeparableFilter.h"
29 #include "xy_logger.h"
30 #include <boost/flyweight/key_value.hpp>
32 #ifndef _MAX /* avoid collision with common (nonconforming) macros */
38 #define _IMPL_MAX _MAX
39 #define _IMPL_MIN _MIN
43 //NOTE: signed or unsigned affects the result seriously
44 #define COMBINE_AYUV(a, y, u, v) ((((((((int)(a))<<8)|y)<<8)|u)<<8)|v)
46 #define SPLIT_AYUV(color, a, y, u, v) do { \
48 *(u)=((color)>>8) &0xff; \
49 *(y)=((color)>>16)&0xff;\
50 *(a)=((color)>>24)&0xff;\
56 static const int VOLUME_BITS
= 22;//should not exceed 32-8, and better not exceed 31-8
58 ass_synth_priv(const double sigma
);
59 ass_synth_priv(const ass_synth_priv
& priv
);
62 int generate_tables(double sigma
);
73 struct ass_synth_priv_key
75 const double& operator()(const ass_synth_priv
& x
)const
84 ass_tmp_buf(size_t size
);
85 ass_tmp_buf(const ass_tmp_buf
& buf
);
91 struct ass_tmp_buf_get_size
93 const size_t& operator()(const ass_tmp_buf
& buf
)const
99 static const unsigned int maxcolor
= 255;
100 static const unsigned base
= 256;
102 ass_synth_priv::ass_synth_priv(const double sigma
)
111 generate_tables(sigma
);
114 ass_synth_priv::ass_synth_priv(const ass_synth_priv
& priv
):g_r(priv
.g_r
),g_w(priv
.g_w
),sigma(priv
.sigma
)
116 if (this->g_w
> 0 && this != &priv
) {
117 this->g
= (unsigned*)realloc(this->g
, this->g_w
* sizeof(unsigned));
118 this->gt2
= (unsigned*)realloc(this->gt2
, 256 * this->g_w
* sizeof(unsigned));
119 //if (this->g == null || this->gt2 == null) {
122 memcpy(g
, priv
.g
, this->g_w
* sizeof(unsigned));
123 memcpy(gt2
, priv
.gt2
, 256 * this->g_w
* sizeof(unsigned));
127 ass_synth_priv::~ass_synth_priv()
133 int ass_synth_priv::generate_tables(double sigma
)
135 const int TARGET_VOLUME
= 1<<VOLUME_BITS
;
136 const int MAX_VOLUME_ERROR
= VOLUME_BITS
>=22 ? 16 : 1;
138 double a
= -1 / (sigma
* sigma
* 2);
139 double exp_a
= exp(a
);
141 double volume_factor
= 0;
142 double volume_start
= 0, volume_end
= 0;
145 if (this->sigma
== sigma
)
150 this->g_w
= (int)ceil(sigma
*3) | 1;
151 this->g_r
= this->g_w
/ 2;
154 this->g
= (unsigned*)realloc(this->g
, this->g_w
* sizeof(unsigned));
155 this->gt2
= (unsigned*)realloc(this->gt2
, 256 * this->g_w
* sizeof(unsigned));
156 if (this->g
== NULL
|| this->gt2
== NULL
) {
165 double exp_1
= exp_a
;
166 double exp_2
= exp_1
* exp_1
;
167 volume_start
+= exp_0
;
168 for(int i
=0;i
<this->g_r
;++i
)
172 volume_start
+= exp_0
;
173 volume_start
+= exp_0
;
176 // for (i = 0; i < this->g_w; ++i) {
177 // volume_start += exp(a * (i - this->g_r) * (i - this->g_r));
180 volume_end
= (TARGET_VOLUME
+g_w
)/volume_start
;
181 volume_start
= (TARGET_VOLUME
-g_w
)/volume_start
;
184 while( volume_start
+0.000001<volume_end
)
186 volume_factor
= (volume_start
+volume_end
)*0.5;
189 exp_0
= volume_factor
;
191 exp_2
= exp_1
* exp_1
;
193 volume
= static_cast<int>(exp_0
+.5);
194 this->g
[this->g_r
] = volume
;
196 unsigned* p_left
= this->g
+this->g_r
-1;
197 unsigned* p_right
= this->g
+this->g_r
+1;
198 for(int i
=0; i
<this->g_r
;++i
,p_left
--,p_right
++)
202 *p_left
= static_cast<int>(exp_0
+.5);
204 volume
+= (*p_left
<<1);
207 // for (i = 0; i < this->g_w; ++i) {
208 // this->g[i] = (unsigned) ( exp(a * (i - this->g_r) * (i - this->g_r))* volume_factor + .5 );
209 // volume += this->g[i];
212 // volume doesn't have to be exactly equal to TARGET_VOLUME:
213 // even if volume=TARGET_VOLUME+MAX_VOLUME_ERROR,
214 // the max error introduced in the later blur operation,
215 // which is (dot_product(g_w, pixel))/TARGET_VOLUME with pixel<256,
216 // would not exceed (MAX_VOLUME_ERROR*256)/TARGET_VOLUME;
217 // as long as MAX_VOLUME_ERROR/TARGET_VOLUME is small enough, the error introduced stays within a safe range
219 // NOTE: when it comes to rounding, no matter how small the error is,
220 // it may result in a different rounding output
221 if( volume
>=TARGET_VOLUME
&& volume
< (TARGET_VOLUME
+MAX_VOLUME_ERROR
) )
223 else if(volume
< TARGET_VOLUME
)
225 volume_start
= volume_factor
;
227 else if(volume
>= TARGET_VOLUME
+MAX_VOLUME_ERROR
)
229 volume_end
= volume_factor
;
234 volume_factor
= volume_end
;
236 exp_0
= volume_factor
;
238 exp_2
= exp_1
* exp_1
;
240 volume
= static_cast<int>(exp_0
+.5);
241 this->g
[this->g_r
] = volume
;
243 unsigned* p_left
= this->g
+this->g_r
-1;
244 unsigned* p_right
= this->g
+this->g_r
+1;
245 for(int i
=0; i
<this->g_r
;++i
,p_left
--,p_right
++)
249 *p_left
= static_cast<int>(exp_0
+.5);
251 volume
+= (*p_left
<<1);
254 // for (i = 0; i < this->g_w; ++i) {
255 // this->g[i] = (unsigned) ( exp(a * (i - this->g_r) * (i - this->g_r))* volume_factor + .5 );
256 // volume += this->g[i];
261 for (int mx
= 0; mx
< this->g_w
; mx
++) {
263 unsigned *p_gt2
= this->gt2
+ mx
;
265 for (int i
= 1; i
< 256; i
++) {
266 last_mul
= last_mul
+this->g
[mx
];
270 // this->gt2[this->g_w * i+ mx] = this->g[mx] * i;
277 ass_tmp_buf::ass_tmp_buf(size_t size
)
279 tmp
= (unsigned *)malloc(size
* sizeof(unsigned));
283 ass_tmp_buf::ass_tmp_buf(const ass_tmp_buf
& buf
)
286 tmp
= (unsigned *)malloc(size
* sizeof(unsigned));
289 ass_tmp_buf::~ass_tmp_buf()
295 * \brief Gaussian blur. A fast pure C implementation from libass.
297 static void ass_gauss_blur(unsigned char *buffer
, unsigned *tmp2
,
298 int width
, int height
, int stride
, const unsigned *m2
,
304 unsigned char *s
= buffer
;
305 unsigned *t
= tmp2
+ 1;
306 for (y
= 0; y
< height
; y
++) {
307 memset(t
- 1, 0, (width
+ 1) * sizeof(*t
));
309 if(x
< r
)//in case that r < 0
311 const int src
= s
[x
];
313 register unsigned *dstp
= t
+ x
- r
;
315 const unsigned *m3
= m2
+ src
* mwidth
;
317 for (mx
= mwidth
-1; mx
>= r
- x
; mx
--) {
324 for (x
= 1; x
< r
; x
++) {
325 const int src
= s
[x
];
327 register unsigned *dstp
= t
+ x
- r
;
329 const unsigned *m3
= m2
+ src
* mwidth
;
330 for (mx
= r
- x
; mx
< mwidth
; mx
++) {
336 for (; x
< width
- r
; x
++) {
337 const int src
= s
[x
];
339 register unsigned *dstp
= t
+ x
- r
;
341 const unsigned *m3
= m2
+ src
* mwidth
;
342 for (mx
= 0; mx
< mwidth
; mx
++) {
348 for (; x
< width
-1; x
++) {
349 const int src
= s
[x
];
351 register unsigned *dstp
= t
+ x
- r
;
353 const int x2
= r
+ width
- x
;
354 const unsigned *m3
= m2
+ src
* mwidth
;
355 for (mx
= 0; mx
< x2
; mx
++) {
360 if(x
==width
-1) //important: x==width-1 failed, if r==0
362 const int src
= s
[x
];
364 register unsigned *dstp
= t
+ x
- r
;
366 const int x2
= r
+ width
- x
;
367 const unsigned *m3
= m2
+ src
* mwidth
;
369 for (mx
= 0; mx
< x2
; mx
++) {
381 for (x
= 0; x
< width
; x
++) {
383 if(y
< r
)//in case that r<0
385 unsigned *srcp
= t
+ y
* (width
+ 1) + 1;
388 register unsigned *dstp
= srcp
- 1 + (mwidth
-r
+y
)*(width
+ 1);
389 const int src2
= (src
+ (1<<(ass_synth_priv::VOLUME_BITS
-1))) >> ass_synth_priv::VOLUME_BITS
;
390 const unsigned *m3
= m2
+ src2
* mwidth
;
393 *srcp
= (1<<(ass_synth_priv::VOLUME_BITS
-1));
394 for (mx
= mwidth
-1; mx
>=r
- y
; mx
--) {
401 for (y
= 1; y
< r
; y
++) {
402 unsigned *srcp
= t
+ y
* (width
+ 1) + 1;
405 register unsigned *dstp
= srcp
- 1 + width
+ 1;
406 const int src2
= (src
+ (1<<(ass_synth_priv::VOLUME_BITS
-1))) >> ass_synth_priv::VOLUME_BITS
;
407 const unsigned *m3
= m2
+ src2
* mwidth
;
410 *srcp
= (1<<(ass_synth_priv::VOLUME_BITS
-1));
411 for (mx
= r
- y
; mx
< mwidth
; mx
++) {
417 for (; y
< height
- r
; y
++) {
418 unsigned *srcp
= t
+ y
* (width
+ 1) + 1;
421 register unsigned *dstp
= srcp
- 1 - r
* (width
+ 1);
422 const int src2
= (src
+ (1<<(ass_synth_priv::VOLUME_BITS
-1))) >> ass_synth_priv::VOLUME_BITS
;
423 const unsigned *m3
= m2
+ src2
* mwidth
;
426 *srcp
= (1<<(ass_synth_priv::VOLUME_BITS
-1));
427 for (mx
= 0; mx
< mwidth
; mx
++) {
433 for (; y
< height
-1; y
++) {
434 unsigned *srcp
= t
+ y
* (width
+ 1) + 1;
437 const int y2
= r
+ height
- y
;
438 register unsigned *dstp
= srcp
- 1 - r
* (width
+ 1);
439 const int src2
= (src
+ (1<<(ass_synth_priv::VOLUME_BITS
-1))) >> ass_synth_priv::VOLUME_BITS
;
440 const unsigned *m3
= m2
+ src2
* mwidth
;
443 *srcp
= (1<<(ass_synth_priv::VOLUME_BITS
-1));
444 for (mx
= 0; mx
< y2
; mx
++) {
450 if(y
== height
- 1)//important: y == height - 1 failed if r==0
452 unsigned *srcp
= t
+ y
* (width
+ 1) + 1;
455 const int y2
= r
+ height
- y
;
456 register unsigned *dstp
= srcp
- 1 - r
* (width
+ 1);
457 const int src2
= (src
+ (1<<(ass_synth_priv::VOLUME_BITS
-1))) >> ass_synth_priv::VOLUME_BITS
;
458 const unsigned *m3
= m2
+ src2
* mwidth
;
461 *srcp
= (1<<(ass_synth_priv::VOLUME_BITS
-1));
462 for (mx
= 0; mx
< y2
; mx
++) {
474 for (y
= 0; y
< height
; y
++) {
475 for (x
= 0; x
< width
; x
++) {
476 s
[x
] = t
[x
] >> ass_synth_priv::VOLUME_BITS
;
484 * \brief Blur with the [[1,2,1], [2,4,2], [1,2,1]] kernel.
486 static void be_blur(unsigned char *buf
, unsigned *tmp_base
, int w
, int h
, int stride
)
488 WORD
*col_pix_buf_base
= reinterpret_cast<WORD
*>(xy_malloc(w
*sizeof(WORD
)));
489 WORD
*col_sum_buf_base
= reinterpret_cast<WORD
*>(xy_malloc(w
*sizeof(WORD
)));
490 if(!col_sum_buf_base
|| !col_pix_buf_base
)
492 //ToDo: error handling
495 memset(col_pix_buf_base
, 0, w
*sizeof(WORD
));
496 memset(col_sum_buf_base
, 0, w
*sizeof(WORD
));
497 WORD
*col_pix_buf
= col_pix_buf_base
-2;//for aligment;
498 WORD
*col_sum_buf
= col_sum_buf_base
-2;//for aligment;
501 unsigned char *src
=buf
+y
*stride
;
504 int old_pix
= src
[x
-1];
505 int old_sum
= old_pix
+ src
[x
-2];
506 for ( ; x
< w
; x
++) {
508 int temp2
= old_pix
+ temp1
;
510 temp1
= old_sum
+ temp2
;
512 col_pix_buf
[x
] = temp1
;
517 unsigned char *src
=buf
+y
*stride
;
521 int old_pix
= src
[x
-1];
522 int old_sum
= old_pix
+ src
[x
-2];
523 for ( ; x
< w
; x
++) {
525 int temp2
= old_pix
+ temp1
;
527 temp1
= old_sum
+ temp2
;
530 temp2
= col_pix_buf
[x
] + temp1
;
531 col_pix_buf
[x
] = temp1
;
532 //dst[x-1] = (col_sum_buf[x] + temp2) >> 4;
533 col_sum_buf
[x
] = temp2
;
537 //__m128i round = _mm_set1_epi16(8);
538 for (int y
= 2; y
< h
; y
++) {
539 unsigned char *src
=buf
+y
*stride
;
540 unsigned char *dst
=buf
+(y
-1)*stride
;
544 __m128i old_pix_128
= _mm_cvtsi32_si128(src
[1]);
545 __m128i old_sum_128
= _mm_cvtsi32_si128(src
[0]+src
[1]);
546 for ( ; x
< ((w
-2)&(~7)); x
+=8) {
547 __m128i new_pix
= _mm_loadl_epi64(reinterpret_cast<const __m128i
*>(src
+x
));
548 new_pix
= _mm_unpacklo_epi8(new_pix
, _mm_setzero_si128());
549 __m128i temp
= _mm_slli_si128(new_pix
,2);
550 temp
= _mm_add_epi16(temp
, old_pix_128
);
551 temp
= _mm_add_epi16(temp
, new_pix
);
552 old_pix_128
= _mm_srli_si128(new_pix
,14);
554 new_pix
= _mm_slli_si128(temp
,2);
555 new_pix
= _mm_add_epi16(new_pix
, old_sum_128
);
556 new_pix
= _mm_add_epi16(new_pix
, temp
);
557 old_sum_128
= _mm_srli_si128(temp
, 14);
559 __m128i old_col_pix
= _mm_loadu_si128( reinterpret_cast<const __m128i
*>(col_pix_buf
+x
) );
560 __m128i old_col_sum
= _mm_loadu_si128( reinterpret_cast<const __m128i
*>(col_sum_buf
+x
) );
561 _mm_storeu_si128( reinterpret_cast<__m128i
*>(col_pix_buf
+x
), new_pix
);
562 temp
= _mm_add_epi16(new_pix
, old_col_pix
);
563 _mm_storeu_si128( reinterpret_cast<__m128i
*>(col_sum_buf
+x
), temp
);
565 old_col_sum
= _mm_add_epi16(old_col_sum
, temp
);
566 //old_col_sum = _mm_add_epi16(old_col_sum, round);
567 old_col_sum
= _mm_srli_epi16(old_col_sum
, 4);
568 old_col_sum
= _mm_packus_epi16(old_col_sum
, old_col_sum
);
569 _mm_storel_epi64( reinterpret_cast<__m128i
*>(dst
+x
-1), old_col_sum
);
571 int old_pix
= src
[x
-1];
572 int old_sum
= old_pix
+ src
[x
-2];
573 for ( ; x
< w
; x
++) {
575 int temp2
= old_pix
+ temp1
;
577 temp1
= old_sum
+ temp2
;
580 temp2
= col_pix_buf
[x
] + temp1
;
581 col_pix_buf
[x
] = temp1
;
582 dst
[x
-1] = (col_sum_buf
[x
] + temp2
) >> 4;
583 col_sum_buf
[x
] = temp2
;
587 xy_free(col_sum_buf_base
);
588 xy_free(col_pix_buf_base
);
591 static void Bilinear(unsigned char *buf
, int w
, int h
, int stride
, int x_factor
, int y_factor
)
593 WORD
*col_pix_buf_base
= reinterpret_cast<WORD
*>(xy_malloc(w
*sizeof(WORD
)));
594 if(!col_pix_buf_base
)
596 //ToDo: error handling
599 memset(col_pix_buf_base
, 0, w
*sizeof(WORD
));
601 for (int y
= 0; y
< h
; y
++){
602 unsigned char *src
=buf
+y
*stride
;
604 WORD
*col_pix_buf
= col_pix_buf_base
;
606 for(int x
= 0; x
< w
; x
++)
609 int temp2
= temp1
*x_factor
;
615 temp2
= temp1
*y_factor
;
618 temp1
+= col_pix_buf
[x
];
619 src
[x
] = ((temp1
+32)>>6);
620 col_pix_buf
[x
] = temp2
;
623 xy_free(col_pix_buf_base
);
626 bool Rasterizer::Rasterize(const ScanLineData
& scan_line_data
, int xsub
, int ysub
, SharedPtrOverlay overlay
)
628 using namespace ::boost::flyweights
;
636 if(!scan_line_data
.mWidth
|| !scan_line_data
.mHeight
)
643 int width
= scan_line_data
.mWidth
+ xsub
;
644 int height
= scan_line_data
.mHeight
+ ysub
;
645 overlay
->mOffsetX
= scan_line_data
.mPathOffsetX
- xsub
;
646 overlay
->mOffsetY
= scan_line_data
.mPathOffsetY
- ysub
;
647 int wide_border
= (scan_line_data
.mWideBorder
+7)&~7;
648 overlay
->mfWideOutlineEmpty
= scan_line_data
.mWideOutline
.empty();
649 if(!overlay
->mfWideOutlineEmpty
)
651 width
+= 2*wide_border
;
652 height
+= 2*wide_border
;
653 xsub
+= wide_border
;
654 ysub
+= wide_border
;
655 overlay
->mOffsetX
-= wide_border
;
656 overlay
->mOffsetY
-= wide_border
;
659 overlay
->mWidth
= width
;
660 overlay
->mHeight
= height
;
661 overlay
->mOverlayWidth
= ((width
+7)>>3) + 1;
662 overlay
->mOverlayHeight
= ((height
+7)>>3) + 1;
663 overlay
->mOverlayPitch
= (overlay
->mOverlayWidth
+15)&~15;
665 overlay
->mpOverlayBuffer
.base
= (byte
*)xy_malloc(2 * overlay
->mOverlayPitch
* overlay
->mOverlayHeight
);
666 memset(overlay
->mpOverlayBuffer
.base
, 0, 2 * overlay
->mOverlayPitch
* overlay
->mOverlayHeight
);
667 overlay
->mpOverlayBuffer
.body
= overlay
->mpOverlayBuffer
.base
;
668 overlay
->mpOverlayBuffer
.border
= overlay
->mpOverlayBuffer
.base
+ overlay
->mOverlayPitch
* overlay
->mOverlayHeight
;
670 // Are we doing a border?
671 const ScanLineData::tSpanBuffer
* pOutline
[2] = {&(scan_line_data
.mOutline
), &(scan_line_data
.mWideOutline
)};
672 for(int i
= countof(pOutline
)-1; i
>= 0; i
--)
674 ScanLineData::tSpanBuffer::const_iterator it
= pOutline
[i
]->begin();
675 ScanLineData::tSpanBuffer::const_iterator itEnd
= pOutline
[i
]->end();
676 byte
* plan_selected
= i
==0 ? overlay
->mpOverlayBuffer
.body
: overlay
->mpOverlayBuffer
.border
;
677 int pitch
= overlay
->mOverlayPitch
;
678 for(; it
!=itEnd
; ++it
)
680 int y
= (int)(((*it
).first
>> 32) - 0x40000000 + ysub
);
681 int x1
= (int)(((*it
).first
& 0xffffffff) - 0x40000000 + xsub
);
682 int x2
= (int)(((*it
).second
& 0xffffffff) - 0x40000000 + xsub
);
686 int last
= (x2
-1)>>3;
687 byte
* dst
= plan_selected
+ (pitch
*(y
>>3) + first
);
692 *dst
+= ((first
+1)<<3) - x1
;
694 while(++first
< last
)
699 *dst
+= x2
- (last
<<3);
708 // @return: true if a blur operation was actually performed; otherwise false, and the output is left unset.
709 bool Rasterizer::Blur(const Overlay
& input_overlay
, int fBlur
, double fGaussianBlur
,
710 SharedPtrOverlay output_overlay
)
712 using namespace ::boost::flyweights
;
718 output_overlay
->CleanUp();
720 output_overlay
->mOffsetX
= input_overlay
.mOffsetX
;
721 output_overlay
->mOffsetY
= input_overlay
.mOffsetY
;
722 output_overlay
->mWidth
= input_overlay
.mWidth
;
723 output_overlay
->mHeight
= input_overlay
.mHeight
;
724 output_overlay
->mOverlayWidth
= input_overlay
.mOverlayWidth
;
725 output_overlay
->mOverlayHeight
= input_overlay
.mOverlayHeight
;
726 output_overlay
->mfWideOutlineEmpty
= input_overlay
.mfWideOutlineEmpty
;
729 if(fBlur
|| fGaussianBlur
> 0.1)
731 if (fGaussianBlur
> 0)
732 bluradjust
+= (int)(fGaussianBlur
*3*8 + 0.5) | 1;
735 // Expand the buffer a bit when we're blurring, since that can also widen the borders a bit
736 bluradjust
= (bluradjust
+7)&~7;
738 output_overlay
->mOffsetX
-= bluradjust
;
739 output_overlay
->mOffsetY
-= bluradjust
;
740 output_overlay
->mWidth
+= (bluradjust
<<1);
741 output_overlay
->mHeight
+= (bluradjust
<<1);
742 output_overlay
->mOverlayWidth
+= (bluradjust
>>2);
743 output_overlay
->mOverlayHeight
+= (bluradjust
>>2);
750 output_overlay
->mOverlayPitch
= (output_overlay
->mOverlayWidth
+15)&~15;
752 output_overlay
->mpOverlayBuffer
.base
= (byte
*)xy_malloc(2 * output_overlay
->mOverlayPitch
* output_overlay
->mOverlayHeight
);
753 memset(output_overlay
->mpOverlayBuffer
.base
, 0, 2 * output_overlay
->mOverlayPitch
* output_overlay
->mOverlayHeight
);
754 output_overlay
->mpOverlayBuffer
.body
= output_overlay
->mpOverlayBuffer
.base
;
755 output_overlay
->mpOverlayBuffer
.border
= output_overlay
->mpOverlayBuffer
.base
+ output_overlay
->mOverlayPitch
* output_overlay
->mOverlayHeight
;
758 for(int i
= 1; i
>= 0; i
--)
760 byte
* plan_selected
= i
==0 ? output_overlay
->mpOverlayBuffer
.body
: output_overlay
->mpOverlayBuffer
.border
;
761 const byte
* plan_input
= i
==0 ? input_overlay
.mpOverlayBuffer
.body
: input_overlay
.mpOverlayBuffer
.border
;
763 plan_selected
+= (bluradjust
>>3) + (bluradjust
>>3)*output_overlay
->mOverlayPitch
;
764 for (int j
=0;j
<input_overlay
.mOverlayHeight
;j
++)
766 memcpy(plan_selected
, plan_input
, input_overlay
.mOverlayPitch
);
767 plan_selected
+= output_overlay
->mOverlayPitch
;
768 plan_input
+= input_overlay
.mOverlayPitch
;
772 ass_tmp_buf
tmp_buf( max((output_overlay
->mOverlayPitch
+1)*(output_overlay
->mOverlayHeight
+1),0) );
773 //flyweight<key_value<int, ass_tmp_buf, ass_tmp_buf_get_size>, no_locking> tmp_buf((overlay->mOverlayWidth+1)*(overlay->mOverlayPitch+1));
774 // Do some gaussian blur magic
775 if (fGaussianBlur
> 0.1)//(fGaussianBlur > 0) return true even if fGaussianBlur very small
777 byte
* plan_selected
= output_overlay
->mfWideOutlineEmpty
? output_overlay
->mpOverlayBuffer
.body
: output_overlay
->mpOverlayBuffer
.border
;
778 flyweight
<key_value
<double, ass_synth_priv
, ass_synth_priv_key
>, no_locking
> fw_priv_blur(fGaussianBlur
);
779 const ass_synth_priv
& priv_blur
= fw_priv_blur
.get();
780 if (output_overlay
->mOverlayWidth
>=priv_blur
.g_w
&& output_overlay
->mOverlayHeight
>=priv_blur
.g_w
)
782 ass_gauss_blur(plan_selected
, tmp_buf
.tmp
, output_overlay
->mOverlayWidth
, output_overlay
->mOverlayHeight
, output_overlay
->mOverlayPitch
,
783 priv_blur
.gt2
, priv_blur
.g_r
, priv_blur
.g_w
);
787 for (int pass
= 0; pass
< fBlur
; pass
++)
789 if(output_overlay
->mOverlayWidth
>= 3 && output_overlay
->mOverlayHeight
>= 3)
791 int pitch
= output_overlay
->mOverlayPitch
;
792 byte
* plan_selected
= output_overlay
->mfWideOutlineEmpty
? output_overlay
->mpOverlayBuffer
.body
: output_overlay
->mpOverlayBuffer
.border
;
793 be_blur(plan_selected
, tmp_buf
.tmp
, output_overlay
->mOverlayWidth
, output_overlay
->mOverlayHeight
, pitch
);
799 ///////////////////////////////////////////////////////////////////////////
801 static __forceinline
void pixmix(DWORD
*dst
, DWORD color
, DWORD alpha
)
804 // Make sure both a and ia are in range 1..256 for the >>8 operations below to be correct
807 *dst
= ((((*dst
&0x00ff00ff)*ia
+ (color
&0x00ff00ff)*a
)&0xff00ff00)>>8)
808 | ((((*dst
&0x0000ff00)*ia
+ (color
&0x0000ff00)*a
)&0x00ff0000)>>8)
809 | ((((*dst
>>8)&0x00ff0000)*ia
)&0xff000000);
812 static __forceinline
void pixmix2(DWORD
*dst
, DWORD color
, DWORD shapealpha
, DWORD clipalpha
)
814 int a
= (((shapealpha
)*(clipalpha
)*(color
>>24))>>12)&0xff;
817 *dst
= ((((*dst
&0x00ff00ff)*ia
+ (color
&0x00ff00ff)*a
)&0xff00ff00)>>8)
818 | ((((*dst
&0x0000ff00)*ia
+ (color
&0x0000ff00)*a
)&0x00ff0000)>>8)
819 | ((((*dst
>>8)&0x00ff0000)*ia
)&0xff000000);
822 #include <xmmintrin.h>
823 #include <emmintrin.h>
825 static __forceinline
void pixmix_sse2(DWORD
* dst
, DWORD color
, DWORD alpha
)
827 // alpha = (((alpha) * (color>>24)) >> 6) & 0xff;
829 __m128i zero
= _mm_setzero_si128();
830 __m128i a
= _mm_set1_epi32(((alpha
+1) << 16) | (0x100 - alpha
));
831 __m128i d
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst
), zero
);
832 __m128i s
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(color
), zero
);
833 __m128i r
= _mm_unpacklo_epi16(d
, s
);
834 r
= _mm_madd_epi16(r
, a
);
835 r
= _mm_srli_epi32(r
, 8);
836 r
= _mm_packs_epi32(r
, r
);
837 r
= _mm_packus_epi16(r
, r
);
838 *dst
= (DWORD
)_mm_cvtsi128_si32(r
);
841 static __forceinline
void pixmix2_sse2(DWORD
* dst
, DWORD color
, DWORD shapealpha
, DWORD clipalpha
)
843 int alpha
= (((shapealpha
)*(clipalpha
)*(color
>>24))>>12)&0xff;
845 __m128i zero
= _mm_setzero_si128();
846 __m128i a
= _mm_set1_epi32(((alpha
+1) << 16) | (0x100 - alpha
));
847 __m128i d
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dst
), zero
);
848 __m128i s
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(color
), zero
);
849 __m128i r
= _mm_unpacklo_epi16(d
, s
);
850 r
= _mm_madd_epi16(r
, a
);
851 r
= _mm_srli_epi32(r
, 8);
852 r
= _mm_packs_epi32(r
, r
);
853 r
= _mm_packus_epi16(r
, r
);
854 *dst
= (DWORD
)_mm_cvtsi128_si32(r
);
857 #include <mmintrin.h>
859 // Calculate a - b clamping to 0 instead of underflowing
860 static __forceinline DWORD
safe_subtract(DWORD a
, DWORD b
)
862 __m64 ap
= _mm_cvtsi32_si64(a
);
863 __m64 bp
= _mm_cvtsi32_si64(b
);
864 __m64 rp
= _mm_subs_pu16(ap
, bp
);
865 DWORD r
= (DWORD
)_mm_cvtsi64_si32(rp
);
868 //return (b > a) ? 0 : a - b;
872 * No alignment requirement
875 void AlphaBlt(byte
* pY
,
876 const byte
* pAlphaMask
,
878 int h
, int w
, int src_stride
, int dst_stride
)
880 __m128i zero
= _mm_setzero_si128();
881 __m128i s
= _mm_set1_epi16(Y
); //s = c 0 c 0 c 0 c 0 c 0 c 0 c 0 c 0
883 if( w
>16 )//IMPORTANT! The result of the following code is undefined with w<15.
885 for( ; h
>0; h
--, pAlphaMask
+= src_stride
, pY
+= dst_stride
)
887 const BYTE
* sa
= pAlphaMask
;
889 const BYTE
* dy_first_mod16
= reinterpret_cast<BYTE
*>((reinterpret_cast<int>(pY
)+15)&~15); //IMPORTANT! w must >= 15
890 const BYTE
* dy_end_mod16
= reinterpret_cast<BYTE
*>(reinterpret_cast<int>(pY
+w
)&~15);
891 const BYTE
* dy_end
= pY
+ w
;
893 for(;dy
< dy_first_mod16
; sa
++, dy
++)
895 *dy
= (*dy
* (256 - *sa
)+ Y
*(*sa
+1))>>8;
897 for(; dy
< dy_end_mod16
; sa
+=8, dy
+=16)
899 __m128i a
= _mm_loadl_epi64((__m128i
*)sa
);
902 __m128i d
= _mm_load_si128((__m128i
*)dy
);
904 //__m128i ones = _mm_cmpeq_epi32(zero,zero); //ones = ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
905 //__m128i ia = _mm_xor_si128(a,ones); //ia = ~a
906 //ia = _mm_unpacklo_epi8(ia,zero); //ia = ~a0 0 ~a1 0 ~a2 0 ~a3 0 ~a4 0 ~a5 0 ~a6 0 ~a7 0
907 a
= _mm_unpacklo_epi8(a
,zero
); //a= a0 0 a1 0 a2 0 a3 0 a4 0 a5 0 a6 0 a7 0
908 __m128i ones
= _mm_set1_epi16(256); //ones = 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
909 __m128i ia
= _mm_sub_epi16(ones
, a
); //ia = 256-a0 ... 256-a7
910 ones
= _mm_srli_epi16(ones
, 8);
911 a
= _mm_add_epi16(a
, ones
); //a= 1+a0 ... 1+a7
913 __m128i dl
= _mm_unpacklo_epi8(d
,zero
); //d = b0 0 b1 0 b2 0 b3 0 b4 0 b5 0 b6 0 b7 0
914 __m128i sl
= _mm_mullo_epi16(s
,a
); //sl = c0*a0 c1*a1 ... c7*a7
916 dl
= _mm_mullo_epi16(dl
,ia
); //d = b0*~a0 b1*~a1 ... b7*~a7
918 dl
= _mm_add_epi16(dl
,sl
); //d = d + sl
919 dl
= _mm_srli_epi16(dl
, 8); //d = d>>8
922 a
= _mm_loadl_epi64((__m128i
*)sa
);
924 a
= _mm_unpacklo_epi8(a
,zero
);
925 ones
= _mm_slli_epi16(ones
, 8);
926 ia
= _mm_sub_epi16(ones
, a
);
927 ones
= _mm_srli_epi16(ones
, 8);
928 a
= _mm_add_epi16(a
,ones
);
930 d
= _mm_unpackhi_epi8(d
,zero
);
931 sl
= _mm_mullo_epi16(s
,a
);
932 d
= _mm_mullo_epi16(d
,ia
);
933 d
= _mm_add_epi16(d
,sl
);
934 d
= _mm_srli_epi16(d
, 8);
936 dl
= _mm_packus_epi16(dl
,d
);
938 _mm_store_si128((__m128i
*)dy
, dl
);
940 for(;dy
< dy_end
; sa
++, dy
++)
942 *dy
= (*dy
* (256 - *sa
)+ Y
*(*sa
+1))>>8;
948 for( ; h
>0; h
--, pAlphaMask
+= src_stride
, pY
+= dst_stride
)
950 const BYTE
* sa
= pAlphaMask
;
952 const BYTE
* dy_end
= pY
+ w
;
954 for(;dy
< dy_end
; sa
++, dy
++)
956 *dy
= (*dy
* (256 - *sa
)+ Y
*(*sa
+1))>>8;
964 * No alignment requirement
967 void AlphaBlt(byte
* pY
,
970 int h
, int w
, int dst_stride
)
972 int yPremul
= Y
*(alpha
+1);
973 int dstAlpha
= 0x100 - alpha
;
974 if( w
>32 )//IMPORTANT! The result of the following code is undefined with w<15.
976 __m128i zero
= _mm_setzero_si128();
977 __m128i s
= _mm_set1_epi16(yPremul
); //s = c 0 c 0 c 0 c 0 c 0 c 0 c 0 c 0
978 __m128i ia
= _mm_set1_epi16(dstAlpha
);
979 for( ; h
>0; h
--, pY
+= dst_stride
)
982 const BYTE
* dy_first_mod16
= reinterpret_cast<BYTE
*>((reinterpret_cast<int>(pY
)+15)&~15); //IMPORTANT! w must >= 15
983 const BYTE
* dy_end_mod16
= reinterpret_cast<BYTE
*>(reinterpret_cast<int>(pY
+w
)&~15);
984 const BYTE
* dy_end
= pY
+ w
;
986 for(;dy
< dy_first_mod16
; dy
++)
988 *dy
= (*dy
* dstAlpha
+ yPremul
)>>8;
990 for(; dy
< dy_end_mod16
; dy
+=16)
993 __m128i d
= _mm_load_si128(reinterpret_cast<const __m128i
*>(dy
));
994 __m128i dl
= _mm_unpacklo_epi8(d
,zero
); //d = b0 0 b1 0 b2 0 b3 0 b4 0 b5 0 b6 0 b7 0
996 dl
= _mm_mullo_epi16(dl
,ia
); //d = b0*~a0 b1*~a1 ... b7*~a7
997 dl
= _mm_adds_epu16(dl
,s
); //d = d + s
998 dl
= _mm_srli_epi16(dl
, 8); //d = d>>8
1000 d
= _mm_unpackhi_epi8(d
,zero
);
1001 d
= _mm_mullo_epi16(d
,ia
);
1002 d
= _mm_adds_epu16(d
,s
);
1003 d
= _mm_srli_epi16(d
, 8);
1005 dl
= _mm_packus_epi16(dl
,d
);
1007 _mm_store_si128(reinterpret_cast<__m128i
*>(dy
), dl
);
1009 for(;dy
< dy_end
; dy
++)
1011 *dy
= (*dy
* dstAlpha
+ yPremul
)>>8;
1017 for( ; h
>0; h
--, pY
+= dst_stride
)
1020 const BYTE
* dy_end
= pY
+ w
;
1022 for(;dy
< dy_end
; dy
++)
1024 *dy
= (*dy
* dstAlpha
+ yPremul
)>>8;
1032 * No alignment requirement
1035 void AlphaBltC(byte
* pY
,
1038 int h
, int w
, int dst_stride
)
1040 int yPremul
= Y
*(alpha
+1);
1041 int dstAlpha
= 0x100 - alpha
;
1043 for( ; h
>0; h
--, pY
+= dst_stride
)
1046 const BYTE
* dy_end
= pY
+ w
;
1048 for(;dy
< dy_end
; dy
++)
1050 *dy
= (*dy
* dstAlpha
+ yPremul
)>>8;
1055 // For CPUID usage in Rasterizer::Draw
1056 #include "../dsutil/vd.h"
1058 static const __int64 _00ff00ff00ff00ff
= 0x00ff00ff00ff00ffi
64;
1060 // Render a subpicture onto a surface.
1061 // spd is the surface to render on.
1062 // clipRect is a rectangular clip region to render inside.
1063 // pAlphaMask is an alpha clipping mask.
1064 // xsub and ysub ???
1065 // switchpts seems to be an array of fill colours interlaced with coordinates.
1066 // switchpts[i*2] contains a colour and switchpts[i*2+1] contains the coordinate to use that colour from
1067 // fBody tells whether to render the body of the subs.
1068 // fBorder tells whether to render the border of the subs.
1069 SharedPtrByte
Rasterizer::CompositeAlphaMask(SubPicDesc
& spd
, SharedPtrOverlay overlay
, const CRect
& clipRect
, byte
* pAlphaMask
,
1070 int xsub
, int ysub
, const DWORD
* switchpts
, bool fBody
, bool fBorder
,
1071 CRect
*outputDirtyRect
)
1073 //fix me: check and log error
1074 SharedPtrByte result
;
1075 *outputDirtyRect
= CRect(0, 0, 0, 0);
1076 if(!switchpts
|| !fBody
&& !fBorder
) return(result
);
1079 // Limit drawn area to intersection of rendering surface and rectangular clip area
1080 CRect
r(0, 0, spd
.w
, spd
.h
);
1082 // Remember that all subtitle coordinates are specified in 1/8 pixels
1083 // (x+4)>>3 rounds to nearest whole pixel.
1084 // ??? What is xsub, ysub, mOffsetX and mOffsetY ?
1085 int x
= (xsub
+ overlay
->mOffsetX
+ 4)>>3;
1086 int y
= (ysub
+ overlay
->mOffsetY
+ 4)>>3;
1087 int w
= overlay
->mOverlayWidth
;
1088 int h
= overlay
->mOverlayHeight
;
1091 if(x
< r
.left
) {xo
= r
.left
-x
; w
-= r
.left
-x
; x
= r
.left
;}
1092 if(y
< r
.top
) {yo
= r
.top
-y
; h
-= r
.top
-y
; y
= r
.top
;}
1093 if(x
+w
> r
.right
) w
= r
.right
-x
;
1094 if(y
+h
> r
.bottom
) h
= r
.bottom
-y
;
1095 // Check if there's actually anything to render
1096 if(w
<= 0 || h
<= 0) return(result
);
1097 outputDirtyRect
->SetRect(x
, y
, x
+w
, y
+h
);
1098 *outputDirtyRect
&= CRect(0, 0, spd
.w
, spd
.h
);
1100 bool fSingleColor
= (switchpts
[1]==0xffffffff);
1103 // Grab the first colour
1104 DWORD color
= switchpts
[0];
1105 byte
* s_base
= (byte
*)xy_malloc(overlay
->mOverlayPitch
* overlay
->mOverlayHeight
);
1109 overlay
->FillAlphaMash(s_base
, fBody
, fBorder
, xo
, yo
, w
, h
,
1110 pAlphaMask
==NULL
? NULL
: pAlphaMask
+ spd
.w
* y
+ x
, spd
.w
,
1116 const DWORD
*sw
= switchpts
;
1117 while( last_x
<w
+xo
)
1119 byte alpha
= sw
[0]>>24;
1120 while( sw
[3]<w
+xo
&& (sw
[2]>>24)==alpha
)
1124 int new_x
= sw
[3] < w
+xo
? sw
[3] : w
+xo
;
1125 overlay
->FillAlphaMash(s_base
, fBody
, fBorder
,
1126 last_x
, yo
, new_x
-last_x
, h
,
1127 pAlphaMask
==NULL
? NULL
: pAlphaMask
+ spd
.w
* y
+ x
+ last_x
- xo
, spd
.w
,
1133 result
.reset( s_base
, xy_free
);
1137 CRect
Rasterizer::Draw(SubPicDesc
& spd
, SharedPtrOverlay overlay
, const CRect
& clipRect
, byte
* pAlphaMask
,
1138 int xsub
, int ysub
, const DWORD
* switchpts
, bool fBody
, bool fBorder
)
1140 CRect
bbox(0,0,0,0);
1141 if(!switchpts
|| !fBody
&& !fBorder
) return(bbox
);
1144 // Limit drawn area to intersection of rendering surface and rectangular clip area
1145 CRect
r(0, 0, spd
.w
, spd
.h
);
1147 // Remember that all subtitle coordinates are specified in 1/8 pixels
1148 // (x+4)>>3 rounds to nearest whole pixel.
1149 // ??? What is xsub, ysub, mOffsetX and mOffsetY ?
1150 int overlayPitch
= overlay
->mOverlayPitch
;
1151 int x
= (xsub
+ overlay
->mOffsetX
+ 4)>>3;
1152 int y
= (ysub
+ overlay
->mOffsetY
+ 4)>>3;
1153 int w
= overlay
->mOverlayWidth
;
1154 int h
= overlay
->mOverlayHeight
;
1157 if(x
< r
.left
) {xo
= r
.left
-x
; w
-= r
.left
-x
; x
= r
.left
;}
1158 if(y
< r
.top
) {yo
= r
.top
-y
; h
-= r
.top
-y
; y
= r
.top
;}
1159 if(x
+w
> r
.right
) w
= r
.right
-x
;
1160 if(y
+h
> r
.bottom
) h
= r
.bottom
-y
;
1161 // Check if there's actually anything to render
1162 if(w
<= 0 || h
<= 0) return(bbox
);
1165 bool fSSE2
= !!(g_cpuid
.m_flags
& CCpuID::sse2
);
1166 bool fSingleColor
= (switchpts
[1]==0xffffffff);
1167 bool AYUV_PLANAR
= (spd
.type
==MSP_AYUV_PLANAR
);
1168 int draw_method
= 0;
1170 draw_method
|= DM::SINGLE_COLOR
;
1172 draw_method
|= DM::SSE2
;
1174 draw_method
|= DM::AYUV_PLANAR
;
1177 // Grab the first colour
1178 DWORD color
= switchpts
[0];
1179 SharedPtrByte s_base
= CompositeAlphaMask(spd
, overlay
, clipRect
, pAlphaMask
, xsub
, ysub
, switchpts
,
1180 fBody
, fBorder
, &bbox
);
1181 const byte
* s
= s_base
.get() + overlay
->mOverlayPitch
*yo
+ xo
;
1183 // How would this differ from src?
1184 unsigned long* dst
= (unsigned long *)(((char *)spd
.bits
+ spd
.pitch
* y
) + ((x
*spd
.bpp
)>>3));
1186 // Every remaining line in the bitmap to be rendered...
1189 case DM::SINGLE_COLOR
| DM::SSE2
| 0*DM::AYUV_PLANAR
:
1193 for(int wt
=0; wt
<w
; ++wt
)
1194 // The <<6 is due to pixmix expecting the alpha parameter to be
1195 // the multiplication of two 6-bit unsigned numbers but we
1196 // only have one here. (No alpha mask.)
1197 pixmix_sse2(&dst
[wt
], color
, s
[wt
]);
1199 dst
= (unsigned long *)((char *)dst
+ spd
.pitch
);
1203 case DM::SINGLE_COLOR
| 0*DM::SSE2
| 0*DM::AYUV_PLANAR
:
1207 for(int wt
=0; wt
<w
; ++wt
)
1208 pixmix(&dst
[wt
], color
, s
[wt
]);
1210 dst
= (unsigned long *)((char *)dst
+ spd
.pitch
);
1214 case 0*DM::SINGLE_COLOR
| DM::SSE2
| 0*DM::AYUV_PLANAR
:
1218 const DWORD
*sw
= switchpts
;
1219 for(int wt
=0; wt
<w
; ++wt
)
1221 // xo is the offset (usually negative) we have moved into the image
1222 // So if we have passed the switchpoint (?) switch to another colour
1223 // (So switchpts stores both colours *and* coordinates?)
1224 if(wt
+xo
>= sw
[1]) {while(wt
+xo
>= sw
[1]) sw
+= 2; color
= sw
[-2];}
1225 pixmix_sse2(&dst
[wt
], color
, s
[wt
]);
1228 dst
= (unsigned long *)((char *)dst
+ spd
.pitch
);
1232 case 0*DM::SINGLE_COLOR
| 0*DM::SSE2
| 0*DM::AYUV_PLANAR
:
1236 const DWORD
*sw
= switchpts
;
1237 for(int wt
=0; wt
<w
; ++wt
)
1239 if(wt
+xo
>= sw
[1]) {while(wt
+xo
>= sw
[1]) sw
+= 2; color
= sw
[-2];}
1240 pixmix(&dst
[wt
], color
, s
[wt
]);
1243 dst
= (unsigned long *)((char *)dst
+ spd
.pitch
);
1247 case DM::SINGLE_COLOR
| DM::SSE2
| DM::AYUV_PLANAR
:
1249 unsigned char* dst_A
= (unsigned char*)dst
;
1250 unsigned char* dst_Y
= dst_A
+ spd
.pitch
*spd
.h
;
1251 unsigned char* dst_U
= dst_Y
+ spd
.pitch
*spd
.h
;
1252 unsigned char* dst_V
= dst_U
+ spd
.pitch
*spd
.h
;
1254 AlphaBlt(dst_Y
, s
, ((color
)>>16)&0xff, h
, w
, overlayPitch
, spd
.pitch
);
1255 AlphaBlt(dst_U
, s
, ((color
)>>8)&0xff, h
, w
, overlayPitch
, spd
.pitch
);
1256 AlphaBlt(dst_V
, s
, ((color
))&0xff, h
, w
, overlayPitch
, spd
.pitch
);
1257 AlphaBlt(dst_A
, s
, 0, h
, w
, overlayPitch
, spd
.pitch
);
1260 case 0*DM::SINGLE_COLOR
| DM::SSE2
| DM::AYUV_PLANAR
:
1262 unsigned char* dst_A
= (unsigned char*)dst
;
1263 unsigned char* dst_Y
= dst_A
+ spd
.pitch
*spd
.h
;
1264 unsigned char* dst_U
= dst_Y
+ spd
.pitch
*spd
.h
;
1265 unsigned char* dst_V
= dst_U
+ spd
.pitch
*spd
.h
;
1267 const DWORD
*sw
= switchpts
;
1272 int new_x
= sw
[3] < w
+xo
? sw
[3] : w
+xo
;
1275 AlphaBlt(dst_Y
, s
+ last_x
- xo
, (color
>>16)&0xff, h
, new_x
-last_x
, overlayPitch
, spd
.pitch
);
1276 AlphaBlt(dst_U
, s
+ last_x
- xo
, (color
>>8)&0xff, h
, new_x
-last_x
, overlayPitch
, spd
.pitch
);
1277 AlphaBlt(dst_V
, s
+ last_x
- xo
, (color
)&0xff, h
, new_x
-last_x
, overlayPitch
, spd
.pitch
);
1278 AlphaBlt(dst_A
, s
+ last_x
- xo
, 0, h
, new_x
-last_x
, overlayPitch
, spd
.pitch
);
1280 dst_A
+= new_x
- last_x
;
1281 dst_Y
+= new_x
- last_x
;
1282 dst_U
+= new_x
- last_x
;
1283 dst_V
+= new_x
- last_x
;
1288 case DM::SINGLE_COLOR
| 0*DM::SSE2
| DM::AYUV_PLANAR
:
1290 // char * debug_dst=(char*)dst;int h2 = h;
1291 // XY_DO_ONCE( xy_logger::write_file("G:\\b2_rt", (char*)&color, sizeof(color)) );
1292 // XY_DO_ONCE( xy_logger::write_file("G:\\b2_rt", debug_dst, (h2-1)*spd.pitch) );
1293 // debug_dst += spd.pitch*spd.h;
1294 // XY_DO_ONCE( xy_logger::write_file("G:\\b2_rt", debug_dst, (h2-1)*spd.pitch) );
1295 // debug_dst += spd.pitch*spd.h;
1296 // XY_DO_ONCE( xy_logger::write_file("G:\\b2_rt", debug_dst, (h2-1)*spd.pitch) );
1297 // debug_dst += spd.pitch*spd.h;
1298 // XY_DO_ONCE( xy_logger::write_file("G:\\b2_rt", debug_dst, (h2-1)*spd.pitch) );
1299 // debug_dst=(char*)dst;
1301 unsigned char* dst_A
= (unsigned char*)dst
;
1302 unsigned char* dst_Y
= dst_A
+ spd
.pitch
*spd
.h
;
1303 unsigned char* dst_U
= dst_Y
+ spd
.pitch
*spd
.h
;
1304 unsigned char* dst_V
= dst_U
+ spd
.pitch
*spd
.h
;
1307 for(int wt
=0; wt
<w
; ++wt
)
1309 DWORD temp
= COMBINE_AYUV(dst_A
[wt
], dst_Y
[wt
], dst_U
[wt
], dst_V
[wt
]);
1310 pixmix(&temp
, color
, s
[wt
]);
1311 SPLIT_AYUV(temp
, dst_A
+wt
, dst_Y
+wt
, dst_U
+wt
, dst_V
+wt
);
1319 // XY_DO_ONCE( xy_logger::write_file("G:\\a2_rt", debug_dst, (h2-1)*spd.pitch) );
1320 // debug_dst += spd.pitch*spd.h;
1321 // XY_DO_ONCE( xy_logger::write_file("G:\\a2_rt", debug_dst, (h2-1)*spd.pitch) );
1322 // debug_dst += spd.pitch*spd.h;
1323 // XY_DO_ONCE( xy_logger::write_file("G:\\a2_rt", debug_dst, (h2-1)*spd.pitch) );
1324 // debug_dst += spd.pitch*spd.h;
1325 // XY_DO_ONCE( xy_logger::write_file("G:\\a2_rt", debug_dst, (h2-1)*spd.pitch) );
1328 case 0*DM::SINGLE_COLOR
| 0*DM::SSE2
| DM::AYUV_PLANAR
:
1330 unsigned char* dst_A
= (unsigned char*)dst
;
1331 unsigned char* dst_Y
= dst_A
+ spd
.pitch
*spd
.h
;
1332 unsigned char* dst_U
= dst_Y
+ spd
.pitch
*spd
.h
;
1333 unsigned char* dst_V
= dst_U
+ spd
.pitch
*spd
.h
;
1336 const DWORD
*sw
= switchpts
;
1337 for(int wt
=0; wt
<w
; ++wt
)
1339 if(wt
+xo
>= sw
[1]) {while(wt
+xo
>= sw
[1]) sw
+= 2; color
= sw
[-2];}
1340 DWORD temp
= COMBINE_AYUV(dst_A
[wt
], dst_Y
[wt
], dst_U
[wt
], dst_V
[wt
]);
1341 pixmix(&temp
, color
, (s
[wt
]*(color
>>24))>>8);
1342 SPLIT_AYUV(temp
, dst_A
+wt
, dst_Y
+wt
, dst_U
+wt
, dst_V
+wt
);
1353 // Remember to EMMS!
1354 // Rendering fails in funny ways if we don't do this.
1359 CRect
Rasterizer::Draw( SubPicDesc
& spd
, DrawItem
& draw_item
)
1361 return Draw(spd
, draw_item
.overlay
, draw_item
.clip_rect
, draw_item
.alpha_mask
.get(),
1362 draw_item
.xsub
, draw_item
.ysub
, draw_item
.switchpts
, draw_item
.fBody
, draw_item
.fBorder
);
1365 DrawItem
* Rasterizer::CreateDrawItem( SubPicDesc
& spd
, SharedPtrOverlay overlay
, const CRect
& clipRect
, SharedArrayByte pAlphaMask
, int xsub
, int ysub
, const DWORD
* switchpts
, bool fBody
, bool fBorder
)
1367 DrawItem
* result
= new DrawItem();
1368 result
->overlay
= overlay
;
1369 result
->clip_rect
= clipRect
;
1370 result
->alpha_mask
= pAlphaMask
;
1371 result
->xsub
= xsub
;
1372 result
->ysub
= ysub
;
1374 memcpy(result
->switchpts
, switchpts
, sizeof(result
->switchpts
));
1375 result
->fBody
= fBody
;
1376 result
->fBorder
= fBorder
;
// Rasterizer::DryDraw (full-parameter overload).
// Computes the rectangle an overlay would occupy on the sub-picture spd.
// The lines visible here only do rectangle arithmetic; nothing is written
// to spd.bits.
//   spd        - target surface; only spd.w / spd.h are read here.
//   overlay    - supplies mOffsetX/Y, mOverlayWidth/Height and mOverlayPitch.
//   xsub, ysub - position offsets added to the overlay offsets before the
//                (v+4)>>3 rounding below.
//   switchpts, fBody, fBorder - only used for the early-out test.
// Returns an empty rect (0,0,0,0) when switchpts is NULL, when neither fBody
// nor fBorder is set, or when the clipped width or height is not positive.
// NOTE(review): pAlphaMask and overlayPitch are not referenced by the lines
// visible here -- confirm whether they are needed at all.
1380 CRect
Rasterizer::DryDraw( SubPicDesc
& spd
, SharedPtrOverlay overlay
, const CRect
& clipRect
, byte
* pAlphaMask
, int xsub
, int ysub
, const DWORD
* switchpts
, bool fBody
, bool fBorder
)
1382 CRect
bbox(0, 0, 0, 0);
// '&&' binds tighter than '||': bail out if there are no switch points, or
// if neither the body nor the border is requested.
1383 if(!switchpts
|| !fBody
&& !fBorder
) return(bbox
);
1386 // Limit drawn area to intersection of rendering surface and rectangular clip area
1387 CRect
r(0, 0, spd
.w
, spd
.h
);
1389 // Remember that all subtitle coordinates are specified in 1/8 pixels
1390 // (x+4)>>3 rounds to nearest whole pixel.
1391 // ??? What is xsub, ysub, mOffsetX and mOffsetY ?
1392 int overlayPitch
= overlay
->mOverlayPitch
;
1393 int x
= (xsub
+ overlay
->mOffsetX
+ 4)>>3;
1394 int y
= (ysub
+ overlay
->mOffsetY
+ 4)>>3;
1395 int w
= overlay
->mOverlayWidth
;
1396 int h
= overlay
->mOverlayHeight
;
// Clip against the left/top edges: xo/yo record how far into the overlay the
// visible part starts, and w/h shrink by the same amount.
1399 if(x
< r
.left
) {xo
= r
.left
-x
; w
-= r
.left
-x
; x
= r
.left
;}
1400 if(y
< r
.top
) {yo
= r
.top
-y
; h
-= r
.top
-y
; y
= r
.top
;}
// Clip against the right/bottom edges.
1401 if(x
+w
> r
.right
) w
= r
.right
-x
;
1402 if(y
+h
> r
.bottom
) h
= r
.bottom
-y
;
1403 // Check if there's actually anything to render
1404 if(w
<= 0 || h
<= 0) return(bbox
);
// The result is the clipped rectangle, intersected with the surface bounds.
1405 bbox
.SetRect(x
, y
, x
+w
, y
+h
);
1406 bbox
&= CRect(0, 0, spd
.w
, spd
.h
);
1411 CRect
Rasterizer::DryDraw( SubPicDesc
& spd
, DrawItem
& draw_item
)
1413 return DryDraw(spd
, draw_item
.overlay
, draw_item
.clip_rect
, draw_item
.alpha_mask
.get(),
1414 draw_item
.xsub
, draw_item
.ysub
, draw_item
.switchpts
, draw_item
.fBody
, draw_item
.fBorder
);
// Rasterizer::FillSolidRect
// Blends the colour argb over an nWidth x nHeight rectangle whose top-left
// corner is (x, y) on the surface spd.
// The code path is chosen from two flags combined into draw_method: whether
// g_cpuid reports SSE2 and whether spd.type is MSP_AYUV_PLANAR.
//   - packed surfaces: each DWORD pixel is blended with pixmix()/pixmix_sse2(),
//     passing argb>>24 as the alpha argument;
//   - planar AYUV surfaces: each plane is blended separately with
//     AlphaBlt()/AlphaBltC(), passing argb>>24 as alpha and the matching
//     colour byte (0 for the A plane).
// NOTE(review): no clipping of the rectangle against spd.w / spd.h is visible
// in these lines -- confirm that callers always pass an in-bounds rectangle.
1417 void Rasterizer::FillSolidRect(SubPicDesc
& spd
, int x
, int y
, int nWidth
, int nHeight
, DWORD argb
)
1419 bool fSSE2
= !!(g_cpuid
.m_flags
& CCpuID::sse2
);
1420 bool AYUV_PLANAR
= (spd
.type
==MSP_AYUV_PLANAR
);
1421 int draw_method
= 0;
1423 draw_method
|= DM::SSE2
;
1425 draw_method
|= DM::AYUV_PLANAR
;
1427 switch (draw_method
)
// Packed surface, SSE2 blend.
1429 case DM::SSE2
| 0*DM::AYUV_PLANAR
:
1431 for (int wy
=y
; wy
<y
+nHeight
; wy
++) {
// Row start is computed in bytes (pitch * row), then x is added in DWORD units.
1432 DWORD
* dst
= (DWORD
*)((BYTE
*)spd
.bits
+ spd
.pitch
* wy
) + x
;
1433 for(int wt
=0; wt
<nWidth
; ++wt
) {
1434 pixmix_sse2(&dst
[wt
], argb
, argb
>>24);
// Packed surface, plain C blend.
1439 case 0*DM::SSE2
| 0*DM::AYUV_PLANAR
:
1441 for (int wy
=y
; wy
<y
+nHeight
; wy
++) {
1442 DWORD
* dst
= (DWORD
*)((BYTE
*)spd
.bits
+ spd
.pitch
* wy
) + x
;
1443 for(int wt
=0; wt
<nWidth
; ++wt
) {
1444 pixmix(&dst
[wt
], argb
, argb
>>24);
// Planar surface, AlphaBlt path. Here x is a byte offset (one byte per sample)
// and the Y, U and V planes each follow the previous one at pitch*h bytes.
1449 case DM::SSE2
| DM::AYUV_PLANAR
:
1451 BYTE
* dst
= reinterpret_cast<BYTE
*>(spd
.bits
) + spd
.pitch
* y
+ x
;
1453 BYTE
* dst_Y
= dst_A
+ spd
.pitch
*spd
.h
;
1454 BYTE
* dst_U
= dst_Y
+ spd
.pitch
*spd
.h
;
1455 BYTE
* dst_V
= dst_U
+ spd
.pitch
*spd
.h
;
1456 AlphaBlt(dst_Y
, argb
>>24, ((argb
)>>16)&0xff, nHeight
, nWidth
, spd
.pitch
);
1457 AlphaBlt(dst_U
, argb
>>24, ((argb
)>>8)&0xff, nHeight
, nWidth
, spd
.pitch
);
1458 AlphaBlt(dst_V
, argb
>>24, ((argb
))&0xff, nHeight
, nWidth
, spd
.pitch
);
1459 AlphaBlt(dst_A
, argb
>>24, 0, nHeight
, nWidth
, spd
.pitch
);
// Planar surface, AlphaBltC path (same plane layout as above).
1462 case 0*DM::SSE2
| DM::AYUV_PLANAR
:
1464 BYTE
* dst
= reinterpret_cast<BYTE
*>(spd
.bits
) + spd
.pitch
* y
+ x
;
1466 BYTE
* dst_Y
= dst_A
+ spd
.pitch
*spd
.h
;
1467 BYTE
* dst_U
= dst_Y
+ spd
.pitch
*spd
.h
;
1468 BYTE
* dst_V
= dst_U
+ spd
.pitch
*spd
.h
;
1469 AlphaBltC(dst_Y
, argb
>>24, ((argb
)>>16)&0xff, nHeight
, nWidth
, spd
.pitch
);
1470 AlphaBltC(dst_U
, argb
>>24, ((argb
)>>8)&0xff, nHeight
, nWidth
, spd
.pitch
);
1471 AlphaBltC(dst_V
, argb
>>24, ((argb
))&0xff, nHeight
, nWidth
, spd
.pitch
);
1472 AlphaBltC(dst_A
, argb
>>24, 0, nHeight
, nWidth
, spd
.pitch
);
1479 ///////////////////////////////////////////////////////////////
// Overlay::_DoFillAlphaMash
// Fills a w x h window of outputAlphaMask, starting at (x, y), with alpha
// values derived from the body and/or border planes, scaled by color_alpha
// and optionally by an external alpha mask.
//   pBody, pBorder - source planes; either may be NULL. Both, like the
//                    output, are addressed with mOverlayPitch.
//   pAlphaMask     - optional mask, advanced by 'pitch' per row; NULL = none.
// Four cases are handled (the scalar statements show the per-byte formula):
//   1. no mask, body and border : max(border - body, 0) * color_alpha >> 6
//   2. no mask, exactly one src : src * color_alpha >> 6
//   3. mask,    exactly one src : src * mask * color_alpha >> 12
//   4. mask,    body and border : max(border - body, 0) * mask * color_alpha >> 12
// Each row is processed in stages bounded by x0/x00/x_end00/x_end0/x_end:
// scalar bytes, 4-byte MMX groups, 16-byte SSE2 groups, 4-byte MMX groups,
// then scalar bytes again.
// NOTE(review): the alignment maths casts dst to int via reinterpret_cast;
// that only works where a pointer fits in an int (32-bit builds) -- confirm
// the supported targets.
// NOTE(review): the masked MMX stages use the signed _mm_mulhi_pi16 while the
// SSE2 stages use the unsigned _mm_mulhi_epu16; results agree only while
// (mask << 8) stays below 0x8000 -- confirm the mask value range.
1483 void Overlay::_DoFillAlphaMash(byte
* outputAlphaMask
, const byte
* pBody
, const byte
* pBorder
, int x
, int y
, int w
, int h
, const byte
* pAlphaMask
, int pitch
, DWORD color_alpha
)
// Move the source/destination pointers to the window's top-left corner.
1485 pBody
= pBody
!=NULL
? pBody
+ y
*mOverlayPitch
+ x
: NULL
;
1486 pBorder
= pBorder
!=NULL
? pBorder
+ y
*mOverlayPitch
+ x
: NULL
;
1487 byte
* dst
= outputAlphaMask
+ y
*mOverlayPitch
+ x
;
// x0 / x00: distance from dst to the next 4- / 16-byte boundary, capped at w.
// x_end00 / x_end0: offset of the last 16- / 4-byte boundary at or before dst+w.
1489 const int x0
= ((reinterpret_cast<int>(dst
)+3)&~3) - reinterpret_cast<int>(dst
) < w
?
1490 ((reinterpret_cast<int>(dst
)+3)&~3) - reinterpret_cast<int>(dst
) : w
; //IMPORTANT! Should not exceed w.
1491 const int x00
= ((reinterpret_cast<int>(dst
)+15)&~15) - reinterpret_cast<int>(dst
) < w
?
1492 ((reinterpret_cast<int>(dst
)+15)&~15) - reinterpret_cast<int>(dst
) : w
;//IMPORTANT! Should not exceed w.
1493 const int x_end00
= ((reinterpret_cast<int>(dst
)+w
)&~15) - reinterpret_cast<int>(dst
);
1494 const int x_end0
= ((reinterpret_cast<int>(dst
)+w
)&~3) - reinterpret_cast<int>(dst
);
1495 const int x_end
= w
;
// color_alpha broadcast into every 16-bit lane for the SIMD multiplies.
1497 __m64 color_alpha_64
= _mm_set1_pi16(color_alpha
);
1498 __m128i color_alpha_128
= _mm_set1_epi16(color_alpha
);
// Case 1: no mask, both planes -> saturating (border - body) scaled by color_alpha.
1500 if(pAlphaMask
==NULL
&& pBody
!=NULL
&& pBorder
!=NULL
)
1505 mov eax, color_alpha
1507 punpcklwd XMM3, XMM3
1508 pshufd XMM3, XMM3, 0
1516 int temp
= pBorder
[j
]-pBody
[j
];
1517 temp
= temp
<0 ? 0 : temp
;
1518 dst
[j
] = (temp
* color_alpha
)>>6;
1522 __m64 border
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBorder
+j
));
1523 __m64 body
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBody
+j
));
1524 border
= _mm_subs_pu8(border
, body
);
1525 __m64 zero
= _mm_setzero_si64();
1526 border
= _mm_unpacklo_pi8(border
, zero
);
1527 border
= _mm_mullo_pi16(border
, color_alpha_64
);
1528 border
= _mm_srli_pi16(border
, 6);
1529 border
= _mm_packs_pu16(border
,border
);
1530 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(border
);
1532 __m128i zero
= _mm_setzero_si128();
1533 for( ;j
<x_end00
;j
+=16)
1535 __m128i border
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(pBorder
+j
));
1536 __m128i body
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(pBody
+j
));
1537 border
= _mm_subs_epu8(border
,body
);
1538 __m128i srchi
= border
;
1539 border
= _mm_unpacklo_epi8(border
, zero
);
1540 srchi
= _mm_unpackhi_epi8(srchi
, zero
);
1541 border
= _mm_mullo_epi16(border
, color_alpha_128
);
1542 srchi
= _mm_mullo_epi16(srchi
, color_alpha_128
);
1543 border
= _mm_srli_epi16(border
, 6);
1544 srchi
= _mm_srli_epi16(srchi
, 6);
1545 border
= _mm_packus_epi16(border
, srchi
);
1546 _mm_storeu_si128(reinterpret_cast<__m128i
*>(dst
+j
), border
);
1548 for( ;j
<x_end0
;j
+=4)
1550 __m64 border
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBorder
+j
));
1551 __m64 body
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBody
+j
));
1552 border
= _mm_subs_pu8(border
, body
);
1553 __m64 zero
= _mm_setzero_si64();
1554 border
= _mm_unpacklo_pi8(border
, zero
);
1555 border
= _mm_mullo_pi16(border
, color_alpha_64
);
1556 border
= _mm_srli_pi16(border
, 6);
1557 border
= _mm_packs_pu16(border
,border
);
1558 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(border
);
1562 int temp
= pBorder
[j
]-pBody
[j
];
1563 temp
= temp
<0 ? 0 : temp
;
1564 dst
[j
] = (temp
* color_alpha
)>>6;
1566 pBody
+= mOverlayPitch
;
1567 pBorder
+= mOverlayPitch
;
1568 //pAlphaMask += pitch;
1569 dst
+= mOverlayPitch
;
// Case 2: no mask, exactly one of body/border present -> that plane scaled by color_alpha.
1572 else if( ((pBody
==NULL
) + (pBorder
==NULL
))==1 && pAlphaMask
==NULL
)
1574 const BYTE
* src1
= pBody
!=NULL
? pBody
: pBorder
;
1580 dst
[j
] = (src1
[j
] * color_alpha
)>>6;
1584 __m64 src
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(src1
+j
));
1585 __m64 zero
= _mm_setzero_si64();
1586 src
= _mm_unpacklo_pi8(src
, zero
);
1587 src
= _mm_mullo_pi16(src
, color_alpha_64
);
1588 src
= _mm_srli_pi16(src
, 6);
1589 src
= _mm_packs_pu16(src
,src
);
1590 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(src
);
1592 __m128i zero
= _mm_setzero_si128();
1593 for( ;j
<x_end00
;j
+=16)
1595 __m128i src
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(src1
+j
));
1596 __m128i srchi
= src
;
1597 src
= _mm_unpacklo_epi8(src
, zero
);
1598 srchi
= _mm_unpackhi_epi8(srchi
, zero
);
1599 src
= _mm_mullo_epi16(src
, color_alpha_128
);
1600 srchi
= _mm_mullo_epi16(srchi
, color_alpha_128
);
1601 src
= _mm_srli_epi16(src
, 6);
1602 srchi
= _mm_srli_epi16(srchi
, 6);
1603 src
= _mm_packus_epi16(src
, srchi
);
1604 _mm_storeu_si128(reinterpret_cast<__m128i
*>(dst
+j
), src
);
1606 for( ;j
<x_end0
;j
+=4)
1608 __m64 src
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(src1
+j
));
1609 __m64 zero
= _mm_setzero_si64();
1610 src
= _mm_unpacklo_pi8(src
, zero
);
1611 src
= _mm_mullo_pi16(src
, color_alpha_64
);
1612 src
= _mm_srli_pi16(src
, 6);
1613 src
= _mm_packs_pu16(src
,src
);
1614 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(src
);
1618 dst
[j
] = (src1
[j
] * color_alpha
)>>6;
1620 src1
+= mOverlayPitch
;
1621 //pAlphaMask += pitch;
1622 dst
+= mOverlayPitch
;
// Case 3: mask present, exactly one of body/border present.
// In the SIMD stages the mask byte is unpacked into the HIGH byte of each
// 16-bit lane (mask << 8), so the high-half multiply yields
// (src*color_alpha*mask) >> 8 and the remaining shift is 12+8-16 = 4.
1625 else if( ((pBody
==NULL
) + (pBorder
==NULL
))==1 && pAlphaMask
!=NULL
)
1627 const BYTE
* src1
= pBody
!=NULL
? pBody
: pBorder
;
1633 dst
[j
] = (src1
[j
] * pAlphaMask
[j
] * color_alpha
)>>12;
1637 __m64 src
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(src1
+j
));
1638 __m64 mask
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pAlphaMask
+j
));
1639 __m64 zero
= _mm_setzero_si64();
1640 src
= _mm_unpacklo_pi8(src
, zero
);
1641 src
= _mm_mullo_pi16(src
, color_alpha_64
);
1642 mask
= _mm_unpacklo_pi8(zero
, mask
); //important!
1643 src
= _mm_mulhi_pi16(src
, mask
); //important!
1644 src
= _mm_srli_pi16(src
, 12+8-16); //important!
1645 src
= _mm_packs_pu16(src
,src
);
1646 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(src
);
1648 __m128i zero
= _mm_setzero_si128();
1649 for( ;j
<x_end00
;j
+=16)
1651 __m128i src
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(src1
+j
));
1652 __m128i mask
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(pAlphaMask
+j
));
1653 __m128i srchi
= src
;
1654 __m128i maskhi
= mask
;
1655 src
= _mm_unpacklo_epi8(src
, zero
);
1656 srchi
= _mm_unpackhi_epi8(srchi
, zero
);
1657 mask
= _mm_unpacklo_epi8(zero
, mask
); //important!
1658 maskhi
= _mm_unpackhi_epi8(zero
, maskhi
);
1659 src
= _mm_mullo_epi16(src
, color_alpha_128
);
1660 srchi
= _mm_mullo_epi16(srchi
, color_alpha_128
);
1661 src
= _mm_mulhi_epu16(src
, mask
); //important!
1662 srchi
= _mm_mulhi_epu16(srchi
, maskhi
);
1663 src
= _mm_srli_epi16(src
, 12+8-16); //important!
1664 srchi
= _mm_srli_epi16(srchi
, 12+8-16);
1665 src
= _mm_packus_epi16(src
, srchi
);
1666 _mm_storeu_si128(reinterpret_cast<__m128i
*>(dst
+j
), src
);
1668 for( ;j
<x_end0
;j
+=4)
1670 __m64 src
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(src1
+j
));
1671 __m64 mask
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pAlphaMask
+j
));
1672 __m64 zero
= _mm_setzero_si64();
1673 src
= _mm_unpacklo_pi8(src
, zero
);
1674 src
= _mm_mullo_pi16(src
, color_alpha_64
);
1675 mask
= _mm_unpacklo_pi8(zero
, mask
); //important!
1676 src
= _mm_mulhi_pi16(src
, mask
); //important!
1677 src
= _mm_srli_pi16(src
, 12+8-16); //important!
1678 src
= _mm_packs_pu16(src
,src
);
1679 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(src
);
1683 dst
[j
] = (src1
[j
] * pAlphaMask
[j
] * color_alpha
)>>12;
1685 src1
+= mOverlayPitch
;
1686 pAlphaMask
+= pitch
;
1687 dst
+= mOverlayPitch
;
// Case 4: mask present, both planes -> saturating (border - body), then
// scaled by mask and color_alpha exactly as in case 3.
1690 else if( pAlphaMask
!=NULL
&& pBody
!=NULL
&& pBorder
!=NULL
)
1697 int temp
= pBorder
[j
]-pBody
[j
];
1698 temp
= temp
<0 ? 0 : temp
;
1699 dst
[j
] = (temp
* pAlphaMask
[j
] * color_alpha
)>>12;
1703 __m64 border
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBorder
+j
));
1704 __m64 body
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBody
+j
));
1705 border
= _mm_subs_pu8(border
, body
);
1706 __m64 mask
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pAlphaMask
+j
));
1707 __m64 zero
= _mm_setzero_si64();
1708 border
= _mm_unpacklo_pi8(border
, zero
);
1709 border
= _mm_mullo_pi16(border
, color_alpha_64
);
1710 mask
= _mm_unpacklo_pi8(zero
, mask
); //important!
1711 border
= _mm_mulhi_pi16(border
, mask
); //important!
1712 border
= _mm_srli_pi16(border
, 12+8-16); //important!
1713 border
= _mm_packs_pu16(border
,border
);
1714 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(border
);
1716 __m128i zero
= _mm_setzero_si128();
1717 for( ;j
<x_end00
;j
+=16)
1719 __m128i border
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(pBorder
+j
));
1720 __m128i body
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(pBody
+j
));
1721 border
= _mm_subs_epu8(border
,body
);
1723 __m128i mask
= _mm_loadu_si128(reinterpret_cast<const __m128i
*>(pAlphaMask
+j
));
1724 __m128i srchi
= border
;
1725 __m128i maskhi
= mask
;
1726 border
= _mm_unpacklo_epi8(border
, zero
);
1727 srchi
= _mm_unpackhi_epi8(srchi
, zero
);
1728 mask
= _mm_unpacklo_epi8(zero
, mask
); //important!
1729 maskhi
= _mm_unpackhi_epi8(zero
, maskhi
);
1730 border
= _mm_mullo_epi16(border
, color_alpha_128
);
1731 srchi
= _mm_mullo_epi16(srchi
, color_alpha_128
);
1732 border
= _mm_mulhi_epu16(border
, mask
); //important!
1733 srchi
= _mm_mulhi_epu16(srchi
, maskhi
);
1734 border
= _mm_srli_epi16(border
, 12+8-16); //important!
1735 srchi
= _mm_srli_epi16(srchi
, 12+8-16);
1736 border
= _mm_packus_epi16(border
, srchi
);
1737 _mm_storeu_si128(reinterpret_cast<__m128i
*>(dst
+j
), border
);
1739 for( ;j
<x_end0
;j
+=4)
1741 __m64 border
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBorder
+j
));
1742 __m64 body
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pBody
+j
));
1743 border
= _mm_subs_pu8(border
, body
);
1744 __m64 mask
= _mm_cvtsi32_si64(*reinterpret_cast<const int*>(pAlphaMask
+j
));
1745 __m64 zero
= _mm_setzero_si64();
1746 border
= _mm_unpacklo_pi8(border
, zero
);
1747 border
= _mm_mullo_pi16(border
, color_alpha_64
);
1748 mask
= _mm_unpacklo_pi8(zero
, mask
); //important!
1749 border
= _mm_mulhi_pi16(border
, mask
); //important!
1750 border
= _mm_srli_pi16(border
, 12+8-16); //important!
1751 border
= _mm_packs_pu16(border
,border
);
1752 *reinterpret_cast<int*>(dst
+j
) = _mm_cvtsi64_si32(border
);
1756 int temp
= pBorder
[j
]-pBody
[j
];
1757 temp
= temp
<0 ? 0 : temp
;
1758 dst
[j
] = (temp
* pAlphaMask
[j
] * color_alpha
)>>12;
1760 pBody
+= mOverlayPitch
;
1761 pBorder
+= mOverlayPitch
;
1762 pAlphaMask
+= pitch
;
1763 dst
+= mOverlayPitch
;
1768 //should NOT happen!
// Overlay::FillAlphaMash
// Chooses which of this overlay's planes (mpOverlayBuffer.body / .border) to
// hand to _DoFillAlphaMash, based on fBody, fBorder and whether an alpha mask
// is supplied. All other arguments are forwarded unchanged.
//   body only,       no mask : (body,  NULL)
//   body and border, no mask : (NULL,  border)   -- reached with fBorder set
//   border only              : (body,  border)   -- with or without a mask
//   body only,       mask    : (body,  NULL)
//   body and border, mask    : (NULL,  border)
// NOTE(review): what happens when neither fBody nor fBorder is set is not
// visible in this listing.
1773 void Overlay::FillAlphaMash( byte
* outputAlphaMask
, bool fBody
, bool fBorder
, int x
, int y
, int w
, int h
, const byte
* pAlphaMask
, int pitch
, DWORD color_alpha
)
1775 if(!fBorder
&& fBody
&& pAlphaMask
==NULL
)
1777 _DoFillAlphaMash(outputAlphaMask
, mpOverlayBuffer
.body
, NULL
, x
, y
, w
, h
, pAlphaMask
, pitch
, color_alpha
);
// fBorder is necessarily true here, because the previous branch took !fBorder.
1779 else if(/*fBorder &&*/ fBody
&& pAlphaMask
==NULL
)
1781 _DoFillAlphaMash(outputAlphaMask
, NULL
, mpOverlayBuffer
.border
, x
, y
, w
, h
, pAlphaMask
, pitch
, color_alpha
);
// Border without body: both planes are passed so the callee can subtract the
// body from the border.
1783 else if(!fBody
&& fBorder
/* pAlphaMask==NULL or not*/)
1785 _DoFillAlphaMash(outputAlphaMask
, mpOverlayBuffer
.body
, mpOverlayBuffer
.border
, x
, y
, w
, h
, pAlphaMask
, pitch
, color_alpha
);
1787 else if(!fBorder
&& fBody
&& pAlphaMask
!=NULL
)
1789 _DoFillAlphaMash(outputAlphaMask
, mpOverlayBuffer
.body
, NULL
, x
, y
, w
, h
, pAlphaMask
, pitch
, color_alpha
);
1791 else if(fBorder
&& fBody
&& pAlphaMask
!=NULL
)
1793 _DoFillAlphaMash(outputAlphaMask
, NULL
, mpOverlayBuffer
.border
, x
, y
, w
, h
, pAlphaMask
, pitch
, color_alpha
);
// Overlay::GetSubpixelVariance
// Creates a new Overlay that is a copy of this one shifted by
// (xshift, yshift): the offsets are reduced by the shift, the width/height
// are enlarged by it, and the body and border planes are passed through
// Bilinear() with the same shift.
// The new overlay gets one xy_malloc'ed buffer holding two planes of
// mOverlayPitch * mOverlayHeight bytes each: body first, border second.
// NOTE(review): the lines between the allocation of the Overlay and the
// offset computation, and the return statement, are not visible in this
// listing; the caller presumably receives the new Overlay -- confirm.
// NOTE(review): the result of xy_malloc is not checked in the visible lines.
1802 Overlay
* Overlay::GetSubpixelVariance(unsigned int xshift
, unsigned int yshift
)
1804 Overlay
* overlay
= new Overlay();
1812 overlay
->mOffsetX
= mOffsetX
- xshift
;
1813 overlay
->mOffsetY
= mOffsetY
- yshift
;
1814 overlay
->mWidth
= mWidth
+ xshift
;
1815 overlay
->mHeight
= mHeight
+ yshift
;
// Overlay size: mWidth/mHeight divided by 8 (rounded up) plus one; the pitch
// is the width rounded up to a multiple of 16.
1817 overlay
->mOverlayWidth
= ((overlay
->mWidth
+7)>>3) + 1;
1818 overlay
->mOverlayHeight
= ((overlay
->mHeight
+ 7)>>3) + 1;
1819 overlay
->mOverlayPitch
= (overlay
->mOverlayWidth
+15)&~15;
1821 overlay
->mpOverlayBuffer
.base
= reinterpret_cast<byte
*>(xy_malloc(2 * overlay
->mOverlayPitch
* overlay
->mOverlayHeight
));
1822 overlay
->mpOverlayBuffer
.body
= overlay
->mpOverlayBuffer
.base
;
1823 overlay
->mpOverlayBuffer
.border
= overlay
->mpOverlayBuffer
.base
+ overlay
->mOverlayPitch
* overlay
->mOverlayHeight
;
1825 overlay
->mfWideOutlineEmpty
= mfWideOutlineEmpty
;
// Same dimensions: both planes are copied in one go.
// NOTE(review): this compares widths and heights but copies using this
// overlay's pitch; it relies on equal widths giving equal pitches.
1827 if(overlay
->mOverlayWidth
==mOverlayWidth
&& overlay
->mOverlayHeight
==mOverlayHeight
)
1828 memcpy(overlay
->mpOverlayBuffer
.base
, mpOverlayBuffer
.base
, 2 * mOverlayPitch
* mOverlayHeight
);
// Different dimensions: clear the new buffer, then copy row by row, stepping
// each plane by its own pitch.
1831 memset(overlay
->mpOverlayBuffer
.base
, 0, 2 * overlay
->mOverlayPitch
* overlay
->mOverlayHeight
);
1832 byte
* dst
= overlay
->mpOverlayBuffer
.body
;
1833 const byte
* src
= mpOverlayBuffer
.body
;
1834 for (int i
=0;i
<mOverlayHeight
;i
++)
1836 memcpy(dst
, src
, mOverlayPitch
);
1837 dst
+= overlay
->mOverlayPitch
;
1838 src
+= mOverlayPitch
;
1840 dst
= overlay
->mpOverlayBuffer
.border
;
1841 src
= mpOverlayBuffer
.border
;
1842 for (int i
=0;i
<mOverlayHeight
;i
++)
1844 memcpy(dst
, src
, mOverlayPitch
);
1845 dst
+= overlay
->mOverlayPitch
;
1846 src
+= mOverlayPitch
;
1850 // Bilinear(overlay->mpOverlayBuffer.base, overlay->mOverlayWidth, 2*overlay->mOverlayHeight, overlay->mOverlayPitch, xshift, yshift);
// Each plane is filtered separately.
1851 Bilinear(overlay
->mpOverlayBuffer
.body
, overlay
->mOverlayWidth
, overlay
->mOverlayHeight
, overlay
->mOverlayPitch
, xshift
, yshift
);
1852 Bilinear(overlay
->mpOverlayBuffer
.border
, overlay
->mOverlayWidth
, overlay
->mOverlayHeight
, overlay
->mOverlayPitch
, xshift
, yshift
);
1856 ///////////////////////////////////////////////////////////////
// Default constructor: starts with no type/point arrays (both NULL) and a
// point count of 0.
1860 PathData::PathData():mpPathTypes(NULL
), mpPathPoints(NULL
), mPathPoints(0)
1864 PathData::PathData( const PathData
& src
):mPathPoints(src
.mPathPoints
)
1866 //TODO: deal with the case that src.mPathPoints<0
1869 mpPathTypes
= static_cast<BYTE
*>(malloc(mPathPoints
* sizeof(BYTE
)));
1870 mpPathPoints
= static_cast<POINT
*>(malloc(mPathPoints
* sizeof(POINT
)));
1874 memcpy(mpPathTypes
, src
.mpPathTypes
, mPathPoints
*sizeof(BYTE
));
1875 memcpy(mpPathPoints
, src
.mpPathPoints
, mPathPoints
*sizeof(POINT
));
1879 const PathData
& PathData::operator=( const PathData
& src
)
1883 if(mPathPoints
!=src
.mPathPoints
&& src
.mPathPoints
>0)
1885 mPathPoints
= src
.mPathPoints
;
1886 delete[] mpPathTypes
;
1887 delete[] mpPathPoints
;
1888 mpPathTypes
= static_cast<BYTE
*>(malloc(mPathPoints
* sizeof(BYTE
)));
1889 mpPathPoints
= static_cast<POINT
*>(malloc(mPathPoints
* sizeof(POINT
)));//better than realloc
1891 if(src
.mPathPoints
>0)
1893 memcpy(mpPathTypes
, src
.mpPathTypes
, mPathPoints
*sizeof(BYTE
));
1894 memcpy(mpPathPoints
, src
.mpPathPoints
, mPathPoints
*sizeof(POINT
));
// Destructor.
// NOTE(review): the body is not visible in this listing; presumably it
// releases the point/type arrays (e.g. through _TrashPath()) -- confirm.
1900 PathData::~PathData()
1905 void PathData::_TrashPath()
1907 delete [] mpPathTypes
;
1908 delete [] mpPathPoints
;
1910 mpPathPoints
= NULL
;
// Opens a GDI path bracket on hdc and returns true when ::BeginPath succeeds
// (the '!!' turns the BOOL result into a C++ bool).
// NOTE(review): one statement before the return is not visible in this
// listing; presumably any previously stored path is discarded first -- confirm.
1914 bool PathData::BeginPath(HDC hdc
)
1917 return !!::BeginPath(hdc
);
// Captures the path currently recorded on hdc into this object.
// GetPath is called twice: first with no buffers to obtain the point count,
// then with freshly malloc'ed arrays to fetch the points and their types.
// NOTE(review): the surrounding control flow (closing the path bracket, the
// return statements and the failure path) is not visible in this listing.
// NOTE(review): GetPath can report failure with a negative count, and the
// malloc results are not checked in the visible lines -- confirm that the
// hidden lines guard both cases.
1920 bool PathData::EndPath(HDC hdc
)
// First call: query how many points the path holds.
1925 mPathPoints
= GetPath(hdc
, NULL
, NULL
, 0);
1928 mpPathTypes
= (BYTE
*)malloc(sizeof(BYTE
) * mPathPoints
);
1929 mpPathPoints
= (POINT
*)malloc(sizeof(POINT
) * mPathPoints
);
// Second call: retrieve the data; it must deliver exactly the announced count.
1930 if(mPathPoints
== GetPath(hdc
, mpPathPoints
, mpPathTypes
, mPathPoints
))
// Opens a GDI path bracket on hdc for a path that is collected in several
// pieces; returns true when ::BeginPath succeeds.
// NOTE(review): the use of bClearPath is not visible in this listing;
// presumably the stored path is discarded first when it is true -- confirm.
1937 bool PathData::PartialBeginPath(HDC hdc
, bool bClearPath
)
1941 return !!::BeginPath(hdc
);
// Appends the path currently recorded on hdc to the points already stored,
// translating every new point by (dx, dy).
// The stored arrays are grown with realloc() to hold mPathPoints + nPoints
// entries; the new points are fetched into temporary new[] arrays and then
// copied to the end of the stored arrays with the offset applied.
// NOTE(review): the return statements and the release of the temporary
// pTypes/pPoints arrays are not visible in this listing -- confirm they are
// freed with delete[] on every path.
// NOTE(review): the temporaries are allocated before the realloc results are
// checked in the condition below.
1944 bool PathData::PartialEndPath(HDC hdc
, long dx
, long dy
)
// Query the number of points in the freshly recorded piece.
1952 nPoints
= GetPath(hdc
, NULL
, NULL
, 0);
// Grow the stored arrays; the results are kept in separate variables first.
1955 pNewTypes
= (BYTE
*)realloc(mpPathTypes
, (mPathPoints
+ nPoints
) * sizeof(BYTE
));
1956 pNewPoints
= (POINT
*)realloc(mpPathPoints
, (mPathPoints
+ nPoints
) * sizeof(POINT
));
1958 mpPathTypes
= pNewTypes
;
1960 mpPathPoints
= pNewPoints
;
1961 BYTE
* pTypes
= new BYTE
[nPoints
];
1962 POINT
* pPoints
= new POINT
[nPoints
];
1963 if(pNewTypes
&& pNewPoints
&& nPoints
== GetPath(hdc
, pPoints
, pTypes
, nPoints
))
// Append the new piece, shifted by (dx, dy).
1965 for(int i
= 0; i
< nPoints
; ++i
)
1967 mpPathPoints
[mPathPoints
+ i
].x
= pPoints
[i
].x
+ dx
;
1968 mpPathPoints
[mPathPoints
+ i
].y
= pPoints
[i
].y
+ dy
;
1969 mpPathTypes
[mPathPoints
+ i
] = pTypes
[i
];
1971 mPathPoints
+= nPoints
;
1985 //////////////////////////////////////////////////////////////////////////
// Constructor: starts with a path offset of (0, 0).
// NOTE(review): the body is not visible in this listing.
1989 ScanLineData::ScanLineData():mPathOffsetX(0),mPathOffsetY(0)
// Destructor.
// NOTE(review): the body is not visible in this listing.
1993 ScanLineData::~ScanLineData()
1997 void ScanLineData::_ReallocEdgeBuffer(int edges
)
1999 mEdgeHeapSize
= edges
;
2000 mpEdgeBuffer
= (Edge
*)realloc(mpEdgeBuffer
, sizeof(Edge
)*edges
);
// ScanLineData::_EvaluateBezier
// Flattens one cubic curve segment, defined by the four consecutive path
// points starting at index ptbase, into straight lines passed to
// _EvaluateLine().
// The control points are converted into polynomial coefficients cx0..cx3 /
// cy0..cy3 so that a point on the curve is c0 + t*(c1 + t*(c2 + t*c3)).
// Two coefficient sets are visible: one scaled by 1/6 and one unscaled.
// NOTE(review): the branch heads are not visible in this listing; presumably
// the 1/6 set is the fBSpline case and the other the plain Bezier case.
// NOTE(review): the extraction of x0..x3 / y0..y3 from pt0..pt3, the initial
// value of h, the cx1/cx0/cy1/cy0 assignments of the second set and the loop
// braces are likewise not visible.
2003 void ScanLineData::_EvaluateBezier(const PathData
& path_data
, int ptbase
, bool fBSpline
)
2005 const POINT
* pt0
= path_data
.mpPathPoints
+ ptbase
;
2006 const POINT
* pt1
= path_data
.mpPathPoints
+ ptbase
+ 1;
2007 const POINT
* pt2
= path_data
.mpPathPoints
+ ptbase
+ 2;
2008 const POINT
* pt3
= path_data
.mpPathPoints
+ ptbase
+ 3;
2017 double cx3
, cx2
, cx1
, cx0
, cy3
, cy2
, cy1
, cy0
;
// Coefficient set scaled by 1/6.
2024 double _1div6
= 1.0/6.0;
2025 cx3
= _1div6
*(- x0
+3*x1
-3*x2
+x3
);
2026 cx2
= _1div6
*( 3*x0
-6*x1
+3*x2
);
2027 cx1
= _1div6
*(-3*x0
+3*x2
);
2028 cx0
= _1div6
*( x0
+4*x1
+1*x2
);
2029 cy3
= _1div6
*(- y0
+3*y1
-3*y2
+y3
);
2030 cy2
= _1div6
*( 3*y0
-6*y1
+3*y2
);
2031 cy1
= _1div6
*(-3*y0
+3*y2
);
2032 cy0
= _1div6
*( y0
+4*y1
+1*y2
);
// Unscaled coefficient set.
2040 cx3
= - x0
+3*x1
-3*x2
+x3
;
2041 cx2
= 3*x0
-6*x1
+3*x2
;
2044 cy3
= - y0
+3*y1
-3*y2
+y3
;
2045 cy2
= 3*y0
-6*y1
+3*y2
;
2050 // This equation is from Graphics Gems I.
2052 // The idea is that since we're approximating a cubic curve with lines,
2053 // any error we incur is due to the curvature of the line, which we can
2054 // estimate by calculating the maximum acceleration of the curve. For
2055 // a cubic, the acceleration (second derivative) is a line, meaning that
2056 // the absolute maximum acceleration must occur at either the beginning
2057 // (|c2|) or the end (|c2+c3|). Our bounds here are a little more
2058 // conservative than that, but that's okay.
2060 // If the acceleration of the parametric formula is zero (c2 = c3 = 0),
2061 // that component of the curve is linear and does not incur any error.
2062 // If a=0 for both X and Y, the curve is a line segment and we can
2063 // use a step size of 1.
2064 double maxaccel1
= fabs(2*cy2
) + fabs(6*cy3
);
2065 double maxaccel2
= fabs(2*cx2
) + fabs(6*cx3
);
2066 double maxaccel
= maxaccel1
> maxaccel2
? maxaccel1
: maxaccel2
;
// Only shrink the parameter step when the acceleration bound exceeds 8.
2068 if(maxaccel
> 8.0) h
= sqrt(8.0 / maxaccel
);
// Remember the first point of the figure (the curve value at t = 0).
2069 if(!fFirstSet
) {firstp
.x
= (LONG
)cx0
; firstp
.y
= (LONG
)cy0
; lastp
= firstp
; fFirstSet
= true;}
// Walk the curve in steps of h, emitting a line from the previous point each time.
2070 for(double t
= 0; t
< 1.0; t
+= h
)
2072 double x
= cx0
+ t
*(cx1
+ t
*(cx2
+ t
*cx3
));
2073 double y
= cy0
+ t
*(cy1
+ t
*(cy2
+ t
*cy3
));
2074 _EvaluateLine(lastp
.x
, lastp
.y
, (int)x
, (int)y
);
// Final segment to the curve's end point (t = 1).
2076 double x
= cx0
+ cx1
+ cx2
+ cx3
;
2077 double y
= cy0
+ cy1
+ cy2
+ cy3
;
2078 _EvaluateLine(lastp
.x
, lastp
.y
, (int)x
, (int)y
);
2081 void ScanLineData::_EvaluateLine(const PathData
& path_data
, int pt1idx
, int pt2idx
)
2083 const POINT
* pt1
= path_data
.mpPathPoints
+ pt1idx
;
2084 const POINT
* pt2
= path_data
.mpPathPoints
+ pt2idx
;
2085 _EvaluateLine(pt1
->x
, pt1
->y
, pt2
->x
, pt2
->y
);
// ScanLineData::_EvaluateLine (coordinate overload)
// Adds the line segment (x0, y0) - (x1, y1) to the edge lists.
// If the segment does not start at the last point seen (lastp), a connecting
// segment from lastp to (x0, y0) is emitted first by a recursive call.
// For every scanline entry iy the segment contributes to, one Edge record is
// taken from mpEdgeBuffer, linked in front of that scanline's list in
// mpScanBuffer, and given the rounded x position in posandflag:
//   the branch visible first stores ix*2 + 1, the "up" branch stores ix*2.
// The edge buffer is doubled via _ReallocEdgeBuffer() until the records this
// segment needs fit.
// NOTE(review): the loop headers, the dy/iy computations, the head of the
// first branch and the lastp update are not visible in this listing.
2088 void ScanLineData::_EvaluateLine(int x0
, int y0
, int x1
, int y1
)
2090 if(lastp
.x
!= x0
|| lastp
.y
!= y0
)
2092 _EvaluateLine(lastp
.x
, lastp
.y
, x0
, y0
);
2094 if(!fFirstSet
) {firstp
.x
= x0
; firstp
.y
= y0
; fFirstSet
= true;}
// xacc is a fixed-point x accumulator: the position is recovered below by
// rounding with +32768 and shifting right by 16.
2099 __int64 xacc
= (__int64
)x0
<< 13;
2102 int y
= ((y0
+ 3)&~7) + 4;
2107 __int64 invslope
= (__int64(x1
- x0
) << 16) / dy
;
2108 while(mEdgeNext
+ y1
+ 1 - iy
> mEdgeHeapSize
)
2109 _ReallocEdgeBuffer(mEdgeHeapSize
*2);
2110 xacc
+= (invslope
* (y
- y0
)) >> 3;
2113 int ix
= (int)((xacc
+ 32768) >> 16);
// Link the new edge record in front of the scanline's list.
2114 mpEdgeBuffer
[mEdgeNext
].next
= mpScanBuffer
[iy
];
2115 mpEdgeBuffer
[mEdgeNext
].posandflag
= ix
*2 + 1;
2116 mpScanBuffer
[iy
] = mEdgeNext
++;
// Same procedure with the end points swapped; the flag bit is left clear.
2122 else if(y1
< y0
) // up
2124 __int64 xacc
= (__int64
)x1
<< 13;
2127 int y
= ((y1
+ 3)&~7) + 4;
2132 __int64 invslope
= (__int64(x0
- x1
) << 16) / dy
;
2133 while(mEdgeNext
+ y0
+ 1 - iy
> mEdgeHeapSize
)
2134 _ReallocEdgeBuffer(mEdgeHeapSize
*2);
2135 xacc
+= (invslope
* (y
- y1
)) >> 3;
2138 int ix
= (int)((xacc
+ 32768) >> 16);
2139 mpEdgeBuffer
[mEdgeNext
].next
= mpScanBuffer
[iy
];
2140 mpEdgeBuffer
[mEdgeNext
].posandflag
= ix
*2;
2141 mpScanBuffer
[iy
] = mEdgeNext
++;
// ScanLineData::ScanConvert
// Converts the outline stored in path_data into horizontal spans (mOutline).
// Steps visible in this listing:
//   1. Clear any previous wide outline; an empty path yields zero
//      size and offset.
//   2. Compute the bounding box of all path points, reduce it to
//      coarser units (>>3) and translate the path so that it starts at the
//      box origin; store size and origin in mWidth/mHeight and
//      mPathOffsetX/Y.
//   3. Allocate the edge buffer (malloc, 2048 entries to start with) and one
//      scanline list head per row (new[], zero-initialised).
//   4. Walk the path records and feed lines / curves to _EvaluateLine() and
//      _EvaluateBezier(), closing each figure back to its first point.
//   5. For each scanline, collect its edges, sort them, and emit spans into
//      mOutline.
//   6. Release the temporary buffers.
// Side effects on the argument: the points of path_data are modified in
// place (translated) and the path is discarded with _TrashPath().
// NOTE(review): the return statements, the switch/case heads of the path walk,
// the span-building loop body and the release of mpEdgeBuffer are not visible
// in this listing.
2149 bool ScanLineData::ScanConvert(SharedPtrPathData path_data
)
2151 int lastmoveto
= -1;
2153 // Drop any outlines we may have.
2155 mWideOutline
.clear();
2157 // Determine bounding box
2158 if(!path_data
->mPathPoints
)
2160 mPathOffsetX
= mPathOffsetY
= 0;
2161 mWidth
= mHeight
= 0;
2168 for(i
=0; i
<path_data
->mPathPoints
; ++i
)
2170 int ix
= path_data
->mpPathPoints
[i
].x
;
2171 int iy
= path_data
->mpPathPoints
[i
].y
;
2172 if(ix
< minx
) minx
= ix
;
2173 if(ix
> maxx
) maxx
= ix
;
2174 if(iy
< miny
) miny
= iy
;
2175 if(iy
> maxy
) maxy
= iy
;
// Minimum: divide by 8, then round down to a multiple of 8.
// Maximum: divide by 8, rounding up.
2177 minx
= (minx
>> 3) & ~7;
2178 miny
= (miny
>> 3) & ~7;
2179 maxx
= (maxx
+ 7) >> 3;
2180 maxy
= (maxy
+ 7) >> 3;
// Translate the path so the bounding-box origin becomes (0, 0); the points
// stay in their original units, hence the *8.
2181 for(i
=0; i
<path_data
->mPathPoints
; ++i
)
2183 path_data
->mpPathPoints
[i
].x
-= minx
*8;
2184 path_data
->mpPathPoints
[i
].y
-= miny
*8;
2186 if(minx
> maxx
|| miny
> maxy
)
2188 mWidth
= mHeight
= 0;
2189 mPathOffsetX
= mPathOffsetY
= 0;
2190 path_data
->_TrashPath();
2193 mWidth
= maxx
+ 1 - minx
;
2194 mHeight
= maxy
+ 1 - miny
;
2195 mPathOffsetX
= minx
;
2196 mPathOffsetY
= miny
;
2197 // Initialize edge buffer. We use edge 0 as a sentinel.
2199 mEdgeHeapSize
= 2048;
2200 mpEdgeBuffer
= (Edge
*)malloc(sizeof(Edge
)*mEdgeHeapSize
);
2201 // Initialize scanline list.
2202 mpScanBuffer
= new unsigned int[mHeight
];
2203 memset(mpScanBuffer
, 0, mHeight
*sizeof(unsigned int));
2204 // Scan convert the outline. Yuck, Bezier curves....
2205 // Unfortunately, Windows 95/98 GDI has a bad habit of giving us text
2206 // paths with all but the first figure left open, so we can't rely
2207 // on the PT_CLOSEFIGURE flag being used appropriately.
2209 firstp
.x
= firstp
.y
= 0;
2210 lastp
.x
= lastp
.y
= 0;
2211 for(i
=0; i
<path_data
->mPathPoints
; ++i
)
// The close-figure flag is masked out; figures are closed explicitly below.
2213 BYTE t
= path_data
->mpPathTypes
[i
] & ~PT_CLOSEFIGURE
;
2217 if(lastmoveto
>= 0 && firstp
!= lastp
)
2218 _EvaluateLine(lastp
.x
, lastp
.y
, firstp
.x
, firstp
.y
);
2221 lastp
= path_data
->mpPathPoints
[i
];
// Each record type is only processed when enough points remain for it.
2226 if(path_data
->mPathPoints
- (i
-1) >= 2) _EvaluateLine(*path_data
, i
-1, i
);
2229 if(path_data
->mPathPoints
- (i
-1) >= 4) _EvaluateBezier(*path_data
, i
-1, false);
2233 if(path_data
->mPathPoints
- (i
-1) >= 4) _EvaluateBezier(*path_data
, i
-1, true);
2236 case PT_BSPLINEPATCHTO
:
2237 if(path_data
->mPathPoints
- (i
-3) >= 4) _EvaluateBezier(*path_data
, i
-3, true);
// Close the last figure if it was left open.
2241 if(lastmoveto
>= 0 && firstp
!= lastp
)
2242 _EvaluateLine(lastp
.x
, lastp
.y
, firstp
.x
, firstp
.y
);
2243 // Free the path since we don't need it anymore.
2244 path_data
->_TrashPath();
2245 // Convert the edges to spans. We couldn't do this before because some of
2246 // the regions may have winding numbers >+1 and it would have been a pain
2247 // to try to adjust the spans on the fly. We use one heap to detangle
2248 // a scanline's worth of edges from the singly-linked lists, and another
2249 // to collect the actual scans.
2250 std::vector
<int> heap
;
2251 mOutline
.reserve(mEdgeNext
/ 2);
2253 for(y
=0; y
<mHeight
; ++y
)
2256 // Detangle scanline into edge heap.
2257 for(unsigned ptr
= (unsigned)(mpScanBuffer
[y
]&0xffffffff); ptr
; ptr
= mpEdgeBuffer
[ptr
].next
)
2259 heap
.push_back(mpEdgeBuffer
[ptr
].posandflag
);
2261 // Sort edge heap. Note that we conveniently made the opening edges
2262 // one more than closing edges at the same spot, so we won't have any
2263 // problems with abutting spans.
2264 std::sort(heap
.begin(), heap
.end()/*begin() + heap.size()*/);
2265 // Process edges and add spans. Since we only check for a non-zero
2266 // winding number, it doesn't matter which way the outlines go!
2267 std::vector
<int>::iterator itX1
= heap
.begin();
2268 std::vector
<int>::iterator itX2
= heap
.end(); // begin() + heap.size();
2270 for(; itX1
!= itX2
; ++itX1
)
// A span is stored as a pair of 64-bit keys: the scanline in the upper 32 bits,
// x in the lower 32 bits, both biased by 0x40000000.
2283 mOutline
.push_back(std::pair
<__int64
,__int64
>((y
<<32)+x1
+0x4000000040000000i
64, (y
<<32)+x2
+0x4000000040000000i
64)); // G: damn Avery, this is evil! :)
2288 // Dump the edge and scan buffers, since we no longer need them.
2290 delete [] mpScanBuffer
;
// File-scope using-directive: from this point on, names from namespace std
// can be used unqualified (for example the unqualified max(rx,ry) call in
// ScanLineData::CreateWidenedRegion below resolves through it, unless a
// macro or another overload supplies max -- not determinable from here).
2295 using namespace std
;
// Merges two ordered span lists into dst: the "A" spans iterated from temp
// and the "B" spans iterated from src, where every B span is first shifted
// by dy and widened by dx on both sides. Spans that touch or overlap after
// that adjustment are fused into a single span.
//
// Parameters:
//   dst - receives the merged spans (filled with push_back below).
//   src - spans to merge in; traversed through itB/itBE.
//   dx  - horizontal growth: subtracted from each B span start (offset1)
//         and added to each B span end (offset2).
//   dy  - vertical shift, applied to the upper 32 bits of both B endpoints.
//
// NOTE(review): the declaration of temp and the step that fills it are not
// visible in this excerpt; presumably temp holds the previous contents of
// dst -- confirm against the full file.
// NOTE(review): span endpoints look like packed 64-bit values with the
// scanline in the upper 32 bits and x in the lower 32 bits, which is how the
// spans pushed to mOutline are built elsewhere in this file -- confirm that
// tSpan uses the same layout.
2297 void ScanLineData::_OverlapRegion(tSpanBuffer
& dst
, tSpanBuffer
& src
, int dx
, int dy
)
// Reserve room for the worst case in which no two spans get fused.
2300 temp
.reserve(dst
.size() + src
.size());
// A = spans taken from temp, B = spans taken from src.
2302 tSpanBuffer::iterator itA
= temp
.begin();
2303 tSpanBuffer::iterator itAE
= temp
.end();
2304 tSpanBuffer::iterator itB
= src
.begin();
2305 tSpanBuffer::iterator itBE
= src
.end();
2306 // Don't worry -- even if dy<0 this will still work! // G: hehe, the evil twin :)
// offset1 is added to a B span start: dy goes into the upper 32 bits and dx
// is subtracted. offset2 is added to a B span end: same dy, dx is added.
// Both are unsigned 64-bit, so a negative dy takes effect through modular
// wraparound of the addition.
2307 unsigned __int64 offset1
= (((__int64
)dy
)<<32) - dx
;
2308 unsigned __int64 offset2
= (((__int64
)dy
)<<32) + dx
;
// Main merge: runs while both lists still have spans. Each branch below
// starts from the earlier span and ends by pushing one merged span to dst.
2309 while(itA
!= itAE
&& itB
!= itBE
)
// Compare the adjusted B start against the A start to pick the earlier one.
2311 if((*itB
).first
+ offset1
< (*itA
).first
)
2313 // B span is earlier. Use it.
2314 unsigned __int64 x1
= (*itB
).first
+ offset1
;
2315 unsigned __int64 x2
= (*itB
).second
+ offset2
;
2317 // B spans don't overlap, so begin merge loop with A first.
2320 // If we run out of A spans or the A span doesn't overlap,
2321 // then the next B span can't either (because B spans don't
2322 // overlap) and we exit.
2323 if(itA
== itAE
|| (*itA
).first
> x2
)
// Absorb every A span that starts at or before the current end x2,
// extending x2 to the furthest end seen.
2325 do {x2
= _MAX(x2
, (*itA
++).second
);}
2326 while(itA
!= itAE
&& (*itA
).first
<= x2
);
2327 // If we run out of B spans or the B span doesn't overlap,
2328 // then the next A span can't either (because A spans don't
2329 // overlap) and we exit.
2330 if(itB
== itBE
|| (*itB
).first
+ offset1
> x2
)
// Same absorption for B spans, using their adjusted start and end.
2332 do {x2
= _MAX(x2
, (*itB
++).second
+ offset2
);}
2333 while(itB
!= itBE
&& (*itB
).first
+ offset1
<= x2
);
// Emit the fused span that started from a B span.
2336 dst
.push_back(tSpan(x1
, x2
));
2340 // A span is earlier. Use it.
2341 unsigned __int64 x1
= (*itA
).first
;
2342 unsigned __int64 x2
= (*itA
).second
;
2344 // A spans don't overlap, so begin merge loop with B first.
2347 // If we run out of B spans or the B span doesn't overlap,
2348 // then the next A span can't either (because A spans don't
2349 // overlap) and we exit.
2350 if(itB
== itBE
|| (*itB
).first
+ offset1
> x2
)
2352 do {x2
= _MAX(x2
, (*itB
++).second
+ offset2
);}
2353 while(itB
!= itBE
&& (*itB
).first
+ offset1
<= x2
);
2354 // If we run out of A spans or the A span doesn't overlap,
2355 // then the next B span can't either (because B spans don't
2356 // overlap) and we exit.
2357 if(itA
== itAE
|| (*itA
).first
> x2
)
2359 do {x2
= _MAX(x2
, (*itA
++).second
);}
2360 while(itA
!= itAE
&& (*itA
).first
<= x2
);
// Emit the fused span that started from an A span.
2363 dst
.push_back(tSpan(x1
, x2
));
2366 // Copy over leftover spans.
// Remaining A spans are copied unchanged.
2368 dst
.push_back(*itA
++);
// Remaining B spans still need the dy shift and dx widening applied.
2371 dst
.push_back(tSpan((*itB
).first
+ offset1
, (*itB
).second
+ offset2
));
// Builds mWideOutline by repeatedly merging mOutline into it through
// _OverlapRegion, using an elliptical set of offsets with horizontal radius
// rx and vertical radius ry.
//
// Parameters:
//   rx - horizontal radius of the widening.
//   ry - vertical radius of the widening.
//
// NOTE(review): the return statement and the condition guarding the ellipse
// loop are not visible in this excerpt. The loop divides by float(ry), so it
// presumably runs only when ry > 0 -- confirm against the full file.
2376 bool ScanLineData::CreateWidenedRegion(int rx
, int ry
)
// Record the larger of the two radii as the border width.
2380 mWideBorder
= max(rx
,ry
);
2383 // Do a half circle.
2384 // _OverlapRegion mirrors this so both halves are done.
2385 for(int y
= -ry
; y
<= ry
; ++y
)
// x is the rounded half-width of the rx-by-ry ellipse on row y:
// sqrt(ry^2 - y^2) scaled by rx/ry, with 0.5 added before truncation.
2387 int x
= (int)(0.5 + sqrt(float(ry
*ry
- y
*y
)) * float(rx
)/float(ry
));
// Merge mOutline into mWideOutline, shifted by y rows and widened by x on
// both the left and the right side of every span.
2388 _OverlapRegion(mWideOutline
, mOutline
, x
, y
);
// Horizontal-only widening: no vertical extent, so no ellipse rows to walk.
2391 else if (ry
== 0 && rx
> 0)
2393 // There are artifacts if we don't make at least two overlaps of the line, even at same Y coord
2394 _OverlapRegion(mWideOutline
, mOutline
, rx
, 0);
2395 _OverlapRegion(mWideOutline
, mOutline
, rx
, 0);
2400 void ScanLineData::DeleteOutlines()
2402 mWideOutline
.clear();