2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
// Debug dump helper (used by the ONCER(...) calls in UnlockRGBA_YUV).
// Opens `filename` with std::ofstream and writes the rectangle's
// left, top, right and bottom values separated by commas.
// No trailing newline is written and the stream's open state is not checked.
31 static void SaveRect2File(const CRect
& cRect
, const char * filename
)
33 std::ofstream
os(filename
);
34 os
<<cRect
.left
<<","<<cRect
.top
<<","<<cRect
.right
<<","<<cRect
.bottom
;
36 static void SaveAxxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
38 std::ofstream
axxx(filename
);
39 int w
= cRect
.Width(), h
= cRect
.Height();
41 BYTE
* top
= (BYTE
*)spd
.bits
+ spd
.pitch
*cRect
.top
+ cRect
.left
*4;
42 BYTE
* bottom
= top
+ spd
.pitch
*h
;
44 for(; top
< bottom
; top
+= spd
.pitch
) {
47 for(; s
< e
; s
+=4) { // ARGB ARGB -> AxYU AxYV
48 axxx
<<(int)s
[0]<<","<<(int)s
[1]<<","<<(int)s
[2]<<","<<(int)s
[3];
// Debug dump helper: forwards spd, cRect and filename unchanged to
// SaveAxxx2File. It adds no ARGB-specific handling of its own.
61 static void SaveArgb2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
63 SaveAxxx2File(spd
, cRect
, filename
);
// Debug dump helper: forwards spd, cRect and filename unchanged to
// SaveAxxx2File. It adds no AYUV-specific handling of its own.
65 static void SaveAyuv2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
67 SaveAxxx2File(spd
, cRect
, filename
);
69 static void SaveNvxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
71 std::ofstream
os(filename
);
72 int w
= cRect
.Width(), h
= cRect
.Height();
74 BYTE
* top
= (BYTE
*)spd
.bits
;
75 BYTE
* bottom
= top
+ spd
.pitch
*h
;
77 for(; top
< bottom
; top
+= spd
.pitch
) {
81 BYTE
* sY
= s
+ spd
.pitch
*spd
.h
;
82 BYTE
* sU
= sY
+ spd
.pitch
*spd
.h
;
84 for(; s
< e
; s
++, sY
++, sU
+=2,sV
+=2) {
85 os
<<(int)s
[0]<<","<<(int)sY
[0]<<","<<(int)sU
[0]<<","<<(int)sV
[0];
99 #define ONCER(expr) {\
100 static bool entered=false;\
112 // alpha blend functions
114 #include "xy_intrinsics.h"
115 #include "../dsutil/vd.h"
118 static void AlphaBlt_YUY2_MMX(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
120 for(int j
= 0; j
< h
; j
++, s
+= srcpitch
, d
+= dstpitch
)
124 PCUINT8 s2end
= s2
+ w
*4;
125 DWORD
* d2
= (DWORD
*)d
;
127 int last_a
= w
>0?s2
[3]:0;
128 for(; s2
< s2end
; s2
+= 8, d2
++)
130 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
134 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
135 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
136 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
137 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
138 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
140 ia
= (ia
<<24)|(s2
[7]<<16)|(ia
<<8)|s2
[3];
141 c
= (s2
[4]<<24)|(s2
[5]<<16)|(s2
[0]<<8)|s2
[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
152 psraw mm4
, 1 //or else, overflow because psraw shift in sign bit
166 void AlphaBlt_YUY2_C(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
168 for(int j
= 0; j
< h
; j
++, s
+= srcpitch
, d
+= dstpitch
)
172 PCUINT8 s2end
= s2
+ w
*4;
173 DWORD
* d2
= (DWORD
*)d
;
175 int last_a
= w
>0?s2
[3]:0;
176 for(; s2
< s2end
; s2
+= 8, d2
++)
178 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
182 DWORD y1
= (BYTE
)(((((*d2
&0xff))*s2
[3])>>8) + s2
[1]); // + y1;
183 DWORD u
= (BYTE
)((((((*d2
>>8)&0xff))*ia
)>>8) + s2
[0]); // + u;
184 DWORD y2
= (BYTE
)((((((*d2
>>16)&0xff))*s2
[7])>>8) + s2
[5]); // + y2;
185 DWORD v
= (BYTE
)((((((*d2
>>24)&0xff))*ia
)>>8) + s2
[4]); // + v;
186 *d2
= (v
<<24)|(y2
<<16)|(u
<<8)|y1
;
// Constructs a memory sub-picture from an existing descriptor.
//   spd                - used to initialise m_spd; its w/h also define m_maxsize.
//   alpha_blt_dst_type - stored in m_alpha_blt_dst_type, which Unlock() and
//                        SetDirtyRectEx() later compare against the MSP_* types.
// The whole surface (0,0)-(spd.w,spd.h) is appended to m_rectListDirty as the
// initial dirty rectangle.
197 CMemSubPic::CMemSubPic(SubPicDesc
& spd
, int alpha_blt_dst_type
)
198 : m_spd(spd
), m_alpha_blt_dst_type(alpha_blt_dst_type
)
200 m_maxsize
.SetSize(spd
.w
, spd
.h
);
201 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
202 CRect
allSpd(0,0,spd
.w
, spd
.h
);
203 m_rectListDirty
.AddTail(allSpd
);
// Frees the buffer referenced by m_spd.bits with delete[] and then resets the
// pointer to NULL; both steps are joined by the comma operator in one statement.
// NOTE(review): other methods in this file cast m_spd.bits to BYTE* before
// using it. If the field is declared as void*, delete[] through it is undefined
// behaviour - confirm the declared type and how the buffer was allocated.
206 CMemSubPic::~CMemSubPic()
208 delete [] m_spd
.bits
, m_spd
.bits
= NULL
;
// Returns the address of the internal descriptor m_spd as an untyped pointer.
// The method is const, so the C-style cast also strips constness from &m_spd.
213 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
215 return (void*)&m_spd
;
// Fills the caller-supplied descriptor from this object's state.
// Visible assignments: type, pitch, bits, bitsU and bitsV are copied from
// m_spd, and vidrect is taken from m_vidrect.
// The pointers are copied as-is, so spd refers to the same pixel memory as m_spd.
218 STDMETHODIMP
CMemSubPic::GetDesc(SubPicDesc
& spd
) const
220 spd
.type
= m_spd
.type
;
224 spd
.pitch
= m_spd
.pitch
;
225 spd
.bits
= m_spd
.bits
;
226 spd
.bitsU
= m_spd
.bitsU
;
227 spd
.bitsV
= m_spd
.bitsV
;
228 spd
.vidrect
= m_vidrect
;
232 STDMETHODIMP
CMemSubPic::CopyTo(ISubPicEx
* pSubPic
)
235 if(FAILED(hr
= __super::CopyTo(pSubPic
))) {
240 if(FAILED(GetDesc(src
)) || FAILED(pSubPic
->GetDesc(dst
))) {
243 while(!m_rectListDirty
.IsEmpty())
245 CRect
& cRect
= m_rectListDirty
.GetHead();
246 int w
= cRect
.Width(), h
= cRect
.Height();
247 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*cRect
.top
+ cRect
.left
*4;
248 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*cRect
.top
+ cRect
.left
*4;
249 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
255 STDMETHODIMP
CMemSubPic::ClearDirtyRect(DWORD color
)
257 if(m_rectListDirty
.IsEmpty()) {
260 while(!m_rectListDirty
.IsEmpty())
262 //pDirtyRect = m_rectListDirty.RemoveHead();
263 CRect
& dirtyRect
= m_rectListDirty
.RemoveTail();
264 BYTE
* p
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(dirtyRect
.top
) + dirtyRect
.left
*(m_spd
.bpp
>>3);
265 int w
= dirtyRect
.Width();
266 if(m_spd
.type
!=MSP_AYUV_PLANAR
)
268 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
271 memsetd(p
, color
, w
*4); // nya
289 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
291 // memsetd(p, 0, m_rcDirty.Width());
292 //DbgLog((LOG_TRACE, 3, "w:%d", w));
293 //w = pDirtyRect->Width();
295 memset(p
+m_spd
.h
*m_spd
.pitch
, 0, w
);
296 memset(p
+m_spd
.h
*m_spd
.pitch
*2, 0, w
);
297 memset(p
+m_spd
.h
*m_spd
.pitch
*3, 0, w
);
301 m_rectListDirty
.RemoveAll();
305 STDMETHODIMP
CMemSubPic::Lock(SubPicDesc
& spd
)
310 STDMETHODIMP
CMemSubPic::Unlock( CAtlList
<CRect
>* dirtyRectList
)
312 int src_type
= m_spd
.type
;
313 int dst_type
= m_alpha_blt_dst_type
;
314 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
315 dst_type
== MSP_RGB24
||
316 dst_type
== MSP_RGB16
||
317 dst_type
== MSP_RGB15
))
319 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
321 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
323 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
324 dst_type
== MSP_YV12
||
325 dst_type
== MSP_P010
||
326 dst_type
== MSP_P016
||
327 dst_type
== MSP_NV12
||
328 dst_type
== MSP_NV21
)))
330 return UnlockOther(dirtyRectList
);
332 else if(src_type
==MSP_RGBA
&& (dst_type
== MSP_YUY2
||
333 dst_type
== MSP_AYUV
|| //ToDo: fix me MSP_AYUV
334 dst_type
== MSP_IYUV
||
335 dst_type
== MSP_YV12
||
336 dst_type
== MSP_NV12
||
337 dst_type
== MSP_NV21
||
338 dst_type
== MSP_P010
||
339 dst_type
== MSP_P016
))
341 return UnlockRGBA_YUV(dirtyRectList
);
346 HRESULT
CMemSubPic::UnlockOther(CAtlList
<CRect
>* dirtyRectList
)
348 SetDirtyRectEx(dirtyRectList
);
349 if(m_rectListDirty
.IsEmpty()) {
353 POSITION pos
= m_rectListDirty
.GetHeadPosition();
356 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
357 int w
= cRect
.Width(), h
= cRect
.Height();
363 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(cRect
.top
) + cRect
.left
*4;
364 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
365 if(m_alpha_blt_dst_type
== MSP_RGB16
)
367 for(; top
< bottom
; top
+= m_spd
.pitch
)
369 DWORD
* s
= (DWORD
*)top
;
373 *s
= ((*s
>>3)&0x1f000000)|((*s
>>8)&0xf800)|((*s
>>5)&0x07e0)|((*s
>>3)&0x001f);
374 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
378 else if(m_alpha_blt_dst_type
== MSP_RGB15
)
380 for(; top
< bottom
; top
+= m_spd
.pitch
)
382 DWORD
* s
= (DWORD
*)top
;
386 *s
= ((*s
>>3)&0x1f000000)|((*s
>>9)&0x7c00)|((*s
>>6)&0x03e0)|((*s
>>3)&0x001f);
387 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
391 else if(m_alpha_blt_dst_type
== MSP_YUY2
)
393 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top
, m_spd
.pitch
*(h
-1)) );
395 for(BYTE
* tempTop
=top
; tempTop
< bottom
; tempTop
+= m_spd
.pitch
)
399 BYTE last_v
= s
[0], last_u
=s
[2];
400 for(; s
< e
; s
+=8) // AUYV AUYV -> AxYU AxYV
403 s
[4] = (last_v
+ 2*s
[0] + s
[4] + 2)>>2;
406 s
[0] = (last_u
+ 2*s
[2] + s
[6] + 2)>>2;
411 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top
, m_spd
.pitch
*(h
-1)) );
413 else if(m_alpha_blt_dst_type
== MSP_YV12
|| m_alpha_blt_dst_type
== MSP_IYUV
414 || m_alpha_blt_dst_type
== MSP_AYUV
)
418 else if ( m_alpha_blt_dst_type
== MSP_P010
|| m_alpha_blt_dst_type
== MSP_P016
419 || m_alpha_blt_dst_type
== MSP_NV12
)
421 SubsampleAndInterlace(cRect
, true);
423 else if( m_alpha_blt_dst_type
== MSP_NV21
)
425 SubsampleAndInterlace(cRect
, false);
431 HRESULT
CMemSubPic::UnlockRGBA_YUV(CAtlList
<CRect
>* dirtyRectList
)
434 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
435 ONCER( SaveArgb2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
437 SetDirtyRectEx(dirtyRectList
);
439 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
440 if(m_rectListDirty
.IsEmpty()) {
444 POSITION pos
= m_rectListDirty
.GetHeadPosition();
447 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
448 int w
= cRect
.Width(), h
= cRect
.Height();
454 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*cRect
.top
+ cRect
.left
*4;
455 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
457 if( m_alpha_blt_dst_type
== MSP_YUY2
||
458 m_alpha_blt_dst_type
== MSP_YV12
||
459 m_alpha_blt_dst_type
== MSP_IYUV
||
460 m_alpha_blt_dst_type
== MSP_P010
||
461 m_alpha_blt_dst_type
== MSP_P016
||
462 m_alpha_blt_dst_type
== MSP_NV12
||
463 m_alpha_blt_dst_type
== MSP_NV21
) {
464 for(; top
< bottom
; top
+= m_spd
.pitch
) {
467 DWORD last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
468 for(; s
< e
; s
+=8) { // ARGB ARGB -> AxYU AxYV
469 if((s
[3]+s
[7]+(last_yuv
>>24)) < 0xff*3) {
470 DWORD tmp1
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
471 DWORD tmp2
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
473 s
[1] = (tmp1
>>16)&0xff;
474 s
[5] = (tmp2
>>16)&0xff;
476 s
[0] = (((last_yuv
>>8)&0xff) + 2*((tmp1
>>8)&0xff) + ((tmp2
>>8)&0xff) + 2)/4;
477 s
[4] = ((last_yuv
&0xff) + 2*(tmp1
&0xff) + (tmp2
&0xff) + 2)/4;
480 last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
488 else if(m_alpha_blt_dst_type
== MSP_AYUV
) {
489 for(; top
< bottom
; top
+= m_spd
.pitch
) {
492 for(; s
< e
; s
+=4) { // ARGB -> AYUV
494 *((DWORD
*)s
) = ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
504 ONCER( SaveAxxx2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
508 void CMemSubPic::SubsampleAndInterlace( const CRect
& cRect
, bool u_first
)
510 //fix me: check alignment and log error
511 int w
= cRect
.Width(), h
= cRect
.Height();
512 BYTE
* u_plan
= reinterpret_cast<BYTE
*>(m_spd
.bits
) + m_spd
.pitch
*m_spd
.h
*2;
513 BYTE
* u_start
= u_plan
+ m_spd
.pitch
*(cRect
.top
)+ cRect
.left
;
514 BYTE
* v_start
= u_start
+ m_spd
.pitch
*m_spd
.h
;
524 //Workaround for alignment
525 if ( ((m_spd
.pitch
|w
) &15) == 0 && (g_cpuid
.m_flags
& CCpuID::sse2
) )
528 SubsampleAndInterlace(dst
, u_start
, v_start
, h
, w
, m_spd
.pitch
);
532 SubsampleAndInterlaceC(dst
, u_start
, v_start
, h
, w
, m_spd
.pitch
);
536 STDMETHODIMP
CMemSubPic::AlphaBlt( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
538 if(!pSrc
|| !pDst
|| !pTarget
) {
541 int src_type
= m_spd
.type
;
542 int dst_type
= pTarget
->type
;
544 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
545 dst_type
== MSP_RGB24
||
546 dst_type
== MSP_RGB16
||
547 dst_type
== MSP_RGB15
||
548 dst_type
== MSP_RGBA
||
549 dst_type
== MSP_YUY2
||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
550 dst_type
== MSP_AYUV
))
552 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
554 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
556 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
557 dst_type
== MSP_YV12
)) )
559 return AlphaBltOther(pSrc
, pDst
, pTarget
);
561 else if ( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_NV12
||
562 dst_type
== MSP_NV21
) )
564 return AlphaBltAnv12_Nv12(pSrc
, pDst
, pTarget
);
567 else if( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_P010
||
568 dst_type
== MSP_P016
) )
570 return AlphaBltAnv12_P010(pSrc
, pDst
, pTarget
);
572 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_IYUV
||
573 dst_type
== MSP_YV12
))
575 return AlphaBltAxyuAxyv_Yv12(pSrc
, pDst
, pTarget
);
577 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_NV12
||
578 dst_type
== MSP_NV21
))
580 return AlphaBltAxyuAxyv_Nv12(pSrc
, pDst
, pTarget
);
582 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_P010
||
583 dst_type
== MSP_P016
))
585 return AlphaBltAxyuAxyv_P010(pSrc
, pDst
, pTarget
);
590 HRESULT
CMemSubPic::AlphaBltOther(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
592 const SubPicDesc
& src
= m_spd
;
593 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
595 CRect
rs(*pSrc
), rd(*pDst
);
599 rd
.bottom
= dst
.h
- rd
.bottom
;
600 rd
.top
= dst
.h
- rd
.top
;
602 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
605 int w
= rs
.Width(), h
= rs
.Height();
606 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);//rs.left*4
607 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ ((rd
.left
*dst
.bpp
)>>3);
608 if(rd
.top
> rd
.bottom
)
610 if(dst
.type
== MSP_RGB32
|| dst
.type
== MSP_RGB24
611 || dst
.type
== MSP_RGB16
|| dst
.type
== MSP_RGB15
612 || dst
.type
== MSP_YUY2
|| dst
.type
== MSP_AYUV
)
614 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*dst
.bpp
>>3);
616 else if(dst
.type
== MSP_YV12
|| dst
.type
== MSP_IYUV
)
618 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*8>>3);
624 dst
.pitch
= -dst
.pitch
;
626 DbgLog((LOG_TRACE
, 5, TEXT("w=%d h=%d"), w
, h
));
630 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
633 BYTE
* s2end
= s2
+ w
*4;
634 DWORD
* d2
= (DWORD
*)d
;
635 for(; s2
< s2end
; s2
+= 4, d2
++)
639 DWORD bd
=0x00000100 -( (DWORD
) s2
[3]);
640 DWORD B
= ((*((DWORD
*)s2
)&0x000000ff)<<8)/bd
;
641 DWORD V
= ((*((DWORD
*)s2
)&0x0000ff00)/bd
)<<8;
642 DWORD R
= (((*((DWORD
*)s2
)&0x00ff0000)>>8)/bd
)<<16;
644 | (0xff000000-(*((DWORD
*)s2
)&0xff000000))&0xff000000;
650 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
653 BYTE
* s2end
= s2
+ w
*4;
654 DWORD
* d2
= (DWORD
*)d
;
655 for(; s2
< s2end
; s2
+= 4, d2
++)
659 *d2
= (((((*d2
&0x00ff00ff)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x00ff00ff))&0x00ff00ff)
660 | (((((*d2
&0x0000ff00)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x0000ff00))&0x0000ff00);
666 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
669 BYTE
* s2end
= s2
+ w
*4;
670 DWORD
* d2
= (DWORD
*)d
;
671 for(; s2
< s2end
; s2
+= 4, d2
++)
675 *d2
= (((((*d2
&0x00ff00ff)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x00ff00ff))&0x00ff00ff)
676 | (((((*d2
&0x0000ff00)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x0000ff00))&0x0000ff00)
683 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
686 BYTE
* s2end
= s2
+ w
*4;
688 for(; s2
< s2end
; s2
+= 4, d2
+= 3)
692 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[0];
693 d2
[1] = ((d2
[1]*s2
[3])>>8) + s2
[1];
694 d2
[2] = ((d2
[2]*s2
[3])>>8) + s2
[2];
700 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
703 BYTE
* s2end
= s2
+ w
*4;
705 for(; s2
< s2end
; s2
+= 4, d2
++)
709 *d2
= (WORD
)((((((*d2
&0xf81f)*s2
[3])>>5) + (*(DWORD
*)s2
&0xf81f))&0xf81f)
710 | (((((*d2
&0x07e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x07e0))&0x07e0));
711 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
712 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
713 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
720 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
723 BYTE
* s2end
= s2
+ w
*4;
725 for(; s2
< s2end
; s2
+= 4, d2
++)
729 *d2
= (WORD
)((((((*d2
&0x7c1f)*s2
[3])>>5) + (*(DWORD
*)s2
&0x7c1f))&0x7c1f)
730 | (((((*d2
&0x03e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x03e0))&0x03e0));
731 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
732 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
733 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
740 AlphaBlt_YUY2(w
, h
, d
, dst
.pitch
, s
, src
.pitch
);
745 //dst.pitch = abs(dst.pitch);
749 dst
.pitchUV
= abs(dst
.pitch
)/2;
751 if(!dst
.bitsU
|| !dst
.bitsV
)
753 dst
.bitsU
= (BYTE
*)dst
.bits
+ abs(dst
.pitch
)*dst
.h
;
754 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
755 if(dst
.type
== MSP_YV12
)
758 dst
.bitsU
= dst
.bitsV
;
763 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
764 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
765 if(rd
.top
> rd
.bottom
)
767 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
768 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
769 dst
.pitchUV
= -dst
.pitchUV
;
772 BYTE
* src_origin
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
;
775 ss
[0] = src_origin
+ src
.pitch
*src
.h
*2;//U
776 ss
[1] = src_origin
+ src
.pitch
*src
.h
*3;//V
778 AlphaBltYv12Luma( d
, dst
.pitch
, w
, h
, src_origin
+ src
.pitch
*src
.h
, src_origin
, src
.pitch
);
780 AlphaBltYv12Chroma( dd
[0], dst
.pitchUV
, w
, h2
, ss
[0], src_origin
, src
.pitch
);
781 AlphaBltYv12Chroma( dd
[1], dst
.pitchUV
, w
, h2
, ss
[1], src_origin
, src
.pitch
);
793 //emms takes about 40 CPU cycles
798 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
800 const SubPicDesc
& src
= m_spd
;
801 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
803 CRect
rs(*pSrc
), rd(*pDst
);
807 rd
.bottom
= dst
.h
- rd
.bottom
;
808 rd
.top
= dst
.h
- rd
.top
;
811 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
815 int w
= rs
.Width(), h
= rs
.Height();
818 BYTE
* s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
819 BYTE
* d
= static_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
821 if(rd
.top
> rd
.bottom
) {
822 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
824 dst
.pitch
= -dst
.pitch
;
827 for(ptrdiff_t i
=0; i
<h
; i
++, s
+= src
.pitch
, d
+= dst
.pitch
)
830 BYTE
* s2end
= s2
+ w
*4;
831 WORD
* d2
= reinterpret_cast<WORD
*>(d
);
832 for(; s2
< s2end
; s2
+= 4, d2
++)
835 d2
[0] = ((d2
[0]*s2
[3])>>8) + (s2
[1]<<8);
844 dst
.pitchUV
= abs(dst
.pitch
);
846 if(!dst
.bitsU
|| !dst
.bitsV
)
848 dst
.bitsU
= static_cast<BYTE
*>(dst
.bits
) + abs(dst
.pitch
)*dst
.h
;
849 dst
.bitsV
= dst
.bitsU
+ 2;
851 BYTE
* ddUV
= dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
*2;
852 if(rd
.top
> rd
.bottom
)
854 ddUV
= dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
*2;
855 dst
.pitchUV
= -dst
.pitchUV
;
858 s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
861 int pitch
= src
.pitch
;
862 for(int j
= 0; j
< h2
; j
++, s
+= 2*src
.pitch
, d
+= dst
.pitchUV
)
865 WORD
* d2
=reinterpret_cast<WORD
*>(d
);
866 WORD
* d2_end
= reinterpret_cast<WORD
*>(d
+2*w
);
867 DWORD last_alpha
= s2
[3]+s2
[3+src
.pitch
];
868 for( ; d2
<d2_end
; s2
+=8, d2
+=2)
872 (s2
[3] + s2
[3+src
.pitch
])*2 +
873 s2
[3+4]+ s2
[3+4+src
.pitch
]);
874 last_alpha
= s2
[3+4]+ s2
[3+4+src
.pitch
];
877 d2
[0] = (((d2
[0])*ia
)>>11) + ((s2
[0] + s2
[0+src
.pitch
])<<7);
878 d2
[1] = (((d2
[1])*ia
)>>11) + ((s2
[4] + s2
[4+src
.pitch
])<<7);
886 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
888 const SubPicDesc
& src
= m_spd
;
889 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
891 CRect
rs(*pSrc
), rd(*pDst
);
895 rd
.bottom
= dst
.h
- rd
.bottom
;
896 rd
.top
= dst
.h
- rd
.top
;
899 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
903 int w
= rs
.Width(), h
= rs
.Height();
905 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
906 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
908 if(rd
.top
> rd
.bottom
) {
909 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
911 dst
.pitch
= -dst
.pitch
;
914 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
916 BYTE
* s2end
= s2
+ w
*4;
918 for(; s2
< s2end
; s2
+= 4, d2
++) {
920 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
924 dst
.pitch
= abs(dst
.pitch
);
929 dst
.pitchUV
= dst
.pitch
/2;
933 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
936 if(!dst
.bitsU
|| !dst
.bitsV
) {
937 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
938 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
940 if(dst
.type
== MSP_YV12
) {
942 dst
.bitsU
= dst
.bitsV
;
948 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
949 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
951 if(rd
.top
> rd
.bottom
) {
952 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
953 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
954 dst
.pitchUV
= -dst
.pitchUV
;
957 for(ptrdiff_t i
= 0; i
< 2; i
++) {
961 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
963 BYTE
* s2end
= s2
+ w
*4;
967 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
968 for(; s2
< s2end
; s2
+= 8, d2
++, a2
+= 8) {
969 unsigned int ia
= (last_alpha
+ 2*(a2
[0]+a2
[0+src
.pitch
]) + a2
[4] + a2
[4+src
.pitch
] + 4 )>>3;
970 last_alpha
= a2
[4] + a2
[4+src
.pitch
];
972 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
981 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
983 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
984 const SubPicDesc
& src
= m_spd
;
985 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
987 CRect
rs(*pSrc
), rd(*pDst
);
991 rd
.bottom
= dst
.h
- rd
.bottom
;
992 rd
.top
= dst
.h
- rd
.top
;
995 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
999 int w
= rs
.Width(), h
= rs
.Height();
1001 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
1002 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
1004 if(rd
.top
> rd
.bottom
) {
1005 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
1007 dst
.pitch
= -dst
.pitch
;
1010 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
1012 BYTE
* s2end
= s2
+ w
*4;
1014 for(; s2
< s2end
; s2
+= 4, d2
++) {
1016 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
1020 dst
.pitch
= abs(dst
.pitch
);
1025 dst
.pitchUV
= dst
.pitch
;
1029 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
1032 if(!dst
.bitsU
|| !dst
.bitsV
) {
1033 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
1034 dst
.bitsV
= dst
.bitsU
+ 1;
1036 if(dst
.type
== MSP_NV21
) {
1037 BYTE
* p
= dst
.bitsU
;
1038 dst
.bitsU
= dst
.bitsV
;
1044 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
;
1047 if(rd
.top
> rd
.bottom
) {
1048 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
;
1050 dst
.pitchUV
= -dst
.pitchUV
;
1053 for(ptrdiff_t i
= 0; i
< 2; i
++) {
1057 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
1059 BYTE
* s2end
= s2
+ w
*4;
1062 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
1063 for(; s2
< s2end
; s2
+= 8, d2
+=2, a2
+= 8) {
1064 unsigned int ia
= (last_alpha
+2*(a2
[0]+a2
[0+src
.pitch
])+a2
[4]+a2
[4+src
.pitch
]+4)>>3;
1065 last_alpha
= a2
[4]+a2
[4+src
.pitch
];
1067 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
1073 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1077 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1079 //fix me: check colorspace and log error
1080 const SubPicDesc
& src
= m_spd
;
1081 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1083 CRect
rs(*pSrc
), rd(*pDst
);
1087 rd
.bottom
= dst
.h
- rd
.bottom
;
1088 rd
.top
= dst
.h
- rd
.top
;
1090 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1091 return E_INVALIDARG
;
1093 int w
= rs
.Width(), h
= rs
.Height();
1094 bool bottom_down
= rd
.top
> rd
.bottom
;
1100 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
1101 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
*2;
1105 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
*2;
1106 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
*2;
1107 dst
.pitch
= -dst
.pitch
;
1109 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1111 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1112 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1113 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1114 return AlphaBltAnv12_P010(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1117 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1119 //fix me: check colorspace and log error
1120 const SubPicDesc
& src
= m_spd
;
1121 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1123 CRect
rs(*pSrc
), rd(*pDst
);
1127 rd
.bottom
= dst
.h
- rd
.bottom
;
1128 rd
.top
= dst
.h
- rd
.top
;
1130 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1131 return E_INVALIDARG
;
1133 int w
= rs
.Width(), h
= rs
.Height();
1134 bool bottom_down
= rd
.top
> rd
.bottom
;
1140 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
;
1141 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
;
1145 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
;
1146 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
;
1147 dst
.pitch
= -dst
.pitch
;
1149 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1151 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1152 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1153 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1155 return AlphaBltAnv12_Nv12(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1158 STDMETHODIMP
CMemSubPic::SetDirtyRectEx(CAtlList
<CRect
>* dirtyRectList
)
1160 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1161 if(dirtyRectList
!=NULL
)
1163 POSITION pos
= dirtyRectList
->GetHeadPosition();
1164 if(m_spd
.type
== MSP_AYUV_PLANAR
|| m_alpha_blt_dst_type
==MSP_IYUV
|| m_alpha_blt_dst_type
==MSP_YV12
1165 || m_alpha_blt_dst_type
==MSP_P010
|| m_alpha_blt_dst_type
==MSP_P016
1166 || m_alpha_blt_dst_type
==MSP_NV12
|| m_alpha_blt_dst_type
==MSP_NV21
)
1170 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1171 cRectSrc
.left
&= ~15;
1172 cRectSrc
.right
= (cRectSrc
.right
+15)&~15;
1173 if(cRectSrc
.right
>m_spd
.w
)
1175 cRectSrc
.right
= m_spd
.w
;
1178 cRectSrc
.bottom
= (cRectSrc
.bottom
+1)&~1;
1179 ASSERT(cRectSrc
.bottom
<=m_spd
.h
);
1182 else if(m_spd
.type
== MSP_XY_AUYV
|| m_alpha_blt_dst_type
==MSP_YUY2
)
1186 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1187 cRectSrc
.left
&= ~3;
1188 cRectSrc
.right
= (cRectSrc
.right
+3)&~3;
1189 cRectSrc
.right
= cRectSrc
.right
< m_spd
.w
? cRectSrc
.right
: m_spd
.w
;
1190 ASSERT((cRectSrc
.right
& 3)==0);
1194 return __super::SetDirtyRectEx(dirtyRectList
);
1201 void CMemSubPic::AlphaBltYv12Luma(byte
* dst
, int dst_pitch
,
1203 const byte
* sub
, const byte
* alpha
, int sub_pitch
)
1206 ((reinterpret_cast<intptr_t>(alpha
) ^ reinterpret_cast<intptr_t>(sub
))
1207 |(reinterpret_cast<intptr_t>(alpha
) ^ reinterpret_cast<intptr_t>(dst
))
1208 | static_cast<intptr_t>(sub_pitch
)
1209 | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0
1210 && w
> 32 && (g_cpuid
.m_flags
& CCpuID::sse2
))
1212 int head
= (16 - (reinterpret_cast<intptr_t>(alpha
)&15))&15;
1213 int tail
= (w
-head
) & 15;
1214 int w1
= w
- head
- tail
;
1215 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
1217 const BYTE
* sa
= alpha
;
1218 const BYTE
* s2
= sub
;
1219 const BYTE
* s2end_mod16
= s2
+ w1
;
1220 const BYTE
* s2end
= s2
+ w
;
1223 for( ; (reinterpret_cast<intptr_t>(s2
)&15) != 0; s2
++, sa
++, d2
++)
1227 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1230 for(; s2
< s2end_mod16
; s2
+=16, sa
+=16, d2
+=16)
1232 pix_alpha_blend_yv12_luma_sse2(d2
, sa
, s2
);
1234 for(; s2
< s2end
; s2
++, sa
++, d2
++)
1238 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1243 else //fix me: only a workaround for non-mod-16 size video
1245 CMemSubPic::AlphaBltYv12LumaC(dst
, dst_pitch
, w
, h
, sub
, alpha
, sub_pitch
);
1249 void CMemSubPic::AlphaBltYv12LumaC( byte
* dst
, int dst_pitch
, int w
, int h
, const byte
* sub
, const byte
* alpha
, int sub_pitch
)
1251 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
1253 const BYTE
* sa
= alpha
;
1254 const BYTE
* s2
= sub
;
1255 const BYTE
* s2end
= s2
+ w
;
1257 for(; s2
< s2end
; s2
+=1, sa
+=1, d2
+=1)
1261 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
1262 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1268 void CMemSubPic::AlphaBltYv12Chroma(byte
* dst_uv
, int dst_pitch
,
1269 int w
, int chroma_h
,
1270 const byte
* src_uv
, const byte
* src_a
, int src_pitch
)
1273 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1274 |(reinterpret_cast<intptr_t>(src_a
) ^ (2*reinterpret_cast<intptr_t>(dst_uv
)))
1275 | static_cast<intptr_t>(src_pitch
)
1276 | (2*static_cast<intptr_t>(dst_pitch
)) ) & 15) ==0 &&
1277 w
> 16 && (g_cpuid
.m_flags
& CCpuID::sse2
))
1279 int head
= (16 - (reinterpret_cast<intptr_t>(src_a
)&15))&15;
1280 int tail
= (w
-head
) & 15;
1281 int w00
= w
- head
- tail
;
1283 int pitch
= src_pitch
;
1284 for(int j
= 0; j
< chroma_h
; j
++, src_uv
+= src_pitch
*2, src_a
+= src_pitch
*2, dst_uv
+= dst_pitch
)
1286 hleft_vmid_mix_uv_yv12_c2(dst_uv
, head
, src_uv
, src_a
, src_pitch
);
1287 hleft_vmid_mix_uv_yv12_sse2(dst_uv
+(head
>>1), w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
1288 hleft_vmid_mix_uv_yv12_c2(dst_uv
+((head
+w00
)>>1), tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
1291 else//fix me: only a workaround for non-mod-16 size video
1293 AlphaBltYv12ChromaC(dst_uv
, dst_pitch
, w
, chroma_h
, src_uv
, src_a
, src_pitch
);
// Non-SSE2 chroma blend path; AlphaBltYv12Chroma falls back to it when its
// alignment/width/SSE2 condition is not met.
// For each of the chroma_h destination rows it calls
// hleft_vmid_mix_uv_yv12_c(dst, w, sub_chroma, alpha, sub_pitch).
// After every row dst advances by dst_pitch, while sub_chroma and alpha advance
// by 2*sub_pitch, i.e. two source rows are consumed per destination row.
1297 void CMemSubPic::AlphaBltYv12ChromaC( byte
* dst
, int dst_pitch
, int w
, int chroma_h
, const byte
* sub_chroma
, const byte
* alpha
, int sub_pitch
)
1299 for(int j
= 0; j
< chroma_h
; j
++, sub_chroma
+= sub_pitch
*2, alpha
+= sub_pitch
*2, dst
+= dst_pitch
)
1301 hleft_vmid_mix_uv_yv12_c(dst
, w
, sub_chroma
, alpha
, sub_pitch
);
// Blends a subtitle picture (alpha plane src_a, luma src_y, chroma src_uv, all
// using src_pitch) onto a destination whose luma is written as 16-bit WORDs.
// NOTE(review): this listing omits short source lines (braces, the remainder
// of some conditions, the switch-case bodies, the declarations of `h2` and
// `d`); the comments here describe only what the visible lines show.
//
// Visible structure:
//  - Under `g_cpuid.m_flags & CCpuID::sse2` the luma rows are walked with `sa`.
//    When src_a, src_y and dst_y agree in their low 4 address bits and both
//    pitches are multiples of 16 (plus a condition elided from this listing),
//    a row is handled as `head` single samples, then 16-sample groups through
//    mix_16_y_p010_sse2, then `tail` single samples. The `else` at 1402 uses a
//    per-sample loop instead.
//  - A single sample is mixed as d_w[0] = ((d_w[0]*sa2[0])>>8) + (s2[0]<<8);
//    _XY_MIX_ONE performs that write only when the alpha byte is below 0xff.
//  - Chroma: every iteration advances src_uv by one source row and src_a by
//    two; the hleft_vmid_mix_uv_p010_* helpers do the mixing, split into
//    head / SSE2 body / tail on the aligned path.
//  - The last visible statement returns AlphaBltAnv12_P010_C(...) with the
//    same arguments; presumably the non-SSE2 fallback - TODO confirm.
1305 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1306 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1308 if ( g_cpuid
.m_flags
& CCpuID::sse2
)
1310 const BYTE
* sa
= src_a
;
1312 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_y
))
1313 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_y
))
1314 | static_cast<intptr_t>(src_pitch
)
1315 | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 &&
// NOTE(review): binary '-' binds tighter than '&', so this parses as
// ((16 - addr) & 15) & 15; that equals (16 - (addr & 15)) & 15 as written in
// the chroma section, only the parenthesisation differs.
1318 int head
= (16 - reinterpret_cast<intptr_t>(src_a
)&15)&15;
1319 int tail
= (w
- head
) & 15;
1321 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1323 const BYTE
* sa2
= sa
;
1324 const BYTE
* s2
= src_y
;
// NOTE(review): the end of the 16-sample loop is measured from the row start
// (s2 + (w&~15)), not from the position reached after the `head` samples -
// confirm the loop below cannot run past the row when head != 0.
1325 const BYTE
* s2end_mod16
= s2
+ (w
&~15);
1327 WORD
* d_w
=reinterpret_cast<WORD
*>(dst_y
);
1329 switch( head
)//important: it is safe since w > 16
1332 #define _XY_MIX_ONE if(sa2[0] < 0xff) { d_w[0] = ((d_w[0]*sa2[0])>>8) + (s2[0]<<8); } sa2++;d_w++;s2++;
1360 case 1://fall through on purpose
1363 for(; s2
< s2end_mod16
; s2
+=16, sa2
+=16, d_w
+=16)
1365 mix_16_y_p010_sse2( reinterpret_cast<BYTE
*>(d_w
), s2
, sa2
);
1367 switch( tail
)//important: it is safe since w > 16
1397 case 1://fall through on purpose
1402 else //fix me: only a workaround for non-mod-16 size video
1404 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1406 const BYTE
* sa2
= sa
;
1407 const BYTE
* s2
= src_y
;
1408 const BYTE
* s2end
= s2
+ w
;
1409 WORD
* d_w
= reinterpret_cast<WORD
*>(dst_y
);
1410 for(; s2
< s2end
; s2
+=1, sa2
+=1, d_w
+=1)
1414 d_w
[0] = ((d_w
[0]*sa2
[0])>>8) + (s2
[0]<<8);
1423 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1424 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_uv
))
1425 | static_cast<intptr_t>(src_pitch
)
1426 | static_cast<intptr_t>(dst_pitch
) ) & 15) ==0 &&
1429 int head
= (16-(reinterpret_cast<intptr_t>(src_a
)&15))&15;
1430 int tail
= (w
-head
) & 15;
1431 int w00
= w
- head
- tail
;
1433 ASSERT(w
>0);//the calls to mix may failed if w==0
1434 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1436 hleft_vmid_mix_uv_p010_c2(d
, head
, src_uv
, src_a
, src_pitch
);
1437 hleft_vmid_mix_uv_p010_sse2(d
+2*head
, w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
1438 hleft_vmid_mix_uv_p010_c2(d
+2*(head
+w00
), tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
1443 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1445 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
1456 return AlphaBltAnv12_P010_C(src_a
, src_y
, src_uv
, src_pitch
, dst_y
, dst_uv
, dst_pitch
, w
, h
);
// Per-sample version of the P010 blend; the same signature as
// AlphaBltAnv12_P010.
// NOTE(review): this listing omits short source lines (braces, the
// declarations of `h2` and `d`, the return statement, and possibly an alpha
// guard around the luma write); only the visible lines are described.
//
//  - Luma: for each of the h rows every sample is mixed as
//    d2[0] = ((d2[0]*sa2[0])>>8) + (s2[0]<<8), with the destination accessed
//    as WORDs and the source/alpha as bytes.
//  - Chroma: each of the h2 iterations calls hleft_vmid_mix_uv_p010_c, then
//    advances src_uv by one source row, src_a by two and d by dst_pitch.
1460 HRESULT
CMemSubPic::AlphaBltAnv12_P010_C( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
, BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1462 const BYTE
* sa
= src_a
;
1463 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1465 const BYTE
* sa2
= sa
;
1466 const BYTE
* s2
= src_y
;
1467 const BYTE
* s2end
= s2
+ w
;
1468 WORD
* d2
= reinterpret_cast<WORD
*>(dst_y
);
1469 for(; s2
< s2end
; s2
+=1, sa2
+=1, d2
+=1)
1473 d2
[0] = ((d2
[0]*sa2
[0])>>8) + (s2
[0]<<8);
1480 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1482 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Blends a subtitle picture (alpha src_a, luma src_y, chroma src_uv, all using
// src_pitch) onto the dst_y / dst_uv planes.
// NOTE(review): this listing omits short source lines (braces, the
// declarations of `h2` and `d`, lines 1512-1520, the return statement); only
// the visible lines are described.
//
//  - Luma is delegated entirely to AlphaBltYv12Luma.
//  - Chroma: when src_a, src_uv and dst_uv agree in their low 4 address bits,
//    both pitches are multiples of 16, w > 16 and the CPU reports SSE2, each
//    row is split into head / 16-multiple body / tail and mixed by
//    hleft_vmid_mix_uv_nv12_c2, hleft_vmid_mix_uv_nv12_sse2 and
//    hleft_vmid_mix_uv_nv12_c2 respectively.
//  - The second loop mixes whole rows with hleft_vmid_mix_uv_nv12_c;
//    presumably the else-branch of the condition above - TODO confirm.
//  - In both loops src_uv advances one source row per iteration, src_a two.
1487 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1488 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1490 AlphaBltYv12Luma( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
1494 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1495 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_uv
))
1496 | static_cast<intptr_t>(src_pitch
)
1497 | static_cast<intptr_t>(dst_pitch
) ) & 15) ==0 &&
1498 w
> 16 && (g_cpuid
.m_flags
& CCpuID::sse2
) )
1502 int head
= (16-(reinterpret_cast<intptr_t>(src_a
)&15))&15;
1503 int tail
= (w
-head
) & 15;
1504 int w00
= w
- head
- tail
;
1506 ASSERT(w
>0);//the calls to mix may failed if w==0
1507 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1509 hleft_vmid_mix_uv_nv12_c2(d
, head
, src_uv
, src_a
, src_pitch
);
1510 hleft_vmid_mix_uv_nv12_sse2(d
+head
, w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
1511 hleft_vmid_mix_uv_nv12_c2(d
+head
+w00
, tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
1521 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1523 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Counterpart of AlphaBltAnv12_Nv12 with the same signature that calls only
// the *C / *_c helpers.
// NOTE(review): this listing omits short source lines (braces, the
// declarations of `h2` and `d`, the return statement); only the visible lines
// are described.
//
//  - Luma is delegated to AlphaBltYv12LumaC.
//  - Chroma: each of the h2 iterations calls hleft_vmid_mix_uv_nv12_c, then
//    advances src_uv by one source row, src_a by two and d by dst_pitch.
1529 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12_C( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
, BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1531 AlphaBltYv12LumaC( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
1534 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1536 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Calls hleft_vmid_subsample_and_interlace_2_line_sse2(dst, u, v, w, pitch)
// once for every two rows of h (the loop counter advances by 2).
// NOTE(review): the lines between the helper call and the end of the loop
// (1546-1548) are missing from this listing; presumably they advance dst, u
// and v to the next row pair - TODO confirm against the full source.
1541 void CMemSubPic::SubsampleAndInterlace( BYTE
* dst
, const BYTE
* u
, const BYTE
* v
, int h
, int w
, int pitch
)
1543 for (int i
=0;i
<h
;i
+=2)
1545 hleft_vmid_subsample_and_interlace_2_line_sse2(dst
, u
, v
, w
, pitch
);
// Calls hleft_vmid_subsample_and_interlace_2_line_c(dst, u, v, w, pitch) once
// for every two rows of h (the loop counter advances by 2); same shape as
// SubsampleAndInterlace but using the `_c` helper.
// NOTE(review): the lines between the helper call and the end of the loop
// (1557-1559) are missing from this listing; presumably they advance dst, u
// and v to the next row pair - TODO confirm against the full source.
1552 void CMemSubPic::SubsampleAndInterlaceC( BYTE
* dst
, const BYTE
* u
, const BYTE
* v
, int h
, int w
, int pitch
)
1554 for (int i
=0;i
<h
;i
+=2)
1556 hleft_vmid_subsample_and_interlace_2_line_c(dst
, u
, v
, w
, pitch
);
// Dispatcher for the YUY2 blend: forwards (w, h, d, dstpitch, s, srcpitch)
// unchanged to AlphaBlt_YUY2_C or AlphaBlt_YUY2_MMX.
// NOTE(review): the lines that choose between the two calls (1565, 1567,
// 1569) are missing from this listing; the selection mechanism (runtime test
// or preprocessor conditional) cannot be determined from here.
1563 void CMemSubPic::AlphaBlt_YUY2(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
1566 AlphaBlt_YUY2_C(w
, h
, d
, dstpitch
, s
, srcpitch
);
1568 AlphaBlt_YUY2_MMX(w
, h
, d
, dstpitch
, s
, srcpitch
);
1573 // CMemSubPicAllocator
// Constructs the allocator: forwards maxsize (with two `false` flags) to the
// CSubPicExAllocatorImpl base and stores alpha_blt_dst_type and maxsize in
// m_alpha_blt_dst_type / m_maxsize.
// The visible part of the body switches on alpha_blt_dst_type and assigns
// m_type either MSP_XY_AUYV or MSP_AYUV_PLANAR.
// NOTE(review): the case labels and the remaining body lines are missing from
// this listing, so which destination types map to which m_type, and how the
// `type` parameter (commented default -1) is used, cannot be stated from here.
1576 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type
, SIZE maxsize
, int type
/*=-1*/)
1577 : CSubPicExAllocatorImpl(maxsize
, false, false)
1578 , m_alpha_blt_dst_type(alpha_blt_dst_type
)
1579 , m_maxsize(maxsize
)
1584 switch(alpha_blt_dst_type
)
1587 m_type
= MSP_XY_AUYV
;
1598 m_type
= MSP_AYUV_PLANAR
;
1607 // ISubPicAllocatorImpl
1609 bool CMemSubPicAllocator::AllocEx(bool fStatic
, ISubPicEx
** ppSubPic
)
1615 spd
.w
= m_maxsize
.cx
;
1616 spd
.h
= m_maxsize
.cy
;
1618 spd
.pitch
= (spd
.w
*spd
.bpp
)>>3;
1620 spd
.bits
= DEBUG_NEW BYTE
[spd
.pitch
*spd
.h
];
1624 *ppSubPic
= DEBUG_NEW
CMemSubPic(spd
, m_alpha_blt_dst_type
);
1628 (*ppSubPic
)->AddRef();