2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
31 static void SaveRect2File(const CRect
& cRect
, const char * filename
)
33 std::ofstream
os(filename
);
34 os
<<cRect
.left
<<","<<cRect
.top
<<","<<cRect
.right
<<","<<cRect
.bottom
;
36 static void SaveAxxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
38 std::ofstream
axxx(filename
);
39 int w
= cRect
.Width(), h
= cRect
.Height();
41 BYTE
* top
= (BYTE
*)spd
.bits
+ spd
.pitch
*cRect
.top
+ cRect
.left
*4;
42 BYTE
* bottom
= top
+ spd
.pitch
*h
;
44 for(; top
< bottom
; top
+= spd
.pitch
) {
47 for(; s
< e
; s
+=4) { // ARGB ARGB -> AxYU AxYV
48 axxx
<<(int)s
[0]<<","<<(int)s
[1]<<","<<(int)s
[2]<<","<<(int)s
[3];
61 static void SaveArgb2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
63 SaveAxxx2File(spd
, cRect
, filename
);
65 static void SaveAyuv2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
67 SaveAxxx2File(spd
, cRect
, filename
);
69 static void SaveNvxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
71 std::ofstream
os(filename
);
72 int w
= cRect
.Width(), h
= cRect
.Height();
74 BYTE
* top
= (BYTE
*)spd
.bits
;
75 BYTE
* bottom
= top
+ spd
.pitch
*h
;
77 for(; top
< bottom
; top
+= spd
.pitch
) {
81 BYTE
* sY
= s
+ spd
.pitch
*spd
.h
;
82 BYTE
* sU
= sY
+ spd
.pitch
*spd
.h
;
84 for(; s
< e
; s
++, sY
++, sU
+=2,sV
+=2) {
85 os
<<(int)s
[0]<<","<<(int)sY
[0]<<","<<(int)sU
[0]<<","<<(int)sV
[0];
99 #define ONCER(expr) {\
100 static bool entered=false;\
112 // alpha blend functions
114 #include "xy_intrinsics.h"
115 #include "../dsutil/vd.h"
118 static void AlphaBlt_YUY2_MMX(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
120 for(int j
= 0; j
< h
; j
++, s
+= srcpitch
, d
+= dstpitch
)
124 PCUINT8 s2end
= s2
+ w
*4;
125 DWORD
* d2
= (DWORD
*)d
;
127 int last_a
= w
>0?s2
[3]:0;
128 for(; s2
< s2end
; s2
+= 8, d2
++)
130 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
134 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
135 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
136 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
137 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
138 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
140 ia
= (ia
<<24)|(s2
[7]<<16)|(ia
<<8)|s2
[3];
141 c
= (s2
[4]<<24)|(s2
[5]<<16)|(s2
[0]<<8)|s2
[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
152 psraw mm4
, 1 //or else, overflow because psraw shift in sign bit
166 void AlphaBlt_YUY2_C(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
168 for(int j
= 0; j
< h
; j
++, s
+= srcpitch
, d
+= dstpitch
)
172 PCUINT8 s2end
= s2
+ w
*4;
173 DWORD
* d2
= (DWORD
*)d
;
175 int last_a
= w
>0?s2
[3]:0;
176 for(; s2
< s2end
; s2
+= 8, d2
++)
178 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
182 DWORD y1
= (BYTE
)(((((*d2
&0xff))*s2
[3])>>8) + s2
[1]); // + y1;
183 DWORD u
= (BYTE
)((((((*d2
>>8)&0xff))*ia
)>>8) + s2
[0]); // + u;
184 DWORD y2
= (BYTE
)((((((*d2
>>16)&0xff))*s2
[7])>>8) + s2
[5]); // + y2;
185 DWORD v
= (BYTE
)((((((*d2
>>24)&0xff))*ia
)>>8) + s2
[4]); // + v;
186 *d2
= (v
<<24)|(y2
<<16)|(u
<<8)|y1
;
197 CMemSubPic::CMemSubPic(SubPicDesc
& spd
, int alpha_blt_dst_type
)
198 : m_spd(spd
), m_alpha_blt_dst_type(alpha_blt_dst_type
)
200 m_maxsize
.SetSize(spd
.w
, spd
.h
);
201 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
202 CRect
allSpd(0,0,spd
.w
, spd
.h
);
203 m_rectListDirty
.AddTail(allSpd
);
206 CMemSubPic::~CMemSubPic()
208 delete [] m_spd
.bits
, m_spd
.bits
= NULL
;
213 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
215 return (void*)&m_spd
;
218 STDMETHODIMP
CMemSubPic::GetDesc(SubPicDesc
& spd
) const
220 spd
.type
= m_spd
.type
;
224 spd
.pitch
= m_spd
.pitch
;
225 spd
.bits
= m_spd
.bits
;
226 spd
.bitsU
= m_spd
.bitsU
;
227 spd
.bitsV
= m_spd
.bitsV
;
228 spd
.vidrect
= m_vidrect
;
232 STDMETHODIMP
CMemSubPic::CopyTo(ISubPicEx
* pSubPic
)
235 if(FAILED(hr
= __super::CopyTo(pSubPic
))) {
240 if(FAILED(GetDesc(src
)) || FAILED(pSubPic
->GetDesc(dst
))) {
243 while(!m_rectListDirty
.IsEmpty())
245 CRect
& cRect
= m_rectListDirty
.GetHead();
246 int w
= cRect
.Width(), h
= cRect
.Height();
247 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*cRect
.top
+ cRect
.left
*4;
248 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*cRect
.top
+ cRect
.left
*4;
249 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
255 STDMETHODIMP
CMemSubPic::ClearDirtyRect(DWORD color
)
257 if(m_rectListDirty
.IsEmpty()) {
260 while(!m_rectListDirty
.IsEmpty())
262 //pDirtyRect = m_rectListDirty.RemoveHead();
263 CRect
& dirtyRect
= m_rectListDirty
.RemoveTail();
264 BYTE
* p
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(dirtyRect
.top
) + dirtyRect
.left
*(m_spd
.bpp
>>3);
265 int w
= dirtyRect
.Width();
266 if(m_spd
.type
!=MSP_AYUV_PLANAR
)
268 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
271 memsetd(p
, color
, w
*4); // nya
289 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
291 // memsetd(p, 0, m_rcDirty.Width());
292 //DbgLog((LOG_TRACE, 3, "w:%d", w));
293 //w = pDirtyRect->Width();
295 memset(p
+m_spd
.h
*m_spd
.pitch
, 0, w
);
296 memset(p
+m_spd
.h
*m_spd
.pitch
*2, 0, w
);
297 memset(p
+m_spd
.h
*m_spd
.pitch
*3, 0, w
);
301 m_rectListDirty
.RemoveAll();
305 STDMETHODIMP
CMemSubPic::Lock(SubPicDesc
& spd
)
310 STDMETHODIMP
CMemSubPic::Unlock( CAtlList
<CRect
>* dirtyRectList
)
312 int src_type
= m_spd
.type
;
313 int dst_type
= m_alpha_blt_dst_type
;
314 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
315 dst_type
== MSP_RGB24
||
316 dst_type
== MSP_RGB16
||
317 dst_type
== MSP_RGB15
))
319 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
321 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
323 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
324 dst_type
== MSP_YV12
||
325 dst_type
== MSP_P010
||
326 dst_type
== MSP_P016
||
327 dst_type
== MSP_NV12
||
328 dst_type
== MSP_NV21
)))
330 return UnlockOther(dirtyRectList
);
332 else if(src_type
==MSP_RGBA
&& (dst_type
== MSP_YUY2
||
333 dst_type
== MSP_AYUV
|| //ToDo: fix me MSP_AYUV
334 dst_type
== MSP_IYUV
||
335 dst_type
== MSP_YV12
||
336 dst_type
== MSP_NV12
||
337 dst_type
== MSP_NV21
||
338 dst_type
== MSP_P010
||
339 dst_type
== MSP_P016
))
341 return UnlockRGBA_YUV(dirtyRectList
);
346 HRESULT
CMemSubPic::UnlockOther(CAtlList
<CRect
>* dirtyRectList
)
348 SetDirtyRectEx(dirtyRectList
);
349 if(m_rectListDirty
.IsEmpty()) {
353 POSITION pos
= m_rectListDirty
.GetHeadPosition();
356 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
357 int w
= cRect
.Width(), h
= cRect
.Height();
363 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(cRect
.top
) + cRect
.left
*4;
364 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
365 if(m_alpha_blt_dst_type
== MSP_RGB16
)
367 for(; top
< bottom
; top
+= m_spd
.pitch
)
369 DWORD
* s
= (DWORD
*)top
;
373 *s
= ((*s
>>3)&0x1f000000)|((*s
>>8)&0xf800)|((*s
>>5)&0x07e0)|((*s
>>3)&0x001f);
374 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
378 else if(m_alpha_blt_dst_type
== MSP_RGB15
)
380 for(; top
< bottom
; top
+= m_spd
.pitch
)
382 DWORD
* s
= (DWORD
*)top
;
386 *s
= ((*s
>>3)&0x1f000000)|((*s
>>9)&0x7c00)|((*s
>>6)&0x03e0)|((*s
>>3)&0x001f);
387 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
391 else if(m_alpha_blt_dst_type
== MSP_YUY2
)
393 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top
, m_spd
.pitch
*(h
-1)) );
395 for(BYTE
* tempTop
=top
; tempTop
< bottom
; tempTop
+= m_spd
.pitch
)
399 BYTE last_v
= s
[0], last_u
=s
[2];
400 for(; s
< e
; s
+=8) // AUYV AUYV -> AxYU AxYV
403 s
[4] = (last_v
+ 2*s
[0] + s
[4] + 2)>>2;
406 s
[0] = (last_u
+ 2*s
[2] + s
[6] + 2)>>2;
411 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top
, m_spd
.pitch
*(h
-1)) );
413 else if(m_alpha_blt_dst_type
== MSP_YV12
|| m_alpha_blt_dst_type
== MSP_IYUV
)
417 else if ( m_alpha_blt_dst_type
== MSP_P010
|| m_alpha_blt_dst_type
== MSP_P016
418 || m_alpha_blt_dst_type
== MSP_NV12
)
420 SubsampleAndInterlace(cRect
, true);
422 else if( m_alpha_blt_dst_type
== MSP_NV21
)
424 SubsampleAndInterlace(cRect
, false);
430 HRESULT
CMemSubPic::UnlockRGBA_YUV(CAtlList
<CRect
>* dirtyRectList
)
433 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
434 ONCER( SaveArgb2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
436 SetDirtyRectEx(dirtyRectList
);
438 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
439 if(m_rectListDirty
.IsEmpty()) {
443 POSITION pos
= m_rectListDirty
.GetHeadPosition();
446 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
447 int w
= cRect
.Width(), h
= cRect
.Height();
453 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*cRect
.top
+ cRect
.left
*4;
454 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
456 if( m_alpha_blt_dst_type
== MSP_YUY2
||
457 m_alpha_blt_dst_type
== MSP_YV12
||
458 m_alpha_blt_dst_type
== MSP_IYUV
||
459 m_alpha_blt_dst_type
== MSP_P010
||
460 m_alpha_blt_dst_type
== MSP_P016
||
461 m_alpha_blt_dst_type
== MSP_NV12
||
462 m_alpha_blt_dst_type
== MSP_NV21
) {
463 for(; top
< bottom
; top
+= m_spd
.pitch
) {
466 DWORD last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
467 for(; s
< e
; s
+=8) { // ARGB ARGB -> AxYU AxYV
468 if((s
[3]+s
[7]+(last_yuv
>>24)) < 0xff*3) {
469 DWORD tmp1
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
470 DWORD tmp2
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
472 s
[1] = (tmp1
>>16)&0xff;
473 s
[5] = (tmp2
>>16)&0xff;
475 s
[0] = (((last_yuv
>>8)&0xff) + 2*((tmp1
>>8)&0xff) + ((tmp2
>>8)&0xff) + 2)/4;
476 s
[4] = ((last_yuv
&0xff) + 2*(tmp1
&0xff) + (tmp2
&0xff) + 2)/4;
479 last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
487 else if(m_alpha_blt_dst_type
== MSP_AYUV
) {
488 for(; top
< bottom
; top
+= m_spd
.pitch
) {
491 for(; s
< e
; s
+=4) { // ARGB -> AYUV
493 *((DWORD
*)s
) = ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
503 ONCER( SaveAxxx2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
507 void CMemSubPic::SubsampleAndInterlace( const CRect
& cRect
, bool u_first
)
509 //fix me: check alignment and log error
510 int w
= cRect
.Width(), h
= cRect
.Height();
511 BYTE
* u_plan
= reinterpret_cast<BYTE
*>(m_spd
.bits
) + m_spd
.pitch
*m_spd
.h
*2;
512 BYTE
* u_start
= u_plan
+ m_spd
.pitch
*(cRect
.top
)+ cRect
.left
;
513 BYTE
* v_start
= u_start
+ m_spd
.pitch
*m_spd
.h
;
523 //Workaround for alignment
524 if ( ((m_spd
.pitch
|w
) &15) == 0 && (g_cpuid
.m_flags
& CCpuID::sse2
) )
527 SubsampleAndInterlace(dst
, u_start
, v_start
, h
, w
, m_spd
.pitch
);
531 SubsampleAndInterlaceC(dst
, u_start
, v_start
, h
, w
, m_spd
.pitch
);
535 STDMETHODIMP
CMemSubPic::AlphaBlt( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
537 if(!pSrc
|| !pDst
|| !pTarget
) {
540 int src_type
= m_spd
.type
;
541 int dst_type
= pTarget
->type
;
543 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
544 dst_type
== MSP_RGB24
||
545 dst_type
== MSP_RGB16
||
546 dst_type
== MSP_RGB15
||
547 dst_type
== MSP_RGBA
||
548 dst_type
== MSP_YUY2
||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
549 dst_type
== MSP_AYUV
))
551 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
553 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
555 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
556 dst_type
== MSP_YV12
)) )
558 return AlphaBltOther(pSrc
, pDst
, pTarget
);
560 else if ( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_NV12
||
561 dst_type
== MSP_NV21
) )
563 return AlphaBltAnv12_Nv12(pSrc
, pDst
, pTarget
);
566 else if( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_P010
||
567 dst_type
== MSP_P016
) )
569 return AlphaBltAnv12_P010(pSrc
, pDst
, pTarget
);
571 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_IYUV
||
572 dst_type
== MSP_YV12
))
574 return AlphaBltAxyuAxyv_Yv12(pSrc
, pDst
, pTarget
);
576 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_NV12
||
577 dst_type
== MSP_NV21
))
579 return AlphaBltAxyuAxyv_Nv12(pSrc
, pDst
, pTarget
);
581 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_P010
||
582 dst_type
== MSP_P016
))
584 return AlphaBltAxyuAxyv_P010(pSrc
, pDst
, pTarget
);
589 HRESULT
CMemSubPic::AlphaBltOther(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
591 const SubPicDesc
& src
= m_spd
;
592 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
594 CRect
rs(*pSrc
), rd(*pDst
);
598 rd
.bottom
= dst
.h
- rd
.bottom
;
599 rd
.top
= dst
.h
- rd
.top
;
601 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
604 int w
= rs
.Width(), h
= rs
.Height();
605 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);//rs.left*4
606 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ ((rd
.left
*dst
.bpp
)>>3);
607 if(rd
.top
> rd
.bottom
)
609 if(dst
.type
== MSP_RGB32
|| dst
.type
== MSP_RGB24
610 || dst
.type
== MSP_RGB16
|| dst
.type
== MSP_RGB15
611 || dst
.type
== MSP_YUY2
|| dst
.type
== MSP_AYUV
)
613 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*dst
.bpp
>>3);
615 else if(dst
.type
== MSP_YV12
|| dst
.type
== MSP_IYUV
)
617 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*8>>3);
623 dst
.pitch
= -dst
.pitch
;
625 DbgLog((LOG_TRACE
, 5, TEXT("w=%d h=%d"), w
, h
));
629 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
632 BYTE
* s2end
= s2
+ w
*4;
633 DWORD
* d2
= (DWORD
*)d
;
634 for(; s2
< s2end
; s2
+= 4, d2
++)
638 DWORD bd
=0x00000100 -( (DWORD
) s2
[3]);
639 DWORD B
= ((*((DWORD
*)s2
)&0x000000ff)<<8)/bd
;
640 DWORD V
= ((*((DWORD
*)s2
)&0x0000ff00)/bd
)<<8;
641 DWORD R
= (((*((DWORD
*)s2
)&0x00ff0000)>>8)/bd
)<<16;
643 | (0xff000000-(*((DWORD
*)s2
)&0xff000000))&0xff000000;
649 case MSP_AYUV
: //ToDo: fix me MSP_VUYA indeed?
650 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
653 BYTE
* s2end
= s2
+ w
*4;
654 DWORD
* d2
= (DWORD
*)d
;
655 for(; s2
< s2end
; s2
+= 4, d2
++)
658 DWORD ia
= 256-s2
[3];
660 *d2
= ((((*d2
&0x00ff00ff)*s2
[3])>>8) + (((*((DWORD
*)s2
)&0x00ff00ff)*ia
)>>8)&0x00ff00ff)
661 | ((((*d2
&0x0000ff00)*s2
[3])>>8) + (((*((DWORD
*)s2
)&0x0000ff00)*ia
)>>8)&0x0000ff00);
666 *d2
= (((((*d2
&0x00ff00ff)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x00ff00ff))&0x00ff00ff)
667 | (((((*d2
&0x0000ff00)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x0000ff00))&0x0000ff00);
674 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
677 BYTE
* s2end
= s2
+ w
*4;
679 for(; s2
< s2end
; s2
+= 4, d2
+= 3)
683 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[0];
684 d2
[1] = ((d2
[1]*s2
[3])>>8) + s2
[1];
685 d2
[2] = ((d2
[2]*s2
[3])>>8) + s2
[2];
691 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
694 BYTE
* s2end
= s2
+ w
*4;
696 for(; s2
< s2end
; s2
+= 4, d2
++)
700 *d2
= (WORD
)((((((*d2
&0xf81f)*s2
[3])>>5) + (*(DWORD
*)s2
&0xf81f))&0xf81f)
701 | (((((*d2
&0x07e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x07e0))&0x07e0));
702 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
703 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
704 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
711 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
714 BYTE
* s2end
= s2
+ w
*4;
716 for(; s2
< s2end
; s2
+= 4, d2
++)
720 *d2
= (WORD
)((((((*d2
&0x7c1f)*s2
[3])>>5) + (*(DWORD
*)s2
&0x7c1f))&0x7c1f)
721 | (((((*d2
&0x03e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x03e0))&0x03e0));
722 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
723 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
724 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
731 AlphaBlt_YUY2(w
, h
, d
, dst
.pitch
, s
, src
.pitch
);
736 //dst.pitch = abs(dst.pitch);
740 dst
.pitchUV
= abs(dst
.pitch
)/2;
742 if(!dst
.bitsU
|| !dst
.bitsV
)
744 dst
.bitsU
= (BYTE
*)dst
.bits
+ abs(dst
.pitch
)*dst
.h
;
745 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
746 if(dst
.type
== MSP_YV12
)
749 dst
.bitsU
= dst
.bitsV
;
754 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
755 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
756 if(rd
.top
> rd
.bottom
)
758 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
759 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
760 dst
.pitchUV
= -dst
.pitchUV
;
763 BYTE
* src_origin
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
;
766 ss
[0] = src_origin
+ src
.pitch
*src
.h
*2;//U
767 ss
[1] = src_origin
+ src
.pitch
*src
.h
*3;//V
769 AlphaBltYv12Luma( d
, dst
.pitch
, w
, h
, src_origin
+ src
.pitch
*src
.h
, src_origin
, src
.pitch
);
771 AlphaBltYv12Chroma( dd
[0], dst
.pitchUV
, w
, h2
, ss
[0], src_origin
, src
.pitch
);
772 AlphaBltYv12Chroma( dd
[1], dst
.pitchUV
, w
, h2
, ss
[1], src_origin
, src
.pitch
);
781 //emms costs 40 cpu cycles
786 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
788 const SubPicDesc
& src
= m_spd
;
789 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
791 CRect
rs(*pSrc
), rd(*pDst
);
795 rd
.bottom
= dst
.h
- rd
.bottom
;
796 rd
.top
= dst
.h
- rd
.top
;
799 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
803 int w
= rs
.Width(), h
= rs
.Height();
806 BYTE
* s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
807 BYTE
* d
= static_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
809 if(rd
.top
> rd
.bottom
) {
810 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
812 dst
.pitch
= -dst
.pitch
;
815 for(ptrdiff_t i
=0; i
<h
; i
++, s
+= src
.pitch
, d
+= dst
.pitch
)
818 BYTE
* s2end
= s2
+ w
*4;
819 WORD
* d2
= reinterpret_cast<WORD
*>(d
);
820 for(; s2
< s2end
; s2
+= 4, d2
++)
823 d2
[0] = ((d2
[0]*s2
[3])>>8) + (s2
[1]<<8);
832 dst
.pitchUV
= abs(dst
.pitch
);
834 if(!dst
.bitsU
|| !dst
.bitsV
)
836 dst
.bitsU
= static_cast<BYTE
*>(dst
.bits
) + abs(dst
.pitch
)*dst
.h
;
837 dst
.bitsV
= dst
.bitsU
+ 2;
839 BYTE
* ddUV
= dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
*2;
840 if(rd
.top
> rd
.bottom
)
842 ddUV
= dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
*2;
843 dst
.pitchUV
= -dst
.pitchUV
;
846 s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
849 int pitch
= src
.pitch
;
850 for(int j
= 0; j
< h2
; j
++, s
+= 2*src
.pitch
, d
+= dst
.pitchUV
)
853 WORD
* d2
=reinterpret_cast<WORD
*>(d
);
854 WORD
* d2_end
= reinterpret_cast<WORD
*>(d
+2*w
);
855 DWORD last_alpha
= s2
[3]+s2
[3+src
.pitch
];
856 for( ; d2
<d2_end
; s2
+=8, d2
+=2)
860 (s2
[3] + s2
[3+src
.pitch
])*2 +
861 s2
[3+4]+ s2
[3+4+src
.pitch
]);
862 last_alpha
= s2
[3+4]+ s2
[3+4+src
.pitch
];
865 d2
[0] = (((d2
[0])*ia
)>>11) + ((s2
[0] + s2
[0+src
.pitch
])<<7);
866 d2
[1] = (((d2
[1])*ia
)>>11) + ((s2
[4] + s2
[4+src
.pitch
])<<7);
874 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
876 const SubPicDesc
& src
= m_spd
;
877 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
879 CRect
rs(*pSrc
), rd(*pDst
);
883 rd
.bottom
= dst
.h
- rd
.bottom
;
884 rd
.top
= dst
.h
- rd
.top
;
887 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
891 int w
= rs
.Width(), h
= rs
.Height();
893 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
894 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
896 if(rd
.top
> rd
.bottom
) {
897 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
899 dst
.pitch
= -dst
.pitch
;
902 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
904 BYTE
* s2end
= s2
+ w
*4;
906 for(; s2
< s2end
; s2
+= 4, d2
++) {
908 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
912 dst
.pitch
= abs(dst
.pitch
);
917 dst
.pitchUV
= dst
.pitch
/2;
921 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
924 if(!dst
.bitsU
|| !dst
.bitsV
) {
925 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
926 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
928 if(dst
.type
== MSP_YV12
) {
930 dst
.bitsU
= dst
.bitsV
;
936 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
937 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
939 if(rd
.top
> rd
.bottom
) {
940 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
941 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
942 dst
.pitchUV
= -dst
.pitchUV
;
945 for(ptrdiff_t i
= 0; i
< 2; i
++) {
949 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
951 BYTE
* s2end
= s2
+ w
*4;
955 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
956 for(; s2
< s2end
; s2
+= 8, d2
++, a2
+= 8) {
957 unsigned int ia
= (last_alpha
+ 2*(a2
[0]+a2
[0+src
.pitch
]) + a2
[4] + a2
[4+src
.pitch
] + 4 )>>3;
958 last_alpha
= a2
[4] + a2
[4+src
.pitch
];
960 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
969 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
971 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
972 const SubPicDesc
& src
= m_spd
;
973 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
975 CRect
rs(*pSrc
), rd(*pDst
);
979 rd
.bottom
= dst
.h
- rd
.bottom
;
980 rd
.top
= dst
.h
- rd
.top
;
983 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
987 int w
= rs
.Width(), h
= rs
.Height();
989 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
990 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
992 if(rd
.top
> rd
.bottom
) {
993 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
995 dst
.pitch
= -dst
.pitch
;
998 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
1000 BYTE
* s2end
= s2
+ w
*4;
1002 for(; s2
< s2end
; s2
+= 4, d2
++) {
1004 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
1008 dst
.pitch
= abs(dst
.pitch
);
1013 dst
.pitchUV
= dst
.pitch
;
1017 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
1020 if(!dst
.bitsU
|| !dst
.bitsV
) {
1021 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
1022 dst
.bitsV
= dst
.bitsU
+ 1;
1024 if(dst
.type
== MSP_NV21
) {
1025 BYTE
* p
= dst
.bitsU
;
1026 dst
.bitsU
= dst
.bitsV
;
1032 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
;
1035 if(rd
.top
> rd
.bottom
) {
1036 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
;
1038 dst
.pitchUV
= -dst
.pitchUV
;
1041 for(ptrdiff_t i
= 0; i
< 2; i
++) {
1045 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
1047 BYTE
* s2end
= s2
+ w
*4;
1050 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
1051 for(; s2
< s2end
; s2
+= 8, d2
+=2, a2
+= 8) {
1052 unsigned int ia
= (last_alpha
+2*(a2
[0]+a2
[0+src
.pitch
])+a2
[4]+a2
[4+src
.pitch
]+4)>>3;
1053 last_alpha
= a2
[4]+a2
[4+src
.pitch
];
1055 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
1061 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1065 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1067 //fix me: check colorspace and log error
1068 const SubPicDesc
& src
= m_spd
;
1069 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1071 CRect
rs(*pSrc
), rd(*pDst
);
1075 rd
.bottom
= dst
.h
- rd
.bottom
;
1076 rd
.top
= dst
.h
- rd
.top
;
1078 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1079 return E_INVALIDARG
;
1081 int w
= rs
.Width(), h
= rs
.Height();
1082 bool bottom_down
= rd
.top
> rd
.bottom
;
1088 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
1089 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
*2;
1093 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
*2;
1094 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
*2;
1095 dst
.pitch
= -dst
.pitch
;
1097 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1099 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1100 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1101 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1102 return AlphaBltAnv12_P010(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1105 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1107 //fix me: check colorspace and log error
1108 const SubPicDesc
& src
= m_spd
;
1109 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1111 CRect
rs(*pSrc
), rd(*pDst
);
1115 rd
.bottom
= dst
.h
- rd
.bottom
;
1116 rd
.top
= dst
.h
- rd
.top
;
1118 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1119 return E_INVALIDARG
;
1121 int w
= rs
.Width(), h
= rs
.Height();
1122 bool bottom_down
= rd
.top
> rd
.bottom
;
1128 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
;
1129 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
;
1133 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
;
1134 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
;
1135 dst
.pitch
= -dst
.pitch
;
1137 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1139 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1140 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1141 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1143 return AlphaBltAnv12_Nv12(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1146 STDMETHODIMP
CMemSubPic::SetDirtyRectEx(CAtlList
<CRect
>* dirtyRectList
)
1148 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1149 if(dirtyRectList
!=NULL
)
1151 POSITION pos
= dirtyRectList
->GetHeadPosition();
1152 if(m_spd
.type
== MSP_AYUV_PLANAR
|| m_alpha_blt_dst_type
==MSP_IYUV
|| m_alpha_blt_dst_type
==MSP_YV12
1153 || m_alpha_blt_dst_type
==MSP_P010
|| m_alpha_blt_dst_type
==MSP_P016
1154 || m_alpha_blt_dst_type
==MSP_NV12
|| m_alpha_blt_dst_type
==MSP_NV21
)
1158 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1159 cRectSrc
.left
&= ~15;
1160 cRectSrc
.right
= (cRectSrc
.right
+15)&~15;
1161 if(cRectSrc
.right
>m_spd
.w
)
1163 cRectSrc
.right
= m_spd
.w
;
1166 cRectSrc
.bottom
= (cRectSrc
.bottom
+1)&~1;
1169 else if(m_spd
.type
== MSP_XY_AUYV
|| m_alpha_blt_dst_type
==MSP_YUY2
)
1173 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1174 cRectSrc
.left
&= ~3;
1175 cRectSrc
.right
= (cRectSrc
.right
+3)&~3;
1179 return __super::SetDirtyRectEx(dirtyRectList
);
1186 void CMemSubPic::AlphaBltYv12Luma(byte
* dst
, int dst_pitch
,
1188 const byte
* sub
, const byte
* alpha
, int sub_pitch
)
1191 ((reinterpret_cast<intptr_t>(alpha
) ^ reinterpret_cast<intptr_t>(sub
))
1192 |(reinterpret_cast<intptr_t>(alpha
) ^ reinterpret_cast<intptr_t>(dst
))
1193 | static_cast<intptr_t>(sub_pitch
)
1194 | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0
1195 && w
> 32 && (g_cpuid
.m_flags
& CCpuID::sse2
))
1197 int head
= (16 - (reinterpret_cast<intptr_t>(alpha
)&15))&15;
1198 int tail
= (w
-head
) & 15;
1199 int w1
= w
- head
- tail
;
1200 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
1202 const BYTE
* sa
= alpha
;
1203 const BYTE
* s2
= sub
;
1204 const BYTE
* s2end_mod16
= s2
+ w1
;
1205 const BYTE
* s2end
= s2
+ w
;
1208 for( ; (reinterpret_cast<intptr_t>(s2
)&15) != 0; s2
++, sa
++, d2
++)
1212 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1215 for(; s2
< s2end_mod16
; s2
+=16, sa
+=16, d2
+=16)
1217 pix_alpha_blend_yv12_luma_sse2(d2
, sa
, s2
);
1219 for(; s2
< s2end
; s2
++, sa
++, d2
++)
1223 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1228 else //fix me: only a workaround for non-mod-16 size video
1230 CMemSubPic::AlphaBltYv12LumaC(dst
, dst_pitch
, w
, h
, sub
, alpha
, sub_pitch
);
1234 void CMemSubPic::AlphaBltYv12LumaC( byte
* dst
, int dst_pitch
, int w
, int h
, const byte
* sub
, const byte
* alpha
, int sub_pitch
)
1236 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
1238 const BYTE
* sa
= alpha
;
1239 const BYTE
* s2
= sub
;
1240 const BYTE
* s2end
= s2
+ w
;
1242 for(; s2
< s2end
; s2
+=1, sa
+=1, d2
+=1)
1246 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
1247 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1253 void CMemSubPic::AlphaBltYv12Chroma(byte
* dst_uv
, int dst_pitch
,
1254 int w
, int chroma_h
,
1255 const byte
* src_uv
, const byte
* src_a
, int src_pitch
)
1258 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1259 |(reinterpret_cast<intptr_t>(src_a
) ^ (2*reinterpret_cast<intptr_t>(dst_uv
)))
1260 | static_cast<intptr_t>(src_pitch
)
1261 | (2*static_cast<intptr_t>(dst_pitch
)) ) & 15) ==0 &&
1262 w
> 16 && (g_cpuid
.m_flags
& CCpuID::sse2
))
1264 int head
= (16 - (reinterpret_cast<intptr_t>(src_a
)&15))&15;
1265 int tail
= (w
-head
) & 15;
1266 int w00
= w
- head
- tail
;
1268 int pitch
= src_pitch
;
1269 for(int j
= 0; j
< chroma_h
; j
++, src_uv
+= src_pitch
*2, src_a
+= src_pitch
*2, dst_uv
+= dst_pitch
)
1271 hleft_vmid_mix_uv_yv12_c2(dst_uv
, head
, src_uv
, src_a
, src_pitch
);
1272 hleft_vmid_mix_uv_yv12_sse2(dst_uv
+(head
>>1), w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
1273 hleft_vmid_mix_uv_yv12_c2(dst_uv
+((head
+w00
)>>1), tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
1276 else//fix me: only a workaround for non-mod-16 size video
1278 AlphaBltYv12ChromaC(dst_uv
, dst_pitch
, w
, chroma_h
, src_uv
, src_a
, src_pitch
);
1282 void CMemSubPic::AlphaBltYv12ChromaC( byte
* dst
, int dst_pitch
, int w
, int chroma_h
, const byte
* sub_chroma
, const byte
* alpha
, int sub_pitch
)
1284 for(int j
= 0; j
< chroma_h
; j
++, sub_chroma
+= sub_pitch
*2, alpha
+= sub_pitch
*2, dst
+= dst_pitch
)
1286 hleft_vmid_mix_uv_yv12_c(dst
, w
, sub_chroma
, alpha
, sub_pitch
);
// CMemSubPic::AlphaBltAnv12_P010
// Blends a subtitle image, given as separate alpha (src_a), luma (src_y) and
// chroma (src_uv) planes sharing src_pitch, onto a destination whose luma is
// written as 16-bit WORD samples (dst_y) and whose chroma rows are produced
// by the hleft_vmid_mix_uv_p010_* kernels (dst_uv).
// When the SSE2 CPU flag is set, the luma pass and then the chroma pass are
// done here, each with an aligned fast path and a scalar fallback.
// The final statement forwards to AlphaBltAnv12_P010_C.
// NOTE(review): several lines of this function are not visible in this
// listing (braces, the opening of each `if`, the remaining condition terms,
// the switch case bodies, the definitions of h2 and d, the `else` in front
// of the final return). The comments below describe only visible statements.
1290 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1291 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// Take the hand-tuned paths below only when the CPU reports SSE2.
1293 if ( g_cpuid
.m_flags
& CCpuID::sse2
)
// sa walks the alpha plane row by row during the luma pass; src_a itself is
// not advanced until the chroma pass further down.
1295 const BYTE
* sa
= src_a
;
// Luma alignment test: XOR-ing the addresses and OR-ing in both pitches,
// then masking with 15, yields zero only when src_a, src_y and dst_y share
// the same offset modulo 16 and both pitches are multiples of 16.
1297 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_y
))
1298 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_y
))
1299 | static_cast<intptr_t>(src_pitch
)
1300 | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 &&
// head: number of bytes from src_a up to the next 16-byte boundary (0 when
// already aligned). Binary `-` binds tighter than `&`, so this parses as
// ((16 - addr) & 15) & 15; the value is the same as the explicitly
// parenthesised form used by the chroma pass below.
1303 int head
= (16 - reinterpret_cast<intptr_t>(src_a
)&15)&15;
// tail: what is left of (w - head) after whole groups of 16 are removed.
1304 int tail
= (w
- head
) & 15;
// Luma pass, fast path: one row per iteration.
1306 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
// Per-row cursors: alpha, source luma, and (below) destination words.
1308 const BYTE
* sa2
= sa
;
1309 const BYTE
* s2
= src_y
;
// NOTE(review): this bound is measured from the start of the row
// (src_y + (w & ~15)), not from the position reached after the head pixels.
// If the head switch advances s2 by `head` and (w & 15) < head, then
// head + the 16-pixel blocks + tail appears to cover w + 16 pixels.
// Confirm against the lines not visible here.
1310 const BYTE
* s2end_mod16
= s2
+ (w
&~15);
// Destination luma is addressed as 16-bit words.
1312 WORD
* d_w
=reinterpret_cast<WORD
*>(dst_y
);
// Leading (unaligned) pixels. The case bodies are not visible in this
// listing; the one visible label documents intentional fall-through.
1314 switch( head
)//important: it is safe since w > 16
// Blend one pixel: when alpha is below 0xff the destination word becomes
// ((dst*alpha)>>8) + (src<<8); all three cursors then advance by one.
1317 #define _XY_MIX_ONE if(sa2[0] < 0xff) { d_w[0] = ((d_w[0]*sa2[0])>>8) + (s2[0]<<8); } sa2++;d_w++;s2++;
1345 case 1://fall through on purpose
// Middle of the row: 16 pixels per iteration through the SSE2 kernel
// (d_w advances by 16 words).
1348 for(; s2
< s2end_mod16
; s2
+=16, sa2
+=16, d_w
+=16)
1350 mix_16_y_p010_sse2( reinterpret_cast<BYTE
*>(d_w
), s2
, sa2
);
// Trailing pixels; as with the head switch, the case bodies are not visible.
1352 switch( tail
)//important: it is safe since w > 16
1382 case 1://fall through on purpose
// Luma pass, fallback: plain per-pixel loop over every row.
1387 else //fix me: only a workaround for non-mod-16 size video
1389 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1391 const BYTE
* sa2
= sa
;
1392 const BYTE
* s2
= src_y
;
1393 const BYTE
* s2end
= s2
+ w
;
1394 WORD
* d_w
= reinterpret_cast<WORD
*>(dst_y
);
1395 for(; s2
< s2end
; s2
+=1, sa2
+=1, d_w
+=1)
// Same blend formula as _XY_MIX_ONE. Any guard on the lines between the
// loop header and this statement is not visible in this listing.
1399 d_w
[0] = ((d_w
[0]*sa2
[0])>>8) + (s2
[0]<<8);
// Chroma alignment test: same construction as for luma, applied to src_a,
// src_uv, dst_uv and the two pitches.
1408 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1409 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_uv
))
1410 | static_cast<intptr_t>(src_pitch
)
1411 | static_cast<intptr_t>(dst_pitch
) ) & 15) ==0 &&
// Split each row into head (up to the next 16-byte boundary of src_a),
// w00 (the middle span, a multiple of 16 because tail removes the
// remainder) and tail.
1414 int head
= (16-(reinterpret_cast<intptr_t>(src_a
)&15))&15;
1415 int tail
= (w
-head
) & 15;
1416 int w00
= w
- head
- tail
;
1418 ASSERT(w
>0);//the calls to mix may fail if w==0
// Chroma pass, fast path. Per iteration src_uv advances one source row,
// src_a two source rows and d one destination row.
// h2 and d are defined on lines not visible here (presumably h/2 and
// dst_uv; confirm).
1419 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
// Head span with the C kernel; this call passes no trailing argument
// (presumably a defaulted parameter; confirm against the kernel).
1421 hleft_vmid_mix_uv_p010_c2(d
, head
, src_uv
, src_a
, src_pitch
);
// Middle span with the SSE2 kernel. The destination offset is 2*head while
// the source offsets are head. The last argument is -1 when pixels precede
// the span and 0 otherwise; its meaning is defined by the kernel.
1422 hleft_vmid_mix_uv_p010_sse2(d
+2*head
, w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
// Tail span with the C kernel, using the same offset scheme.
1423 hleft_vmid_mix_uv_p010_c2(d
+2*(head
+w00
), tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
// Chroma pass, fallback: whole rows through the plain C kernel.
1428 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1430 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Forward everything to the C implementation (presumably the branch taken
// when SSE2 is unavailable; the `else` line is not visible here).
1438 return AlphaBltAnv12_P010_C(src_a
, src_y
, src_uv
, src_pitch
, dst_y
, dst_uv
, dst_pitch
, w
, h
);
// CMemSubPic::AlphaBltAnv12_P010_C
// Plain C counterpart of AlphaBltAnv12_P010: a per-pixel luma loop that
// writes 16-bit WORD samples to dst_y, followed by a per-row chroma loop
// that calls hleft_vmid_mix_uv_p010_c.
// NOTE(review): braces, the lines between the inner loop header and the
// blend statement, the definitions of h2 and d, and the return statement
// are not visible in this listing.
1442 HRESULT
CMemSubPic::AlphaBltAnv12_P010_C( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
, BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// sa walks the alpha plane during the luma pass so that src_a is still at
// its original position when the chroma pass starts.
1444 const BYTE
* sa
= src_a
;
// Luma pass: one row per iteration.
1445 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
// Per-row cursors for alpha, source luma and destination words.
1447 const BYTE
* sa2
= sa
;
1448 const BYTE
* s2
= src_y
;
1449 const BYTE
* s2end
= s2
+ w
;
1450 WORD
* d2
= reinterpret_cast<WORD
*>(dst_y
);
1451 for(; s2
< s2end
; s2
+=1, sa2
+=1, d2
+=1)
// Blend: destination word = ((dst*alpha)>>8) + (src<<8). Any guard on the
// lines just above this statement is not visible in this listing.
1455 d2
[0] = ((d2
[0]*sa2
[0])>>8) + (s2
[0]<<8);
// Chroma pass: per iteration src_uv advances one source row, src_a two
// source rows and d one destination row.
// h2 and d are defined on lines not visible here (presumably h/2 and
// dst_uv; confirm).
1462 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1464 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
// CMemSubPic::AlphaBltAnv12_Nv12
// Blends a subtitle image (separate alpha / luma / chroma source planes
// sharing src_pitch) onto dst_y and dst_uv. Luma is delegated to
// AlphaBltYv12Luma; chroma rows go through the hleft_vmid_mix_uv_nv12_*
// kernels, using the SSE2 kernel for the aligned middle of each row when
// the condition below holds and the plain C kernel otherwise.
// NOTE(review): braces, the `if(`/`else` lines, the definitions of h2 and d
// and the return statement are not visible in this listing.
1469 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1470 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// Luma plane: handled entirely by the shared luma routine.
1472 AlphaBltYv12Luma( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
// Fast-path condition: src_a, src_uv and dst_uv share the same offset
// modulo 16, both pitches are multiples of 16, the row is wider than 16
// pixels and the CPU reports SSE2.
1476 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1477 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_uv
))
1478 | static_cast<intptr_t>(src_pitch
)
1479 | static_cast<intptr_t>(dst_pitch
) ) & 15) ==0 &&
1480 w
> 16 && (g_cpuid
.m_flags
& CCpuID::sse2
) )
// Split each row into head (bytes up to the next 16-byte boundary of
// src_a), w00 (the middle span, a multiple of 16 because tail removes the
// remainder) and tail.
1484 int head
= (16-(reinterpret_cast<intptr_t>(src_a
)&15))&15;
1485 int tail
= (w
-head
) & 15;
1486 int w00
= w
- head
- tail
;
1488 ASSERT(w
>0);//the calls to mix may fail if w==0
// Chroma pass, fast path. Per iteration src_uv advances one source row,
// src_a two source rows and d one destination row.
// h2 and d are defined on lines not visible here (presumably h/2 and
// dst_uv; confirm).
1489 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
// Head span with the C kernel; this call passes no trailing argument
// (presumably a defaulted parameter; confirm against the kernel).
1491 hleft_vmid_mix_uv_nv12_c2(d
, head
, src_uv
, src_a
, src_pitch
);
// Middle span with the SSE2 kernel. Destination and source offsets are both
// head. The last argument is -1 when pixels precede the span and 0
// otherwise; its meaning is defined by the kernel.
1492 hleft_vmid_mix_uv_nv12_sse2(d
+head
, w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
// Tail span with the C kernel.
1493 hleft_vmid_mix_uv_nv12_c2(d
+head
+w00
, tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
// Chroma pass, fallback: whole rows through the plain C kernel.
1500 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1502 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
// CMemSubPic::AlphaBltAnv12_Nv12_C
// Plain C counterpart of AlphaBltAnv12_Nv12: luma is delegated to
// AlphaBltYv12LumaC, then every chroma row is blended with
// hleft_vmid_mix_uv_nv12_c.
// NOTE(review): braces, the definitions of h2 and d and the return
// statement are not visible in this listing.
1508 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12_C( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
, BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// Luma plane: handled entirely by the C luma routine.
1510 AlphaBltYv12LumaC( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
// Chroma pass: per iteration src_uv advances one source row, src_a two
// source rows and d one destination row.
// h2 and d are defined on lines not visible here (presumably h/2 and
// dst_uv; confirm).
1513 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1515 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
// CMemSubPic::SubsampleAndInterlace
// Walks the u and v planes two rows at a time (i advances by 2 up to h) and
// hands each pair to the SSE2 kernel
// hleft_vmid_subsample_and_interlace_2_line_sse2, which writes into dst.
// NOTE(review): the rest of the loop body (presumably the pointer advances
// for dst, u and v) and the braces are not visible in this listing.
1520 void CMemSubPic::SubsampleAndInterlace( BYTE
* dst
, const BYTE
* u
, const BYTE
* v
, int h
, int w
, int pitch
)
// One iteration per pair of source rows.
1522 for (int i
=0;i
<h
;i
+=2)
1524 hleft_vmid_subsample_and_interlace_2_line_sse2(dst
, u
, v
, w
, pitch
);
// CMemSubPic::SubsampleAndInterlaceC
// Plain C counterpart of SubsampleAndInterlace: walks the u and v planes two
// rows at a time (i advances by 2 up to h) and hands each pair to
// hleft_vmid_subsample_and_interlace_2_line_c, which writes into dst.
// NOTE(review): the rest of the loop body (presumably the pointer advances
// for dst, u and v) and the braces are not visible in this listing.
1531 void CMemSubPic::SubsampleAndInterlaceC( BYTE
* dst
, const BYTE
* u
, const BYTE
* v
, int h
, int w
, int pitch
)
// One iteration per pair of source rows.
1533 for (int i
=0;i
<h
;i
+=2)
1535 hleft_vmid_subsample_and_interlace_2_line_c(dst
, u
, v
, w
, pitch
);
// CMemSubPic::AlphaBlt_YUY2
// Dispatcher for the YUY2 alpha blend: forwards the unchanged argument list
// to either AlphaBlt_YUY2_C or AlphaBlt_YUY2_MMX.
// NOTE(review): the lines that choose between the two calls are not visible
// in this listing (presumably a preprocessor or CPU-feature switch; confirm).
1542 void CMemSubPic::AlphaBlt_YUY2(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
// Portable C implementation.
1545 AlphaBlt_YUY2_C(w
, h
, d
, dstpitch
, s
, srcpitch
);
// MMX implementation.
1547 AlphaBlt_YUY2_MMX(w
, h
, d
, dstpitch
, s
, srcpitch
);
1552 // CMemSubPicAllocator
// Constructor: forwards maxsize (with two `false` flags) to
// CSubPicExAllocatorImpl, stores alpha_blt_dst_type and maxsize in members,
// then derives m_type from alpha_blt_dst_type in a switch.
// NOTE(review): the case labels, any further switch arms and any use of the
// `type` parameter are not visible in this listing. Only the two m_type
// assignments below can be seen.
1555 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type
, SIZE maxsize
, int type
/*=-1*/)
1556 : CSubPicExAllocatorImpl(maxsize
, false, false)
1557 , m_alpha_blt_dst_type(alpha_blt_dst_type
)
1558 , m_maxsize(maxsize
)
// Choose the internal sub-picture layout from the blend destination type.
1563 switch(alpha_blt_dst_type
)
1566 m_type
= MSP_XY_AUYV
;
1577 m_type
= MSP_AYUV_PLANAR
;
1586 // ISubPicAllocatorImpl
1588 bool CMemSubPicAllocator::AllocEx(bool fStatic
, ISubPicEx
** ppSubPic
)
1594 spd
.w
= m_maxsize
.cx
;
1595 spd
.h
= m_maxsize
.cy
;
1597 spd
.pitch
= (spd
.w
*spd
.bpp
)>>3;
1599 spd
.bits
= DNew BYTE
[spd
.pitch
*spd
.h
];
1603 *ppSubPic
= DNew
CMemSubPic(spd
, m_alpha_blt_dst_type
);
1607 (*ppSubPic
)->AddRef();