2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
31 static void SaveRect2File(const CRect
& cRect
, const char * filename
)
33 std::ofstream
os(filename
);
34 os
<<cRect
.left
<<","<<cRect
.top
<<","<<cRect
.right
<<","<<cRect
.bottom
;
36 static void SaveAxxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
38 std::ofstream
axxx(filename
);
39 int w
= cRect
.Width(), h
= cRect
.Height();
41 BYTE
* top
= (BYTE
*)spd
.bits
+ spd
.pitch
*cRect
.top
+ cRect
.left
*4;
42 BYTE
* bottom
= top
+ spd
.pitch
*h
;
44 for(; top
< bottom
; top
+= spd
.pitch
) {
47 for(; s
< e
; s
+=4) { // ARGB ARGB -> AxYU AxYV
48 axxx
<<(int)s
[0]<<","<<(int)s
[1]<<","<<(int)s
[2]<<","<<(int)s
[3];
61 static void SaveArgb2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
63 SaveAxxx2File(spd
, cRect
, filename
);
65 static void SaveAyuv2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
67 SaveAxxx2File(spd
, cRect
, filename
);
69 static void SaveNvxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
71 std::ofstream
os(filename
);
72 int w
= cRect
.Width(), h
= cRect
.Height();
74 BYTE
* top
= (BYTE
*)spd
.bits
;
75 BYTE
* bottom
= top
+ spd
.pitch
*h
;
77 for(; top
< bottom
; top
+= spd
.pitch
) {
81 BYTE
* sY
= s
+ spd
.pitch
*spd
.h
;
82 BYTE
* sU
= sY
+ spd
.pitch
*spd
.h
;
84 for(; s
< e
; s
++, sY
++, sU
+=2,sV
+=2) {
85 os
<<(int)s
[0]<<","<<(int)sY
[0]<<","<<(int)sU
[0]<<","<<(int)sV
[0];
99 #define ONCER(expr) {\
100 static bool entered=false;\
112 // alpha blend functions
114 #include "xy_intrinsics.h"
116 static void AlphaBltYv12Luma(byte
* dst
, int dst_pitch
,
118 const byte
* sub
, const byte
* alpha
, int sub_pitch
)
120 if( ((reinterpret_cast<intptr_t>(alpha
) | reinterpret_cast<intptr_t>(sub
) | static_cast<intptr_t>(sub_pitch
) |
121 reinterpret_cast<intptr_t>(dst
) | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 )
123 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
125 const BYTE
* sa
= alpha
;
126 const BYTE
* s2
= sub
;
127 const BYTE
* s2end_mod16
= s2
+ (w
&~15);
128 const BYTE
* s2end
= s2
+ w
;
131 for(; s2
< s2end_mod16
; s2
+=16, sa
+=16, d2
+=16)
133 pix_alpha_blend_yv12_luma_sse2(d2
, sa
, s2
);
135 for(; s2
< s2end
; s2
++, sa
++, d2
++)
139 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
144 else //fix me: only a workaround for non-mod-16 size video
146 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
148 const BYTE
* sa
= alpha
;
149 const BYTE
* s2
= sub
;
150 const BYTE
* s2end_mod16
= s2
+ (w
&~15);
151 const BYTE
* s2end
= s2
+ w
;
153 for(; s2
< s2end
; s2
+=1, sa
+=1, d2
+=1)
157 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
158 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
165 static void AlphaBltYv12Chroma(byte
* dst
, int dst_pitch
,
167 const byte
* sub_chroma
, const byte
* alpha
, int sub_pitch
)
169 if( ((reinterpret_cast<intptr_t>(sub_chroma
) |
170 //reinterpret_cast<intptr_t>(dst) |
171 reinterpret_cast<intptr_t>(alpha
) | static_cast<intptr_t>(sub_pitch
)
172 //| (static_cast<intptr_t>(dst_pitch)&7)
175 int pitch
= sub_pitch
;
176 for(int j
= 0; j
< chroma_h
; j
++, sub_chroma
+= sub_pitch
*2, alpha
+= sub_pitch
*2, dst
+= dst_pitch
)
178 hleft_vmid_mix_uv_yv12_sse2(dst
, w
, sub_chroma
, alpha
, sub_pitch
);
181 else//fix me: only a workaround for non-mod-16 size video
183 for(int j
= 0; j
< chroma_h
; j
++, sub_chroma
+= sub_pitch
*2, alpha
+= sub_pitch
*2, dst
+= dst_pitch
)
185 hleft_vmid_mix_uv_yv12_c(dst
, w
, sub_chroma
, alpha
, sub_pitch
);
194 CMemSubPic::CMemSubPic(SubPicDesc
& spd
, int alpha_blt_dst_type
)
195 : m_spd(spd
), m_alpha_blt_dst_type(alpha_blt_dst_type
)
197 m_maxsize
.SetSize(spd
.w
, spd
.h
);
198 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
199 CRect
allSpd(0,0,spd
.w
, spd
.h
);
200 m_rectListDirty
.AddTail(allSpd
);
203 CMemSubPic::~CMemSubPic()
205 delete [] m_spd
.bits
, m_spd
.bits
= NULL
;
210 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
212 return (void*)&m_spd
;
215 STDMETHODIMP
CMemSubPic::GetDesc(SubPicDesc
& spd
) const
217 spd
.type
= m_spd
.type
;
221 spd
.pitch
= m_spd
.pitch
;
222 spd
.bits
= m_spd
.bits
;
223 spd
.bitsU
= m_spd
.bitsU
;
224 spd
.bitsV
= m_spd
.bitsV
;
225 spd
.vidrect
= m_vidrect
;
229 STDMETHODIMP
CMemSubPic::CopyTo(ISubPicEx
* pSubPic
)
232 if(FAILED(hr
= __super::CopyTo(pSubPic
))) {
237 if(FAILED(GetDesc(src
)) || FAILED(pSubPic
->GetDesc(dst
))) {
240 while(!m_rectListDirty
.IsEmpty())
242 CRect
& cRect
= m_rectListDirty
.GetHead();
243 int w
= cRect
.Width(), h
= cRect
.Height();
244 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*cRect
.top
+ cRect
.left
*4;
245 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*cRect
.top
+ cRect
.left
*4;
246 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
252 STDMETHODIMP
CMemSubPic::ClearDirtyRect(DWORD color
)
254 if(m_rectListDirty
.IsEmpty()) {
257 while(!m_rectListDirty
.IsEmpty())
259 //pDirtyRect = m_rectListDirty.RemoveHead();
260 CRect
& dirtyRect
= m_rectListDirty
.RemoveTail();
261 BYTE
* p
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(dirtyRect
.top
) + dirtyRect
.left
*(m_spd
.bpp
>>3);
262 int w
= dirtyRect
.Width();
263 if(m_spd
.type
!=MSP_AYUV_PLANAR
)
265 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
268 memsetd(p
, color
, w
*4); // nya
286 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
288 // memsetd(p, 0, m_rcDirty.Width());
289 //DbgLog((LOG_TRACE, 3, "w:%d", w));
290 //w = pDirtyRect->Width();
292 memset(p
+m_spd
.h
*m_spd
.pitch
, 0, w
);
293 memset(p
+m_spd
.h
*m_spd
.pitch
*2, 0, w
);
294 memset(p
+m_spd
.h
*m_spd
.pitch
*3, 0, w
);
298 m_rectListDirty
.RemoveAll();
302 STDMETHODIMP
CMemSubPic::Lock(SubPicDesc
& spd
)
307 STDMETHODIMP
CMemSubPic::Unlock( CAtlList
<CRect
>* dirtyRectList
)
309 int src_type
= m_spd
.type
;
310 int dst_type
= m_alpha_blt_dst_type
;
311 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
312 dst_type
== MSP_RGB24
||
313 dst_type
== MSP_RGB16
||
314 dst_type
== MSP_RGB15
))
316 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
318 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
320 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
321 dst_type
== MSP_YV12
||
322 dst_type
== MSP_P010
||
323 dst_type
== MSP_P016
||
324 dst_type
== MSP_NV12
||
325 dst_type
== MSP_NV21
)))
327 return UnlockOther(dirtyRectList
);
329 else if(src_type
==MSP_RGBA
&& (dst_type
== MSP_YUY2
||
330 dst_type
== MSP_AYUV
|| //ToDo: fix me MSP_AYUV
331 dst_type
== MSP_IYUV
||
332 dst_type
== MSP_YV12
||
333 dst_type
== MSP_NV12
||
334 dst_type
== MSP_NV21
||
335 dst_type
== MSP_P010
||
336 dst_type
== MSP_P016
))
338 return UnlockRGBA_YUV(dirtyRectList
);
343 HRESULT
CMemSubPic::UnlockOther(CAtlList
<CRect
>* dirtyRectList
)
345 SetDirtyRectEx(dirtyRectList
);
346 if(m_rectListDirty
.IsEmpty()) {
350 POSITION pos
= m_rectListDirty
.GetHeadPosition();
353 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
354 int w
= cRect
.Width(), h
= cRect
.Height();
360 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(cRect
.top
) + cRect
.left
*4;
361 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
362 if(m_alpha_blt_dst_type
== MSP_RGB16
)
364 for(; top
< bottom
; top
+= m_spd
.pitch
)
366 DWORD
* s
= (DWORD
*)top
;
370 *s
= ((*s
>>3)&0x1f000000)|((*s
>>8)&0xf800)|((*s
>>5)&0x07e0)|((*s
>>3)&0x001f);
371 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
375 else if(m_alpha_blt_dst_type
== MSP_RGB15
)
377 for(; top
< bottom
; top
+= m_spd
.pitch
)
379 DWORD
* s
= (DWORD
*)top
;
383 *s
= ((*s
>>3)&0x1f000000)|((*s
>>9)&0x7c00)|((*s
>>6)&0x03e0)|((*s
>>3)&0x001f);
384 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
388 else if(m_alpha_blt_dst_type
== MSP_YUY2
)
390 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top
, m_spd
.pitch
*(h
-1)) );
392 for(BYTE
* tempTop
=top
; tempTop
< bottom
; tempTop
+= m_spd
.pitch
)
396 BYTE last_v
= s
[0], last_u
=s
[2];
397 for(; s
< e
; s
+=8) // AUYV AUYV -> AxYU AxYV
400 s
[4] = (last_v
+ 2*s
[0] + s
[4] + 2)>>2;
403 s
[0] = (last_u
+ 2*s
[2] + s
[6] + 2)>>2;
408 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top
, m_spd
.pitch
*(h
-1)) );
410 else if(m_alpha_blt_dst_type
== MSP_YV12
|| m_alpha_blt_dst_type
== MSP_IYUV
)
414 else if ( m_alpha_blt_dst_type
== MSP_P010
|| m_alpha_blt_dst_type
== MSP_P016
415 || m_alpha_blt_dst_type
== MSP_NV12
)
417 SubsampleAndInterlace(cRect
, true);
419 else if( m_alpha_blt_dst_type
== MSP_NV21
)
421 SubsampleAndInterlace(cRect
, false);
427 HRESULT
CMemSubPic::UnlockRGBA_YUV(CAtlList
<CRect
>* dirtyRectList
)
430 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
431 ONCER( SaveArgb2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
433 SetDirtyRectEx(dirtyRectList
);
435 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
436 if(m_rectListDirty
.IsEmpty()) {
440 POSITION pos
= m_rectListDirty
.GetHeadPosition();
443 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
444 int w
= cRect
.Width(), h
= cRect
.Height();
450 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*cRect
.top
+ cRect
.left
*4;
451 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
453 if( m_alpha_blt_dst_type
== MSP_YUY2
||
454 m_alpha_blt_dst_type
== MSP_YV12
||
455 m_alpha_blt_dst_type
== MSP_IYUV
||
456 m_alpha_blt_dst_type
== MSP_P010
||
457 m_alpha_blt_dst_type
== MSP_P016
||
458 m_alpha_blt_dst_type
== MSP_NV12
||
459 m_alpha_blt_dst_type
== MSP_NV21
) {
460 for(; top
< bottom
; top
+= m_spd
.pitch
) {
463 DWORD last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
464 for(; s
< e
; s
+=8) { // ARGB ARGB -> AxYU AxYV
465 if((s
[3]+s
[7]+(last_yuv
>>24)) < 0xff*3) {
466 DWORD tmp1
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
467 DWORD tmp2
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
469 s
[1] = (tmp1
>>16)&0xff;
470 s
[5] = (tmp2
>>16)&0xff;
472 s
[0] = (((last_yuv
>>8)&0xff) + 2*((tmp1
>>8)&0xff) + ((tmp2
>>8)&0xff) + 2)/4;
473 s
[4] = ((last_yuv
&0xff) + 2*(tmp1
&0xff) + (tmp2
&0xff) + 2)/4;
476 last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
484 else if(m_alpha_blt_dst_type
== MSP_AYUV
) {
485 for(; top
< bottom
; top
+= m_spd
.pitch
) {
488 for(; s
< e
; s
+=4) { // ARGB -> AYUV
490 *((DWORD
*)s
) = ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
500 ONCER( SaveAxxx2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
504 void CMemSubPic::SubsampleAndInterlace( const CRect
& cRect
, bool u_first
)
506 //fix me: check alignment and log error
507 int w
= cRect
.Width(), h
= cRect
.Height();
508 BYTE
* u_plan
= reinterpret_cast<BYTE
*>(m_spd
.bits
) + m_spd
.pitch
*m_spd
.h
*2;
509 BYTE
* u_start
= u_plan
+ m_spd
.pitch
*(cRect
.top
)+ cRect
.left
;
510 BYTE
* v_start
= u_start
+ m_spd
.pitch
*m_spd
.h
;
520 //Workaround for alignment
521 if ( (m_spd
.pitch
&15) == 0 )
524 for (int i
=0;i
<h
;i
+=2)
526 hleft_vmid_subsample_and_interlace_2_line_sse2(dst
, u_start
, v_start
, w
, m_spd
.pitch
);
527 u_start
+= 2*m_spd
.pitch
;
528 v_start
+= 2*m_spd
.pitch
;
534 for (int i
=0;i
<h
;i
+=2)
536 hleft_vmid_subsample_and_interlace_2_line_c(dst
, u_start
, v_start
, w
, m_spd
.pitch
);
537 u_start
+= 2*m_spd
.pitch
;
538 v_start
+= 2*m_spd
.pitch
;
544 STDMETHODIMP
CMemSubPic::AlphaBlt( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
546 if(!pSrc
|| !pDst
|| !pTarget
) {
549 int src_type
= m_spd
.type
;
550 int dst_type
= pTarget
->type
;
552 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
553 dst_type
== MSP_RGB24
||
554 dst_type
== MSP_RGB16
||
555 dst_type
== MSP_RGB15
||
556 dst_type
== MSP_RGBA
||
557 dst_type
== MSP_YUY2
||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
558 dst_type
== MSP_AYUV
))
560 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
562 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
564 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
565 dst_type
== MSP_YV12
)) )
567 return AlphaBltOther(pSrc
, pDst
, pTarget
);
569 else if ( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_NV12
||
570 dst_type
== MSP_NV21
) )
572 return AlphaBltAnv12_Nv12(pSrc
, pDst
, pTarget
);
575 else if( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_P010
||
576 dst_type
== MSP_P016
) )
578 return AlphaBltAnv12_P010(pSrc
, pDst
, pTarget
);
580 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_IYUV
||
581 dst_type
== MSP_YV12
))
583 return AlphaBltAxyuAxyv_Yv12(pSrc
, pDst
, pTarget
);
585 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_NV12
||
586 dst_type
== MSP_NV21
))
588 return AlphaBltAxyuAxyv_Nv12(pSrc
, pDst
, pTarget
);
590 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_P010
||
591 dst_type
== MSP_P016
))
593 return AlphaBltAxyuAxyv_P010(pSrc
, pDst
, pTarget
);
598 HRESULT
CMemSubPic::AlphaBltOther(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
600 const SubPicDesc
& src
= m_spd
;
601 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
603 CRect
rs(*pSrc
), rd(*pDst
);
607 rd
.bottom
= dst
.h
- rd
.bottom
;
608 rd
.top
= dst
.h
- rd
.top
;
610 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
613 int w
= rs
.Width(), h
= rs
.Height();
614 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);//rs.left*4
615 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ ((rd
.left
*dst
.bpp
)>>3);
616 if(rd
.top
> rd
.bottom
)
618 if(dst
.type
== MSP_RGB32
|| dst
.type
== MSP_RGB24
619 || dst
.type
== MSP_RGB16
|| dst
.type
== MSP_RGB15
620 || dst
.type
== MSP_YUY2
|| dst
.type
== MSP_AYUV
)
622 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*dst
.bpp
>>3);
624 else if(dst
.type
== MSP_YV12
|| dst
.type
== MSP_IYUV
)
626 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*8>>3);
632 dst
.pitch
= -dst
.pitch
;
634 DbgLog((LOG_TRACE
, 5, TEXT("w=%d h=%d"), w
, h
));
638 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
641 BYTE
* s2end
= s2
+ w
*4;
642 DWORD
* d2
= (DWORD
*)d
;
643 for(; s2
< s2end
; s2
+= 4, d2
++)
647 DWORD bd
=0x00000100 -( (DWORD
) s2
[3]);
648 DWORD B
= ((*((DWORD
*)s2
)&0x000000ff)<<8)/bd
;
649 DWORD V
= ((*((DWORD
*)s2
)&0x0000ff00)/bd
)<<8;
650 DWORD R
= (((*((DWORD
*)s2
)&0x00ff0000)>>8)/bd
)<<16;
652 | (0xff000000-(*((DWORD
*)s2
)&0xff000000))&0xff000000;
658 case MSP_AYUV
: //ToDo: fix me MSP_VUYA indeed?
659 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
662 BYTE
* s2end
= s2
+ w
*4;
663 DWORD
* d2
= (DWORD
*)d
;
664 for(; s2
< s2end
; s2
+= 4, d2
++)
667 DWORD ia
= 256-s2
[3];
669 *d2
= ((((*d2
&0x00ff00ff)*s2
[3])>>8) + (((*((DWORD
*)s2
)&0x00ff00ff)*ia
)>>8)&0x00ff00ff)
670 | ((((*d2
&0x0000ff00)*s2
[3])>>8) + (((*((DWORD
*)s2
)&0x0000ff00)*ia
)>>8)&0x0000ff00);
675 *d2
= (((((*d2
&0x00ff00ff)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x00ff00ff))&0x00ff00ff)
676 | (((((*d2
&0x0000ff00)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x0000ff00))&0x0000ff00);
683 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
686 BYTE
* s2end
= s2
+ w
*4;
688 for(; s2
< s2end
; s2
+= 4, d2
+= 3)
692 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[0];
693 d2
[1] = ((d2
[1]*s2
[3])>>8) + s2
[1];
694 d2
[2] = ((d2
[2]*s2
[3])>>8) + s2
[2];
700 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
703 BYTE
* s2end
= s2
+ w
*4;
705 for(; s2
< s2end
; s2
+= 4, d2
++)
709 *d2
= (WORD
)((((((*d2
&0xf81f)*s2
[3])>>5) + (*(DWORD
*)s2
&0xf81f))&0xf81f)
710 | (((((*d2
&0x07e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x07e0))&0x07e0));
711 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
712 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
713 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
720 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
723 BYTE
* s2end
= s2
+ w
*4;
725 for(; s2
< s2end
; s2
+= 4, d2
++)
729 *d2
= (WORD
)((((((*d2
&0x7c1f)*s2
[3])>>5) + (*(DWORD
*)s2
&0x7c1f))&0x7c1f)
730 | (((((*d2
&0x03e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x03e0))&0x03e0));
731 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
732 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
733 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
740 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
744 BYTE
* s2end
= s2
+ w
*4;
745 DWORD
* d2
= (DWORD
*)d
;
747 int last_a
= w
>0?s2
[3]:0;
748 for(; s2
< s2end
; s2
+= 8, d2
++)
750 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
754 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
755 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
756 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
757 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
758 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
760 ia
= (ia
<<24)|(s2
[7]<<16)|(ia
<<8)|s2
[3];
761 c
= (s2
[4]<<24)|(s2
[5]<<16)|(s2
[0]<<8)|s2
[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
772 psraw mm4
, 1 //or else, overflow because psraw shift in sign bit
787 //dst.pitch = abs(dst.pitch);
791 dst
.pitchUV
= abs(dst
.pitch
)/2;
793 if(!dst
.bitsU
|| !dst
.bitsV
)
795 dst
.bitsU
= (BYTE
*)dst
.bits
+ abs(dst
.pitch
)*dst
.h
;
796 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
797 if(dst
.type
== MSP_YV12
)
800 dst
.bitsU
= dst
.bitsV
;
805 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
806 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
807 if(rd
.top
> rd
.bottom
)
809 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
810 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
811 dst
.pitchUV
= -dst
.pitchUV
;
814 BYTE
* src_origin
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
;
817 ss
[0] = src_origin
+ src
.pitch
*src
.h
*2;//U
818 ss
[1] = src_origin
+ src
.pitch
*src
.h
*3;//V
820 AlphaBltYv12Luma( d
, dst
.pitch
, w
, h
, src_origin
+ src
.pitch
*src
.h
, src_origin
, src
.pitch
);
822 AlphaBltYv12Chroma( dd
[0], dst
.pitchUV
, w
, h2
, ss
[0], src_origin
, src
.pitch
);
823 AlphaBltYv12Chroma( dd
[1], dst
.pitchUV
, w
, h2
, ss
[1], src_origin
, src
.pitch
);
833 //emms takes about 40 CPU cycles
838 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
840 const SubPicDesc
& src
= m_spd
;
841 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
843 CRect
rs(*pSrc
), rd(*pDst
);
847 rd
.bottom
= dst
.h
- rd
.bottom
;
848 rd
.top
= dst
.h
- rd
.top
;
851 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
855 int w
= rs
.Width(), h
= rs
.Height();
858 BYTE
* s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
859 BYTE
* d
= static_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
861 if(rd
.top
> rd
.bottom
) {
862 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
864 dst
.pitch
= -dst
.pitch
;
867 for(ptrdiff_t i
=0; i
<h
; i
++, s
+= src
.pitch
, d
+= dst
.pitch
)
870 BYTE
* s2end
= s2
+ w
*4;
871 WORD
* d2
= reinterpret_cast<WORD
*>(d
);
872 for(; s2
< s2end
; s2
+= 4, d2
++)
875 d2
[0] = ((d2
[0]*s2
[3])>>8) + (s2
[1]<<8);
884 dst
.pitchUV
= abs(dst
.pitch
);
886 if(!dst
.bitsU
|| !dst
.bitsV
)
888 dst
.bitsU
= static_cast<BYTE
*>(dst
.bits
) + abs(dst
.pitch
)*dst
.h
;
889 dst
.bitsV
= dst
.bitsU
+ 2;
891 BYTE
* ddUV
= dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
*2;
892 if(rd
.top
> rd
.bottom
)
894 ddUV
= dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
*2;
895 dst
.pitchUV
= -dst
.pitchUV
;
898 s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
901 int pitch
= src
.pitch
;
902 for(int j
= 0; j
< h2
; j
++, s
+= 2*src
.pitch
, d
+= dst
.pitchUV
)
905 WORD
* d2
=reinterpret_cast<WORD
*>(d
);
906 WORD
* d2_end
= reinterpret_cast<WORD
*>(d
+2*w
);
907 DWORD last_alpha
= s2
[3]+s2
[3+src
.pitch
];
908 for( ; d2
<d2_end
; s2
+=8, d2
+=2)
912 (s2
[3] + s2
[3+src
.pitch
])*2 +
913 s2
[3+4]+ s2
[3+4+src
.pitch
]);
914 last_alpha
= s2
[3+4]+ s2
[3+4+src
.pitch
];
917 d2
[0] = (((d2
[0])*ia
)>>11) + ((s2
[0] + s2
[0+src
.pitch
])<<7);
918 d2
[1] = (((d2
[1])*ia
)>>11) + ((s2
[4] + s2
[4+src
.pitch
])<<7);
926 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
928 const SubPicDesc
& src
= m_spd
;
929 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
931 CRect
rs(*pSrc
), rd(*pDst
);
935 rd
.bottom
= dst
.h
- rd
.bottom
;
936 rd
.top
= dst
.h
- rd
.top
;
939 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
943 int w
= rs
.Width(), h
= rs
.Height();
945 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
946 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
948 if(rd
.top
> rd
.bottom
) {
949 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
951 dst
.pitch
= -dst
.pitch
;
954 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
956 BYTE
* s2end
= s2
+ w
*4;
958 for(; s2
< s2end
; s2
+= 4, d2
++) {
960 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
964 dst
.pitch
= abs(dst
.pitch
);
969 dst
.pitchUV
= dst
.pitch
/2;
973 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
976 if(!dst
.bitsU
|| !dst
.bitsV
) {
977 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
978 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
980 if(dst
.type
== MSP_YV12
) {
982 dst
.bitsU
= dst
.bitsV
;
988 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
989 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
991 if(rd
.top
> rd
.bottom
) {
992 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
993 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
994 dst
.pitchUV
= -dst
.pitchUV
;
997 for(ptrdiff_t i
= 0; i
< 2; i
++) {
1001 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
1003 BYTE
* s2end
= s2
+ w
*4;
1007 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
1008 for(; s2
< s2end
; s2
+= 8, d2
++, a2
+= 8) {
1009 unsigned int ia
= (last_alpha
+ 2*(a2
[0]+a2
[0+src
.pitch
]) + a2
[4] + a2
[4+src
.pitch
] + 4 )>>3;
1010 last_alpha
= a2
[4] + a2
[4+src
.pitch
];
1012 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
1021 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1023 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
1024 const SubPicDesc
& src
= m_spd
;
1025 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1027 CRect
rs(*pSrc
), rd(*pDst
);
1031 rd
.bottom
= dst
.h
- rd
.bottom
;
1032 rd
.top
= dst
.h
- rd
.top
;
1035 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1036 return E_INVALIDARG
;
1039 int w
= rs
.Width(), h
= rs
.Height();
1041 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
1042 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
1044 if(rd
.top
> rd
.bottom
) {
1045 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
1047 dst
.pitch
= -dst
.pitch
;
1050 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
1052 BYTE
* s2end
= s2
+ w
*4;
1054 for(; s2
< s2end
; s2
+= 4, d2
++) {
1056 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
1060 dst
.pitch
= abs(dst
.pitch
);
1065 dst
.pitchUV
= dst
.pitch
;
1069 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
1072 if(!dst
.bitsU
|| !dst
.bitsV
) {
1073 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
1074 dst
.bitsV
= dst
.bitsU
+ 1;
1076 if(dst
.type
== MSP_NV21
) {
1077 BYTE
* p
= dst
.bitsU
;
1078 dst
.bitsU
= dst
.bitsV
;
1084 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
;
1087 if(rd
.top
> rd
.bottom
) {
1088 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
;
1090 dst
.pitchUV
= -dst
.pitchUV
;
1093 for(ptrdiff_t i
= 0; i
< 2; i
++) {
1097 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
1099 BYTE
* s2end
= s2
+ w
*4;
1102 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
1103 for(; s2
< s2end
; s2
+= 8, d2
+=2, a2
+= 8) {
1104 unsigned int ia
= (last_alpha
+2*(a2
[0]+a2
[0+src
.pitch
])+a2
[4]+a2
[4+src
.pitch
]+4)>>3;
1105 last_alpha
= a2
[4]+a2
[4+src
.pitch
];
1107 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
1113 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1117 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1119 //fix me: check colorspace and log error
1120 const SubPicDesc
& src
= m_spd
;
1121 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1123 CRect
rs(*pSrc
), rd(*pDst
);
1127 rd
.bottom
= dst
.h
- rd
.bottom
;
1128 rd
.top
= dst
.h
- rd
.top
;
1130 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1131 return E_INVALIDARG
;
1133 int w
= rs
.Width(), h
= rs
.Height();
1134 bool bottom_down
= rd
.top
> rd
.bottom
;
1140 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
1141 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
*2;
1145 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
*2;
1146 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
*2;
1147 dst
.pitch
= -dst
.pitch
;
1149 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1151 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1152 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1153 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1154 return AlphaBltAnv12_P010(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1157 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1159 //fix me: check colorspace and log error
1160 const SubPicDesc
& src
= m_spd
;
1161 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1163 CRect
rs(*pSrc
), rd(*pDst
);
1167 rd
.bottom
= dst
.h
- rd
.bottom
;
1168 rd
.top
= dst
.h
- rd
.top
;
1170 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1171 return E_INVALIDARG
;
1173 int w
= rs
.Width(), h
= rs
.Height();
1174 bool bottom_down
= rd
.top
> rd
.bottom
;
1180 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
;
1181 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
;
1185 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
;
1186 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
;
1187 dst
.pitch
= -dst
.pitch
;
1189 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1191 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1192 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1193 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1195 return AlphaBltAnv12_Nv12(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1198 STDMETHODIMP
CMemSubPic::SetDirtyRectEx(CAtlList
<CRect
>* dirtyRectList
)
1200 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1201 if(dirtyRectList
!=NULL
)
1203 POSITION pos
= dirtyRectList
->GetHeadPosition();
1204 if(m_spd
.type
== MSP_AYUV_PLANAR
|| m_alpha_blt_dst_type
==MSP_IYUV
|| m_alpha_blt_dst_type
==MSP_YV12
1205 || m_alpha_blt_dst_type
==MSP_P010
|| m_alpha_blt_dst_type
==MSP_P016
1206 || m_alpha_blt_dst_type
==MSP_NV12
|| m_alpha_blt_dst_type
==MSP_NV21
)
1210 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1211 cRectSrc
.left
&= ~15;
1212 cRectSrc
.right
= (cRectSrc
.right
+15)&~15;
1213 if(cRectSrc
.right
>m_spd
.w
)
1215 cRectSrc
.right
= m_spd
.w
;
1218 cRectSrc
.bottom
= (cRectSrc
.bottom
+1)&~1;
1221 else if(m_spd
.type
== MSP_XY_AUYV
|| m_alpha_blt_dst_type
==MSP_YUY2
)
1225 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1226 cRectSrc
.left
&= ~3;
1227 cRectSrc
.right
= (cRectSrc
.right
+3)&~3;
1231 return __super::SetDirtyRectEx(dirtyRectList
);
1238 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1239 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1241 const BYTE
* sa
= src_a
;
1242 if( ((reinterpret_cast<intptr_t>(src_a
) | reinterpret_cast<intptr_t>(src_y
) | static_cast<intptr_t>(src_pitch
) |
1243 reinterpret_cast<intptr_t>(dst_y
) | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 )
1245 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1247 const BYTE
* sa2
= sa
;
1248 const BYTE
* s2
= src_y
;
1249 const BYTE
* s2end_mod16
= s2
+ (w
&~15);
1250 const BYTE
* s2end
= s2
+ w
;
1253 for(; s2
< s2end_mod16
; s2
+=16, sa2
+=16, d2
+=32)
1255 mix_16_y_p010_sse2(d2
, s2
, sa2
);
1257 for( WORD
* d3
=reinterpret_cast<WORD
*>(d2
); s2
< s2end
; s2
++, sa2
++, d3
++)
1261 d3
[0] = ((d3
[0]*sa2
[0])>>8) + (s2
[0]<<8);
1266 else //fix me: only a workaround for non-mod-16 size video
1268 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1270 const BYTE
* sa2
= sa
;
1271 const BYTE
* s2
= src_y
;
1272 const BYTE
* s2end
= s2
+ w
;
1273 WORD
* d2
= reinterpret_cast<WORD
*>(dst_y
);
1274 for(; s2
< s2end
; s2
+=1, sa2
+=1, d2
+=1)
1278 d2
[0] = ((d2
[0]*sa2
[0])>>8) + (s2
[0]<<8);
1286 if( ((reinterpret_cast<intptr_t>(src_a
) | reinterpret_cast<intptr_t>(src_uv
) | static_cast<intptr_t>(src_pitch
) |
1287 reinterpret_cast<intptr_t>(dst_uv
) | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 )
1289 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1291 hleft_vmid_mix_uv_p010_sse2(d
, w
, src_uv
, src_a
, src_pitch
);
1296 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1298 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
1305 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1306 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1308 AlphaBltYv12Luma( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
1311 if( ((reinterpret_cast<intptr_t>(src_a
) | reinterpret_cast<intptr_t>(src_uv
) | static_cast<intptr_t>(src_pitch
) |
1312 reinterpret_cast<intptr_t>(dst_uv
) | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 )
1315 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1317 hleft_vmid_mix_uv_nv12_sse2(d
, w
, src_uv
, src_a
, src_pitch
);
1323 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1325 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
1334 // CMemSubPicAllocator
1337 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type
, SIZE maxsize
, int type
/*=-1*/)
1338 : CSubPicExAllocatorImpl(maxsize
, false, false)
1339 , m_alpha_blt_dst_type(alpha_blt_dst_type
)
1340 , m_maxsize(maxsize
)
1345 switch(alpha_blt_dst_type
)
1348 m_type
= MSP_XY_AUYV
;
1359 m_type
= MSP_AYUV_PLANAR
;
1368 // ISubPicAllocatorImpl
1370 bool CMemSubPicAllocator::AllocEx(bool fStatic
, ISubPicEx
** ppSubPic
)
1376 spd
.w
= m_maxsize
.cx
;
1377 spd
.h
= m_maxsize
.cy
;
1379 spd
.pitch
= (spd
.w
*spd
.bpp
)>>3;
1381 spd
.bits
= DNew BYTE
[spd
.pitch
*spd
.h
];
1385 *ppSubPic
= DNew
CMemSubPic(spd
, m_alpha_blt_dst_type
);
1389 (*ppSubPic
)->AddRef();