Minor fix on MemSubPic [PART 4] (fixes a bug that cannot be triggered for now).
[xy_vsfilter.git] / src / subpic / MemSubPic.cpp
blobd2325013ca1f1f984a5b585c14bad7f6a85850fa
1 /*
2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
22 #include "stdafx.h"
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
26 #if 0
27 #include <fstream>
29 // debug functions
30 //
31 static void SaveRect2File(const CRect& cRect, const char * filename)
33 std::ofstream os(filename);
34 os<<cRect.left<<","<<cRect.top<<","<<cRect.right<<","<<cRect.bottom;
36 static void SaveAxxx2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
38 std::ofstream axxx(filename);
39 int w = cRect.Width(), h = cRect.Height();
41 BYTE* top = (BYTE*)spd.bits + spd.pitch*cRect.top + cRect.left*4;
42 BYTE* bottom = top + spd.pitch*h;
44 for(; top < bottom ; top += spd.pitch) {
45 BYTE* s = top;
46 BYTE* e = s + w*4;
47 for(; s < e; s+=4) { // ARGB ARGB -> AxYU AxYV
48 axxx<<(int)s[0]<<","<<(int)s[1]<<","<<(int)s[2]<<","<<(int)s[3];
49 if(s+4>=e)
51 axxx<<std::endl;
53 else
55 axxx<<",";
59 axxx.close();
61 static void SaveArgb2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
63 SaveAxxx2File(spd, cRect, filename);
65 static void SaveAyuv2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
67 SaveAxxx2File(spd, cRect, filename);
69 static void SaveNvxx2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
71 std::ofstream os(filename);
72 int w = cRect.Width(), h = cRect.Height();
74 BYTE* top = (BYTE*)spd.bits;
75 BYTE* bottom = top + spd.pitch*h;
77 for(; top < bottom ; top += spd.pitch) {
78 BYTE* s = top;
79 BYTE* e = s + w;
81 BYTE* sY = s + spd.pitch*spd.h;
82 BYTE* sU = sY + spd.pitch*spd.h;
83 BYTE* sV = sU + 1;
84 for(; s < e; s++, sY++, sU+=2,sV+=2) {
85 os<<(int)s[0]<<","<<(int)sY[0]<<","<<(int)sU[0]<<","<<(int)sV[0];
86 if(s+1>=e)
88 os<<std::endl;
90 else
92 os<<",";
96 os.close();
// Debug-only helper: evaluates `expr` the first time control reaches a given
// expansion site and never again (every expansion owns its own static flag).
#define ONCER(expr) { \
    static bool entered = false; \
    if(!entered) \
    { \
        entered = true; \
        expr; \
    } \
}
107 #else
108 #define ONCER(expr)
109 #endif
112 // alpha blend functions
114 #include "xy_intrinsics.h"
116 static void AlphaBltYv12Luma(byte* dst, int dst_pitch,
117 int w, int h,
118 const byte* sub, const byte* alpha, int sub_pitch)
120 if( ((reinterpret_cast<intptr_t>(alpha) | reinterpret_cast<intptr_t>(sub) | static_cast<intptr_t>(sub_pitch) |
121 reinterpret_cast<intptr_t>(dst) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
123 for(int i=0; i<h; i++, dst += dst_pitch, alpha += sub_pitch, sub += sub_pitch)
125 const BYTE* sa = alpha;
126 const BYTE* s2 = sub;
127 const BYTE* s2end_mod16 = s2 + (w&~15);
128 const BYTE* s2end = s2 + w;
129 BYTE* d2 = dst;
131 for(; s2 < s2end_mod16; s2+=16, sa+=16, d2+=16)
133 pix_alpha_blend_yv12_luma_sse2(d2, sa, s2);
135 for(; s2 < s2end; s2++, sa++, d2++)
137 if(sa[0] < 0xff)
139 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
144 else //fix me: only a workaround for non-mod-16 size video
146 for(int i=0; i<h; i++, dst += dst_pitch, alpha += sub_pitch, sub += sub_pitch)
148 const BYTE* sa = alpha;
149 const BYTE* s2 = sub;
150 const BYTE* s2end_mod16 = s2 + (w&~15);
151 const BYTE* s2end = s2 + w;
152 BYTE* d2 = dst;
153 for(; s2 < s2end; s2+=1, sa+=1, d2+=1)
155 if(sa[0] < 0xff)
157 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
158 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
165 static void AlphaBltYv12Chroma(byte* dst, int dst_pitch,
166 int w, int chroma_h,
167 const byte* sub_chroma, const byte* alpha, int sub_pitch)
169 if( ((reinterpret_cast<intptr_t>(sub_chroma) |
170 //reinterpret_cast<intptr_t>(dst) |
171 reinterpret_cast<intptr_t>(alpha) | static_cast<intptr_t>(sub_pitch)
172 //| (static_cast<intptr_t>(dst_pitch)&7)
173 ) & 15 )==0 )
175 int pitch = sub_pitch;
176 for(int j = 0; j < chroma_h; j++, sub_chroma += sub_pitch*2, alpha += sub_pitch*2, dst += dst_pitch)
178 hleft_vmid_mix_uv_yv12_sse2(dst, w, sub_chroma, alpha, sub_pitch);
181 else//fix me: only a workaround for non-mod-16 size video
183 for(int j = 0; j < chroma_h; j++, sub_chroma += sub_pitch*2, alpha += sub_pitch*2, dst += dst_pitch)
185 hleft_vmid_mix_uv_yv12_c(dst, w, sub_chroma, alpha, sub_pitch);
191 // CMemSubPic
194 CMemSubPic::CMemSubPic(SubPicDesc& spd, int alpha_blt_dst_type)
195 : m_spd(spd), m_alpha_blt_dst_type(alpha_blt_dst_type)
197 m_maxsize.SetSize(spd.w, spd.h);
198 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
199 CRect allSpd(0,0,spd.w, spd.h);
200 m_rectListDirty.AddTail(allSpd);
203 CMemSubPic::~CMemSubPic()
205 delete [] m_spd.bits, m_spd.bits = NULL;
208 // ISubPic
210 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
212 return (void*)&m_spd;
215 STDMETHODIMP CMemSubPic::GetDesc(SubPicDesc& spd) const
217 spd.type = m_spd.type;
218 spd.w = m_size.cx;
219 spd.h = m_size.cy;
220 spd.bpp = m_spd.bpp;
221 spd.pitch = m_spd.pitch;
222 spd.bits = m_spd.bits;
223 spd.bitsU = m_spd.bitsU;
224 spd.bitsV = m_spd.bitsV;
225 spd.vidrect = m_vidrect;
226 return S_OK;
229 STDMETHODIMP CMemSubPic::CopyTo(ISubPicEx* pSubPic)
231 HRESULT hr;
232 if(FAILED(hr = __super::CopyTo(pSubPic))) {
233 return hr;
236 SubPicDesc src, dst;
237 if(FAILED(GetDesc(src)) || FAILED(pSubPic->GetDesc(dst))) {
238 return E_FAIL;
240 while(!m_rectListDirty.IsEmpty())
242 CRect& cRect = m_rectListDirty.GetHead();
243 int w = cRect.Width(), h = cRect.Height();
244 BYTE* s = (BYTE*)src.bits + src.pitch*cRect.top + cRect.left*4;
245 BYTE* d = (BYTE*)dst.bits + dst.pitch*cRect.top + cRect.left*4;
246 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
247 memcpy(d, s, w*4);
249 return S_OK;
252 STDMETHODIMP CMemSubPic::ClearDirtyRect(DWORD color)
254 if(m_rectListDirty.IsEmpty()) {
255 return S_OK;
257 while(!m_rectListDirty.IsEmpty())
259 //pDirtyRect = m_rectListDirty.RemoveHead();
260 CRect& dirtyRect = m_rectListDirty.RemoveTail();
261 BYTE* p = (BYTE*)m_spd.bits + m_spd.pitch*(dirtyRect.top) + dirtyRect.left*(m_spd.bpp>>3);
262 int w = dirtyRect.Width();
263 if(m_spd.type!=MSP_AYUV_PLANAR)
265 for(int j = 0, h = dirtyRect.Height(); j < h; j++, p += m_spd.pitch)
267 #ifdef _WIN64
268 memsetd(p, color, w*4); // nya
269 #else
270 __asm
272 mov eax, color
273 mov ecx, w
274 mov edi, p
276 rep stosd
279 #endif
282 else
284 ///TODO:
285 ///FIX ME
286 for(int j = 0, h = dirtyRect.Height(); j < h; j++, p += m_spd.pitch)
288 // memsetd(p, 0, m_rcDirty.Width());
289 //DbgLog((LOG_TRACE, 3, "w:%d", w));
290 //w = pDirtyRect->Width();
291 memset(p, 0xFF, w);
292 memset(p+m_spd.h*m_spd.pitch, 0, w);
293 memset(p+m_spd.h*m_spd.pitch*2, 0, w);
294 memset(p+m_spd.h*m_spd.pitch*3, 0, w);
298 m_rectListDirty.RemoveAll();
299 return S_OK;
302 STDMETHODIMP CMemSubPic::Lock(SubPicDesc& spd)
304 return GetDesc(spd);
307 STDMETHODIMP CMemSubPic::Unlock( CAtlList<CRect>* dirtyRectList )
309 int src_type = m_spd.type;
310 int dst_type = m_alpha_blt_dst_type;
311 if( (src_type==MSP_RGBA && (dst_type == MSP_RGB32 ||
312 dst_type == MSP_RGB24 ||
313 dst_type == MSP_RGB16 ||
314 dst_type == MSP_RGB15))
316 (src_type==MSP_XY_AUYV && dst_type == MSP_YUY2)//ToDo: fix me MSP_AYUV
318 (src_type==MSP_AYUV && dst_type == MSP_AYUV)
320 (src_type==MSP_AYUV_PLANAR && (dst_type == MSP_IYUV ||
321 dst_type == MSP_YV12 ||
322 dst_type == MSP_P010 ||
323 dst_type == MSP_P016 ||
324 dst_type == MSP_NV12 ||
325 dst_type == MSP_NV21)))
327 return UnlockOther(dirtyRectList);
329 else if(src_type==MSP_RGBA && (dst_type == MSP_YUY2 ||
330 dst_type == MSP_AYUV || //ToDo: fix me MSP_AYUV
331 dst_type == MSP_IYUV ||
332 dst_type == MSP_YV12 ||
333 dst_type == MSP_NV12 ||
334 dst_type == MSP_NV21 ||
335 dst_type == MSP_P010 ||
336 dst_type == MSP_P016))
338 return UnlockRGBA_YUV(dirtyRectList);
340 return E_NOTIMPL;
343 HRESULT CMemSubPic::UnlockOther(CAtlList<CRect>* dirtyRectList)
345 SetDirtyRectEx(dirtyRectList);
346 if(m_rectListDirty.IsEmpty()) {
347 return S_OK;
350 POSITION pos = m_rectListDirty.GetHeadPosition();
351 while(pos!=NULL)
353 const CRect& cRect = m_rectListDirty.GetNext(pos);
354 int w = cRect.Width(), h = cRect.Height();
355 if (w<=0 || h<=0)
357 continue;
360 BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*(cRect.top) + cRect.left*4;
361 BYTE* bottom = top + m_spd.pitch*h;
362 if(m_alpha_blt_dst_type == MSP_RGB16)
364 for(; top < bottom ; top += m_spd.pitch)
366 DWORD* s = (DWORD*)top;
367 DWORD* e = s + w;
368 for(; s < e; s++)
370 *s = ((*s>>3)&0x1f000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
371 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
375 else if(m_alpha_blt_dst_type == MSP_RGB15)
377 for(; top < bottom; top += m_spd.pitch)
379 DWORD* s = (DWORD*)top;
380 DWORD* e = s + w;
381 for(; s < e; s++)
383 *s = ((*s>>3)&0x1f000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
384 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
388 else if(m_alpha_blt_dst_type == MSP_YUY2)
390 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top, m_spd.pitch*(h-1)) );
392 for(BYTE* tempTop=top; tempTop < bottom ; tempTop += m_spd.pitch)
394 BYTE* s = tempTop;
395 BYTE* e = s + w*4;
396 BYTE last_v = s[0], last_u=s[2];
397 for(; s < e; s+=8) // AUYV AUYV -> AxYU AxYV
399 BYTE tmp = s[4];
400 s[4] = (last_v + 2*s[0] + s[4] + 2)>>2;
401 last_v = tmp;
403 s[0] = (last_u + 2*s[2] + s[6] + 2)>>2;
404 last_u = s[6];
408 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top, m_spd.pitch*(h-1)) );
410 else if(m_alpha_blt_dst_type == MSP_YV12 || m_alpha_blt_dst_type == MSP_IYUV )
412 //nothing to do
414 else if ( m_alpha_blt_dst_type == MSP_P010 || m_alpha_blt_dst_type == MSP_P016
415 || m_alpha_blt_dst_type == MSP_NV12 )
417 SubsampleAndInterlace(cRect, true);
419 else if( m_alpha_blt_dst_type == MSP_NV21 )
421 SubsampleAndInterlace(cRect, false);
424 return S_OK;
427 HRESULT CMemSubPic::UnlockRGBA_YUV(CAtlList<CRect>* dirtyRectList)
429 //debug
430 ONCER( SaveRect2File(dirtyRectList->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
431 ONCER( SaveArgb2File(m_spd, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
433 SetDirtyRectEx(dirtyRectList);
435 ONCER( SaveRect2File(dirtyRectList->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
436 if(m_rectListDirty.IsEmpty()) {
437 return S_OK;
440 POSITION pos = m_rectListDirty.GetHeadPosition();
441 while(pos!=NULL)
443 const CRect& cRect = m_rectListDirty.GetNext(pos);
444 int w = cRect.Width(), h = cRect.Height();
445 if(w<=0 || h<=0)
447 continue;
450 BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*cRect.top + cRect.left*4;
451 BYTE* bottom = top + m_spd.pitch*h;
453 if( m_alpha_blt_dst_type == MSP_YUY2 ||
454 m_alpha_blt_dst_type == MSP_YV12 ||
455 m_alpha_blt_dst_type == MSP_IYUV ||
456 m_alpha_blt_dst_type == MSP_P010 ||
457 m_alpha_blt_dst_type == MSP_P016 ||
458 m_alpha_blt_dst_type == MSP_NV12 ||
459 m_alpha_blt_dst_type == MSP_NV21) {
460 for(; top < bottom ; top += m_spd.pitch) {
461 BYTE* s = top;
462 BYTE* e = s + w*4;
463 DWORD last_yuv = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
464 for(; s < e; s+=8) { // ARGB ARGB -> AxYU AxYV
465 if((s[3]+s[7]+(last_yuv>>24)) < 0xff*3) {
466 DWORD tmp1 = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
467 DWORD tmp2 = ColorConvTable::PreMulArgb2Ayuv(s[7], s[6], s[5], s[4]);
469 s[1] = (tmp1>>16)&0xff;
470 s[5] = (tmp2>>16)&0xff;
472 s[0] = (((last_yuv>>8)&0xff) + 2*((tmp1>>8)&0xff) + ((tmp2>>8)&0xff) + 2)/4;
473 s[4] = ((last_yuv&0xff) + 2*(tmp1&0xff) + (tmp2&0xff) + 2)/4;
474 last_yuv = tmp2;
475 } else {
476 last_yuv = ColorConvTable::PreMulArgb2Ayuv(s[7], s[6], s[5], s[4]);
478 s[1] = s[5] = 0;
479 s[0] = s[4] = 0;
484 else if(m_alpha_blt_dst_type == MSP_AYUV) {
485 for(; top < bottom ; top += m_spd.pitch) {
486 BYTE* s = top;
487 BYTE* e = s + w*4;
488 for(; s < e; s+=4) { // ARGB -> AYUV
489 if(s[3] < 0xff) {
490 *((DWORD*)s) = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
491 } else {
492 s[0] = s[1] = 0;
493 s[2] = 0;
500 ONCER( SaveAxxx2File(m_spd, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
501 return S_OK;
504 void CMemSubPic::SubsampleAndInterlace( const CRect& cRect, bool u_first )
506 //fix me: check alignment and log error
507 int w = cRect.Width(), h = cRect.Height();
508 BYTE* u_plan = reinterpret_cast<BYTE*>(m_spd.bits) + m_spd.pitch*m_spd.h*2;
509 BYTE* u_start = u_plan + m_spd.pitch*(cRect.top)+ cRect.left;
510 BYTE* v_start = u_start + m_spd.pitch*m_spd.h;
511 BYTE* dst = u_start;
512 if(!u_first)
514 BYTE* tmp = v_start;
515 v_start = u_start;
516 u_start = tmp;
519 //Todo: fix me.
520 //Walkarround for alignment
521 if ( (m_spd.pitch&15) == 0 )
523 ASSERT(w%16==0);
524 for (int i=0;i<h;i+=2)
526 hleft_vmid_subsample_and_interlace_2_line_sse2(dst, u_start, v_start, w, m_spd.pitch);
527 u_start += 2*m_spd.pitch;
528 v_start += 2*m_spd.pitch;
529 dst += m_spd.pitch;
532 else
534 for (int i=0;i<h;i+=2)
536 hleft_vmid_subsample_and_interlace_2_line_c(dst, u_start, v_start, w, m_spd.pitch);
537 u_start += 2*m_spd.pitch;
538 v_start += 2*m_spd.pitch;
539 dst += m_spd.pitch;
544 STDMETHODIMP CMemSubPic::AlphaBlt( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
546 if(!pSrc || !pDst || !pTarget) {
547 return E_POINTER;
549 int src_type = m_spd.type;
550 int dst_type = pTarget->type;
552 if( (src_type==MSP_RGBA && (dst_type == MSP_RGB32 ||
553 dst_type == MSP_RGB24 ||
554 dst_type == MSP_RGB16 ||
555 dst_type == MSP_RGB15 ||
556 dst_type == MSP_RGBA ||
557 dst_type == MSP_YUY2 ||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
558 dst_type == MSP_AYUV ))
560 (src_type==MSP_XY_AUYV && dst_type == MSP_YUY2)//ToDo: fix me MSP_AYUV
562 (src_type==MSP_AYUV && dst_type == MSP_AYUV)
564 (src_type==MSP_AYUV_PLANAR && (dst_type == MSP_IYUV ||
565 dst_type == MSP_YV12)) )
567 return AlphaBltOther(pSrc, pDst, pTarget);
569 else if ( src_type==MSP_AYUV_PLANAR && (dst_type == MSP_NV12 ||
570 dst_type == MSP_NV21 ) )
572 return AlphaBltAnv12_Nv12(pSrc, pDst, pTarget);
575 else if( src_type==MSP_AYUV_PLANAR && (dst_type == MSP_P010 ||
576 dst_type == MSP_P016 ) )
578 return AlphaBltAnv12_P010(pSrc, pDst, pTarget);
580 else if( src_type==MSP_RGBA && (dst_type == MSP_IYUV ||
581 dst_type == MSP_YV12))
583 return AlphaBltAxyuAxyv_Yv12(pSrc, pDst, pTarget);
585 else if( src_type==MSP_RGBA && (dst_type == MSP_NV12||
586 dst_type == MSP_NV21))
588 return AlphaBltAxyuAxyv_Nv12(pSrc, pDst, pTarget);
590 else if( src_type==MSP_RGBA && (dst_type == MSP_P010 ||
591 dst_type == MSP_P016))
593 return AlphaBltAxyuAxyv_P010(pSrc, pDst, pTarget);
595 return E_NOTIMPL;
// Blends the `pSrc` region of this subpicture onto the `pDst` region of
// `pTarget` for the destination types handled by the switch below (RGBA,
// RGB32/AYUV, RGB24, RGB16, RGB15, YUY2, YV12/IYUV).
// Returns E_INVALIDARG when the two rectangles differ in size, E_NOTIMPL for
// any other destination type, S_OK otherwise.
// NOTE(review): the YUY2 and YV12 cases use x86 inline assembly (`__asm`)
// outside any _WIN64 guard -- confirm how 64-bit builds handle this.
598 HRESULT CMemSubPic::AlphaBltOther(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
600 const SubPicDesc& src = m_spd;
601 SubPicDesc dst = *pTarget; // copy, because we might modify it
603 CRect rs(*pSrc), rd(*pDst);
// A negative height is made positive and the destination rect is mirrored
// vertically, which leaves rd.top > rd.bottom.
604 if(dst.h < 0)
606 dst.h = -dst.h;
607 rd.bottom = dst.h - rd.bottom;
608 rd.top = dst.h - rd.top;
610 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
611 return E_INVALIDARG;
613 int w = rs.Width(), h = rs.Height();
614 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);//rs.left*4
615 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + ((rd.left*dst.bpp)>>3);
// Mirrored destination: start on row rd.top-1 and walk with a negated pitch.
616 if(rd.top > rd.bottom)
618 if(dst.type == MSP_RGB32 || dst.type == MSP_RGB24
619 || dst.type == MSP_RGB16 || dst.type == MSP_RGB15
620 || dst.type == MSP_YUY2 || dst.type == MSP_AYUV)
622 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*dst.bpp>>3);
624 else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV)
626 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*8>>3);
628 else
630 return E_NOTIMPL;
632 dst.pitch = -dst.pitch;
634 DbgLog((LOG_TRACE, 5, TEXT("w=%d h=%d"), w, h));
635 switch(dst.type)
// RGBA target: pixels with alpha byte 0xff are skipped. For the others each
// color channel is scaled by 256/(256 - alpha byte) and the alpha byte is
// replaced by (0xff - alpha byte); the destination is overwritten, not blended.
637 case MSP_RGBA:
638 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
640 BYTE* s2 = s;
641 BYTE* s2end = s2 + w*4;
642 DWORD* d2 = (DWORD*)d;
643 for(; s2 < s2end; s2 += 4, d2++)
645 if(s2[3] < 0xff)
647 DWORD bd =0x00000100 -( (DWORD) s2[3]);
648 DWORD B = ((*((DWORD*)s2)&0x000000ff)<<8)/bd;
649 DWORD V = ((*((DWORD*)s2)&0x0000ff00)/bd)<<8;
650 DWORD R = (((*((DWORD*)s2)&0x00ff0000)>>8)/bd)<<16;
651 *d2 = B | V | R
652 | (0xff000000-(*((DWORD*)s2)&0xff000000))&0xff000000;
656 break;
// 32-bit targets: dst = dst*a/256 + src per channel, two channels at a time
// through the 0x00ff00ff / 0x0000ff00 masks.
// NOTE(review): the _WIN64 branch additionally scales the source by (256 - a),
// unlike the 32-bit branch -- confirm which behavior is intended.
657 case MSP_RGB32:
658 case MSP_AYUV: //ToDo: fix me MSP_VUYA indeed?
659 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
661 BYTE* s2 = s;
662 BYTE* s2end = s2 + w*4;
663 DWORD* d2 = (DWORD*)d;
664 for(; s2 < s2end; s2 += 4, d2++)
666 #ifdef _WIN64
667 DWORD ia = 256-s2[3];
668 if(s2[3] < 0xff) {
669 *d2 = ((((*d2&0x00ff00ff)*s2[3])>>8) + (((*((DWORD*)s2)&0x00ff00ff)*ia)>>8)&0x00ff00ff)
670 | ((((*d2&0x0000ff00)*s2[3])>>8) + (((*((DWORD*)s2)&0x0000ff00)*ia)>>8)&0x0000ff00);
672 #else
673 if(s2[3] < 0xff)
675 *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff)
676 | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00);
678 #endif
681 break;
// 24-bit target: same blend, one byte per channel, 3 destination bytes per pixel.
682 case MSP_RGB24:
683 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
685 BYTE* s2 = s;
686 BYTE* s2end = s2 + w*4;
687 BYTE* d2 = d;
688 for(; s2 < s2end; s2 += 4, d2 += 3)
690 if(s2[3] < 0xff)
692 d2[0] = ((d2[0]*s2[3])>>8) + s2[0];
693 d2[1] = ((d2[1]*s2[3])>>8) + s2[1];
694 d2[2] = ((d2[2]*s2[3])>>8) + s2[2];
698 break;
// 16-bit 565 target: the alpha byte is compared against 0x1f and applied with
// a shift of 5, i.e. it is treated as a 5-bit value here.
699 case MSP_RGB16:
700 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
702 BYTE* s2 = s;
703 BYTE* s2end = s2 + w*4;
704 WORD* d2 = (WORD*)d;
705 for(; s2 < s2end; s2 += 4, d2++)
707 if(s2[3] < 0x1f)
709 *d2 = (WORD)((((((*d2&0xf81f)*s2[3])>>5) + (*(DWORD*)s2&0xf81f))&0xf81f)
710 | (((((*d2&0x07e0)*s2[3])>>5) + (*(DWORD*)s2&0x07e0))&0x07e0));
711 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
712 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
713 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
718 break;
719 case MSP_RGB15:
720 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
722 BYTE* s2 = s;
723 BYTE* s2end = s2 + w*4;
724 WORD* d2 = (WORD*)d;
725 for(; s2 < s2end; s2 += 4, d2++)
727 if(s2[3] < 0x1f)
729 *d2 = (WORD)((((((*d2&0x7c1f)*s2[3])>>5) + (*(DWORD*)s2&0x7c1f))&0x7c1f)
730 | (((((*d2&0x03e0)*s2[3])>>5) + (*(DWORD*)s2&0x03e0))&0x03e0));
731 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
732 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
733 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
738 break;
739 case MSP_YUY2:
740 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
742 unsigned int ia, c;
743 BYTE* s2 = s;
744 BYTE* s2end = s2 + w*4;
745 DWORD* d2 = (DWORD*)d;
746 ASSERT(w>0);
747 int last_a = w>0?s2[3]:0;
748 for(; s2 < s2end; s2 += 8, d2++)
750 ia = (last_a + 2*s2[3] + s2[7])>>2;
751 last_a = s2[7];
752 if(ia < 0xff)
754 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
755 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
756 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
757 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
758 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
760 ia = (ia<<24)|(s2[7]<<16)|(ia<<8)|s2[3];
761 c = (s2[4]<<24)|(s2[5]<<16)|(s2[0]<<8)|s2[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
762 __asm
764 mov edi, d2
765 pxor mm0, mm0
766 movd mm2, c
767 punpcklbw mm2, mm0
768 movd mm3, [edi]
769 punpcklbw mm3, mm0
770 movd mm4, ia
771 punpcklbw mm4, mm0
772 psraw mm4, 1 //or else, overflow because psraw shift in sign bit
773 pmullw mm3, mm4
774 psraw mm3, 7
775 paddsw mm3, mm2
776 packuswb mm3, mm3
777 movd [edi], mm3
782 __asm emms;
783 break;
784 case MSP_YV12:
785 case MSP_IYUV:
787 //dst.pitch = abs(dst.pitch);
788 int h2 = h/2;
789 if(!dst.pitchUV)
791 dst.pitchUV = abs(dst.pitch)/2;
793 if(!dst.bitsU || !dst.bitsV)
795 dst.bitsU = (BYTE*)dst.bits + abs(dst.pitch)*dst.h;
796 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
797 if(dst.type == MSP_YV12)
799 BYTE* p = dst.bitsU;
800 dst.bitsU = dst.bitsV;
801 dst.bitsV = p;
804 BYTE* dd[2];
805 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
806 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
807 if(rd.top > rd.bottom)
809 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
810 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
811 dst.pitchUV = -dst.pitchUV;
814 BYTE* src_origin= (BYTE*)src.bits + src.pitch*rs.top + rs.left;
816 BYTE* ss[2];
817 ss[0] = src_origin + src.pitch*src.h*2;//U
818 ss[1] = src_origin + src.pitch*src.h*3;//V
820 AlphaBltYv12Luma( d, dst.pitch, w, h, src_origin + src.pitch*src.h, src_origin, src.pitch );
822 AlphaBltYv12Chroma( dd[0], dst.pitchUV, w, h2, ss[0], src_origin, src.pitch);
823 AlphaBltYv12Chroma( dd[1], dst.pitchUV, w, h2, ss[1], src_origin, src.pitch);
825 __asm emms;
827 break;
828 default:
829 return E_NOTIMPL;
830 break;
833 // emms costs about 40 CPU cycles
834 //__asm emms;
835 return S_OK;
838 HRESULT CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
840 const SubPicDesc& src = m_spd;
841 SubPicDesc dst = *pTarget; // copy, because we might modify it
843 CRect rs(*pSrc), rd(*pDst);
845 if(dst.h < 0) {
846 dst.h = -dst.h;
847 rd.bottom = dst.h - rd.bottom;
848 rd.top = dst.h - rd.top;
851 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
852 return E_INVALIDARG;
855 int w = rs.Width(), h = rs.Height();
858 BYTE* s = static_cast<BYTE*>(src.bits) + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
859 BYTE* d = static_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
861 if(rd.top > rd.bottom) {
862 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
864 dst.pitch = -dst.pitch;
867 for(ptrdiff_t i=0; i<h; i++, s += src.pitch, d += dst.pitch)
869 BYTE* s2 = s;
870 BYTE* s2end = s2 + w*4;
871 WORD* d2 = reinterpret_cast<WORD*>(d);
872 for(; s2 < s2end; s2 += 4, d2++)
874 if(s2[3] < 0xff) {
875 d2[0] = ((d2[0]*s2[3])>>8) + (s2[1]<<8);
880 //UV
881 int h2 = h/2;
882 if(!dst.pitchUV)
884 dst.pitchUV = abs(dst.pitch);
886 if(!dst.bitsU || !dst.bitsV)
888 dst.bitsU = static_cast<BYTE*>(dst.bits) + abs(dst.pitch)*dst.h;
889 dst.bitsV = dst.bitsU + 2;
891 BYTE* ddUV = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left*2;
892 if(rd.top > rd.bottom)
894 ddUV = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left*2;
895 dst.pitchUV = -dst.pitchUV;
898 s = static_cast<BYTE*>(src.bits) + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
900 d = ddUV;
901 int pitch = src.pitch;
902 for(int j = 0; j < h2; j++, s += 2*src.pitch, d += dst.pitchUV )
904 BYTE* s2 = s;
905 WORD* d2=reinterpret_cast<WORD*>(d);
906 WORD* d2_end = reinterpret_cast<WORD*>(d+2*w);
907 DWORD last_alpha = s2[3]+s2[3+src.pitch];
908 for( ; d2<d2_end; s2+=8, d2+=2)
910 unsigned int ia = (
911 last_alpha +
912 (s2[3] + s2[3+src.pitch])*2 +
913 s2[3+4]+ s2[3+4+src.pitch]);
914 last_alpha = s2[3+4]+ s2[3+4+src.pitch];
915 if( ia!=0xFF*8 )
917 d2[0] = (((d2[0])*ia)>>11) + ((s2[0] + s2[0+src.pitch])<<7);
918 d2[1] = (((d2[1])*ia)>>11) + ((s2[4] + s2[4+src.pitch])<<7);
923 return S_OK;
926 HRESULT CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
928 const SubPicDesc& src = m_spd;
929 SubPicDesc dst = *pTarget; // copy, because we might modify it
931 CRect rs(*pSrc), rd(*pDst);
933 if(dst.h < 0) {
934 dst.h = -dst.h;
935 rd.bottom = dst.h - rd.bottom;
936 rd.top = dst.h - rd.top;
939 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
940 return E_INVALIDARG;
943 int w = rs.Width(), h = rs.Height();
945 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
946 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + rd.left;
948 if(rd.top > rd.bottom) {
949 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
951 dst.pitch = -dst.pitch;
954 for(ptrdiff_t j = 0; j < h; j++, s += src.pitch, d += dst.pitch) {
955 BYTE* s2 = s;
956 BYTE* s2end = s2 + w*4;
957 BYTE* d2 = d;
958 for(; s2 < s2end; s2 += 4, d2++) {
959 if(s2[3] < 0xff) {
960 d2[0] = ((d2[0]*s2[3])>>8) + s2[1];
964 dst.pitch = abs(dst.pitch);
966 int h2 = h/2;
968 if(!dst.pitchUV) {
969 dst.pitchUV = dst.pitch/2;
972 BYTE* ss[2];
973 ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4;
974 ss[1] = ss[0] + 4;
976 if(!dst.bitsU || !dst.bitsV) {
977 dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h;
978 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
980 if(dst.type == MSP_YV12) {
981 BYTE* p = dst.bitsU;
982 dst.bitsU = dst.bitsV;
983 dst.bitsV = p;
987 BYTE* dd[2];
988 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
989 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
991 if(rd.top > rd.bottom) {
992 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
993 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
994 dst.pitchUV = -dst.pitchUV;
997 for(ptrdiff_t i = 0; i < 2; i++) {
998 s = ss[i];
999 d = dd[i];
1000 BYTE* a = ss[0]+3;
1001 for(ptrdiff_t j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, a += src.pitch*2) {
1002 BYTE* s2 = s;
1003 BYTE* s2end = s2 + w*4;
1004 BYTE* d2 = d;
1005 BYTE* a2 = a;
1007 DWORD last_alpha = a2[0]+a2[0+src.pitch];
1008 for(; s2 < s2end; s2 += 8, d2++, a2 += 8) {
1009 unsigned int ia = (last_alpha + 2*(a2[0]+a2[0+src.pitch]) + a2[4] + a2[4+src.pitch] + 4 )>>3;
1010 last_alpha = a2[4] + a2[4+src.pitch];
1011 if(ia < 0xff) {
1012 *d2 = ((*d2*ia)>>8) + ((s2[0]+s2[src.pitch])>>1);
1018 return S_OK;
1021 HRESULT CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
1023 ONCER( SaveArgb2File(*pTarget, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
1024 const SubPicDesc& src = m_spd;
1025 SubPicDesc dst = *pTarget; // copy, because we might modify it
1027 CRect rs(*pSrc), rd(*pDst);
1029 if(dst.h < 0) {
1030 dst.h = -dst.h;
1031 rd.bottom = dst.h - rd.bottom;
1032 rd.top = dst.h - rd.top;
1035 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1036 return E_INVALIDARG;
1039 int w = rs.Width(), h = rs.Height();
1041 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
1042 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + rd.left;
1044 if(rd.top > rd.bottom) {
1045 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
1047 dst.pitch = -dst.pitch;
1050 for(ptrdiff_t j = 0; j < h; j++, s += src.pitch, d += dst.pitch) {
1051 BYTE* s2 = s;
1052 BYTE* s2end = s2 + w*4;
1053 BYTE* d2 = d;
1054 for(; s2 < s2end; s2 += 4, d2++) {
1055 if(s2[3] < 0xff) {
1056 d2[0] = ((d2[0]*s2[3])>>8) + s2[1];
1060 dst.pitch = abs(dst.pitch);
1062 int h2 = h/2;
1064 if(!dst.pitchUV) {
1065 dst.pitchUV = dst.pitch;
1068 BYTE* ss[2];
1069 ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4;
1070 ss[1] = ss[0] + 4;
1072 if(!dst.bitsU || !dst.bitsV) {
1073 dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h;
1074 dst.bitsV = dst.bitsU + 1;
1076 if(dst.type == MSP_NV21) {
1077 BYTE* p = dst.bitsU;
1078 dst.bitsU = dst.bitsV;
1079 dst.bitsV = p;
1083 BYTE* dd[2];
1084 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left;
1085 dd[1] = dd[0]+1;
1087 if(rd.top > rd.bottom) {
1088 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left;
1089 dd[1] = dd[0]+1;
1090 dst.pitchUV = -dst.pitchUV;
1093 for(ptrdiff_t i = 0; i < 2; i++) {
1094 s = ss[i];
1095 d = dd[i];
1096 BYTE* a = ss[0]+3;
1097 for(ptrdiff_t j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, a += src.pitch*2) {
1098 BYTE* s2 = s;
1099 BYTE* s2end = s2 + w*4;
1100 BYTE* d2 = d;
1101 BYTE* a2 = a;
1102 DWORD last_alpha = a2[0]+a2[0+src.pitch];
1103 for(; s2 < s2end; s2 += 8, d2+=2, a2 += 8) {
1104 unsigned int ia = (last_alpha+2*(a2[0]+a2[0+src.pitch])+a2[4]+a2[4+src.pitch]+4)>>3;
1105 last_alpha = a2[4]+a2[4+src.pitch];
1106 if(ia < 0xff) {
1107 *d2 = ((*d2*ia)>>8) + ((s2[0]+s2[src.pitch])>>1);
1113 ONCER( SaveArgb2File(*pTarget, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1114 return S_OK;
1117 HRESULT CMemSubPic::AlphaBltAnv12_P010( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
1119 //fix me: check colorspace and log error
1120 const SubPicDesc& src = m_spd;
1121 SubPicDesc dst = *pTarget; // copy, because we might modify it
1123 CRect rs(*pSrc), rd(*pDst);
1124 if(dst.h < 0)
1126 dst.h = -dst.h;
1127 rd.bottom = dst.h - rd.bottom;
1128 rd.top = dst.h - rd.top;
1130 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1131 return E_INVALIDARG;
1133 int w = rs.Width(), h = rs.Height();
1134 bool bottom_down = rd.top > rd.bottom;
1136 BYTE* d = NULL;
1137 BYTE* dUV = NULL;
1138 if(!bottom_down)
1140 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
1141 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left*2;
1143 else
1145 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left*2;
1146 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left*2;
1147 dst.pitch = -dst.pitch;
1149 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
1151 const BYTE* sa = reinterpret_cast<const BYTE*>(src.bits) + src.pitch*rs.top + rs.left;
1152 const BYTE* sy = sa + src.pitch*src.h;
1153 const BYTE* s_uv = sy + src.pitch*src.h;//UV
1154 return AlphaBltAnv12_P010(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
1157 HRESULT CMemSubPic::AlphaBltAnv12_Nv12( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
1159 //fix me: check colorspace and log error
1160 const SubPicDesc& src = m_spd;
1161 SubPicDesc dst = *pTarget; // copy, because we might modify it
1163 CRect rs(*pSrc), rd(*pDst);
1164 if(dst.h < 0)
1166 dst.h = -dst.h;
1167 rd.bottom = dst.h - rd.bottom;
1168 rd.top = dst.h - rd.top;
1170 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1171 return E_INVALIDARG;
1173 int w = rs.Width(), h = rs.Height();
1174 bool bottom_down = rd.top > rd.bottom;
1176 BYTE* d = NULL;
1177 BYTE* dUV = NULL;
1178 if (!bottom_down)
1180 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left;
1181 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left;
1183 else
1185 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left;
1186 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left;
1187 dst.pitch = -dst.pitch;
1189 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
1191 const BYTE* sa = reinterpret_cast<const BYTE*>(src.bits) + src.pitch*rs.top + rs.left;
1192 const BYTE* sy = sa + src.pitch*src.h;
1193 const BYTE* s_uv = sy + src.pitch*src.h;//UV
1195 return AlphaBltAnv12_Nv12(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
1198 STDMETHODIMP CMemSubPic::SetDirtyRectEx(CAtlList<CRect>* dirtyRectList )
1200 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1201 if(dirtyRectList!=NULL)
1203 POSITION pos = dirtyRectList->GetHeadPosition();
1204 if(m_spd.type == MSP_AYUV_PLANAR || m_alpha_blt_dst_type==MSP_IYUV || m_alpha_blt_dst_type==MSP_YV12
1205 || m_alpha_blt_dst_type==MSP_P010 || m_alpha_blt_dst_type==MSP_P016
1206 || m_alpha_blt_dst_type==MSP_NV12 || m_alpha_blt_dst_type==MSP_NV21 )
1208 while(pos!=NULL)
1210 CRect& cRectSrc = dirtyRectList->GetNext(pos);
1211 cRectSrc.left &= ~15;
1212 cRectSrc.right = (cRectSrc.right+15)&~15;
1213 if(cRectSrc.right>m_spd.w)
1215 cRectSrc.right = m_spd.w;
1217 cRectSrc.top &= ~1;
1218 cRectSrc.bottom = (cRectSrc.bottom+1)&~1;
1221 else if(m_spd.type == MSP_XY_AUYV || m_alpha_blt_dst_type==MSP_YUY2)
1223 while(pos!=NULL)
1225 CRect& cRectSrc = dirtyRectList->GetNext(pos);
1226 cRectSrc.left &= ~3;
1227 cRectSrc.right = (cRectSrc.right+3)&~3;
1231 return __super::SetDirtyRectEx(dirtyRectList);
1235 // static
1238 HRESULT CMemSubPic::AlphaBltAnv12_P010( const BYTE* src_a, const BYTE* src_y, const BYTE* src_uv, int src_pitch,
1239 BYTE* dst_y, BYTE* dst_uv, int dst_pitch, int w, int h )
1241 const BYTE* sa = src_a;
1242 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_y) | static_cast<intptr_t>(src_pitch) |
1243 reinterpret_cast<intptr_t>(dst_y) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1245 for(int i=0; i<h; i++, sa += src_pitch, src_y += src_pitch, dst_y += dst_pitch)
1247 const BYTE* sa2 = sa;
1248 const BYTE* s2 = src_y;
1249 const BYTE* s2end_mod16 = s2 + (w&~15);
1250 const BYTE* s2end = s2 + w;
1251 BYTE* d2 = dst_y;
1253 for(; s2 < s2end_mod16; s2+=16, sa2+=16, d2+=32)
1255 mix_16_y_p010_sse2(d2, s2, sa2);
1257 for( WORD* d3=reinterpret_cast<WORD*>(d2); s2 < s2end; s2++, sa2++, d3++)
1259 if(sa2[0] < 0xff)
1261 d3[0] = ((d3[0]*sa2[0])>>8) + (s2[0]<<8);
1266 else //fix me: only a workaround for non-mod-16 size video
1268 for(int i=0; i<h; i++, sa += src_pitch, src_y += src_pitch, dst_y += dst_pitch)
1270 const BYTE* sa2 = sa;
1271 const BYTE* s2 = src_y;
1272 const BYTE* s2end = s2 + w;
1273 WORD* d2 = reinterpret_cast<WORD*>(dst_y);
1274 for(; s2 < s2end; s2+=1, sa2+=1, d2+=1)
1276 if(sa2[0] < 0xff)
1278 d2[0] = ((d2[0]*sa2[0])>>8) + (s2[0]<<8);
1283 //UV
1284 int h2 = h/2;
1285 BYTE* d = dst_uv;
1286 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_uv) | static_cast<intptr_t>(src_pitch) |
1287 reinterpret_cast<intptr_t>(dst_uv) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1289 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1291 hleft_vmid_mix_uv_p010_sse2(d, w, src_uv, src_a, src_pitch);
1294 else
1296 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1298 hleft_vmid_mix_uv_p010_c(d, w, src_uv, src_a, src_pitch);
1301 __asm emms;
1302 return S_OK;
1305 HRESULT CMemSubPic::AlphaBltAnv12_Nv12( const BYTE* src_a, const BYTE* src_y, const BYTE* src_uv, int src_pitch,
1306 BYTE* dst_y, BYTE* dst_uv, int dst_pitch, int w, int h )
1308 AlphaBltYv12Luma( dst_y, dst_pitch, w, h, src_y, src_a, src_pitch );
1310 int h2 = h/2;
1311 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_uv) | static_cast<intptr_t>(src_pitch) |
1312 reinterpret_cast<intptr_t>(dst_uv) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1314 BYTE* d = dst_uv;
1315 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1317 hleft_vmid_mix_uv_nv12_sse2(d, w, src_uv, src_a, src_pitch);
1320 else
1322 BYTE* d = dst_uv;
1323 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1325 hleft_vmid_mix_uv_nv12_c(d, w, src_uv, src_a, src_pitch);
1329 __asm emms;
1330 return S_OK;
1334 // CMemSubPicAllocator
1337 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type, SIZE maxsize, int type/*=-1*/)
1338 : CSubPicExAllocatorImpl(maxsize, false, false)
1339 , m_alpha_blt_dst_type(alpha_blt_dst_type)
1340 , m_maxsize(maxsize)
1341 , m_type(type)
1343 if(m_type==-1)
1345 switch(alpha_blt_dst_type)
1347 case MSP_YUY2:
1348 m_type = MSP_XY_AUYV;
1349 break;
1350 case MSP_AYUV:
1351 m_type = MSP_AYUV;
1352 break;
1353 case MSP_IYUV:
1354 case MSP_YV12:
1355 case MSP_P010:
1356 case MSP_P016:
1357 case MSP_NV12:
1358 case MSP_NV21:
1359 m_type = MSP_AYUV_PLANAR;
1360 break;
1361 default:
1362 m_type = MSP_RGBA;
1363 break;
1368 // ISubPicAllocatorImpl
1370 bool CMemSubPicAllocator::AllocEx(bool fStatic, ISubPicEx** ppSubPic)
1372 if(!ppSubPic) {
1373 return false;
1375 SubPicDesc spd;
1376 spd.w = m_maxsize.cx;
1377 spd.h = m_maxsize.cy;
1378 spd.bpp = 32;
1379 spd.pitch = (spd.w*spd.bpp)>>3;
1380 spd.type = m_type;
1381 spd.bits = DNew BYTE[spd.pitch*spd.h];
1382 if(!spd.bits) {
1383 return false;
1385 *ppSubPic = DNew CMemSubPic(spd, m_alpha_blt_dst_type);
1386 if(!(*ppSubPic)) {
1387 return false;
1389 (*ppSubPic)->AddRef();
1390 return true;