Deal with head/tail non-aligned part (Nv12/YV12 luma)
[xy_vsfilter.git] / src / subpic / MemSubPic.cpp
blob9b4d9da6263fae7f431a6b950456b6f441060b1e
1 /*
2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
22 #include "stdafx.h"
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
26 #if 0
27 #include <fstream>
29 // debug functions
30 //
31 static void SaveRect2File(const CRect& cRect, const char * filename)
33 std::ofstream os(filename);
34 os<<cRect.left<<","<<cRect.top<<","<<cRect.right<<","<<cRect.bottom;
36 static void SaveAxxx2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
38 std::ofstream axxx(filename);
39 int w = cRect.Width(), h = cRect.Height();
41 BYTE* top = (BYTE*)spd.bits + spd.pitch*cRect.top + cRect.left*4;
42 BYTE* bottom = top + spd.pitch*h;
44 for(; top < bottom ; top += spd.pitch) {
45 BYTE* s = top;
46 BYTE* e = s + w*4;
47 for(; s < e; s+=4) { // ARGB ARGB -> AxYU AxYV
48 axxx<<(int)s[0]<<","<<(int)s[1]<<","<<(int)s[2]<<","<<(int)s[3];
49 if(s+4>=e)
51 axxx<<std::endl;
53 else
55 axxx<<",";
59 axxx.close();
61 static void SaveArgb2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
63 SaveAxxx2File(spd, cRect, filename);
65 static void SaveAyuv2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
67 SaveAxxx2File(spd, cRect, filename);
69 static void SaveNvxx2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
71 std::ofstream os(filename);
72 int w = cRect.Width(), h = cRect.Height();
74 BYTE* top = (BYTE*)spd.bits;
75 BYTE* bottom = top + spd.pitch*h;
77 for(; top < bottom ; top += spd.pitch) {
78 BYTE* s = top;
79 BYTE* e = s + w;
81 BYTE* sY = s + spd.pitch*spd.h;
82 BYTE* sU = sY + spd.pitch*spd.h;
83 BYTE* sV = sU + 1;
84 for(; s < e; s++, sY++, sU+=2,sV+=2) {
85 os<<(int)s[0]<<","<<(int)sY[0]<<","<<(int)sU[0]<<","<<(int)sV[0];
86 if(s+1>=e)
88 os<<std::endl;
90 else
92 os<<",";
96 os.close();
// Evaluates `expr` only the first time control reaches this particular call
// site (each expansion owns its own static flag; the flag is not synchronized
// between threads). The body is wrapped in do { } while(0) so that
// `ONCER(x);` is a single statement and stays valid inside an unbraced
// if/else.
#define ONCER(expr) do {\
    static bool entered=false;\
    if(!entered)\
    {\
        entered=true;\
        expr;\
    }\
} while(0)
107 #else
108 #define ONCER(expr)
109 #endif
112 // alpha blend functions
114 #include "xy_intrinsics.h"
117 // CMemSubPic
120 CMemSubPic::CMemSubPic(SubPicDesc& spd, int alpha_blt_dst_type)
121 : m_spd(spd), m_alpha_blt_dst_type(alpha_blt_dst_type)
123 m_maxsize.SetSize(spd.w, spd.h);
124 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
125 CRect allSpd(0,0,spd.w, spd.h);
126 m_rectListDirty.AddTail(allSpd);
129 CMemSubPic::~CMemSubPic()
131 delete [] m_spd.bits, m_spd.bits = NULL;
134 // ISubPic
136 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
138 return (void*)&m_spd;
141 STDMETHODIMP CMemSubPic::GetDesc(SubPicDesc& spd) const
143 spd.type = m_spd.type;
144 spd.w = m_size.cx;
145 spd.h = m_size.cy;
146 spd.bpp = m_spd.bpp;
147 spd.pitch = m_spd.pitch;
148 spd.bits = m_spd.bits;
149 spd.bitsU = m_spd.bitsU;
150 spd.bitsV = m_spd.bitsV;
151 spd.vidrect = m_vidrect;
152 return S_OK;
155 STDMETHODIMP CMemSubPic::CopyTo(ISubPicEx* pSubPic)
157 HRESULT hr;
158 if(FAILED(hr = __super::CopyTo(pSubPic))) {
159 return hr;
162 SubPicDesc src, dst;
163 if(FAILED(GetDesc(src)) || FAILED(pSubPic->GetDesc(dst))) {
164 return E_FAIL;
166 while(!m_rectListDirty.IsEmpty())
168 CRect& cRect = m_rectListDirty.GetHead();
169 int w = cRect.Width(), h = cRect.Height();
170 BYTE* s = (BYTE*)src.bits + src.pitch*cRect.top + cRect.left*4;
171 BYTE* d = (BYTE*)dst.bits + dst.pitch*cRect.top + cRect.left*4;
172 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
173 memcpy(d, s, w*4);
175 return S_OK;
178 STDMETHODIMP CMemSubPic::ClearDirtyRect(DWORD color)
180 if(m_rectListDirty.IsEmpty()) {
181 return S_OK;
183 while(!m_rectListDirty.IsEmpty())
185 //pDirtyRect = m_rectListDirty.RemoveHead();
186 CRect& dirtyRect = m_rectListDirty.RemoveTail();
187 BYTE* p = (BYTE*)m_spd.bits + m_spd.pitch*(dirtyRect.top) + dirtyRect.left*(m_spd.bpp>>3);
188 int w = dirtyRect.Width();
189 if(m_spd.type!=MSP_AYUV_PLANAR)
191 for(int j = 0, h = dirtyRect.Height(); j < h; j++, p += m_spd.pitch)
193 #ifdef _WIN64
194 memsetd(p, color, w*4); // nya
195 #else
196 __asm
198 mov eax, color
199 mov ecx, w
200 mov edi, p
202 rep stosd
205 #endif
208 else
210 ///TODO:
211 ///FIX ME
212 for(int j = 0, h = dirtyRect.Height(); j < h; j++, p += m_spd.pitch)
214 // memsetd(p, 0, m_rcDirty.Width());
215 //DbgLog((LOG_TRACE, 3, "w:%d", w));
216 //w = pDirtyRect->Width();
217 memset(p, 0xFF, w);
218 memset(p+m_spd.h*m_spd.pitch, 0, w);
219 memset(p+m_spd.h*m_spd.pitch*2, 0, w);
220 memset(p+m_spd.h*m_spd.pitch*3, 0, w);
224 m_rectListDirty.RemoveAll();
225 return S_OK;
228 STDMETHODIMP CMemSubPic::Lock(SubPicDesc& spd)
230 return GetDesc(spd);
233 STDMETHODIMP CMemSubPic::Unlock( CAtlList<CRect>* dirtyRectList )
235 int src_type = m_spd.type;
236 int dst_type = m_alpha_blt_dst_type;
237 if( (src_type==MSP_RGBA && (dst_type == MSP_RGB32 ||
238 dst_type == MSP_RGB24 ||
239 dst_type == MSP_RGB16 ||
240 dst_type == MSP_RGB15))
242 (src_type==MSP_XY_AUYV && dst_type == MSP_YUY2)//ToDo: fix me MSP_AYUV
244 (src_type==MSP_AYUV && dst_type == MSP_AYUV)
246 (src_type==MSP_AYUV_PLANAR && (dst_type == MSP_IYUV ||
247 dst_type == MSP_YV12 ||
248 dst_type == MSP_P010 ||
249 dst_type == MSP_P016 ||
250 dst_type == MSP_NV12 ||
251 dst_type == MSP_NV21)))
253 return UnlockOther(dirtyRectList);
255 else if(src_type==MSP_RGBA && (dst_type == MSP_YUY2 ||
256 dst_type == MSP_AYUV || //ToDo: fix me MSP_AYUV
257 dst_type == MSP_IYUV ||
258 dst_type == MSP_YV12 ||
259 dst_type == MSP_NV12 ||
260 dst_type == MSP_NV21 ||
261 dst_type == MSP_P010 ||
262 dst_type == MSP_P016))
264 return UnlockRGBA_YUV(dirtyRectList);
266 return E_NOTIMPL;
// Post-processes the dirty rectangles in place for the cases where the source
// buffer and the blend target share a colour family (see Unlock()).
// Registers dirtyRectList through SetDirtyRectEx(), then, per rectangle:
//   MSP_RGB16 / MSP_RGB15 - repacks each 32-bit pixel to 5-6-5 / 5-5-5 in the
//                           low 16 bits, with the alpha shifted right by 3
//                           and kept in bits 24..28;
//   MSP_YUY2              - applies a (1,2,1)/4 horizontal filter to bytes 0
//                           and 2 of each pixel pair;
//   MSP_YV12 / MSP_IYUV   - nothing;
//   P010 / P016 / NV12    - SubsampleAndInterlace(cRect, true);
//   MSP_NV21              - SubsampleAndInterlace(cRect, false).
// Always returns S_OK. Rectangles with non-positive width or height are skipped.
269 HRESULT CMemSubPic::UnlockOther(CAtlList<CRect>* dirtyRectList)
271 SetDirtyRectEx(dirtyRectList);
272 if(m_rectListDirty.IsEmpty()) {
273 return S_OK;
276 POSITION pos = m_rectListDirty.GetHeadPosition();
277 while(pos!=NULL)
279 const CRect& cRect = m_rectListDirty.GetNext(pos);
280 int w = cRect.Width(), h = cRect.Height();
281 if (w<=0 || h<=0)
283 continue;
// top/bottom delimit the rectangle's rows; 4 bytes per pixel are assumed here.
286 BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*(cRect.top) + cRect.left*4;
287 BYTE* bottom = top + m_spd.pitch*h;
288 if(m_alpha_blt_dst_type == MSP_RGB16)
290 for(; top < bottom ; top += m_spd.pitch)
292 DWORD* s = (DWORD*)top;
293 DWORD* e = s + w;
294 for(; s < e; s++)
296 *s = ((*s>>3)&0x1f000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
297 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
301 else if(m_alpha_blt_dst_type == MSP_RGB15)
303 for(; top < bottom; top += m_spd.pitch)
305 DWORD* s = (DWORD*)top;
306 DWORD* e = s + w;
307 for(; s < e; s++)
309 *s = ((*s>>3)&0x1f000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
310 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
314 else if(m_alpha_blt_dst_type == MSP_YUY2)
// NOTE(review): XY_DO_ONCE / xy_logger are defined elsewhere; this dumps the
// buffer to a hard-coded G:\ path - confirm it is compiled out of release builds.
316 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top, m_spd.pitch*(h-1)) );
318 for(BYTE* tempTop=top; tempTop < bottom ; tempTop += m_spd.pitch)
320 BYTE* s = tempTop;
321 BYTE* e = s + w*4;
// last_v / last_u seed the filter with the first pixel's own bytes 0 and 2.
322 BYTE last_v = s[0], last_u=s[2];
// Processes two pixels (8 bytes) per step and reads s[4]/s[6], so the
// rectangle width is presumably even - SetDirtyRectEx() rounds it for YUY2.
323 for(; s < e; s+=8) // AUYV AUYV -> AxYU AxYV
// s[4] must be saved before it is overwritten: it is the "previous" sample
// for the next pixel pair.
325 BYTE tmp = s[4];
326 s[4] = (last_v + 2*s[0] + s[4] + 2)>>2;
327 last_v = tmp;
329 s[0] = (last_u + 2*s[2] + s[6] + 2)>>2;
330 last_u = s[6];
334 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top, m_spd.pitch*(h-1)) );
336 else if(m_alpha_blt_dst_type == MSP_YV12 || m_alpha_blt_dst_type == MSP_IYUV )
338 //nothing to do
340 else if ( m_alpha_blt_dst_type == MSP_P010 || m_alpha_blt_dst_type == MSP_P016
341 || m_alpha_blt_dst_type == MSP_NV12 )
343 SubsampleAndInterlace(cRect, true);
345 else if( m_alpha_blt_dst_type == MSP_NV21 )
347 SubsampleAndInterlace(cRect, false);
350 return S_OK;
// Converts the dirty rectangles of an RGBA buffer in place so they can later
// be blended onto a YUV target (see Unlock()).
//   YUY2 / YV12 / IYUV / P010 / P016 / NV12 / NV21:
//       pixels are handled in pairs ("ARGB ARGB -> AxYU AxYV"): byte 1 of each
//       pixel receives bits 16..23 of the converted value, byte 0 of the first
//       pixel and byte 0 of the second receive (1,2,1)/4 filtered values taken
//       from bits 8..15 and bits 0..7 respectively. Byte 3 (alpha) is untouched.
//   AYUV: each pixel is replaced by ColorConvTable::PreMulArgb2Ayuv(...).
// Pixels whose alpha byte is 0xff are zeroed instead of converted.
// Always returns S_OK.
// NOTE(review): PreMulArgb2Ayuv is defined elsewhere; the A/Y/U/V byte order
// of its result is inferred from the shifts used below - confirm.
353 HRESULT CMemSubPic::UnlockRGBA_YUV(CAtlList<CRect>* dirtyRectList)
355 //debug
356 ONCER( SaveRect2File(dirtyRectList->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
357 ONCER( SaveArgb2File(m_spd, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
359 SetDirtyRectEx(dirtyRectList);
361 ONCER( SaveRect2File(dirtyRectList->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
362 if(m_rectListDirty.IsEmpty()) {
363 return S_OK;
366 POSITION pos = m_rectListDirty.GetHeadPosition();
367 while(pos!=NULL)
369 const CRect& cRect = m_rectListDirty.GetNext(pos);
370 int w = cRect.Width(), h = cRect.Height();
371 if(w<=0 || h<=0)
373 continue;
376 BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*cRect.top + cRect.left*4;
377 BYTE* bottom = top + m_spd.pitch*h;
379 if( m_alpha_blt_dst_type == MSP_YUY2 ||
380 m_alpha_blt_dst_type == MSP_YV12 ||
381 m_alpha_blt_dst_type == MSP_IYUV ||
382 m_alpha_blt_dst_type == MSP_P010 ||
383 m_alpha_blt_dst_type == MSP_P016 ||
384 m_alpha_blt_dst_type == MSP_NV12 ||
385 m_alpha_blt_dst_type == MSP_NV21) {
386 for(; top < bottom ; top += m_spd.pitch) {
387 BYTE* s = top;
388 BYTE* e = s + w*4;
// The filter's "previous pixel" is seeded with the row's first pixel.
389 DWORD last_yuv = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
// Two pixels (8 bytes) per step; s[7] is read, so an even width is assumed.
390 for(; s < e; s+=8) { // ARGB ARGB -> AxYU AxYV
// Convert unless both pixels and the previous one all have alpha 0xff.
391 if((s[3]+s[7]+(last_yuv>>24)) < 0xff*3) {
392 DWORD tmp1 = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
393 DWORD tmp2 = ColorConvTable::PreMulArgb2Ayuv(s[7], s[6], s[5], s[4]);
395 s[1] = (tmp1>>16)&0xff;
396 s[5] = (tmp2>>16)&0xff;
398 s[0] = (((last_yuv>>8)&0xff) + 2*((tmp1>>8)&0xff) + ((tmp2>>8)&0xff) + 2)/4;
399 s[4] = ((last_yuv&0xff) + 2*(tmp1&0xff) + (tmp2&0xff) + 2)/4;
400 last_yuv = tmp2;
401 } else {
// last_yuv must be refreshed before the source bytes are zeroed below.
402 last_yuv = ColorConvTable::PreMulArgb2Ayuv(s[7], s[6], s[5], s[4]);
404 s[1] = s[5] = 0;
405 s[0] = s[4] = 0;
410 else if(m_alpha_blt_dst_type == MSP_AYUV) {
411 for(; top < bottom ; top += m_spd.pitch) {
412 BYTE* s = top;
413 BYTE* e = s + w*4;
414 for(; s < e; s+=4) { // ARGB -> AYUV
415 if(s[3] < 0xff) {
416 *((DWORD*)s) = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
417 } else {
418 s[0] = s[1] = 0;
419 s[2] = 0;
426 ONCER( SaveAxxx2File(m_spd, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
427 return S_OK;
430 void CMemSubPic::SubsampleAndInterlace( const CRect& cRect, bool u_first )
432 //fix me: check alignment and log error
433 int w = cRect.Width(), h = cRect.Height();
434 BYTE* u_plan = reinterpret_cast<BYTE*>(m_spd.bits) + m_spd.pitch*m_spd.h*2;
435 BYTE* u_start = u_plan + m_spd.pitch*(cRect.top)+ cRect.left;
436 BYTE* v_start = u_start + m_spd.pitch*m_spd.h;
437 BYTE* dst = u_start;
438 if(!u_first)
440 BYTE* tmp = v_start;
441 v_start = u_start;
442 u_start = tmp;
445 //Todo: fix me.
446 //Walkarround for alignment
447 if ( ((m_spd.pitch|w)&15) == 0 )
449 ASSERT(w%16==0);
450 for (int i=0;i<h;i+=2)
452 hleft_vmid_subsample_and_interlace_2_line_sse2(dst, u_start, v_start, w, m_spd.pitch);
453 u_start += 2*m_spd.pitch;
454 v_start += 2*m_spd.pitch;
455 dst += m_spd.pitch;
458 else
460 for (int i=0;i<h;i+=2)
462 hleft_vmid_subsample_and_interlace_2_line_c(dst, u_start, v_start, w, m_spd.pitch);
463 u_start += 2*m_spd.pitch;
464 v_start += 2*m_spd.pitch;
465 dst += m_spd.pitch;
470 STDMETHODIMP CMemSubPic::AlphaBlt( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
472 if(!pSrc || !pDst || !pTarget) {
473 return E_POINTER;
475 int src_type = m_spd.type;
476 int dst_type = pTarget->type;
478 if( (src_type==MSP_RGBA && (dst_type == MSP_RGB32 ||
479 dst_type == MSP_RGB24 ||
480 dst_type == MSP_RGB16 ||
481 dst_type == MSP_RGB15 ||
482 dst_type == MSP_RGBA ||
483 dst_type == MSP_YUY2 ||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
484 dst_type == MSP_AYUV ))
486 (src_type==MSP_XY_AUYV && dst_type == MSP_YUY2)//ToDo: fix me MSP_AYUV
488 (src_type==MSP_AYUV && dst_type == MSP_AYUV)
490 (src_type==MSP_AYUV_PLANAR && (dst_type == MSP_IYUV ||
491 dst_type == MSP_YV12)) )
493 return AlphaBltOther(pSrc, pDst, pTarget);
495 else if ( src_type==MSP_AYUV_PLANAR && (dst_type == MSP_NV12 ||
496 dst_type == MSP_NV21 ) )
498 return AlphaBltAnv12_Nv12(pSrc, pDst, pTarget);
501 else if( src_type==MSP_AYUV_PLANAR && (dst_type == MSP_P010 ||
502 dst_type == MSP_P016 ) )
504 return AlphaBltAnv12_P010(pSrc, pDst, pTarget);
506 else if( src_type==MSP_RGBA && (dst_type == MSP_IYUV ||
507 dst_type == MSP_YV12))
509 return AlphaBltAxyuAxyv_Yv12(pSrc, pDst, pTarget);
511 else if( src_type==MSP_RGBA && (dst_type == MSP_NV12||
512 dst_type == MSP_NV21))
514 return AlphaBltAxyuAxyv_Nv12(pSrc, pDst, pTarget);
516 else if( src_type==MSP_RGBA && (dst_type == MSP_P010 ||
517 dst_type == MSP_P016))
519 return AlphaBltAxyuAxyv_P010(pSrc, pDst, pTarget);
521 return E_NOTIMPL;
// Blends rectangle *pSrc of this sub-picture onto rectangle *pDst of *pTarget
// for the target types handled by the switch below: MSP_RGBA, MSP_RGB32 /
// MSP_AYUV, MSP_RGB24, MSP_RGB16, MSP_RGB15, MSP_YUY2 and MSP_YV12 / MSP_IYUV.
//
// A negative pTarget->h, or a destination rectangle whose top lies below its
// bottom, means the target is stored bottom-up; in that case the destination
// pointer starts on row rd.top-1 and the pitch is negated.
//
// In every branch the destination is scaled by the source byte s2[3] and the
// source colour is added, and pixels with s2[3] == 0xff (0x1f for the 16/15-bit
// targets) are left untouched - i.e. s2[3] acts as the remaining destination
// weight rather than as opacity.
//
// Returns E_INVALIDARG when the rectangles differ in size, E_NOTIMPL for
// unsupported target types, S_OK otherwise.
// NOTE(review): the MSP_YUY2 branch uses 32-bit MSVC inline assembly (MMX)
// without an #ifdef _WIN64 guard, unlike the MSP_RGB32 branch - confirm
// whether this file is built for x64.
524 HRESULT CMemSubPic::AlphaBltOther(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
526 const SubPicDesc& src = m_spd;
527 SubPicDesc dst = *pTarget; // copy, because we might modify it
529 CRect rs(*pSrc), rd(*pDst);
530 if(dst.h < 0)
532 dst.h = -dst.h;
533 rd.bottom = dst.h - rd.bottom;
534 rd.top = dst.h - rd.top;
536 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
537 return E_INVALIDARG;
539 int w = rs.Width(), h = rs.Height();
540 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);//rs.left*4
541 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + ((rd.left*dst.bpp)>>3);
542 if(rd.top > rd.bottom)
544 if(dst.type == MSP_RGB32 || dst.type == MSP_RGB24
545 || dst.type == MSP_RGB16 || dst.type == MSP_RGB15
546 || dst.type == MSP_YUY2 || dst.type == MSP_AYUV)
548 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*dst.bpp>>3);
550 else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV)
552 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*8>>3);
554 else
556 return E_NOTIMPL;
558 dst.pitch = -dst.pitch;
560 DbgLog((LOG_TRACE, 5, TEXT("w=%d h=%d"), w, h));
561 switch(dst.type)
// MSP_RGBA: no blending with the old destination; each colour channel is
// divided by (256 - s2[3]) and the top byte becomes 0xff - s2[3].
563 case MSP_RGBA:
564 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
566 BYTE* s2 = s;
567 BYTE* s2end = s2 + w*4;
568 DWORD* d2 = (DWORD*)d;
569 for(; s2 < s2end; s2 += 4, d2++)
571 if(s2[3] < 0xff)
573 DWORD bd =0x00000100 -( (DWORD) s2[3]);
574 DWORD B = ((*((DWORD*)s2)&0x000000ff)<<8)/bd;
575 DWORD V = ((*((DWORD*)s2)&0x0000ff00)/bd)<<8;
576 DWORD R = (((*((DWORD*)s2)&0x00ff0000)>>8)/bd)<<16;
577 *d2 = B | V | R
578 | (0xff000000-(*((DWORD*)s2)&0xff000000))&0xff000000;
582 break;
583 case MSP_RGB32:
584 case MSP_AYUV: //ToDo: fix me MSP_VUYA indeed?
585 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
587 BYTE* s2 = s;
588 BYTE* s2end = s2 + w*4;
589 DWORD* d2 = (DWORD*)d;
590 for(; s2 < s2end; s2 += 4, d2++)
// NOTE(review): the _WIN64 variant additionally scales the source colour by
// ia = 256 - s2[3], the 32-bit variant adds it unscaled - the two builds do
// not compute the same result; confirm which one is intended.
592 #ifdef _WIN64
593 DWORD ia = 256-s2[3];
594 if(s2[3] < 0xff) {
595 *d2 = ((((*d2&0x00ff00ff)*s2[3])>>8) + (((*((DWORD*)s2)&0x00ff00ff)*ia)>>8)&0x00ff00ff)
596 | ((((*d2&0x0000ff00)*s2[3])>>8) + (((*((DWORD*)s2)&0x0000ff00)*ia)>>8)&0x0000ff00);
598 #else
599 if(s2[3] < 0xff)
601 *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff)
602 | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00);
604 #endif
607 break;
608 case MSP_RGB24:
609 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
611 BYTE* s2 = s;
612 BYTE* s2end = s2 + w*4;
613 BYTE* d2 = d;
614 for(; s2 < s2end; s2 += 4, d2 += 3)
616 if(s2[3] < 0xff)
618 d2[0] = ((d2[0]*s2[3])>>8) + s2[0];
619 d2[1] = ((d2[1]*s2[3])>>8) + s2[1];
620 d2[2] = ((d2[2]*s2[3])>>8) + s2[2];
624 break;
// MSP_RGB16 / MSP_RGB15: the source weight is a 5-bit value here (compare
// with 0x1f, shift by 5), matching the repacking done in UnlockOther().
625 case MSP_RGB16:
626 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
628 BYTE* s2 = s;
629 BYTE* s2end = s2 + w*4;
630 WORD* d2 = (WORD*)d;
631 for(; s2 < s2end; s2 += 4, d2++)
633 if(s2[3] < 0x1f)
635 *d2 = (WORD)((((((*d2&0xf81f)*s2[3])>>5) + (*(DWORD*)s2&0xf81f))&0xf81f)
636 | (((((*d2&0x07e0)*s2[3])>>5) + (*(DWORD*)s2&0x07e0))&0x07e0));
637 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
638 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
639 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
644 break;
645 case MSP_RGB15:
646 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
648 BYTE* s2 = s;
649 BYTE* s2end = s2 + w*4;
650 WORD* d2 = (WORD*)d;
651 for(; s2 < s2end; s2 += 4, d2++)
653 if(s2[3] < 0x1f)
655 *d2 = (WORD)((((((*d2&0x7c1f)*s2[3])>>5) + (*(DWORD*)s2&0x7c1f))&0x7c1f)
656 | (((((*d2&0x03e0)*s2[3])>>5) + (*(DWORD*)s2&0x03e0))&0x03e0));
657 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
658 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
659 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
664 break;
665 case MSP_YUY2:
666 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
668 unsigned int ia, c;
669 BYTE* s2 = s;
670 BYTE* s2end = s2 + w*4;
671 DWORD* d2 = (DWORD*)d;
672 ASSERT(w>0);
673 int last_a = w>0?s2[3]:0;
// Two source pixels (8 bytes) produce one destination DWORD per step.
674 for(; s2 < s2end; s2 += 8, d2++)
// Chroma weight: (1,2,1)/4 filter over the previous, current and next alpha.
676 ia = (last_a + 2*s2[3] + s2[7])>>2;
677 last_a = s2[7];
678 if(ia < 0xff)
680 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
681 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
682 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
683 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
684 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
// Pack the four per-byte weights and the four source bytes so the MMX code
// below can process the whole destination DWORD at once.
686 ia = (ia<<24)|(s2[7]<<16)|(ia<<8)|s2[3];
687 c = (s2[4]<<24)|(s2[5]<<16)|(s2[0]<<8)|s2[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
688 __asm
690 mov edi, d2
691 pxor mm0, mm0
692 movd mm2, c
693 punpcklbw mm2, mm0
694 movd mm3, [edi]
695 punpcklbw mm3, mm0
696 movd mm4, ia
697 punpcklbw mm4, mm0
698 psraw mm4, 1 //or else, overflow because psraw shift in sign bit
699 pmullw mm3, mm4
700 psraw mm3, 7
701 paddsw mm3, mm2
702 packuswb mm3, mm3
703 movd [edi], mm3
708 __asm emms;
709 break;
710 case MSP_YV12:
711 case MSP_IYUV:
713 //dst.pitch = abs(dst.pitch);
714 int h2 = h/2;
// Chroma planes default to half the luma pitch, placed right after the luma
// plane; for MSP_YV12 the two derived plane pointers are swapped.
715 if(!dst.pitchUV)
717 dst.pitchUV = abs(dst.pitch)/2;
719 if(!dst.bitsU || !dst.bitsV)
721 dst.bitsU = (BYTE*)dst.bits + abs(dst.pitch)*dst.h;
722 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
723 if(dst.type == MSP_YV12)
725 BYTE* p = dst.bitsU;
726 dst.bitsU = dst.bitsV;
727 dst.bitsV = p;
730 BYTE* dd[2];
731 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
732 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
733 if(rd.top > rd.bottom)
735 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
736 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
737 dst.pitchUV = -dst.pitchUV;
// The source is addressed as four stacked one-byte-per-pixel planes of
// src.pitch*src.h bytes each: src_origin is the first plane (passed as the
// alpha argument), followed by the luma, U and V planes.
740 BYTE* src_origin= (BYTE*)src.bits + src.pitch*rs.top + rs.left;
742 BYTE* ss[2];
743 ss[0] = src_origin + src.pitch*src.h*2;//U
744 ss[1] = src_origin + src.pitch*src.h*3;//V
746 AlphaBltYv12Luma( d, dst.pitch, w, h, src_origin + src.pitch*src.h, src_origin, src.pitch );
748 AlphaBltYv12Chroma( dd[0], dst.pitchUV, w, h2, ss[0], src_origin, src.pitch);
749 AlphaBltYv12Chroma( dd[1], dst.pitchUV, w, h2, ss[1], src_origin, src.pitch);
751 __asm emms;
753 break;
754 default:
755 return E_NOTIMPL;
756 break;
759 //emms takes about 40 cpu cycles
760 //__asm emms;
761 return S_OK;
764 HRESULT CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
766 const SubPicDesc& src = m_spd;
767 SubPicDesc dst = *pTarget; // copy, because we might modify it
769 CRect rs(*pSrc), rd(*pDst);
771 if(dst.h < 0) {
772 dst.h = -dst.h;
773 rd.bottom = dst.h - rd.bottom;
774 rd.top = dst.h - rd.top;
777 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
778 return E_INVALIDARG;
781 int w = rs.Width(), h = rs.Height();
784 BYTE* s = static_cast<BYTE*>(src.bits) + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
785 BYTE* d = static_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
787 if(rd.top > rd.bottom) {
788 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
790 dst.pitch = -dst.pitch;
793 for(ptrdiff_t i=0; i<h; i++, s += src.pitch, d += dst.pitch)
795 BYTE* s2 = s;
796 BYTE* s2end = s2 + w*4;
797 WORD* d2 = reinterpret_cast<WORD*>(d);
798 for(; s2 < s2end; s2 += 4, d2++)
800 if(s2[3] < 0xff) {
801 d2[0] = ((d2[0]*s2[3])>>8) + (s2[1]<<8);
806 //UV
807 int h2 = h/2;
808 if(!dst.pitchUV)
810 dst.pitchUV = abs(dst.pitch);
812 if(!dst.bitsU || !dst.bitsV)
814 dst.bitsU = static_cast<BYTE*>(dst.bits) + abs(dst.pitch)*dst.h;
815 dst.bitsV = dst.bitsU + 2;
817 BYTE* ddUV = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left*2;
818 if(rd.top > rd.bottom)
820 ddUV = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left*2;
821 dst.pitchUV = -dst.pitchUV;
824 s = static_cast<BYTE*>(src.bits) + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
826 d = ddUV;
827 int pitch = src.pitch;
828 for(int j = 0; j < h2; j++, s += 2*src.pitch, d += dst.pitchUV )
830 BYTE* s2 = s;
831 WORD* d2=reinterpret_cast<WORD*>(d);
832 WORD* d2_end = reinterpret_cast<WORD*>(d+2*w);
833 DWORD last_alpha = s2[3]+s2[3+src.pitch];
834 for( ; d2<d2_end; s2+=8, d2+=2)
836 unsigned int ia = (
837 last_alpha +
838 (s2[3] + s2[3+src.pitch])*2 +
839 s2[3+4]+ s2[3+4+src.pitch]);
840 last_alpha = s2[3+4]+ s2[3+4+src.pitch];
841 if( ia!=0xFF*8 )
843 d2[0] = (((d2[0])*ia)>>11) + ((s2[0] + s2[0+src.pitch])<<7);
844 d2[1] = (((d2[1])*ia)>>11) + ((s2[4] + s2[4+src.pitch])<<7);
849 return S_OK;
// Blends an "AxYU AxYV" buffer (an RGBA-typed source after UnlockRGBA_YUV)
// onto a YV12 / IYUV target with separate 8-bit Y, U and V planes.
//
// Per source pixel: byte 1 is added to the luma plane and byte 3 is used as
// the remaining destination weight. Per pixel pair: byte 0 of the first pixel
// feeds the plane dst.bitsU and byte 0 of the second pixel feeds dst.bitsV,
// each averaged over two source rows.
//
// Returns E_INVALIDARG when the rectangles differ in size, S_OK otherwise.
// A negative pTarget->h or rd.top > rd.bottom selects a bottom-up target.
852 HRESULT CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
854 const SubPicDesc& src = m_spd;
855 SubPicDesc dst = *pTarget; // copy, because we might modify it
857 CRect rs(*pSrc), rd(*pDst);
859 if(dst.h < 0) {
860 dst.h = -dst.h;
861 rd.bottom = dst.h - rd.bottom;
862 rd.top = dst.h - rd.top;
865 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
866 return E_INVALIDARG;
869 int w = rs.Width(), h = rs.Height();
871 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
872 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + rd.left;
874 if(rd.top > rd.bottom) {
875 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
877 dst.pitch = -dst.pitch;
// Luma pass: one destination byte per 4-byte source pixel.
880 for(ptrdiff_t j = 0; j < h; j++, s += src.pitch, d += dst.pitch) {
881 BYTE* s2 = s;
882 BYTE* s2end = s2 + w*4;
883 BYTE* d2 = d;
884 for(; s2 < s2end; s2 += 4, d2++) {
885 if(s2[3] < 0xff) {
886 d2[0] = ((d2[0]*s2[3])>>8) + s2[1];
// From here on the pitch is used only as a size, so its sign is dropped.
890 dst.pitch = abs(dst.pitch);
892 int h2 = h/2;
894 if(!dst.pitchUV) {
895 dst.pitchUV = dst.pitch/2;
// ss[0] / ss[1] point at byte 0 of the first / second pixel of each pair.
898 BYTE* ss[2];
899 ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4;
900 ss[1] = ss[0] + 4;
902 if(!dst.bitsU || !dst.bitsV) {
903 dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h;
904 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
// For MSP_YV12 the two derived plane pointers are exchanged.
906 if(dst.type == MSP_YV12) {
907 BYTE* p = dst.bitsU;
908 dst.bitsU = dst.bitsV;
909 dst.bitsV = p;
913 BYTE* dd[2];
914 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
915 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
917 if(rd.top > rd.bottom) {
918 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
919 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
920 dst.pitchUV = -dst.pitchUV;
// Chroma pass: i == 0 handles ss[0] -> dd[0], i == 1 handles ss[1] -> dd[1].
923 for(ptrdiff_t i = 0; i < 2; i++) {
924 s = ss[i];
925 d = dd[i];
// The alpha bytes are always read relative to ss[0], for both planes.
926 BYTE* a = ss[0]+3;
927 for(ptrdiff_t j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, a += src.pitch*2) {
928 BYTE* s2 = s;
929 BYTE* s2end = s2 + w*4;
930 BYTE* d2 = d;
931 BYTE* a2 = a;
933 DWORD last_alpha = a2[0]+a2[0+src.pitch];
934 for(; s2 < s2end; s2 += 8, d2++, a2 += 8) {
// (1,2,1) horizontal filter over the vertically summed alpha, rounded, /8.
935 unsigned int ia = (last_alpha + 2*(a2[0]+a2[0+src.pitch]) + a2[4] + a2[4+src.pitch] + 4 )>>3;
936 last_alpha = a2[4] + a2[4+src.pitch];
937 if(ia < 0xff) {
938 *d2 = ((*d2*ia)>>8) + ((s2[0]+s2[src.pitch])>>1);
944 return S_OK;
947 HRESULT CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
949 ONCER( SaveArgb2File(*pTarget, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
950 const SubPicDesc& src = m_spd;
951 SubPicDesc dst = *pTarget; // copy, because we might modify it
953 CRect rs(*pSrc), rd(*pDst);
955 if(dst.h < 0) {
956 dst.h = -dst.h;
957 rd.bottom = dst.h - rd.bottom;
958 rd.top = dst.h - rd.top;
961 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
962 return E_INVALIDARG;
965 int w = rs.Width(), h = rs.Height();
967 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
968 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + rd.left;
970 if(rd.top > rd.bottom) {
971 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
973 dst.pitch = -dst.pitch;
976 for(ptrdiff_t j = 0; j < h; j++, s += src.pitch, d += dst.pitch) {
977 BYTE* s2 = s;
978 BYTE* s2end = s2 + w*4;
979 BYTE* d2 = d;
980 for(; s2 < s2end; s2 += 4, d2++) {
981 if(s2[3] < 0xff) {
982 d2[0] = ((d2[0]*s2[3])>>8) + s2[1];
986 dst.pitch = abs(dst.pitch);
988 int h2 = h/2;
990 if(!dst.pitchUV) {
991 dst.pitchUV = dst.pitch;
994 BYTE* ss[2];
995 ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4;
996 ss[1] = ss[0] + 4;
998 if(!dst.bitsU || !dst.bitsV) {
999 dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h;
1000 dst.bitsV = dst.bitsU + 1;
1002 if(dst.type == MSP_NV21) {
1003 BYTE* p = dst.bitsU;
1004 dst.bitsU = dst.bitsV;
1005 dst.bitsV = p;
1009 BYTE* dd[2];
1010 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left;
1011 dd[1] = dd[0]+1;
1013 if(rd.top > rd.bottom) {
1014 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left;
1015 dd[1] = dd[0]+1;
1016 dst.pitchUV = -dst.pitchUV;
1019 for(ptrdiff_t i = 0; i < 2; i++) {
1020 s = ss[i];
1021 d = dd[i];
1022 BYTE* a = ss[0]+3;
1023 for(ptrdiff_t j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, a += src.pitch*2) {
1024 BYTE* s2 = s;
1025 BYTE* s2end = s2 + w*4;
1026 BYTE* d2 = d;
1027 BYTE* a2 = a;
1028 DWORD last_alpha = a2[0]+a2[0+src.pitch];
1029 for(; s2 < s2end; s2 += 8, d2+=2, a2 += 8) {
1030 unsigned int ia = (last_alpha+2*(a2[0]+a2[0+src.pitch])+a2[4]+a2[4+src.pitch]+4)>>3;
1031 last_alpha = a2[4]+a2[4+src.pitch];
1032 if(ia < 0xff) {
1033 *d2 = ((*d2*ia)>>8) + ((s2[0]+s2[src.pitch])>>1);
1039 ONCER( SaveArgb2File(*pTarget, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1040 return S_OK;
1043 HRESULT CMemSubPic::AlphaBltAnv12_P010( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
1045 //fix me: check colorspace and log error
1046 const SubPicDesc& src = m_spd;
1047 SubPicDesc dst = *pTarget; // copy, because we might modify it
1049 CRect rs(*pSrc), rd(*pDst);
1050 if(dst.h < 0)
1052 dst.h = -dst.h;
1053 rd.bottom = dst.h - rd.bottom;
1054 rd.top = dst.h - rd.top;
1056 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1057 return E_INVALIDARG;
1059 int w = rs.Width(), h = rs.Height();
1060 bool bottom_down = rd.top > rd.bottom;
1062 BYTE* d = NULL;
1063 BYTE* dUV = NULL;
1064 if(!bottom_down)
1066 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
1067 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left*2;
1069 else
1071 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left*2;
1072 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left*2;
1073 dst.pitch = -dst.pitch;
1075 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
1077 const BYTE* sa = reinterpret_cast<const BYTE*>(src.bits) + src.pitch*rs.top + rs.left;
1078 const BYTE* sy = sa + src.pitch*src.h;
1079 const BYTE* s_uv = sy + src.pitch*src.h;//UV
1080 return AlphaBltAnv12_P010(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
1083 HRESULT CMemSubPic::AlphaBltAnv12_Nv12( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
1085 //fix me: check colorspace and log error
1086 const SubPicDesc& src = m_spd;
1087 SubPicDesc dst = *pTarget; // copy, because we might modify it
1089 CRect rs(*pSrc), rd(*pDst);
1090 if(dst.h < 0)
1092 dst.h = -dst.h;
1093 rd.bottom = dst.h - rd.bottom;
1094 rd.top = dst.h - rd.top;
1096 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1097 return E_INVALIDARG;
1099 int w = rs.Width(), h = rs.Height();
1100 bool bottom_down = rd.top > rd.bottom;
1102 BYTE* d = NULL;
1103 BYTE* dUV = NULL;
1104 if (!bottom_down)
1106 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left;
1107 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left;
1109 else
1111 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left;
1112 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left;
1113 dst.pitch = -dst.pitch;
1115 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
1117 const BYTE* sa = reinterpret_cast<const BYTE*>(src.bits) + src.pitch*rs.top + rs.left;
1118 const BYTE* sy = sa + src.pitch*src.h;
1119 const BYTE* s_uv = sy + src.pitch*src.h;//UV
1121 return AlphaBltAnv12_Nv12(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
1124 STDMETHODIMP CMemSubPic::SetDirtyRectEx(CAtlList<CRect>* dirtyRectList )
1126 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1127 if(dirtyRectList!=NULL)
1129 POSITION pos = dirtyRectList->GetHeadPosition();
1130 if(m_spd.type == MSP_AYUV_PLANAR || m_alpha_blt_dst_type==MSP_IYUV || m_alpha_blt_dst_type==MSP_YV12
1131 || m_alpha_blt_dst_type==MSP_P010 || m_alpha_blt_dst_type==MSP_P016
1132 || m_alpha_blt_dst_type==MSP_NV12 || m_alpha_blt_dst_type==MSP_NV21 )
1134 while(pos!=NULL)
1136 CRect& cRectSrc = dirtyRectList->GetNext(pos);
1137 cRectSrc.left &= ~15;
1138 cRectSrc.right = (cRectSrc.right+15)&~15;
1139 if(cRectSrc.right>m_spd.w)
1141 cRectSrc.right = m_spd.w;
1143 cRectSrc.top &= ~1;
1144 cRectSrc.bottom = (cRectSrc.bottom+1)&~1;
1147 else if(m_spd.type == MSP_XY_AUYV || m_alpha_blt_dst_type==MSP_YUY2)
1149 while(pos!=NULL)
1151 CRect& cRectSrc = dirtyRectList->GetNext(pos);
1152 cRectSrc.left &= ~3;
1153 cRectSrc.right = (cRectSrc.right+3)&~3;
1157 return __super::SetDirtyRectEx(dirtyRectList);
1161 // static
// Blends a w x h block of 8-bit samples: for every position whose alpha byte
// is below 0xff, dst = ((dst * alpha) >> 8) + sub.
//   dst / dst_pitch   - destination plane and its row stride in bytes
//   sub / alpha       - source sample plane and alpha plane, both advanced by
//                       sub_pitch per row
// The SSE2 path is taken only when alpha, sub and dst share the same offset
// within a 16-byte block, both pitches are multiples of 16 and w > 32;
// otherwise every byte goes through the scalar loop.
1164 void CMemSubPic::AlphaBltYv12Luma(byte* dst, int dst_pitch,
1165 int w, int h,
1166 const byte* sub, const byte* alpha, int sub_pitch)
1168 if( (
1169 ((reinterpret_cast<intptr_t>(alpha) ^ reinterpret_cast<intptr_t>(sub))
1170 |(reinterpret_cast<intptr_t>(alpha) ^ reinterpret_cast<intptr_t>(dst))
1171 | static_cast<intptr_t>(sub_pitch)
1172 | static_cast<intptr_t>(dst_pitch) ) & 15 )==0
1173 && w > 32)
// head: bytes before the first 16-byte boundary; tail: bytes left after the
// last full 16-byte group; w1: number of bytes in full groups.
1175 int head = (16 - (reinterpret_cast<intptr_t>(alpha)&15))&15;
1176 int tail = (w-head) & 15;
1177 int w1 = w - head - tail;
1178 for(int i=0; i<h; i++, dst += dst_pitch, alpha += sub_pitch, sub += sub_pitch)
1180 const BYTE* sa = alpha;
1181 const BYTE* s2 = sub;
// NOTE(review): this end marker is measured from the row start, not from the
// aligned position reached by the head loop; whatever the SSE2 loop leaves
// uncovered is picked up by the scalar tail loop below.
1182 const BYTE* s2end_mod16 = s2 + w1;
1183 const BYTE* s2end = s2 + w;
1184 BYTE* d2 = dst;
// Scalar head: runs until s2 is 16-byte aligned (sa and d2 then are too,
// because the entry condition guarantees equal misalignment).
1186 for( ; (reinterpret_cast<intptr_t>(s2)&15) != 0; s2++, sa++, d2++)
1188 if(sa[0] < 0xff)
1190 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
1193 for(; s2 < s2end_mod16; s2+=16, sa+=16, d2+=16)
1195 pix_alpha_blend_yv12_luma_sse2(d2, sa, s2);
// Scalar tail for the remaining bytes of the row.
1197 for(; s2 < s2end; s2++, sa++, d2++)
1199 if(sa[0] < 0xff)
1201 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
1206 else //fix me: only a workaround for non-mod-16 size video
1208 for(int i=0; i<h; i++, dst += dst_pitch, alpha += sub_pitch, sub += sub_pitch)
1210 const BYTE* sa = alpha;
1211 const BYTE* s2 = sub;
1212 const BYTE* s2end = s2 + w;
1213 BYTE* d2 = dst;
1214 for(; s2 < s2end; s2+=1, sa+=1, d2+=1)
1216 if(sa[0] < 0xff)
1218 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
1219 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
1226 void CMemSubPic::AlphaBltYv12Chroma(byte* dst, int dst_pitch,
1227 int w, int chroma_h,
1228 const byte* sub_chroma, const byte* alpha, int sub_pitch)
1230 if( ((reinterpret_cast<intptr_t>(sub_chroma) |
1231 //reinterpret_cast<intptr_t>(dst) |
1232 reinterpret_cast<intptr_t>(alpha) | static_cast<intptr_t>(sub_pitch)
1233 //| (static_cast<intptr_t>(dst_pitch)&7)
1234 ) & 15 )==0 )
1236 int pitch = sub_pitch;
1237 for(int j = 0; j < chroma_h; j++, sub_chroma += sub_pitch*2, alpha += sub_pitch*2, dst += dst_pitch)
1239 hleft_vmid_mix_uv_yv12_sse2(dst, w, sub_chroma, alpha, sub_pitch);
1242 else//fix me: only a workaround for non-mod-16 size video
1244 for(int j = 0; j < chroma_h; j++, sub_chroma += sub_pitch*2, alpha += sub_pitch*2, dst += dst_pitch)
1246 hleft_vmid_mix_uv_yv12_c(dst, w, sub_chroma, alpha, sub_pitch);
1251 HRESULT CMemSubPic::AlphaBltAnv12_P010( const BYTE* src_a, const BYTE* src_y, const BYTE* src_uv, int src_pitch,
1252 BYTE* dst_y, BYTE* dst_uv, int dst_pitch, int w, int h )
1254 const BYTE* sa = src_a;
1255 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_y) | static_cast<intptr_t>(src_pitch) |
1256 reinterpret_cast<intptr_t>(dst_y) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1258 for(int i=0; i<h; i++, sa += src_pitch, src_y += src_pitch, dst_y += dst_pitch)
1260 const BYTE* sa2 = sa;
1261 const BYTE* s2 = src_y;
1262 const BYTE* s2end_mod16 = s2 + (w&~15);
1263 const BYTE* s2end = s2 + w;
1264 BYTE* d2 = dst_y;
1266 for(; s2 < s2end_mod16; s2+=16, sa2+=16, d2+=32)
1268 mix_16_y_p010_sse2(d2, s2, sa2);
1270 for( WORD* d3=reinterpret_cast<WORD*>(d2); s2 < s2end; s2++, sa2++, d3++)
1272 if(sa2[0] < 0xff)
1274 d3[0] = ((d3[0]*sa2[0])>>8) + (s2[0]<<8);
1279 else //fix me: only a workaround for non-mod-16 size video
1281 for(int i=0; i<h; i++, sa += src_pitch, src_y += src_pitch, dst_y += dst_pitch)
1283 const BYTE* sa2 = sa;
1284 const BYTE* s2 = src_y;
1285 const BYTE* s2end = s2 + w;
1286 WORD* d2 = reinterpret_cast<WORD*>(dst_y);
1287 for(; s2 < s2end; s2+=1, sa2+=1, d2+=1)
1289 if(sa2[0] < 0xff)
1291 d2[0] = ((d2[0]*sa2[0])>>8) + (s2[0]<<8);
1296 //UV
1297 int h2 = h/2;
1298 BYTE* d = dst_uv;
1299 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_uv) | static_cast<intptr_t>(src_pitch) |
1300 reinterpret_cast<intptr_t>(dst_uv) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1302 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1304 hleft_vmid_mix_uv_p010_sse2(d, w, src_uv, src_a, src_pitch);
1307 else
1309 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1311 hleft_vmid_mix_uv_p010_c(d, w, src_uv, src_a, src_pitch);
1314 __asm emms;
1315 return S_OK;
1318 HRESULT CMemSubPic::AlphaBltAnv12_Nv12( const BYTE* src_a, const BYTE* src_y, const BYTE* src_uv, int src_pitch,
1319 BYTE* dst_y, BYTE* dst_uv, int dst_pitch, int w, int h )
1321 AlphaBltYv12Luma( dst_y, dst_pitch, w, h, src_y, src_a, src_pitch );
1323 int h2 = h/2;
1324 if( (
1325 ((reinterpret_cast<intptr_t>(src_a) ^ reinterpret_cast<intptr_t>(src_uv))
1326 |(reinterpret_cast<intptr_t>(src_a) ^ reinterpret_cast<intptr_t>(dst_uv))
1327 | static_cast<intptr_t>(src_pitch)
1328 | static_cast<intptr_t>(dst_pitch) ) & 15) ==0 &&
1329 w > 16 )
1331 BYTE* d = dst_uv;
1333 int head = (16-(reinterpret_cast<intptr_t>(src_a)&15))&15;
1334 int tail = (w-head) & 15;
1335 int w00 = w - head - tail;
1337 ASSERT(w>0);//the calls to mix may failed if w==0
1338 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1340 hleft_vmid_mix_uv_nv12_c2(d, head, src_uv, src_a, src_pitch);
1341 hleft_vmid_mix_uv_nv12_sse2(d+head, w00, src_uv+head, src_a+head, src_pitch, head>0 ? -1 : 0);
1342 hleft_vmid_mix_uv_nv12_c2(d+head+w00, tail, src_uv+head+w00, src_a+head+w00, src_pitch, (w00+head)>0 ? -1 : 0);
1345 else
1347 BYTE* d = dst_uv;
1348 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1350 hleft_vmid_mix_uv_nv12_c(d, w, src_uv, src_a, src_pitch);
1354 __asm emms;
1355 return S_OK;
1359 // CMemSubPicAllocator
1362 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type, SIZE maxsize, int type/*=-1*/)
1363 : CSubPicExAllocatorImpl(maxsize, false, false)
1364 , m_alpha_blt_dst_type(alpha_blt_dst_type)
1365 , m_maxsize(maxsize)
1366 , m_type(type)
1368 if(m_type==-1)
1370 switch(alpha_blt_dst_type)
1372 case MSP_YUY2:
1373 m_type = MSP_XY_AUYV;
1374 break;
1375 case MSP_AYUV:
1376 m_type = MSP_AYUV;
1377 break;
1378 case MSP_IYUV:
1379 case MSP_YV12:
1380 case MSP_P010:
1381 case MSP_P016:
1382 case MSP_NV12:
1383 case MSP_NV21:
1384 m_type = MSP_AYUV_PLANAR;
1385 break;
1386 default:
1387 m_type = MSP_RGBA;
1388 break;
1393 // ISubPicAllocatorImpl
1395 bool CMemSubPicAllocator::AllocEx(bool fStatic, ISubPicEx** ppSubPic)
1397 if(!ppSubPic) {
1398 return false;
1400 SubPicDesc spd;
1401 spd.w = m_maxsize.cx;
1402 spd.h = m_maxsize.cy;
1403 spd.bpp = 32;
1404 spd.pitch = (spd.w*spd.bpp)>>3;
1405 spd.type = m_type;
1406 spd.bits = DNew BYTE[spd.pitch*spd.h];
1407 if(!spd.bits) {
1408 return false;
1410 *ppSubPic = DNew CMemSubPic(spd, m_alpha_blt_dst_type);
1411 if(!(*ppSubPic)) {
1412 return false;
1414 (*ppSubPic)->AddRef();
1415 return true;