Avoid Possible Bug.
[xy_vsfilter.git] / src / subpic / MemSubPic.cpp
blob367fccf09937bbc9d46fed0b0e11c865f1a2fc8e
1 /*
2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
22 #include "stdafx.h"
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
#if 0
#include <fstream>

// Debug-only helpers (compiled out by the enclosing "#if 0").
// They dump rectangles and pixel buffers as comma separated text.

// Writes the rectangle as "left,top,right,bottom".
static void SaveRect2File(const CRect& cRect, const char * filename)
{
    std::ofstream out(filename);
    out << cRect.left << "," << cRect.top << "," << cRect.right << "," << cRect.bottom;
}

// Dumps a 4-bytes-per-pixel region, one text line per pixel row, every byte as a decimal number.
static void SaveAxxx2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
{
    std::ofstream out(filename);
    const int width = cRect.Width();
    const int height = cRect.Height();

    BYTE* row = (BYTE*)spd.bits + spd.pitch*cRect.top + cRect.left*4;
    BYTE* const rows_end = row + spd.pitch*height;

    while(row < rows_end) {
        BYTE* const row_end = row + width*4;
        for(BYTE* px = row; px < row_end; px += 4) {
            out << (int)px[0] << "," << (int)px[1] << "," << (int)px[2] << "," << (int)px[3];
            if(px+4 >= row_end) {
                out << std::endl;   // last pixel of the row
            } else {
                out << ",";
            }
        }
        row += spd.pitch;
    }
    out.close();
}

// ARGB buffers share the generic 4-byte dump.
static void SaveArgb2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
{
    SaveAxxx2File(spd, cRect, filename);
}

// AYUV buffers share the generic 4-byte dump.
static void SaveAyuv2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
{
    SaveAxxx2File(spd, cRect, filename);
}

// Dumps a planar buffer: for every pixel one byte from the first plane, one from
// the plane spd.pitch*spd.h bytes further on, and two adjacent bytes from the plane after that.
static void SaveNvxx2File(SubPicDesc& spd, const CRect& cRect, const char * filename)
{
    std::ofstream out(filename);
    const int width = cRect.Width();
    const int height = cRect.Height();

    BYTE* row = (BYTE*)spd.bits;
    BYTE* const rows_end = row + spd.pitch*height;

    while(row < rows_end) {
        BYTE* p0 = row;
        BYTE* const p0_end = p0 + width;

        BYTE* p1 = p0 + spd.pitch*spd.h;
        BYTE* p2 = p1 + spd.pitch*spd.h;
        BYTE* p3 = p2 + 1;
        for(; p0 < p0_end; p0++, p1++, p2+=2, p3+=2) {
            out << (int)p0[0] << "," << (int)p1[0] << "," << (int)p2[0] << "," << (int)p3[0];
            if(p0+1 >= p0_end) {
                out << std::endl;
            } else {
                out << ",";
            }
        }
        row += spd.pitch;
    }
    out.close();
}

// Evaluates expr only the first time control reaches this call site.
#define ONCER(expr) {\
    static bool entered=false;\
    if(!entered)\
    {\
        entered=true;\
        expr;\
    }\
}
#else
// Debugging disabled: ONCER(expr) expands to nothing.
#define ONCER(expr)
#endif
112 // alpha blend functions
114 #include "xy_intrinsics.h"
117 // CMemSubPic
120 CMemSubPic::CMemSubPic(SubPicDesc& spd, int alpha_blt_dst_type)
121 : m_spd(spd), m_alpha_blt_dst_type(alpha_blt_dst_type)
123 m_maxsize.SetSize(spd.w, spd.h);
124 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
125 CRect allSpd(0,0,spd.w, spd.h);
126 m_rectListDirty.AddTail(allSpd);
129 CMemSubPic::~CMemSubPic()
131 delete [] m_spd.bits, m_spd.bits = NULL;
134 // ISubPic
136 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
138 return (void*)&m_spd;
141 STDMETHODIMP CMemSubPic::GetDesc(SubPicDesc& spd) const
143 spd.type = m_spd.type;
144 spd.w = m_size.cx;
145 spd.h = m_size.cy;
146 spd.bpp = m_spd.bpp;
147 spd.pitch = m_spd.pitch;
148 spd.bits = m_spd.bits;
149 spd.bitsU = m_spd.bitsU;
150 spd.bitsV = m_spd.bitsV;
151 spd.vidrect = m_vidrect;
152 return S_OK;
155 STDMETHODIMP CMemSubPic::CopyTo(ISubPicEx* pSubPic)
157 HRESULT hr;
158 if(FAILED(hr = __super::CopyTo(pSubPic))) {
159 return hr;
162 SubPicDesc src, dst;
163 if(FAILED(GetDesc(src)) || FAILED(pSubPic->GetDesc(dst))) {
164 return E_FAIL;
166 while(!m_rectListDirty.IsEmpty())
168 CRect& cRect = m_rectListDirty.GetHead();
169 int w = cRect.Width(), h = cRect.Height();
170 BYTE* s = (BYTE*)src.bits + src.pitch*cRect.top + cRect.left*4;
171 BYTE* d = (BYTE*)dst.bits + dst.pitch*cRect.top + cRect.left*4;
172 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
173 memcpy(d, s, w*4);
175 return S_OK;
// Resets the pixels of every dirty rectangle and empties the dirty list.
//   color - 32-bit fill value used for the non-planar formats.
// Always returns S_OK.
178 STDMETHODIMP CMemSubPic::ClearDirtyRect(DWORD color)
// Nothing dirty: nothing to clear (the loop below would not run either).
180 if(m_rectListDirty.IsEmpty()) {
181 return S_OK;
// Consume the list from the tail; each rect is removed as it is processed.
183 while(!m_rectListDirty.IsEmpty())
185 //pDirtyRect = m_rectListDirty.RemoveHead();
// NOTE(review): if RemoveTail() returns by value (as ATL's CAtlList does), this binds
// a temporary to a non-const reference, which only MSVC accepts - confirm.
186 CRect& dirtyRect = m_rectListDirty.RemoveTail();
// First byte of the rect: row offset plus left edge scaled by bytes per pixel.
187 BYTE* p = (BYTE*)m_spd.bits + m_spd.pitch*(dirtyRect.top) + dirtyRect.left*(m_spd.bpp>>3);
188 int w = dirtyRect.Width();
189 if(m_spd.type!=MSP_AYUV_PLANAR)
// Non-planar surface: fill each row of the rect with `color`.
191 for(int j = 0, h = dirtyRect.Height(); j < h; j++, p += m_spd.pitch)
193 #ifdef _WIN64
// presumably memsetd stores `color` as DWORDs over w*4 bytes - confirm against its definition
194 memsetd(p, color, w*4); // nya
195 #else
// x86: "rep stosd" stores eax (color) ecx (w) times starting at edi (p).
196 __asm
198 mov eax, color
199 mov ecx, w
200 mov edi, p
202 rep stosd
205 #endif
208 else
210 ///TODO:
211 ///FIX ME
// Planar AYUV: p walks the first plane; the other three planes are addressed at
// multiples of m_spd.h*m_spd.pitch bytes from it.
212 for(int j = 0, h = dirtyRect.Height(); j < h; j++, p += m_spd.pitch)
214 // memsetd(p, 0, m_rcDirty.Width());
215 //DbgLog((LOG_TRACE, 3, "w:%d", w));
216 //w = pDirtyRect->Width();
// First plane is set to 0xFF, the remaining three planes to 0.
217 memset(p, 0xFF, w);
218 memset(p+m_spd.h*m_spd.pitch, 0, w);
219 memset(p+m_spd.h*m_spd.pitch*2, 0, w);
220 memset(p+m_spd.h*m_spd.pitch*3, 0, w);
// The list is already empty once the loop ends; this call is a safeguard.
224 m_rectListDirty.RemoveAll();
225 return S_OK;
228 STDMETHODIMP CMemSubPic::Lock(SubPicDesc& spd)
230 return GetDesc(spd);
233 STDMETHODIMP CMemSubPic::Unlock( CAtlList<CRect>* dirtyRectList )
235 int src_type = m_spd.type;
236 int dst_type = m_alpha_blt_dst_type;
237 if( (src_type==MSP_RGBA && (dst_type == MSP_RGB32 ||
238 dst_type == MSP_RGB24 ||
239 dst_type == MSP_RGB16 ||
240 dst_type == MSP_RGB15))
242 (src_type==MSP_XY_AUYV && dst_type == MSP_YUY2)//ToDo: fix me MSP_AYUV
244 (src_type==MSP_AYUV && dst_type == MSP_AYUV)
246 (src_type==MSP_AYUV_PLANAR && (dst_type == MSP_IYUV ||
247 dst_type == MSP_YV12 ||
248 dst_type == MSP_P010 ||
249 dst_type == MSP_P016 ||
250 dst_type == MSP_NV12 ||
251 dst_type == MSP_NV21)))
253 return UnlockOther(dirtyRectList);
255 else if(src_type==MSP_RGBA && (dst_type == MSP_YUY2 ||
256 dst_type == MSP_AYUV || //ToDo: fix me MSP_AYUV
257 dst_type == MSP_IYUV ||
258 dst_type == MSP_YV12 ||
259 dst_type == MSP_NV12 ||
260 dst_type == MSP_NV21 ||
261 dst_type == MSP_P010 ||
262 dst_type == MSP_P016))
264 return UnlockRGBA_YUV(dirtyRectList);
266 return E_NOTIMPL;
269 HRESULT CMemSubPic::UnlockOther(CAtlList<CRect>* dirtyRectList)
271 SetDirtyRectEx(dirtyRectList);
272 if(m_rectListDirty.IsEmpty()) {
273 return S_OK;
276 POSITION pos = m_rectListDirty.GetHeadPosition();
277 while(pos!=NULL)
279 const CRect& cRect = m_rectListDirty.GetNext(pos);
280 int w = cRect.Width(), h = cRect.Height();
281 if (w<=0 || h<=0)
283 continue;
286 BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*(cRect.top) + cRect.left*4;
287 BYTE* bottom = top + m_spd.pitch*h;
288 if(m_alpha_blt_dst_type == MSP_RGB16)
290 for(; top < bottom ; top += m_spd.pitch)
292 DWORD* s = (DWORD*)top;
293 DWORD* e = s + w;
294 for(; s < e; s++)
296 *s = ((*s>>3)&0x1f000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
297 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
301 else if(m_alpha_blt_dst_type == MSP_RGB15)
303 for(; top < bottom; top += m_spd.pitch)
305 DWORD* s = (DWORD*)top;
306 DWORD* e = s + w;
307 for(; s < e; s++)
309 *s = ((*s>>3)&0x1f000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
310 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
314 else if(m_alpha_blt_dst_type == MSP_YUY2)
316 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top, m_spd.pitch*(h-1)) );
318 for(BYTE* tempTop=top; tempTop < bottom ; tempTop += m_spd.pitch)
320 BYTE* s = tempTop;
321 BYTE* e = s + w*4;
322 BYTE last_v = s[0], last_u=s[2];
323 for(; s < e; s+=8) // AUYV AUYV -> AxYU AxYV
325 BYTE tmp = s[4];
326 s[4] = (last_v + 2*s[0] + s[4] + 2)>>2;
327 last_v = tmp;
329 s[0] = (last_u + 2*s[2] + s[6] + 2)>>2;
330 last_u = s[6];
334 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top, m_spd.pitch*(h-1)) );
336 else if(m_alpha_blt_dst_type == MSP_YV12 || m_alpha_blt_dst_type == MSP_IYUV )
338 //nothing to do
340 else if ( m_alpha_blt_dst_type == MSP_P010 || m_alpha_blt_dst_type == MSP_P016
341 || m_alpha_blt_dst_type == MSP_NV12 )
343 SubsampleAndInterlace(cRect, true);
345 else if( m_alpha_blt_dst_type == MSP_NV21 )
347 SubsampleAndInterlace(cRect, false);
350 return S_OK;
353 HRESULT CMemSubPic::UnlockRGBA_YUV(CAtlList<CRect>* dirtyRectList)
355 //debug
356 ONCER( SaveRect2File(dirtyRectList->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
357 ONCER( SaveArgb2File(m_spd, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
359 SetDirtyRectEx(dirtyRectList);
361 ONCER( SaveRect2File(dirtyRectList->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
362 if(m_rectListDirty.IsEmpty()) {
363 return S_OK;
366 POSITION pos = m_rectListDirty.GetHeadPosition();
367 while(pos!=NULL)
369 const CRect& cRect = m_rectListDirty.GetNext(pos);
370 int w = cRect.Width(), h = cRect.Height();
371 if(w<=0 || h<=0)
373 continue;
376 BYTE* top = (BYTE*)m_spd.bits + m_spd.pitch*cRect.top + cRect.left*4;
377 BYTE* bottom = top + m_spd.pitch*h;
379 if( m_alpha_blt_dst_type == MSP_YUY2 ||
380 m_alpha_blt_dst_type == MSP_YV12 ||
381 m_alpha_blt_dst_type == MSP_IYUV ||
382 m_alpha_blt_dst_type == MSP_P010 ||
383 m_alpha_blt_dst_type == MSP_P016 ||
384 m_alpha_blt_dst_type == MSP_NV12 ||
385 m_alpha_blt_dst_type == MSP_NV21) {
386 for(; top < bottom ; top += m_spd.pitch) {
387 BYTE* s = top;
388 BYTE* e = s + w*4;
389 DWORD last_yuv = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
390 for(; s < e; s+=8) { // ARGB ARGB -> AxYU AxYV
391 if((s[3]+s[7]+(last_yuv>>24)) < 0xff*3) {
392 DWORD tmp1 = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
393 DWORD tmp2 = ColorConvTable::PreMulArgb2Ayuv(s[7], s[6], s[5], s[4]);
395 s[1] = (tmp1>>16)&0xff;
396 s[5] = (tmp2>>16)&0xff;
398 s[0] = (((last_yuv>>8)&0xff) + 2*((tmp1>>8)&0xff) + ((tmp2>>8)&0xff) + 2)/4;
399 s[4] = ((last_yuv&0xff) + 2*(tmp1&0xff) + (tmp2&0xff) + 2)/4;
400 last_yuv = tmp2;
401 } else {
402 last_yuv = ColorConvTable::PreMulArgb2Ayuv(s[7], s[6], s[5], s[4]);
404 s[1] = s[5] = 0;
405 s[0] = s[4] = 0;
410 else if(m_alpha_blt_dst_type == MSP_AYUV) {
411 for(; top < bottom ; top += m_spd.pitch) {
412 BYTE* s = top;
413 BYTE* e = s + w*4;
414 for(; s < e; s+=4) { // ARGB -> AYUV
415 if(s[3] < 0xff) {
416 *((DWORD*)s) = ColorConvTable::PreMulArgb2Ayuv(s[3], s[2], s[1], s[0]);
417 } else {
418 s[0] = s[1] = 0;
419 s[2] = 0;
426 ONCER( SaveAxxx2File(m_spd, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
427 return S_OK;
430 void CMemSubPic::SubsampleAndInterlace( const CRect& cRect, bool u_first )
432 //fix me: check alignment and log error
433 int w = cRect.Width(), h = cRect.Height();
434 BYTE* u_plan = reinterpret_cast<BYTE*>(m_spd.bits) + m_spd.pitch*m_spd.h*2;
435 BYTE* u_start = u_plan + m_spd.pitch*(cRect.top)+ cRect.left;
436 BYTE* v_start = u_start + m_spd.pitch*m_spd.h;
437 BYTE* dst = u_start;
438 if(!u_first)
440 BYTE* tmp = v_start;
441 v_start = u_start;
442 u_start = tmp;
445 //Todo: fix me.
446 //Walkarround for alignment
447 if ( ((m_spd.pitch|w)&15) == 0 )
449 ASSERT(w%16==0);
450 for (int i=0;i<h;i+=2)
452 hleft_vmid_subsample_and_interlace_2_line_sse2(dst, u_start, v_start, w, m_spd.pitch);
453 u_start += 2*m_spd.pitch;
454 v_start += 2*m_spd.pitch;
455 dst += m_spd.pitch;
458 else
460 for (int i=0;i<h;i+=2)
462 hleft_vmid_subsample_and_interlace_2_line_c(dst, u_start, v_start, w, m_spd.pitch);
463 u_start += 2*m_spd.pitch;
464 v_start += 2*m_spd.pitch;
465 dst += m_spd.pitch;
470 STDMETHODIMP CMemSubPic::AlphaBlt( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
472 if(!pSrc || !pDst || !pTarget) {
473 return E_POINTER;
475 int src_type = m_spd.type;
476 int dst_type = pTarget->type;
478 if( (src_type==MSP_RGBA && (dst_type == MSP_RGB32 ||
479 dst_type == MSP_RGB24 ||
480 dst_type == MSP_RGB16 ||
481 dst_type == MSP_RGB15 ||
482 dst_type == MSP_RGBA ||
483 dst_type == MSP_YUY2 ||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
484 dst_type == MSP_AYUV ))
486 (src_type==MSP_XY_AUYV && dst_type == MSP_YUY2)//ToDo: fix me MSP_AYUV
488 (src_type==MSP_AYUV && dst_type == MSP_AYUV)
490 (src_type==MSP_AYUV_PLANAR && (dst_type == MSP_IYUV ||
491 dst_type == MSP_YV12)) )
493 return AlphaBltOther(pSrc, pDst, pTarget);
495 else if ( src_type==MSP_AYUV_PLANAR && (dst_type == MSP_NV12 ||
496 dst_type == MSP_NV21 ) )
498 return AlphaBltAnv12_Nv12(pSrc, pDst, pTarget);
501 else if( src_type==MSP_AYUV_PLANAR && (dst_type == MSP_P010 ||
502 dst_type == MSP_P016 ) )
504 return AlphaBltAnv12_P010(pSrc, pDst, pTarget);
506 else if( src_type==MSP_RGBA && (dst_type == MSP_IYUV ||
507 dst_type == MSP_YV12))
509 return AlphaBltAxyuAxyv_Yv12(pSrc, pDst, pTarget);
511 else if( src_type==MSP_RGBA && (dst_type == MSP_NV12||
512 dst_type == MSP_NV21))
514 return AlphaBltAxyuAxyv_Nv12(pSrc, pDst, pTarget);
516 else if( src_type==MSP_RGBA && (dst_type == MSP_P010 ||
517 dst_type == MSP_P016))
519 return AlphaBltAxyuAxyv_P010(pSrc, pDst, pTarget);
521 return E_NOTIMPL;
// Blends the source rectangle of this subpicture onto the target surface for
// the RGBA, RGB32/AYUV, RGB24, RGB16, RGB15, YUY2 and YV12/IYUV target types.
//   pSrc    - rectangle inside this subpicture
//   pDst    - rectangle inside the target; must have the same width and height as pSrc
//   pTarget - target surface; a negative height is handled by mirroring pDst vertically
// Returns E_INVALIDARG when the rectangle sizes differ, E_NOTIMPL for a target
// type without a handler, S_OK otherwise.
524 HRESULT CMemSubPic::AlphaBltOther(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
526 const SubPicDesc& src = m_spd;
527 SubPicDesc dst = *pTarget; // copy, because we might modify it
529 CRect rs(*pSrc), rd(*pDst);
// Negative target height: make it positive and mirror the destination rect around it.
530 if(dst.h < 0)
532 dst.h = -dst.h;
533 rd.bottom = dst.h - rd.bottom;
534 rd.top = dst.h - rd.top;
536 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
537 return E_INVALIDARG;
539 int w = rs.Width(), h = rs.Height();
// s / d: first byte of the source / destination rectangle.
540 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);//rs.left*4
541 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + ((rd.left*dst.bpp)>>3);
// Mirrored destination: start on row rd.top-1 and negate the pitch so that
// "d += dst.pitch" walks towards lower rows.
542 if(rd.top > rd.bottom)
544 if(dst.type == MSP_RGB32 || dst.type == MSP_RGB24
545 || dst.type == MSP_RGB16 || dst.type == MSP_RGB15
546 || dst.type == MSP_YUY2 || dst.type == MSP_AYUV)
548 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*dst.bpp>>3);
550 else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV)
// The luma plane is addressed with 8 bits per pixel regardless of dst.bpp.
552 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + (rd.left*8>>3);
554 else
556 return E_NOTIMPL;
558 dst.pitch = -dst.pitch;
560 DbgLog((LOG_TRACE, 5, TEXT("w=%d h=%d"), w, h));
561 switch(dst.type)
563 case MSP_RGBA:
// The target is overwritten rather than blended: each colour channel is scaled
// by 256/(256 - s2[3]) and the alpha byte is inverted (0xff - s2[3]).
// Pixels with s2[3] == 0xff are skipped.
564 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
566 BYTE* s2 = s;
567 BYTE* s2end = s2 + w*4;
568 DWORD* d2 = (DWORD*)d;
569 for(; s2 < s2end; s2 += 4, d2++)
571 if(s2[3] < 0xff)
573 DWORD bd =0x00000100 -( (DWORD) s2[3]);
574 DWORD B = ((*((DWORD*)s2)&0x000000ff)<<8)/bd;
575 DWORD V = ((*((DWORD*)s2)&0x0000ff00)/bd)<<8;
576 DWORD R = (((*((DWORD*)s2)&0x00ff0000)>>8)/bd)<<16;
// '&' binds tighter than '|', so only the alpha term is masked here.
577 *d2 = B | V | R
578 | (0xff000000-(*((DWORD*)s2)&0xff000000))&0xff000000;
582 break;
583 case MSP_RGB32:
584 case MSP_AYUV: //ToDo: fix me MSP_VUYA indeed?
// dst = dst*s2[3]/256 + src, with channels 0 and 2 processed together through
// the 0x00ff00ff mask and channel 1 through 0x0000ff00.
585 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
587 BYTE* s2 = s;
588 BYTE* s2end = s2 + w*4;
589 DWORD* d2 = (DWORD*)d;
590 for(; s2 < s2end; s2 += 4, d2++)
// NOTE(review): the _WIN64 variant additionally scales the source by
// ia = 256 - s2[3], which the x86 variant does not - confirm which is intended.
592 #ifdef _WIN64
593 DWORD ia = 256-s2[3];
594 if(s2[3] < 0xff) {
595 *d2 = ((((*d2&0x00ff00ff)*s2[3])>>8) + (((*((DWORD*)s2)&0x00ff00ff)*ia)>>8)&0x00ff00ff)
596 | ((((*d2&0x0000ff00)*s2[3])>>8) + (((*((DWORD*)s2)&0x0000ff00)*ia)>>8)&0x0000ff00);
598 #else
599 if(s2[3] < 0xff)
601 *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff)
602 | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00);
604 #endif
607 break;
608 case MSP_RGB24:
// Same per-channel formula; the destination advances 3 bytes per pixel.
609 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
611 BYTE* s2 = s;
612 BYTE* s2end = s2 + w*4;
613 BYTE* d2 = d;
614 for(; s2 < s2end; s2 += 4, d2 += 3)
616 if(s2[3] < 0xff)
618 d2[0] = ((d2[0]*s2[3])>>8) + s2[0];
619 d2[1] = ((d2[1]*s2[3])>>8) + s2[1];
620 d2[2] = ((d2[2]*s2[3])>>8) + s2[2];
624 break;
625 case MSP_RGB16:
// 5-6-5 target. Alpha is compared against 0x1f and applied with ">>5", matching
// the 5-bit alpha UnlockOther leaves in the top byte for this destination type.
626 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
628 BYTE* s2 = s;
629 BYTE* s2end = s2 + w*4;
630 WORD* d2 = (WORD*)d;
631 for(; s2 < s2end; s2 += 4, d2++)
633 if(s2[3] < 0x1f)
635 *d2 = (WORD)((((((*d2&0xf81f)*s2[3])>>5) + (*(DWORD*)s2&0xf81f))&0xf81f)
636 | (((((*d2&0x07e0)*s2[3])>>5) + (*(DWORD*)s2&0x07e0))&0x07e0));
637 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
638 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
639 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
644 break;
645 case MSP_RGB15:
// 5-5-5 target, same scheme as RGB16 with the 5-5-5 channel masks.
646 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
648 BYTE* s2 = s;
649 BYTE* s2end = s2 + w*4;
650 WORD* d2 = (WORD*)d;
651 for(; s2 < s2end; s2 += 4, d2++)
653 if(s2[3] < 0x1f)
655 *d2 = (WORD)((((((*d2&0x7c1f)*s2[3])>>5) + (*(DWORD*)s2&0x7c1f))&0x7c1f)
656 | (((((*d2&0x03e0)*s2[3])>>5) + (*(DWORD*)s2&0x03e0))&0x03e0));
657 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
658 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
659 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
664 break;
665 case MSP_YUY2:
// Two source pixels (8 bytes) produce one destination DWORD. The chroma alpha
// `ia` is the average of the previous, current (weighted twice) and next alpha;
// each luma sample uses its own pixel's alpha.
666 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
668 unsigned int ia, c;
669 BYTE* s2 = s;
670 BYTE* s2end = s2 + w*4;
671 DWORD* d2 = (DWORD*)d;
672 ASSERT(w>0);
673 int last_a = w>0?s2[3]:0;
674 for(; s2 < s2end; s2 += 8, d2++)
676 ia = (last_a + 2*s2[3] + s2[7])>>2;
677 last_a = s2[7];
678 if(ia < 0xff)
680 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
681 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
682 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
683 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
684 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
// Pack the four per-byte alphas and the four source bytes in destination byte order.
686 ia = (ia<<24)|(s2[7]<<16)|(ia<<8)|s2[3];
687 c = (s2[4]<<24)|(s2[5]<<16)|(s2[0]<<8)|s2[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
// MMX: widen bytes to words, dst = ((dst * (alpha>>1)) >> 7) + src, saturate and pack.
// NOTE(review): this inline asm is not guarded by _WIN64, and MSVC does not
// support __asm on x64 - confirm how 64-bit builds compile this case.
688 __asm
690 mov edi, d2
691 pxor mm0, mm0
692 movd mm2, c
693 punpcklbw mm2, mm0
694 movd mm3, [edi]
695 punpcklbw mm3, mm0
696 movd mm4, ia
697 punpcklbw mm4, mm0
698 psraw mm4, 1 //or else, overflow because psraw shift in sign bit
699 pmullw mm3, mm4
700 psraw mm3, 7
701 paddsw mm3, mm2
702 packuswb mm3, mm3
703 movd [edi], mm3
// Leave MMX state so that later x87 floating point code works.
708 __asm emms;
709 break;
710 case MSP_YV12:
711 case MSP_IYUV:
// Planar target: the source is read as four consecutive planes of
// src.pitch*src.h bytes (alpha at src_origin, then the luma, U and V planes).
713 //dst.pitch = abs(dst.pitch);
714 int h2 = h/2;
// Default chroma pitch: half the luma pitch.
715 if(!dst.pitchUV)
717 dst.pitchUV = abs(dst.pitch)/2;
// Default chroma planes follow the luma plane; YV12 swaps the U and V pointers.
719 if(!dst.bitsU || !dst.bitsV)
721 dst.bitsU = (BYTE*)dst.bits + abs(dst.pitch)*dst.h;
722 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
723 if(dst.type == MSP_YV12)
725 BYTE* p = dst.bitsU;
726 dst.bitsU = dst.bitsV;
727 dst.bitsV = p;
// dd[0] / dd[1]: first chroma byte of the destination rect in the U / V plane.
730 BYTE* dd[2];
731 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
732 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
733 if(rd.top > rd.bottom)
735 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
736 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
737 dst.pitchUV = -dst.pitchUV;
740 BYTE* src_origin= (BYTE*)src.bits + src.pitch*rs.top + rs.left;
742 BYTE* ss[2];
743 ss[0] = src_origin + src.pitch*src.h*2;//U
744 ss[1] = src_origin + src.pitch*src.h*3;//V
746 AlphaBltYv12Luma( d, dst.pitch, w, h, src_origin + src.pitch*src.h, src_origin, src.pitch );
748 AlphaBltYv12Chroma( dd[0], dst.pitchUV, w, h2, ss[0], src_origin, src.pitch);
749 AlphaBltYv12Chroma( dd[1], dst.pitchUV, w, h2, ss[1], src_origin, src.pitch);
751 __asm emms;
753 break;
754 default:
755 return E_NOTIMPL;
756 break;
759 //emms costs about 40 CPU cycles
760 //__asm emms;
761 return S_OK;
764 HRESULT CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
766 const SubPicDesc& src = m_spd;
767 SubPicDesc dst = *pTarget; // copy, because we might modify it
769 CRect rs(*pSrc), rd(*pDst);
771 if(dst.h < 0) {
772 dst.h = -dst.h;
773 rd.bottom = dst.h - rd.bottom;
774 rd.top = dst.h - rd.top;
777 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
778 return E_INVALIDARG;
781 int w = rs.Width(), h = rs.Height();
784 BYTE* s = static_cast<BYTE*>(src.bits) + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
785 BYTE* d = static_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
787 if(rd.top > rd.bottom) {
788 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
790 dst.pitch = -dst.pitch;
793 for(ptrdiff_t i=0; i<h; i++, s += src.pitch, d += dst.pitch)
795 BYTE* s2 = s;
796 BYTE* s2end = s2 + w*4;
797 WORD* d2 = reinterpret_cast<WORD*>(d);
798 for(; s2 < s2end; s2 += 4, d2++)
800 if(s2[3] < 0xff) {
801 d2[0] = ((d2[0]*s2[3])>>8) + (s2[1]<<8);
806 //UV
807 int h2 = h/2;
808 if(!dst.pitchUV)
810 dst.pitchUV = abs(dst.pitch);
812 if(!dst.bitsU || !dst.bitsV)
814 dst.bitsU = static_cast<BYTE*>(dst.bits) + abs(dst.pitch)*dst.h;
815 dst.bitsV = dst.bitsU + 2;
817 BYTE* ddUV = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left*2;
818 if(rd.top > rd.bottom)
820 ddUV = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left*2;
821 dst.pitchUV = -dst.pitchUV;
824 s = static_cast<BYTE*>(src.bits) + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
826 d = ddUV;
827 int pitch = src.pitch;
828 for(int j = 0; j < h2; j++, s += 2*src.pitch, d += dst.pitchUV )
830 BYTE* s2 = s;
831 WORD* d2=reinterpret_cast<WORD*>(d);
832 WORD* d2_end = reinterpret_cast<WORD*>(d+2*w);
833 DWORD last_alpha = s2[3]+s2[3+src.pitch];
834 for( ; d2<d2_end; s2+=8, d2+=2)
836 unsigned int ia = (
837 last_alpha +
838 (s2[3] + s2[3+src.pitch])*2 +
839 s2[3+4]+ s2[3+4+src.pitch]);
840 last_alpha = s2[3+4]+ s2[3+4+src.pitch];
841 if( ia!=0xFF*8 )
843 d2[0] = (((d2[0])*ia)>>11) + ((s2[0] + s2[0+src.pitch])<<7);
844 d2[1] = (((d2[1])*ia)>>11) + ((s2[4] + s2[4+src.pitch])<<7);
849 return S_OK;
852 HRESULT CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
854 const SubPicDesc& src = m_spd;
855 SubPicDesc dst = *pTarget; // copy, because we might modify it
857 CRect rs(*pSrc), rd(*pDst);
859 if(dst.h < 0) {
860 dst.h = -dst.h;
861 rd.bottom = dst.h - rd.bottom;
862 rd.top = dst.h - rd.top;
865 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
866 return E_INVALIDARG;
869 int w = rs.Width(), h = rs.Height();
871 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
872 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + rd.left;
874 if(rd.top > rd.bottom) {
875 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
877 dst.pitch = -dst.pitch;
880 for(ptrdiff_t j = 0; j < h; j++, s += src.pitch, d += dst.pitch) {
881 BYTE* s2 = s;
882 BYTE* s2end = s2 + w*4;
883 BYTE* d2 = d;
884 for(; s2 < s2end; s2 += 4, d2++) {
885 if(s2[3] < 0xff) {
886 d2[0] = ((d2[0]*s2[3])>>8) + s2[1];
890 dst.pitch = abs(dst.pitch);
892 int h2 = h/2;
894 if(!dst.pitchUV) {
895 dst.pitchUV = dst.pitch/2;
898 BYTE* ss[2];
899 ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4;
900 ss[1] = ss[0] + 4;
902 if(!dst.bitsU || !dst.bitsV) {
903 dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h;
904 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
906 if(dst.type == MSP_YV12) {
907 BYTE* p = dst.bitsU;
908 dst.bitsU = dst.bitsV;
909 dst.bitsV = p;
913 BYTE* dd[2];
914 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
915 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
917 if(rd.top > rd.bottom) {
918 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
919 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
920 dst.pitchUV = -dst.pitchUV;
923 for(ptrdiff_t i = 0; i < 2; i++) {
924 s = ss[i];
925 d = dd[i];
926 BYTE* a = ss[0]+3;
927 for(ptrdiff_t j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, a += src.pitch*2) {
928 BYTE* s2 = s;
929 BYTE* s2end = s2 + w*4;
930 BYTE* d2 = d;
931 BYTE* a2 = a;
933 DWORD last_alpha = a2[0]+a2[0+src.pitch];
934 for(; s2 < s2end; s2 += 8, d2++, a2 += 8) {
935 unsigned int ia = (last_alpha + 2*(a2[0]+a2[0+src.pitch]) + a2[4] + a2[4+src.pitch] + 4 )>>3;
936 last_alpha = a2[4] + a2[4+src.pitch];
937 if(ia < 0xff) {
938 *d2 = ((*d2*ia)>>8) + ((s2[0]+s2[src.pitch])>>1);
944 return S_OK;
947 HRESULT CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget)
949 ONCER( SaveArgb2File(*pTarget, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
950 const SubPicDesc& src = m_spd;
951 SubPicDesc dst = *pTarget; // copy, because we might modify it
953 CRect rs(*pSrc), rd(*pDst);
955 if(dst.h < 0) {
956 dst.h = -dst.h;
957 rd.bottom = dst.h - rd.bottom;
958 rd.top = dst.h - rd.top;
961 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
962 return E_INVALIDARG;
965 int w = rs.Width(), h = rs.Height();
967 BYTE* s = (BYTE*)src.bits + src.pitch*rs.top + ((rs.left*src.bpp)>>3);
968 BYTE* d = (BYTE*)dst.bits + dst.pitch*rd.top + rd.left;
970 if(rd.top > rd.bottom) {
971 d = (BYTE*)dst.bits + dst.pitch*(rd.top-1) + rd.left;
973 dst.pitch = -dst.pitch;
976 for(ptrdiff_t j = 0; j < h; j++, s += src.pitch, d += dst.pitch) {
977 BYTE* s2 = s;
978 BYTE* s2end = s2 + w*4;
979 BYTE* d2 = d;
980 for(; s2 < s2end; s2 += 4, d2++) {
981 if(s2[3] < 0xff) {
982 d2[0] = ((d2[0]*s2[3])>>8) + s2[1];
986 dst.pitch = abs(dst.pitch);
988 int h2 = h/2;
990 if(!dst.pitchUV) {
991 dst.pitchUV = dst.pitch;
994 BYTE* ss[2];
995 ss[0] = (BYTE*)src.bits + src.pitch*rs.top + rs.left*4;
996 ss[1] = ss[0] + 4;
998 if(!dst.bitsU || !dst.bitsV) {
999 dst.bitsU = (BYTE*)dst.bits + dst.pitch*dst.h;
1000 dst.bitsV = dst.bitsU + 1;
1002 if(dst.type == MSP_NV21) {
1003 BYTE* p = dst.bitsU;
1004 dst.bitsU = dst.bitsV;
1005 dst.bitsV = p;
1009 BYTE* dd[2];
1010 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left;
1011 dd[1] = dd[0]+1;
1013 if(rd.top > rd.bottom) {
1014 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left;
1015 dd[1] = dd[0]+1;
1016 dst.pitchUV = -dst.pitchUV;
1019 for(ptrdiff_t i = 0; i < 2; i++) {
1020 s = ss[i];
1021 d = dd[i];
1022 BYTE* a = ss[0]+3;
1023 for(ptrdiff_t j = 0; j < h2; j++, s += src.pitch*2, d += dst.pitchUV, a += src.pitch*2) {
1024 BYTE* s2 = s;
1025 BYTE* s2end = s2 + w*4;
1026 BYTE* d2 = d;
1027 BYTE* a2 = a;
1028 DWORD last_alpha = a2[0]+a2[0+src.pitch];
1029 for(; s2 < s2end; s2 += 8, d2+=2, a2 += 8) {
1030 unsigned int ia = (last_alpha+2*(a2[0]+a2[0+src.pitch])+a2[4]+a2[4+src.pitch]+4)>>3;
1031 last_alpha = a2[4]+a2[4+src.pitch];
1032 if(ia < 0xff) {
1033 *d2 = ((*d2*ia)>>8) + ((s2[0]+s2[src.pitch])>>1);
1039 ONCER( SaveArgb2File(*pTarget, CRect(CPoint(0,0), m_size), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1040 return S_OK;
1043 HRESULT CMemSubPic::AlphaBltAnv12_P010( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
1045 //fix me: check colorspace and log error
1046 const SubPicDesc& src = m_spd;
1047 SubPicDesc dst = *pTarget; // copy, because we might modify it
1049 CRect rs(*pSrc), rd(*pDst);
1050 if(dst.h < 0)
1052 dst.h = -dst.h;
1053 rd.bottom = dst.h - rd.bottom;
1054 rd.top = dst.h - rd.top;
1056 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1057 return E_INVALIDARG;
1059 int w = rs.Width(), h = rs.Height();
1060 bool bottom_down = rd.top > rd.bottom;
1062 BYTE* d = NULL;
1063 BYTE* dUV = NULL;
1064 if(!bottom_down)
1066 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
1067 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left*2;
1069 else
1071 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left*2;
1072 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left*2;
1073 dst.pitch = -dst.pitch;
1075 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
1077 const BYTE* sa = reinterpret_cast<const BYTE*>(src.bits) + src.pitch*rs.top + rs.left;
1078 const BYTE* sy = sa + src.pitch*src.h;
1079 const BYTE* s_uv = sy + src.pitch*src.h;//UV
1080 return AlphaBltAnv12_P010(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
1083 HRESULT CMemSubPic::AlphaBltAnv12_Nv12( const RECT* pSrc, const RECT* pDst, SubPicDesc* pTarget )
1085 //fix me: check colorspace and log error
1086 const SubPicDesc& src = m_spd;
1087 SubPicDesc dst = *pTarget; // copy, because we might modify it
1089 CRect rs(*pSrc), rd(*pDst);
1090 if(dst.h < 0)
1092 dst.h = -dst.h;
1093 rd.bottom = dst.h - rd.bottom;
1094 rd.top = dst.h - rd.top;
1096 if(rs.Width() != rd.Width() || rs.Height() != abs(rd.Height())) {
1097 return E_INVALIDARG;
1099 int w = rs.Width(), h = rs.Height();
1100 bool bottom_down = rd.top > rd.bottom;
1102 BYTE* d = NULL;
1103 BYTE* dUV = NULL;
1104 if (!bottom_down)
1106 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left;
1107 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left;
1109 else
1111 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left;
1112 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left;
1113 dst.pitch = -dst.pitch;
1115 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
1117 const BYTE* sa = reinterpret_cast<const BYTE*>(src.bits) + src.pitch*rs.top + rs.left;
1118 const BYTE* sy = sa + src.pitch*src.h;
1119 const BYTE* s_uv = sy + src.pitch*src.h;//UV
1121 return AlphaBltAnv12_Nv12(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
1124 STDMETHODIMP CMemSubPic::SetDirtyRectEx(CAtlList<CRect>* dirtyRectList )
1126 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1127 if(dirtyRectList!=NULL)
1129 POSITION pos = dirtyRectList->GetHeadPosition();
1130 if(m_spd.type == MSP_AYUV_PLANAR || m_alpha_blt_dst_type==MSP_IYUV || m_alpha_blt_dst_type==MSP_YV12
1131 || m_alpha_blt_dst_type==MSP_P010 || m_alpha_blt_dst_type==MSP_P016
1132 || m_alpha_blt_dst_type==MSP_NV12 || m_alpha_blt_dst_type==MSP_NV21 )
1134 while(pos!=NULL)
1136 CRect& cRectSrc = dirtyRectList->GetNext(pos);
1137 cRectSrc.left &= ~15;
1138 cRectSrc.right = (cRectSrc.right+15)&~15;
1139 if(cRectSrc.right>m_spd.w)
1141 cRectSrc.right = m_spd.w;
1143 cRectSrc.top &= ~1;
1144 cRectSrc.bottom = (cRectSrc.bottom+1)&~1;
1147 else if(m_spd.type == MSP_XY_AUYV || m_alpha_blt_dst_type==MSP_YUY2)
1149 while(pos!=NULL)
1151 CRect& cRectSrc = dirtyRectList->GetNext(pos);
1152 cRectSrc.left &= ~3;
1153 cRectSrc.right = (cRectSrc.right+3)&~3;
1157 return __super::SetDirtyRectEx(dirtyRectList);
1161 // static
1164 void CMemSubPic::AlphaBltYv12Luma(byte* dst, int dst_pitch,
1165 int w, int h,
1166 const byte* sub, const byte* alpha, int sub_pitch)
1168 if( ((reinterpret_cast<intptr_t>(alpha) | reinterpret_cast<intptr_t>(sub) | static_cast<intptr_t>(sub_pitch) |
1169 reinterpret_cast<intptr_t>(dst) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1171 for(int i=0; i<h; i++, dst += dst_pitch, alpha += sub_pitch, sub += sub_pitch)
1173 const BYTE* sa = alpha;
1174 const BYTE* s2 = sub;
1175 const BYTE* s2end_mod16 = s2 + (w&~15);
1176 const BYTE* s2end = s2 + w;
1177 BYTE* d2 = dst;
1179 for(; s2 < s2end_mod16; s2+=16, sa+=16, d2+=16)
1181 pix_alpha_blend_yv12_luma_sse2(d2, sa, s2);
1183 for(; s2 < s2end; s2++, sa++, d2++)
1185 if(sa[0] < 0xff)
1187 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
1192 else //fix me: only a workaround for non-mod-16 size video
1194 for(int i=0; i<h; i++, dst += dst_pitch, alpha += sub_pitch, sub += sub_pitch)
1196 const BYTE* sa = alpha;
1197 const BYTE* s2 = sub;
1198 const BYTE* s2end_mod16 = s2 + (w&~15);
1199 const BYTE* s2end = s2 + w;
1200 BYTE* d2 = dst;
1201 for(; s2 < s2end; s2+=1, sa+=1, d2+=1)
1203 if(sa[0] < 0xff)
1205 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
1206 d2[0] = ((d2[0]*sa[0])>>8) + s2[0];
1213 void CMemSubPic::AlphaBltYv12Chroma(byte* dst, int dst_pitch,
1214 int w, int chroma_h,
1215 const byte* sub_chroma, const byte* alpha, int sub_pitch)
1217 if( ((reinterpret_cast<intptr_t>(sub_chroma) |
1218 //reinterpret_cast<intptr_t>(dst) |
1219 reinterpret_cast<intptr_t>(alpha) | static_cast<intptr_t>(sub_pitch)
1220 //| (static_cast<intptr_t>(dst_pitch)&7)
1221 ) & 15 )==0 )
1223 int pitch = sub_pitch;
1224 for(int j = 0; j < chroma_h; j++, sub_chroma += sub_pitch*2, alpha += sub_pitch*2, dst += dst_pitch)
1226 hleft_vmid_mix_uv_yv12_sse2(dst, w, sub_chroma, alpha, sub_pitch);
1229 else//fix me: only a workaround for non-mod-16 size video
1231 for(int j = 0; j < chroma_h; j++, sub_chroma += sub_pitch*2, alpha += sub_pitch*2, dst += dst_pitch)
1233 hleft_vmid_mix_uv_yv12_c(dst, w, sub_chroma, alpha, sub_pitch);
1238 HRESULT CMemSubPic::AlphaBltAnv12_P010( const BYTE* src_a, const BYTE* src_y, const BYTE* src_uv, int src_pitch,
1239 BYTE* dst_y, BYTE* dst_uv, int dst_pitch, int w, int h )
1241 const BYTE* sa = src_a;
1242 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_y) | static_cast<intptr_t>(src_pitch) |
1243 reinterpret_cast<intptr_t>(dst_y) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1245 for(int i=0; i<h; i++, sa += src_pitch, src_y += src_pitch, dst_y += dst_pitch)
1247 const BYTE* sa2 = sa;
1248 const BYTE* s2 = src_y;
1249 const BYTE* s2end_mod16 = s2 + (w&~15);
1250 const BYTE* s2end = s2 + w;
1251 BYTE* d2 = dst_y;
1253 for(; s2 < s2end_mod16; s2+=16, sa2+=16, d2+=32)
1255 mix_16_y_p010_sse2(d2, s2, sa2);
1257 for( WORD* d3=reinterpret_cast<WORD*>(d2); s2 < s2end; s2++, sa2++, d3++)
1259 if(sa2[0] < 0xff)
1261 d3[0] = ((d3[0]*sa2[0])>>8) + (s2[0]<<8);
1266 else //fix me: only a workaround for non-mod-16 size video
1268 for(int i=0; i<h; i++, sa += src_pitch, src_y += src_pitch, dst_y += dst_pitch)
1270 const BYTE* sa2 = sa;
1271 const BYTE* s2 = src_y;
1272 const BYTE* s2end = s2 + w;
1273 WORD* d2 = reinterpret_cast<WORD*>(dst_y);
1274 for(; s2 < s2end; s2+=1, sa2+=1, d2+=1)
1276 if(sa2[0] < 0xff)
1278 d2[0] = ((d2[0]*sa2[0])>>8) + (s2[0]<<8);
1283 //UV
1284 int h2 = h/2;
1285 BYTE* d = dst_uv;
1286 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_uv) | static_cast<intptr_t>(src_pitch) |
1287 reinterpret_cast<intptr_t>(dst_uv) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1289 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1291 hleft_vmid_mix_uv_p010_sse2(d, w, src_uv, src_a, src_pitch);
1294 else
1296 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1298 hleft_vmid_mix_uv_p010_c(d, w, src_uv, src_a, src_pitch);
1301 __asm emms;
1302 return S_OK;
1305 HRESULT CMemSubPic::AlphaBltAnv12_Nv12( const BYTE* src_a, const BYTE* src_y, const BYTE* src_uv, int src_pitch,
1306 BYTE* dst_y, BYTE* dst_uv, int dst_pitch, int w, int h )
1308 AlphaBltYv12Luma( dst_y, dst_pitch, w, h, src_y, src_a, src_pitch );
1310 int h2 = h/2;
1311 if( ((reinterpret_cast<intptr_t>(src_a) | reinterpret_cast<intptr_t>(src_uv) | static_cast<intptr_t>(src_pitch) |
1312 reinterpret_cast<intptr_t>(dst_uv) | static_cast<intptr_t>(dst_pitch) ) & 15 )==0 )
1314 BYTE* d = dst_uv;
1315 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1317 hleft_vmid_mix_uv_nv12_sse2(d, w, src_uv, src_a, src_pitch);
1320 else
1322 BYTE* d = dst_uv;
1323 for(int j = 0; j < h2; j++, src_uv += src_pitch, src_a += src_pitch*2, d += dst_pitch)
1325 hleft_vmid_mix_uv_nv12_c(d, w, src_uv, src_a, src_pitch);
1329 __asm emms;
1330 return S_OK;
1334 // CMemSubPicAllocator
1337 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type, SIZE maxsize, int type/*=-1*/)
1338 : CSubPicExAllocatorImpl(maxsize, false, false)
1339 , m_alpha_blt_dst_type(alpha_blt_dst_type)
1340 , m_maxsize(maxsize)
1341 , m_type(type)
1343 if(m_type==-1)
1345 switch(alpha_blt_dst_type)
1347 case MSP_YUY2:
1348 m_type = MSP_XY_AUYV;
1349 break;
1350 case MSP_AYUV:
1351 m_type = MSP_AYUV;
1352 break;
1353 case MSP_IYUV:
1354 case MSP_YV12:
1355 case MSP_P010:
1356 case MSP_P016:
1357 case MSP_NV12:
1358 case MSP_NV21:
1359 m_type = MSP_AYUV_PLANAR;
1360 break;
1361 default:
1362 m_type = MSP_RGBA;
1363 break;
1368 // ISubPicAllocatorImpl
1370 bool CMemSubPicAllocator::AllocEx(bool fStatic, ISubPicEx** ppSubPic)
1372 if(!ppSubPic) {
1373 return false;
1375 SubPicDesc spd;
1376 spd.w = m_maxsize.cx;
1377 spd.h = m_maxsize.cy;
1378 spd.bpp = 32;
1379 spd.pitch = (spd.w*spd.bpp)>>3;
1380 spd.type = m_type;
1381 spd.bits = DNew BYTE[spd.pitch*spd.h];
1382 if(!spd.bits) {
1383 return false;
1385 *ppSubPic = DNew CMemSubPic(spd, m_alpha_blt_dst_type);
1386 if(!(*ppSubPic)) {
1387 return false;
1389 (*ppSubPic)->AddRef();
1390 return true;