; (repository web-view page header, not part of the original source file)
; Increase ParseScript cache from 30 to 90 seconds
; [xy_vsfilter.git] / src / dsutil / convert_a.asm
; blob 5322bd8bfb6039cfc7122de56f1853e41ddb70f9
; Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al.
; http://www.avisynth.org
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
; http://www.gnu.org/copyleft/gpl.html .
; Linking Avisynth statically or dynamically with other modules is making a
; combined work based on Avisynth. Thus, the terms and conditions of the GNU
; General Public License cover the whole combination.
; As a special exception, the copyright holders of Avisynth give you
; permission to link Avisynth with independent modules that communicate with
; Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
; terms of these independent modules, and to copy and distribute the
; resulting combined work under terms of your choice, provided that
; every copy of the combined work is accompanied by a complete copy of
; the source code of Avisynth (the version of Avisynth used to produce the
; combined work), being distributed under the terms of the GNU General
; Public License plus this exception. An independent module is a module
; which is not derived from or based on Avisynth, such as 3rd-party filters,
; import and export plugins, or graphical user interfaces.
; Syntax: MASM (Intel operand order). Target: 32-bit x86 with MMX, flat model.
; The procedures generated below are documented as __cdecl.

.586
.mmx
.model flat

; alignment has to be 'page' so that I can use 'align 32' below

_TEXT64 segment page public use32 'CODE'

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 align 8
47 yuv2rgb_constants:
49 x0000_0000_0010_0010 dq 00000000000100010h
50 x0080_0080_0080_0080 dq 00080008000800080h
51 x00FF_00FF_00FF_00FF dq 000FF00FF00FF00FFh
52 x00002000_00002000 dq 00000200000002000h
53 xFF000000_FF000000 dq 0FF000000FF000000h
54 cy dq 000004A8500004A85h
55 crv dq 03313000033130000h
56 cgu_cgv dq 0E5FCF377E5FCF377h
57 cbu dq 00000408D0000408Dh
59 yuv2rgb_constants_rec709:
61 dq 00000000000100010h
62 dq 00080008000800080h
63 dq 000FF00FF00FF00FFh
64 dq 00000200000002000h
65 dq 0FF000000FF000000h
66 dq 000004A8500004A85h
67 dq 03960000039600000h
68 dq 0EEF5F930EEF5F930h
69 dq 00000439B0000439Bh
; Byte offsets of each entry from the start of a constant table.  The code
; addresses constants as [edx+ofs_*], so these values must match the order of
; the dq lines in BOTH yuv2rgb_constants and yuv2rgb_constants_rec709
; (8 bytes per entry).

ofs_x0000_0000_0010_0010 = 0
ofs_x0080_0080_0080_0080 = 8
ofs_x00FF_00FF_00FF_00FF = 16
ofs_x00002000_00002000 = 24
ofs_xFF000000_FF000000 = 32
ofs_cy = 40
ofs_crv = 48
ofs_cgu_cgv = 56
ofs_cbu = 64

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; GET_Y mma,uyvy
;
; Reduces every 16-bit word of MMX register 'mma' to one of its two bytes,
; zero-extended to a word.
;   uyvy <> 0 : keep the high byte of each word (logical shift right by 8)
;   uyvy  = 0 : keep the low byte of each word (mask with 00FFh); this form
;               reads the mask from the constant table, so edx must point at it
; 'uyvy' is an assembly-time expression; only one of the two instructions is
; emitted per expansion.

GET_Y MACRO mma,uyvy
IF &uyvy
        psrlw   mma,8
ELSE
        pand    mma, qword ptr [edx+ofs_x00FF_00FF_00FF_00FF]
ENDIF
        ENDM
; GET_UV mma,uyvy
;
; Complement of GET_Y: keeps the byte of each word that GET_Y discards for
; the same 'uyvy' value.  Implemented by expanding GET_Y with the selector
; inverted (1-uyvy), so 'uyvy' is expected to be 0 or 1.

GET_UV MACRO mma,uyvy
        GET_Y   mma,1-uyvy
        ENDM
; YUV2RGB_INNER_LOOP uyvy,rgb32,no_next_pixel
;
; Converts one group of 8 source bytes (two dwords, two pixels each) into
; four RGB pixels.
;
; Macro arguments (assembly-time constants):
;   uyvy           selects which byte of each source word is luma
;                  (passed straight to GET_Y / GET_UV)
;   rgb32          0 = store 12 bytes (3 per pixel)
;                  1 = store 16 bytes (4 per pixel, top byte of each forced
;                      to FFh)
;   no_next_pixel  0 = the chroma look-ahead for the second dword is read
;                      from [esi+8], and "cmp esi,ecx" is emitted
;                  1 = the look-ahead reuses [esi+4], so nothing beyond the
;                      group is read, and no compare is emitted
;
; Register contract:
;   in    esi = source group, edi = destination, edx = constant table,
;         ecx = loop bound (only used when no_next_pixel = 0)
;   out   esi advanced by 8, edi advanced by 12+4*rgb32 (the stores use
;         negative displacements from the already-advanced edi)
;   uses  mm0-mm7; no emms is issued here
;   flags when no_next_pixel = 0 the flags of "cmp esi,ecx" (esi already
;         advanced) are still valid at the end of the expansion, because
;         only MMX instructions follow the compare; the caller branches on
;         them
;
; Chroma handling: for each source dword the first pixel uses twice its own
; (chroma - 128) and the second pixel uses the sum of this dword's and the
; following dword's (chroma - 128), i.e. both are scaled by two before the
; coefficient multiply.

YUV2RGB_INNER_LOOP MACRO uyvy,rgb32,no_next_pixel

;; This YUV422->RGB conversion code uses only four MMX registers per
;; source dword, so I convert two dwords in parallel. Lines corresponding
;; to the "second pipe" are indented an extra space. There's almost no
;; overlap, except at the end and in the three lines marked ***.
;; revised 4july,2002 to properly set alpha in rgb32 to default "on" & other small memory optimizations

        movd    mm0, dword ptr [esi]
         movd    mm5, dword ptr [esi+4]
        movq    mm1,mm0
        GET_Y   mm0,&uyvy               ; mm0 = __________Y1__Y0
         movq    mm4,mm5
        GET_UV  mm1,&uyvy               ; mm1 = __________V0__U0
         GET_Y   mm4,&uyvy
        movq    mm2,mm5                 ; *** avoid reload from [esi+4]
         GET_UV  mm5,&uyvy
        psubw   mm0, qword ptr [edx+ofs_x0000_0000_0010_0010]
         movd    mm6, dword ptr [esi+8-4*(no_next_pixel)]
        GET_UV  mm2,&uyvy               ; mm2 = __________V2__U2
         psubw   mm4, qword ptr [edx+ofs_x0000_0000_0010_0010]
        paddw   mm2,mm1
         GET_UV  mm6,&uyvy
        psubw   mm1, qword ptr [edx+ofs_x0080_0080_0080_0080]
         paddw   mm6,mm5
        psllq   mm2,32
         psubw   mm5, qword ptr [edx+ofs_x0080_0080_0080_0080]
        punpcklwd mm0,mm2               ; mm0 = ______Y1______Y0
         psllq   mm6,32
        pmaddwd mm0, qword ptr [edx+ofs_cy]
         punpcklwd mm4,mm6
        paddw   mm1,mm1
         pmaddwd mm4, qword ptr [edx+ofs_cy]
         paddw   mm5,mm5
        paddw   mm1,mm2                 ; mm1 = __V1__U1__V0__U0 * 2
        paddd   mm0, qword ptr [edx+ofs_x00002000_00002000]
         paddw   mm5,mm6
        movq    mm2,mm1
         paddd   mm4, qword ptr [edx+ofs_x00002000_00002000]
        movq    mm3,mm1
         movq    mm6,mm5
        pmaddwd mm1, qword ptr [edx+ofs_crv]
         movq    mm7,mm5
        paddd   mm1,mm0
         pmaddwd mm5, qword ptr [edx+ofs_crv]
        psrad   mm1,14                  ; mm1 = RRRRRRRRrrrrrrrr
         paddd   mm5,mm4
        pmaddwd mm2, qword ptr [edx+ofs_cgu_cgv]
         psrad   mm5,14
        paddd   mm2,mm0
         pmaddwd mm6, qword ptr [edx+ofs_cgu_cgv]
        psrad   mm2,14                  ; mm2 = GGGGGGGGgggggggg
         paddd   mm6,mm4
        pmaddwd mm3, qword ptr [edx+ofs_cbu]
         psrad   mm6,14
        paddd   mm3,mm0
         pmaddwd mm7, qword ptr [edx+ofs_cbu]
        add     esi,8
        add     edi,12+4*rgb32
IFE &no_next_pixel
        cmp     esi,ecx                 ;; flags consumed by the caller after the macro
ENDIF
        psrad   mm3,14                  ; mm3 = BBBBBBBBbbbbbbbb
         paddd   mm7,mm4
        pxor    mm0,mm0
         psrad   mm7,14
        packssdw mm3,mm2                ; mm3 = GGGGggggBBBBbbbb
         packssdw mm7,mm6
        packssdw mm1,mm0                ; mm1 = ________RRRRrrrr
         packssdw mm5,mm0               ; *** avoid pxor mm4,mm4
        movq    mm2,mm3
         movq    mm6,mm7
        punpcklwd mm2,mm1               ; mm2 = RRRRBBBBrrrrbbbb
         punpcklwd mm6,mm5
        punpckhwd mm3,mm1               ; mm3 = ____GGGG____gggg
         punpckhwd mm7,mm5
        movq    mm0,mm2
         movq    mm4,mm6
        punpcklwd mm0,mm3               ; mm0 = ____rrrrggggbbbb
         punpcklwd mm4,mm7
IFE &rgb32
        psllq   mm0,16
         psllq   mm4,16
ENDIF
        punpckhwd mm2,mm3               ; mm2 = ____RRRRGGGGBBBB
         punpckhwd mm6,mm7
        packuswb mm0,mm2                ; mm0 = __RRGGBB__rrggbb <- ta dah!
         packuswb mm4,mm6

IF &rgb32
        por     mm0, qword ptr [edx+ofs_xFF000000_FF000000]     ; set alpha channels "on"
         por     mm4, qword ptr [edx+ofs_xFF000000_FF000000]
        movq    qword ptr [edi-16],mm0  ; store the quadwords independently
         movq    qword ptr [edi-8],mm4
ELSE
        psrlq   mm0,8                   ; pack the two quadwords into 12 bytes
        psllq   mm4,8                   ; (note: the two shifts above leave
        movd    dword ptr [edi-12],mm0  ; mm0,4 = __RRGGBBrrggbb__)
        psrlq   mm0,32
        por     mm4,mm0
        movd    dword ptr [edi-8],mm4
        psrlq   mm4,32
        movd    dword ptr [edi-4],mm4
ENDIF

        ENDM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; YUV2RGB_PROC procname,uyvy,rgb32
;
; Generates one public conversion procedure named _<procname>.
;   uyvy, rgb32   forwarded unchanged to YUV2RGB_INNER_LOOP
;
; Generated procedure (arguments on the stack, see the prototype below):
;   * Source rows are visited from the one at src_end - src_pitch down to
;     the one at src, stepping back src_pitch bytes per row.
;   * The destination pointer only ever advances, by 12 or 16 bytes per
;     8 source bytes; no destination pitch is applied between rows.
;   * The constant table is chosen by bit 0 of the 'rec709' argument.
;   * Within a row, every 8-byte group except the last is converted with the
;     look-ahead variant of the inner loop; the last group uses the
;     no_next_pixel variant so nothing is read past the end of the row.
;   * If src_end <= src no row is converted.  A row of exactly 8 bytes is
;     converted with the no_next_pixel variant only.
;   * esi, edi and ebx are saved and restored; eax, ecx, edx and the flags
;     are modified; emms is executed before returning.
;
; Requirements visible from the code: row_size must be a positive multiple
; of 8 (each step consumes 8 bytes and the row start is recovered with
; "sub esi,ebx").  NOTE(review): src_pitch is presumably positive and
; src_end - src a multiple of it -- confirm against the callers.

YUV2RGB_PROC MACRO procname,uyvy,rgb32

        PUBLIC C _&procname

;;void __cdecl procname(
;;      [esp+ 4] const BYTE* src,
;;      [esp+ 8] BYTE* dst,
;;      [esp+12] const BYTE* src_end,
;;      [esp+16] int src_pitch,
;;      [esp+20] int row_size,
;;      [esp+24] bool rec709);

_&procname PROC

        push    esi
        push    edi
        push    ebx

        ;; the three pushes above moved every argument up by 12 bytes
        mov     eax,[esp+16+12]         ; eax = src_pitch
        mov     esi,[esp+12+12]         ; esi = src_end: read source bottom-up
        mov     edi,[esp+8+12]          ; edi = dst
        mov     ebx,[esp+20+12]         ; ebx = row_size

        cmp     esi,[esp+4+12]
        jbe     all_rows_done           ; src_end <= src: nothing to convert

        mov     edx,offset yuv2rgb_constants
        test    byte ptr [esp+24+12],1
        jz      loop0
        mov     edx,offset yuv2rgb_constants_rec709

loop0:
        sub     esi,eax                 ; step back to the start of the next row
        lea     ecx,[esi+ebx-8]         ; ecx = address of the row's last 8-byte group
        cmp     esi,ecx
        jae     row_tail                ; single-group row: skip the look-ahead loop

        align 32
loop1:
        YUV2RGB_INNER_LOOP uyvy,rgb32,0
        jb      loop1                   ; flags come from "cmp esi,ecx" inside the macro

row_tail:
        YUV2RGB_INNER_LOOP uyvy,rgb32,1

        sub     esi,ebx                 ; esi = start of the row just converted
        cmp     esi,[esp+4+12]
        ja      loop0                   ; continue until the row at src has been done

all_rows_done:
        emms
        pop     ebx
        pop     edi
        pop     esi
        retn

_&procname ENDP

        ENDM

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Instantiate the exported converters.  Arguments are (name, uyvy, rgb32):
; both use uyvy = 0; the first stores 3 bytes per pixel, the second 4.

YUV2RGB_PROC mmx_YUY2toRGB24,0,0
YUV2RGB_PROC mmx_YUY2toRGB32,0,1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;