flacdec: change frame bps validation to return an error value if bps
[FFMpeg-mirror/lagarith.git] / libavcodec / snow.c
blob42cec6b99b84a77701c64e3980092b1444f9ab8a
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "avcodec.h"
22 #include "dsputil.h"
23 #include "snow.h"
25 #include "rangecoder.h"
26 #include "mathops.h"
28 #include "mpegvideo.h"
30 #undef NDEBUG
31 #include <assert.h>
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
178 #if 0 //64*cubic
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 //error:0.000022
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
231 //error:0.000033
233 #elif 1 // 64*linear
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
267 //error:0.000020
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
286 //error:0.000015
288 #else //64*cos
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 //error:0.000022
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
341 //error:0.000022
343 #endif /* 0 */
345 //linear *64
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
355 //error:0.000000
358 //linear *64
359 static const uint8_t obmc4[16]={
360 16, 48, 48, 16,
361 48,144,144, 48,
362 48,144,144, 48,
363 16, 48, 48, 16,
364 //error:0.000000
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
373 typedef struct BlockNode{
374 int16_t mx;
375 int16_t my;
376 uint8_t ref;
377 uint8_t color[3];
378 uint8_t type;
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
381 #define BLOCK_OPT 2
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
384 }BlockNode;
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
388 .mx= 0,
389 .my= 0,
390 .ref= 0,
391 .type= 0,
392 .level= 0,
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 #define ENCODER_EXTRA_BITS 4
398 #define HTAPS_MAX 8
400 typedef struct x_and_coeff{
401 int16_t x;
402 uint16_t coeff;
403 } x_and_coeff;
405 typedef struct SubBand{
406 int level;
407 int stride;
408 int width;
409 int height;
410 int qlog; ///< log(qscale)/log[2^(1/6)]
411 DWTELEM *buf;
412 IDWTELEM *ibuf;
413 int buf_x_offset;
414 int buf_y_offset;
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
419 }SubBand;
421 typedef struct Plane{
422 int width;
423 int height;
424 SubBand band[MAX_DECOMPOSITIONS][4];
426 int htaps;
427 int8_t hcoeff[HTAPS_MAX/2];
428 int diag_mc;
429 int fast_mc;
431 int last_htaps;
432 int8_t last_hcoeff[HTAPS_MAX/2];
433 int last_diag_mc;
434 }Plane;
436 typedef struct SnowContext{
437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
439 AVCodecContext *avctx;
440 RangeCoder c;
441 DSPContext dsp;
442 AVFrame new_picture;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
444 AVFrame current_picture;
445 AVFrame last_picture[MAX_REF_FRAMES];
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447 AVFrame mconly_picture;
448 // uint8_t q_context[16];
449 uint8_t header_state[32];
450 uint8_t block_state[128 + 32*128];
451 int keyframe;
452 int always_reset;
453 int version;
454 int spatial_decomposition_type;
455 int last_spatial_decomposition_type;
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
458 int last_spatial_decomposition_count;
459 int temporal_decomposition_count;
460 int max_ref_frames;
461 int ref_frames;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
464 DWTELEM *spatial_dwt_buffer;
465 IDWTELEM *spatial_idwt_buffer;
466 int colorspace_type;
467 int chroma_h_shift;
468 int chroma_v_shift;
469 int spatial_scalability;
470 int qlog;
471 int last_qlog;
472 int lambda;
473 int lambda2;
474 int pass1_rc;
475 int mv_scale;
476 int last_mv_scale;
477 int qbias;
478 int last_qbias;
479 #define QBIAS_SHIFT 3
480 int b_width;
481 int b_height;
482 int block_max_depth;
483 int last_block_max_depth;
484 Plane plane[MAX_PLANES];
485 BlockNode *block;
486 #define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
489 slice_buffer sb;
491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
493 uint8_t *scratchbuf;
494 }SnowContext;
496 typedef struct {
497 IDWTELEM *b0;
498 IDWTELEM *b1;
499 IDWTELEM *b2;
500 IDWTELEM *b3;
501 int y;
502 } DWTCompose;
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
507 static void iterative_me(SnowContext *s);
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
511 int i;
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
520 for(i = 0; i < max_allocated_lines; i++){
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
524 buf->data_stack_top = max_allocated_lines - 1;
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
529 int offset;
530 IDWTELEM * buffer;
532 assert(buf->data_stack_top >= 0);
533 // assert(!buf->line[line]);
534 if (buf->line[line])
535 return buf->line[line];
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
542 return buffer;
545 static void slice_buffer_release(slice_buffer * buf, int line)
547 int offset;
548 IDWTELEM * buffer;
550 assert(line >= 0 && line < buf->line_count);
551 assert(buf->line[line]);
553 offset = buf->line_width * line;
554 buffer = buf->line[line];
555 buf->data_stack_top++;
556 buf->data_stack[buf->data_stack_top] = buffer;
557 buf->line[line] = NULL;
560 static void slice_buffer_flush(slice_buffer * buf)
562 int i;
563 for(i = 0; i < buf->line_count; i++){
564 if (buf->line[i])
565 slice_buffer_release(buf, i);
569 static void slice_buffer_destroy(slice_buffer * buf)
571 int i;
572 slice_buffer_flush(buf);
574 for(i = buf->data_count - 1; i >= 0; i--){
575 av_freep(&buf->data_stack[i]);
577 av_freep(&buf->data_stack);
578 av_freep(&buf->line);
581 #ifdef __sgi
582 // Avoid a name clash on SGI IRIX
583 #undef qexp
584 #endif
585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
586 static uint8_t qexp[QROOT];
588 static inline int mirror(int v, int m){
589 while((unsigned)v > (unsigned)m){
590 v=-v;
591 if(v<0) v+= 2*m;
593 return v;
596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
597 int i;
599 if(v){
600 const int a= FFABS(v);
601 const int e= av_log2(a);
602 #if 1
603 const int el= FFMIN(e, 10);
604 put_rac(c, state+0, 0);
606 for(i=0; i<el; i++){
607 put_rac(c, state+1+i, 1); //1..10
609 for(; i<e; i++){
610 put_rac(c, state+1+9, 1); //1..10
612 put_rac(c, state+1+FFMIN(i,9), 0);
614 for(i=e-1; i>=el; i--){
615 put_rac(c, state+22+9, (a>>i)&1); //22..31
617 for(; i>=0; i--){
618 put_rac(c, state+22+i, (a>>i)&1); //22..31
621 if(is_signed)
622 put_rac(c, state+11 + el, v < 0); //11..21
623 #else
625 put_rac(c, state+0, 0);
626 if(e<=9){
627 for(i=0; i<e; i++){
628 put_rac(c, state+1+i, 1); //1..10
630 put_rac(c, state+1+i, 0);
632 for(i=e-1; i>=0; i--){
633 put_rac(c, state+22+i, (a>>i)&1); //22..31
636 if(is_signed)
637 put_rac(c, state+11 + e, v < 0); //11..21
638 }else{
639 for(i=0; i<e; i++){
640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
642 put_rac(c, state+1+FFMIN(i,9), 0);
644 for(i=e-1; i>=0; i--){
645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
648 if(is_signed)
649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
651 #endif /* 1 */
652 }else{
653 put_rac(c, state+0, 1);
657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
658 if(get_rac(c, state+0))
659 return 0;
660 else{
661 int i, e, a;
662 e= 0;
663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
664 e++;
667 a= 1;
668 for(i=e-1; i>=0; i--){
669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
673 return -a;
674 else
675 return a;
679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
680 int i;
681 int r= log2>=0 ? 1<<log2 : 1;
683 assert(v>=0);
684 assert(log2>=-4);
686 while(v >= r){
687 put_rac(c, state+4+log2, 1);
688 v -= r;
689 log2++;
690 if(log2>0) r+=r;
692 put_rac(c, state+4+log2, 0);
694 for(i=log2-1; i>=0; i--){
695 put_rac(c, state+31-i, (v>>i)&1);
699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
700 int i;
701 int r= log2>=0 ? 1<<log2 : 1;
702 int v=0;
704 assert(log2>=-4);
706 while(get_rac(c, state+4+log2)){
707 v+= r;
708 log2++;
709 if(log2>0) r+=r;
712 for(i=log2-1; i>=0; i--){
713 v+= get_rac(c, state+31-i)<<i;
716 return v;
719 static av_always_inline void
720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
721 int dst_step, int src_step, int ref_step,
722 int width, int mul, int add, int shift,
723 int highpass, int inverse){
724 const int mirror_left= !highpass;
725 const int mirror_right= (width&1) ^ highpass;
726 const int w= (width>>1) - 1 + (highpass & width);
727 int i;
729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
730 if(mirror_left){
731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
732 dst += dst_step;
733 src += src_step;
736 for(i=0; i<w; i++){
737 dst[i*dst_step] =
738 LIFT(src[i*src_step],
739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
740 inverse);
743 if(mirror_right){
744 dst[w*dst_step] =
745 LIFT(src[w*src_step],
746 ((mul*2*ref[w*ref_step]+add)>>shift),
747 inverse);
751 static av_always_inline void
752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
753 int dst_step, int src_step, int ref_step,
754 int width, int mul, int add, int shift,
755 int highpass, int inverse){
756 const int mirror_left= !highpass;
757 const int mirror_right= (width&1) ^ highpass;
758 const int w= (width>>1) - 1 + (highpass & width);
759 int i;
761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
762 if(mirror_left){
763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
764 dst += dst_step;
765 src += src_step;
768 for(i=0; i<w; i++){
769 dst[i*dst_step] =
770 LIFT(src[i*src_step],
771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
772 inverse);
775 if(mirror_right){
776 dst[w*dst_step] =
777 LIFT(src[w*src_step],
778 ((mul*2*ref[w*ref_step]+add)>>shift),
779 inverse);
783 #ifndef liftS
784 static av_always_inline void
785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
786 int dst_step, int src_step, int ref_step,
787 int width, int mul, int add, int shift,
788 int highpass, int inverse){
789 const int mirror_left= !highpass;
790 const int mirror_right= (width&1) ^ highpass;
791 const int w= (width>>1) - 1 + (highpass & width);
792 int i;
794 assert(shift == 4);
795 #define LIFTS(src, ref, inv) \
796 ((inv) ? \
797 (src) + (((ref) + 4*(src))>>shift): \
798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
799 if(mirror_left){
800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
801 dst += dst_step;
802 src += src_step;
805 for(i=0; i<w; i++){
806 dst[i*dst_step] =
807 LIFTS(src[i*src_step],
808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
809 inverse);
812 if(mirror_right){
813 dst[w*dst_step] =
814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
817 static av_always_inline void
818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
819 int dst_step, int src_step, int ref_step,
820 int width, int mul, int add, int shift,
821 int highpass, int inverse){
822 const int mirror_left= !highpass;
823 const int mirror_right= (width&1) ^ highpass;
824 const int w= (width>>1) - 1 + (highpass & width);
825 int i;
827 assert(shift == 4);
828 #define LIFTS(src, ref, inv) \
829 ((inv) ? \
830 (src) + (((ref) + 4*(src))>>shift): \
831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
832 if(mirror_left){
833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
834 dst += dst_step;
835 src += src_step;
838 for(i=0; i<w; i++){
839 dst[i*dst_step] =
840 LIFTS(src[i*src_step],
841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
842 inverse);
845 if(mirror_right){
846 dst[w*dst_step] =
847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
850 #endif /* ! liftS */
852 static void horizontal_decompose53i(DWTELEM *b, int width){
853 DWTELEM temp[width];
854 const int width2= width>>1;
855 int x;
856 const int w2= (width+1)>>1;
858 for(x=0; x<width2; x++){
859 temp[x ]= b[2*x ];
860 temp[x+w2]= b[2*x + 1];
862 if(width&1)
863 temp[x ]= b[2*x ];
864 #if 0
866 int A1,A2,A3,A4;
867 A2= temp[1 ];
868 A4= temp[0 ];
869 A1= temp[0+width2];
870 A1 -= (A2 + A4)>>1;
871 A4 += (A1 + 1)>>1;
872 b[0+width2] = A1;
873 b[0 ] = A4;
874 for(x=1; x+1<width2; x+=2){
875 A3= temp[x+width2];
876 A4= temp[x+1 ];
877 A3 -= (A2 + A4)>>1;
878 A2 += (A1 + A3 + 2)>>2;
879 b[x+width2] = A3;
880 b[x ] = A2;
882 A1= temp[x+1+width2];
883 A2= temp[x+2 ];
884 A1 -= (A2 + A4)>>1;
885 A4 += (A1 + A3 + 2)>>2;
886 b[x+1+width2] = A1;
887 b[x+1 ] = A4;
889 A3= temp[width-1];
890 A3 -= A2;
891 A2 += (A1 + A3 + 2)>>2;
892 b[width -1] = A3;
893 b[width2-1] = A2;
895 #else
896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
898 #endif /* 0 */
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902 int i;
904 for(i=0; i<width; i++){
905 b1[i] -= (b0[i] + b2[i])>>1;
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910 int i;
912 for(i=0; i<width; i++){
913 b1[i] += (b0[i] + b2[i] + 2)>>2;
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918 int y;
919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
932 b0=b2;
933 b1=b3;
937 static void horizontal_decompose97i(DWTELEM *b, int width){
938 DWTELEM temp[width];
939 const int w2= (width+1)>>1;
941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
949 int i;
951 for(i=0; i<width; i++){
952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
957 int i;
959 for(i=0; i<width; i++){
960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
965 int i;
967 for(i=0; i<width; i++){
968 #ifdef liftS
969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
970 #else
971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
972 #endif
976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
977 int i;
979 for(i=0; i<width; i++){
980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
985 int y;
986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
991 for(y=-4; y<height; y+=2){
992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1003 b0=b2;
1004 b1=b3;
1005 b2=b4;
1006 b3=b5;
1010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1011 int level;
1013 for(level=0; level<decomposition_count; level++){
1014 switch(type){
1015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1021 static void horizontal_compose53i(IDWTELEM *b, int width){
1022 IDWTELEM temp[width];
1023 const int width2= width>>1;
1024 const int w2= (width+1)>>1;
1025 int x;
1027 #if 0
1028 int A1,A2,A3,A4;
1029 A2= temp[1 ];
1030 A4= temp[0 ];
1031 A1= temp[0+width2];
1032 A1 -= (A2 + A4)>>1;
1033 A4 += (A1 + 1)>>1;
1034 b[0+width2] = A1;
1035 b[0 ] = A4;
1036 for(x=1; x+1<width2; x+=2){
1037 A3= temp[x+width2];
1038 A4= temp[x+1 ];
1039 A3 -= (A2 + A4)>>1;
1040 A2 += (A1 + A3 + 2)>>2;
1041 b[x+width2] = A3;
1042 b[x ] = A2;
1044 A1= temp[x+1+width2];
1045 A2= temp[x+2 ];
1046 A1 -= (A2 + A4)>>1;
1047 A4 += (A1 + A3 + 2)>>2;
1048 b[x+1+width2] = A1;
1049 b[x+1 ] = A4;
1051 A3= temp[width-1];
1052 A3 -= A2;
1053 A2 += (A1 + A3 + 2)>>2;
1054 b[width -1] = A3;
1055 b[width2-1] = A2;
1056 #else
1057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1059 #endif /* 0 */
1060 for(x=0; x<width2; x++){
1061 b[2*x ]= temp[x ];
1062 b[2*x + 1]= temp[x+w2];
1064 if(width&1)
1065 b[2*x ]= temp[x ];
1068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1069 int i;
1071 for(i=0; i<width; i++){
1072 b1[i] += (b0[i] + b2[i])>>1;
1076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1077 int i;
1079 for(i=0; i<width; i++){
1080 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1087 cs->y = -1;
1090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1091 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1092 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1093 cs->y = -1;
1096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1097 int y= cs->y;
1099 IDWTELEM *b0= cs->b0;
1100 IDWTELEM *b1= cs->b1;
1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1110 cs->b0 = b2;
1111 cs->b1 = b3;
1112 cs->y += 2;
1115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1116 int y= cs->y;
1117 IDWTELEM *b0= cs->b0;
1118 IDWTELEM *b1= cs->b1;
1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1128 cs->b0 = b2;
1129 cs->b1 = b3;
1130 cs->y += 2;
1133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1134 DWTCompose cs;
1135 spatial_compose53i_init(&cs, buffer, height, stride);
1136 while(cs.y <= height)
1137 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1142 IDWTELEM temp[width];
1143 const int w2= (width+1)>>1;
1145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1152 int i;
1154 for(i=0; i<width; i++){
1155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1160 int i;
1162 for(i=0; i<width; i++){
1163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1168 int i;
1170 for(i=0; i<width; i++){
1171 #ifdef liftS
1172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1173 #else
1174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1175 #endif
1179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1180 int i;
1182 for(i=0; i<width; i++){
1183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1188 int i;
1190 for(i=0; i<width; i++){
1191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1193 #ifdef liftS
1194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1195 #else
1196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1197 #endif
1198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1207 cs->y = -3;
1210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1211 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1212 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1213 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1214 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1215 cs->y = -3;
1218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1219 int y = cs->y;
1221 IDWTELEM *b0= cs->b0;
1222 IDWTELEM *b1= cs->b1;
1223 IDWTELEM *b2= cs->b2;
1224 IDWTELEM *b3= cs->b3;
1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1228 if(y>0 && y+4<height){
1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1230 }else{
1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1240 cs->b0=b2;
1241 cs->b1=b3;
1242 cs->b2=b4;
1243 cs->b3=b5;
1244 cs->y += 2;
1247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1248 int y = cs->y;
1249 IDWTELEM *b0= cs->b0;
1250 IDWTELEM *b1= cs->b1;
1251 IDWTELEM *b2= cs->b2;
1252 IDWTELEM *b3= cs->b3;
1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1264 cs->b0=b2;
1265 cs->b1=b3;
1266 cs->b2=b4;
1267 cs->b3=b5;
1268 cs->y += 2;
1271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1272 DWTCompose cs;
1273 spatial_compose97i_init(&cs, buffer, height, stride);
1274 while(cs.y <= height)
1275 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1279 int level;
1280 for(level=decomposition_count-1; level>=0; level--){
1281 switch(type){
1282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1289 int level;
1290 for(level=decomposition_count-1; level>=0; level--){
1291 switch(type){
1292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1299 const int support = type==1 ? 3 : 5;
1300 int level;
1301 if(type==2) return;
1303 for(level=decomposition_count-1; level>=0; level--){
1304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1305 switch(type){
1306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1307 break;
1308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1309 break;
1315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1316 const int support = type==1 ? 3 : 5;
1317 int level;
1318 if(type==2) return;
1320 for(level=decomposition_count-1; level>=0; level--){
1321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1322 switch(type){
1323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1324 break;
1325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1326 break;
1332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1333 DWTCompose cs[MAX_DECOMPOSITIONS];
1334 int y;
1335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1336 for(y=0; y<height; y+=4)
1337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1341 const int w= b->width;
1342 const int h= b->height;
1343 int x, y;
1345 if(1){
1346 int run=0;
1347 int runs[w*h];
1348 int run_index=0;
1349 int max_index;
1351 for(y=0; y<h; y++){
1352 for(x=0; x<w; x++){
1353 int v, p=0;
1354 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355 v= src[x + y*stride];
1357 if(y){
1358 t= src[x + (y-1)*stride];
1359 if(x){
1360 lt= src[x - 1 + (y-1)*stride];
1362 if(x + 1 < w){
1363 rt= src[x + 1 + (y-1)*stride];
1366 if(x){
1367 l= src[x - 1 + y*stride];
1368 /*if(x > 1){
1369 if(orientation==1) ll= src[y + (x-2)*stride];
1370 else ll= src[x - 2 + y*stride];
1373 if(parent){
1374 int px= x>>1;
1375 int py= y>>1;
1376 if(px<b->parent->width && py<b->parent->height)
1377 p= parent[px + py*2*stride];
1379 if(!(/*ll|*/l|lt|t|rt|p)){
1380 if(v){
1381 runs[run_index++]= run;
1382 run=0;
1383 }else{
1384 run++;
1389 max_index= run_index;
1390 runs[run_index++]= run;
1391 run_index=0;
1392 run= runs[run_index++];
1394 put_symbol2(&s->c, b->state[30], max_index, 0);
1395 if(run_index <= max_index)
1396 put_symbol2(&s->c, b->state[1], run, 3);
1398 for(y=0; y<h; y++){
1399 if(s->c.bytestream_end - s->c.bytestream < w*40){
1400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1401 return -1;
1403 for(x=0; x<w; x++){
1404 int v, p=0;
1405 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1406 v= src[x + y*stride];
1408 if(y){
1409 t= src[x + (y-1)*stride];
1410 if(x){
1411 lt= src[x - 1 + (y-1)*stride];
1413 if(x + 1 < w){
1414 rt= src[x + 1 + (y-1)*stride];
1417 if(x){
1418 l= src[x - 1 + y*stride];
1419 /*if(x > 1){
1420 if(orientation==1) ll= src[y + (x-2)*stride];
1421 else ll= src[x - 2 + y*stride];
1424 if(parent){
1425 int px= x>>1;
1426 int py= y>>1;
1427 if(px<b->parent->width && py<b->parent->height)
1428 p= parent[px + py*2*stride];
1430 if(/*ll|*/l|lt|t|rt|p){
1431 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1433 put_rac(&s->c, &b->state[0][context], !!v);
1434 }else{
1435 if(!run){
1436 run= runs[run_index++];
1438 if(run_index <= max_index)
1439 put_symbol2(&s->c, b->state[1], run, 3);
1440 assert(v);
1441 }else{
1442 run--;
1443 assert(!v);
1446 if(v){
1447 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1448 int l2= 2*FFABS(l) + (l<0);
1449 int t2= 2*FFABS(t) + (t<0);
1451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1457 return 0;
1460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1461 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1462 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1463 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1464 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1468 const int w= b->width;
1469 const int h= b->height;
1470 int x,y;
1472 if(1){
1473 int run, runs;
1474 x_and_coeff *xc= b->x_coeff;
1475 x_and_coeff *prev_xc= NULL;
1476 x_and_coeff *prev2_xc= xc;
1477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1478 x_and_coeff *prev_parent_xc= parent_xc;
1480 runs= get_symbol2(&s->c, b->state[30], 0);
1481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1482 else run= INT_MAX;
1484 for(y=0; y<h; y++){
1485 int v=0;
1486 int lt=0, t=0, rt=0;
1488 if(y && prev_xc->x == 0){
1489 rt= prev_xc->coeff;
1491 for(x=0; x<w; x++){
1492 int p=0;
1493 const int l= v;
1495 lt= t; t= rt;
1497 if(y){
1498 if(prev_xc->x <= x)
1499 prev_xc++;
1500 if(prev_xc->x == x + 1)
1501 rt= prev_xc->coeff;
1502 else
1503 rt=0;
1505 if(parent_xc){
1506 if(x>>1 > parent_xc->x){
1507 parent_xc++;
1509 if(x>>1 == parent_xc->x){
1510 p= parent_xc->coeff;
1513 if(/*ll|*/l|lt|t|rt|p){
1514 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1516 v=get_rac(&s->c, &b->state[0][context]);
1517 if(v){
1518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1521 xc->x=x;
1522 (xc++)->coeff= v;
1524 }else{
1525 if(!run){
1526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527 else run= INT_MAX;
1528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1531 xc->x=x;
1532 (xc++)->coeff= v;
1533 }else{
1534 int max_run;
1535 run--;
1536 v=0;
1538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1539 else max_run= FFMIN(run, w-x-1);
1540 if(parent_xc)
1541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1542 x+= max_run;
1543 run-= max_run;
1547 (xc++)->x= w+1; //end marker
1548 prev_xc= prev2_xc;
1549 prev2_xc= xc;
1551 if(parent_xc){
1552 if(y&1){
1553 while(parent_xc->x != parent->width+1)
1554 parent_xc++;
1555 parent_xc++;
1556 prev_parent_xc= parent_xc;
1557 }else{
1558 parent_xc= prev_parent_xc;
1563 (xc++)->x= w+1; //end marker
1567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1568 const int w= b->width;
1569 int y;
1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1573 int new_index = 0;
1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1576 qadd= 0;
1577 qmul= 1<<QEXPSHIFT;
1580 /* If we are on the second or later slice, restore our index. */
1581 if (start_y != 0)
1582 new_index = save_state[0];
1585 for(y=start_y; y<h; y++){
1586 int x = 0;
1587 int v;
1588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1589 memset(line, 0, b->width*sizeof(IDWTELEM));
1590 v = b->x_coeff[new_index].coeff;
1591 x = b->x_coeff[new_index++].x;
1592 while(x < w){
1593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1594 register int u= -(v&1);
1595 line[x] = (t^u) - u;
1597 v = b->x_coeff[new_index].coeff;
1598 x = b->x_coeff[new_index++].x;
1602 /* Save our variables for the next slice. */
1603 save_state[0] = new_index;
1605 return;
1608 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1609 int plane_index, level, orientation;
1611 for(plane_index=0; plane_index<3; plane_index++){
1612 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1613 for(orientation=level ? 1:0; orientation<4; orientation++){
1614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1618 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1619 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1622 static int alloc_blocks(SnowContext *s){
1623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1626 s->b_width = w;
1627 s->b_height= h;
1629 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1630 return 0;
1633 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1634 uint8_t *bytestream= d->bytestream;
1635 uint8_t *bytestream_start= d->bytestream_start;
1636 *d= *s;
1637 d->bytestream= bytestream;
1638 d->bytestream_start= bytestream_start;
1641 //near copy & paste from dsputil, FIXME
1642 static int pix_sum(uint8_t * pix, int line_size, int w)
1644 int s, i, j;
1646 s = 0;
1647 for (i = 0; i < w; i++) {
1648 for (j = 0; j < w; j++) {
1649 s += pix[0];
1650 pix ++;
1652 pix += line_size - w;
1654 return s;
1657 //near copy & paste from dsputil, FIXME
1658 static int pix_norm1(uint8_t * pix, int line_size, int w)
1660 int s, i, j;
1661 uint32_t *sq = ff_squareTbl + 256;
1663 s = 0;
1664 for (i = 0; i < w; i++) {
1665 for (j = 0; j < w; j ++) {
1666 s += sq[pix[0]];
1667 pix ++;
1669 pix += line_size - w;
1671 return s;
1674 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1675 const int w= s->b_width << s->block_max_depth;
1676 const int rem_depth= s->block_max_depth - level;
1677 const int index= (x + y*w) << rem_depth;
1678 const int block_w= 1<<rem_depth;
1679 BlockNode block;
1680 int i,j;
1682 block.color[0]= l;
1683 block.color[1]= cb;
1684 block.color[2]= cr;
1685 block.mx= mx;
1686 block.my= my;
1687 block.ref= ref;
1688 block.type= type;
1689 block.level= level;
1691 for(j=0; j<block_w; j++){
1692 for(i=0; i<block_w; i++){
1693 s->block[index + i + j*w]= block;
1698 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1699 const int offset[3]= {
1700 y*c-> stride + x,
1701 ((y*c->uvstride + x)>>1),
1702 ((y*c->uvstride + x)>>1),
1704 int i;
1705 for(i=0; i<3; i++){
1706 c->src[0][i]= src [i];
1707 c->ref[0][i]= ref [i] + offset[i];
1709 assert(!ref_index);
1712 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1713 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1714 if(s->ref_frames == 1){
1715 *mx = mid_pred(left->mx, top->mx, tr->mx);
1716 *my = mid_pred(left->my, top->my, tr->my);
1717 }else{
1718 const int *scale = scale_mv_ref[ref];
1719 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1720 (top ->mx * scale[top ->ref] + 128) >>8,
1721 (tr ->mx * scale[tr ->ref] + 128) >>8);
1722 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1723 (top ->my * scale[top ->ref] + 128) >>8,
1724 (tr ->my * scale[tr ->ref] + 128) >>8);
1728 //FIXME copy&paste
1729 #define P_LEFT P[1]
1730 #define P_TOP P[2]
1731 #define P_TOPRIGHT P[3]
1732 #define P_MEDIAN P[4]
1733 #define P_MV1 P[9]
1734 #define FLAG_QPEL 1 //must be 1
1736 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1737 uint8_t p_buffer[1024];
1738 uint8_t i_buffer[1024];
1739 uint8_t p_state[sizeof(s->block_state)];
1740 uint8_t i_state[sizeof(s->block_state)];
1741 RangeCoder pc, ic;
1742 uint8_t *pbbak= s->c.bytestream;
1743 uint8_t *pbbak_start= s->c.bytestream_start;
1744 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1745 const int w= s->b_width << s->block_max_depth;
1746 const int h= s->b_height << s->block_max_depth;
1747 const int rem_depth= s->block_max_depth - level;
1748 const int index= (x + y*w) << rem_depth;
1749 const int block_w= 1<<(LOG2_MB_SIZE - level);
1750 int trx= (x+1)<<rem_depth;
1751 int try= (y+1)<<rem_depth;
1752 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1753 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1754 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1755 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1756 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1757 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1758 int pl = left->color[0];
1759 int pcb= left->color[1];
1760 int pcr= left->color[2];
1761 int pmx, pmy;
1762 int mx=0, my=0;
1763 int l,cr,cb;
1764 const int stride= s->current_picture.linesize[0];
1765 const int uvstride= s->current_picture.linesize[1];
1766 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1767 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1768 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1769 int P[10][2];
1770 int16_t last_mv[3][2];
1771 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1772 const int shift= 1+qpel;
1773 MotionEstContext *c= &s->m.me;
1774 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1775 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1776 int my_context= av_log2(2*FFABS(left->my - top->my));
1777 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1778 int ref, best_ref, ref_score, ref_mx, ref_my;
1780 assert(sizeof(s->block_state) >= 256);
1781 if(s->keyframe){
1782 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1783 return 0;
1786 // clip predictors / edge ?
1788 P_LEFT[0]= left->mx;
1789 P_LEFT[1]= left->my;
1790 P_TOP [0]= top->mx;
1791 P_TOP [1]= top->my;
1792 P_TOPRIGHT[0]= tr->mx;
1793 P_TOPRIGHT[1]= tr->my;
1795 last_mv[0][0]= s->block[index].mx;
1796 last_mv[0][1]= s->block[index].my;
1797 last_mv[1][0]= right->mx;
1798 last_mv[1][1]= right->my;
1799 last_mv[2][0]= bottom->mx;
1800 last_mv[2][1]= bottom->my;
1802 s->m.mb_stride=2;
1803 s->m.mb_x=
1804 s->m.mb_y= 0;
1805 c->skip= 0;
1807 assert(c-> stride == stride);
1808 assert(c->uvstride == uvstride);
1810 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1811 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1812 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1813 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1815 c->xmin = - x*block_w - 16+2;
1816 c->ymin = - y*block_w - 16+2;
1817 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1818 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1820 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1821 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1822 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1823 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1824 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1825 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1826 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1828 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1829 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1831 if (!y) {
1832 c->pred_x= P_LEFT[0];
1833 c->pred_y= P_LEFT[1];
1834 } else {
1835 c->pred_x = P_MEDIAN[0];
1836 c->pred_y = P_MEDIAN[1];
1839 score= INT_MAX;
1840 best_ref= 0;
1841 for(ref=0; ref<s->ref_frames; ref++){
1842 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1844 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1845 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1847 assert(ref_mx >= c->xmin);
1848 assert(ref_mx <= c->xmax);
1849 assert(ref_my >= c->ymin);
1850 assert(ref_my <= c->ymax);
1852 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1853 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1854 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1855 if(s->ref_mvs[ref]){
1856 s->ref_mvs[ref][index][0]= ref_mx;
1857 s->ref_mvs[ref][index][1]= ref_my;
1858 s->ref_scores[ref][index]= ref_score;
1860 if(score > ref_score){
1861 score= ref_score;
1862 best_ref= ref;
1863 mx= ref_mx;
1864 my= ref_my;
1867 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1869 // subpel search
1870 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1871 pc= s->c;
1872 pc.bytestream_start=
1873 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1874 memcpy(p_state, s->block_state, sizeof(s->block_state));
1876 if(level!=s->block_max_depth)
1877 put_rac(&pc, &p_state[4 + s_context], 1);
1878 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1879 if(s->ref_frames > 1)
1880 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1881 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1882 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1883 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1884 p_len= pc.bytestream - pc.bytestream_start;
1885 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1887 block_s= block_w*block_w;
1888 sum = pix_sum(current_data[0], stride, block_w);
1889 l= (sum + block_s/2)/block_s;
1890 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1892 block_s= block_w*block_w>>2;
1893 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1894 cb= (sum + block_s/2)/block_s;
1895 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1896 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1897 cr= (sum + block_s/2)/block_s;
1898 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1900 ic= s->c;
1901 ic.bytestream_start=
1902 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1903 memcpy(i_state, s->block_state, sizeof(s->block_state));
1904 if(level!=s->block_max_depth)
1905 put_rac(&ic, &i_state[4 + s_context], 1);
1906 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1907 put_symbol(&ic, &i_state[32], l-pl , 1);
1908 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1909 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1910 i_len= ic.bytestream - ic.bytestream_start;
1911 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1913 // assert(score==256*256*256*64-1);
1914 assert(iscore < 255*255*256 + s->lambda2*10);
1915 assert(iscore >= 0);
1916 assert(l>=0 && l<=255);
1917 assert(pl>=0 && pl<=255);
1919 if(level==0){
1920 int varc= iscore >> 8;
1921 int vard= score >> 8;
1922 if (vard <= 64 || vard < varc)
1923 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1924 else
1925 c->scene_change_score+= s->m.qscale;
1928 if(level!=s->block_max_depth){
1929 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1930 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1931 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1932 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1933 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1934 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1936 if(score2 < score && score2 < iscore)
1937 return score2;
1940 if(iscore < score){
1941 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1942 memcpy(pbbak, i_buffer, i_len);
1943 s->c= ic;
1944 s->c.bytestream_start= pbbak_start;
1945 s->c.bytestream= pbbak + i_len;
1946 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1947 memcpy(s->block_state, i_state, sizeof(s->block_state));
1948 return iscore;
1949 }else{
1950 memcpy(pbbak, p_buffer, p_len);
1951 s->c= pc;
1952 s->c.bytestream_start= pbbak_start;
1953 s->c.bytestream= pbbak + p_len;
1954 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1955 memcpy(s->block_state, p_state, sizeof(s->block_state));
1956 return score;
1960 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1961 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1962 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1963 }else{
1964 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1968 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1969 const int w= s->b_width << s->block_max_depth;
1970 const int rem_depth= s->block_max_depth - level;
1971 const int index= (x + y*w) << rem_depth;
1972 int trx= (x+1)<<rem_depth;
1973 BlockNode *b= &s->block[index];
1974 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1975 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1976 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1977 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1978 int pl = left->color[0];
1979 int pcb= left->color[1];
1980 int pcr= left->color[2];
1981 int pmx, pmy;
1982 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1983 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1984 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1985 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1987 if(s->keyframe){
1988 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1989 return;
1992 if(level!=s->block_max_depth){
1993 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1994 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1995 }else{
1996 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1997 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1998 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1999 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2000 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2001 return;
2004 if(b->type & BLOCK_INTRA){
2005 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2006 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2007 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2008 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2009 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2010 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2011 }else{
2012 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2013 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2014 if(s->ref_frames > 1)
2015 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2016 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2017 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2018 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2022 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2023 const int w= s->b_width << s->block_max_depth;
2024 const int rem_depth= s->block_max_depth - level;
2025 const int index= (x + y*w) << rem_depth;
2026 int trx= (x+1)<<rem_depth;
2027 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2028 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2029 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2030 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2031 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033 if(s->keyframe){
2034 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2035 return;
2038 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2039 int type, mx, my;
2040 int l = left->color[0];
2041 int cb= left->color[1];
2042 int cr= left->color[2];
2043 int ref = 0;
2044 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2045 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2046 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2048 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2050 if(type){
2051 pred_mv(s, &mx, &my, 0, left, top, tr);
2052 l += get_symbol(&s->c, &s->block_state[32], 1);
2053 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2054 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2055 }else{
2056 if(s->ref_frames > 1)
2057 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2058 pred_mv(s, &mx, &my, ref, left, top, tr);
2059 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2060 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2062 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2063 }else{
2064 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2065 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2066 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2067 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2071 static void encode_blocks(SnowContext *s, int search){
2072 int x, y;
2073 int w= s->b_width;
2074 int h= s->b_height;
2076 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2077 iterative_me(s);
2079 for(y=0; y<h; y++){
2080 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2081 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2082 return;
2084 for(x=0; x<w; x++){
2085 if(s->avctx->me_method == ME_ITER || !search)
2086 encode_q_branch2(s, 0, x, y);
2087 else
2088 encode_q_branch (s, 0, x, y);
2093 static void decode_blocks(SnowContext *s){
2094 int x, y;
2095 int w= s->b_width;
2096 int h= s->b_height;
2098 for(y=0; y<h; y++){
2099 for(x=0; x<w; x++){
2100 decode_q_branch(s, 0, x, y);
2105 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2106 static const uint8_t weight[64]={
2107 8,7,6,5,4,3,2,1,
2108 7,7,0,0,0,0,0,1,
2109 6,0,6,0,0,0,2,0,
2110 5,0,0,5,0,3,0,0,
2111 4,0,0,0,4,0,0,0,
2112 3,0,0,5,0,3,0,0,
2113 2,0,6,0,0,0,2,0,
2114 1,7,0,0,0,0,0,1,
2117 static const uint8_t brane[256]={
2118 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2119 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2120 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2121 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2122 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2123 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2124 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2125 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2126 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2127 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2128 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2129 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2130 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2131 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2132 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2133 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2136 static const uint8_t needs[16]={
2137 0,1,0,0,
2138 2,4,2,0,
2139 0,1,0,0,
2143 int x, y, b, r, l;
2144 int16_t tmpIt [64*(32+HTAPS_MAX)];
2145 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2146 int16_t *tmpI= tmpIt;
2147 uint8_t *tmp2= tmp2t[0];
2148 const uint8_t *hpel[11];
2149 assert(dx<16 && dy<16);
2150 r= brane[dx + 16*dy]&15;
2151 l= brane[dx + 16*dy]>>4;
2153 b= needs[l] | needs[r];
2154 if(p && !p->diag_mc)
2155 b= 15;
2157 if(b&5){
2158 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2159 for(x=0; x < b_w; x++){
2160 int a_1=src[x + HTAPS_MAX/2-4];
2161 int a0= src[x + HTAPS_MAX/2-3];
2162 int a1= src[x + HTAPS_MAX/2-2];
2163 int a2= src[x + HTAPS_MAX/2-1];
2164 int a3= src[x + HTAPS_MAX/2+0];
2165 int a4= src[x + HTAPS_MAX/2+1];
2166 int a5= src[x + HTAPS_MAX/2+2];
2167 int a6= src[x + HTAPS_MAX/2+3];
2168 int am=0;
2169 if(!p || p->fast_mc){
2170 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2171 tmpI[x]= am;
2172 am= (am+16)>>5;
2173 }else{
2174 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2175 tmpI[x]= am;
2176 am= (am+32)>>6;
2179 if(am&(~255)) am= ~(am>>31);
2180 tmp2[x]= am;
2182 tmpI+= 64;
2183 tmp2+= stride;
2184 src += stride;
2186 src -= stride*y;
2188 src += HTAPS_MAX/2 - 1;
2189 tmp2= tmp2t[1];
2191 if(b&2){
2192 for(y=0; y < b_h; y++){
2193 for(x=0; x < b_w+1; x++){
2194 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2195 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2196 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2197 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2198 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2199 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2200 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2201 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2202 int am=0;
2203 if(!p || p->fast_mc)
2204 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2205 else
2206 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2208 if(am&(~255)) am= ~(am>>31);
2209 tmp2[x]= am;
2211 src += stride;
2212 tmp2+= stride;
2214 src -= stride*y;
2216 src += stride*(HTAPS_MAX/2 - 1);
2217 tmp2= tmp2t[2];
2218 tmpI= tmpIt;
2219 if(b&4){
2220 for(y=0; y < b_h; y++){
2221 for(x=0; x < b_w; x++){
2222 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2223 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2224 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2225 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2226 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2227 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2228 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2229 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2230 int am=0;
2231 if(!p || p->fast_mc)
2232 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2233 else
2234 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2235 if(am&(~255)) am= ~(am>>31);
2236 tmp2[x]= am;
2238 tmpI+= 64;
2239 tmp2+= stride;
2243 hpel[ 0]= src;
2244 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2245 hpel[ 2]= src + 1;
2247 hpel[ 4]= tmp2t[1];
2248 hpel[ 5]= tmp2t[2];
2249 hpel[ 6]= tmp2t[1] + 1;
2251 hpel[ 8]= src + stride;
2252 hpel[ 9]= hpel[1] + stride;
2253 hpel[10]= hpel[8] + 1;
2255 if(b==15){
2256 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2257 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2258 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2259 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2260 dx&=7;
2261 dy&=7;
2262 for(y=0; y < b_h; y++){
2263 for(x=0; x < b_w; x++){
2264 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2265 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2267 src1+=stride;
2268 src2+=stride;
2269 src3+=stride;
2270 src4+=stride;
2271 dst +=stride;
2273 }else{
2274 const uint8_t *src1= hpel[l];
2275 const uint8_t *src2= hpel[r];
2276 int a= weight[((dx&7) + (8*(dy&7)))];
2277 int b= 8-a;
2278 for(y=0; y < b_h; y++){
2279 for(x=0; x < b_w; x++){
2280 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2282 src1+=stride;
2283 src2+=stride;
2284 dst +=stride;
2289 #define mca(dx,dy,b_w)\
2290 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2291 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2292 assert(h==b_w);\
2293 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2296 mca( 0, 0,16)
2297 mca( 8, 0,16)
2298 mca( 0, 8,16)
2299 mca( 8, 8,16)
2300 mca( 0, 0,8)
2301 mca( 8, 0,8)
2302 mca( 0, 8,8)
2303 mca( 8, 8,8)
2305 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2306 if(block->type & BLOCK_INTRA){
2307 int x, y;
2308 const int color = block->color[plane_index];
2309 const int color4= color*0x01010101;
2310 if(b_w==32){
2311 for(y=0; y < b_h; y++){
2312 *(uint32_t*)&dst[0 + y*stride]= color4;
2313 *(uint32_t*)&dst[4 + y*stride]= color4;
2314 *(uint32_t*)&dst[8 + y*stride]= color4;
2315 *(uint32_t*)&dst[12+ y*stride]= color4;
2316 *(uint32_t*)&dst[16+ y*stride]= color4;
2317 *(uint32_t*)&dst[20+ y*stride]= color4;
2318 *(uint32_t*)&dst[24+ y*stride]= color4;
2319 *(uint32_t*)&dst[28+ y*stride]= color4;
2321 }else if(b_w==16){
2322 for(y=0; y < b_h; y++){
2323 *(uint32_t*)&dst[0 + y*stride]= color4;
2324 *(uint32_t*)&dst[4 + y*stride]= color4;
2325 *(uint32_t*)&dst[8 + y*stride]= color4;
2326 *(uint32_t*)&dst[12+ y*stride]= color4;
2328 }else if(b_w==8){
2329 for(y=0; y < b_h; y++){
2330 *(uint32_t*)&dst[0 + y*stride]= color4;
2331 *(uint32_t*)&dst[4 + y*stride]= color4;
2333 }else if(b_w==4){
2334 for(y=0; y < b_h; y++){
2335 *(uint32_t*)&dst[0 + y*stride]= color4;
2337 }else{
2338 for(y=0; y < b_h; y++){
2339 for(x=0; x < b_w; x++){
2340 dst[x + y*stride]= color;
2344 }else{
2345 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2346 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2347 int mx= block->mx*scale;
2348 int my= block->my*scale;
2349 const int dx= mx&15;
2350 const int dy= my&15;
2351 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2352 sx += (mx>>4) - (HTAPS_MAX/2-1);
2353 sy += (my>>4) - (HTAPS_MAX/2-1);
2354 src += sx + sy*stride;
2355 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2356 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2357 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2358 src= tmp + MB_SIZE;
2360 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2361 // assert(!(b_w&(b_w-1)));
2362 assert(b_w>1 && b_h>1);
2363 assert((tab_index>=0 && tab_index<4) || b_w==32);
2364 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2365 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2366 else if(b_w==32){
2367 int y;
2368 for(y=0; y<b_h; y+=16){
2369 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2372 }else if(b_w==b_h)
2373 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2374 else if(b_w==2*b_h){
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2377 }else{
2378 assert(2*b_w==b_h);
2379 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2385 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2386 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2387 int y, x;
2388 IDWTELEM * dst;
2389 for(y=0; y<b_h; y++){
2390 //FIXME ugly misuse of obmc_stride
2391 const uint8_t *obmc1= obmc + y*obmc_stride;
2392 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2393 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2394 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2395 dst = slice_buffer_get_line(sb, src_y + y);
2396 for(x=0; x<b_w; x++){
2397 int v= obmc1[x] * block[3][x + y*src_stride]
2398 +obmc2[x] * block[2][x + y*src_stride]
2399 +obmc3[x] * block[1][x + y*src_stride]
2400 +obmc4[x] * block[0][x + y*src_stride];
2402 v <<= 8 - LOG2_OBMC_MAX;
2403 if(FRAC_BITS != 8){
2404 v >>= 8 - FRAC_BITS;
2406 if(add){
2407 v += dst[x + src_x];
2408 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2409 if(v&(~255)) v= ~(v>>31);
2410 dst8[x + y*src_stride] = v;
2411 }else{
2412 dst[x + src_x] -= v;
2418 //FIXME name cleanup (b_w, block_w, b_width stuff)
2419 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2420 const int b_width = s->b_width << s->block_max_depth;
2421 const int b_height= s->b_height << s->block_max_depth;
2422 const int b_stride= b_width;
2423 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2424 BlockNode *rt= lt+1;
2425 BlockNode *lb= lt+b_stride;
2426 BlockNode *rb= lb+1;
2427 uint8_t *block[4];
2428 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2429 uint8_t *tmp = s->scratchbuf;
2430 uint8_t *ptmp;
2431 int x,y;
2433 if(b_x<0){
2434 lt= rt;
2435 lb= rb;
2436 }else if(b_x + 1 >= b_width){
2437 rt= lt;
2438 rb= lb;
2440 if(b_y<0){
2441 lt= lb;
2442 rt= rb;
2443 }else if(b_y + 1 >= b_height){
2444 lb= lt;
2445 rb= rt;
2448 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2449 obmc -= src_x;
2450 b_w += src_x;
2451 if(!sliced && !offset_dst)
2452 dst -= src_x;
2453 src_x=0;
2454 }else if(src_x + b_w > w){
2455 b_w = w - src_x;
2457 if(src_y<0){
2458 obmc -= src_y*obmc_stride;
2459 b_h += src_y;
2460 if(!sliced && !offset_dst)
2461 dst -= src_y*dst_stride;
2462 src_y=0;
2463 }else if(src_y + b_h> h){
2464 b_h = h - src_y;
2467 if(b_w<=0 || b_h<=0) return;
2469 assert(src_stride > 2*MB_SIZE + 5);
2471 if(!sliced && offset_dst)
2472 dst += src_x + src_y*dst_stride;
2473 dst8+= src_x + src_y*src_stride;
2474 // src += src_x + src_y*src_stride;
2476 ptmp= tmp + 3*tmp_step;
2477 block[0]= ptmp;
2478 ptmp+=tmp_step;
2479 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2481 if(same_block(lt, rt)){
2482 block[1]= block[0];
2483 }else{
2484 block[1]= ptmp;
2485 ptmp+=tmp_step;
2486 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2489 if(same_block(lt, lb)){
2490 block[2]= block[0];
2491 }else if(same_block(rt, lb)){
2492 block[2]= block[1];
2493 }else{
2494 block[2]= ptmp;
2495 ptmp+=tmp_step;
2496 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2499 if(same_block(lt, rb) ){
2500 block[3]= block[0];
2501 }else if(same_block(rt, rb)){
2502 block[3]= block[1];
2503 }else if(same_block(lb, rb)){
2504 block[3]= block[2];
2505 }else{
2506 block[3]= ptmp;
2507 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2509 #if 0
2510 for(y=0; y<b_h; y++){
2511 for(x=0; x<b_w; x++){
2512 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2513 if(add) dst[x + y*dst_stride] += v;
2514 else dst[x + y*dst_stride] -= v;
2517 for(y=0; y<b_h; y++){
2518 uint8_t *obmc2= obmc + (obmc_stride>>1);
2519 for(x=0; x<b_w; x++){
2520 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2521 if(add) dst[x + y*dst_stride] += v;
2522 else dst[x + y*dst_stride] -= v;
2525 for(y=0; y<b_h; y++){
2526 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2527 for(x=0; x<b_w; x++){
2528 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2529 if(add) dst[x + y*dst_stride] += v;
2530 else dst[x + y*dst_stride] -= v;
2533 for(y=0; y<b_h; y++){
2534 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2535 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2536 for(x=0; x<b_w; x++){
2537 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2538 if(add) dst[x + y*dst_stride] += v;
2539 else dst[x + y*dst_stride] -= v;
2542 #else
2543 if(sliced){
2544 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2545 }else{
2546 for(y=0; y<b_h; y++){
2547 //FIXME ugly misuse of obmc_stride
2548 const uint8_t *obmc1= obmc + y*obmc_stride;
2549 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2550 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2551 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2552 for(x=0; x<b_w; x++){
2553 int v= obmc1[x] * block[3][x + y*src_stride]
2554 +obmc2[x] * block[2][x + y*src_stride]
2555 +obmc3[x] * block[1][x + y*src_stride]
2556 +obmc4[x] * block[0][x + y*src_stride];
2558 v <<= 8 - LOG2_OBMC_MAX;
2559 if(FRAC_BITS != 8){
2560 v >>= 8 - FRAC_BITS;
2562 if(add){
2563 v += dst[x + y*dst_stride];
2564 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2565 if(v&(~255)) v= ~(v>>31);
2566 dst8[x + y*src_stride] = v;
2567 }else{
2568 dst[x + y*dst_stride] -= v;
2573 #endif /* 0 */
2576 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2577 Plane *p= &s->plane[plane_index];
2578 const int mb_w= s->b_width << s->block_max_depth;
2579 const int mb_h= s->b_height << s->block_max_depth;
2580 int x, y, mb_x;
2581 int block_size = MB_SIZE >> s->block_max_depth;
2582 int block_w = plane_index ? block_size/2 : block_size;
2583 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2584 int obmc_stride= plane_index ? block_size : 2*block_size;
2585 int ref_stride= s->current_picture.linesize[plane_index];
2586 uint8_t *dst8= s->current_picture.data[plane_index];
2587 int w= p->width;
2588 int h= p->height;
2590 if(s->keyframe || (s->avctx->debug&512)){
2591 if(mb_y==mb_h)
2592 return;
2594 if(add){
2595 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2596 // DWTELEM * line = slice_buffer_get_line(sb, y);
2597 IDWTELEM * line = sb->line[y];
2598 for(x=0; x<w; x++){
2599 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2600 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601 v >>= FRAC_BITS;
2602 if(v&(~255)) v= ~(v>>31);
2603 dst8[x + y*ref_stride]= v;
2606 }else{
2607 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2608 // DWTELEM * line = slice_buffer_get_line(sb, y);
2609 IDWTELEM * line = sb->line[y];
2610 for(x=0; x<w; x++){
2611 line[x] -= 128 << FRAC_BITS;
2612 // buf[x + y*w]-= 128<<FRAC_BITS;
2617 return;
2620 for(mb_x=0; mb_x<=mb_w; mb_x++){
2621 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2622 block_w*mb_x - block_w/2,
2623 block_w*mb_y - block_w/2,
2624 block_w, block_w,
2625 w, h,
2626 w, ref_stride, obmc_stride,
2627 mb_x - 1, mb_y - 1,
2628 add, 0, plane_index);
2632 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2633 Plane *p= &s->plane[plane_index];
2634 const int mb_w= s->b_width << s->block_max_depth;
2635 const int mb_h= s->b_height << s->block_max_depth;
2636 int x, y, mb_x;
2637 int block_size = MB_SIZE >> s->block_max_depth;
2638 int block_w = plane_index ? block_size/2 : block_size;
2639 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2640 const int obmc_stride= plane_index ? block_size : 2*block_size;
2641 int ref_stride= s->current_picture.linesize[plane_index];
2642 uint8_t *dst8= s->current_picture.data[plane_index];
2643 int w= p->width;
2644 int h= p->height;
2646 if(s->keyframe || (s->avctx->debug&512)){
2647 if(mb_y==mb_h)
2648 return;
2650 if(add){
2651 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2652 for(x=0; x<w; x++){
2653 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654 v >>= FRAC_BITS;
2655 if(v&(~255)) v= ~(v>>31);
2656 dst8[x + y*ref_stride]= v;
2659 }else{
2660 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2661 for(x=0; x<w; x++){
2662 buf[x + y*w]-= 128<<FRAC_BITS;
2667 return;
2670 for(mb_x=0; mb_x<=mb_w; mb_x++){
2671 add_yblock(s, 0, NULL, buf, dst8, obmc,
2672 block_w*mb_x - block_w/2,
2673 block_w*mb_y - block_w/2,
2674 block_w, block_w,
2675 w, h,
2676 w, ref_stride, obmc_stride,
2677 mb_x - 1, mb_y - 1,
2678 add, 1, plane_index);
2682 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2683 const int mb_h= s->b_height << s->block_max_depth;
2684 int mb_y;
2685 for(mb_y=0; mb_y<=mb_h; mb_y++)
2686 predict_slice(s, buf, plane_index, add, mb_y);
2689 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2690 int i, x2, y2;
2691 Plane *p= &s->plane[plane_index];
2692 const int block_size = MB_SIZE >> s->block_max_depth;
2693 const int block_w = plane_index ? block_size/2 : block_size;
2694 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2695 const int obmc_stride= plane_index ? block_size : 2*block_size;
2696 const int ref_stride= s->current_picture.linesize[plane_index];
2697 uint8_t *src= s-> input_picture.data[plane_index];
2698 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2699 const int b_stride = s->b_width << s->block_max_depth;
2700 const int w= p->width;
2701 const int h= p->height;
2702 int index= mb_x + mb_y*b_stride;
2703 BlockNode *b= &s->block[index];
2704 BlockNode backup= *b;
2705 int ab=0;
2706 int aa=0;
2708 b->type|= BLOCK_INTRA;
2709 b->color[plane_index]= 0;
2710 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2712 for(i=0; i<4; i++){
2713 int mb_x2= mb_x + (i &1) - 1;
2714 int mb_y2= mb_y + (i>>1) - 1;
2715 int x= block_w*mb_x2 + block_w/2;
2716 int y= block_w*mb_y2 + block_w/2;
2718 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2719 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2721 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2722 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2723 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2724 int obmc_v= obmc[index];
2725 int d;
2726 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2727 if(x<0) obmc_v += obmc[index + block_w];
2728 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2729 if(x+block_w>w) obmc_v += obmc[index - block_w];
2730 //FIXME precalculate this or simplify it somehow else
2732 d = -dst[index] + (1<<(FRAC_BITS-1));
2733 dst[index] = d;
2734 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2735 aa += obmc_v * obmc_v; //FIXME precalculate this
2739 *b= backup;
2741 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2744 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2745 const int b_stride = s->b_width << s->block_max_depth;
2746 const int b_height = s->b_height<< s->block_max_depth;
2747 int index= x + y*b_stride;
2748 const BlockNode *b = &s->block[index];
2749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2750 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2751 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2752 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2753 int dmx, dmy;
2754 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2755 // int my_context= av_log2(2*FFABS(left->my - top->my));
2757 if(x<0 || x>=b_stride || y>=b_height)
2758 return 0;
2760 1 0 0
2761 01X 1-2 1
2762 001XX 3-6 2-3
2763 0001XXX 7-14 4-7
2764 00001XXXX 15-30 8-15
2766 //FIXME try accurate rate
2767 //FIXME intra and inter predictors if surrounding blocks are not the same type
2768 if(b->type & BLOCK_INTRA){
2769 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2770 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2771 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2772 }else{
2773 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2774 dmx-= b->mx;
2775 dmy-= b->my;
2776 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2777 + av_log2(2*FFABS(dmy))
2778 + av_log2(2*b->ref));
2782 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2783 Plane *p= &s->plane[plane_index];
2784 const int block_size = MB_SIZE >> s->block_max_depth;
2785 const int block_w = plane_index ? block_size/2 : block_size;
2786 const int obmc_stride= plane_index ? block_size : 2*block_size;
2787 const int ref_stride= s->current_picture.linesize[plane_index];
2788 uint8_t *dst= s->current_picture.data[plane_index];
2789 uint8_t *src= s-> input_picture.data[plane_index];
2790 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2791 uint8_t *cur = s->scratchbuf;
2792 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2793 const int b_stride = s->b_width << s->block_max_depth;
2794 const int b_height = s->b_height<< s->block_max_depth;
2795 const int w= p->width;
2796 const int h= p->height;
2797 int distortion;
2798 int rate= 0;
2799 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2800 int sx= block_w*mb_x - block_w/2;
2801 int sy= block_w*mb_y - block_w/2;
2802 int x0= FFMAX(0,-sx);
2803 int y0= FFMAX(0,-sy);
2804 int x1= FFMIN(block_w*2, w-sx);
2805 int y1= FFMIN(block_w*2, h-sy);
2806 int i,x,y;
2808 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2810 for(y=y0; y<y1; y++){
2811 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2812 const IDWTELEM *pred1 = pred + y*obmc_stride;
2813 uint8_t *cur1 = cur + y*ref_stride;
2814 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2815 for(x=x0; x<x1; x++){
2816 #if FRAC_BITS >= LOG2_OBMC_MAX
2817 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2818 #else
2819 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2820 #endif
2821 v = (v + pred1[x]) >> FRAC_BITS;
2822 if(v&(~255)) v= ~(v>>31);
2823 dst1[x] = v;
2827 /* copy the regions where obmc[] = (uint8_t)256 */
2828 if(LOG2_OBMC_MAX == 8
2829 && (mb_x == 0 || mb_x == b_stride-1)
2830 && (mb_y == 0 || mb_y == b_height-1)){
2831 if(mb_x == 0)
2832 x1 = block_w;
2833 else
2834 x0 = block_w;
2835 if(mb_y == 0)
2836 y1 = block_w;
2837 else
2838 y0 = block_w;
2839 for(y=y0; y<y1; y++)
2840 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2843 if(block_w==16){
2844 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2845 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2846 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2847 * So improving the score of one block is not strictly guaranteed
2848 * to improve the score of the whole frame, thus iterative motion
2849 * estimation does not always converge. */
2850 if(s->avctx->me_cmp == FF_CMP_W97)
2851 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2852 else if(s->avctx->me_cmp == FF_CMP_W53)
2853 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2854 else{
2855 distortion = 0;
2856 for(i=0; i<4; i++){
2857 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2858 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2861 }else{
2862 assert(block_w==8);
2863 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2866 if(plane_index==0){
2867 for(i=0; i<4; i++){
2868 /* ..RRr
2869 * .RXx.
2870 * rxx..
2872 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2874 if(mb_x == b_stride-2)
2875 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2877 return distortion + rate*penalty_factor;
2880 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2881 int i, y2;
2882 Plane *p= &s->plane[plane_index];
2883 const int block_size = MB_SIZE >> s->block_max_depth;
2884 const int block_w = plane_index ? block_size/2 : block_size;
2885 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2886 const int obmc_stride= plane_index ? block_size : 2*block_size;
2887 const int ref_stride= s->current_picture.linesize[plane_index];
2888 uint8_t *dst= s->current_picture.data[plane_index];
2889 uint8_t *src= s-> input_picture.data[plane_index];
2890 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2891 // const has only been removed from zero_dst to suppress a warning
2892 static IDWTELEM zero_dst[4096]; //FIXME
2893 const int b_stride = s->b_width << s->block_max_depth;
2894 const int w= p->width;
2895 const int h= p->height;
2896 int distortion= 0;
2897 int rate= 0;
2898 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2900 for(i=0; i<9; i++){
2901 int mb_x2= mb_x + (i%3) - 1;
2902 int mb_y2= mb_y + (i/3) - 1;
2903 int x= block_w*mb_x2 + block_w/2;
2904 int y= block_w*mb_y2 + block_w/2;
2906 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2907 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2909 //FIXME find a cleaner/simpler way to skip the outside stuff
2910 for(y2= y; y2<0; y2++)
2911 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2912 for(y2= h; y2<y+block_w; y2++)
2913 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2914 if(x<0){
2915 for(y2= y; y2<y+block_w; y2++)
2916 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2918 if(x+block_w > w){
2919 for(y2= y; y2<y+block_w; y2++)
2920 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2923 assert(block_w== 8 || block_w==16);
2924 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2927 if(plane_index==0){
2928 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2929 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2931 /* ..RRRr
2932 * .RXXx.
2933 * .RXXx.
2934 * rxxx.
2936 if(merged)
2937 rate = get_block_bits(s, mb_x, mb_y, 2);
2938 for(i=merged?4:0; i<9; i++){
2939 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2940 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2943 return distortion + rate*penalty_factor;
2946 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2947 const int b_stride= s->b_width << s->block_max_depth;
2948 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2949 BlockNode backup= *block;
2950 int rd, index, value;
2952 assert(mb_x>=0 && mb_y>=0);
2953 assert(mb_x<b_stride);
2955 if(intra){
2956 block->color[0] = p[0];
2957 block->color[1] = p[1];
2958 block->color[2] = p[2];
2959 block->type |= BLOCK_INTRA;
2960 }else{
2961 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2962 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2963 if(s->me_cache[index] == value)
2964 return 0;
2965 s->me_cache[index]= value;
2967 block->mx= p[0];
2968 block->my= p[1];
2969 block->type &= ~BLOCK_INTRA;
2972 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2974 //FIXME chroma
2975 if(rd < *best_rd){
2976 *best_rd= rd;
2977 return 1;
2978 }else{
2979 *block= backup;
2980 return 0;
2984 /* special case for int[2] args we discard afterwards,
2985 * fixes compilation problem with gcc 2.95 */
2986 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2987 int p[2] = {p0, p1};
2988 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2991 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2992 const int b_stride= s->b_width << s->block_max_depth;
2993 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2994 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2995 int rd, index, value;
2997 assert(mb_x>=0 && mb_y>=0);
2998 assert(mb_x<b_stride);
2999 assert(((mb_x|mb_y)&1) == 0);
3001 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3002 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3003 if(s->me_cache[index] == value)
3004 return 0;
3005 s->me_cache[index]= value;
3007 block->mx= p0;
3008 block->my= p1;
3009 block->ref= ref;
3010 block->type &= ~BLOCK_INTRA;
3011 block[1]= block[b_stride]= block[b_stride+1]= *block;
3013 rd= get_4block_rd(s, mb_x, mb_y, 0);
3015 //FIXME chroma
3016 if(rd < *best_rd){
3017 *best_rd= rd;
3018 return 1;
3019 }else{
3020 block[0]= backup[0];
3021 block[1]= backup[1];
3022 block[b_stride]= backup[2];
3023 block[b_stride+1]= backup[3];
3024 return 0;
3028 static void iterative_me(SnowContext *s){
3029 int pass, mb_x, mb_y;
3030 const int b_width = s->b_width << s->block_max_depth;
3031 const int b_height= s->b_height << s->block_max_depth;
3032 const int b_stride= b_width;
3033 int color[3];
3036 RangeCoder r = s->c;
3037 uint8_t state[sizeof(s->block_state)];
3038 memcpy(state, s->block_state, sizeof(s->block_state));
3039 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3040 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3041 encode_q_branch(s, 0, mb_x, mb_y);
3042 s->c = r;
3043 memcpy(s->block_state, state, sizeof(s->block_state));
3046 for(pass=0; pass<25; pass++){
3047 int change= 0;
3049 for(mb_y= 0; mb_y<b_height; mb_y++){
3050 for(mb_x= 0; mb_x<b_width; mb_x++){
3051 int dia_change, i, j, ref;
3052 int best_rd= INT_MAX, ref_rd;
3053 BlockNode backup, ref_b;
3054 const int index= mb_x + mb_y * b_stride;
3055 BlockNode *block= &s->block[index];
3056 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3057 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3058 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3059 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3060 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3061 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3062 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3063 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3064 const int b_w= (MB_SIZE >> s->block_max_depth);
3065 uint8_t obmc_edged[b_w*2][b_w*2];
3067 if(pass && (block->type & BLOCK_OPT))
3068 continue;
3069 block->type |= BLOCK_OPT;
3071 backup= *block;
3073 if(!s->me_cache_generation)
3074 memset(s->me_cache, 0, sizeof(s->me_cache));
3075 s->me_cache_generation += 1<<22;
3077 //FIXME precalculate
3079 int x, y;
3080 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3081 if(mb_x==0)
3082 for(y=0; y<b_w*2; y++)
3083 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3084 if(mb_x==b_stride-1)
3085 for(y=0; y<b_w*2; y++)
3086 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3087 if(mb_y==0){
3088 for(x=0; x<b_w*2; x++)
3089 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3090 for(y=1; y<b_w; y++)
3091 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3093 if(mb_y==b_height-1){
3094 for(x=0; x<b_w*2; x++)
3095 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3096 for(y=b_w; y<b_w*2-1; y++)
3097 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3101 //skip stuff outside the picture
3102 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3103 uint8_t *src= s-> input_picture.data[0];
3104 uint8_t *dst= s->current_picture.data[0];
3105 const int stride= s->current_picture.linesize[0];
3106 const int block_w= MB_SIZE >> s->block_max_depth;
3107 const int sx= block_w*mb_x - block_w/2;
3108 const int sy= block_w*mb_y - block_w/2;
3109 const int w= s->plane[0].width;
3110 const int h= s->plane[0].height;
3111 int y;
3113 for(y=sy; y<0; y++)
3114 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3115 for(y=h; y<sy+block_w*2; y++)
3116 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3117 if(sx<0){
3118 for(y=sy; y<sy+block_w*2; y++)
3119 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3121 if(sx+block_w*2 > w){
3122 for(y=sy; y<sy+block_w*2; y++)
3123 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3127 // intra(black) = neighbors' contribution to the current block
3128 for(i=0; i<3; i++)
3129 color[i]= get_dc(s, mb_x, mb_y, i);
3131 // get previous score (cannot be cached due to OBMC)
3132 if(pass > 0 && (block->type&BLOCK_INTRA)){
3133 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3134 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3135 }else
3136 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3138 ref_b= *block;
3139 ref_rd= best_rd;
3140 for(ref=0; ref < s->ref_frames; ref++){
3141 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3142 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3143 continue;
3144 block->ref= ref;
3145 best_rd= INT_MAX;
3147 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3148 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3149 if(tb)
3150 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3151 if(lb)
3152 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3153 if(rb)
3154 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3155 if(bb)
3156 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3158 /* fullpel ME */
3159 //FIXME avoid subpel interpolation / round to nearest integer
3161 dia_change=0;
3162 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3163 for(j=0; j<i; j++){
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3170 }while(dia_change);
3171 /* subpel ME */
3173 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3174 dia_change=0;
3175 for(i=0; i<8; i++)
3176 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3177 }while(dia_change);
3178 //FIXME or try the standard 2 pass qpel or similar
3180 mvr[0][0]= block->mx;
3181 mvr[0][1]= block->my;
3182 if(ref_rd > best_rd){
3183 ref_rd= best_rd;
3184 ref_b= *block;
3187 best_rd= ref_rd;
3188 *block= ref_b;
3189 #if 1
3190 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3191 //FIXME RD style color selection
3192 #endif
3193 if(!same_block(block, &backup)){
3194 if(tb ) tb ->type &= ~BLOCK_OPT;
3195 if(lb ) lb ->type &= ~BLOCK_OPT;
3196 if(rb ) rb ->type &= ~BLOCK_OPT;
3197 if(bb ) bb ->type &= ~BLOCK_OPT;
3198 if(tlb) tlb->type &= ~BLOCK_OPT;
3199 if(trb) trb->type &= ~BLOCK_OPT;
3200 if(blb) blb->type &= ~BLOCK_OPT;
3201 if(brb) brb->type &= ~BLOCK_OPT;
3202 change ++;
3206 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3207 if(!change)
3208 break;
3211 if(s->block_max_depth == 1){
3212 int change= 0;
3213 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3214 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3215 int i;
3216 int best_rd, init_rd;
3217 const int index= mb_x + mb_y * b_stride;
3218 BlockNode *b[4];
3220 b[0]= &s->block[index];
3221 b[1]= b[0]+1;
3222 b[2]= b[0]+b_stride;
3223 b[3]= b[2]+1;
3224 if(same_block(b[0], b[1]) &&
3225 same_block(b[0], b[2]) &&
3226 same_block(b[0], b[3]))
3227 continue;
3229 if(!s->me_cache_generation)
3230 memset(s->me_cache, 0, sizeof(s->me_cache));
3231 s->me_cache_generation += 1<<22;
3233 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3235 //FIXME more multiref search?
3236 check_4block_inter(s, mb_x, mb_y,
3237 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3238 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3240 for(i=0; i<4; i++)
3241 if(!(b[i]->type&BLOCK_INTRA))
3242 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3244 if(init_rd != best_rd)
3245 change++;
3248 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3252 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3253 const int w= b->width;
3254 const int h= b->height;
3255 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3256 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3257 int x,y, thres1, thres2;
3259 if(s->qlog == LOSSLESS_QLOG){
3260 for(y=0; y<h; y++)
3261 for(x=0; x<w; x++)
3262 dst[x + y*stride]= src[x + y*stride];
3263 return;
3266 bias= bias ? 0 : (3*qmul)>>3;
3267 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3268 thres2= 2*thres1;
3270 if(!bias){
3271 for(y=0; y<h; y++){
3272 for(x=0; x<w; x++){
3273 int i= src[x + y*stride];
3275 if((unsigned)(i+thres1) > thres2){
3276 if(i>=0){
3277 i<<= QEXPSHIFT;
3278 i/= qmul; //FIXME optimize
3279 dst[x + y*stride]= i;
3280 }else{
3281 i= -i;
3282 i<<= QEXPSHIFT;
3283 i/= qmul; //FIXME optimize
3284 dst[x + y*stride]= -i;
3286 }else
3287 dst[x + y*stride]= 0;
3290 }else{
3291 for(y=0; y<h; y++){
3292 for(x=0; x<w; x++){
3293 int i= src[x + y*stride];
3295 if((unsigned)(i+thres1) > thres2){
3296 if(i>=0){
3297 i<<= QEXPSHIFT;
3298 i= (i + bias) / qmul; //FIXME optimize
3299 dst[x + y*stride]= i;
3300 }else{
3301 i= -i;
3302 i<<= QEXPSHIFT;
3303 i= (i + bias) / qmul; //FIXME optimize
3304 dst[x + y*stride]= -i;
3306 }else
3307 dst[x + y*stride]= 0;
3313 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3314 const int w= b->width;
3315 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3316 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3317 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3318 int x,y;
3320 if(s->qlog == LOSSLESS_QLOG) return;
3322 for(y=start_y; y<end_y; y++){
3323 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3324 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3325 for(x=0; x<w; x++){
3326 int i= line[x];
3327 if(i<0){
3328 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3329 }else if(i>0){
3330 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3336 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3337 const int w= b->width;
3338 const int h= b->height;
3339 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3340 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3341 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3342 int x,y;
3344 if(s->qlog == LOSSLESS_QLOG) return;
3346 for(y=0; y<h; y++){
3347 for(x=0; x<w; x++){
3348 int i= src[x + y*stride];
3349 if(i<0){
3350 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3351 }else if(i>0){
3352 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3358 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3359 const int w= b->width;
3360 const int h= b->height;
3361 int x,y;
3363 for(y=h-1; y>=0; y--){
3364 for(x=w-1; x>=0; x--){
3365 int i= x + y*stride;
3367 if(x){
3368 if(use_median){
3369 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3370 else src[i] -= src[i - 1];
3371 }else{
3372 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3373 else src[i] -= src[i - 1];
3375 }else{
3376 if(y) src[i] -= src[i - stride];
3382 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3383 const int w= b->width;
3384 int x,y;
3386 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3387 IDWTELEM * prev;
3389 if (start_y != 0)
3390 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3392 for(y=start_y; y<end_y; y++){
3393 prev = line;
3394 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3395 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3396 for(x=0; x<w; x++){
3397 if(x){
3398 if(use_median){
3399 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3400 else line[x] += line[x - 1];
3401 }else{
3402 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3403 else line[x] += line[x - 1];
3405 }else{
3406 if(y) line[x] += prev[x];
3412 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3413 const int w= b->width;
3414 const int h= b->height;
3415 int x,y;
3417 for(y=0; y<h; y++){
3418 for(x=0; x<w; x++){
3419 int i= x + y*stride;
3421 if(x){
3422 if(use_median){
3423 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3424 else src[i] += src[i - 1];
3425 }else{
3426 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3427 else src[i] += src[i - 1];
3429 }else{
3430 if(y) src[i] += src[i - stride];
3436 static void encode_qlogs(SnowContext *s){
3437 int plane_index, level, orientation;
3439 for(plane_index=0; plane_index<2; plane_index++){
3440 for(level=0; level<s->spatial_decomposition_count; level++){
3441 for(orientation=level ? 1:0; orientation<4; orientation++){
3442 if(orientation==2) continue;
3443 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3449 static void encode_header(SnowContext *s){
3450 int plane_index, i;
3451 uint8_t kstate[32];
3453 memset(kstate, MID_STATE, sizeof(kstate));
3455 put_rac(&s->c, kstate, s->keyframe);
3456 if(s->keyframe || s->always_reset){
3457 reset_contexts(s);
3458 s->last_spatial_decomposition_type=
3459 s->last_qlog=
3460 s->last_qbias=
3461 s->last_mv_scale=
3462 s->last_block_max_depth= 0;
3463 for(plane_index=0; plane_index<2; plane_index++){
3464 Plane *p= &s->plane[plane_index];
3465 p->last_htaps=0;
3466 p->last_diag_mc=0;
3467 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3470 if(s->keyframe){
3471 put_symbol(&s->c, s->header_state, s->version, 0);
3472 put_rac(&s->c, s->header_state, s->always_reset);
3473 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3474 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3475 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3476 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3477 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3478 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3479 put_rac(&s->c, s->header_state, s->spatial_scalability);
3480 // put_rac(&s->c, s->header_state, s->rate_scalability);
3481 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3483 encode_qlogs(s);
3486 if(!s->keyframe){
3487 int update_mc=0;
3488 for(plane_index=0; plane_index<2; plane_index++){
3489 Plane *p= &s->plane[plane_index];
3490 update_mc |= p->last_htaps != p->htaps;
3491 update_mc |= p->last_diag_mc != p->diag_mc;
3492 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3494 put_rac(&s->c, s->header_state, update_mc);
3495 if(update_mc){
3496 for(plane_index=0; plane_index<2; plane_index++){
3497 Plane *p= &s->plane[plane_index];
3498 put_rac(&s->c, s->header_state, p->diag_mc);
3499 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3500 for(i= p->htaps/2; i; i--)
3501 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3504 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3505 put_rac(&s->c, s->header_state, 1);
3506 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3507 encode_qlogs(s);
3508 }else
3509 put_rac(&s->c, s->header_state, 0);
3512 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3513 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3514 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3515 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3516 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3520 static void update_last_header_values(SnowContext *s){
3521 int plane_index;
3523 if(!s->keyframe){
3524 for(plane_index=0; plane_index<2; plane_index++){
3525 Plane *p= &s->plane[plane_index];
3526 p->last_diag_mc= p->diag_mc;
3527 p->last_htaps = p->htaps;
3528 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3532 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3533 s->last_qlog = s->qlog;
3534 s->last_qbias = s->qbias;
3535 s->last_mv_scale = s->mv_scale;
3536 s->last_block_max_depth = s->block_max_depth;
3537 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3540 static void decode_qlogs(SnowContext *s){
3541 int plane_index, level, orientation;
3543 for(plane_index=0; plane_index<3; plane_index++){
3544 for(level=0; level<s->spatial_decomposition_count; level++){
3545 for(orientation=level ? 1:0; orientation<4; orientation++){
3546 int q;
3547 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3548 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3549 else q= get_symbol(&s->c, s->header_state, 1);
3550 s->plane[plane_index].band[level][orientation].qlog= q;
3556 static int decode_header(SnowContext *s){
3557 int plane_index;
3558 uint8_t kstate[32];
3560 memset(kstate, MID_STATE, sizeof(kstate));
3562 s->keyframe= get_rac(&s->c, kstate);
3563 if(s->keyframe || s->always_reset){
3564 reset_contexts(s);
3565 s->spatial_decomposition_type=
3566 s->qlog=
3567 s->qbias=
3568 s->mv_scale=
3569 s->block_max_depth= 0;
3571 if(s->keyframe){
3572 s->version= get_symbol(&s->c, s->header_state, 0);
3573 if(s->version>0){
3574 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3575 return -1;
3577 s->always_reset= get_rac(&s->c, s->header_state);
3578 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3579 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3580 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3581 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3582 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3583 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3584 s->spatial_scalability= get_rac(&s->c, s->header_state);
3585 // s->rate_scalability= get_rac(&s->c, s->header_state);
3586 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3588 decode_qlogs(s);
3591 if(!s->keyframe){
3592 if(get_rac(&s->c, s->header_state)){
3593 for(plane_index=0; plane_index<2; plane_index++){
3594 int htaps, i, sum=0;
3595 Plane *p= &s->plane[plane_index];
3596 p->diag_mc= get_rac(&s->c, s->header_state);
3597 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3598 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3599 return -1;
3600 p->htaps= htaps;
3601 for(i= htaps/2; i; i--){
3602 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3603 sum += p->hcoeff[i];
3605 p->hcoeff[0]= 32-sum;
3607 s->plane[2].diag_mc= s->plane[1].diag_mc;
3608 s->plane[2].htaps = s->plane[1].htaps;
3609 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3611 if(get_rac(&s->c, s->header_state)){
3612 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3613 decode_qlogs(s);
3617 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3618 if(s->spatial_decomposition_type > 1){
3619 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3620 return -1;
3623 s->qlog += get_symbol(&s->c, s->header_state, 1);
3624 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3625 s->qbias += get_symbol(&s->c, s->header_state, 1);
3626 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3627 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3628 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3629 s->block_max_depth= 0;
3630 return -1;
3633 return 0;
3636 static void init_qexp(void){
3637 int i;
3638 double v=128;
3640 for(i=0; i<QROOT; i++){
3641 qexp[i]= lrintf(v);
3642 v *= pow(2, 1.0 / QROOT);
3646 static av_cold int common_init(AVCodecContext *avctx){
3647 SnowContext *s = avctx->priv_data;
3648 int width, height;
3649 int i, j;
3651 s->avctx= avctx;
3653 dsputil_init(&s->dsp, avctx);
3655 #define mcf(dx,dy)\
3656 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3657 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3658 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3659 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3660 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3661 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3663 mcf( 0, 0)
3664 mcf( 4, 0)
3665 mcf( 8, 0)
3666 mcf(12, 0)
3667 mcf( 0, 4)
3668 mcf( 4, 4)
3669 mcf( 8, 4)
3670 mcf(12, 4)
3671 mcf( 0, 8)
3672 mcf( 4, 8)
3673 mcf( 8, 8)
3674 mcf(12, 8)
3675 mcf( 0,12)
3676 mcf( 4,12)
3677 mcf( 8,12)
3678 mcf(12,12)
3680 #define mcfh(dx,dy)\
3681 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3682 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3683 mc_block_hpel ## dx ## dy ## 16;\
3684 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3685 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3686 mc_block_hpel ## dx ## dy ## 8;
3688 mcfh(0, 0)
3689 mcfh(8, 0)
3690 mcfh(0, 8)
3691 mcfh(8, 8)
3693 if(!qexp[0])
3694 init_qexp();
3696 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3698 width= s->avctx->width;
3699 height= s->avctx->height;
3701 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3702 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3704 for(i=0; i<MAX_REF_FRAMES; i++)
3705 for(j=0; j<MAX_REF_FRAMES; j++)
3706 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3708 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3709 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3711 return 0;
3714 static int common_init_after_header(AVCodecContext *avctx){
3715 SnowContext *s = avctx->priv_data;
3716 int plane_index, level, orientation;
3718 for(plane_index=0; plane_index<3; plane_index++){
3719 int w= s->avctx->width;
3720 int h= s->avctx->height;
3722 if(plane_index){
3723 w>>= s->chroma_h_shift;
3724 h>>= s->chroma_v_shift;
3726 s->plane[plane_index].width = w;
3727 s->plane[plane_index].height= h;
3729 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3730 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3731 SubBand *b= &s->plane[plane_index].band[level][orientation];
3733 b->buf= s->spatial_dwt_buffer;
3734 b->level= level;
3735 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3736 b->width = (w + !(orientation&1))>>1;
3737 b->height= (h + !(orientation>1))>>1;
3739 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3740 b->buf_x_offset = 0;
3741 b->buf_y_offset = 0;
3743 if(orientation&1){
3744 b->buf += (w+1)>>1;
3745 b->buf_x_offset = (w+1)>>1;
3747 if(orientation>1){
3748 b->buf += b->stride>>1;
3749 b->buf_y_offset = b->stride_line >> 1;
3751 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3753 if(level)
3754 b->parent= &s->plane[plane_index].band[level-1][orientation];
3755 //FIXME avoid this realloc
3756 av_freep(&b->x_coeff);
3757 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3759 w= (w+1)>>1;
3760 h= (h+1)>>1;
3764 return 0;
3767 static int qscale2qlog(int qscale){
3768 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3769 + 61*QROOT/8; //<64 >60
3772 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3774 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3775 * FIXME we know exact mv bits at this point,
3776 * but ratecontrol isn't set up to include them. */
3777 uint32_t coef_sum= 0;
3778 int level, orientation, delta_qlog;
3780 for(level=0; level<s->spatial_decomposition_count; level++){
3781 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3782 SubBand *b= &s->plane[0].band[level][orientation];
3783 IDWTELEM *buf= b->ibuf;
3784 const int w= b->width;
3785 const int h= b->height;
3786 const int stride= b->stride;
3787 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3788 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3789 const int qdiv= (1<<16)/qmul;
3790 int x, y;
3791 //FIXME this is ugly
3792 for(y=0; y<h; y++)
3793 for(x=0; x<w; x++)
3794 buf[x+y*stride]= b->buf[x+y*stride];
3795 if(orientation==0)
3796 decorrelate(s, b, buf, stride, 1, 0);
3797 for(y=0; y<h; y++)
3798 for(x=0; x<w; x++)
3799 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3803 /* ugly, ratecontrol just takes a sqrt again */
3804 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3805 assert(coef_sum < INT_MAX);
3807 if(pict->pict_type == FF_I_TYPE){
3808 s->m.current_picture.mb_var_sum= coef_sum;
3809 s->m.current_picture.mc_mb_var_sum= 0;
3810 }else{
3811 s->m.current_picture.mc_mb_var_sum= coef_sum;
3812 s->m.current_picture.mb_var_sum= 0;
3815 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3816 if (pict->quality < 0)
3817 return INT_MIN;
3818 s->lambda= pict->quality * 3/2;
3819 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3820 s->qlog+= delta_qlog;
3821 return delta_qlog;
3824 static void calculate_visual_weight(SnowContext *s, Plane *p){
3825 int width = p->width;
3826 int height= p->height;
3827 int level, orientation, x, y;
3829 for(level=0; level<s->spatial_decomposition_count; level++){
3830 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3831 SubBand *b= &p->band[level][orientation];
3832 IDWTELEM *ibuf= b->ibuf;
3833 int64_t error=0;
3835 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3836 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3837 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3838 for(y=0; y<height; y++){
3839 for(x=0; x<width; x++){
3840 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3841 error += d*d;
3845 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3850 #define QUANTIZE2 0
3852 #if QUANTIZE2==1
3853 #define Q2_STEP 8
3855 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3856 SubBand *b= &p->band[level][orientation];
3857 int x, y;
3858 int xo=0;
3859 int yo=0;
3860 int step= 1 << (s->spatial_decomposition_count - level);
3862 if(orientation&1)
3863 xo= step>>1;
3864 if(orientation&2)
3865 yo= step>>1;
3867 //FIXME bias for nonzero ?
3868 //FIXME optimize
3869 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3870 for(y=0; y<p->height; y++){
3871 for(x=0; x<p->width; x++){
3872 int sx= (x-xo + step/2) / step / Q2_STEP;
3873 int sy= (y-yo + step/2) / step / Q2_STEP;
3874 int v= r0[x + y*p->width] - r1[x + y*p->width];
3875 assert(sx>=0 && sy>=0 && sx < score_stride);
3876 v= ((v+8)>>4)<<4;
3877 score[sx + sy*score_stride] += v*v;
3878 assert(score[sx + sy*score_stride] >= 0);
3883 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3884 int level, orientation;
3886 for(level=0; level<s->spatial_decomposition_count; level++){
3887 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3888 SubBand *b= &p->band[level][orientation];
3889 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3891 dequantize(s, b, dst, b->stride);
3896 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3897 int level, orientation, ys, xs, x, y, pass;
3898 IDWTELEM best_dequant[height * stride];
3899 IDWTELEM idwt2_buffer[height * stride];
3900 const int score_stride= (width + 10)/Q2_STEP;
3901 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3902 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3903 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3905 //FIXME pass the copy cleanly ?
3907 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3908 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3910 for(level=0; level<s->spatial_decomposition_count; level++){
3911 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3912 SubBand *b= &p->band[level][orientation];
3913 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3914 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3915 assert(src == b->buf); // code does not depend on this but it is true currently
3917 quantize(s, b, dst, src, b->stride, s->qbias);
3920 for(pass=0; pass<1; pass++){
3921 if(s->qbias == 0) //keyframe
3922 continue;
3923 for(level=0; level<s->spatial_decomposition_count; level++){
3924 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3925 SubBand *b= &p->band[level][orientation];
3926 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3927 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3929 for(ys= 0; ys<Q2_STEP; ys++){
3930 for(xs= 0; xs<Q2_STEP; xs++){
3931 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3932 dequantize_all(s, p, idwt2_buffer, width, height);
3933 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3934 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3935 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3936 for(y=ys; y<b->height; y+= Q2_STEP){
3937 for(x=xs; x<b->width; x+= Q2_STEP){
3938 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3939 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3940 //FIXME try more than just --
3943 dequantize_all(s, p, idwt2_buffer, width, height);
3944 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3945 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3946 for(y=ys; y<b->height; y+= Q2_STEP){
3947 for(x=xs; x<b->width; x+= Q2_STEP){
3948 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3949 if(score[score_idx] <= best_score[score_idx] + threshold){
3950 best_score[score_idx]= score[score_idx];
3951 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3952 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3953 //FIXME copy instead
3962 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3965 #endif /* QUANTIZE2==1 */
3967 static av_cold int encode_init(AVCodecContext *avctx)
3969 SnowContext *s = avctx->priv_data;
3970 int plane_index;
3972 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3973 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3974 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3975 return -1;
3978 if(avctx->prediction_method == DWT_97
3979 && (avctx->flags & CODEC_FLAG_QSCALE)
3980 && avctx->global_quality == 0){
3981 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3982 return -1;
3985 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3987 s->chroma_h_shift= 1; //FIXME XXX
3988 s->chroma_v_shift= 1;
3990 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3991 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3993 for(plane_index=0; plane_index<3; plane_index++){
3994 s->plane[plane_index].diag_mc= 1;
3995 s->plane[plane_index].htaps= 6;
3996 s->plane[plane_index].hcoeff[0]= 40;
3997 s->plane[plane_index].hcoeff[1]= -10;
3998 s->plane[plane_index].hcoeff[2]= 2;
3999 s->plane[plane_index].fast_mc= 1;
4002 common_init(avctx);
4003 alloc_blocks(s);
4005 s->version=0;
4007 s->m.avctx = avctx;
4008 s->m.flags = avctx->flags;
4009 s->m.bit_rate= avctx->bit_rate;
4011 s->m.me.temp =
4012 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4013 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4014 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4015 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4016 h263_encode_init(&s->m); //mv_penalty
4018 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4020 if(avctx->flags&CODEC_FLAG_PASS1){
4021 if(!avctx->stats_out)
4022 avctx->stats_out = av_mallocz(256);
4024 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4025 if(ff_rate_control_init(&s->m) < 0)
4026 return -1;
4028 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4030 avctx->coded_frame= &s->current_picture;
4031 switch(avctx->pix_fmt){
4032 // case PIX_FMT_YUV444P:
4033 // case PIX_FMT_YUV422P:
4034 case PIX_FMT_YUV420P:
4035 case PIX_FMT_GRAY8:
4036 // case PIX_FMT_YUV411P:
4037 // case PIX_FMT_YUV410P:
4038 s->colorspace_type= 0;
4039 break;
4040 /* case PIX_FMT_RGB32:
4041 s->colorspace= 1;
4042 break;*/
4043 default:
4044 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4045 return -1;
4047 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4048 s->chroma_h_shift= 1;
4049 s->chroma_v_shift= 1;
4051 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4052 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4054 s->avctx->get_buffer(s->avctx, &s->input_picture);
4056 if(s->avctx->me_method == ME_ITER){
4057 int i;
4058 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4059 for(i=0; i<s->max_ref_frames; i++){
4060 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4061 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4065 return 0;
4068 #define USE_HALFPEL_PLANE 0
4070 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4071 int p,x,y;
4073 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4075 for(p=0; p<3; p++){
4076 int is_chroma= !!p;
4077 int w= s->avctx->width >>is_chroma;
4078 int h= s->avctx->height >>is_chroma;
4079 int ls= frame->linesize[p];
4080 uint8_t *src= frame->data[p];
4082 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4083 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4084 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4086 halfpel[0][p]= src;
4087 for(y=0; y<h; y++){
4088 for(x=0; x<w; x++){
4089 int i= y*ls + x;
4091 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4094 for(y=0; y<h; y++){
4095 for(x=0; x<w; x++){
4096 int i= y*ls + x;
4098 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4101 src= halfpel[1][p];
4102 for(y=0; y<h; y++){
4103 for(x=0; x<w; x++){
4104 int i= y*ls + x;
4106 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4110 //FIXME border!
4114 static int frame_start(SnowContext *s){
4115 AVFrame tmp;
4116 int w= s->avctx->width; //FIXME round up to x16 ?
4117 int h= s->avctx->height;
4119 if(s->current_picture.data[0]){
4120 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4121 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4122 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4125 tmp= s->last_picture[s->max_ref_frames-1];
4126 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4127 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4128 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4129 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4130 s->last_picture[0]= s->current_picture;
4131 s->current_picture= tmp;
4133 if(s->keyframe){
4134 s->ref_frames= 0;
4135 }else{
4136 int i;
4137 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4138 if(i && s->last_picture[i-1].key_frame)
4139 break;
4140 s->ref_frames= i;
4143 s->current_picture.reference= 1;
4144 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4145 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4146 return -1;
4149 s->current_picture.key_frame= s->keyframe;
4151 return 0;
4154 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4155 SnowContext *s = avctx->priv_data;
4156 RangeCoder * const c= &s->c;
4157 AVFrame *pict = data;
4158 const int width= s->avctx->width;
4159 const int height= s->avctx->height;
4160 int level, orientation, plane_index, i, y;
4161 uint8_t rc_header_bak[sizeof(s->header_state)];
4162 uint8_t rc_block_bak[sizeof(s->block_state)];
4164 ff_init_range_encoder(c, buf, buf_size);
4165 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4167 for(i=0; i<3; i++){
4168 int shift= !!i;
4169 for(y=0; y<(height>>shift); y++)
4170 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4171 &pict->data[i][y * pict->linesize[i]],
4172 width>>shift);
4174 s->new_picture = *pict;
4176 s->m.picture_number= avctx->frame_number;
4177 if(avctx->flags&CODEC_FLAG_PASS2){
4178 s->m.pict_type =
4179 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4180 s->keyframe= pict->pict_type==FF_I_TYPE;
4181 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4182 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4183 if (pict->quality < 0)
4184 return -1;
4186 }else{
4187 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4188 s->m.pict_type=
4189 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4192 if(s->pass1_rc && avctx->frame_number == 0)
4193 pict->quality= 2*FF_QP2LAMBDA;
4194 if(pict->quality){
4195 s->qlog= qscale2qlog(pict->quality);
4196 s->lambda = pict->quality * 3/2;
4198 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4199 s->qlog= LOSSLESS_QLOG;
4200 s->lambda = 0;
4201 }//else keep previous frame's qlog until after motion estimation
4203 frame_start(s);
4205 s->m.current_picture_ptr= &s->m.current_picture;
4206 if(pict->pict_type == FF_P_TYPE){
4207 int block_width = (width +15)>>4;
4208 int block_height= (height+15)>>4;
4209 int stride= s->current_picture.linesize[0];
4211 assert(s->current_picture.data[0]);
4212 assert(s->last_picture[0].data[0]);
4214 s->m.avctx= s->avctx;
4215 s->m.current_picture.data[0]= s->current_picture.data[0];
4216 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4217 s->m. new_picture.data[0]= s-> input_picture.data[0];
4218 s->m. last_picture_ptr= &s->m. last_picture;
4219 s->m.linesize=
4220 s->m. last_picture.linesize[0]=
4221 s->m. new_picture.linesize[0]=
4222 s->m.current_picture.linesize[0]= stride;
4223 s->m.uvlinesize= s->current_picture.linesize[1];
4224 s->m.width = width;
4225 s->m.height= height;
4226 s->m.mb_width = block_width;
4227 s->m.mb_height= block_height;
4228 s->m.mb_stride= s->m.mb_width+1;
4229 s->m.b8_stride= 2*s->m.mb_width+1;
4230 s->m.f_code=1;
4231 s->m.pict_type= pict->pict_type;
4232 s->m.me_method= s->avctx->me_method;
4233 s->m.me.scene_change_score=0;
4234 s->m.flags= s->avctx->flags;
4235 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4236 s->m.out_format= FMT_H263;
4237 s->m.unrestricted_mv= 1;
4239 s->m.lambda = s->lambda;
4240 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4241 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4243 s->m.dsp= s->dsp; //move
4244 ff_init_me(&s->m);
4245 s->dsp= s->m.dsp;
4248 if(s->pass1_rc){
4249 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4250 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4253 redo_frame:
4255 if(pict->pict_type == FF_I_TYPE)
4256 s->spatial_decomposition_count= 5;
4257 else
4258 s->spatial_decomposition_count= 5;
4260 s->m.pict_type = pict->pict_type;
4261 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4263 common_init_after_header(avctx);
4265 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4266 for(plane_index=0; plane_index<3; plane_index++){
4267 calculate_visual_weight(s, &s->plane[plane_index]);
4271 encode_header(s);
4272 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4273 encode_blocks(s, 1);
4274 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4276 for(plane_index=0; plane_index<3; plane_index++){
4277 Plane *p= &s->plane[plane_index];
4278 int w= p->width;
4279 int h= p->height;
4280 int x, y;
4281 // int bits= put_bits_count(&s->c.pb);
4283 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4284 //FIXME optimize
4285 if(pict->data[plane_index]) //FIXME gray hack
4286 for(y=0; y<h; y++){
4287 for(x=0; x<w; x++){
4288 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4291 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4293 if( plane_index==0
4294 && pict->pict_type == FF_P_TYPE
4295 && !(avctx->flags&CODEC_FLAG_PASS2)
4296 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4297 ff_init_range_encoder(c, buf, buf_size);
4298 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4299 pict->pict_type= FF_I_TYPE;
4300 s->keyframe=1;
4301 s->current_picture.key_frame=1;
4302 goto redo_frame;
4305 if(s->qlog == LOSSLESS_QLOG){
4306 for(y=0; y<h; y++){
4307 for(x=0; x<w; x++){
4308 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4311 }else{
4312 for(y=0; y<h; y++){
4313 for(x=0; x<w; x++){
4314 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4319 /* if(QUANTIZE2)
4320 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4321 else*/
4322 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4324 if(s->pass1_rc && plane_index==0){
4325 int delta_qlog = ratecontrol_1pass(s, pict);
4326 if (delta_qlog <= INT_MIN)
4327 return -1;
4328 if(delta_qlog){
4329 //reordering qlog in the bitstream would eliminate this reset
4330 ff_init_range_encoder(c, buf, buf_size);
4331 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4332 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4333 encode_header(s);
4334 encode_blocks(s, 0);
4338 for(level=0; level<s->spatial_decomposition_count; level++){
4339 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4340 SubBand *b= &p->band[level][orientation];
4342 if(!QUANTIZE2)
4343 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4344 if(orientation==0)
4345 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4346 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4347 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4348 if(orientation==0)
4349 correlate(s, b, b->ibuf, b->stride, 1, 0);
4353 for(level=0; level<s->spatial_decomposition_count; level++){
4354 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4355 SubBand *b= &p->band[level][orientation];
4357 dequantize(s, b, b->ibuf, b->stride);
4361 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4362 if(s->qlog == LOSSLESS_QLOG){
4363 for(y=0; y<h; y++){
4364 for(x=0; x<w; x++){
4365 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4369 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4370 }else{
4371 //ME/MC only
4372 if(pict->pict_type == FF_I_TYPE){
4373 for(y=0; y<h; y++){
4374 for(x=0; x<w; x++){
4375 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4376 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4379 }else{
4380 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4381 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4384 if(s->avctx->flags&CODEC_FLAG_PSNR){
4385 int64_t error= 0;
4387 if(pict->data[plane_index]) //FIXME gray hack
4388 for(y=0; y<h; y++){
4389 for(x=0; x<w; x++){
4390 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4391 error += d*d;
4394 s->avctx->error[plane_index] += error;
4395 s->current_picture.error[plane_index] = error;
4400 update_last_header_values(s);
4402 if(s->last_picture[s->max_ref_frames-1].data[0]){
4403 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4404 for(i=0; i<9; i++)
4405 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4406 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4409 s->current_picture.coded_picture_number = avctx->frame_number;
4410 s->current_picture.pict_type = pict->pict_type;
4411 s->current_picture.quality = pict->quality;
4412 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4413 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4414 s->m.current_picture.display_picture_number =
4415 s->m.current_picture.coded_picture_number = avctx->frame_number;
4416 s->m.current_picture.quality = pict->quality;
4417 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4418 if(s->pass1_rc)
4419 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4420 return -1;
4421 if(avctx->flags&CODEC_FLAG_PASS1)
4422 ff_write_pass1_stats(&s->m);
4423 s->m.last_pict_type = s->m.pict_type;
4424 avctx->frame_bits = s->m.frame_bits;
4425 avctx->mv_bits = s->m.mv_bits;
4426 avctx->misc_bits = s->m.misc_bits;
4427 avctx->p_tex_bits = s->m.p_tex_bits;
4429 emms_c();
4431 return ff_rac_terminate(c);
4434 static av_cold void common_end(SnowContext *s){
4435 int plane_index, level, orientation, i;
4437 av_freep(&s->spatial_dwt_buffer);
4438 av_freep(&s->spatial_idwt_buffer);
4440 s->m.me.temp= NULL;
4441 av_freep(&s->m.me.scratchpad);
4442 av_freep(&s->m.me.map);
4443 av_freep(&s->m.me.score_map);
4444 av_freep(&s->m.obmc_scratchpad);
4446 av_freep(&s->block);
4447 av_freep(&s->scratchbuf);
4449 for(i=0; i<MAX_REF_FRAMES; i++){
4450 av_freep(&s->ref_mvs[i]);
4451 av_freep(&s->ref_scores[i]);
4452 if(s->last_picture[i].data[0])
4453 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4456 for(plane_index=0; plane_index<3; plane_index++){
4457 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4458 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4459 SubBand *b= &s->plane[plane_index].band[level][orientation];
4461 av_freep(&b->x_coeff);
4467 static av_cold int encode_end(AVCodecContext *avctx)
4469 SnowContext *s = avctx->priv_data;
4471 common_end(s);
4472 av_free(avctx->stats_out);
4474 return 0;
4477 static av_cold int decode_init(AVCodecContext *avctx)
4479 avctx->pix_fmt= PIX_FMT_YUV420P;
4481 common_init(avctx);
4483 return 0;
4486 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
4487 SnowContext *s = avctx->priv_data;
4488 RangeCoder * const c= &s->c;
4489 int bytes_read;
4490 AVFrame *picture = data;
4491 int level, orientation, plane_index, i;
4493 ff_init_range_decoder(c, buf, buf_size);
4494 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4496 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4497 if(decode_header(s)<0)
4498 return -1;
4499 common_init_after_header(avctx);
4501 // realloc slice buffer for the case that spatial_decomposition_count changed
4502 slice_buffer_destroy(&s->sb);
4503 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4505 for(plane_index=0; plane_index<3; plane_index++){
4506 Plane *p= &s->plane[plane_index];
4507 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4508 && p->hcoeff[1]==-10
4509 && p->hcoeff[2]==2;
4512 if(!s->block) alloc_blocks(s);
4514 frame_start(s);
4515 //keyframe flag duplication mess FIXME
4516 if(avctx->debug&FF_DEBUG_PICT_INFO)
4517 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4519 decode_blocks(s);
4521 for(plane_index=0; plane_index<3; plane_index++){
4522 Plane *p= &s->plane[plane_index];
4523 int w= p->width;
4524 int h= p->height;
4525 int x, y;
4526 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4528 if(s->avctx->debug&2048){
4529 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4530 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4532 for(y=0; y<h; y++){
4533 for(x=0; x<w; x++){
4534 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4535 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4541 for(level=0; level<s->spatial_decomposition_count; level++){
4542 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4543 SubBand *b= &p->band[level][orientation];
4544 unpack_coeffs(s, b, b->parent, orientation);
4550 const int mb_h= s->b_height << s->block_max_depth;
4551 const int block_size = MB_SIZE >> s->block_max_depth;
4552 const int block_w = plane_index ? block_size/2 : block_size;
4553 int mb_y;
4554 DWTCompose cs[MAX_DECOMPOSITIONS];
4555 int yd=0, yq=0;
4556 int y;
4557 int end_y;
4559 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4560 for(mb_y=0; mb_y<=mb_h; mb_y++){
4562 int slice_starty = block_w*mb_y;
4563 int slice_h = block_w*(mb_y+1);
4564 if (!(s->keyframe || s->avctx->debug&512)){
4565 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4566 slice_h -= (block_w >> 1);
4569 for(level=0; level<s->spatial_decomposition_count; level++){
4570 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4571 SubBand *b= &p->band[level][orientation];
4572 int start_y;
4573 int end_y;
4574 int our_mb_start = mb_y;
4575 int our_mb_end = (mb_y + 1);
4576 const int extra= 3;
4577 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4578 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4579 if (!(s->keyframe || s->avctx->debug&512)){
4580 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4581 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4583 start_y = FFMIN(b->height, start_y);
4584 end_y = FFMIN(b->height, end_y);
4586 if (start_y != end_y){
4587 if (orientation == 0){
4588 SubBand * correlate_band = &p->band[0][0];
4589 int correlate_end_y = FFMIN(b->height, end_y + 1);
4590 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4591 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4592 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4593 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4595 else
4596 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4601 for(; yd<slice_h; yd+=4){
4602 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4605 if(s->qlog == LOSSLESS_QLOG){
4606 for(; yq<slice_h && yq<h; yq++){
4607 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4608 for(x=0; x<w; x++){
4609 line[x] <<= FRAC_BITS;
4614 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4616 y = FFMIN(p->height, slice_starty);
4617 end_y = FFMIN(p->height, slice_h);
4618 while(y < end_y)
4619 slice_buffer_release(&s->sb, y++);
4622 slice_buffer_flush(&s->sb);
4627 emms_c();
4629 if(s->last_picture[s->max_ref_frames-1].data[0]){
4630 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4631 for(i=0; i<9; i++)
4632 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4633 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4636 if(!(s->avctx->debug&2048))
4637 *picture= s->current_picture;
4638 else
4639 *picture= s->mconly_picture;
4641 *data_size = sizeof(AVFrame);
4643 bytes_read= c->bytestream - c->bytestream_start;
4644 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4646 return bytes_read;
4649 static av_cold int decode_end(AVCodecContext *avctx)
4651 SnowContext *s = avctx->priv_data;
4653 slice_buffer_destroy(&s->sb);
4655 common_end(s);
4657 return 0;
4660 AVCodec snow_decoder = {
4661 "snow",
4662 CODEC_TYPE_VIDEO,
4663 CODEC_ID_SNOW,
4664 sizeof(SnowContext),
4665 decode_init,
4666 NULL,
4667 decode_end,
4668 decode_frame,
4669 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4670 NULL,
4671 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4674 #if CONFIG_SNOW_ENCODER
4675 AVCodec snow_encoder = {
4676 "snow",
4677 CODEC_TYPE_VIDEO,
4678 CODEC_ID_SNOW,
4679 sizeof(SnowContext),
4680 encode_init,
4681 encode_frame,
4682 encode_end,
4683 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4685 #endif
4688 #ifdef TEST
4689 #undef malloc
4690 #undef free
4691 #undef printf
4693 #include "libavutil/lfg.h"
4695 int main(void){
4696 int width=256;
4697 int height=256;
4698 int buffer[2][width*height];
4699 SnowContext s;
4700 int i;
4701 s.spatial_decomposition_count=6;
4702 s.spatial_decomposition_type=1;
4703 AVLFG prn;
4705 av_lfg_init(&prn, 1);
4707 printf("testing 5/3 DWT\n");
4708 for(i=0; i<width*height; i++)
4709 buffer[0][i] = buffer[1][i] = av_lfg_get(&prn) % 54321 - 12345;
4711 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4712 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4714 for(i=0; i<width*height; i++)
4715 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4717 printf("testing 9/7 DWT\n");
4718 s.spatial_decomposition_type=0;
4719 for(i=0; i<width*height; i++)
4720 buffer[0][i] = buffer[1][i] = av_lfg_get(&prn) % 54321 - 12345;
4722 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4723 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4725 for(i=0; i<width*height; i++)
4726 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4728 #if 0
4729 printf("testing AC coder\n");
4730 memset(s.header_state, 0, sizeof(s.header_state));
4731 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4732 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4734 for(i=-256; i<256; i++){
4735 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4737 ff_rac_terminate(&s.c);
4739 memset(s.header_state, 0, sizeof(s.header_state));
4740 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4741 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4743 for(i=-256; i<256; i++){
4744 int j;
4745 j= get_symbol(&s.c, s.header_state, 1);
4746 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4748 #endif
4750 int level, orientation, x, y;
4751 int64_t errors[8][4];
4752 int64_t g=0;
4754 memset(errors, 0, sizeof(errors));
4755 s.spatial_decomposition_count=3;
4756 s.spatial_decomposition_type=0;
4757 for(level=0; level<s.spatial_decomposition_count; level++){
4758 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4759 int w= width >> (s.spatial_decomposition_count-level);
4760 int h= height >> (s.spatial_decomposition_count-level);
4761 int stride= width << (s.spatial_decomposition_count-level);
4762 DWTELEM *buf= buffer[0];
4763 int64_t error=0;
4765 if(orientation&1) buf+=w;
4766 if(orientation>1) buf+=stride>>1;
4768 memset(buffer[0], 0, sizeof(int)*width*height);
4769 buf[w/2 + h/2*stride]= 256*256;
4770 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4771 for(y=0; y<height; y++){
4772 for(x=0; x<width; x++){
4773 int64_t d= buffer[0][x + y*width];
4774 error += d*d;
4775 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4777 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4779 error= (int)(sqrt(error)+0.5);
4780 errors[level][orientation]= error;
4781 if(g) g=av_gcd(g, error);
4782 else g= error;
4785 printf("static int const visual_weight[][4]={\n");
4786 for(level=0; level<s.spatial_decomposition_count; level++){
4787 printf(" {");
4788 for(orientation=0; orientation<4; orientation++){
4789 printf("%8"PRId64",", errors[level][orientation]/g);
4791 printf("},\n");
4793 printf("};\n");
4795 int level=2;
4796 int w= width >> (s.spatial_decomposition_count-level);
4797 //int h= height >> (s.spatial_decomposition_count-level);
4798 int stride= width << (s.spatial_decomposition_count-level);
4799 DWTELEM *buf= buffer[0];
4800 int64_t error=0;
4802 buf+=w;
4803 buf+=stride>>1;
4805 memset(buffer[0], 0, sizeof(int)*width*height);
4806 #if 1
4807 for(y=0; y<height; y++){
4808 for(x=0; x<width; x++){
4809 int tab[4]={0,2,3,1};
4810 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4813 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4814 #else
4815 for(y=0; y<h; y++){
4816 for(x=0; x<w; x++){
4817 buf[x + y*stride ]=169;
4818 buf[x + y*stride-w]=64;
4821 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4822 #endif
4823 for(y=0; y<height; y++){
4824 for(x=0; x<width; x++){
4825 int64_t d= buffer[0][x + y*width];
4826 error += d*d;
4827 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4829 if(FFABS(height/2-y)<9) printf("\n");
4834 return 0;
4836 #endif /* TEST */