Merge branch 'mirror' into vdpau
[FFMpeg-mirror/ffmpeg-vdpau.git] / libavcodec / snow.c
blobc5c73b117373066a9d9e39f39ede357ad494f329
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "avcodec.h"
22 #include "dsputil.h"
23 #include "snow.h"
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
29 #undef NDEBUG
30 #include <assert.h>
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
177 #if 0 //64*cubic
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 //error:0.000022
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
230 //error:0.000033
232 #elif 1 // 64*linear
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
266 //error:0.000020
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
285 //error:0.000015
287 #else //64*cos
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
321 //error:0.000022
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
340 //error:0.000022
342 #endif /* 0 */
344 //linear *64
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
354 //error:0.000000
357 //linear *64
358 static const uint8_t obmc4[16]={
359 16, 48, 48, 16,
360 48,144,144, 48,
361 48,144,144, 48,
362 16, 48, 48, 16,
363 //error:0.000000
366 static const uint8_t * const obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
373 int16_t mx;
374 int16_t my;
375 uint8_t ref;
376 uint8_t color[3];
377 uint8_t type;
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
380 #define BLOCK_OPT 2
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
383 }BlockNode;
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
387 .mx= 0,
388 .my= 0,
389 .ref= 0,
390 .type= 0,
391 .level= 0,
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4
397 #define HTAPS_MAX 8
399 typedef struct x_and_coeff{
400 int16_t x;
401 uint16_t coeff;
402 } x_and_coeff;
404 typedef struct SubBand{
405 int level;
406 int stride;
407 int width;
408 int height;
409 int qlog; ///< log(qscale)/log[2^(1/6)]
410 DWTELEM *buf;
411 IDWTELEM *ibuf;
412 int buf_x_offset;
413 int buf_y_offset;
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
418 }SubBand;
420 typedef struct Plane{
421 int width;
422 int height;
423 SubBand band[MAX_DECOMPOSITIONS][4];
425 int htaps;
426 int8_t hcoeff[HTAPS_MAX/2];
427 int diag_mc;
428 int fast_mc;
430 int last_htaps;
431 int8_t last_hcoeff[HTAPS_MAX/2];
432 int last_diag_mc;
433 }Plane;
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
439 RangeCoder c;
440 DSPContext dsp;
441 AVFrame new_picture;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
450 int keyframe;
451 int always_reset;
452 int version;
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
459 int max_ref_frames;
460 int ref_frames;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
465 int colorspace_type;
466 int chroma_h_shift;
467 int chroma_v_shift;
468 int spatial_scalability;
469 int qlog;
470 int last_qlog;
471 int lambda;
472 int lambda2;
473 int pass1_rc;
474 int mv_scale;
475 int last_mv_scale;
476 int qbias;
477 int last_qbias;
478 #define QBIAS_SHIFT 3
479 int b_width;
480 int b_height;
481 int block_max_depth;
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
484 BlockNode *block;
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
488 slice_buffer sb;
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
491 }SnowContext;
493 typedef struct {
494 IDWTELEM *b0;
495 IDWTELEM *b1;
496 IDWTELEM *b2;
497 IDWTELEM *b3;
498 int y;
499 } dwt_compose_t;
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
504 static void iterative_me(SnowContext *s);
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
508 int i;
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
517 for(i = 0; i < max_allocated_lines; i++){
518 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521 buf->data_stack_top = max_allocated_lines - 1;
524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
526 int offset;
527 IDWTELEM * buffer;
529 assert(buf->data_stack_top >= 0);
530 // assert(!buf->line[line]);
531 if (buf->line[line])
532 return buf->line[line];
534 offset = buf->line_width * line;
535 buffer = buf->data_stack[buf->data_stack_top];
536 buf->data_stack_top--;
537 buf->line[line] = buffer;
539 return buffer;
542 static void slice_buffer_release(slice_buffer * buf, int line)
544 int offset;
545 IDWTELEM * buffer;
547 assert(line >= 0 && line < buf->line_count);
548 assert(buf->line[line]);
550 offset = buf->line_width * line;
551 buffer = buf->line[line];
552 buf->data_stack_top++;
553 buf->data_stack[buf->data_stack_top] = buffer;
554 buf->line[line] = NULL;
557 static void slice_buffer_flush(slice_buffer * buf)
559 int i;
560 for(i = 0; i < buf->line_count; i++){
561 if (buf->line[i])
562 slice_buffer_release(buf, i);
566 static void slice_buffer_destroy(slice_buffer * buf)
568 int i;
569 slice_buffer_flush(buf);
571 for(i = buf->data_count - 1; i >= 0; i--){
572 av_freep(&buf->data_stack[i]);
574 av_freep(&buf->data_stack);
575 av_freep(&buf->line);
578 #ifdef __sgi
579 // Avoid a name clash on SGI IRIX
580 #undef qexp
581 #endif
582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
583 static uint8_t qexp[QROOT];
585 static inline int mirror(int v, int m){
586 while((unsigned)v > (unsigned)m){
587 v=-v;
588 if(v<0) v+= 2*m;
590 return v;
593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
594 int i;
596 if(v){
597 const int a= FFABS(v);
598 const int e= av_log2(a);
599 #if 1
600 const int el= FFMIN(e, 10);
601 put_rac(c, state+0, 0);
603 for(i=0; i<el; i++){
604 put_rac(c, state+1+i, 1); //1..10
606 for(; i<e; i++){
607 put_rac(c, state+1+9, 1); //1..10
609 put_rac(c, state+1+FFMIN(i,9), 0);
611 for(i=e-1; i>=el; i--){
612 put_rac(c, state+22+9, (a>>i)&1); //22..31
614 for(; i>=0; i--){
615 put_rac(c, state+22+i, (a>>i)&1); //22..31
618 if(is_signed)
619 put_rac(c, state+11 + el, v < 0); //11..21
620 #else
622 put_rac(c, state+0, 0);
623 if(e<=9){
624 for(i=0; i<e; i++){
625 put_rac(c, state+1+i, 1); //1..10
627 put_rac(c, state+1+i, 0);
629 for(i=e-1; i>=0; i--){
630 put_rac(c, state+22+i, (a>>i)&1); //22..31
633 if(is_signed)
634 put_rac(c, state+11 + e, v < 0); //11..21
635 }else{
636 for(i=0; i<e; i++){
637 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
639 put_rac(c, state+1+FFMIN(i,9), 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
645 if(is_signed)
646 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
648 #endif /* 1 */
649 }else{
650 put_rac(c, state+0, 1);
654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
655 if(get_rac(c, state+0))
656 return 0;
657 else{
658 int i, e, a;
659 e= 0;
660 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
661 e++;
664 a= 1;
665 for(i=e-1; i>=0; i--){
666 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
669 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
670 return -a;
671 else
672 return a;
676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
677 int i;
678 int r= log2>=0 ? 1<<log2 : 1;
680 assert(v>=0);
681 assert(log2>=-4);
683 while(v >= r){
684 put_rac(c, state+4+log2, 1);
685 v -= r;
686 log2++;
687 if(log2>0) r+=r;
689 put_rac(c, state+4+log2, 0);
691 for(i=log2-1; i>=0; i--){
692 put_rac(c, state+31-i, (v>>i)&1);
696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
697 int i;
698 int r= log2>=0 ? 1<<log2 : 1;
699 int v=0;
701 assert(log2>=-4);
703 while(get_rac(c, state+4+log2)){
704 v+= r;
705 log2++;
706 if(log2>0) r+=r;
709 for(i=log2-1; i>=0; i--){
710 v+= get_rac(c, state+31-i)<<i;
713 return v;
716 static av_always_inline void
717 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
718 int dst_step, int src_step, int ref_step,
719 int width, int mul, int add, int shift,
720 int highpass, int inverse){
721 const int mirror_left= !highpass;
722 const int mirror_right= (width&1) ^ highpass;
723 const int w= (width>>1) - 1 + (highpass & width);
724 int i;
726 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
727 if(mirror_left){
728 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
729 dst += dst_step;
730 src += src_step;
733 for(i=0; i<w; i++){
734 dst[i*dst_step] =
735 LIFT(src[i*src_step],
736 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
737 inverse);
740 if(mirror_right){
741 dst[w*dst_step] =
742 LIFT(src[w*src_step],
743 ((mul*2*ref[w*ref_step]+add)>>shift),
744 inverse);
748 static av_always_inline void
749 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
750 int dst_step, int src_step, int ref_step,
751 int width, int mul, int add, int shift,
752 int highpass, int inverse){
753 const int mirror_left= !highpass;
754 const int mirror_right= (width&1) ^ highpass;
755 const int w= (width>>1) - 1 + (highpass & width);
756 int i;
758 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
759 if(mirror_left){
760 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
761 dst += dst_step;
762 src += src_step;
765 for(i=0; i<w; i++){
766 dst[i*dst_step] =
767 LIFT(src[i*src_step],
768 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
769 inverse);
772 if(mirror_right){
773 dst[w*dst_step] =
774 LIFT(src[w*src_step],
775 ((mul*2*ref[w*ref_step]+add)>>shift),
776 inverse);
780 #ifndef liftS
781 static av_always_inline void
782 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
783 int dst_step, int src_step, int ref_step,
784 int width, int mul, int add, int shift,
785 int highpass, int inverse){
786 const int mirror_left= !highpass;
787 const int mirror_right= (width&1) ^ highpass;
788 const int w= (width>>1) - 1 + (highpass & width);
789 int i;
791 assert(shift == 4);
792 #define LIFTS(src, ref, inv) \
793 ((inv) ? \
794 (src) + (((ref) + 4*(src))>>shift): \
795 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
796 if(mirror_left){
797 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
798 dst += dst_step;
799 src += src_step;
802 for(i=0; i<w; i++){
803 dst[i*dst_step] =
804 LIFTS(src[i*src_step],
805 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
806 inverse);
809 if(mirror_right){
810 dst[w*dst_step] =
811 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
814 static av_always_inline void
815 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
816 int dst_step, int src_step, int ref_step,
817 int width, int mul, int add, int shift,
818 int highpass, int inverse){
819 const int mirror_left= !highpass;
820 const int mirror_right= (width&1) ^ highpass;
821 const int w= (width>>1) - 1 + (highpass & width);
822 int i;
824 assert(shift == 4);
825 #define LIFTS(src, ref, inv) \
826 ((inv) ? \
827 (src) + (((ref) + 4*(src))>>shift): \
828 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
829 if(mirror_left){
830 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
831 dst += dst_step;
832 src += src_step;
835 for(i=0; i<w; i++){
836 dst[i*dst_step] =
837 LIFTS(src[i*src_step],
838 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
839 inverse);
842 if(mirror_right){
843 dst[w*dst_step] =
844 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
847 #endif /* ! liftS */
849 static void horizontal_decompose53i(DWTELEM *b, int width){
850 DWTELEM temp[width];
851 const int width2= width>>1;
852 int x;
853 const int w2= (width+1)>>1;
855 for(x=0; x<width2; x++){
856 temp[x ]= b[2*x ];
857 temp[x+w2]= b[2*x + 1];
859 if(width&1)
860 temp[x ]= b[2*x ];
861 #if 0
863 int A1,A2,A3,A4;
864 A2= temp[1 ];
865 A4= temp[0 ];
866 A1= temp[0+width2];
867 A1 -= (A2 + A4)>>1;
868 A4 += (A1 + 1)>>1;
869 b[0+width2] = A1;
870 b[0 ] = A4;
871 for(x=1; x+1<width2; x+=2){
872 A3= temp[x+width2];
873 A4= temp[x+1 ];
874 A3 -= (A2 + A4)>>1;
875 A2 += (A1 + A3 + 2)>>2;
876 b[x+width2] = A3;
877 b[x ] = A2;
879 A1= temp[x+1+width2];
880 A2= temp[x+2 ];
881 A1 -= (A2 + A4)>>1;
882 A4 += (A1 + A3 + 2)>>2;
883 b[x+1+width2] = A1;
884 b[x+1 ] = A4;
886 A3= temp[width-1];
887 A3 -= A2;
888 A2 += (A1 + A3 + 2)>>2;
889 b[width -1] = A3;
890 b[width2-1] = A2;
892 #else
893 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
894 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
895 #endif /* 0 */
898 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
899 int i;
901 for(i=0; i<width; i++){
902 b1[i] -= (b0[i] + b2[i])>>1;
906 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
907 int i;
909 for(i=0; i<width; i++){
910 b1[i] += (b0[i] + b2[i] + 2)>>2;
914 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
915 int y;
916 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
917 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
919 for(y=-2; y<height; y+=2){
920 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
921 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
923 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
924 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
926 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
927 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
929 b0=b2;
930 b1=b3;
934 static void horizontal_decompose97i(DWTELEM *b, int width){
935 DWTELEM temp[width];
936 const int w2= (width+1)>>1;
938 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
939 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
940 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
941 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
945 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
946 int i;
948 for(i=0; i<width; i++){
949 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
953 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
954 int i;
956 for(i=0; i<width; i++){
957 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
961 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
962 int i;
964 for(i=0; i<width; i++){
965 #ifdef liftS
966 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
967 #else
968 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
969 #endif
973 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
974 int i;
976 for(i=0; i<width; i++){
977 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
981 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
982 int y;
983 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
984 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
985 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
986 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
988 for(y=-4; y<height; y+=2){
989 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
990 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
992 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
993 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
995 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
996 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
997 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
998 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1000 b0=b2;
1001 b1=b3;
1002 b2=b4;
1003 b3=b5;
1007 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1008 int level;
1010 for(level=0; level<decomposition_count; level++){
1011 switch(type){
1012 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1013 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1018 static void horizontal_compose53i(IDWTELEM *b, int width){
1019 IDWTELEM temp[width];
1020 const int width2= width>>1;
1021 const int w2= (width+1)>>1;
1022 int x;
1024 #if 0
1025 int A1,A2,A3,A4;
1026 A2= temp[1 ];
1027 A4= temp[0 ];
1028 A1= temp[0+width2];
1029 A1 -= (A2 + A4)>>1;
1030 A4 += (A1 + 1)>>1;
1031 b[0+width2] = A1;
1032 b[0 ] = A4;
1033 for(x=1; x+1<width2; x+=2){
1034 A3= temp[x+width2];
1035 A4= temp[x+1 ];
1036 A3 -= (A2 + A4)>>1;
1037 A2 += (A1 + A3 + 2)>>2;
1038 b[x+width2] = A3;
1039 b[x ] = A2;
1041 A1= temp[x+1+width2];
1042 A2= temp[x+2 ];
1043 A1 -= (A2 + A4)>>1;
1044 A4 += (A1 + A3 + 2)>>2;
1045 b[x+1+width2] = A1;
1046 b[x+1 ] = A4;
1048 A3= temp[width-1];
1049 A3 -= A2;
1050 A2 += (A1 + A3 + 2)>>2;
1051 b[width -1] = A3;
1052 b[width2-1] = A2;
1053 #else
1054 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1055 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1056 #endif /* 0 */
1057 for(x=0; x<width2; x++){
1058 b[2*x ]= temp[x ];
1059 b[2*x + 1]= temp[x+w2];
1061 if(width&1)
1062 b[2*x ]= temp[x ];
1065 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1066 int i;
1068 for(i=0; i<width; i++){
1069 b1[i] += (b0[i] + b2[i])>>1;
1073 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1074 int i;
1076 for(i=0; i<width; i++){
1077 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1081 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1082 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1083 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1084 cs->y = -1;
1087 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1088 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1089 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1090 cs->y = -1;
1093 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1094 int y= cs->y;
1096 IDWTELEM *b0= cs->b0;
1097 IDWTELEM *b1= cs->b1;
1098 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1099 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1101 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1102 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1104 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1105 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1107 cs->b0 = b2;
1108 cs->b1 = b3;
1109 cs->y += 2;
1112 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1113 int y= cs->y;
1114 IDWTELEM *b0= cs->b0;
1115 IDWTELEM *b1= cs->b1;
1116 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1117 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1119 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1120 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1122 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1123 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1125 cs->b0 = b2;
1126 cs->b1 = b3;
1127 cs->y += 2;
1130 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1131 dwt_compose_t cs;
1132 spatial_compose53i_init(&cs, buffer, height, stride);
1133 while(cs.y <= height)
1134 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1138 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1139 IDWTELEM temp[width];
1140 const int w2= (width+1)>>1;
1142 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1143 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1144 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1145 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1148 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1149 int i;
1151 for(i=0; i<width; i++){
1152 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1156 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1157 int i;
1159 for(i=0; i<width; i++){
1160 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1164 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1165 int i;
1167 for(i=0; i<width; i++){
1168 #ifdef liftS
1169 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1170 #else
1171 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1172 #endif
1176 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1177 int i;
1179 for(i=0; i<width; i++){
1180 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1184 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1185 int i;
1187 for(i=0; i<width; i++){
1188 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1189 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1190 #ifdef liftS
1191 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1192 #else
1193 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1194 #endif
1195 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1199 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1200 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1201 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1202 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1203 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1204 cs->y = -3;
1207 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1208 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1209 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1210 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1211 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1212 cs->y = -3;
1215 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1216 int y = cs->y;
1218 IDWTELEM *b0= cs->b0;
1219 IDWTELEM *b1= cs->b1;
1220 IDWTELEM *b2= cs->b2;
1221 IDWTELEM *b3= cs->b3;
1222 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1223 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1225 if(y>0 && y+4<height){
1226 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1227 }else{
1228 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1229 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1230 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1231 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1234 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1235 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1237 cs->b0=b2;
1238 cs->b1=b3;
1239 cs->b2=b4;
1240 cs->b3=b5;
1241 cs->y += 2;
1244 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1245 int y = cs->y;
1246 IDWTELEM *b0= cs->b0;
1247 IDWTELEM *b1= cs->b1;
1248 IDWTELEM *b2= cs->b2;
1249 IDWTELEM *b3= cs->b3;
1250 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1251 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1253 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1254 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1255 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1256 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1258 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1259 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1261 cs->b0=b2;
1262 cs->b1=b3;
1263 cs->b2=b4;
1264 cs->b3=b5;
1265 cs->y += 2;
1268 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1269 dwt_compose_t cs;
1270 spatial_compose97i_init(&cs, buffer, height, stride);
1271 while(cs.y <= height)
1272 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1275 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1276 int level;
1277 for(level=decomposition_count-1; level>=0; level--){
1278 switch(type){
1279 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1280 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1285 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1286 int level;
1287 for(level=decomposition_count-1; level>=0; level--){
1288 switch(type){
1289 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1290 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1295 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1296 const int support = type==1 ? 3 : 5;
1297 int level;
1298 if(type==2) return;
1300 for(level=decomposition_count-1; level>=0; level--){
1301 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1302 switch(type){
1303 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1304 break;
1305 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1306 break;
1312 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1313 const int support = type==1 ? 3 : 5;
1314 int level;
1315 if(type==2) return;
1317 for(level=decomposition_count-1; level>=0; level--){
1318 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1319 switch(type){
1320 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1321 break;
1322 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1323 break;
1329 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1330 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1331 int y;
1332 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1333 for(y=0; y<height; y+=4)
1334 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1337 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1338 const int w= b->width;
1339 const int h= b->height;
1340 int x, y;
1342 if(1){
1343 int run=0;
1344 int runs[w*h];
1345 int run_index=0;
1346 int max_index;
1348 for(y=0; y<h; y++){
1349 for(x=0; x<w; x++){
1350 int v, p=0;
1351 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1352 v= src[x + y*stride];
1354 if(y){
1355 t= src[x + (y-1)*stride];
1356 if(x){
1357 lt= src[x - 1 + (y-1)*stride];
1359 if(x + 1 < w){
1360 rt= src[x + 1 + (y-1)*stride];
1363 if(x){
1364 l= src[x - 1 + y*stride];
1365 /*if(x > 1){
1366 if(orientation==1) ll= src[y + (x-2)*stride];
1367 else ll= src[x - 2 + y*stride];
1370 if(parent){
1371 int px= x>>1;
1372 int py= y>>1;
1373 if(px<b->parent->width && py<b->parent->height)
1374 p= parent[px + py*2*stride];
1376 if(!(/*ll|*/l|lt|t|rt|p)){
1377 if(v){
1378 runs[run_index++]= run;
1379 run=0;
1380 }else{
1381 run++;
1386 max_index= run_index;
1387 runs[run_index++]= run;
1388 run_index=0;
1389 run= runs[run_index++];
1391 put_symbol2(&s->c, b->state[30], max_index, 0);
1392 if(run_index <= max_index)
1393 put_symbol2(&s->c, b->state[1], run, 3);
1395 for(y=0; y<h; y++){
1396 if(s->c.bytestream_end - s->c.bytestream < w*40){
1397 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1398 return -1;
1400 for(x=0; x<w; x++){
1401 int v, p=0;
1402 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1403 v= src[x + y*stride];
1405 if(y){
1406 t= src[x + (y-1)*stride];
1407 if(x){
1408 lt= src[x - 1 + (y-1)*stride];
1410 if(x + 1 < w){
1411 rt= src[x + 1 + (y-1)*stride];
1414 if(x){
1415 l= src[x - 1 + y*stride];
1416 /*if(x > 1){
1417 if(orientation==1) ll= src[y + (x-2)*stride];
1418 else ll= src[x - 2 + y*stride];
1421 if(parent){
1422 int px= x>>1;
1423 int py= y>>1;
1424 if(px<b->parent->width && py<b->parent->height)
1425 p= parent[px + py*2*stride];
1427 if(/*ll|*/l|lt|t|rt|p){
1428 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1430 put_rac(&s->c, &b->state[0][context], !!v);
1431 }else{
1432 if(!run){
1433 run= runs[run_index++];
1435 if(run_index <= max_index)
1436 put_symbol2(&s->c, b->state[1], run, 3);
1437 assert(v);
1438 }else{
1439 run--;
1440 assert(!v);
1443 if(v){
1444 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1445 int l2= 2*FFABS(l) + (l<0);
1446 int t2= 2*FFABS(t) + (t<0);
1448 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1449 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1454 return 0;
1457 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1458 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1459 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1460 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1461 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1464 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1465 const int w= b->width;
1466 const int h= b->height;
1467 int x,y;
1469 if(1){
1470 int run, runs;
1471 x_and_coeff *xc= b->x_coeff;
1472 x_and_coeff *prev_xc= NULL;
1473 x_and_coeff *prev2_xc= xc;
1474 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1475 x_and_coeff *prev_parent_xc= parent_xc;
1477 runs= get_symbol2(&s->c, b->state[30], 0);
1478 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1479 else run= INT_MAX;
1481 for(y=0; y<h; y++){
1482 int v=0;
1483 int lt=0, t=0, rt=0;
1485 if(y && prev_xc->x == 0){
1486 rt= prev_xc->coeff;
1488 for(x=0; x<w; x++){
1489 int p=0;
1490 const int l= v;
1492 lt= t; t= rt;
1494 if(y){
1495 if(prev_xc->x <= x)
1496 prev_xc++;
1497 if(prev_xc->x == x + 1)
1498 rt= prev_xc->coeff;
1499 else
1500 rt=0;
1502 if(parent_xc){
1503 if(x>>1 > parent_xc->x){
1504 parent_xc++;
1506 if(x>>1 == parent_xc->x){
1507 p= parent_xc->coeff;
1510 if(/*ll|*/l|lt|t|rt|p){
1511 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1513 v=get_rac(&s->c, &b->state[0][context]);
1514 if(v){
1515 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1516 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1518 xc->x=x;
1519 (xc++)->coeff= v;
1521 }else{
1522 if(!run){
1523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1524 else run= INT_MAX;
1525 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1526 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1528 xc->x=x;
1529 (xc++)->coeff= v;
1530 }else{
1531 int max_run;
1532 run--;
1533 v=0;
1535 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1536 else max_run= FFMIN(run, w-x-1);
1537 if(parent_xc)
1538 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1539 x+= max_run;
1540 run-= max_run;
1544 (xc++)->x= w+1; //end marker
1545 prev_xc= prev2_xc;
1546 prev2_xc= xc;
1548 if(parent_xc){
1549 if(y&1){
1550 while(parent_xc->x != parent->width+1)
1551 parent_xc++;
1552 parent_xc++;
1553 prev_parent_xc= parent_xc;
1554 }else{
1555 parent_xc= prev_parent_xc;
1560 (xc++)->x= w+1; //end marker
1564 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1565 const int w= b->width;
1566 int y;
1567 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1568 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1569 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1570 int new_index = 0;
1572 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1573 qadd= 0;
1574 qmul= 1<<QEXPSHIFT;
1577 /* If we are on the second or later slice, restore our index. */
1578 if (start_y != 0)
1579 new_index = save_state[0];
1582 for(y=start_y; y<h; y++){
1583 int x = 0;
1584 int v;
1585 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1586 memset(line, 0, b->width*sizeof(IDWTELEM));
1587 v = b->x_coeff[new_index].coeff;
1588 x = b->x_coeff[new_index++].x;
1589 while(x < w){
1590 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1591 register int u= -(v&1);
1592 line[x] = (t^u) - u;
1594 v = b->x_coeff[new_index].coeff;
1595 x = b->x_coeff[new_index++].x;
1599 /* Save our variables for the next slice. */
1600 save_state[0] = new_index;
1602 return;
1605 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1606 int plane_index, level, orientation;
1608 for(plane_index=0; plane_index<3; plane_index++){
1609 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1610 for(orientation=level ? 1:0; orientation<4; orientation++){
1611 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1615 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1616 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1619 static int alloc_blocks(SnowContext *s){
1620 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1621 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1623 s->b_width = w;
1624 s->b_height= h;
1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1627 return 0;
1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1631 uint8_t *bytestream= d->bytestream;
1632 uint8_t *bytestream_start= d->bytestream_start;
1633 *d= *s;
1634 d->bytestream= bytestream;
1635 d->bytestream_start= bytestream_start;
1638 //near copy & paste from dsputil, FIXME
1639 static int pix_sum(uint8_t * pix, int line_size, int w)
1641 int s, i, j;
1643 s = 0;
1644 for (i = 0; i < w; i++) {
1645 for (j = 0; j < w; j++) {
1646 s += pix[0];
1647 pix ++;
1649 pix += line_size - w;
1651 return s;
1654 //near copy & paste from dsputil, FIXME
1655 static int pix_norm1(uint8_t * pix, int line_size, int w)
1657 int s, i, j;
1658 uint32_t *sq = ff_squareTbl + 256;
1660 s = 0;
1661 for (i = 0; i < w; i++) {
1662 for (j = 0; j < w; j ++) {
1663 s += sq[pix[0]];
1664 pix ++;
1666 pix += line_size - w;
1668 return s;
1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1672 const int w= s->b_width << s->block_max_depth;
1673 const int rem_depth= s->block_max_depth - level;
1674 const int index= (x + y*w) << rem_depth;
1675 const int block_w= 1<<rem_depth;
1676 BlockNode block;
1677 int i,j;
1679 block.color[0]= l;
1680 block.color[1]= cb;
1681 block.color[2]= cr;
1682 block.mx= mx;
1683 block.my= my;
1684 block.ref= ref;
1685 block.type= type;
1686 block.level= level;
1688 for(j=0; j<block_w; j++){
1689 for(i=0; i<block_w; i++){
1690 s->block[index + i + j*w]= block;
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1696 const int offset[3]= {
1697 y*c-> stride + x,
1698 ((y*c->uvstride + x)>>1),
1699 ((y*c->uvstride + x)>>1),
1701 int i;
1702 for(i=0; i<3; i++){
1703 c->src[0][i]= src [i];
1704 c->ref[0][i]= ref [i] + offset[i];
1706 assert(!ref_index);
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1711 if(s->ref_frames == 1){
1712 *mx = mid_pred(left->mx, top->mx, tr->mx);
1713 *my = mid_pred(left->my, top->my, tr->my);
1714 }else{
1715 const int *scale = scale_mv_ref[ref];
1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1717 (top ->mx * scale[top ->ref] + 128) >>8,
1718 (tr ->mx * scale[tr ->ref] + 128) >>8);
1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1720 (top ->my * scale[top ->ref] + 128) >>8,
1721 (tr ->my * scale[tr ->ref] + 128) >>8);
1725 //FIXME copy&paste
1726 #define P_LEFT P[1]
1727 #define P_TOP P[2]
1728 #define P_TOPRIGHT P[3]
1729 #define P_MEDIAN P[4]
1730 #define P_MV1 P[9]
1731 #define FLAG_QPEL 1 //must be 1
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1734 uint8_t p_buffer[1024];
1735 uint8_t i_buffer[1024];
1736 uint8_t p_state[sizeof(s->block_state)];
1737 uint8_t i_state[sizeof(s->block_state)];
1738 RangeCoder pc, ic;
1739 uint8_t *pbbak= s->c.bytestream;
1740 uint8_t *pbbak_start= s->c.bytestream_start;
1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742 const int w= s->b_width << s->block_max_depth;
1743 const int h= s->b_height << s->block_max_depth;
1744 const int rem_depth= s->block_max_depth - level;
1745 const int index= (x + y*w) << rem_depth;
1746 const int block_w= 1<<(LOG2_MB_SIZE - level);
1747 int trx= (x+1)<<rem_depth;
1748 int try= (y+1)<<rem_depth;
1749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1750 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1755 int pl = left->color[0];
1756 int pcb= left->color[1];
1757 int pcr= left->color[2];
1758 int pmx, pmy;
1759 int mx=0, my=0;
1760 int l,cr,cb;
1761 const int stride= s->current_picture.linesize[0];
1762 const int uvstride= s->current_picture.linesize[1];
1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1766 int P[10][2];
1767 int16_t last_mv[3][2];
1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769 const int shift= 1+qpel;
1770 MotionEstContext *c= &s->m.me;
1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773 int my_context= av_log2(2*FFABS(left->my - top->my));
1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775 int ref, best_ref, ref_score, ref_mx, ref_my;
1777 assert(sizeof(s->block_state) >= 256);
1778 if(s->keyframe){
1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1780 return 0;
1783 // clip predictors / edge ?
1785 P_LEFT[0]= left->mx;
1786 P_LEFT[1]= left->my;
1787 P_TOP [0]= top->mx;
1788 P_TOP [1]= top->my;
1789 P_TOPRIGHT[0]= tr->mx;
1790 P_TOPRIGHT[1]= tr->my;
1792 last_mv[0][0]= s->block[index].mx;
1793 last_mv[0][1]= s->block[index].my;
1794 last_mv[1][0]= right->mx;
1795 last_mv[1][1]= right->my;
1796 last_mv[2][0]= bottom->mx;
1797 last_mv[2][1]= bottom->my;
1799 s->m.mb_stride=2;
1800 s->m.mb_x=
1801 s->m.mb_y= 0;
1802 c->skip= 0;
1804 assert(c-> stride == stride);
1805 assert(c->uvstride == uvstride);
1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1812 c->xmin = - x*block_w - 16+2;
1813 c->ymin = - y*block_w - 16+2;
1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1828 if (!y) {
1829 c->pred_x= P_LEFT[0];
1830 c->pred_y= P_LEFT[1];
1831 } else {
1832 c->pred_x = P_MEDIAN[0];
1833 c->pred_y = P_MEDIAN[1];
1836 score= INT_MAX;
1837 best_ref= 0;
1838 for(ref=0; ref<s->ref_frames; ref++){
1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1844 assert(ref_mx >= c->xmin);
1845 assert(ref_mx <= c->xmax);
1846 assert(ref_my >= c->ymin);
1847 assert(ref_my <= c->ymax);
1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1852 if(s->ref_mvs[ref]){
1853 s->ref_mvs[ref][index][0]= ref_mx;
1854 s->ref_mvs[ref][index][1]= ref_my;
1855 s->ref_scores[ref][index]= ref_score;
1857 if(score > ref_score){
1858 score= ref_score;
1859 best_ref= ref;
1860 mx= ref_mx;
1861 my= ref_my;
1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1866 // subpel search
1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1868 pc= s->c;
1869 pc.bytestream_start=
1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871 memcpy(p_state, s->block_state, sizeof(s->block_state));
1873 if(level!=s->block_max_depth)
1874 put_rac(&pc, &p_state[4 + s_context], 1);
1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876 if(s->ref_frames > 1)
1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881 p_len= pc.bytestream - pc.bytestream_start;
1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1884 block_s= block_w*block_w;
1885 sum = pix_sum(current_data[0], stride, block_w);
1886 l= (sum + block_s/2)/block_s;
1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1889 block_s= block_w*block_w>>2;
1890 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891 cb= (sum + block_s/2)/block_s;
1892 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894 cr= (sum + block_s/2)/block_s;
1895 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1897 ic= s->c;
1898 ic.bytestream_start=
1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900 memcpy(i_state, s->block_state, sizeof(s->block_state));
1901 if(level!=s->block_max_depth)
1902 put_rac(&ic, &i_state[4 + s_context], 1);
1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904 put_symbol(&ic, &i_state[32], l-pl , 1);
1905 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907 i_len= ic.bytestream - ic.bytestream_start;
1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1910 // assert(score==256*256*256*64-1);
1911 assert(iscore < 255*255*256 + s->lambda2*10);
1912 assert(iscore >= 0);
1913 assert(l>=0 && l<=255);
1914 assert(pl>=0 && pl<=255);
1916 if(level==0){
1917 int varc= iscore >> 8;
1918 int vard= score >> 8;
1919 if (vard <= 64 || vard < varc)
1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1921 else
1922 c->scene_change_score+= s->m.qscale;
1925 if(level!=s->block_max_depth){
1926 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1933 if(score2 < score && score2 < iscore)
1934 return score2;
1937 if(iscore < score){
1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939 memcpy(pbbak, i_buffer, i_len);
1940 s->c= ic;
1941 s->c.bytestream_start= pbbak_start;
1942 s->c.bytestream= pbbak + i_len;
1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944 memcpy(s->block_state, i_state, sizeof(s->block_state));
1945 return iscore;
1946 }else{
1947 memcpy(pbbak, p_buffer, p_len);
1948 s->c= pc;
1949 s->c.bytestream_start= pbbak_start;
1950 s->c.bytestream= pbbak + p_len;
1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952 memcpy(s->block_state, p_state, sizeof(s->block_state));
1953 return score;
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1960 }else{
1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1966 const int w= s->b_width << s->block_max_depth;
1967 const int rem_depth= s->block_max_depth - level;
1968 const int index= (x + y*w) << rem_depth;
1969 int trx= (x+1)<<rem_depth;
1970 BlockNode *b= &s->block[index];
1971 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1972 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1975 int pl = left->color[0];
1976 int pcb= left->color[1];
1977 int pcr= left->color[2];
1978 int pmx, pmy;
1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1984 if(s->keyframe){
1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1986 return;
1989 if(level!=s->block_max_depth){
1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1991 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1992 }else{
1993 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
1998 return;
2001 if(b->type & BLOCK_INTRA){
2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2008 }else{
2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2011 if(s->ref_frames > 1)
2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2020 const int w= s->b_width << s->block_max_depth;
2021 const int rem_depth= s->block_max_depth - level;
2022 const int index= (x + y*w) << rem_depth;
2023 int trx= (x+1)<<rem_depth;
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2030 if(s->keyframe){
2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2032 return;
2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2036 int type, mx, my;
2037 int l = left->color[0];
2038 int cb= left->color[1];
2039 int cr= left->color[2];
2040 int ref = 0;
2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2047 if(type){
2048 pred_mv(s, &mx, &my, 0, left, top, tr);
2049 l += get_symbol(&s->c, &s->block_state[32], 1);
2050 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2051 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2052 }else{
2053 if(s->ref_frames > 1)
2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2055 pred_mv(s, &mx, &my, ref, left, top, tr);
2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2060 }else{
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2068 static void encode_blocks(SnowContext *s, int search){
2069 int x, y;
2070 int w= s->b_width;
2071 int h= s->b_height;
2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2074 iterative_me(s);
2076 for(y=0; y<h; y++){
2077 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2079 return;
2081 for(x=0; x<w; x++){
2082 if(s->avctx->me_method == ME_ITER || !search)
2083 encode_q_branch2(s, 0, x, y);
2084 else
2085 encode_q_branch (s, 0, x, y);
2090 static void decode_blocks(SnowContext *s){
2091 int x, y;
2092 int w= s->b_width;
2093 int h= s->b_height;
2095 for(y=0; y<h; y++){
2096 for(x=0; x<w; x++){
2097 decode_q_branch(s, 0, x, y);
2102 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2103 static const uint8_t weight[64]={
2104 8,7,6,5,4,3,2,1,
2105 7,7,0,0,0,0,0,1,
2106 6,0,6,0,0,0,2,0,
2107 5,0,0,5,0,3,0,0,
2108 4,0,0,0,4,0,0,0,
2109 3,0,0,5,0,3,0,0,
2110 2,0,6,0,0,0,2,0,
2111 1,7,0,0,0,0,0,1,
2114 static const uint8_t brane[256]={
2115 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2116 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2117 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2118 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2119 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2120 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2121 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2122 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2123 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2124 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2125 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2126 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2127 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2128 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2129 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2130 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2133 static const uint8_t needs[16]={
2134 0,1,0,0,
2135 2,4,2,0,
2136 0,1,0,0,
2140 int x, y, b, r, l;
2141 int16_t tmpIt [64*(32+HTAPS_MAX)];
2142 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2143 int16_t *tmpI= tmpIt;
2144 uint8_t *tmp2= tmp2t[0];
2145 const uint8_t *hpel[11];
2146 assert(dx<16 && dy<16);
2147 r= brane[dx + 16*dy]&15;
2148 l= brane[dx + 16*dy]>>4;
2150 b= needs[l] | needs[r];
2151 if(p && !p->diag_mc)
2152 b= 15;
2154 if(b&5){
2155 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2156 for(x=0; x < b_w; x++){
2157 int a_1=src[x + HTAPS_MAX/2-4];
2158 int a0= src[x + HTAPS_MAX/2-3];
2159 int a1= src[x + HTAPS_MAX/2-2];
2160 int a2= src[x + HTAPS_MAX/2-1];
2161 int a3= src[x + HTAPS_MAX/2+0];
2162 int a4= src[x + HTAPS_MAX/2+1];
2163 int a5= src[x + HTAPS_MAX/2+2];
2164 int a6= src[x + HTAPS_MAX/2+3];
2165 int am=0;
2166 if(!p || p->fast_mc){
2167 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2168 tmpI[x]= am;
2169 am= (am+16)>>5;
2170 }else{
2171 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2172 tmpI[x]= am;
2173 am= (am+32)>>6;
2176 if(am&(~255)) am= ~(am>>31);
2177 tmp2[x]= am;
2179 tmpI+= 64;
2180 tmp2+= stride;
2181 src += stride;
2183 src -= stride*y;
2185 src += HTAPS_MAX/2 - 1;
2186 tmp2= tmp2t[1];
2188 if(b&2){
2189 for(y=0; y < b_h; y++){
2190 for(x=0; x < b_w+1; x++){
2191 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2192 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2193 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2194 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2195 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2196 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2197 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2198 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2199 int am=0;
2200 if(!p || p->fast_mc)
2201 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2202 else
2203 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2205 if(am&(~255)) am= ~(am>>31);
2206 tmp2[x]= am;
2208 src += stride;
2209 tmp2+= stride;
2211 src -= stride*y;
2213 src += stride*(HTAPS_MAX/2 - 1);
2214 tmp2= tmp2t[2];
2215 tmpI= tmpIt;
2216 if(b&4){
2217 for(y=0; y < b_h; y++){
2218 for(x=0; x < b_w; x++){
2219 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2220 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2221 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2222 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2223 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2224 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2225 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2226 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2227 int am=0;
2228 if(!p || p->fast_mc)
2229 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2230 else
2231 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2232 if(am&(~255)) am= ~(am>>31);
2233 tmp2[x]= am;
2235 tmpI+= 64;
2236 tmp2+= stride;
2240 hpel[ 0]= src;
2241 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2242 hpel[ 2]= src + 1;
2244 hpel[ 4]= tmp2t[1];
2245 hpel[ 5]= tmp2t[2];
2246 hpel[ 6]= tmp2t[1] + 1;
2248 hpel[ 8]= src + stride;
2249 hpel[ 9]= hpel[1] + stride;
2250 hpel[10]= hpel[8] + 1;
2252 if(b==15){
2253 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2254 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2255 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2256 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2257 dx&=7;
2258 dy&=7;
2259 for(y=0; y < b_h; y++){
2260 for(x=0; x < b_w; x++){
2261 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2262 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2264 src1+=stride;
2265 src2+=stride;
2266 src3+=stride;
2267 src4+=stride;
2268 dst +=stride;
2270 }else{
2271 const uint8_t *src1= hpel[l];
2272 const uint8_t *src2= hpel[r];
2273 int a= weight[((dx&7) + (8*(dy&7)))];
2274 int b= 8-a;
2275 for(y=0; y < b_h; y++){
2276 for(x=0; x < b_w; x++){
2277 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2279 src1+=stride;
2280 src2+=stride;
2281 dst +=stride;
2286 #define mca(dx,dy,b_w)\
2287 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2288 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2289 assert(h==b_w);\
2290 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2293 mca( 0, 0,16)
2294 mca( 8, 0,16)
2295 mca( 0, 8,16)
2296 mca( 8, 8,16)
2297 mca( 0, 0,8)
2298 mca( 8, 0,8)
2299 mca( 0, 8,8)
2300 mca( 8, 8,8)
2302 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2303 if(block->type & BLOCK_INTRA){
2304 int x, y;
2305 const int color = block->color[plane_index];
2306 const int color4= color*0x01010101;
2307 if(b_w==32){
2308 for(y=0; y < b_h; y++){
2309 *(uint32_t*)&dst[0 + y*stride]= color4;
2310 *(uint32_t*)&dst[4 + y*stride]= color4;
2311 *(uint32_t*)&dst[8 + y*stride]= color4;
2312 *(uint32_t*)&dst[12+ y*stride]= color4;
2313 *(uint32_t*)&dst[16+ y*stride]= color4;
2314 *(uint32_t*)&dst[20+ y*stride]= color4;
2315 *(uint32_t*)&dst[24+ y*stride]= color4;
2316 *(uint32_t*)&dst[28+ y*stride]= color4;
2318 }else if(b_w==16){
2319 for(y=0; y < b_h; y++){
2320 *(uint32_t*)&dst[0 + y*stride]= color4;
2321 *(uint32_t*)&dst[4 + y*stride]= color4;
2322 *(uint32_t*)&dst[8 + y*stride]= color4;
2323 *(uint32_t*)&dst[12+ y*stride]= color4;
2325 }else if(b_w==8){
2326 for(y=0; y < b_h; y++){
2327 *(uint32_t*)&dst[0 + y*stride]= color4;
2328 *(uint32_t*)&dst[4 + y*stride]= color4;
2330 }else if(b_w==4){
2331 for(y=0; y < b_h; y++){
2332 *(uint32_t*)&dst[0 + y*stride]= color4;
2334 }else{
2335 for(y=0; y < b_h; y++){
2336 for(x=0; x < b_w; x++){
2337 dst[x + y*stride]= color;
2341 }else{
2342 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2343 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2344 int mx= block->mx*scale;
2345 int my= block->my*scale;
2346 const int dx= mx&15;
2347 const int dy= my&15;
2348 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2349 sx += (mx>>4) - (HTAPS_MAX/2-1);
2350 sy += (my>>4) - (HTAPS_MAX/2-1);
2351 src += sx + sy*stride;
2352 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2353 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2354 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2355 src= tmp + MB_SIZE;
2357 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2358 // assert(!(b_w&(b_w-1)));
2359 assert(b_w>1 && b_h>1);
2360 assert((tab_index>=0 && tab_index<4) || b_w==32);
2361 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2362 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2363 else if(b_w==32){
2364 int y;
2365 for(y=0; y<b_h; y+=16){
2366 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2367 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2369 }else if(b_w==b_h)
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2371 else if(b_w==2*b_h){
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2373 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2374 }else{
2375 assert(2*b_w==b_h);
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2382 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2383 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2384 int y, x;
2385 IDWTELEM * dst;
2386 for(y=0; y<b_h; y++){
2387 //FIXME ugly misuse of obmc_stride
2388 const uint8_t *obmc1= obmc + y*obmc_stride;
2389 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2390 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2391 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2392 dst = slice_buffer_get_line(sb, src_y + y);
2393 for(x=0; x<b_w; x++){
2394 int v= obmc1[x] * block[3][x + y*src_stride]
2395 +obmc2[x] * block[2][x + y*src_stride]
2396 +obmc3[x] * block[1][x + y*src_stride]
2397 +obmc4[x] * block[0][x + y*src_stride];
2399 v <<= 8 - LOG2_OBMC_MAX;
2400 if(FRAC_BITS != 8){
2401 v >>= 8 - FRAC_BITS;
2403 if(add){
2404 v += dst[x + src_x];
2405 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2406 if(v&(~255)) v= ~(v>>31);
2407 dst8[x + y*src_stride] = v;
2408 }else{
2409 dst[x + src_x] -= v;
2415 //FIXME name cleanup (b_w, block_w, b_width stuff)
2416 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2417 const int b_width = s->b_width << s->block_max_depth;
2418 const int b_height= s->b_height << s->block_max_depth;
2419 const int b_stride= b_width;
2420 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2421 BlockNode *rt= lt+1;
2422 BlockNode *lb= lt+b_stride;
2423 BlockNode *rb= lb+1;
2424 uint8_t *block[4];
2425 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2426 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2427 uint8_t *ptmp;
2428 int x,y;
2430 if(b_x<0){
2431 lt= rt;
2432 lb= rb;
2433 }else if(b_x + 1 >= b_width){
2434 rt= lt;
2435 rb= lb;
2437 if(b_y<0){
2438 lt= lb;
2439 rt= rb;
2440 }else if(b_y + 1 >= b_height){
2441 lb= lt;
2442 rb= rt;
2445 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2446 obmc -= src_x;
2447 b_w += src_x;
2448 if(!sliced && !offset_dst)
2449 dst -= src_x;
2450 src_x=0;
2451 }else if(src_x + b_w > w){
2452 b_w = w - src_x;
2454 if(src_y<0){
2455 obmc -= src_y*obmc_stride;
2456 b_h += src_y;
2457 if(!sliced && !offset_dst)
2458 dst -= src_y*dst_stride;
2459 src_y=0;
2460 }else if(src_y + b_h> h){
2461 b_h = h - src_y;
2464 if(b_w<=0 || b_h<=0) return;
2466 assert(src_stride > 2*MB_SIZE + 5);
2468 if(!sliced && offset_dst)
2469 dst += src_x + src_y*dst_stride;
2470 dst8+= src_x + src_y*src_stride;
2471 // src += src_x + src_y*src_stride;
2473 ptmp= tmp + 3*tmp_step;
2474 block[0]= ptmp;
2475 ptmp+=tmp_step;
2476 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2478 if(same_block(lt, rt)){
2479 block[1]= block[0];
2480 }else{
2481 block[1]= ptmp;
2482 ptmp+=tmp_step;
2483 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2486 if(same_block(lt, lb)){
2487 block[2]= block[0];
2488 }else if(same_block(rt, lb)){
2489 block[2]= block[1];
2490 }else{
2491 block[2]= ptmp;
2492 ptmp+=tmp_step;
2493 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2496 if(same_block(lt, rb) ){
2497 block[3]= block[0];
2498 }else if(same_block(rt, rb)){
2499 block[3]= block[1];
2500 }else if(same_block(lb, rb)){
2501 block[3]= block[2];
2502 }else{
2503 block[3]= ptmp;
2504 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2506 #if 0
2507 for(y=0; y<b_h; y++){
2508 for(x=0; x<b_w; x++){
2509 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2510 if(add) dst[x + y*dst_stride] += v;
2511 else dst[x + y*dst_stride] -= v;
2514 for(y=0; y<b_h; y++){
2515 uint8_t *obmc2= obmc + (obmc_stride>>1);
2516 for(x=0; x<b_w; x++){
2517 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2518 if(add) dst[x + y*dst_stride] += v;
2519 else dst[x + y*dst_stride] -= v;
2522 for(y=0; y<b_h; y++){
2523 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2524 for(x=0; x<b_w; x++){
2525 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2526 if(add) dst[x + y*dst_stride] += v;
2527 else dst[x + y*dst_stride] -= v;
2530 for(y=0; y<b_h; y++){
2531 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2532 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2533 for(x=0; x<b_w; x++){
2534 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2535 if(add) dst[x + y*dst_stride] += v;
2536 else dst[x + y*dst_stride] -= v;
2539 #else
2540 if(sliced){
2541 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2542 }else{
2543 for(y=0; y<b_h; y++){
2544 //FIXME ugly misuse of obmc_stride
2545 const uint8_t *obmc1= obmc + y*obmc_stride;
2546 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2547 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2548 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2549 for(x=0; x<b_w; x++){
2550 int v= obmc1[x] * block[3][x + y*src_stride]
2551 +obmc2[x] * block[2][x + y*src_stride]
2552 +obmc3[x] * block[1][x + y*src_stride]
2553 +obmc4[x] * block[0][x + y*src_stride];
2555 v <<= 8 - LOG2_OBMC_MAX;
2556 if(FRAC_BITS != 8){
2557 v >>= 8 - FRAC_BITS;
2559 if(add){
2560 v += dst[x + y*dst_stride];
2561 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2562 if(v&(~255)) v= ~(v>>31);
2563 dst8[x + y*src_stride] = v;
2564 }else{
2565 dst[x + y*dst_stride] -= v;
2570 #endif /* 0 */
2573 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2574 Plane *p= &s->plane[plane_index];
2575 const int mb_w= s->b_width << s->block_max_depth;
2576 const int mb_h= s->b_height << s->block_max_depth;
2577 int x, y, mb_x;
2578 int block_size = MB_SIZE >> s->block_max_depth;
2579 int block_w = plane_index ? block_size/2 : block_size;
2580 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2581 int obmc_stride= plane_index ? block_size : 2*block_size;
2582 int ref_stride= s->current_picture.linesize[plane_index];
2583 uint8_t *dst8= s->current_picture.data[plane_index];
2584 int w= p->width;
2585 int h= p->height;
2587 if(s->keyframe || (s->avctx->debug&512)){
2588 if(mb_y==mb_h)
2589 return;
2591 if(add){
2592 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2593 // DWTELEM * line = slice_buffer_get_line(sb, y);
2594 IDWTELEM * line = sb->line[y];
2595 for(x=0; x<w; x++){
2596 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2597 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2598 v >>= FRAC_BITS;
2599 if(v&(~255)) v= ~(v>>31);
2600 dst8[x + y*ref_stride]= v;
2603 }else{
2604 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2605 // DWTELEM * line = slice_buffer_get_line(sb, y);
2606 IDWTELEM * line = sb->line[y];
2607 for(x=0; x<w; x++){
2608 line[x] -= 128 << FRAC_BITS;
2609 // buf[x + y*w]-= 128<<FRAC_BITS;
2614 return;
2617 for(mb_x=0; mb_x<=mb_w; mb_x++){
2618 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2619 block_w*mb_x - block_w/2,
2620 block_w*mb_y - block_w/2,
2621 block_w, block_w,
2622 w, h,
2623 w, ref_stride, obmc_stride,
2624 mb_x - 1, mb_y - 1,
2625 add, 0, plane_index);
2629 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2630 Plane *p= &s->plane[plane_index];
2631 const int mb_w= s->b_width << s->block_max_depth;
2632 const int mb_h= s->b_height << s->block_max_depth;
2633 int x, y, mb_x;
2634 int block_size = MB_SIZE >> s->block_max_depth;
2635 int block_w = plane_index ? block_size/2 : block_size;
2636 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2637 const int obmc_stride= plane_index ? block_size : 2*block_size;
2638 int ref_stride= s->current_picture.linesize[plane_index];
2639 uint8_t *dst8= s->current_picture.data[plane_index];
2640 int w= p->width;
2641 int h= p->height;
2643 if(s->keyframe || (s->avctx->debug&512)){
2644 if(mb_y==mb_h)
2645 return;
2647 if(add){
2648 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2649 for(x=0; x<w; x++){
2650 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2651 v >>= FRAC_BITS;
2652 if(v&(~255)) v= ~(v>>31);
2653 dst8[x + y*ref_stride]= v;
2656 }else{
2657 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2658 for(x=0; x<w; x++){
2659 buf[x + y*w]-= 128<<FRAC_BITS;
2664 return;
2667 for(mb_x=0; mb_x<=mb_w; mb_x++){
2668 add_yblock(s, 0, NULL, buf, dst8, obmc,
2669 block_w*mb_x - block_w/2,
2670 block_w*mb_y - block_w/2,
2671 block_w, block_w,
2672 w, h,
2673 w, ref_stride, obmc_stride,
2674 mb_x - 1, mb_y - 1,
2675 add, 1, plane_index);
2679 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2680 const int mb_h= s->b_height << s->block_max_depth;
2681 int mb_y;
2682 for(mb_y=0; mb_y<=mb_h; mb_y++)
2683 predict_slice(s, buf, plane_index, add, mb_y);
2686 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2687 int i, x2, y2;
2688 Plane *p= &s->plane[plane_index];
2689 const int block_size = MB_SIZE >> s->block_max_depth;
2690 const int block_w = plane_index ? block_size/2 : block_size;
2691 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2692 const int obmc_stride= plane_index ? block_size : 2*block_size;
2693 const int ref_stride= s->current_picture.linesize[plane_index];
2694 uint8_t *src= s-> input_picture.data[plane_index];
2695 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2696 const int b_stride = s->b_width << s->block_max_depth;
2697 const int w= p->width;
2698 const int h= p->height;
2699 int index= mb_x + mb_y*b_stride;
2700 BlockNode *b= &s->block[index];
2701 BlockNode backup= *b;
2702 int ab=0;
2703 int aa=0;
2705 b->type|= BLOCK_INTRA;
2706 b->color[plane_index]= 0;
2707 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2709 for(i=0; i<4; i++){
2710 int mb_x2= mb_x + (i &1) - 1;
2711 int mb_y2= mb_y + (i>>1) - 1;
2712 int x= block_w*mb_x2 + block_w/2;
2713 int y= block_w*mb_y2 + block_w/2;
2715 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2716 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2718 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2719 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2720 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2721 int obmc_v= obmc[index];
2722 int d;
2723 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2724 if(x<0) obmc_v += obmc[index + block_w];
2725 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2726 if(x+block_w>w) obmc_v += obmc[index - block_w];
2727 //FIXME precalculate this or simplify it somehow else
2729 d = -dst[index] + (1<<(FRAC_BITS-1));
2730 dst[index] = d;
2731 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2732 aa += obmc_v * obmc_v; //FIXME precalculate this
2736 *b= backup;
2738 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2741 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2742 const int b_stride = s->b_width << s->block_max_depth;
2743 const int b_height = s->b_height<< s->block_max_depth;
2744 int index= x + y*b_stride;
2745 const BlockNode *b = &s->block[index];
2746 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2747 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2748 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2749 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2750 int dmx, dmy;
2751 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2752 // int my_context= av_log2(2*FFABS(left->my - top->my));
2754 if(x<0 || x>=b_stride || y>=b_height)
2755 return 0;
2757 1 0 0
2758 01X 1-2 1
2759 001XX 3-6 2-3
2760 0001XXX 7-14 4-7
2761 00001XXXX 15-30 8-15
2763 //FIXME try accurate rate
2764 //FIXME intra and inter predictors if surrounding blocks are not the same type
2765 if(b->type & BLOCK_INTRA){
2766 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2767 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2768 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2769 }else{
2770 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2771 dmx-= b->mx;
2772 dmy-= b->my;
2773 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2774 + av_log2(2*FFABS(dmy))
2775 + av_log2(2*b->ref));
2779 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2780 Plane *p= &s->plane[plane_index];
2781 const int block_size = MB_SIZE >> s->block_max_depth;
2782 const int block_w = plane_index ? block_size/2 : block_size;
2783 const int obmc_stride= plane_index ? block_size : 2*block_size;
2784 const int ref_stride= s->current_picture.linesize[plane_index];
2785 uint8_t *dst= s->current_picture.data[plane_index];
2786 uint8_t *src= s-> input_picture.data[plane_index];
2787 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2788 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2789 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2790 const int b_stride = s->b_width << s->block_max_depth;
2791 const int b_height = s->b_height<< s->block_max_depth;
2792 const int w= p->width;
2793 const int h= p->height;
2794 int distortion;
2795 int rate= 0;
2796 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2797 int sx= block_w*mb_x - block_w/2;
2798 int sy= block_w*mb_y - block_w/2;
2799 int x0= FFMAX(0,-sx);
2800 int y0= FFMAX(0,-sy);
2801 int x1= FFMIN(block_w*2, w-sx);
2802 int y1= FFMIN(block_w*2, h-sy);
2803 int i,x,y;
2805 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2807 for(y=y0; y<y1; y++){
2808 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2809 const IDWTELEM *pred1 = pred + y*obmc_stride;
2810 uint8_t *cur1 = cur + y*ref_stride;
2811 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2812 for(x=x0; x<x1; x++){
2813 #if FRAC_BITS >= LOG2_OBMC_MAX
2814 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2815 #else
2816 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2817 #endif
2818 v = (v + pred1[x]) >> FRAC_BITS;
2819 if(v&(~255)) v= ~(v>>31);
2820 dst1[x] = v;
2824 /* copy the regions where obmc[] = (uint8_t)256 */
2825 if(LOG2_OBMC_MAX == 8
2826 && (mb_x == 0 || mb_x == b_stride-1)
2827 && (mb_y == 0 || mb_y == b_height-1)){
2828 if(mb_x == 0)
2829 x1 = block_w;
2830 else
2831 x0 = block_w;
2832 if(mb_y == 0)
2833 y1 = block_w;
2834 else
2835 y0 = block_w;
2836 for(y=y0; y<y1; y++)
2837 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2840 if(block_w==16){
2841 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2842 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2843 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2844 * So improving the score of one block is not strictly guaranteed
2845 * to improve the score of the whole frame, thus iterative motion
2846 * estimation does not always converge. */
2847 if(s->avctx->me_cmp == FF_CMP_W97)
2848 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2849 else if(s->avctx->me_cmp == FF_CMP_W53)
2850 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2851 else{
2852 distortion = 0;
2853 for(i=0; i<4; i++){
2854 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2855 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2858 }else{
2859 assert(block_w==8);
2860 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2863 if(plane_index==0){
2864 for(i=0; i<4; i++){
2865 /* ..RRr
2866 * .RXx.
2867 * rxx..
2869 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2871 if(mb_x == b_stride-2)
2872 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2874 return distortion + rate*penalty_factor;
2877 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2878 int i, y2;
2879 Plane *p= &s->plane[plane_index];
2880 const int block_size = MB_SIZE >> s->block_max_depth;
2881 const int block_w = plane_index ? block_size/2 : block_size;
2882 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2883 const int obmc_stride= plane_index ? block_size : 2*block_size;
2884 const int ref_stride= s->current_picture.linesize[plane_index];
2885 uint8_t *dst= s->current_picture.data[plane_index];
2886 uint8_t *src= s-> input_picture.data[plane_index];
2887 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2888 // const has only been removed from zero_dst to suppress a warning
2889 static IDWTELEM zero_dst[4096]; //FIXME
2890 const int b_stride = s->b_width << s->block_max_depth;
2891 const int w= p->width;
2892 const int h= p->height;
2893 int distortion= 0;
2894 int rate= 0;
2895 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2897 for(i=0; i<9; i++){
2898 int mb_x2= mb_x + (i%3) - 1;
2899 int mb_y2= mb_y + (i/3) - 1;
2900 int x= block_w*mb_x2 + block_w/2;
2901 int y= block_w*mb_y2 + block_w/2;
2903 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2904 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2906 //FIXME find a cleaner/simpler way to skip the outside stuff
2907 for(y2= y; y2<0; y2++)
2908 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2909 for(y2= h; y2<y+block_w; y2++)
2910 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2911 if(x<0){
2912 for(y2= y; y2<y+block_w; y2++)
2913 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2915 if(x+block_w > w){
2916 for(y2= y; y2<y+block_w; y2++)
2917 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2920 assert(block_w== 8 || block_w==16);
2921 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2924 if(plane_index==0){
2925 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2926 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2928 /* ..RRRr
2929 * .RXXx.
2930 * .RXXx.
2931 * rxxx.
2933 if(merged)
2934 rate = get_block_bits(s, mb_x, mb_y, 2);
2935 for(i=merged?4:0; i<9; i++){
2936 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2937 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2940 return distortion + rate*penalty_factor;
2943 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2944 const int b_stride= s->b_width << s->block_max_depth;
2945 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2946 BlockNode backup= *block;
2947 int rd, index, value;
2949 assert(mb_x>=0 && mb_y>=0);
2950 assert(mb_x<b_stride);
2952 if(intra){
2953 block->color[0] = p[0];
2954 block->color[1] = p[1];
2955 block->color[2] = p[2];
2956 block->type |= BLOCK_INTRA;
2957 }else{
2958 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2959 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2960 if(s->me_cache[index] == value)
2961 return 0;
2962 s->me_cache[index]= value;
2964 block->mx= p[0];
2965 block->my= p[1];
2966 block->type &= ~BLOCK_INTRA;
2969 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2971 //FIXME chroma
2972 if(rd < *best_rd){
2973 *best_rd= rd;
2974 return 1;
2975 }else{
2976 *block= backup;
2977 return 0;
2981 /* special case for int[2] args we discard afterwards,
2982 * fixes compilation problem with gcc 2.95 */
2983 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2984 int p[2] = {p0, p1};
2985 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2988 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2989 const int b_stride= s->b_width << s->block_max_depth;
2990 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2991 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2992 int rd, index, value;
2994 assert(mb_x>=0 && mb_y>=0);
2995 assert(mb_x<b_stride);
2996 assert(((mb_x|mb_y)&1) == 0);
2998 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2999 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3000 if(s->me_cache[index] == value)
3001 return 0;
3002 s->me_cache[index]= value;
3004 block->mx= p0;
3005 block->my= p1;
3006 block->ref= ref;
3007 block->type &= ~BLOCK_INTRA;
3008 block[1]= block[b_stride]= block[b_stride+1]= *block;
3010 rd= get_4block_rd(s, mb_x, mb_y, 0);
3012 //FIXME chroma
3013 if(rd < *best_rd){
3014 *best_rd= rd;
3015 return 1;
3016 }else{
3017 block[0]= backup[0];
3018 block[1]= backup[1];
3019 block[b_stride]= backup[2];
3020 block[b_stride+1]= backup[3];
3021 return 0;
3025 static void iterative_me(SnowContext *s){
3026 int pass, mb_x, mb_y;
3027 const int b_width = s->b_width << s->block_max_depth;
3028 const int b_height= s->b_height << s->block_max_depth;
3029 const int b_stride= b_width;
3030 int color[3];
3033 RangeCoder r = s->c;
3034 uint8_t state[sizeof(s->block_state)];
3035 memcpy(state, s->block_state, sizeof(s->block_state));
3036 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3037 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3038 encode_q_branch(s, 0, mb_x, mb_y);
3039 s->c = r;
3040 memcpy(s->block_state, state, sizeof(s->block_state));
3043 for(pass=0; pass<25; pass++){
3044 int change= 0;
3046 for(mb_y= 0; mb_y<b_height; mb_y++){
3047 for(mb_x= 0; mb_x<b_width; mb_x++){
3048 int dia_change, i, j, ref;
3049 int best_rd= INT_MAX, ref_rd;
3050 BlockNode backup, ref_b;
3051 const int index= mb_x + mb_y * b_stride;
3052 BlockNode *block= &s->block[index];
3053 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3054 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3055 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3056 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3057 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3058 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3059 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3060 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3061 const int b_w= (MB_SIZE >> s->block_max_depth);
3062 uint8_t obmc_edged[b_w*2][b_w*2];
3064 if(pass && (block->type & BLOCK_OPT))
3065 continue;
3066 block->type |= BLOCK_OPT;
3068 backup= *block;
3070 if(!s->me_cache_generation)
3071 memset(s->me_cache, 0, sizeof(s->me_cache));
3072 s->me_cache_generation += 1<<22;
3074 //FIXME precalculate
3076 int x, y;
3077 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3078 if(mb_x==0)
3079 for(y=0; y<b_w*2; y++)
3080 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3081 if(mb_x==b_stride-1)
3082 for(y=0; y<b_w*2; y++)
3083 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3084 if(mb_y==0){
3085 for(x=0; x<b_w*2; x++)
3086 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3087 for(y=1; y<b_w; y++)
3088 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3090 if(mb_y==b_height-1){
3091 for(x=0; x<b_w*2; x++)
3092 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3093 for(y=b_w; y<b_w*2-1; y++)
3094 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3098 //skip stuff outside the picture
3099 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3100 uint8_t *src= s-> input_picture.data[0];
3101 uint8_t *dst= s->current_picture.data[0];
3102 const int stride= s->current_picture.linesize[0];
3103 const int block_w= MB_SIZE >> s->block_max_depth;
3104 const int sx= block_w*mb_x - block_w/2;
3105 const int sy= block_w*mb_y - block_w/2;
3106 const int w= s->plane[0].width;
3107 const int h= s->plane[0].height;
3108 int y;
3110 for(y=sy; y<0; y++)
3111 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3112 for(y=h; y<sy+block_w*2; y++)
3113 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3114 if(sx<0){
3115 for(y=sy; y<sy+block_w*2; y++)
3116 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3118 if(sx+block_w*2 > w){
3119 for(y=sy; y<sy+block_w*2; y++)
3120 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3124 // intra(black) = neighbors' contribution to the current block
3125 for(i=0; i<3; i++)
3126 color[i]= get_dc(s, mb_x, mb_y, i);
3128 // get previous score (cannot be cached due to OBMC)
3129 if(pass > 0 && (block->type&BLOCK_INTRA)){
3130 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3131 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3132 }else
3133 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3135 ref_b= *block;
3136 ref_rd= best_rd;
3137 for(ref=0; ref < s->ref_frames; ref++){
3138 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3139 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3140 continue;
3141 block->ref= ref;
3142 best_rd= INT_MAX;
3144 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3145 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3146 if(tb)
3147 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3148 if(lb)
3149 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3150 if(rb)
3151 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3152 if(bb)
3153 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3155 /* fullpel ME */
3156 //FIXME avoid subpel interpolation / round to nearest integer
3158 dia_change=0;
3159 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3160 for(j=0; j<i; j++){
3161 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3162 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3167 }while(dia_change);
3168 /* subpel ME */
3170 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3171 dia_change=0;
3172 for(i=0; i<8; i++)
3173 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3174 }while(dia_change);
3175 //FIXME or try the standard 2 pass qpel or similar
3177 mvr[0][0]= block->mx;
3178 mvr[0][1]= block->my;
3179 if(ref_rd > best_rd){
3180 ref_rd= best_rd;
3181 ref_b= *block;
3184 best_rd= ref_rd;
3185 *block= ref_b;
3186 #if 1
3187 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3188 //FIXME RD style color selection
3189 #endif
3190 if(!same_block(block, &backup)){
3191 if(tb ) tb ->type &= ~BLOCK_OPT;
3192 if(lb ) lb ->type &= ~BLOCK_OPT;
3193 if(rb ) rb ->type &= ~BLOCK_OPT;
3194 if(bb ) bb ->type &= ~BLOCK_OPT;
3195 if(tlb) tlb->type &= ~BLOCK_OPT;
3196 if(trb) trb->type &= ~BLOCK_OPT;
3197 if(blb) blb->type &= ~BLOCK_OPT;
3198 if(brb) brb->type &= ~BLOCK_OPT;
3199 change ++;
3203 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3204 if(!change)
3205 break;
3208 if(s->block_max_depth == 1){
3209 int change= 0;
3210 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3211 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3212 int i;
3213 int best_rd, init_rd;
3214 const int index= mb_x + mb_y * b_stride;
3215 BlockNode *b[4];
3217 b[0]= &s->block[index];
3218 b[1]= b[0]+1;
3219 b[2]= b[0]+b_stride;
3220 b[3]= b[2]+1;
3221 if(same_block(b[0], b[1]) &&
3222 same_block(b[0], b[2]) &&
3223 same_block(b[0], b[3]))
3224 continue;
3226 if(!s->me_cache_generation)
3227 memset(s->me_cache, 0, sizeof(s->me_cache));
3228 s->me_cache_generation += 1<<22;
3230 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3232 //FIXME more multiref search?
3233 check_4block_inter(s, mb_x, mb_y,
3234 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3235 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3237 for(i=0; i<4; i++)
3238 if(!(b[i]->type&BLOCK_INTRA))
3239 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3241 if(init_rd != best_rd)
3242 change++;
3245 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3249 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3250 const int w= b->width;
3251 const int h= b->height;
3252 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3253 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3254 int x,y, thres1, thres2;
3256 if(s->qlog == LOSSLESS_QLOG){
3257 for(y=0; y<h; y++)
3258 for(x=0; x<w; x++)
3259 dst[x + y*stride]= src[x + y*stride];
3260 return;
3263 bias= bias ? 0 : (3*qmul)>>3;
3264 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3265 thres2= 2*thres1;
3267 if(!bias){
3268 for(y=0; y<h; y++){
3269 for(x=0; x<w; x++){
3270 int i= src[x + y*stride];
3272 if((unsigned)(i+thres1) > thres2){
3273 if(i>=0){
3274 i<<= QEXPSHIFT;
3275 i/= qmul; //FIXME optimize
3276 dst[x + y*stride]= i;
3277 }else{
3278 i= -i;
3279 i<<= QEXPSHIFT;
3280 i/= qmul; //FIXME optimize
3281 dst[x + y*stride]= -i;
3283 }else
3284 dst[x + y*stride]= 0;
3287 }else{
3288 for(y=0; y<h; y++){
3289 for(x=0; x<w; x++){
3290 int i= src[x + y*stride];
3292 if((unsigned)(i+thres1) > thres2){
3293 if(i>=0){
3294 i<<= QEXPSHIFT;
3295 i= (i + bias) / qmul; //FIXME optimize
3296 dst[x + y*stride]= i;
3297 }else{
3298 i= -i;
3299 i<<= QEXPSHIFT;
3300 i= (i + bias) / qmul; //FIXME optimize
3301 dst[x + y*stride]= -i;
3303 }else
3304 dst[x + y*stride]= 0;
3310 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3311 const int w= b->width;
3312 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3313 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3314 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3315 int x,y;
3317 if(s->qlog == LOSSLESS_QLOG) return;
3319 for(y=start_y; y<end_y; y++){
3320 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3321 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3322 for(x=0; x<w; x++){
3323 int i= line[x];
3324 if(i<0){
3325 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3326 }else if(i>0){
3327 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3333 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3334 const int w= b->width;
3335 const int h= b->height;
3336 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3337 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3338 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3339 int x,y;
3341 if(s->qlog == LOSSLESS_QLOG) return;
3343 for(y=0; y<h; y++){
3344 for(x=0; x<w; x++){
3345 int i= src[x + y*stride];
3346 if(i<0){
3347 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3348 }else if(i>0){
3349 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3355 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3356 const int w= b->width;
3357 const int h= b->height;
3358 int x,y;
3360 for(y=h-1; y>=0; y--){
3361 for(x=w-1; x>=0; x--){
3362 int i= x + y*stride;
3364 if(x){
3365 if(use_median){
3366 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3367 else src[i] -= src[i - 1];
3368 }else{
3369 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3370 else src[i] -= src[i - 1];
3372 }else{
3373 if(y) src[i] -= src[i - stride];
3379 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3380 const int w= b->width;
3381 int x,y;
3383 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3384 IDWTELEM * prev;
3386 if (start_y != 0)
3387 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3389 for(y=start_y; y<end_y; y++){
3390 prev = line;
3391 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3392 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3393 for(x=0; x<w; x++){
3394 if(x){
3395 if(use_median){
3396 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3397 else line[x] += line[x - 1];
3398 }else{
3399 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3400 else line[x] += line[x - 1];
3402 }else{
3403 if(y) line[x] += prev[x];
3409 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3410 const int w= b->width;
3411 const int h= b->height;
3412 int x,y;
3414 for(y=0; y<h; y++){
3415 for(x=0; x<w; x++){
3416 int i= x + y*stride;
3418 if(x){
3419 if(use_median){
3420 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3421 else src[i] += src[i - 1];
3422 }else{
3423 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3424 else src[i] += src[i - 1];
3426 }else{
3427 if(y) src[i] += src[i - stride];
3433 static void encode_qlogs(SnowContext *s){
3434 int plane_index, level, orientation;
3436 for(plane_index=0; plane_index<2; plane_index++){
3437 for(level=0; level<s->spatial_decomposition_count; level++){
3438 for(orientation=level ? 1:0; orientation<4; orientation++){
3439 if(orientation==2) continue;
3440 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3446 static void encode_header(SnowContext *s){
3447 int plane_index, i;
3448 uint8_t kstate[32];
3450 memset(kstate, MID_STATE, sizeof(kstate));
3452 put_rac(&s->c, kstate, s->keyframe);
3453 if(s->keyframe || s->always_reset){
3454 reset_contexts(s);
3455 s->last_spatial_decomposition_type=
3456 s->last_qlog=
3457 s->last_qbias=
3458 s->last_mv_scale=
3459 s->last_block_max_depth= 0;
3460 for(plane_index=0; plane_index<2; plane_index++){
3461 Plane *p= &s->plane[plane_index];
3462 p->last_htaps=0;
3463 p->last_diag_mc=0;
3464 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3467 if(s->keyframe){
3468 put_symbol(&s->c, s->header_state, s->version, 0);
3469 put_rac(&s->c, s->header_state, s->always_reset);
3470 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3471 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3472 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3473 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3474 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3475 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3476 put_rac(&s->c, s->header_state, s->spatial_scalability);
3477 // put_rac(&s->c, s->header_state, s->rate_scalability);
3478 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3480 encode_qlogs(s);
3483 if(!s->keyframe){
3484 int update_mc=0;
3485 for(plane_index=0; plane_index<2; plane_index++){
3486 Plane *p= &s->plane[plane_index];
3487 update_mc |= p->last_htaps != p->htaps;
3488 update_mc |= p->last_diag_mc != p->diag_mc;
3489 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3491 put_rac(&s->c, s->header_state, update_mc);
3492 if(update_mc){
3493 for(plane_index=0; plane_index<2; plane_index++){
3494 Plane *p= &s->plane[plane_index];
3495 put_rac(&s->c, s->header_state, p->diag_mc);
3496 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3497 for(i= p->htaps/2; i; i--)
3498 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3501 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3502 put_rac(&s->c, s->header_state, 1);
3503 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3504 encode_qlogs(s);
3505 }else
3506 put_rac(&s->c, s->header_state, 0);
3509 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3510 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3511 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3512 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3513 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3517 static void update_last_header_values(SnowContext *s){
3518 int plane_index;
3520 if(!s->keyframe){
3521 for(plane_index=0; plane_index<2; plane_index++){
3522 Plane *p= &s->plane[plane_index];
3523 p->last_diag_mc= p->diag_mc;
3524 p->last_htaps = p->htaps;
3525 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3529 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3530 s->last_qlog = s->qlog;
3531 s->last_qbias = s->qbias;
3532 s->last_mv_scale = s->mv_scale;
3533 s->last_block_max_depth = s->block_max_depth;
3534 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3537 static void decode_qlogs(SnowContext *s){
3538 int plane_index, level, orientation;
3540 for(plane_index=0; plane_index<3; plane_index++){
3541 for(level=0; level<s->spatial_decomposition_count; level++){
3542 for(orientation=level ? 1:0; orientation<4; orientation++){
3543 int q;
3544 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3545 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3546 else q= get_symbol(&s->c, s->header_state, 1);
3547 s->plane[plane_index].band[level][orientation].qlog= q;
3553 static int decode_header(SnowContext *s){
3554 int plane_index;
3555 uint8_t kstate[32];
3557 memset(kstate, MID_STATE, sizeof(kstate));
3559 s->keyframe= get_rac(&s->c, kstate);
3560 if(s->keyframe || s->always_reset){
3561 reset_contexts(s);
3562 s->spatial_decomposition_type=
3563 s->qlog=
3564 s->qbias=
3565 s->mv_scale=
3566 s->block_max_depth= 0;
3568 if(s->keyframe){
3569 s->version= get_symbol(&s->c, s->header_state, 0);
3570 if(s->version>0){
3571 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3572 return -1;
3574 s->always_reset= get_rac(&s->c, s->header_state);
3575 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3576 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3577 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3578 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3579 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3580 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3581 s->spatial_scalability= get_rac(&s->c, s->header_state);
3582 // s->rate_scalability= get_rac(&s->c, s->header_state);
3583 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3585 decode_qlogs(s);
3588 if(!s->keyframe){
3589 if(get_rac(&s->c, s->header_state)){
3590 for(plane_index=0; plane_index<2; plane_index++){
3591 int htaps, i, sum=0;
3592 Plane *p= &s->plane[plane_index];
3593 p->diag_mc= get_rac(&s->c, s->header_state);
3594 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3595 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3596 return -1;
3597 p->htaps= htaps;
3598 for(i= htaps/2; i; i--){
3599 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3600 sum += p->hcoeff[i];
3602 p->hcoeff[0]= 32-sum;
3604 s->plane[2].diag_mc= s->plane[1].diag_mc;
3605 s->plane[2].htaps = s->plane[1].htaps;
3606 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3608 if(get_rac(&s->c, s->header_state)){
3609 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3610 decode_qlogs(s);
3614 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3615 if(s->spatial_decomposition_type > 1){
3616 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3617 return -1;
3620 s->qlog += get_symbol(&s->c, s->header_state, 1);
3621 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3622 s->qbias += get_symbol(&s->c, s->header_state, 1);
3623 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3624 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3625 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3626 s->block_max_depth= 0;
3627 return -1;
3630 return 0;
3633 static void init_qexp(void){
3634 int i;
3635 double v=128;
3637 for(i=0; i<QROOT; i++){
3638 qexp[i]= lrintf(v);
3639 v *= pow(2, 1.0 / QROOT);
3643 static av_cold int common_init(AVCodecContext *avctx){
3644 SnowContext *s = avctx->priv_data;
3645 int width, height;
3646 int i, j;
3648 s->avctx= avctx;
3650 dsputil_init(&s->dsp, avctx);
3652 #define mcf(dx,dy)\
3653 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3654 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3655 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3656 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3657 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3658 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3660 mcf( 0, 0)
3661 mcf( 4, 0)
3662 mcf( 8, 0)
3663 mcf(12, 0)
3664 mcf( 0, 4)
3665 mcf( 4, 4)
3666 mcf( 8, 4)
3667 mcf(12, 4)
3668 mcf( 0, 8)
3669 mcf( 4, 8)
3670 mcf( 8, 8)
3671 mcf(12, 8)
3672 mcf( 0,12)
3673 mcf( 4,12)
3674 mcf( 8,12)
3675 mcf(12,12)
3677 #define mcfh(dx,dy)\
3678 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3679 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3680 mc_block_hpel ## dx ## dy ## 16;\
3681 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3682 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3683 mc_block_hpel ## dx ## dy ## 8;
3685 mcfh(0, 0)
3686 mcfh(8, 0)
3687 mcfh(0, 8)
3688 mcfh(8, 8)
3690 if(!qexp[0])
3691 init_qexp();
3693 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3695 width= s->avctx->width;
3696 height= s->avctx->height;
3698 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3699 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3701 for(i=0; i<MAX_REF_FRAMES; i++)
3702 for(j=0; j<MAX_REF_FRAMES; j++)
3703 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3705 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3707 return 0;
3710 static int common_init_after_header(AVCodecContext *avctx){
3711 SnowContext *s = avctx->priv_data;
3712 int plane_index, level, orientation;
3714 for(plane_index=0; plane_index<3; plane_index++){
3715 int w= s->avctx->width;
3716 int h= s->avctx->height;
3718 if(plane_index){
3719 w>>= s->chroma_h_shift;
3720 h>>= s->chroma_v_shift;
3722 s->plane[plane_index].width = w;
3723 s->plane[plane_index].height= h;
3725 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3726 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3727 SubBand *b= &s->plane[plane_index].band[level][orientation];
3729 b->buf= s->spatial_dwt_buffer;
3730 b->level= level;
3731 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3732 b->width = (w + !(orientation&1))>>1;
3733 b->height= (h + !(orientation>1))>>1;
3735 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3736 b->buf_x_offset = 0;
3737 b->buf_y_offset = 0;
3739 if(orientation&1){
3740 b->buf += (w+1)>>1;
3741 b->buf_x_offset = (w+1)>>1;
3743 if(orientation>1){
3744 b->buf += b->stride>>1;
3745 b->buf_y_offset = b->stride_line >> 1;
3747 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3749 if(level)
3750 b->parent= &s->plane[plane_index].band[level-1][orientation];
3751 //FIXME avoid this realloc
3752 av_freep(&b->x_coeff);
3753 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3755 w= (w+1)>>1;
3756 h= (h+1)>>1;
3760 return 0;
3763 static int qscale2qlog(int qscale){
3764 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3765 + 61*QROOT/8; //<64 >60
3768 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3770 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3771 * FIXME we know exact mv bits at this point,
3772 * but ratecontrol isn't set up to include them. */
3773 uint32_t coef_sum= 0;
3774 int level, orientation, delta_qlog;
3776 for(level=0; level<s->spatial_decomposition_count; level++){
3777 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3778 SubBand *b= &s->plane[0].band[level][orientation];
3779 IDWTELEM *buf= b->ibuf;
3780 const int w= b->width;
3781 const int h= b->height;
3782 const int stride= b->stride;
3783 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3784 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3785 const int qdiv= (1<<16)/qmul;
3786 int x, y;
3787 //FIXME this is ugly
3788 for(y=0; y<h; y++)
3789 for(x=0; x<w; x++)
3790 buf[x+y*stride]= b->buf[x+y*stride];
3791 if(orientation==0)
3792 decorrelate(s, b, buf, stride, 1, 0);
3793 for(y=0; y<h; y++)
3794 for(x=0; x<w; x++)
3795 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3799 /* ugly, ratecontrol just takes a sqrt again */
3800 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3801 assert(coef_sum < INT_MAX);
3803 if(pict->pict_type == FF_I_TYPE){
3804 s->m.current_picture.mb_var_sum= coef_sum;
3805 s->m.current_picture.mc_mb_var_sum= 0;
3806 }else{
3807 s->m.current_picture.mc_mb_var_sum= coef_sum;
3808 s->m.current_picture.mb_var_sum= 0;
3811 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3812 if (pict->quality < 0)
3813 return INT_MIN;
3814 s->lambda= pict->quality * 3/2;
3815 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3816 s->qlog+= delta_qlog;
3817 return delta_qlog;
3820 static void calculate_visual_weight(SnowContext *s, Plane *p){
3821 int width = p->width;
3822 int height= p->height;
3823 int level, orientation, x, y;
3825 for(level=0; level<s->spatial_decomposition_count; level++){
3826 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3827 SubBand *b= &p->band[level][orientation];
3828 IDWTELEM *ibuf= b->ibuf;
3829 int64_t error=0;
3831 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3832 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3833 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3834 for(y=0; y<height; y++){
3835 for(x=0; x<width; x++){
3836 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3837 error += d*d;
3841 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3846 #define QUANTIZE2 0
3848 #if QUANTIZE2==1
3849 #define Q2_STEP 8
3851 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3852 SubBand *b= &p->band[level][orientation];
3853 int x, y;
3854 int xo=0;
3855 int yo=0;
3856 int step= 1 << (s->spatial_decomposition_count - level);
3858 if(orientation&1)
3859 xo= step>>1;
3860 if(orientation&2)
3861 yo= step>>1;
3863 //FIXME bias for nonzero ?
3864 //FIXME optimize
3865 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3866 for(y=0; y<p->height; y++){
3867 for(x=0; x<p->width; x++){
3868 int sx= (x-xo + step/2) / step / Q2_STEP;
3869 int sy= (y-yo + step/2) / step / Q2_STEP;
3870 int v= r0[x + y*p->width] - r1[x + y*p->width];
3871 assert(sx>=0 && sy>=0 && sx < score_stride);
3872 v= ((v+8)>>4)<<4;
3873 score[sx + sy*score_stride] += v*v;
3874 assert(score[sx + sy*score_stride] >= 0);
3879 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3880 int level, orientation;
3882 for(level=0; level<s->spatial_decomposition_count; level++){
3883 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3884 SubBand *b= &p->band[level][orientation];
3885 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3887 dequantize(s, b, dst, b->stride);
3892 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3893 int level, orientation, ys, xs, x, y, pass;
3894 IDWTELEM best_dequant[height * stride];
3895 IDWTELEM idwt2_buffer[height * stride];
3896 const int score_stride= (width + 10)/Q2_STEP;
3897 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3898 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3899 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3901 //FIXME pass the copy cleanly ?
3903 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3904 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3906 for(level=0; level<s->spatial_decomposition_count; level++){
3907 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3908 SubBand *b= &p->band[level][orientation];
3909 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3910 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3911 assert(src == b->buf); // code does not depend on this but it is true currently
3913 quantize(s, b, dst, src, b->stride, s->qbias);
3916 for(pass=0; pass<1; pass++){
3917 if(s->qbias == 0) //keyframe
3918 continue;
3919 for(level=0; level<s->spatial_decomposition_count; level++){
3920 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3921 SubBand *b= &p->band[level][orientation];
3922 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3923 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3925 for(ys= 0; ys<Q2_STEP; ys++){
3926 for(xs= 0; xs<Q2_STEP; xs++){
3927 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3928 dequantize_all(s, p, idwt2_buffer, width, height);
3929 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3930 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3931 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3932 for(y=ys; y<b->height; y+= Q2_STEP){
3933 for(x=xs; x<b->width; x+= Q2_STEP){
3934 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3935 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3936 //FIXME try more than just --
3939 dequantize_all(s, p, idwt2_buffer, width, height);
3940 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3941 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3942 for(y=ys; y<b->height; y+= Q2_STEP){
3943 for(x=xs; x<b->width; x+= Q2_STEP){
3944 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3945 if(score[score_idx] <= best_score[score_idx] + threshold){
3946 best_score[score_idx]= score[score_idx];
3947 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3948 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3949 //FIXME copy instead
3958 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3961 #endif /* QUANTIZE2==1 */
3963 static av_cold int encode_init(AVCodecContext *avctx)
3965 SnowContext *s = avctx->priv_data;
3966 int plane_index;
3968 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3969 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3970 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3971 return -1;
3974 if(avctx->prediction_method == DWT_97
3975 && (avctx->flags & CODEC_FLAG_QSCALE)
3976 && avctx->global_quality == 0){
3977 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3978 return -1;
3981 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3983 s->chroma_h_shift= 1; //FIXME XXX
3984 s->chroma_v_shift= 1;
3986 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3987 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3989 for(plane_index=0; plane_index<3; plane_index++){
3990 s->plane[plane_index].diag_mc= 1;
3991 s->plane[plane_index].htaps= 6;
3992 s->plane[plane_index].hcoeff[0]= 40;
3993 s->plane[plane_index].hcoeff[1]= -10;
3994 s->plane[plane_index].hcoeff[2]= 2;
3995 s->plane[plane_index].fast_mc= 1;
3998 common_init(avctx);
3999 alloc_blocks(s);
4001 s->version=0;
4003 s->m.avctx = avctx;
4004 s->m.flags = avctx->flags;
4005 s->m.bit_rate= avctx->bit_rate;
4007 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4008 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4009 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4010 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4011 h263_encode_init(&s->m); //mv_penalty
4013 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4015 if(avctx->flags&CODEC_FLAG_PASS1){
4016 if(!avctx->stats_out)
4017 avctx->stats_out = av_mallocz(256);
4019 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4020 if(ff_rate_control_init(&s->m) < 0)
4021 return -1;
4023 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4025 avctx->coded_frame= &s->current_picture;
4026 switch(avctx->pix_fmt){
4027 // case PIX_FMT_YUV444P:
4028 // case PIX_FMT_YUV422P:
4029 case PIX_FMT_YUV420P:
4030 case PIX_FMT_GRAY8:
4031 // case PIX_FMT_YUV411P:
4032 // case PIX_FMT_YUV410P:
4033 s->colorspace_type= 0;
4034 break;
4035 /* case PIX_FMT_RGB32:
4036 s->colorspace= 1;
4037 break;*/
4038 default:
4039 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4040 return -1;
4042 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4043 s->chroma_h_shift= 1;
4044 s->chroma_v_shift= 1;
4046 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4047 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4049 s->avctx->get_buffer(s->avctx, &s->input_picture);
4051 if(s->avctx->me_method == ME_ITER){
4052 int i;
4053 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4054 for(i=0; i<s->max_ref_frames; i++){
4055 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4056 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4060 return 0;
4063 #define USE_HALFPEL_PLANE 0
4065 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4066 int p,x,y;
4068 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4070 for(p=0; p<3; p++){
4071 int is_chroma= !!p;
4072 int w= s->avctx->width >>is_chroma;
4073 int h= s->avctx->height >>is_chroma;
4074 int ls= frame->linesize[p];
4075 uint8_t *src= frame->data[p];
4077 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4078 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4079 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4081 halfpel[0][p]= src;
4082 for(y=0; y<h; y++){
4083 for(x=0; x<w; x++){
4084 int i= y*ls + x;
4086 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4089 for(y=0; y<h; y++){
4090 for(x=0; x<w; x++){
4091 int i= y*ls + x;
4093 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4096 src= halfpel[1][p];
4097 for(y=0; y<h; y++){
4098 for(x=0; x<w; x++){
4099 int i= y*ls + x;
4101 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4105 //FIXME border!
4109 static int frame_start(SnowContext *s){
4110 AVFrame tmp;
4111 int w= s->avctx->width; //FIXME round up to x16 ?
4112 int h= s->avctx->height;
4114 if(s->current_picture.data[0]){
4115 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4116 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4117 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4120 tmp= s->last_picture[s->max_ref_frames-1];
4121 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4122 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4123 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4124 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4125 s->last_picture[0]= s->current_picture;
4126 s->current_picture= tmp;
4128 if(s->keyframe){
4129 s->ref_frames= 0;
4130 }else{
4131 int i;
4132 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4133 if(i && s->last_picture[i-1].key_frame)
4134 break;
4135 s->ref_frames= i;
4138 s->current_picture.reference= 1;
4139 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4140 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4141 return -1;
4144 s->current_picture.key_frame= s->keyframe;
4146 return 0;
4149 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4150 SnowContext *s = avctx->priv_data;
4151 RangeCoder * const c= &s->c;
4152 AVFrame *pict = data;
4153 const int width= s->avctx->width;
4154 const int height= s->avctx->height;
4155 int level, orientation, plane_index, i, y;
4156 uint8_t rc_header_bak[sizeof(s->header_state)];
4157 uint8_t rc_block_bak[sizeof(s->block_state)];
4159 ff_init_range_encoder(c, buf, buf_size);
4160 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4162 for(i=0; i<3; i++){
4163 int shift= !!i;
4164 for(y=0; y<(height>>shift); y++)
4165 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4166 &pict->data[i][y * pict->linesize[i]],
4167 width>>shift);
4169 s->new_picture = *pict;
4171 s->m.picture_number= avctx->frame_number;
4172 if(avctx->flags&CODEC_FLAG_PASS2){
4173 s->m.pict_type =
4174 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4175 s->keyframe= pict->pict_type==FF_I_TYPE;
4176 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4177 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4178 if (pict->quality < 0)
4179 return -1;
4181 }else{
4182 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4183 s->m.pict_type=
4184 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4187 if(s->pass1_rc && avctx->frame_number == 0)
4188 pict->quality= 2*FF_QP2LAMBDA;
4189 if(pict->quality){
4190 s->qlog= qscale2qlog(pict->quality);
4191 s->lambda = pict->quality * 3/2;
4193 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4194 s->qlog= LOSSLESS_QLOG;
4195 s->lambda = 0;
4196 }//else keep previous frame's qlog until after motion estimation
4198 frame_start(s);
4200 s->m.current_picture_ptr= &s->m.current_picture;
4201 if(pict->pict_type == FF_P_TYPE){
4202 int block_width = (width +15)>>4;
4203 int block_height= (height+15)>>4;
4204 int stride= s->current_picture.linesize[0];
4206 assert(s->current_picture.data[0]);
4207 assert(s->last_picture[0].data[0]);
4209 s->m.avctx= s->avctx;
4210 s->m.current_picture.data[0]= s->current_picture.data[0];
4211 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4212 s->m. new_picture.data[0]= s-> input_picture.data[0];
4213 s->m. last_picture_ptr= &s->m. last_picture;
4214 s->m.linesize=
4215 s->m. last_picture.linesize[0]=
4216 s->m. new_picture.linesize[0]=
4217 s->m.current_picture.linesize[0]= stride;
4218 s->m.uvlinesize= s->current_picture.linesize[1];
4219 s->m.width = width;
4220 s->m.height= height;
4221 s->m.mb_width = block_width;
4222 s->m.mb_height= block_height;
4223 s->m.mb_stride= s->m.mb_width+1;
4224 s->m.b8_stride= 2*s->m.mb_width+1;
4225 s->m.f_code=1;
4226 s->m.pict_type= pict->pict_type;
4227 s->m.me_method= s->avctx->me_method;
4228 s->m.me.scene_change_score=0;
4229 s->m.flags= s->avctx->flags;
4230 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4231 s->m.out_format= FMT_H263;
4232 s->m.unrestricted_mv= 1;
4234 s->m.lambda = s->lambda;
4235 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4236 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4238 s->m.dsp= s->dsp; //move
4239 ff_init_me(&s->m);
4240 s->dsp= s->m.dsp;
4243 if(s->pass1_rc){
4244 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4245 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4248 redo_frame:
4250 if(pict->pict_type == FF_I_TYPE)
4251 s->spatial_decomposition_count= 5;
4252 else
4253 s->spatial_decomposition_count= 5;
4255 s->m.pict_type = pict->pict_type;
4256 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4258 common_init_after_header(avctx);
4260 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4261 for(plane_index=0; plane_index<3; plane_index++){
4262 calculate_visual_weight(s, &s->plane[plane_index]);
4266 encode_header(s);
4267 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4268 encode_blocks(s, 1);
4269 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4271 for(plane_index=0; plane_index<3; plane_index++){
4272 Plane *p= &s->plane[plane_index];
4273 int w= p->width;
4274 int h= p->height;
4275 int x, y;
4276 // int bits= put_bits_count(&s->c.pb);
4278 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4279 //FIXME optimize
4280 if(pict->data[plane_index]) //FIXME gray hack
4281 for(y=0; y<h; y++){
4282 for(x=0; x<w; x++){
4283 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4286 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4288 if( plane_index==0
4289 && pict->pict_type == FF_P_TYPE
4290 && !(avctx->flags&CODEC_FLAG_PASS2)
4291 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4292 ff_init_range_encoder(c, buf, buf_size);
4293 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4294 pict->pict_type= FF_I_TYPE;
4295 s->keyframe=1;
4296 s->current_picture.key_frame=1;
4297 goto redo_frame;
4300 if(s->qlog == LOSSLESS_QLOG){
4301 for(y=0; y<h; y++){
4302 for(x=0; x<w; x++){
4303 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4306 }else{
4307 for(y=0; y<h; y++){
4308 for(x=0; x<w; x++){
4309 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4314 /* if(QUANTIZE2)
4315 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4316 else*/
4317 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4319 if(s->pass1_rc && plane_index==0){
4320 int delta_qlog = ratecontrol_1pass(s, pict);
4321 if (delta_qlog <= INT_MIN)
4322 return -1;
4323 if(delta_qlog){
4324 //reordering qlog in the bitstream would eliminate this reset
4325 ff_init_range_encoder(c, buf, buf_size);
4326 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4327 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4328 encode_header(s);
4329 encode_blocks(s, 0);
4333 for(level=0; level<s->spatial_decomposition_count; level++){
4334 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4335 SubBand *b= &p->band[level][orientation];
4337 if(!QUANTIZE2)
4338 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4339 if(orientation==0)
4340 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4341 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4342 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4343 if(orientation==0)
4344 correlate(s, b, b->ibuf, b->stride, 1, 0);
4348 for(level=0; level<s->spatial_decomposition_count; level++){
4349 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4350 SubBand *b= &p->band[level][orientation];
4352 dequantize(s, b, b->ibuf, b->stride);
4356 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4357 if(s->qlog == LOSSLESS_QLOG){
4358 for(y=0; y<h; y++){
4359 for(x=0; x<w; x++){
4360 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4364 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4365 }else{
4366 //ME/MC only
4367 if(pict->pict_type == FF_I_TYPE){
4368 for(y=0; y<h; y++){
4369 for(x=0; x<w; x++){
4370 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4371 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4374 }else{
4375 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4376 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4379 if(s->avctx->flags&CODEC_FLAG_PSNR){
4380 int64_t error= 0;
4382 if(pict->data[plane_index]) //FIXME gray hack
4383 for(y=0; y<h; y++){
4384 for(x=0; x<w; x++){
4385 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4386 error += d*d;
4389 s->avctx->error[plane_index] += error;
4390 s->current_picture.error[plane_index] = error;
4395 update_last_header_values(s);
4397 if(s->last_picture[s->max_ref_frames-1].data[0]){
4398 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4399 for(i=0; i<9; i++)
4400 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4401 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4404 s->current_picture.coded_picture_number = avctx->frame_number;
4405 s->current_picture.pict_type = pict->pict_type;
4406 s->current_picture.quality = pict->quality;
4407 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4408 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4409 s->m.current_picture.display_picture_number =
4410 s->m.current_picture.coded_picture_number = avctx->frame_number;
4411 s->m.current_picture.quality = pict->quality;
4412 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4413 if(s->pass1_rc)
4414 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4415 return -1;
4416 if(avctx->flags&CODEC_FLAG_PASS1)
4417 ff_write_pass1_stats(&s->m);
4418 s->m.last_pict_type = s->m.pict_type;
4419 avctx->frame_bits = s->m.frame_bits;
4420 avctx->mv_bits = s->m.mv_bits;
4421 avctx->misc_bits = s->m.misc_bits;
4422 avctx->p_tex_bits = s->m.p_tex_bits;
4424 emms_c();
4426 return ff_rac_terminate(c);
4429 static av_cold void common_end(SnowContext *s){
4430 int plane_index, level, orientation, i;
4432 av_freep(&s->spatial_dwt_buffer);
4433 av_freep(&s->spatial_idwt_buffer);
4435 av_freep(&s->m.me.scratchpad);
4436 av_freep(&s->m.me.map);
4437 av_freep(&s->m.me.score_map);
4438 av_freep(&s->m.obmc_scratchpad);
4440 av_freep(&s->block);
4442 for(i=0; i<MAX_REF_FRAMES; i++){
4443 av_freep(&s->ref_mvs[i]);
4444 av_freep(&s->ref_scores[i]);
4445 if(s->last_picture[i].data[0])
4446 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4449 for(plane_index=0; plane_index<3; plane_index++){
4450 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4451 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4452 SubBand *b= &s->plane[plane_index].band[level][orientation];
4454 av_freep(&b->x_coeff);
4460 static av_cold int encode_end(AVCodecContext *avctx)
4462 SnowContext *s = avctx->priv_data;
4464 common_end(s);
4465 av_free(avctx->stats_out);
4467 return 0;
4470 static av_cold int decode_init(AVCodecContext *avctx)
4472 avctx->pix_fmt= PIX_FMT_YUV420P;
4474 common_init(avctx);
4476 return 0;
4479 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
4480 SnowContext *s = avctx->priv_data;
4481 RangeCoder * const c= &s->c;
4482 int bytes_read;
4483 AVFrame *picture = data;
4484 int level, orientation, plane_index, i;
4486 ff_init_range_decoder(c, buf, buf_size);
4487 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4489 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4490 if(decode_header(s)<0)
4491 return -1;
4492 common_init_after_header(avctx);
4494 // realloc slice buffer for the case that spatial_decomposition_count changed
4495 slice_buffer_destroy(&s->sb);
4496 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4498 for(plane_index=0; plane_index<3; plane_index++){
4499 Plane *p= &s->plane[plane_index];
4500 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4501 && p->hcoeff[1]==-10
4502 && p->hcoeff[2]==2;
4505 if(!s->block) alloc_blocks(s);
4507 frame_start(s);
4508 //keyframe flag duplication mess FIXME
4509 if(avctx->debug&FF_DEBUG_PICT_INFO)
4510 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4512 decode_blocks(s);
4514 for(plane_index=0; plane_index<3; plane_index++){
4515 Plane *p= &s->plane[plane_index];
4516 int w= p->width;
4517 int h= p->height;
4518 int x, y;
4519 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4521 if(s->avctx->debug&2048){
4522 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4523 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4525 for(y=0; y<h; y++){
4526 for(x=0; x<w; x++){
4527 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4528 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4534 for(level=0; level<s->spatial_decomposition_count; level++){
4535 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4536 SubBand *b= &p->band[level][orientation];
4537 unpack_coeffs(s, b, b->parent, orientation);
4543 const int mb_h= s->b_height << s->block_max_depth;
4544 const int block_size = MB_SIZE >> s->block_max_depth;
4545 const int block_w = plane_index ? block_size/2 : block_size;
4546 int mb_y;
4547 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4548 int yd=0, yq=0;
4549 int y;
4550 int end_y;
4552 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4553 for(mb_y=0; mb_y<=mb_h; mb_y++){
4555 int slice_starty = block_w*mb_y;
4556 int slice_h = block_w*(mb_y+1);
4557 if (!(s->keyframe || s->avctx->debug&512)){
4558 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4559 slice_h -= (block_w >> 1);
4562 for(level=0; level<s->spatial_decomposition_count; level++){
4563 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4564 SubBand *b= &p->band[level][orientation];
4565 int start_y;
4566 int end_y;
4567 int our_mb_start = mb_y;
4568 int our_mb_end = (mb_y + 1);
4569 const int extra= 3;
4570 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4571 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4572 if (!(s->keyframe || s->avctx->debug&512)){
4573 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4574 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4576 start_y = FFMIN(b->height, start_y);
4577 end_y = FFMIN(b->height, end_y);
4579 if (start_y != end_y){
4580 if (orientation == 0){
4581 SubBand * correlate_band = &p->band[0][0];
4582 int correlate_end_y = FFMIN(b->height, end_y + 1);
4583 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4584 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4585 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4586 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4588 else
4589 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4594 for(; yd<slice_h; yd+=4){
4595 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4598 if(s->qlog == LOSSLESS_QLOG){
4599 for(; yq<slice_h && yq<h; yq++){
4600 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4601 for(x=0; x<w; x++){
4602 line[x] <<= FRAC_BITS;
4607 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4609 y = FFMIN(p->height, slice_starty);
4610 end_y = FFMIN(p->height, slice_h);
4611 while(y < end_y)
4612 slice_buffer_release(&s->sb, y++);
4615 slice_buffer_flush(&s->sb);
4620 emms_c();
4622 if(s->last_picture[s->max_ref_frames-1].data[0]){
4623 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4624 for(i=0; i<9; i++)
4625 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4626 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4629 if(!(s->avctx->debug&2048))
4630 *picture= s->current_picture;
4631 else
4632 *picture= s->mconly_picture;
4634 *data_size = sizeof(AVFrame);
4636 bytes_read= c->bytestream - c->bytestream_start;
4637 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4639 return bytes_read;
4642 static av_cold int decode_end(AVCodecContext *avctx)
4644 SnowContext *s = avctx->priv_data;
4646 slice_buffer_destroy(&s->sb);
4648 common_end(s);
4650 return 0;
4653 AVCodec snow_decoder = {
4654 "snow",
4655 CODEC_TYPE_VIDEO,
4656 CODEC_ID_SNOW,
4657 sizeof(SnowContext),
4658 decode_init,
4659 NULL,
4660 decode_end,
4661 decode_frame,
4662 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4663 NULL,
4664 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4667 #ifdef CONFIG_SNOW_ENCODER
4668 AVCodec snow_encoder = {
4669 "snow",
4670 CODEC_TYPE_VIDEO,
4671 CODEC_ID_SNOW,
4672 sizeof(SnowContext),
4673 encode_init,
4674 encode_frame,
4675 encode_end,
4676 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4678 #endif
4681 #ifdef TEST
4682 #undef malloc
4683 #undef free
4684 #undef printf
4685 #undef random
4687 int main(void){
4688 int width=256;
4689 int height=256;
4690 int buffer[2][width*height];
4691 SnowContext s;
4692 int i;
4693 s.spatial_decomposition_count=6;
4694 s.spatial_decomposition_type=1;
4696 printf("testing 5/3 DWT\n");
4697 for(i=0; i<width*height; i++)
4698 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4700 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4701 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4703 for(i=0; i<width*height; i++)
4704 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4706 printf("testing 9/7 DWT\n");
4707 s.spatial_decomposition_type=0;
4708 for(i=0; i<width*height; i++)
4709 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4711 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4712 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4714 for(i=0; i<width*height; i++)
4715 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4717 #if 0
4718 printf("testing AC coder\n");
4719 memset(s.header_state, 0, sizeof(s.header_state));
4720 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4721 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4723 for(i=-256; i<256; i++){
4724 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4726 ff_rac_terminate(&s.c);
4728 memset(s.header_state, 0, sizeof(s.header_state));
4729 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4730 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4732 for(i=-256; i<256; i++){
4733 int j;
4734 j= get_symbol(&s.c, s.header_state, 1);
4735 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4737 #endif
4739 int level, orientation, x, y;
4740 int64_t errors[8][4];
4741 int64_t g=0;
4743 memset(errors, 0, sizeof(errors));
4744 s.spatial_decomposition_count=3;
4745 s.spatial_decomposition_type=0;
4746 for(level=0; level<s.spatial_decomposition_count; level++){
4747 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4748 int w= width >> (s.spatial_decomposition_count-level);
4749 int h= height >> (s.spatial_decomposition_count-level);
4750 int stride= width << (s.spatial_decomposition_count-level);
4751 DWTELEM *buf= buffer[0];
4752 int64_t error=0;
4754 if(orientation&1) buf+=w;
4755 if(orientation>1) buf+=stride>>1;
4757 memset(buffer[0], 0, sizeof(int)*width*height);
4758 buf[w/2 + h/2*stride]= 256*256;
4759 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4760 for(y=0; y<height; y++){
4761 for(x=0; x<width; x++){
4762 int64_t d= buffer[0][x + y*width];
4763 error += d*d;
4764 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4766 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4768 error= (int)(sqrt(error)+0.5);
4769 errors[level][orientation]= error;
4770 if(g) g=ff_gcd(g, error);
4771 else g= error;
4774 printf("static int const visual_weight[][4]={\n");
4775 for(level=0; level<s.spatial_decomposition_count; level++){
4776 printf(" {");
4777 for(orientation=0; orientation<4; orientation++){
4778 printf("%8"PRId64",", errors[level][orientation]/g);
4780 printf("},\n");
4782 printf("};\n");
4784 int level=2;
4785 int w= width >> (s.spatial_decomposition_count-level);
4786 //int h= height >> (s.spatial_decomposition_count-level);
4787 int stride= width << (s.spatial_decomposition_count-level);
4788 DWTELEM *buf= buffer[0];
4789 int64_t error=0;
4791 buf+=w;
4792 buf+=stride>>1;
4794 memset(buffer[0], 0, sizeof(int)*width*height);
4795 #if 1
4796 for(y=0; y<height; y++){
4797 for(x=0; x<width; x++){
4798 int tab[4]={0,2,3,1};
4799 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4802 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4803 #else
4804 for(y=0; y<h; y++){
4805 for(x=0; x<w; x++){
4806 buf[x + y*stride ]=169;
4807 buf[x + y*stride-w]=64;
4810 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4811 #endif
4812 for(y=0; y<height; y++){
4813 for(x=0; x<width; x++){
4814 int64_t d= buffer[0][x + y*width];
4815 error += d*d;
4816 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4818 if(FFABS(height/2-y)<9) printf("\n");
4823 return 0;
4825 #endif /* TEST */