use enum value instead of numerical value for acmod
[ffmpeg-lucabe.git] / libavcodec / snow.c
blob68d256606789f6fdc9e5e4917e0f507288743c32
1 /*
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "avcodec.h"
22 #include "dsputil.h"
23 #include "snow.h"
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
29 #undef NDEBUG
30 #include <assert.h>
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
177 #if 0 //64*cubic
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 //error:0.000022
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
230 //error:0.000033
232 #elif 1 // 64*linear
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
266 //error:0.000020
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
285 //error:0.000015
287 #else //64*cos
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
321 //error:0.000022
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
340 //error:0.000022
342 #endif
344 //linear *64
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
354 //error:0.000000
357 //linear *64
358 static const uint8_t obmc4[16]={
359 16, 48, 48, 16,
360 48,144,144, 48,
361 48,144,144, 48,
362 16, 48, 48, 16,
363 //error:0.000000
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
373 int16_t mx;
374 int16_t my;
375 uint8_t ref;
376 uint8_t color[3];
377 uint8_t type;
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
380 #define BLOCK_OPT 2
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
383 }BlockNode;
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
387 .mx= 0,
388 .my= 0,
389 .ref= 0,
390 .type= 0,
391 .level= 0,
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 typedef struct x_and_coeff{
398 int16_t x;
399 uint16_t coeff;
400 } x_and_coeff;
402 typedef struct SubBand{
403 int level;
404 int stride;
405 int width;
406 int height;
407 int qlog; ///< log(qscale)/log[2^(1/6)]
408 DWTELEM *buf;
409 int buf_x_offset;
410 int buf_y_offset;
411 int stride_line; ///< Stride measured in lines, not pixels.
412 x_and_coeff * x_coeff;
413 struct SubBand *parent;
414 uint8_t state[/*7*2*/ 7 + 512][32];
415 }SubBand;
417 typedef struct Plane{
418 int width;
419 int height;
420 SubBand band[MAX_DECOMPOSITIONS][4];
421 }Plane;
423 typedef struct SnowContext{
424 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
426 AVCodecContext *avctx;
427 RangeCoder c;
428 DSPContext dsp;
429 AVFrame new_picture;
430 AVFrame input_picture; ///< new_picture with the internal linesizes
431 AVFrame current_picture;
432 AVFrame last_picture[MAX_REF_FRAMES];
433 AVFrame mconly_picture;
434 // uint8_t q_context[16];
435 uint8_t header_state[32];
436 uint8_t block_state[128 + 32*128];
437 int keyframe;
438 int always_reset;
439 int version;
440 int spatial_decomposition_type;
441 int last_spatial_decomposition_type;
442 int temporal_decomposition_type;
443 int spatial_decomposition_count;
444 int temporal_decomposition_count;
445 int max_ref_frames;
446 int ref_frames;
447 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
448 uint32_t *ref_scores[MAX_REF_FRAMES];
449 DWTELEM *spatial_dwt_buffer;
450 int colorspace_type;
451 int chroma_h_shift;
452 int chroma_v_shift;
453 int spatial_scalability;
454 int qlog;
455 int last_qlog;
456 int lambda;
457 int lambda2;
458 int pass1_rc;
459 int mv_scale;
460 int last_mv_scale;
461 int qbias;
462 int last_qbias;
463 #define QBIAS_SHIFT 3
464 int b_width;
465 int b_height;
466 int block_max_depth;
467 int last_block_max_depth;
468 Plane plane[MAX_PLANES];
469 BlockNode *block;
470 #define ME_CACHE_SIZE 1024
471 int me_cache[ME_CACHE_SIZE];
472 int me_cache_generation;
473 slice_buffer sb;
475 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
476 }SnowContext;
478 typedef struct {
479 DWTELEM *b0;
480 DWTELEM *b1;
481 DWTELEM *b2;
482 DWTELEM *b3;
483 int y;
484 } dwt_compose_t;
486 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
487 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
489 static void iterative_me(SnowContext *s);
491 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
493 int i;
495 buf->base_buffer = base_buffer;
496 buf->line_count = line_count;
497 buf->line_width = line_width;
498 buf->data_count = max_allocated_lines;
499 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
500 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
502 for (i = 0; i < max_allocated_lines; i++)
504 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
507 buf->data_stack_top = max_allocated_lines - 1;
510 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
512 int offset;
513 DWTELEM * buffer;
515 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
517 assert(buf->data_stack_top >= 0);
518 // assert(!buf->line[line]);
519 if (buf->line[line])
520 return buf->line[line];
522 offset = buf->line_width * line;
523 buffer = buf->data_stack[buf->data_stack_top];
524 buf->data_stack_top--;
525 buf->line[line] = buffer;
527 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
529 return buffer;
532 static void slice_buffer_release(slice_buffer * buf, int line)
534 int offset;
535 DWTELEM * buffer;
537 assert(line >= 0 && line < buf->line_count);
538 assert(buf->line[line]);
540 offset = buf->line_width * line;
541 buffer = buf->line[line];
542 buf->data_stack_top++;
543 buf->data_stack[buf->data_stack_top] = buffer;
544 buf->line[line] = NULL;
546 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
549 static void slice_buffer_flush(slice_buffer * buf)
551 int i;
552 for (i = 0; i < buf->line_count; i++)
554 if (buf->line[i])
556 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
557 slice_buffer_release(buf, i);
562 static void slice_buffer_destroy(slice_buffer * buf)
564 int i;
565 slice_buffer_flush(buf);
567 for (i = buf->data_count - 1; i >= 0; i--)
569 assert(buf->data_stack[i]);
570 av_freep(&buf->data_stack[i]);
572 assert(buf->data_stack);
573 av_freep(&buf->data_stack);
574 assert(buf->line);
575 av_freep(&buf->line);
578 #ifdef __sgi
579 // Avoid a name clash on SGI IRIX
580 #undef qexp
581 #endif
582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
583 static uint8_t qexp[QROOT];
585 static inline int mirror(int v, int m){
586 while((unsigned)v > (unsigned)m){
587 v=-v;
588 if(v<0) v+= 2*m;
590 return v;
593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
594 int i;
596 if(v){
597 const int a= FFABS(v);
598 const int e= av_log2(a);
599 #if 1
600 const int el= FFMIN(e, 10);
601 put_rac(c, state+0, 0);
603 for(i=0; i<el; i++){
604 put_rac(c, state+1+i, 1); //1..10
606 for(; i<e; i++){
607 put_rac(c, state+1+9, 1); //1..10
609 put_rac(c, state+1+FFMIN(i,9), 0);
611 for(i=e-1; i>=el; i--){
612 put_rac(c, state+22+9, (a>>i)&1); //22..31
614 for(; i>=0; i--){
615 put_rac(c, state+22+i, (a>>i)&1); //22..31
618 if(is_signed)
619 put_rac(c, state+11 + el, v < 0); //11..21
620 #else
622 put_rac(c, state+0, 0);
623 if(e<=9){
624 for(i=0; i<e; i++){
625 put_rac(c, state+1+i, 1); //1..10
627 put_rac(c, state+1+i, 0);
629 for(i=e-1; i>=0; i--){
630 put_rac(c, state+22+i, (a>>i)&1); //22..31
633 if(is_signed)
634 put_rac(c, state+11 + e, v < 0); //11..21
635 }else{
636 for(i=0; i<e; i++){
637 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
639 put_rac(c, state+1+FFMIN(i,9), 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
645 if(is_signed)
646 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
648 #endif
649 }else{
650 put_rac(c, state+0, 1);
654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
655 if(get_rac(c, state+0))
656 return 0;
657 else{
658 int i, e, a;
659 e= 0;
660 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
661 e++;
664 a= 1;
665 for(i=e-1; i>=0; i--){
666 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
669 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
670 return -a;
671 else
672 return a;
676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
677 int i;
678 int r= log2>=0 ? 1<<log2 : 1;
680 assert(v>=0);
681 assert(log2>=-4);
683 while(v >= r){
684 put_rac(c, state+4+log2, 1);
685 v -= r;
686 log2++;
687 if(log2>0) r+=r;
689 put_rac(c, state+4+log2, 0);
691 for(i=log2-1; i>=0; i--){
692 put_rac(c, state+31-i, (v>>i)&1);
696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
697 int i;
698 int r= log2>=0 ? 1<<log2 : 1;
699 int v=0;
701 assert(log2>=-4);
703 while(get_rac(c, state+4+log2)){
704 v+= r;
705 log2++;
706 if(log2>0) r+=r;
709 for(i=log2-1; i>=0; i--){
710 v+= get_rac(c, state+31-i)<<i;
713 return v;
716 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
717 const int mirror_left= !highpass;
718 const int mirror_right= (width&1) ^ highpass;
719 const int w= (width>>1) - 1 + (highpass & width);
720 int i;
722 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
723 if(mirror_left){
724 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
725 dst += dst_step;
726 src += src_step;
729 for(i=0; i<w; i++){
730 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
733 if(mirror_right){
734 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
738 #ifndef lift5
739 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
740 const int mirror_left= !highpass;
741 const int mirror_right= (width&1) ^ highpass;
742 const int w= (width>>1) - 1 + (highpass & width);
743 int i;
745 if(mirror_left){
746 int r= 3*2*ref[0];
747 r += r>>4;
748 r += r>>8;
749 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
750 dst += dst_step;
751 src += src_step;
754 for(i=0; i<w; i++){
755 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
756 r += r>>4;
757 r += r>>8;
758 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
761 if(mirror_right){
762 int r= 3*2*ref[w*ref_step];
763 r += r>>4;
764 r += r>>8;
765 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
768 #endif
770 #ifndef liftS
771 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
772 const int mirror_left= !highpass;
773 const int mirror_right= (width&1) ^ highpass;
774 const int w= (width>>1) - 1 + (highpass & width);
775 int i;
777 assert(shift == 4);
778 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
779 if(mirror_left){
780 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
781 dst += dst_step;
782 src += src_step;
785 for(i=0; i<w; i++){
786 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
789 if(mirror_right){
790 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
793 #endif
796 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
797 int x, i;
799 for(x=start; x<width; x+=2){
800 int64_t sum=0;
802 for(i=0; i<n; i++){
803 int x2= x + 2*i - n + 1;
804 if (x2< 0) x2= -x2;
805 else if(x2>=width) x2= 2*width-x2-2;
806 sum += coeffs[i]*(int64_t)dst[x2];
808 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
809 else dst[x] += (sum + (1<<shift)/2)>>shift;
813 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
814 int x, y, i;
815 for(y=start; y<height; y+=2){
816 for(x=0; x<width; x++){
817 int64_t sum=0;
819 for(i=0; i<n; i++){
820 int y2= y + 2*i - n + 1;
821 if (y2< 0) y2= -y2;
822 else if(y2>=height) y2= 2*height-y2-2;
823 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
825 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
826 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
831 #define SCALEX 1
832 #define LX0 0
833 #define LX1 1
835 #if 0 // more accurate 9/7
836 #define N1 2
837 #define SHIFT1 14
838 #define COEFFS1 (int[]){-25987,-25987}
839 #define N2 2
840 #define SHIFT2 19
841 #define COEFFS2 (int[]){-27777,-27777}
842 #define N3 2
843 #define SHIFT3 15
844 #define COEFFS3 (int[]){28931,28931}
845 #define N4 2
846 #define SHIFT4 15
847 #define COEFFS4 (int[]){14533,14533}
848 #elif 1 // 13/7 CRF
849 #define N1 4
850 #define SHIFT1 4
851 #define COEFFS1 (int[]){1,-9,-9,1}
852 #define N2 4
853 #define SHIFT2 4
854 #define COEFFS2 (int[]){-1,5,5,-1}
855 #define N3 0
856 #define SHIFT3 1
857 #define COEFFS3 NULL
858 #define N4 0
859 #define SHIFT4 1
860 #define COEFFS4 NULL
861 #elif 1 // 3/5
862 #define LX0 1
863 #define LX1 0
864 #define SCALEX 0.5
865 #define N1 2
866 #define SHIFT1 1
867 #define COEFFS1 (int[]){1,1}
868 #define N2 2
869 #define SHIFT2 2
870 #define COEFFS2 (int[]){-1,-1}
871 #define N3 0
872 #define SHIFT3 0
873 #define COEFFS3 NULL
874 #define N4 0
875 #define SHIFT4 0
876 #define COEFFS4 NULL
877 #elif 1 // 11/5
878 #define N1 0
879 #define SHIFT1 1
880 #define COEFFS1 NULL
881 #define N2 2
882 #define SHIFT2 2
883 #define COEFFS2 (int[]){-1,-1}
884 #define N3 2
885 #define SHIFT3 0
886 #define COEFFS3 (int[]){-1,-1}
887 #define N4 4
888 #define SHIFT4 7
889 #define COEFFS4 (int[]){-5,29,29,-5}
890 #define SCALEX 4
891 #elif 1 // 9/7 CDF
892 #define N1 2
893 #define SHIFT1 7
894 #define COEFFS1 (int[]){-203,-203}
895 #define N2 2
896 #define SHIFT2 12
897 #define COEFFS2 (int[]){-217,-217}
898 #define N3 2
899 #define SHIFT3 7
900 #define COEFFS3 (int[]){113,113}
901 #define N4 2
902 #define SHIFT4 9
903 #define COEFFS4 (int[]){227,227}
904 #define SCALEX 1
905 #elif 1 // 7/5 CDF
906 #define N1 0
907 #define SHIFT1 1
908 #define COEFFS1 NULL
909 #define N2 2
910 #define SHIFT2 2
911 #define COEFFS2 (int[]){-1,-1}
912 #define N3 2
913 #define SHIFT3 0
914 #define COEFFS3 (int[]){-1,-1}
915 #define N4 2
916 #define SHIFT4 4
917 #define COEFFS4 (int[]){3,3}
918 #elif 1 // 9/7 MN
919 #define N1 4
920 #define SHIFT1 4
921 #define COEFFS1 (int[]){1,-9,-9,1}
922 #define N2 2
923 #define SHIFT2 2
924 #define COEFFS2 (int[]){1,1}
925 #define N3 0
926 #define SHIFT3 1
927 #define COEFFS3 NULL
928 #define N4 0
929 #define SHIFT4 1
930 #define COEFFS4 NULL
931 #else // 13/7 CRF
932 #define N1 4
933 #define SHIFT1 4
934 #define COEFFS1 (int[]){1,-9,-9,1}
935 #define N2 4
936 #define SHIFT2 4
937 #define COEFFS2 (int[]){-1,5,5,-1}
938 #define N3 0
939 #define SHIFT3 1
940 #define COEFFS3 NULL
941 #define N4 0
942 #define SHIFT4 1
943 #define COEFFS4 NULL
944 #endif
945 static void horizontal_decomposeX(DWTELEM *b, int width){
946 DWTELEM temp[width];
947 const int width2= width>>1;
948 const int w2= (width+1)>>1;
949 int x;
951 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
952 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
953 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
954 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
956 for(x=0; x<width2; x++){
957 temp[x ]= b[2*x ];
958 temp[x+w2]= b[2*x + 1];
960 if(width&1)
961 temp[x ]= b[2*x ];
962 memcpy(b, temp, width*sizeof(int));
965 static void horizontal_composeX(DWTELEM *b, int width){
966 DWTELEM temp[width];
967 const int width2= width>>1;
968 int x;
969 const int w2= (width+1)>>1;
971 memcpy(temp, b, width*sizeof(int));
972 for(x=0; x<width2; x++){
973 b[2*x ]= temp[x ];
974 b[2*x + 1]= temp[x+w2];
976 if(width&1)
977 b[2*x ]= temp[x ];
979 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
980 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
981 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
982 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
985 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
986 int x, y;
988 for(y=0; y<height; y++){
989 for(x=0; x<width; x++){
990 buffer[y*stride + x] *= SCALEX;
994 for(y=0; y<height; y++){
995 horizontal_decomposeX(buffer + y*stride, width);
998 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
999 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
1004 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1005 int x, y;
1007 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1008 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1012 for(y=0; y<height; y++){
1013 horizontal_composeX(buffer + y*stride, width);
1016 for(y=0; y<height; y++){
1017 for(x=0; x<width; x++){
1018 buffer[y*stride + x] /= SCALEX;
1023 static void horizontal_decompose53i(DWTELEM *b, int width){
1024 DWTELEM temp[width];
1025 const int width2= width>>1;
1026 int x;
1027 const int w2= (width+1)>>1;
1029 for(x=0; x<width2; x++){
1030 temp[x ]= b[2*x ];
1031 temp[x+w2]= b[2*x + 1];
1033 if(width&1)
1034 temp[x ]= b[2*x ];
1035 #if 0
1037 int A1,A2,A3,A4;
1038 A2= temp[1 ];
1039 A4= temp[0 ];
1040 A1= temp[0+width2];
1041 A1 -= (A2 + A4)>>1;
1042 A4 += (A1 + 1)>>1;
1043 b[0+width2] = A1;
1044 b[0 ] = A4;
1045 for(x=1; x+1<width2; x+=2){
1046 A3= temp[x+width2];
1047 A4= temp[x+1 ];
1048 A3 -= (A2 + A4)>>1;
1049 A2 += (A1 + A3 + 2)>>2;
1050 b[x+width2] = A3;
1051 b[x ] = A2;
1053 A1= temp[x+1+width2];
1054 A2= temp[x+2 ];
1055 A1 -= (A2 + A4)>>1;
1056 A4 += (A1 + A3 + 2)>>2;
1057 b[x+1+width2] = A1;
1058 b[x+1 ] = A4;
1060 A3= temp[width-1];
1061 A3 -= A2;
1062 A2 += (A1 + A3 + 2)>>2;
1063 b[width -1] = A3;
1064 b[width2-1] = A2;
1066 #else
1067 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1068 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1069 #endif
1072 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1073 int i;
1075 for(i=0; i<width; i++){
1076 b1[i] -= (b0[i] + b2[i])>>1;
1080 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1081 int i;
1083 for(i=0; i<width; i++){
1084 b1[i] += (b0[i] + b2[i] + 2)>>2;
1088 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1089 int y;
1090 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1091 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1093 for(y=-2; y<height; y+=2){
1094 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1095 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1097 {START_TIMER
1098 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1099 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1100 STOP_TIMER("horizontal_decompose53i")}
1102 {START_TIMER
1103 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1104 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1105 STOP_TIMER("vertical_decompose53i*")}
1107 b0=b2;
1108 b1=b3;
1112 static void horizontal_decompose97i(DWTELEM *b, int width){
1113 DWTELEM temp[width];
1114 const int w2= (width+1)>>1;
1116 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1117 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1118 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1119 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1123 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1124 int i;
1126 for(i=0; i<width; i++){
1127 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1131 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1132 int i;
1134 for(i=0; i<width; i++){
1135 #ifdef lift5
1136 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1137 #else
1138 int r= 3*(b0[i] + b2[i]);
1139 r+= r>>4;
1140 r+= r>>8;
1141 b1[i] += (r+W_CO)>>W_CS;
1142 #endif
1146 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1147 int i;
1149 for(i=0; i<width; i++){
1150 #ifdef liftS
1151 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1152 #else
1153 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1154 #endif
1158 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1159 int i;
1161 for(i=0; i<width; i++){
1162 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1166 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1167 int y;
1168 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1169 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1170 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1171 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1173 for(y=-4; y<height; y+=2){
1174 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1175 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1177 {START_TIMER
1178 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1179 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1180 if(width>400){
1181 STOP_TIMER("horizontal_decompose97i")
1184 {START_TIMER
1185 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1186 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1187 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1188 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1190 if(width>400){
1191 STOP_TIMER("vertical_decompose97i")
1194 b0=b2;
1195 b1=b3;
1196 b2=b4;
1197 b3=b5;
1201 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1202 int level;
1204 for(level=0; level<decomposition_count; level++){
1205 switch(type){
1206 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1207 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1208 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1213 static void horizontal_compose53i(DWTELEM *b, int width){
1214 DWTELEM temp[width];
1215 const int width2= width>>1;
1216 const int w2= (width+1)>>1;
1217 int x;
1219 #if 0
1220 int A1,A2,A3,A4;
1221 A2= temp[1 ];
1222 A4= temp[0 ];
1223 A1= temp[0+width2];
1224 A1 -= (A2 + A4)>>1;
1225 A4 += (A1 + 1)>>1;
1226 b[0+width2] = A1;
1227 b[0 ] = A4;
1228 for(x=1; x+1<width2; x+=2){
1229 A3= temp[x+width2];
1230 A4= temp[x+1 ];
1231 A3 -= (A2 + A4)>>1;
1232 A2 += (A1 + A3 + 2)>>2;
1233 b[x+width2] = A3;
1234 b[x ] = A2;
1236 A1= temp[x+1+width2];
1237 A2= temp[x+2 ];
1238 A1 -= (A2 + A4)>>1;
1239 A4 += (A1 + A3 + 2)>>2;
1240 b[x+1+width2] = A1;
1241 b[x+1 ] = A4;
1243 A3= temp[width-1];
1244 A3 -= A2;
1245 A2 += (A1 + A3 + 2)>>2;
1246 b[width -1] = A3;
1247 b[width2-1] = A2;
1248 #else
1249 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1250 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1251 #endif
1252 for(x=0; x<width2; x++){
1253 b[2*x ]= temp[x ];
1254 b[2*x + 1]= temp[x+w2];
1256 if(width&1)
1257 b[2*x ]= temp[x ];
1260 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1261 int i;
1263 for(i=0; i<width; i++){
1264 b1[i] += (b0[i] + b2[i])>>1;
1268 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1269 int i;
1271 for(i=0; i<width; i++){
1272 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1276 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1277 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1278 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1279 cs->y = -1;
1282 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1283 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1284 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1285 cs->y = -1;
1288 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1289 int y= cs->y;
1291 DWTELEM *b0= cs->b0;
1292 DWTELEM *b1= cs->b1;
1293 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1294 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1296 {START_TIMER
1297 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1298 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1299 STOP_TIMER("vertical_compose53i*")}
1301 {START_TIMER
1302 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1303 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1304 STOP_TIMER("horizontal_compose53i")}
1306 cs->b0 = b2;
1307 cs->b1 = b3;
1308 cs->y += 2;
1311 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1312 int y= cs->y;
1313 DWTELEM *b0= cs->b0;
1314 DWTELEM *b1= cs->b1;
1315 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1316 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1318 {START_TIMER
1319 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1320 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1321 STOP_TIMER("vertical_compose53i*")}
1323 {START_TIMER
1324 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1325 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1326 STOP_TIMER("horizontal_compose53i")}
1328 cs->b0 = b2;
1329 cs->b1 = b3;
1330 cs->y += 2;
1333 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1334 dwt_compose_t cs;
1335 spatial_compose53i_init(&cs, buffer, height, stride);
1336 while(cs.y <= height)
1337 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1341 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1342 DWTELEM temp[width];
1343 const int w2= (width+1)>>1;
1345 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1346 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1347 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1348 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1351 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1352 int i;
1354 for(i=0; i<width; i++){
1355 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1359 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1360 int i;
1362 for(i=0; i<width; i++){
1363 #ifdef lift5
1364 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1365 #else
1366 int r= 3*(b0[i] + b2[i]);
1367 r+= r>>4;
1368 r+= r>>8;
1369 b1[i] -= (r+W_CO)>>W_CS;
1370 #endif
1374 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1375 int i;
1377 for(i=0; i<width; i++){
1378 #ifdef liftS
1379 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1380 #else
1381 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1382 #endif
1386 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1387 int i;
1389 for(i=0; i<width; i++){
1390 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1394 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1395 int i;
1397 for(i=0; i<width; i++){
1398 #ifndef lift5
1399 int r;
1400 #endif
1401 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1402 #ifdef lift5
1403 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1404 #else
1405 r= 3*(b2[i] + b4[i]);
1406 r+= r>>4;
1407 r+= r>>8;
1408 b3[i] -= (r+W_CO)>>W_CS;
1409 #endif
1410 #ifdef liftS
1411 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1412 #else
1413 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1414 #endif
1415 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1419 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1420 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1421 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1422 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1423 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1424 cs->y = -3;
1427 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1428 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1429 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1430 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1431 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1432 cs->y = -3;
1435 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1436 int y = cs->y;
1438 DWTELEM *b0= cs->b0;
1439 DWTELEM *b1= cs->b1;
1440 DWTELEM *b2= cs->b2;
1441 DWTELEM *b3= cs->b3;
1442 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1443 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1445 {START_TIMER
1446 if(y>0 && y+4<height){
1447 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1448 }else{
1449 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1450 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1451 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1452 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1454 if(width>400){
1455 STOP_TIMER("vertical_compose97i")}}
1457 {START_TIMER
1458 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1459 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1460 if(width>400 && y+0<(unsigned)height){
1461 STOP_TIMER("horizontal_compose97i")}}
1463 cs->b0=b2;
1464 cs->b1=b3;
1465 cs->b2=b4;
1466 cs->b3=b5;
1467 cs->y += 2;
1470 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1471 int y = cs->y;
1472 DWTELEM *b0= cs->b0;
1473 DWTELEM *b1= cs->b1;
1474 DWTELEM *b2= cs->b2;
1475 DWTELEM *b3= cs->b3;
1476 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1477 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1479 {START_TIMER
1480 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1481 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1482 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1483 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1484 if(width>400){
1485 STOP_TIMER("vertical_compose97i")}}
1487 {START_TIMER
1488 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1489 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1490 if(width>400 && b0 <= b2){
1491 STOP_TIMER("horizontal_compose97i")}}
1493 cs->b0=b2;
1494 cs->b1=b3;
1495 cs->b2=b4;
1496 cs->b3=b5;
1497 cs->y += 2;
1500 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1501 dwt_compose_t cs;
1502 spatial_compose97i_init(&cs, buffer, height, stride);
1503 while(cs.y <= height)
1504 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1507 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1508 int level;
1509 for(level=decomposition_count-1; level>=0; level--){
1510 switch(type){
1511 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1512 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513 /* not slicified yet */
1514 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1515 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1520 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1521 int level;
1522 for(level=decomposition_count-1; level>=0; level--){
1523 switch(type){
1524 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1525 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1526 /* not slicified yet */
1527 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1532 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1533 const int support = type==1 ? 3 : 5;
1534 int level;
1535 if(type==2) return;
1537 for(level=decomposition_count-1; level>=0; level--){
1538 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1539 switch(type){
1540 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1541 break;
1542 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1543 break;
1544 case DWT_X: break;
1550 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1551 const int support = type==1 ? 3 : 5;
1552 int level;
1553 if(type==2) return;
1555 for(level=decomposition_count-1; level>=0; level--){
1556 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1557 switch(type){
1558 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1559 break;
1560 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1561 break;
1562 case DWT_X: break;
1568 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1569 if(type==2){
1570 int level;
1571 for(level=decomposition_count-1; level>=0; level--)
1572 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1573 }else{
1574 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1575 int y;
1576 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1577 for(y=0; y<height; y+=4)
1578 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1582 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1583 const int w= b->width;
1584 const int h= b->height;
1585 int x, y;
1587 if(1){
1588 int run=0;
1589 int runs[w*h];
1590 int run_index=0;
1591 int max_index;
1593 for(y=0; y<h; y++){
1594 for(x=0; x<w; x++){
1595 int v, p=0;
1596 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1597 v= src[x + y*stride];
1599 if(y){
1600 t= src[x + (y-1)*stride];
1601 if(x){
1602 lt= src[x - 1 + (y-1)*stride];
1604 if(x + 1 < w){
1605 rt= src[x + 1 + (y-1)*stride];
1608 if(x){
1609 l= src[x - 1 + y*stride];
1610 /*if(x > 1){
1611 if(orientation==1) ll= src[y + (x-2)*stride];
1612 else ll= src[x - 2 + y*stride];
1615 if(parent){
1616 int px= x>>1;
1617 int py= y>>1;
1618 if(px<b->parent->width && py<b->parent->height)
1619 p= parent[px + py*2*stride];
1621 if(!(/*ll|*/l|lt|t|rt|p)){
1622 if(v){
1623 runs[run_index++]= run;
1624 run=0;
1625 }else{
1626 run++;
1631 max_index= run_index;
1632 runs[run_index++]= run;
1633 run_index=0;
1634 run= runs[run_index++];
1636 put_symbol2(&s->c, b->state[30], max_index, 0);
1637 if(run_index <= max_index)
1638 put_symbol2(&s->c, b->state[1], run, 3);
1640 for(y=0; y<h; y++){
1641 if(s->c.bytestream_end - s->c.bytestream < w*40){
1642 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1643 return -1;
1645 for(x=0; x<w; x++){
1646 int v, p=0;
1647 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1648 v= src[x + y*stride];
1650 if(y){
1651 t= src[x + (y-1)*stride];
1652 if(x){
1653 lt= src[x - 1 + (y-1)*stride];
1655 if(x + 1 < w){
1656 rt= src[x + 1 + (y-1)*stride];
1659 if(x){
1660 l= src[x - 1 + y*stride];
1661 /*if(x > 1){
1662 if(orientation==1) ll= src[y + (x-2)*stride];
1663 else ll= src[x - 2 + y*stride];
1666 if(parent){
1667 int px= x>>1;
1668 int py= y>>1;
1669 if(px<b->parent->width && py<b->parent->height)
1670 p= parent[px + py*2*stride];
1672 if(/*ll|*/l|lt|t|rt|p){
1673 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1675 put_rac(&s->c, &b->state[0][context], !!v);
1676 }else{
1677 if(!run){
1678 run= runs[run_index++];
1680 if(run_index <= max_index)
1681 put_symbol2(&s->c, b->state[1], run, 3);
1682 assert(v);
1683 }else{
1684 run--;
1685 assert(!v);
1688 if(v){
1689 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1690 int l2= 2*FFABS(l) + (l<0);
1691 int t2= 2*FFABS(t) + (t<0);
1693 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1694 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1699 return 0;
1702 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1703 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1704 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1705 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1706 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1709 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1710 const int w= b->width;
1711 const int h= b->height;
1712 int x,y;
1714 if(1){
1715 int run, runs;
1716 x_and_coeff *xc= b->x_coeff;
1717 x_and_coeff *prev_xc= NULL;
1718 x_and_coeff *prev2_xc= xc;
1719 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1720 x_and_coeff *prev_parent_xc= parent_xc;
1722 runs= get_symbol2(&s->c, b->state[30], 0);
1723 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1724 else run= INT_MAX;
1726 for(y=0; y<h; y++){
1727 int v=0;
1728 int lt=0, t=0, rt=0;
1730 if(y && prev_xc->x == 0){
1731 rt= prev_xc->coeff;
1733 for(x=0; x<w; x++){
1734 int p=0;
1735 const int l= v;
1737 lt= t; t= rt;
1739 if(y){
1740 if(prev_xc->x <= x)
1741 prev_xc++;
1742 if(prev_xc->x == x + 1)
1743 rt= prev_xc->coeff;
1744 else
1745 rt=0;
1747 if(parent_xc){
1748 if(x>>1 > parent_xc->x){
1749 parent_xc++;
1751 if(x>>1 == parent_xc->x){
1752 p= parent_xc->coeff;
1755 if(/*ll|*/l|lt|t|rt|p){
1756 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1758 v=get_rac(&s->c, &b->state[0][context]);
1759 if(v){
1760 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1761 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1763 xc->x=x;
1764 (xc++)->coeff= v;
1766 }else{
1767 if(!run){
1768 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1769 else run= INT_MAX;
1770 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1771 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1773 xc->x=x;
1774 (xc++)->coeff= v;
1775 }else{
1776 int max_run;
1777 run--;
1778 v=0;
1780 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1781 else max_run= FFMIN(run, w-x-1);
1782 if(parent_xc)
1783 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1784 x+= max_run;
1785 run-= max_run;
1789 (xc++)->x= w+1; //end marker
1790 prev_xc= prev2_xc;
1791 prev2_xc= xc;
1793 if(parent_xc){
1794 if(y&1){
1795 while(parent_xc->x != parent->width+1)
1796 parent_xc++;
1797 parent_xc++;
1798 prev_parent_xc= parent_xc;
1799 }else{
1800 parent_xc= prev_parent_xc;
1805 (xc++)->x= w+1; //end marker
1809 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1810 const int w= b->width;
1811 int y;
1812 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1813 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1814 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1815 int new_index = 0;
1817 START_TIMER
1819 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1820 qadd= 0;
1821 qmul= 1<<QEXPSHIFT;
1824 /* If we are on the second or later slice, restore our index. */
1825 if (start_y != 0)
1826 new_index = save_state[0];
1829 for(y=start_y; y<h; y++){
1830 int x = 0;
1831 int v;
1832 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1833 memset(line, 0, b->width*sizeof(DWTELEM));
1834 v = b->x_coeff[new_index].coeff;
1835 x = b->x_coeff[new_index++].x;
1836 while(x < w)
1838 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1839 register int u= -(v&1);
1840 line[x] = (t^u) - u;
1842 v = b->x_coeff[new_index].coeff;
1843 x = b->x_coeff[new_index++].x;
1846 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1847 STOP_TIMER("decode_subband")
1850 /* Save our variables for the next slice. */
1851 save_state[0] = new_index;
1853 return;
1856 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1857 int plane_index, level, orientation;
1859 for(plane_index=0; plane_index<3; plane_index++){
1860 for(level=0; level<s->spatial_decomposition_count; level++){
1861 for(orientation=level ? 1:0; orientation<4; orientation++){
1862 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1866 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1867 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1870 static int alloc_blocks(SnowContext *s){
1871 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1872 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1874 s->b_width = w;
1875 s->b_height= h;
1877 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1878 return 0;
1881 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1882 uint8_t *bytestream= d->bytestream;
1883 uint8_t *bytestream_start= d->bytestream_start;
1884 *d= *s;
1885 d->bytestream= bytestream;
1886 d->bytestream_start= bytestream_start;
1889 //near copy & paste from dsputil, FIXME
1890 static int pix_sum(uint8_t * pix, int line_size, int w)
1892 int s, i, j;
1894 s = 0;
1895 for (i = 0; i < w; i++) {
1896 for (j = 0; j < w; j++) {
1897 s += pix[0];
1898 pix ++;
1900 pix += line_size - w;
1902 return s;
1905 //near copy & paste from dsputil, FIXME
1906 static int pix_norm1(uint8_t * pix, int line_size, int w)
1908 int s, i, j;
1909 uint32_t *sq = ff_squareTbl + 256;
1911 s = 0;
1912 for (i = 0; i < w; i++) {
1913 for (j = 0; j < w; j ++) {
1914 s += sq[pix[0]];
1915 pix ++;
1917 pix += line_size - w;
1919 return s;
1922 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1923 const int w= s->b_width << s->block_max_depth;
1924 const int rem_depth= s->block_max_depth - level;
1925 const int index= (x + y*w) << rem_depth;
1926 const int block_w= 1<<rem_depth;
1927 BlockNode block;
1928 int i,j;
1930 block.color[0]= l;
1931 block.color[1]= cb;
1932 block.color[2]= cr;
1933 block.mx= mx;
1934 block.my= my;
1935 block.ref= ref;
1936 block.type= type;
1937 block.level= level;
1939 for(j=0; j<block_w; j++){
1940 for(i=0; i<block_w; i++){
1941 s->block[index + i + j*w]= block;
1946 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1947 const int offset[3]= {
1948 y*c-> stride + x,
1949 ((y*c->uvstride + x)>>1),
1950 ((y*c->uvstride + x)>>1),
1952 int i;
1953 for(i=0; i<3; i++){
1954 c->src[0][i]= src [i];
1955 c->ref[0][i]= ref [i] + offset[i];
1957 assert(!ref_index);
1960 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1961 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1962 if(s->ref_frames == 1){
1963 *mx = mid_pred(left->mx, top->mx, tr->mx);
1964 *my = mid_pred(left->my, top->my, tr->my);
1965 }else{
1966 const int *scale = scale_mv_ref[ref];
1967 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1968 (top ->mx * scale[top ->ref] + 128) >>8,
1969 (tr ->mx * scale[tr ->ref] + 128) >>8);
1970 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1971 (top ->my * scale[top ->ref] + 128) >>8,
1972 (tr ->my * scale[tr ->ref] + 128) >>8);
1976 //FIXME copy&paste
1977 #define P_LEFT P[1]
1978 #define P_TOP P[2]
1979 #define P_TOPRIGHT P[3]
1980 #define P_MEDIAN P[4]
1981 #define P_MV1 P[9]
1982 #define FLAG_QPEL 1 //must be 1
1984 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1985 uint8_t p_buffer[1024];
1986 uint8_t i_buffer[1024];
1987 uint8_t p_state[sizeof(s->block_state)];
1988 uint8_t i_state[sizeof(s->block_state)];
1989 RangeCoder pc, ic;
1990 uint8_t *pbbak= s->c.bytestream;
1991 uint8_t *pbbak_start= s->c.bytestream_start;
1992 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1993 const int w= s->b_width << s->block_max_depth;
1994 const int h= s->b_height << s->block_max_depth;
1995 const int rem_depth= s->block_max_depth - level;
1996 const int index= (x + y*w) << rem_depth;
1997 const int block_w= 1<<(LOG2_MB_SIZE - level);
1998 int trx= (x+1)<<rem_depth;
1999 int try= (y+1)<<rem_depth;
2000 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2001 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2002 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2003 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2004 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2005 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2006 int pl = left->color[0];
2007 int pcb= left->color[1];
2008 int pcr= left->color[2];
2009 int pmx, pmy;
2010 int mx=0, my=0;
2011 int l,cr,cb;
2012 const int stride= s->current_picture.linesize[0];
2013 const int uvstride= s->current_picture.linesize[1];
2014 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2015 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2016 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2017 int P[10][2];
2018 int16_t last_mv[3][2];
2019 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2020 const int shift= 1+qpel;
2021 MotionEstContext *c= &s->m.me;
2022 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2023 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2024 int my_context= av_log2(2*FFABS(left->my - top->my));
2025 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2026 int ref, best_ref, ref_score, ref_mx, ref_my;
2028 assert(sizeof(s->block_state) >= 256);
2029 if(s->keyframe){
2030 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2031 return 0;
2034 // clip predictors / edge ?
2036 P_LEFT[0]= left->mx;
2037 P_LEFT[1]= left->my;
2038 P_TOP [0]= top->mx;
2039 P_TOP [1]= top->my;
2040 P_TOPRIGHT[0]= tr->mx;
2041 P_TOPRIGHT[1]= tr->my;
2043 last_mv[0][0]= s->block[index].mx;
2044 last_mv[0][1]= s->block[index].my;
2045 last_mv[1][0]= right->mx;
2046 last_mv[1][1]= right->my;
2047 last_mv[2][0]= bottom->mx;
2048 last_mv[2][1]= bottom->my;
2050 s->m.mb_stride=2;
2051 s->m.mb_x=
2052 s->m.mb_y= 0;
2053 c->skip= 0;
2055 assert(c-> stride == stride);
2056 assert(c->uvstride == uvstride);
2058 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2059 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2060 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2061 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2063 c->xmin = - x*block_w - 16+2;
2064 c->ymin = - y*block_w - 16+2;
2065 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2066 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2068 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2069 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2070 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2071 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2072 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2073 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2074 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2076 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2077 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2079 if (!y) {
2080 c->pred_x= P_LEFT[0];
2081 c->pred_y= P_LEFT[1];
2082 } else {
2083 c->pred_x = P_MEDIAN[0];
2084 c->pred_y = P_MEDIAN[1];
2087 score= INT_MAX;
2088 best_ref= 0;
2089 for(ref=0; ref<s->ref_frames; ref++){
2090 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2092 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2093 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2095 assert(ref_mx >= c->xmin);
2096 assert(ref_mx <= c->xmax);
2097 assert(ref_my >= c->ymin);
2098 assert(ref_my <= c->ymax);
2100 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2101 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2102 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2103 if(s->ref_mvs[ref]){
2104 s->ref_mvs[ref][index][0]= ref_mx;
2105 s->ref_mvs[ref][index][1]= ref_my;
2106 s->ref_scores[ref][index]= ref_score;
2108 if(score > ref_score){
2109 score= ref_score;
2110 best_ref= ref;
2111 mx= ref_mx;
2112 my= ref_my;
2115 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2117 // subpel search
2118 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2119 pc= s->c;
2120 pc.bytestream_start=
2121 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2122 memcpy(p_state, s->block_state, sizeof(s->block_state));
2124 if(level!=s->block_max_depth)
2125 put_rac(&pc, &p_state[4 + s_context], 1);
2126 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2127 if(s->ref_frames > 1)
2128 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2129 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2130 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2131 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2132 p_len= pc.bytestream - pc.bytestream_start;
2133 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2135 block_s= block_w*block_w;
2136 sum = pix_sum(current_data[0], stride, block_w);
2137 l= (sum + block_s/2)/block_s;
2138 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2140 block_s= block_w*block_w>>2;
2141 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2142 cb= (sum + block_s/2)/block_s;
2143 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2144 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2145 cr= (sum + block_s/2)/block_s;
2146 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2148 ic= s->c;
2149 ic.bytestream_start=
2150 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2151 memcpy(i_state, s->block_state, sizeof(s->block_state));
2152 if(level!=s->block_max_depth)
2153 put_rac(&ic, &i_state[4 + s_context], 1);
2154 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2155 put_symbol(&ic, &i_state[32], l-pl , 1);
2156 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2157 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2158 i_len= ic.bytestream - ic.bytestream_start;
2159 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2161 // assert(score==256*256*256*64-1);
2162 assert(iscore < 255*255*256 + s->lambda2*10);
2163 assert(iscore >= 0);
2164 assert(l>=0 && l<=255);
2165 assert(pl>=0 && pl<=255);
2167 if(level==0){
2168 int varc= iscore >> 8;
2169 int vard= score >> 8;
2170 if (vard <= 64 || vard < varc)
2171 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2172 else
2173 c->scene_change_score+= s->m.qscale;
2176 if(level!=s->block_max_depth){
2177 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2178 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2179 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2180 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2181 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2182 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2184 if(score2 < score && score2 < iscore)
2185 return score2;
2188 if(iscore < score){
2189 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2190 memcpy(pbbak, i_buffer, i_len);
2191 s->c= ic;
2192 s->c.bytestream_start= pbbak_start;
2193 s->c.bytestream= pbbak + i_len;
2194 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2195 memcpy(s->block_state, i_state, sizeof(s->block_state));
2196 return iscore;
2197 }else{
2198 memcpy(pbbak, p_buffer, p_len);
2199 s->c= pc;
2200 s->c.bytestream_start= pbbak_start;
2201 s->c.bytestream= pbbak + p_len;
2202 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2203 memcpy(s->block_state, p_state, sizeof(s->block_state));
2204 return score;
2208 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2209 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2210 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2211 }else{
2212 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2216 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2217 const int w= s->b_width << s->block_max_depth;
2218 const int rem_depth= s->block_max_depth - level;
2219 const int index= (x + y*w) << rem_depth;
2220 int trx= (x+1)<<rem_depth;
2221 BlockNode *b= &s->block[index];
2222 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2223 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2224 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2225 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2226 int pl = left->color[0];
2227 int pcb= left->color[1];
2228 int pcr= left->color[2];
2229 int pmx, pmy;
2230 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2231 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2232 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2233 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2235 if(s->keyframe){
2236 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2237 return;
2240 if(level!=s->block_max_depth){
2241 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2242 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2243 }else{
2244 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2245 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2246 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2247 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2248 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2249 return;
2252 if(b->type & BLOCK_INTRA){
2253 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2254 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2255 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2256 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2257 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2258 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2259 }else{
2260 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2261 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2262 if(s->ref_frames > 1)
2263 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2264 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2265 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2266 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2270 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2271 const int w= s->b_width << s->block_max_depth;
2272 const int rem_depth= s->block_max_depth - level;
2273 const int index= (x + y*w) << rem_depth;
2274 int trx= (x+1)<<rem_depth;
2275 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2276 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2277 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2278 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2279 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2281 if(s->keyframe){
2282 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2283 return;
2286 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2287 int type, mx, my;
2288 int l = left->color[0];
2289 int cb= left->color[1];
2290 int cr= left->color[2];
2291 int ref = 0;
2292 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2293 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2294 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2296 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2298 if(type){
2299 pred_mv(s, &mx, &my, 0, left, top, tr);
2300 l += get_symbol(&s->c, &s->block_state[32], 1);
2301 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2302 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2303 }else{
2304 if(s->ref_frames > 1)
2305 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2306 pred_mv(s, &mx, &my, ref, left, top, tr);
2307 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2308 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2310 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2311 }else{
2312 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2313 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2314 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2315 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2319 static void encode_blocks(SnowContext *s, int search){
2320 int x, y;
2321 int w= s->b_width;
2322 int h= s->b_height;
2324 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2325 iterative_me(s);
2327 for(y=0; y<h; y++){
2328 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2329 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2330 return;
2332 for(x=0; x<w; x++){
2333 if(s->avctx->me_method == ME_ITER || !search)
2334 encode_q_branch2(s, 0, x, y);
2335 else
2336 encode_q_branch (s, 0, x, y);
2341 static void decode_blocks(SnowContext *s){
2342 int x, y;
2343 int w= s->b_width;
2344 int h= s->b_height;
2346 for(y=0; y<h; y++){
2347 for(x=0; x<w; x++){
2348 decode_q_branch(s, 0, x, y);
2353 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2354 int x, y;
2355 START_TIMER
2356 for(y=0; y < b_h+5; y++){
2357 for(x=0; x < b_w; x++){
2358 int a0= src[x ];
2359 int a1= src[x + 1];
2360 int a2= src[x + 2];
2361 int a3= src[x + 3];
2362 int a4= src[x + 4];
2363 int a5= src[x + 5];
2364 // int am= 9*(a1+a2) - (a0+a3);
2365 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2366 // int am= 18*(a2+a3) - 2*(a1+a4);
2367 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2368 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2370 // if(b_w==16) am= 8*(a1+a2);
2372 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2373 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2375 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2376 if(am&(~255)) am= ~(am>>31);
2378 tmp[x] = am;
2380 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2381 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2382 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2383 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2385 tmp += stride;
2386 src += stride;
2388 tmp -= (b_h+5)*stride;
2390 for(y=0; y < b_h; y++){
2391 for(x=0; x < b_w; x++){
2392 int a0= tmp[x + 0*stride];
2393 int a1= tmp[x + 1*stride];
2394 int a2= tmp[x + 2*stride];
2395 int a3= tmp[x + 3*stride];
2396 int a4= tmp[x + 4*stride];
2397 int a5= tmp[x + 5*stride];
2398 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2399 // int am= 18*(a2+a3) - 2*(a1+a4);
2400 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2401 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2403 // if(b_w==16) am= 8*(a1+a2);
2405 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2406 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2408 if(am&(~255)) am= ~(am>>31);
2410 dst[x] = am;
2411 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2412 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2413 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2414 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2416 dst += stride;
2417 tmp += stride;
2419 STOP_TIMER("mc_block")
2422 #define mca(dx,dy,b_w)\
2423 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2424 uint8_t tmp[stride*(b_w+5)];\
2425 assert(h==b_w);\
2426 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2429 mca( 0, 0,16)
2430 mca( 8, 0,16)
2431 mca( 0, 8,16)
2432 mca( 8, 8,16)
2433 mca( 0, 0,8)
2434 mca( 8, 0,8)
2435 mca( 0, 8,8)
2436 mca( 8, 8,8)
2438 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2439 if(block->type & BLOCK_INTRA){
2440 int x, y;
2441 const int color = block->color[plane_index];
2442 const int color4= color*0x01010101;
2443 if(b_w==32){
2444 for(y=0; y < b_h; y++){
2445 *(uint32_t*)&dst[0 + y*stride]= color4;
2446 *(uint32_t*)&dst[4 + y*stride]= color4;
2447 *(uint32_t*)&dst[8 + y*stride]= color4;
2448 *(uint32_t*)&dst[12+ y*stride]= color4;
2449 *(uint32_t*)&dst[16+ y*stride]= color4;
2450 *(uint32_t*)&dst[20+ y*stride]= color4;
2451 *(uint32_t*)&dst[24+ y*stride]= color4;
2452 *(uint32_t*)&dst[28+ y*stride]= color4;
2454 }else if(b_w==16){
2455 for(y=0; y < b_h; y++){
2456 *(uint32_t*)&dst[0 + y*stride]= color4;
2457 *(uint32_t*)&dst[4 + y*stride]= color4;
2458 *(uint32_t*)&dst[8 + y*stride]= color4;
2459 *(uint32_t*)&dst[12+ y*stride]= color4;
2461 }else if(b_w==8){
2462 for(y=0; y < b_h; y++){
2463 *(uint32_t*)&dst[0 + y*stride]= color4;
2464 *(uint32_t*)&dst[4 + y*stride]= color4;
2466 }else if(b_w==4){
2467 for(y=0; y < b_h; y++){
2468 *(uint32_t*)&dst[0 + y*stride]= color4;
2470 }else{
2471 for(y=0; y < b_h; y++){
2472 for(x=0; x < b_w; x++){
2473 dst[x + y*stride]= color;
2477 }else{
2478 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2479 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2480 int mx= block->mx*scale;
2481 int my= block->my*scale;
2482 const int dx= mx&15;
2483 const int dy= my&15;
2484 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2485 sx += (mx>>4) - 2;
2486 sy += (my>>4) - 2;
2487 src += sx + sy*stride;
2488 if( (unsigned)sx >= w - b_w - 4
2489 || (unsigned)sy >= h - b_h - 4){
2490 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2491 src= tmp + MB_SIZE;
2493 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2494 // assert(!(b_w&(b_w-1)));
2495 assert(b_w>1 && b_h>1);
2496 assert(tab_index>=0 && tab_index<4 || b_w==32);
2497 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2498 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2499 else if(b_w==32){
2500 int y;
2501 for(y=0; y<b_h; y+=16){
2502 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2503 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2505 }else if(b_w==b_h)
2506 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2507 else if(b_w==2*b_h){
2508 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2509 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2510 }else{
2511 assert(2*b_w==b_h);
2512 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2513 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2518 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2519 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2520 int y, x;
2521 DWTELEM * dst;
2522 for(y=0; y<b_h; y++){
2523 //FIXME ugly missue of obmc_stride
2524 const uint8_t *obmc1= obmc + y*obmc_stride;
2525 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2526 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2527 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2528 dst = slice_buffer_get_line(sb, src_y + y);
2529 for(x=0; x<b_w; x++){
2530 int v= obmc1[x] * block[3][x + y*src_stride]
2531 +obmc2[x] * block[2][x + y*src_stride]
2532 +obmc3[x] * block[1][x + y*src_stride]
2533 +obmc4[x] * block[0][x + y*src_stride];
2535 v <<= 8 - LOG2_OBMC_MAX;
2536 if(FRAC_BITS != 8){
2537 v += 1<<(7 - FRAC_BITS);
2538 v >>= 8 - FRAC_BITS;
2540 if(add){
2541 v += dst[x + src_x];
2542 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2543 if(v&(~255)) v= ~(v>>31);
2544 dst8[x + y*src_stride] = v;
2545 }else{
2546 dst[x + src_x] -= v;
2552 //FIXME name clenup (b_w, block_w, b_width stuff)
2553 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2554 const int b_width = s->b_width << s->block_max_depth;
2555 const int b_height= s->b_height << s->block_max_depth;
2556 const int b_stride= b_width;
2557 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2558 BlockNode *rt= lt+1;
2559 BlockNode *lb= lt+b_stride;
2560 BlockNode *rb= lb+1;
2561 uint8_t *block[4];
2562 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2563 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2564 uint8_t *ptmp;
2565 int x,y;
2567 if(b_x<0){
2568 lt= rt;
2569 lb= rb;
2570 }else if(b_x + 1 >= b_width){
2571 rt= lt;
2572 rb= lb;
2574 if(b_y<0){
2575 lt= lb;
2576 rt= rb;
2577 }else if(b_y + 1 >= b_height){
2578 lb= lt;
2579 rb= rt;
2582 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2583 obmc -= src_x;
2584 b_w += src_x;
2585 if(!sliced && !offset_dst)
2586 dst -= src_x;
2587 src_x=0;
2588 }else if(src_x + b_w > w){
2589 b_w = w - src_x;
2591 if(src_y<0){
2592 obmc -= src_y*obmc_stride;
2593 b_h += src_y;
2594 if(!sliced && !offset_dst)
2595 dst -= src_y*dst_stride;
2596 src_y=0;
2597 }else if(src_y + b_h> h){
2598 b_h = h - src_y;
2601 if(b_w<=0 || b_h<=0) return;
2603 assert(src_stride > 2*MB_SIZE + 5);
2604 if(!sliced && offset_dst)
2605 dst += src_x + src_y*dst_stride;
2606 dst8+= src_x + src_y*src_stride;
2607 // src += src_x + src_y*src_stride;
2609 ptmp= tmp + 3*tmp_step;
2610 block[0]= ptmp;
2611 ptmp+=tmp_step;
2612 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2614 if(same_block(lt, rt)){
2615 block[1]= block[0];
2616 }else{
2617 block[1]= ptmp;
2618 ptmp+=tmp_step;
2619 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2622 if(same_block(lt, lb)){
2623 block[2]= block[0];
2624 }else if(same_block(rt, lb)){
2625 block[2]= block[1];
2626 }else{
2627 block[2]= ptmp;
2628 ptmp+=tmp_step;
2629 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2632 if(same_block(lt, rb) ){
2633 block[3]= block[0];
2634 }else if(same_block(rt, rb)){
2635 block[3]= block[1];
2636 }else if(same_block(lb, rb)){
2637 block[3]= block[2];
2638 }else{
2639 block[3]= ptmp;
2640 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2642 #if 0
2643 for(y=0; y<b_h; y++){
2644 for(x=0; x<b_w; x++){
2645 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
2650 for(y=0; y<b_h; y++){
2651 uint8_t *obmc2= obmc + (obmc_stride>>1);
2652 for(x=0; x<b_w; x++){
2653 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2654 if(add) dst[x + y*dst_stride] += v;
2655 else dst[x + y*dst_stride] -= v;
2658 for(y=0; y<b_h; y++){
2659 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2660 for(x=0; x<b_w; x++){
2661 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2662 if(add) dst[x + y*dst_stride] += v;
2663 else dst[x + y*dst_stride] -= v;
2666 for(y=0; y<b_h; y++){
2667 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2668 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2669 for(x=0; x<b_w; x++){
2670 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2671 if(add) dst[x + y*dst_stride] += v;
2672 else dst[x + y*dst_stride] -= v;
2675 #else
2676 if(sliced){
2677 START_TIMER
2679 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2680 STOP_TIMER("inner_add_yblock")
2681 }else
2682 for(y=0; y<b_h; y++){
2683 //FIXME ugly missue of obmc_stride
2684 const uint8_t *obmc1= obmc + y*obmc_stride;
2685 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2686 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2687 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2688 for(x=0; x<b_w; x++){
2689 int v= obmc1[x] * block[3][x + y*src_stride]
2690 +obmc2[x] * block[2][x + y*src_stride]
2691 +obmc3[x] * block[1][x + y*src_stride]
2692 +obmc4[x] * block[0][x + y*src_stride];
2694 v <<= 8 - LOG2_OBMC_MAX;
2695 if(FRAC_BITS != 8){
2696 v += 1<<(7 - FRAC_BITS);
2697 v >>= 8 - FRAC_BITS;
2699 if(add){
2700 v += dst[x + y*dst_stride];
2701 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2702 if(v&(~255)) v= ~(v>>31);
2703 dst8[x + y*src_stride] = v;
2704 }else{
2705 dst[x + y*dst_stride] -= v;
2709 #endif
2712 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2713 Plane *p= &s->plane[plane_index];
2714 const int mb_w= s->b_width << s->block_max_depth;
2715 const int mb_h= s->b_height << s->block_max_depth;
2716 int x, y, mb_x;
2717 int block_size = MB_SIZE >> s->block_max_depth;
2718 int block_w = plane_index ? block_size/2 : block_size;
2719 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2720 int obmc_stride= plane_index ? block_size : 2*block_size;
2721 int ref_stride= s->current_picture.linesize[plane_index];
2722 uint8_t *dst8= s->current_picture.data[plane_index];
2723 int w= p->width;
2724 int h= p->height;
2725 START_TIMER
2727 if(s->keyframe || (s->avctx->debug&512)){
2728 if(mb_y==mb_h)
2729 return;
2731 if(add){
2732 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2734 // DWTELEM * line = slice_buffer_get_line(sb, y);
2735 DWTELEM * line = sb->line[y];
2736 for(x=0; x<w; x++)
2738 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2739 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2740 v >>= FRAC_BITS;
2741 if(v&(~255)) v= ~(v>>31);
2742 dst8[x + y*ref_stride]= v;
2745 }else{
2746 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2748 // DWTELEM * line = slice_buffer_get_line(sb, y);
2749 DWTELEM * line = sb->line[y];
2750 for(x=0; x<w; x++)
2752 line[x] -= 128 << FRAC_BITS;
2753 // buf[x + y*w]-= 128<<FRAC_BITS;
2758 return;
2761 for(mb_x=0; mb_x<=mb_w; mb_x++){
2762 START_TIMER
2764 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2765 block_w*mb_x - block_w/2,
2766 block_w*mb_y - block_w/2,
2767 block_w, block_w,
2768 w, h,
2769 w, ref_stride, obmc_stride,
2770 mb_x - 1, mb_y - 1,
2771 add, 0, plane_index);
2773 STOP_TIMER("add_yblock")
2776 STOP_TIMER("predict_slice")
2779 static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2780 Plane *p= &s->plane[plane_index];
2781 const int mb_w= s->b_width << s->block_max_depth;
2782 const int mb_h= s->b_height << s->block_max_depth;
2783 int x, y, mb_x;
2784 int block_size = MB_SIZE >> s->block_max_depth;
2785 int block_w = plane_index ? block_size/2 : block_size;
2786 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2787 const int obmc_stride= plane_index ? block_size : 2*block_size;
2788 int ref_stride= s->current_picture.linesize[plane_index];
2789 uint8_t *dst8= s->current_picture.data[plane_index];
2790 int w= p->width;
2791 int h= p->height;
2792 START_TIMER
2794 if(s->keyframe || (s->avctx->debug&512)){
2795 if(mb_y==mb_h)
2796 return;
2798 if(add){
2799 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2800 for(x=0; x<w; x++){
2801 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2802 v >>= FRAC_BITS;
2803 if(v&(~255)) v= ~(v>>31);
2804 dst8[x + y*ref_stride]= v;
2807 }else{
2808 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2809 for(x=0; x<w; x++){
2810 buf[x + y*w]-= 128<<FRAC_BITS;
2815 return;
2818 for(mb_x=0; mb_x<=mb_w; mb_x++){
2819 START_TIMER
2821 add_yblock(s, 0, NULL, buf, dst8, obmc,
2822 block_w*mb_x - block_w/2,
2823 block_w*mb_y - block_w/2,
2824 block_w, block_w,
2825 w, h,
2826 w, ref_stride, obmc_stride,
2827 mb_x - 1, mb_y - 1,
2828 add, 1, plane_index);
2830 STOP_TIMER("add_yblock")
2833 STOP_TIMER("predict_slice")
2836 static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2837 const int mb_h= s->b_height << s->block_max_depth;
2838 int mb_y;
2839 for(mb_y=0; mb_y<=mb_h; mb_y++)
2840 predict_slice(s, buf, plane_index, add, mb_y);
2843 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2844 int i, x2, y2;
2845 Plane *p= &s->plane[plane_index];
2846 const int block_size = MB_SIZE >> s->block_max_depth;
2847 const int block_w = plane_index ? block_size/2 : block_size;
2848 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2849 const int obmc_stride= plane_index ? block_size : 2*block_size;
2850 const int ref_stride= s->current_picture.linesize[plane_index];
2851 uint8_t *src= s-> input_picture.data[plane_index];
2852 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2853 const int b_stride = s->b_width << s->block_max_depth;
2854 const int w= p->width;
2855 const int h= p->height;
2856 int index= mb_x + mb_y*b_stride;
2857 BlockNode *b= &s->block[index];
2858 BlockNode backup= *b;
2859 int ab=0;
2860 int aa=0;
2862 b->type|= BLOCK_INTRA;
2863 b->color[plane_index]= 0;
2864 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
2866 for(i=0; i<4; i++){
2867 int mb_x2= mb_x + (i &1) - 1;
2868 int mb_y2= mb_y + (i>>1) - 1;
2869 int x= block_w*mb_x2 + block_w/2;
2870 int y= block_w*mb_y2 + block_w/2;
2872 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2873 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2875 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2876 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2877 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2878 int obmc_v= obmc[index];
2879 int d;
2880 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2881 if(x<0) obmc_v += obmc[index + block_w];
2882 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2883 if(x+block_w>w) obmc_v += obmc[index - block_w];
2884 //FIXME precalc this or simplify it somehow else
2886 d = -dst[index] + (1<<(FRAC_BITS-1));
2887 dst[index] = d;
2888 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2889 aa += obmc_v * obmc_v; //FIXME precalclate this
2893 *b= backup;
2895 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2898 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2899 const int b_stride = s->b_width << s->block_max_depth;
2900 const int b_height = s->b_height<< s->block_max_depth;
2901 int index= x + y*b_stride;
2902 const BlockNode *b = &s->block[index];
2903 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2904 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2905 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2906 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2907 int dmx, dmy;
2908 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2909 // int my_context= av_log2(2*FFABS(left->my - top->my));
2911 if(x<0 || x>=b_stride || y>=b_height)
2912 return 0;
2914 1 0 0
2915 01X 1-2 1
2916 001XX 3-6 2-3
2917 0001XXX 7-14 4-7
2918 00001XXXX 15-30 8-15
2920 //FIXME try accurate rate
2921 //FIXME intra and inter predictors if surrounding blocks arent the same type
2922 if(b->type & BLOCK_INTRA){
2923 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2924 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2925 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2926 }else{
2927 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2928 dmx-= b->mx;
2929 dmy-= b->my;
2930 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2931 + av_log2(2*FFABS(dmy))
2932 + av_log2(2*b->ref));
2936 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2937 Plane *p= &s->plane[plane_index];
2938 const int block_size = MB_SIZE >> s->block_max_depth;
2939 const int block_w = plane_index ? block_size/2 : block_size;
2940 const int obmc_stride= plane_index ? block_size : 2*block_size;
2941 const int ref_stride= s->current_picture.linesize[plane_index];
2942 uint8_t *dst= s->current_picture.data[plane_index];
2943 uint8_t *src= s-> input_picture.data[plane_index];
2944 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2945 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2946 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2947 const int b_stride = s->b_width << s->block_max_depth;
2948 const int b_height = s->b_height<< s->block_max_depth;
2949 const int w= p->width;
2950 const int h= p->height;
2951 int distortion;
2952 int rate= 0;
2953 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2954 int sx= block_w*mb_x - block_w/2;
2955 int sy= block_w*mb_y - block_w/2;
2956 int x0= FFMAX(0,-sx);
2957 int y0= FFMAX(0,-sy);
2958 int x1= FFMIN(block_w*2, w-sx);
2959 int y1= FFMIN(block_w*2, h-sy);
2960 int i,x,y;
2962 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2964 for(y=y0; y<y1; y++){
2965 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2966 const DWTELEM *pred1 = pred + y*obmc_stride;
2967 uint8_t *cur1 = cur + y*ref_stride;
2968 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2969 for(x=x0; x<x1; x++){
2970 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2971 v = (v + pred1[x]) >> FRAC_BITS;
2972 if(v&(~255)) v= ~(v>>31);
2973 dst1[x] = v;
2977 /* copy the regions where obmc[] = (uint8_t)256 */
2978 if(LOG2_OBMC_MAX == 8
2979 && (mb_x == 0 || mb_x == b_stride-1)
2980 && (mb_y == 0 || mb_y == b_height-1)){
2981 if(mb_x == 0)
2982 x1 = block_w;
2983 else
2984 x0 = block_w;
2985 if(mb_y == 0)
2986 y1 = block_w;
2987 else
2988 y0 = block_w;
2989 for(y=y0; y<y1; y++)
2990 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2993 if(block_w==16){
2994 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2995 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2996 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2997 * so improving the score of one block is not strictly guaranteed to
2998 * improve the score of the whole frame, so iterative motion est
2999 * doesn't always converge. */
3000 if(s->avctx->me_cmp == FF_CMP_W97)
3001 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3002 else if(s->avctx->me_cmp == FF_CMP_W53)
3003 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3004 else{
3005 distortion = 0;
3006 for(i=0; i<4; i++){
3007 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3008 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3011 }else{
3012 assert(block_w==8);
3013 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3016 if(plane_index==0){
3017 for(i=0; i<4; i++){
3018 /* ..RRr
3019 * .RXx.
3020 * rxx..
3022 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3024 if(mb_x == b_stride-2)
3025 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3027 return distortion + rate*penalty_factor;
3030 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3031 int i, y2;
3032 Plane *p= &s->plane[plane_index];
3033 const int block_size = MB_SIZE >> s->block_max_depth;
3034 const int block_w = plane_index ? block_size/2 : block_size;
3035 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3036 const int obmc_stride= plane_index ? block_size : 2*block_size;
3037 const int ref_stride= s->current_picture.linesize[plane_index];
3038 uint8_t *dst= s->current_picture.data[plane_index];
3039 uint8_t *src= s-> input_picture.data[plane_index];
3040 static const DWTELEM zero_dst[4096]; //FIXME
3041 const int b_stride = s->b_width << s->block_max_depth;
3042 const int w= p->width;
3043 const int h= p->height;
3044 int distortion= 0;
3045 int rate= 0;
3046 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3048 for(i=0; i<9; i++){
3049 int mb_x2= mb_x + (i%3) - 1;
3050 int mb_y2= mb_y + (i/3) - 1;
3051 int x= block_w*mb_x2 + block_w/2;
3052 int y= block_w*mb_y2 + block_w/2;
3054 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3055 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3057 //FIXME find a cleaner/simpler way to skip the outside stuff
3058 for(y2= y; y2<0; y2++)
3059 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3060 for(y2= h; y2<y+block_w; y2++)
3061 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3062 if(x<0){
3063 for(y2= y; y2<y+block_w; y2++)
3064 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3066 if(x+block_w > w){
3067 for(y2= y; y2<y+block_w; y2++)
3068 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3071 assert(block_w== 8 || block_w==16);
3072 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3075 if(plane_index==0){
3076 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3077 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3079 /* ..RRRr
3080 * .RXXx.
3081 * .RXXx.
3082 * rxxx.
3084 if(merged)
3085 rate = get_block_bits(s, mb_x, mb_y, 2);
3086 for(i=merged?4:0; i<9; i++){
3087 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3088 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3091 return distortion + rate*penalty_factor;
3094 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3095 const int b_stride= s->b_width << s->block_max_depth;
3096 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3097 BlockNode backup= *block;
3098 int rd, index, value;
3100 assert(mb_x>=0 && mb_y>=0);
3101 assert(mb_x<b_stride);
3103 if(intra){
3104 block->color[0] = p[0];
3105 block->color[1] = p[1];
3106 block->color[2] = p[2];
3107 block->type |= BLOCK_INTRA;
3108 }else{
3109 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3110 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3111 if(s->me_cache[index] == value)
3112 return 0;
3113 s->me_cache[index]= value;
3115 block->mx= p[0];
3116 block->my= p[1];
3117 block->type &= ~BLOCK_INTRA;
3120 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3122 //FIXME chroma
3123 if(rd < *best_rd){
3124 *best_rd= rd;
3125 return 1;
3126 }else{
3127 *block= backup;
3128 return 0;
3132 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3133 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3134 int p[2] = {p0, p1};
3135 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3138 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3139 const int b_stride= s->b_width << s->block_max_depth;
3140 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3141 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3142 int rd, index, value;
3144 assert(mb_x>=0 && mb_y>=0);
3145 assert(mb_x<b_stride);
3146 assert(((mb_x|mb_y)&1) == 0);
3148 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3149 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3150 if(s->me_cache[index] == value)
3151 return 0;
3152 s->me_cache[index]= value;
3154 block->mx= p0;
3155 block->my= p1;
3156 block->ref= ref;
3157 block->type &= ~BLOCK_INTRA;
3158 block[1]= block[b_stride]= block[b_stride+1]= *block;
3160 rd= get_4block_rd(s, mb_x, mb_y, 0);
3162 //FIXME chroma
3163 if(rd < *best_rd){
3164 *best_rd= rd;
3165 return 1;
3166 }else{
3167 block[0]= backup[0];
3168 block[1]= backup[1];
3169 block[b_stride]= backup[2];
3170 block[b_stride+1]= backup[3];
3171 return 0;
3175 static void iterative_me(SnowContext *s){
3176 int pass, mb_x, mb_y;
3177 const int b_width = s->b_width << s->block_max_depth;
3178 const int b_height= s->b_height << s->block_max_depth;
3179 const int b_stride= b_width;
3180 int color[3];
3183 RangeCoder r = s->c;
3184 uint8_t state[sizeof(s->block_state)];
3185 memcpy(state, s->block_state, sizeof(s->block_state));
3186 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3187 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3188 encode_q_branch(s, 0, mb_x, mb_y);
3189 s->c = r;
3190 memcpy(s->block_state, state, sizeof(s->block_state));
3193 for(pass=0; pass<25; pass++){
3194 int change= 0;
3196 for(mb_y= 0; mb_y<b_height; mb_y++){
3197 for(mb_x= 0; mb_x<b_width; mb_x++){
3198 int dia_change, i, j, ref;
3199 int best_rd= INT_MAX, ref_rd;
3200 BlockNode backup, ref_b;
3201 const int index= mb_x + mb_y * b_stride;
3202 BlockNode *block= &s->block[index];
3203 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3204 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3205 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3206 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3207 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3208 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3209 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3210 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3211 const int b_w= (MB_SIZE >> s->block_max_depth);
3212 uint8_t obmc_edged[b_w*2][b_w*2];
3214 if(pass && (block->type & BLOCK_OPT))
3215 continue;
3216 block->type |= BLOCK_OPT;
3218 backup= *block;
3220 if(!s->me_cache_generation)
3221 memset(s->me_cache, 0, sizeof(s->me_cache));
3222 s->me_cache_generation += 1<<22;
3224 //FIXME precalc
3226 int x, y;
3227 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3228 if(mb_x==0)
3229 for(y=0; y<b_w*2; y++)
3230 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3231 if(mb_x==b_stride-1)
3232 for(y=0; y<b_w*2; y++)
3233 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3234 if(mb_y==0){
3235 for(x=0; x<b_w*2; x++)
3236 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3237 for(y=1; y<b_w; y++)
3238 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3240 if(mb_y==b_height-1){
3241 for(x=0; x<b_w*2; x++)
3242 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3243 for(y=b_w; y<b_w*2-1; y++)
3244 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3248 //skip stuff outside the picture
3249 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3251 uint8_t *src= s-> input_picture.data[0];
3252 uint8_t *dst= s->current_picture.data[0];
3253 const int stride= s->current_picture.linesize[0];
3254 const int block_w= MB_SIZE >> s->block_max_depth;
3255 const int sx= block_w*mb_x - block_w/2;
3256 const int sy= block_w*mb_y - block_w/2;
3257 const int w= s->plane[0].width;
3258 const int h= s->plane[0].height;
3259 int y;
3261 for(y=sy; y<0; y++)
3262 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3263 for(y=h; y<sy+block_w*2; y++)
3264 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3265 if(sx<0){
3266 for(y=sy; y<sy+block_w*2; y++)
3267 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3269 if(sx+block_w*2 > w){
3270 for(y=sy; y<sy+block_w*2; y++)
3271 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3275 // intra(black) = neighbors' contribution to the current block
3276 for(i=0; i<3; i++)
3277 color[i]= get_dc(s, mb_x, mb_y, i);
3279 // get previous score (cannot be cached due to OBMC)
3280 if(pass > 0 && (block->type&BLOCK_INTRA)){
3281 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3282 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3283 }else
3284 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3286 ref_b= *block;
3287 ref_rd= best_rd;
3288 for(ref=0; ref < s->ref_frames; ref++){
3289 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3290 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3291 continue;
3292 block->ref= ref;
3293 best_rd= INT_MAX;
3295 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3296 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3297 if(tb)
3298 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3299 if(lb)
3300 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3301 if(rb)
3302 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3303 if(bb)
3304 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3306 /* fullpel ME */
3307 //FIXME avoid subpel interpol / round to nearest integer
3309 dia_change=0;
3310 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3311 for(j=0; j<i; j++){
3312 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3313 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3314 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3315 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3318 }while(dia_change);
3319 /* subpel ME */
3321 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3322 dia_change=0;
3323 for(i=0; i<8; i++)
3324 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3325 }while(dia_change);
3326 //FIXME or try the standard 2 pass qpel or similar
3328 mvr[0][0]= block->mx;
3329 mvr[0][1]= block->my;
3330 if(ref_rd > best_rd){
3331 ref_rd= best_rd;
3332 ref_b= *block;
3335 best_rd= ref_rd;
3336 *block= ref_b;
3337 #if 1
3338 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3339 //FIXME RD style color selection
3340 #endif
3341 if(!same_block(block, &backup)){
3342 if(tb ) tb ->type &= ~BLOCK_OPT;
3343 if(lb ) lb ->type &= ~BLOCK_OPT;
3344 if(rb ) rb ->type &= ~BLOCK_OPT;
3345 if(bb ) bb ->type &= ~BLOCK_OPT;
3346 if(tlb) tlb->type &= ~BLOCK_OPT;
3347 if(trb) trb->type &= ~BLOCK_OPT;
3348 if(blb) blb->type &= ~BLOCK_OPT;
3349 if(brb) brb->type &= ~BLOCK_OPT;
3350 change ++;
3354 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3355 if(!change)
3356 break;
3359 if(s->block_max_depth == 1){
3360 int change= 0;
3361 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3362 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3363 int i;
3364 int best_rd, init_rd;
3365 const int index= mb_x + mb_y * b_stride;
3366 BlockNode *b[4];
3368 b[0]= &s->block[index];
3369 b[1]= b[0]+1;
3370 b[2]= b[0]+b_stride;
3371 b[3]= b[2]+1;
3372 if(same_block(b[0], b[1]) &&
3373 same_block(b[0], b[2]) &&
3374 same_block(b[0], b[3]))
3375 continue;
3377 if(!s->me_cache_generation)
3378 memset(s->me_cache, 0, sizeof(s->me_cache));
3379 s->me_cache_generation += 1<<22;
3381 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3383 //FIXME more multiref search?
3384 check_4block_inter(s, mb_x, mb_y,
3385 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3386 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3388 for(i=0; i<4; i++)
3389 if(!(b[i]->type&BLOCK_INTRA))
3390 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3392 if(init_rd != best_rd)
3393 change++;
3396 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3400 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3401 const int level= b->level;
3402 const int w= b->width;
3403 const int h= b->height;
3404 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3405 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3406 int x,y, thres1, thres2;
3407 // START_TIMER
3409 if(s->qlog == LOSSLESS_QLOG) return;
3411 bias= bias ? 0 : (3*qmul)>>3;
3412 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3413 thres2= 2*thres1;
3415 if(!bias){
3416 for(y=0; y<h; y++){
3417 for(x=0; x<w; x++){
3418 int i= src[x + y*stride];
3420 if((unsigned)(i+thres1) > thres2){
3421 if(i>=0){
3422 i<<= QEXPSHIFT;
3423 i/= qmul; //FIXME optimize
3424 src[x + y*stride]= i;
3425 }else{
3426 i= -i;
3427 i<<= QEXPSHIFT;
3428 i/= qmul; //FIXME optimize
3429 src[x + y*stride]= -i;
3431 }else
3432 src[x + y*stride]= 0;
3435 }else{
3436 for(y=0; y<h; y++){
3437 for(x=0; x<w; x++){
3438 int i= src[x + y*stride];
3440 if((unsigned)(i+thres1) > thres2){
3441 if(i>=0){
3442 i<<= QEXPSHIFT;
3443 i= (i + bias) / qmul; //FIXME optimize
3444 src[x + y*stride]= i;
3445 }else{
3446 i= -i;
3447 i<<= QEXPSHIFT;
3448 i= (i + bias) / qmul; //FIXME optimize
3449 src[x + y*stride]= -i;
3451 }else
3452 src[x + y*stride]= 0;
3456 if(level+1 == s->spatial_decomposition_count){
3457 // STOP_TIMER("quantize")
3461 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3462 const int w= b->width;
3463 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3464 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3465 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3466 int x,y;
3467 START_TIMER
3469 if(s->qlog == LOSSLESS_QLOG) return;
3471 for(y=start_y; y<end_y; y++){
3472 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3473 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3474 for(x=0; x<w; x++){
3475 int i= line[x];
3476 if(i<0){
3477 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3478 }else if(i>0){
3479 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3483 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3484 STOP_TIMER("dquant")
3488 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3489 const int w= b->width;
3490 const int h= b->height;
3491 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3492 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3493 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3494 int x,y;
3495 START_TIMER
3497 if(s->qlog == LOSSLESS_QLOG) return;
3499 for(y=0; y<h; y++){
3500 for(x=0; x<w; x++){
3501 int i= src[x + y*stride];
3502 if(i<0){
3503 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3504 }else if(i>0){
3505 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3509 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3510 STOP_TIMER("dquant")
3514 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3515 const int w= b->width;
3516 const int h= b->height;
3517 int x,y;
3519 for(y=h-1; y>=0; y--){
3520 for(x=w-1; x>=0; x--){
3521 int i= x + y*stride;
3523 if(x){
3524 if(use_median){
3525 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3526 else src[i] -= src[i - 1];
3527 }else{
3528 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3529 else src[i] -= src[i - 1];
3531 }else{
3532 if(y) src[i] -= src[i - stride];
3538 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3539 const int w= b->width;
3540 int x,y;
3542 // START_TIMER
3544 DWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3545 DWTELEM * prev;
3547 if (start_y != 0)
3548 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3550 for(y=start_y; y<end_y; y++){
3551 prev = line;
3552 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3553 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3554 for(x=0; x<w; x++){
3555 if(x){
3556 if(use_median){
3557 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3558 else line[x] += line[x - 1];
3559 }else{
3560 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3561 else line[x] += line[x - 1];
3563 }else{
3564 if(y) line[x] += prev[x];
3569 // STOP_TIMER("correlate")
3572 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3573 const int w= b->width;
3574 const int h= b->height;
3575 int x,y;
3577 for(y=0; y<h; y++){
3578 for(x=0; x<w; x++){
3579 int i= x + y*stride;
3581 if(x){
3582 if(use_median){
3583 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3584 else src[i] += src[i - 1];
3585 }else{
3586 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3587 else src[i] += src[i - 1];
3589 }else{
3590 if(y) src[i] += src[i - stride];
3596 static void encode_header(SnowContext *s){
3597 int plane_index, level, orientation;
3598 uint8_t kstate[32];
3600 memset(kstate, MID_STATE, sizeof(kstate));
3602 put_rac(&s->c, kstate, s->keyframe);
3603 if(s->keyframe || s->always_reset){
3604 reset_contexts(s);
3605 s->last_spatial_decomposition_type=
3606 s->last_qlog=
3607 s->last_qbias=
3608 s->last_mv_scale=
3609 s->last_block_max_depth= 0;
3611 if(s->keyframe){
3612 put_symbol(&s->c, s->header_state, s->version, 0);
3613 put_rac(&s->c, s->header_state, s->always_reset);
3614 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3615 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3616 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3617 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3618 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3619 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3620 put_rac(&s->c, s->header_state, s->spatial_scalability);
3621 // put_rac(&s->c, s->header_state, s->rate_scalability);
3622 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3624 for(plane_index=0; plane_index<2; plane_index++){
3625 for(level=0; level<s->spatial_decomposition_count; level++){
3626 for(orientation=level ? 1:0; orientation<4; orientation++){
3627 if(orientation==2) continue;
3628 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3633 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3634 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3635 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3636 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3637 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3639 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3640 s->last_qlog = s->qlog;
3641 s->last_qbias = s->qbias;
3642 s->last_mv_scale = s->mv_scale;
3643 s->last_block_max_depth = s->block_max_depth;
3646 static int decode_header(SnowContext *s){
3647 int plane_index, level, orientation;
3648 uint8_t kstate[32];
3650 memset(kstate, MID_STATE, sizeof(kstate));
3652 s->keyframe= get_rac(&s->c, kstate);
3653 if(s->keyframe || s->always_reset){
3654 reset_contexts(s);
3655 s->spatial_decomposition_type=
3656 s->qlog=
3657 s->qbias=
3658 s->mv_scale=
3659 s->block_max_depth= 0;
3661 if(s->keyframe){
3662 s->version= get_symbol(&s->c, s->header_state, 0);
3663 if(s->version>0){
3664 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3665 return -1;
3667 s->always_reset= get_rac(&s->c, s->header_state);
3668 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3669 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3670 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3671 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3672 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3673 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3674 s->spatial_scalability= get_rac(&s->c, s->header_state);
3675 // s->rate_scalability= get_rac(&s->c, s->header_state);
3676 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3678 for(plane_index=0; plane_index<3; plane_index++){
3679 for(level=0; level<s->spatial_decomposition_count; level++){
3680 for(orientation=level ? 1:0; orientation<4; orientation++){
3681 int q;
3682 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3683 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3684 else q= get_symbol(&s->c, s->header_state, 1);
3685 s->plane[plane_index].band[level][orientation].qlog= q;
3691 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3692 if(s->spatial_decomposition_type > 2){
3693 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3694 return -1;
3697 s->qlog += get_symbol(&s->c, s->header_state, 1);
3698 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3699 s->qbias += get_symbol(&s->c, s->header_state, 1);
3700 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3701 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3702 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3703 s->block_max_depth= 0;
3704 return -1;
3707 return 0;
3710 static void init_qexp(void){
3711 int i;
3712 double v=128;
3714 for(i=0; i<QROOT; i++){
3715 qexp[i]= lrintf(v);
3716 v *= pow(2, 1.0 / QROOT);
3720 static int common_init(AVCodecContext *avctx){
3721 SnowContext *s = avctx->priv_data;
3722 int width, height;
3723 int level, orientation, plane_index, dec;
3724 int i, j;
3726 s->avctx= avctx;
3728 dsputil_init(&s->dsp, avctx);
3730 #define mcf(dx,dy)\
3731 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3732 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3733 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3734 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3735 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3736 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3738 mcf( 0, 0)
3739 mcf( 4, 0)
3740 mcf( 8, 0)
3741 mcf(12, 0)
3742 mcf( 0, 4)
3743 mcf( 4, 4)
3744 mcf( 8, 4)
3745 mcf(12, 4)
3746 mcf( 0, 8)
3747 mcf( 4, 8)
3748 mcf( 8, 8)
3749 mcf(12, 8)
3750 mcf( 0,12)
3751 mcf( 4,12)
3752 mcf( 8,12)
3753 mcf(12,12)
3755 #define mcfh(dx,dy)\
3756 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3757 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3758 mc_block_hpel ## dx ## dy ## 16;\
3759 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3760 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3761 mc_block_hpel ## dx ## dy ## 8;
3763 mcfh(0, 0)
3764 mcfh(8, 0)
3765 mcfh(0, 8)
3766 mcfh(8, 8)
3768 if(!qexp[0])
3769 init_qexp();
3771 dec= s->spatial_decomposition_count= 5;
3772 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3774 s->chroma_h_shift= 1; //FIXME XXX
3775 s->chroma_v_shift= 1;
3777 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3779 width= s->avctx->width;
3780 height= s->avctx->height;
3782 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3784 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3785 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3787 for(plane_index=0; plane_index<3; plane_index++){
3788 int w= s->avctx->width;
3789 int h= s->avctx->height;
3791 if(plane_index){
3792 w>>= s->chroma_h_shift;
3793 h>>= s->chroma_v_shift;
3795 s->plane[plane_index].width = w;
3796 s->plane[plane_index].height= h;
3797 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3798 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3799 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3800 SubBand *b= &s->plane[plane_index].band[level][orientation];
3802 b->buf= s->spatial_dwt_buffer;
3803 b->level= level;
3804 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3805 b->width = (w + !(orientation&1))>>1;
3806 b->height= (h + !(orientation>1))>>1;
3808 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3809 b->buf_x_offset = 0;
3810 b->buf_y_offset = 0;
3812 if(orientation&1){
3813 b->buf += (w+1)>>1;
3814 b->buf_x_offset = (w+1)>>1;
3816 if(orientation>1){
3817 b->buf += b->stride>>1;
3818 b->buf_y_offset = b->stride_line >> 1;
3821 if(level)
3822 b->parent= &s->plane[plane_index].band[level-1][orientation];
3823 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3825 w= (w+1)>>1;
3826 h= (h+1)>>1;
3830 for(i=0; i<MAX_REF_FRAMES; i++)
3831 for(j=0; j<MAX_REF_FRAMES; j++)
3832 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3834 reset_contexts(s);
3836 width= s->width= avctx->width;
3837 height= s->height= avctx->height;
3839 assert(width && height);
3841 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3843 return 0;
3846 static int qscale2qlog(int qscale){
3847 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3848 + 61*QROOT/8; //<64 >60
3851 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3853 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3854 * FIXME we know exact mv bits at this point,
3855 * but ratecontrol isn't set up to include them. */
3856 uint32_t coef_sum= 0;
3857 int level, orientation, delta_qlog;
3859 for(level=0; level<s->spatial_decomposition_count; level++){
3860 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3861 SubBand *b= &s->plane[0].band[level][orientation];
3862 DWTELEM *buf= b->buf;
3863 const int w= b->width;
3864 const int h= b->height;
3865 const int stride= b->stride;
3866 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3867 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3868 const int qdiv= (1<<16)/qmul;
3869 int x, y;
3870 if(orientation==0)
3871 decorrelate(s, b, buf, stride, 1, 0);
3872 for(y=0; y<h; y++)
3873 for(x=0; x<w; x++)
3874 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3875 if(orientation==0)
3876 correlate(s, b, buf, stride, 1, 0);
3880 /* ugly, ratecontrol just takes a sqrt again */
3881 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3882 assert(coef_sum < INT_MAX);
3884 if(pict->pict_type == I_TYPE){
3885 s->m.current_picture.mb_var_sum= coef_sum;
3886 s->m.current_picture.mc_mb_var_sum= 0;
3887 }else{
3888 s->m.current_picture.mc_mb_var_sum= coef_sum;
3889 s->m.current_picture.mb_var_sum= 0;
3892 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3893 if (pict->quality < 0)
3894 return INT_MIN;
3895 s->lambda= pict->quality * 3/2;
3896 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3897 s->qlog+= delta_qlog;
3898 return delta_qlog;
3901 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3902 int width = p->width;
3903 int height= p->height;
3904 int level, orientation, x, y;
3906 for(level=0; level<s->spatial_decomposition_count; level++){
3907 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3908 SubBand *b= &p->band[level][orientation];
3909 DWTELEM *buf= b->buf;
3910 int64_t error=0;
3912 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3913 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3914 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3915 for(y=0; y<height; y++){
3916 for(x=0; x<width; x++){
3917 int64_t d= s->spatial_dwt_buffer[x + y*width];
3918 error += d*d;
3922 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3923 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3928 static int encode_init(AVCodecContext *avctx)
3930 SnowContext *s = avctx->priv_data;
3931 int plane_index;
3933 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3934 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3935 "use vstrict=-2 / -strict -2 to use it anyway\n");
3936 return -1;
3939 if(avctx->prediction_method == DWT_97
3940 && (avctx->flags & CODEC_FLAG_QSCALE)
3941 && avctx->global_quality == 0){
3942 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3943 return -1;
3946 common_init(avctx);
3947 alloc_blocks(s);
3949 s->version=0;
3951 s->m.avctx = avctx;
3952 s->m.flags = avctx->flags;
3953 s->m.bit_rate= avctx->bit_rate;
3955 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3956 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3957 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3958 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3959 h263_encode_init(&s->m); //mv_penalty
3961 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3963 if(avctx->flags&CODEC_FLAG_PASS1){
3964 if(!avctx->stats_out)
3965 avctx->stats_out = av_mallocz(256);
3967 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3968 if(ff_rate_control_init(&s->m) < 0)
3969 return -1;
3971 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3973 for(plane_index=0; plane_index<3; plane_index++){
3974 calculate_vissual_weight(s, &s->plane[plane_index]);
3978 avctx->coded_frame= &s->current_picture;
3979 switch(avctx->pix_fmt){
3980 // case PIX_FMT_YUV444P:
3981 // case PIX_FMT_YUV422P:
3982 case PIX_FMT_YUV420P:
3983 case PIX_FMT_GRAY8:
3984 // case PIX_FMT_YUV411P:
3985 // case PIX_FMT_YUV410P:
3986 s->colorspace_type= 0;
3987 break;
3988 /* case PIX_FMT_RGB32:
3989 s->colorspace= 1;
3990 break;*/
3991 default:
3992 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3993 return -1;
3995 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3996 s->chroma_h_shift= 1;
3997 s->chroma_v_shift= 1;
3999 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4000 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4002 s->avctx->get_buffer(s->avctx, &s->input_picture);
4004 if(s->avctx->me_method == ME_ITER){
4005 int i;
4006 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4007 for(i=0; i<s->max_ref_frames; i++){
4008 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4009 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4013 return 0;
4016 static int frame_start(SnowContext *s){
4017 AVFrame tmp;
4018 int w= s->avctx->width; //FIXME round up to x16 ?
4019 int h= s->avctx->height;
4021 if(s->current_picture.data[0]){
4022 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4023 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4024 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4027 tmp= s->last_picture[s->max_ref_frames-1];
4028 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4029 s->last_picture[0]= s->current_picture;
4030 s->current_picture= tmp;
4032 if(s->keyframe){
4033 s->ref_frames= 0;
4034 }else{
4035 int i;
4036 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4037 if(i && s->last_picture[i-1].key_frame)
4038 break;
4039 s->ref_frames= i;
4042 s->current_picture.reference= 1;
4043 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4044 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4045 return -1;
4048 s->current_picture.key_frame= s->keyframe;
4050 return 0;
4053 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4054 SnowContext *s = avctx->priv_data;
4055 RangeCoder * const c= &s->c;
4056 AVFrame *pict = data;
4057 const int width= s->avctx->width;
4058 const int height= s->avctx->height;
4059 int level, orientation, plane_index, i, y;
4060 uint8_t rc_header_bak[sizeof(s->header_state)];
4061 uint8_t rc_block_bak[sizeof(s->block_state)];
4063 ff_init_range_encoder(c, buf, buf_size);
4064 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4066 for(i=0; i<3; i++){
4067 int shift= !!i;
4068 for(y=0; y<(height>>shift); y++)
4069 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4070 &pict->data[i][y * pict->linesize[i]],
4071 width>>shift);
4073 s->new_picture = *pict;
4075 s->m.picture_number= avctx->frame_number;
4076 if(avctx->flags&CODEC_FLAG_PASS2){
4077 s->m.pict_type =
4078 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4079 s->keyframe= pict->pict_type==FF_I_TYPE;
4080 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4081 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4082 if (pict->quality < 0)
4083 return -1;
4085 }else{
4086 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4087 s->m.pict_type=
4088 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4091 if(s->pass1_rc && avctx->frame_number == 0)
4092 pict->quality= 2*FF_QP2LAMBDA;
4093 if(pict->quality){
4094 s->qlog= qscale2qlog(pict->quality);
4095 s->lambda = pict->quality * 3/2;
4097 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4098 s->qlog= LOSSLESS_QLOG;
4099 s->lambda = 0;
4100 }//else keep previous frame's qlog until after motion est
4102 frame_start(s);
4104 s->m.current_picture_ptr= &s->m.current_picture;
4105 if(pict->pict_type == P_TYPE){
4106 int block_width = (width +15)>>4;
4107 int block_height= (height+15)>>4;
4108 int stride= s->current_picture.linesize[0];
4110 assert(s->current_picture.data[0]);
4111 assert(s->last_picture[0].data[0]);
4113 s->m.avctx= s->avctx;
4114 s->m.current_picture.data[0]= s->current_picture.data[0];
4115 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4116 s->m. new_picture.data[0]= s-> input_picture.data[0];
4117 s->m. last_picture_ptr= &s->m. last_picture;
4118 s->m.linesize=
4119 s->m. last_picture.linesize[0]=
4120 s->m. new_picture.linesize[0]=
4121 s->m.current_picture.linesize[0]= stride;
4122 s->m.uvlinesize= s->current_picture.linesize[1];
4123 s->m.width = width;
4124 s->m.height= height;
4125 s->m.mb_width = block_width;
4126 s->m.mb_height= block_height;
4127 s->m.mb_stride= s->m.mb_width+1;
4128 s->m.b8_stride= 2*s->m.mb_width+1;
4129 s->m.f_code=1;
4130 s->m.pict_type= pict->pict_type;
4131 s->m.me_method= s->avctx->me_method;
4132 s->m.me.scene_change_score=0;
4133 s->m.flags= s->avctx->flags;
4134 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4135 s->m.out_format= FMT_H263;
4136 s->m.unrestricted_mv= 1;
4138 s->m.lambda = s->lambda;
4139 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4140 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4142 s->m.dsp= s->dsp; //move
4143 ff_init_me(&s->m);
4144 s->dsp= s->m.dsp;
4147 if(s->pass1_rc){
4148 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4149 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4152 redo_frame:
4154 s->m.pict_type = pict->pict_type;
4155 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4157 encode_header(s);
4158 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4159 encode_blocks(s, 1);
4160 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4162 for(plane_index=0; plane_index<3; plane_index++){
4163 Plane *p= &s->plane[plane_index];
4164 int w= p->width;
4165 int h= p->height;
4166 int x, y;
4167 // int bits= put_bits_count(&s->c.pb);
4169 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4170 //FIXME optimize
4171 if(pict->data[plane_index]) //FIXME gray hack
4172 for(y=0; y<h; y++){
4173 for(x=0; x<w; x++){
4174 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4177 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4179 if( plane_index==0
4180 && pict->pict_type == P_TYPE
4181 && !(avctx->flags&CODEC_FLAG_PASS2)
4182 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4183 ff_init_range_encoder(c, buf, buf_size);
4184 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4185 pict->pict_type= FF_I_TYPE;
4186 s->keyframe=1;
4187 s->current_picture.key_frame=1;
4188 goto redo_frame;
4191 if(s->qlog == LOSSLESS_QLOG){
4192 for(y=0; y<h; y++){
4193 for(x=0; x<w; x++){
4194 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4199 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4201 if(s->pass1_rc && plane_index==0){
4202 int delta_qlog = ratecontrol_1pass(s, pict);
4203 if (delta_qlog <= INT_MIN)
4204 return -1;
4205 if(delta_qlog){
4206 //reordering qlog in the bitstream would eliminate this reset
4207 ff_init_range_encoder(c, buf, buf_size);
4208 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4209 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4210 encode_header(s);
4211 encode_blocks(s, 0);
4215 for(level=0; level<s->spatial_decomposition_count; level++){
4216 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4217 SubBand *b= &p->band[level][orientation];
4219 quantize(s, b, b->buf, b->stride, s->qbias);
4220 if(orientation==0)
4221 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4222 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4223 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4224 if(orientation==0)
4225 correlate(s, b, b->buf, b->stride, 1, 0);
4228 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4230 for(level=0; level<s->spatial_decomposition_count; level++){
4231 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4232 SubBand *b= &p->band[level][orientation];
4234 dequantize(s, b, b->buf, b->stride);
4238 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4239 if(s->qlog == LOSSLESS_QLOG){
4240 for(y=0; y<h; y++){
4241 for(x=0; x<w; x++){
4242 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4246 {START_TIMER
4247 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4248 STOP_TIMER("pred-conv")}
4249 }else{
4250 //ME/MC only
4251 if(pict->pict_type == I_TYPE){
4252 for(y=0; y<h; y++){
4253 for(x=0; x<w; x++){
4254 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4255 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4258 }else{
4259 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4260 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4263 if(s->avctx->flags&CODEC_FLAG_PSNR){
4264 int64_t error= 0;
4266 if(pict->data[plane_index]) //FIXME gray hack
4267 for(y=0; y<h; y++){
4268 for(x=0; x<w; x++){
4269 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4270 error += d*d;
4273 s->avctx->error[plane_index] += error;
4274 s->current_picture.error[plane_index] = error;
4278 if(s->last_picture[s->max_ref_frames-1].data[0])
4279 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4281 s->current_picture.coded_picture_number = avctx->frame_number;
4282 s->current_picture.pict_type = pict->pict_type;
4283 s->current_picture.quality = pict->quality;
4284 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4285 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4286 s->m.current_picture.display_picture_number =
4287 s->m.current_picture.coded_picture_number = avctx->frame_number;
4288 s->m.current_picture.quality = pict->quality;
4289 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4290 if(s->pass1_rc)
4291 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4292 return -1;
4293 if(avctx->flags&CODEC_FLAG_PASS1)
4294 ff_write_pass1_stats(&s->m);
4295 s->m.last_pict_type = s->m.pict_type;
4296 avctx->frame_bits = s->m.frame_bits;
4297 avctx->mv_bits = s->m.mv_bits;
4298 avctx->misc_bits = s->m.misc_bits;
4299 avctx->p_tex_bits = s->m.p_tex_bits;
4301 emms_c();
4303 return ff_rac_terminate(c);
4306 static void common_end(SnowContext *s){
4307 int plane_index, level, orientation, i;
4309 av_freep(&s->spatial_dwt_buffer);
4311 av_freep(&s->m.me.scratchpad);
4312 av_freep(&s->m.me.map);
4313 av_freep(&s->m.me.score_map);
4314 av_freep(&s->m.obmc_scratchpad);
4316 av_freep(&s->block);
4318 for(i=0; i<MAX_REF_FRAMES; i++){
4319 av_freep(&s->ref_mvs[i]);
4320 av_freep(&s->ref_scores[i]);
4321 if(s->last_picture[i].data[0])
4322 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4325 for(plane_index=0; plane_index<3; plane_index++){
4326 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4327 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4328 SubBand *b= &s->plane[plane_index].band[level][orientation];
4330 av_freep(&b->x_coeff);
4336 static int encode_end(AVCodecContext *avctx)
4338 SnowContext *s = avctx->priv_data;
4340 common_end(s);
4341 av_free(avctx->stats_out);
4343 return 0;
4346 static int decode_init(AVCodecContext *avctx)
4348 SnowContext *s = avctx->priv_data;
4349 int block_size;
4351 avctx->pix_fmt= PIX_FMT_YUV420P;
4353 common_init(avctx);
4355 block_size = MB_SIZE >> s->block_max_depth;
4356 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4358 return 0;
4361 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4362 SnowContext *s = avctx->priv_data;
4363 RangeCoder * const c= &s->c;
4364 int bytes_read;
4365 AVFrame *picture = data;
4366 int level, orientation, plane_index;
4368 ff_init_range_decoder(c, buf, buf_size);
4369 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4371 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4372 decode_header(s);
4373 if(!s->block) alloc_blocks(s);
4375 frame_start(s);
4376 //keyframe flag dupliaction mess FIXME
4377 if(avctx->debug&FF_DEBUG_PICT_INFO)
4378 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4380 decode_blocks(s);
4382 for(plane_index=0; plane_index<3; plane_index++){
4383 Plane *p= &s->plane[plane_index];
4384 int w= p->width;
4385 int h= p->height;
4386 int x, y;
4387 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4389 if(s->avctx->debug&2048){
4390 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4391 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4393 for(y=0; y<h; y++){
4394 for(x=0; x<w; x++){
4395 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4396 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4401 { START_TIMER
4402 for(level=0; level<s->spatial_decomposition_count; level++){
4403 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4404 SubBand *b= &p->band[level][orientation];
4405 unpack_coeffs(s, b, b->parent, orientation);
4408 STOP_TIMER("unpack coeffs");
4411 {START_TIMER
4412 const int mb_h= s->b_height << s->block_max_depth;
4413 const int block_size = MB_SIZE >> s->block_max_depth;
4414 const int block_w = plane_index ? block_size/2 : block_size;
4415 int mb_y;
4416 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4417 int yd=0, yq=0;
4418 int y;
4419 int end_y;
4421 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4422 for(mb_y=0; mb_y<=mb_h; mb_y++){
4424 int slice_starty = block_w*mb_y;
4425 int slice_h = block_w*(mb_y+1);
4426 if (!(s->keyframe || s->avctx->debug&512)){
4427 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4428 slice_h -= (block_w >> 1);
4432 START_TIMER
4433 for(level=0; level<s->spatial_decomposition_count; level++){
4434 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4435 SubBand *b= &p->band[level][orientation];
4436 int start_y;
4437 int end_y;
4438 int our_mb_start = mb_y;
4439 int our_mb_end = (mb_y + 1);
4440 const int extra= 3;
4441 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4442 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4443 if (!(s->keyframe || s->avctx->debug&512)){
4444 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4445 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4447 start_y = FFMIN(b->height, start_y);
4448 end_y = FFMIN(b->height, end_y);
4450 if (start_y != end_y){
4451 if (orientation == 0){
4452 SubBand * correlate_band = &p->band[0][0];
4453 int correlate_end_y = FFMIN(b->height, end_y + 1);
4454 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4455 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4456 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4457 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4459 else
4460 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4464 STOP_TIMER("decode_subband_slice");
4467 { START_TIMER
4468 for(; yd<slice_h; yd+=4){
4469 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4471 STOP_TIMER("idwt slice");}
4474 if(s->qlog == LOSSLESS_QLOG){
4475 for(; yq<slice_h && yq<h; yq++){
4476 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4477 for(x=0; x<w; x++){
4478 line[x] <<= FRAC_BITS;
4483 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4485 y = FFMIN(p->height, slice_starty);
4486 end_y = FFMIN(p->height, slice_h);
4487 while(y < end_y)
4488 slice_buffer_release(&s->sb, y++);
4491 slice_buffer_flush(&s->sb);
4493 STOP_TIMER("idwt + predict_slices")}
4496 emms_c();
4498 if(s->last_picture[s->max_ref_frames-1].data[0])
4499 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4501 if(!(s->avctx->debug&2048))
4502 *picture= s->current_picture;
4503 else
4504 *picture= s->mconly_picture;
4506 *data_size = sizeof(AVFrame);
4508 bytes_read= c->bytestream - c->bytestream_start;
4509 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4511 return bytes_read;
4514 static int decode_end(AVCodecContext *avctx)
4516 SnowContext *s = avctx->priv_data;
4518 slice_buffer_destroy(&s->sb);
4520 common_end(s);
4522 return 0;
4525 AVCodec snow_decoder = {
4526 "snow",
4527 CODEC_TYPE_VIDEO,
4528 CODEC_ID_SNOW,
4529 sizeof(SnowContext),
4530 decode_init,
4531 NULL,
4532 decode_end,
4533 decode_frame,
4534 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4535 NULL
4538 #ifdef CONFIG_SNOW_ENCODER
4539 AVCodec snow_encoder = {
4540 "snow",
4541 CODEC_TYPE_VIDEO,
4542 CODEC_ID_SNOW,
4543 sizeof(SnowContext),
4544 encode_init,
4545 encode_frame,
4546 encode_end,
4548 #endif
4551 #if 0
4552 #undef malloc
4553 #undef free
4554 #undef printf
4555 #undef random
4557 int main(){
4558 int width=256;
4559 int height=256;
4560 int buffer[2][width*height];
4561 SnowContext s;
4562 int i;
4563 s.spatial_decomposition_count=6;
4564 s.spatial_decomposition_type=1;
4566 printf("testing 5/3 DWT\n");
4567 for(i=0; i<width*height; i++)
4568 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4570 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4571 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4573 for(i=0; i<width*height; i++)
4574 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4576 printf("testing 9/7 DWT\n");
4577 s.spatial_decomposition_type=0;
4578 for(i=0; i<width*height; i++)
4579 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4581 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4582 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4584 for(i=0; i<width*height; i++)
4585 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4587 #if 0
4588 printf("testing AC coder\n");
4589 memset(s.header_state, 0, sizeof(s.header_state));
4590 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4591 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4593 for(i=-256; i<256; i++){
4594 START_TIMER
4595 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4596 STOP_TIMER("put_symbol")
4598 ff_rac_terminate(&s.c);
4600 memset(s.header_state, 0, sizeof(s.header_state));
4601 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4602 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4604 for(i=-256; i<256; i++){
4605 int j;
4606 START_TIMER
4607 j= get_symbol(&s.c, s.header_state, 1);
4608 STOP_TIMER("get_symbol")
4609 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4611 #endif
4613 int level, orientation, x, y;
4614 int64_t errors[8][4];
4615 int64_t g=0;
4617 memset(errors, 0, sizeof(errors));
4618 s.spatial_decomposition_count=3;
4619 s.spatial_decomposition_type=0;
4620 for(level=0; level<s.spatial_decomposition_count; level++){
4621 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4622 int w= width >> (s.spatial_decomposition_count-level);
4623 int h= height >> (s.spatial_decomposition_count-level);
4624 int stride= width << (s.spatial_decomposition_count-level);
4625 DWTELEM *buf= buffer[0];
4626 int64_t error=0;
4628 if(orientation&1) buf+=w;
4629 if(orientation>1) buf+=stride>>1;
4631 memset(buffer[0], 0, sizeof(int)*width*height);
4632 buf[w/2 + h/2*stride]= 256*256;
4633 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4634 for(y=0; y<height; y++){
4635 for(x=0; x<width; x++){
4636 int64_t d= buffer[0][x + y*width];
4637 error += d*d;
4638 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4640 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4642 error= (int)(sqrt(error)+0.5);
4643 errors[level][orientation]= error;
4644 if(g) g=ff_gcd(g, error);
4645 else g= error;
4648 printf("static int const visual_weight[][4]={\n");
4649 for(level=0; level<s.spatial_decomposition_count; level++){
4650 printf(" {");
4651 for(orientation=0; orientation<4; orientation++){
4652 printf("%8"PRId64",", errors[level][orientation]/g);
4654 printf("},\n");
4656 printf("};\n");
4658 int level=2;
4659 int orientation=3;
4660 int w= width >> (s.spatial_decomposition_count-level);
4661 int h= height >> (s.spatial_decomposition_count-level);
4662 int stride= width << (s.spatial_decomposition_count-level);
4663 DWTELEM *buf= buffer[0];
4664 int64_t error=0;
4666 buf+=w;
4667 buf+=stride>>1;
4669 memset(buffer[0], 0, sizeof(int)*width*height);
4670 #if 1
4671 for(y=0; y<height; y++){
4672 for(x=0; x<width; x++){
4673 int tab[4]={0,2,3,1};
4674 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4677 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4678 #else
4679 for(y=0; y<h; y++){
4680 for(x=0; x<w; x++){
4681 buf[x + y*stride ]=169;
4682 buf[x + y*stride-w]=64;
4685 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4686 #endif
4687 for(y=0; y<height; y++){
4688 for(x=0; x<width; x++){
4689 int64_t d= buffer[0][x + y*width];
4690 error += d*d;
4691 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4693 if(FFABS(height/2-y)<9) printf("\n");
4698 return 0;
4700 #endif