Refactor intra block prediction and reconstruction process
[aom.git] / vp9 / encoder / vp9_resize.c
blobf46cad80491a83ba0ac77bedff05ac789c85edde
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11 #include <assert.h>
12 #include <limits.h>
13 #include <math.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
18 #include "vpx_ports/mem.h"
19 #include "vp9/common/vp9_common.h"
20 #include "vp9/encoder/vp9_resize.h"
// Filtered sums are rounded and shifted right by this many bits before being
// clipped to a pixel value.
#define FILTER_BITS 7

// Number of coefficients in one interpolation kernel.
#define INTERP_TAPS 8
// Each kernel table holds (1 << SUBPEL_BITS) kernels, one per sub-pixel phase.
#define SUBPEL_BITS 5
// Mask that extracts the sub-pixel phase from a fixed-point position.
#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
// Number of fractional bits in the fixed-point source position.
#define INTERP_PRECISION_BITS 32

// One kernel: INTERP_TAPS signed 16-bit coefficients.
typedef int16_t interp_kernel[INTERP_TAPS];
31 // Filters for interpolation (0.5-band) - note this also filters integer pels.
32 static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
33 {-3, 0, 35, 64, 35, 0, -3, 0},
34 {-3, -1, 34, 64, 36, 1, -3, 0},
35 {-3, -1, 32, 64, 38, 1, -3, 0},
36 {-2, -2, 31, 63, 39, 2, -3, 0},
37 {-2, -2, 29, 63, 41, 2, -3, 0},
38 {-2, -2, 28, 63, 42, 3, -4, 0},
39 {-2, -3, 27, 63, 43, 4, -4, 0},
40 {-2, -3, 25, 62, 45, 5, -4, 0},
41 {-2, -3, 24, 62, 46, 5, -4, 0},
42 {-2, -3, 23, 61, 47, 6, -4, 0},
43 {-2, -3, 21, 60, 49, 7, -4, 0},
44 {-1, -4, 20, 60, 50, 8, -4, -1},
45 {-1, -4, 19, 59, 51, 9, -4, -1},
46 {-1, -4, 17, 58, 52, 10, -4, 0},
47 {-1, -4, 16, 57, 53, 12, -4, -1},
48 {-1, -4, 15, 56, 54, 13, -4, -1},
49 {-1, -4, 14, 55, 55, 14, -4, -1},
50 {-1, -4, 13, 54, 56, 15, -4, -1},
51 {-1, -4, 12, 53, 57, 16, -4, -1},
52 {0, -4, 10, 52, 58, 17, -4, -1},
53 {-1, -4, 9, 51, 59, 19, -4, -1},
54 {-1, -4, 8, 50, 60, 20, -4, -1},
55 {0, -4, 7, 49, 60, 21, -3, -2},
56 {0, -4, 6, 47, 61, 23, -3, -2},
57 {0, -4, 5, 46, 62, 24, -3, -2},
58 {0, -4, 5, 45, 62, 25, -3, -2},
59 {0, -4, 4, 43, 63, 27, -3, -2},
60 {0, -4, 3, 42, 63, 28, -2, -2},
61 {0, -3, 2, 41, 63, 29, -2, -2},
62 {0, -3, 2, 39, 63, 31, -2, -2},
63 {0, -3, 1, 38, 64, 32, -1, -3},
64 {0, -3, 1, 36, 64, 34, -1, -3}
67 // Filters for interpolation (0.625-band) - note this also filters integer pels.
68 static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
69 {-1, -8, 33, 80, 33, -8, -1, 0},
70 {-1, -8, 30, 80, 35, -8, -1, 1},
71 {-1, -8, 28, 80, 37, -7, -2, 1},
72 {0, -8, 26, 79, 39, -7, -2, 1},
73 {0, -8, 24, 79, 41, -7, -2, 1},
74 {0, -8, 22, 78, 43, -6, -2, 1},
75 {0, -8, 20, 78, 45, -5, -3, 1},
76 {0, -8, 18, 77, 48, -5, -3, 1},
77 {0, -8, 16, 76, 50, -4, -3, 1},
78 {0, -8, 15, 75, 52, -3, -4, 1},
79 {0, -7, 13, 74, 54, -3, -4, 1},
80 {0, -7, 11, 73, 56, -2, -4, 1},
81 {0, -7, 10, 71, 58, -1, -4, 1},
82 {1, -7, 8, 70, 60, 0, -5, 1},
83 {1, -6, 6, 68, 62, 1, -5, 1},
84 {1, -6, 5, 67, 63, 2, -5, 1},
85 {1, -6, 4, 65, 65, 4, -6, 1},
86 {1, -5, 2, 63, 67, 5, -6, 1},
87 {1, -5, 1, 62, 68, 6, -6, 1},
88 {1, -5, 0, 60, 70, 8, -7, 1},
89 {1, -4, -1, 58, 71, 10, -7, 0},
90 {1, -4, -2, 56, 73, 11, -7, 0},
91 {1, -4, -3, 54, 74, 13, -7, 0},
92 {1, -4, -3, 52, 75, 15, -8, 0},
93 {1, -3, -4, 50, 76, 16, -8, 0},
94 {1, -3, -5, 48, 77, 18, -8, 0},
95 {1, -3, -5, 45, 78, 20, -8, 0},
96 {1, -2, -6, 43, 78, 22, -8, 0},
97 {1, -2, -7, 41, 79, 24, -8, 0},
98 {1, -2, -7, 39, 79, 26, -8, 0},
99 {1, -2, -7, 37, 80, 28, -8, -1},
100 {1, -1, -8, 35, 80, 30, -8, -1},
103 // Filters for interpolation (0.75-band) - note this also filters integer pels.
104 static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
105 {2, -11, 25, 96, 25, -11, 2, 0},
106 {2, -11, 22, 96, 28, -11, 2, 0},
107 {2, -10, 19, 95, 31, -11, 2, 0},
108 {2, -10, 17, 95, 34, -12, 2, 0},
109 {2, -9, 14, 94, 37, -12, 2, 0},
110 {2, -8, 12, 93, 40, -12, 1, 0},
111 {2, -8, 9, 92, 43, -12, 1, 1},
112 {2, -7, 7, 91, 46, -12, 1, 0},
113 {2, -7, 5, 90, 49, -12, 1, 0},
114 {2, -6, 3, 88, 52, -12, 0, 1},
115 {2, -5, 1, 86, 55, -12, 0, 1},
116 {2, -5, -1, 84, 58, -11, 0, 1},
117 {2, -4, -2, 82, 61, -11, -1, 1},
118 {2, -4, -4, 80, 64, -10, -1, 1},
119 {1, -3, -5, 77, 67, -9, -1, 1},
120 {1, -3, -6, 75, 70, -8, -2, 1},
121 {1, -2, -7, 72, 72, -7, -2, 1},
122 {1, -2, -8, 70, 75, -6, -3, 1},
123 {1, -1, -9, 67, 77, -5, -3, 1},
124 {1, -1, -10, 64, 80, -4, -4, 2},
125 {1, -1, -11, 61, 82, -2, -4, 2},
126 {1, 0, -11, 58, 84, -1, -5, 2},
127 {1, 0, -12, 55, 86, 1, -5, 2},
128 {1, 0, -12, 52, 88, 3, -6, 2},
129 {0, 1, -12, 49, 90, 5, -7, 2},
130 {0, 1, -12, 46, 91, 7, -7, 2},
131 {1, 1, -12, 43, 92, 9, -8, 2},
132 {0, 1, -12, 40, 93, 12, -8, 2},
133 {0, 2, -12, 37, 94, 14, -9, 2},
134 {0, 2, -12, 34, 95, 17, -10, 2},
135 {0, 2, -11, 31, 95, 19, -10, 2},
136 {0, 2, -11, 28, 96, 22, -11, 2}
139 // Filters for interpolation (0.875-band) - note this also filters integer pels.
140 static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
141 {3, -8, 13, 112, 13, -8, 3, 0},
142 {3, -7, 10, 112, 17, -9, 3, -1},
143 {2, -6, 7, 111, 21, -9, 3, -1},
144 {2, -5, 4, 111, 24, -10, 3, -1},
145 {2, -4, 1, 110, 28, -11, 3, -1},
146 {1, -3, -1, 108, 32, -12, 4, -1},
147 {1, -2, -3, 106, 36, -13, 4, -1},
148 {1, -1, -6, 105, 40, -14, 4, -1},
149 {1, -1, -7, 102, 44, -14, 4, -1},
150 {1, 0, -9, 100, 48, -15, 4, -1},
151 {1, 1, -11, 97, 53, -16, 4, -1},
152 {0, 1, -12, 95, 57, -16, 4, -1},
153 {0, 2, -13, 91, 61, -16, 4, -1},
154 {0, 2, -14, 88, 65, -16, 4, -1},
155 {0, 3, -15, 84, 69, -17, 4, 0},
156 {0, 3, -16, 81, 73, -16, 3, 0},
157 {0, 3, -16, 77, 77, -16, 3, 0},
158 {0, 3, -16, 73, 81, -16, 3, 0},
159 {0, 4, -17, 69, 84, -15, 3, 0},
160 {-1, 4, -16, 65, 88, -14, 2, 0},
161 {-1, 4, -16, 61, 91, -13, 2, 0},
162 {-1, 4, -16, 57, 95, -12, 1, 0},
163 {-1, 4, -16, 53, 97, -11, 1, 1},
164 {-1, 4, -15, 48, 100, -9, 0, 1},
165 {-1, 4, -14, 44, 102, -7, -1, 1},
166 {-1, 4, -14, 40, 105, -6, -1, 1},
167 {-1, 4, -13, 36, 106, -3, -2, 1},
168 {-1, 4, -12, 32, 108, -1, -3, 1},
169 {-1, 3, -11, 28, 110, 1, -4, 2},
170 {-1, 3, -10, 24, 111, 4, -5, 2},
171 {-1, 3, -9, 21, 111, 7, -6, 2},
172 {-1, 3, -9, 17, 112, 10, -7, 3}
175 // Filters for interpolation (full-band) - no filtering for integer pixels
176 static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
177 {0, 0, 0, 128, 0, 0, 0, 0},
178 {0, 1, -3, 128, 3, -1, 0, 0},
179 {-1, 2, -6, 127, 7, -2, 1, 0},
180 {-1, 3, -9, 126, 12, -4, 1, 0},
181 {-1, 4, -12, 125, 16, -5, 1, 0},
182 {-1, 4, -14, 123, 20, -6, 2, 0},
183 {-1, 5, -15, 120, 25, -8, 2, 0},
184 {-1, 5, -17, 118, 30, -9, 3, -1},
185 {-1, 6, -18, 114, 35, -10, 3, -1},
186 {-1, 6, -19, 111, 41, -12, 3, -1},
187 {-1, 6, -20, 107, 46, -13, 4, -1},
188 {-1, 6, -21, 103, 52, -14, 4, -1},
189 {-1, 6, -21, 99, 57, -16, 5, -1},
190 {-1, 6, -21, 94, 63, -17, 5, -1},
191 {-1, 6, -20, 89, 68, -18, 5, -1},
192 {-1, 6, -20, 84, 73, -19, 6, -1},
193 {-1, 6, -20, 79, 79, -20, 6, -1},
194 {-1, 6, -19, 73, 84, -20, 6, -1},
195 {-1, 5, -18, 68, 89, -20, 6, -1},
196 {-1, 5, -17, 63, 94, -21, 6, -1},
197 {-1, 5, -16, 57, 99, -21, 6, -1},
198 {-1, 4, -14, 52, 103, -21, 6, -1},
199 {-1, 4, -13, 46, 107, -20, 6, -1},
200 {-1, 3, -12, 41, 111, -19, 6, -1},
201 {-1, 3, -10, 35, 114, -18, 6, -1},
202 {-1, 3, -9, 30, 118, -17, 5, -1},
203 {0, 2, -8, 25, 120, -15, 5, -1},
204 {0, 2, -6, 20, 123, -14, 4, -1},
205 {0, 1, -5, 16, 125, -12, 4, -1},
206 {0, 1, -4, 12, 126, -9, 3, -1},
207 {0, 1, -2, 7, 127, -6, 2, -1},
208 {0, 0, -1, 3, 128, -3, 1, 0}
// Filters for factor of 2 downsampling.
// Only one half of each symmetric filter is stored, centre tap(s) first.
// Even-length filter: every stored tap is applied to a mirrored pair of
// samples, so the full filter has 2 * 4 taps.
static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1};
// Odd-length filter: element 0 is the single centre tap, the rest are applied
// to mirrored pairs, so the full filter has 2 * 4 - 1 taps.
static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3};
215 static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
216 int outlength16 = outlength * 16;
217 if (outlength16 >= inlength * 16)
218 return filteredinterp_filters1000;
219 else if (outlength16 >= inlength * 13)
220 return filteredinterp_filters875;
221 else if (outlength16 >= inlength * 11)
222 return filteredinterp_filters750;
223 else if (outlength16 >= inlength * 9)
224 return filteredinterp_filters625;
225 else
226 return filteredinterp_filters500;
229 static void interpolate(const uint8_t *const input, int inlength,
230 uint8_t *output, int outlength) {
231 const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) /
232 outlength;
233 const int64_t offset = inlength > outlength ?
234 (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
235 -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
236 uint8_t *optr = output;
237 int x, x1, x2, sum, k, int_pel, sub_pel;
238 int64_t y;
240 const interp_kernel *interp_filters =
241 choose_interp_filter(inlength, outlength);
243 x = 0;
244 y = offset;
245 while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
246 x++;
247 y += delta;
249 x1 = x;
250 x = outlength - 1;
251 y = delta * x + offset;
252 while ((y >> INTERP_PRECISION_BITS) +
253 (int64_t)(INTERP_TAPS / 2) >= inlength) {
254 x--;
255 y -= delta;
257 x2 = x;
258 if (x1 > x2) {
259 for (x = 0, y = offset; x < outlength; ++x, y += delta) {
260 const int16_t *filter;
261 int_pel = y >> INTERP_PRECISION_BITS;
262 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
263 filter = interp_filters[sub_pel];
264 sum = 0;
265 for (k = 0; k < INTERP_TAPS; ++k) {
266 const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
267 sum += filter[k] * input[(pk < 0 ? 0 :
268 (pk >= inlength ? inlength - 1 : pk))];
270 *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
272 } else {
273 // Initial part.
274 for (x = 0, y = offset; x < x1; ++x, y += delta) {
275 const int16_t *filter;
276 int_pel = y >> INTERP_PRECISION_BITS;
277 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
278 filter = interp_filters[sub_pel];
279 sum = 0;
280 for (k = 0; k < INTERP_TAPS; ++k)
281 sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
283 int_pel - INTERP_TAPS / 2 + 1 + k)];
284 *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
286 // Middle part.
287 for (; x <= x2; ++x, y += delta) {
288 const int16_t *filter;
289 int_pel = y >> INTERP_PRECISION_BITS;
290 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
291 filter = interp_filters[sub_pel];
292 sum = 0;
293 for (k = 0; k < INTERP_TAPS; ++k)
294 sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
295 *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
297 // End part.
298 for (; x < outlength; ++x, y += delta) {
299 const int16_t *filter;
300 int_pel = y >> INTERP_PRECISION_BITS;
301 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
302 filter = interp_filters[sub_pel];
303 sum = 0;
304 for (k = 0; k < INTERP_TAPS; ++k)
305 sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
306 inlength ? inlength - 1 :
307 int_pel - INTERP_TAPS / 2 + 1 + k)];
308 *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
313 static void down2_symeven(const uint8_t *const input, int length,
314 uint8_t *output) {
315 // Actual filter len = 2 * filter_len_half.
316 const int16_t *filter = vp9_down2_symeven_half_filter;
317 const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2;
318 int i, j;
319 uint8_t *optr = output;
320 int l1 = filter_len_half;
321 int l2 = (length - filter_len_half);
322 l1 += (l1 & 1);
323 l2 += (l2 & 1);
324 if (l1 > l2) {
325 // Short input length.
326 for (i = 0; i < length; i += 2) {
327 int sum = (1 << (FILTER_BITS - 1));
328 for (j = 0; j < filter_len_half; ++j) {
329 sum += (input[(i - j < 0 ? 0 : i - j)] +
330 input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
331 filter[j];
333 sum >>= FILTER_BITS;
334 *optr++ = clip_pixel(sum);
336 } else {
337 // Initial part.
338 for (i = 0; i < l1; i += 2) {
339 int sum = (1 << (FILTER_BITS - 1));
340 for (j = 0; j < filter_len_half; ++j) {
341 sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
343 sum >>= FILTER_BITS;
344 *optr++ = clip_pixel(sum);
346 // Middle part.
347 for (; i < l2; i += 2) {
348 int sum = (1 << (FILTER_BITS - 1));
349 for (j = 0; j < filter_len_half; ++j) {
350 sum += (input[i - j] + input[i + 1 + j]) * filter[j];
352 sum >>= FILTER_BITS;
353 *optr++ = clip_pixel(sum);
355 // End part.
356 for (; i < length; i += 2) {
357 int sum = (1 << (FILTER_BITS - 1));
358 for (j = 0; j < filter_len_half; ++j) {
359 sum += (input[i - j] +
360 input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
361 filter[j];
363 sum >>= FILTER_BITS;
364 *optr++ = clip_pixel(sum);
369 static void down2_symodd(const uint8_t *const input, int length,
370 uint8_t *output) {
371 // Actual filter len = 2 * filter_len_half - 1.
372 const int16_t *filter = vp9_down2_symodd_half_filter;
373 const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2;
374 int i, j;
375 uint8_t *optr = output;
376 int l1 = filter_len_half - 1;
377 int l2 = (length - filter_len_half + 1);
378 l1 += (l1 & 1);
379 l2 += (l2 & 1);
380 if (l1 > l2) {
381 // Short input length.
382 for (i = 0; i < length; i += 2) {
383 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
384 for (j = 1; j < filter_len_half; ++j) {
385 sum += (input[(i - j < 0 ? 0 : i - j)] +
386 input[(i + j >= length ? length - 1 : i + j)]) *
387 filter[j];
389 sum >>= FILTER_BITS;
390 *optr++ = clip_pixel(sum);
392 } else {
393 // Initial part.
394 for (i = 0; i < l1; i += 2) {
395 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
396 for (j = 1; j < filter_len_half; ++j) {
397 sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
399 sum >>= FILTER_BITS;
400 *optr++ = clip_pixel(sum);
402 // Middle part.
403 for (; i < l2; i += 2) {
404 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
405 for (j = 1; j < filter_len_half; ++j) {
406 sum += (input[i - j] + input[i + j]) * filter[j];
408 sum >>= FILTER_BITS;
409 *optr++ = clip_pixel(sum);
411 // End part.
412 for (; i < length; i += 2) {
413 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
414 for (j = 1; j < filter_len_half; ++j) {
415 sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
416 filter[j];
418 sum >>= FILTER_BITS;
419 *optr++ = clip_pixel(sum);
// Returns the line length after `steps` successive factor-of-2 reductions,
// each of which rounds up: length -> (length + 1) / 2.
static int get_down2_length(int length, int steps) {
  int s;
  for (s = 0; s < steps; ++s)
    length = (length + 1) >> 1;
  return length;
}
// Returns how many factor-of-2 reductions can be applied to in_length while
// the reduced length stays >= out_length.
//
// The loop also stops once the length has been reduced to 1: halving 1 with
// round-up yields 1 again, so without that guard any request with
// out_length <= 1 would never terminate.
static int get_down2_steps(int in_length, int out_length) {
  int steps = 0;
  int proj_in_length;
  // (in_length + 1) >> 1 is the same halving as get_down2_length(in_length, 1).
  while (in_length > 1 &&
         (proj_in_length = (in_length + 1) >> 1) >= out_length) {
    ++steps;
    in_length = proj_in_length;
  }
  return steps;
}
// Resizes one line of `length` 8-bit samples to `olength` samples.
//
// Equal lengths are copied. Otherwise the line is first halved as many times
// as get_down2_steps() allows and any remaining non-dyadic ratio is handled by
// interpolate().
//
// buf is optional scratch space for the intermediate halved lines. When it is
// non-NULL it must hold at least `length` samples (the same amount this
// function allocates for itself when buf is NULL).
static void resize_multistep(const uint8_t *const input,
                             int length,
                             uint8_t *output,
                             int olength,
                             uint8_t *buf) {
  int steps;
  if (length == olength) {
    memcpy(output, input, sizeof(uint8_t) * length);
    return;
  }
  steps = get_down2_steps(length, olength);

  if (steps > 0) {
    int s;
    uint8_t *out = NULL;
    uint8_t *tmpbuf = NULL;
    uint8_t *otmp, *otmp2;
    int filteredlength = length;
    // Use the caller's scratch buffer when one is supplied; only allocate
    // when it is missing.
    if (buf == NULL) {
      tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) * length);
      if (tmpbuf == NULL) {
        // No scratch space available: fall back to a single interpolation
        // pass, which needs none, so that output is still fully written.
        interpolate(input, length, output, olength);
        return;
      }
      otmp = tmpbuf;
    } else {
      otmp = buf;
    }
    // Intermediate results ping-pong between the two halves of the scratch
    // buffer so that a step never reads and writes the same region.
    otmp2 = otmp + get_down2_length(length, 1);
    for (s = 0; s < steps; ++s) {
      const int proj_filteredlength = get_down2_length(filteredlength, 1);
      const uint8_t *const in = (s == 0 ? input : out);
      // Write the last step straight to output when it lands on olength.
      if (s == steps - 1 && proj_filteredlength == olength)
        out = output;
      else
        out = (s & 1 ? otmp2 : otmp);
      if (filteredlength & 1)
        down2_symodd(in, filteredlength, out);
      else
        down2_symeven(in, filteredlength, out);
      filteredlength = proj_filteredlength;
    }
    if (filteredlength != olength) {
      interpolate(out, filteredlength, output, olength);
    }
    free(tmpbuf);  // NULL (a no-op) when the caller's buffer was used.
  } else {
    interpolate(input, length, output, olength);
  }
}
// Gathers one image column into a contiguous array: copies `len` samples,
// starting at img and stepping `stride` samples between rows, into arr.
static void fill_col_to_arr(const uint8_t *img, int stride, int len,
                            uint8_t *arr) {
  int i;
  const uint8_t *iptr = img;
  uint8_t *aptr = arr;
  for (i = 0; i < len; ++i, iptr += stride) {
    *aptr++ = *iptr;
  }
}
// Scatters a contiguous array into one image column: writes `len` samples
// from arr to img, stepping `stride` samples between rows.
static void fill_arr_to_col(uint8_t *img, int stride, int len,
                            const uint8_t *arr) {
  int i;
  uint8_t *iptr = img;
  const uint8_t *aptr = arr;
  for (i = 0; i < len; ++i, iptr += stride) {
    *iptr = *aptr++;
  }
}
// Resizes an 8-bit plane of width x height samples (row pitch in_stride) into
// a plane of width2 x height2 samples (row pitch out_stride).
//
// The resize is separable: every input row is first resized to width2 into an
// intermediate buffer, then every column of that buffer is resized to height2
// and written to output. All four dimensions must be positive.
//
// If any working buffer cannot be allocated the function returns without
// writing to output.
void vp9_resize_plane(const uint8_t *const input,
                      int height,
                      int width,
                      int in_stride,
                      uint8_t *output,
                      int height2,
                      int width2,
                      int out_stride) {
  int i;
  uint8_t *intbuf = NULL;
  uint8_t *tmpbuf = NULL;
  uint8_t *arrbuf = NULL;
  // Validate before the dimensions are used to size the allocations.
  assert(width > 0);
  assert(height > 0);
  assert(width2 > 0);
  assert(height2 > 0);
  // intbuf: horizontally resized image, width2 x height, pitch width2.
  intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
  // tmpbuf: scratch for resize_multistep, large enough for a row or a column.
  tmpbuf = (uint8_t *)malloc(sizeof(uint8_t) *
                             (width < height ? height : width));
  // arrbuf: one gathered input column followed by its resized output column.
  arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * (height + height2));
  if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL)
    goto Error;
  for (i = 0; i < height; ++i)
    resize_multistep(input + in_stride * i, width,
                     intbuf + width2 * i, width2, tmpbuf);
  for (i = 0; i < width2; ++i) {
    fill_col_to_arr(intbuf + i, width2, height, arrbuf);
    resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf);
    fill_arr_to_col(output + i, out_stride, height2, arrbuf + height);
  }

Error:
  free(intbuf);
  free(tmpbuf);
  free(arrbuf);
}
537 #if CONFIG_VP9_HIGHBITDEPTH
538 static void highbd_interpolate(const uint16_t *const input, int inlength,
539 uint16_t *output, int outlength, int bd) {
540 const int64_t delta =
541 (((uint64_t)inlength << 32) + outlength / 2) / outlength;
542 const int64_t offset = inlength > outlength ?
543 (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength :
544 -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength;
545 uint16_t *optr = output;
546 int x, x1, x2, sum, k, int_pel, sub_pel;
547 int64_t y;
549 const interp_kernel *interp_filters =
550 choose_interp_filter(inlength, outlength);
552 x = 0;
553 y = offset;
554 while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
555 x++;
556 y += delta;
558 x1 = x;
559 x = outlength - 1;
560 y = delta * x + offset;
561 while ((y >> INTERP_PRECISION_BITS) +
562 (int64_t)(INTERP_TAPS / 2) >= inlength) {
563 x--;
564 y -= delta;
566 x2 = x;
567 if (x1 > x2) {
568 for (x = 0, y = offset; x < outlength; ++x, y += delta) {
569 const int16_t *filter;
570 int_pel = y >> INTERP_PRECISION_BITS;
571 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
572 filter = interp_filters[sub_pel];
573 sum = 0;
574 for (k = 0; k < INTERP_TAPS; ++k) {
575 const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
576 sum += filter[k] *
577 input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
579 *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
581 } else {
582 // Initial part.
583 for (x = 0, y = offset; x < x1; ++x, y += delta) {
584 const int16_t *filter;
585 int_pel = y >> INTERP_PRECISION_BITS;
586 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
587 filter = interp_filters[sub_pel];
588 sum = 0;
589 for (k = 0; k < INTERP_TAPS; ++k)
590 sum += filter[k] *
591 input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
592 0 : int_pel - INTERP_TAPS / 2 + 1 + k)];
593 *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
595 // Middle part.
596 for (; x <= x2; ++x, y += delta) {
597 const int16_t *filter;
598 int_pel = y >> INTERP_PRECISION_BITS;
599 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
600 filter = interp_filters[sub_pel];
601 sum = 0;
602 for (k = 0; k < INTERP_TAPS; ++k)
603 sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
604 *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
606 // End part.
607 for (; x < outlength; ++x, y += delta) {
608 const int16_t *filter;
609 int_pel = y >> INTERP_PRECISION_BITS;
610 sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
611 filter = interp_filters[sub_pel];
612 sum = 0;
613 for (k = 0; k < INTERP_TAPS; ++k)
614 sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
615 inlength ? inlength - 1 :
616 int_pel - INTERP_TAPS / 2 + 1 + k)];
617 *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
622 static void highbd_down2_symeven(const uint16_t *const input, int length,
623 uint16_t *output, int bd) {
624 // Actual filter len = 2 * filter_len_half.
625 static const int16_t *filter = vp9_down2_symeven_half_filter;
626 const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2;
627 int i, j;
628 uint16_t *optr = output;
629 int l1 = filter_len_half;
630 int l2 = (length - filter_len_half);
631 l1 += (l1 & 1);
632 l2 += (l2 & 1);
633 if (l1 > l2) {
634 // Short input length.
635 for (i = 0; i < length; i += 2) {
636 int sum = (1 << (FILTER_BITS - 1));
637 for (j = 0; j < filter_len_half; ++j) {
638 sum += (input[(i - j < 0 ? 0 : i - j)] +
639 input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
640 filter[j];
642 sum >>= FILTER_BITS;
643 *optr++ = clip_pixel_highbd(sum, bd);
645 } else {
646 // Initial part.
647 for (i = 0; i < l1; i += 2) {
648 int sum = (1 << (FILTER_BITS - 1));
649 for (j = 0; j < filter_len_half; ++j) {
650 sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
652 sum >>= FILTER_BITS;
653 *optr++ = clip_pixel_highbd(sum, bd);
655 // Middle part.
656 for (; i < l2; i += 2) {
657 int sum = (1 << (FILTER_BITS - 1));
658 for (j = 0; j < filter_len_half; ++j) {
659 sum += (input[i - j] + input[i + 1 + j]) * filter[j];
661 sum >>= FILTER_BITS;
662 *optr++ = clip_pixel_highbd(sum, bd);
664 // End part.
665 for (; i < length; i += 2) {
666 int sum = (1 << (FILTER_BITS - 1));
667 for (j = 0; j < filter_len_half; ++j) {
668 sum += (input[i - j] +
669 input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
670 filter[j];
672 sum >>= FILTER_BITS;
673 *optr++ = clip_pixel_highbd(sum, bd);
678 static void highbd_down2_symodd(const uint16_t *const input, int length,
679 uint16_t *output, int bd) {
680 // Actual filter len = 2 * filter_len_half - 1.
681 static const int16_t *filter = vp9_down2_symodd_half_filter;
682 const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2;
683 int i, j;
684 uint16_t *optr = output;
685 int l1 = filter_len_half - 1;
686 int l2 = (length - filter_len_half + 1);
687 l1 += (l1 & 1);
688 l2 += (l2 & 1);
689 if (l1 > l2) {
690 // Short input length.
691 for (i = 0; i < length; i += 2) {
692 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
693 for (j = 1; j < filter_len_half; ++j) {
694 sum += (input[(i - j < 0 ? 0 : i - j)] +
695 input[(i + j >= length ? length - 1 : i + j)]) *
696 filter[j];
698 sum >>= FILTER_BITS;
699 *optr++ = clip_pixel_highbd(sum, bd);
701 } else {
702 // Initial part.
703 for (i = 0; i < l1; i += 2) {
704 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
705 for (j = 1; j < filter_len_half; ++j) {
706 sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
708 sum >>= FILTER_BITS;
709 *optr++ = clip_pixel_highbd(sum, bd);
711 // Middle part.
712 for (; i < l2; i += 2) {
713 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
714 for (j = 1; j < filter_len_half; ++j) {
715 sum += (input[i - j] + input[i + j]) * filter[j];
717 sum >>= FILTER_BITS;
718 *optr++ = clip_pixel_highbd(sum, bd);
720 // End part.
721 for (; i < length; i += 2) {
722 int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
723 for (j = 1; j < filter_len_half; ++j) {
724 sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
725 filter[j];
727 sum >>= FILTER_BITS;
728 *optr++ = clip_pixel_highbd(sum, bd);
// High bit-depth counterpart of resize_multistep(): resizes one line of
// `length` 16-bit samples to `olength` samples for bit depth bd.
//
// Equal lengths are copied. Otherwise the line is first halved as many times
// as get_down2_steps() allows and any remaining non-dyadic ratio is handled by
// highbd_interpolate().
//
// buf is optional scratch space for the intermediate halved lines. When it is
// non-NULL it must hold at least `length` samples (the same amount this
// function allocates for itself when buf is NULL).
static void highbd_resize_multistep(const uint16_t *const input,
                                    int length,
                                    uint16_t *output,
                                    int olength,
                                    uint16_t *buf,
                                    int bd) {
  int steps;
  if (length == olength) {
    memcpy(output, input, sizeof(uint16_t) * length);
    return;
  }
  steps = get_down2_steps(length, olength);

  if (steps > 0) {
    int s;
    uint16_t *out = NULL;
    uint16_t *tmpbuf = NULL;
    uint16_t *otmp, *otmp2;
    int filteredlength = length;
    // Use the caller's scratch buffer when one is supplied; only allocate
    // when it is missing.
    if (buf == NULL) {
      tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * length);
      if (tmpbuf == NULL) {
        // No scratch space available: fall back to a single interpolation
        // pass, which needs none, so that output is still fully written.
        highbd_interpolate(input, length, output, olength, bd);
        return;
      }
      otmp = tmpbuf;
    } else {
      otmp = buf;
    }
    // Intermediate results ping-pong between the two halves of the scratch
    // buffer so that a step never reads and writes the same region.
    otmp2 = otmp + get_down2_length(length, 1);
    for (s = 0; s < steps; ++s) {
      const int proj_filteredlength = get_down2_length(filteredlength, 1);
      const uint16_t *const in = (s == 0 ? input : out);
      // Write the last step straight to output when it lands on olength.
      if (s == steps - 1 && proj_filteredlength == olength)
        out = output;
      else
        out = (s & 1 ? otmp2 : otmp);
      if (filteredlength & 1)
        highbd_down2_symodd(in, filteredlength, out, bd);
      else
        highbd_down2_symeven(in, filteredlength, out, bd);
      filteredlength = proj_filteredlength;
    }
    if (filteredlength != olength) {
      highbd_interpolate(out, filteredlength, output, olength, bd);
    }
    free(tmpbuf);  // NULL (a no-op) when the caller's buffer was used.
  } else {
    highbd_interpolate(input, length, output, olength, bd);
  }
}
// Gathers one column of 16-bit samples into a contiguous array: copies `len`
// samples, starting at img and stepping `stride` samples between rows, into
// arr.
static void highbd_fill_col_to_arr(const uint16_t *img, int stride, int len,
                                   uint16_t *arr) {
  int i;
  const uint16_t *iptr = img;
  uint16_t *aptr = arr;
  for (i = 0; i < len; ++i, iptr += stride) {
    *aptr++ = *iptr;
  }
}
// Scatters a contiguous array of 16-bit samples into one image column: writes
// `len` samples from arr to img, stepping `stride` samples between rows.
static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
                                   const uint16_t *arr) {
  int i;
  uint16_t *iptr = img;
  const uint16_t *aptr = arr;
  for (i = 0; i < len; ++i, iptr += stride) {
    *iptr = *aptr++;
  }
}
// High bit-depth counterpart of vp9_resize_plane(): resizes a width x height
// plane (row pitch in_stride) into a width2 x height2 plane (row pitch
// out_stride), clipping results for bit depth bd.
//
// input and output are passed as uint8_t pointers and converted with
// CONVERT_TO_SHORTPTR before being accessed as 16-bit samples.
// NOTE(review): the exact pointer encoding is defined by that macro, which is
// declared outside this file - confirm against its definition.
//
// The resize is separable: rows are resized to width2 into an intermediate
// buffer, then each column of that buffer is resized to height2.
//
// If any working buffer cannot be allocated the function returns without
// writing to output.
void vp9_highbd_resize_plane(const uint8_t *const input,
                             int height,
                             int width,
                             int in_stride,
                             uint8_t *output,
                             int height2,
                             int width2,
                             int out_stride,
                             int bd) {
  int i;
  // intbuf: horizontally resized image, width2 x height, pitch width2.
  uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);
  // tmpbuf: scratch for highbd_resize_multistep, fits a row or a column.
  uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) *
                                        (width < height ? height : width));
  // arrbuf: one gathered input column followed by its resized output column.
  uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * (height + height2));
  if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL)
    goto Error;
  for (i = 0; i < height; ++i) {
    highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
                            intbuf + width2 * i, width2, tmpbuf, bd);
  }
  for (i = 0; i < width2; ++i) {
    highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
    highbd_resize_multistep(arrbuf, height, arrbuf + height, height2, tmpbuf,
                            bd);
    highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
                           arrbuf + height);
  }

Error:
  free(intbuf);
  free(tmpbuf);
  free(arrbuf);
}
831 #endif // CONFIG_VP9_HIGHBITDEPTH
// Resizes a three-plane 8-bit frame from width x height to owidth x oheight.
// The Y plane is resized at the given dimensions; the U and V planes are
// resized at half width and half height (integer division) on both the input
// and the output side.
void vp9_resize_frame420(const uint8_t *const y,
                         int y_stride,
                         const uint8_t *const u, const uint8_t *const v,
                         int uv_stride,
                         int height, int width,
                         uint8_t *oy, int oy_stride,
                         uint8_t *ou, uint8_t *ov, int ouv_stride,
                         int oheight, int owidth) {
  vp9_resize_plane(y, height, width, y_stride,
                   oy, oheight, owidth, oy_stride);
  vp9_resize_plane(u, height / 2, width / 2, uv_stride,
                   ou, oheight / 2, owidth / 2, ouv_stride);
  vp9_resize_plane(v, height / 2, width / 2, uv_stride,
                   ov, oheight / 2, owidth / 2, ouv_stride);
}
// Resizes a three-plane 8-bit frame from width x height to owidth x oheight.
// The Y plane is resized at the given dimensions; the U and V planes are
// resized at half width (integer division) and full height.
void vp9_resize_frame422(const uint8_t *const y, int y_stride,
                         const uint8_t *const u, const uint8_t *const v,
                         int uv_stride,
                         int height, int width,
                         uint8_t *oy, int oy_stride,
                         uint8_t *ou, uint8_t *ov, int ouv_stride,
                         int oheight, int owidth) {
  vp9_resize_plane(y, height, width, y_stride,
                   oy, oheight, owidth, oy_stride);
  vp9_resize_plane(u, height, width / 2, uv_stride,
                   ou, oheight, owidth / 2, ouv_stride);
  vp9_resize_plane(v, height, width / 2, uv_stride,
                   ov, oheight, owidth / 2, ouv_stride);
}
// Resizes a three-plane 8-bit frame from width x height to owidth x oheight.
// All three planes (Y, U and V) are resized at the same, full dimensions.
void vp9_resize_frame444(const uint8_t *const y, int y_stride,
                         const uint8_t *const u, const uint8_t *const v,
                         int uv_stride,
                         int height, int width,
                         uint8_t *oy, int oy_stride,
                         uint8_t *ou, uint8_t *ov, int ouv_stride,
                         int oheight, int owidth) {
  vp9_resize_plane(y, height, width, y_stride,
                   oy, oheight, owidth, oy_stride);
  vp9_resize_plane(u, height, width, uv_stride,
                   ou, oheight, owidth, ouv_stride);
  vp9_resize_plane(v, height, width, uv_stride,
                   ov, oheight, owidth, ouv_stride);
}
879 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth counterpart of vp9_resize_frame420(): resizes a three-plane
// frame from width x height to owidth x oheight for bit depth bd.
// The Y plane is resized at the given dimensions; the U and V planes are
// resized at half width and half height (integer division) on both the input
// and the output side.
void vp9_highbd_resize_frame420(const uint8_t *const y,
                                int y_stride,
                                const uint8_t *const u, const uint8_t *const v,
                                int uv_stride,
                                int height, int width,
                                uint8_t *oy, int oy_stride,
                                uint8_t *ou, uint8_t *ov, int ouv_stride,
                                int oheight, int owidth, int bd) {
  vp9_highbd_resize_plane(y, height, width, y_stride,
                          oy, oheight, owidth, oy_stride, bd);
  vp9_highbd_resize_plane(u, height / 2, width / 2, uv_stride,
                          ou, oheight / 2, owidth / 2, ouv_stride, bd);
  vp9_highbd_resize_plane(v, height / 2, width / 2, uv_stride,
                          ov, oheight / 2, owidth / 2, ouv_stride, bd);
}
// High bit-depth counterpart of vp9_resize_frame422(): resizes a three-plane
// frame from width x height to owidth x oheight for bit depth bd.
// The Y plane is resized at the given dimensions; the U and V planes are
// resized at half width (integer division) and full height.
void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride,
                                const uint8_t *const u, const uint8_t *const v,
                                int uv_stride,
                                int height, int width,
                                uint8_t *oy, int oy_stride,
                                uint8_t *ou, uint8_t *ov, int ouv_stride,
                                int oheight, int owidth, int bd) {
  vp9_highbd_resize_plane(y, height, width, y_stride,
                          oy, oheight, owidth, oy_stride, bd);
  vp9_highbd_resize_plane(u, height, width / 2, uv_stride,
                          ou, oheight, owidth / 2, ouv_stride, bd);
  vp9_highbd_resize_plane(v, height, width / 2, uv_stride,
                          ov, oheight, owidth / 2, ouv_stride, bd);
}
// High bit-depth counterpart of vp9_resize_frame444(): resizes a three-plane
// frame from width x height to owidth x oheight for bit depth bd.
// All three planes (Y, U and V) are resized at the same, full dimensions.
void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride,
                                const uint8_t *const u, const uint8_t *const v,
                                int uv_stride,
                                int height, int width,
                                uint8_t *oy, int oy_stride,
                                uint8_t *ou, uint8_t *ov, int ouv_stride,
                                int oheight, int owidth, int bd) {
  vp9_highbd_resize_plane(y, height, width, y_stride,
                          oy, oheight, owidth, oy_stride, bd);
  vp9_highbd_resize_plane(u, height, width, uv_stride,
                          ou, oheight, owidth, ouv_stride, bd);
  vp9_highbd_resize_plane(v, height, width, uv_stride,
                          ov, oheight, owidth, ouv_stride, bd);
}
925 #endif // CONFIG_VP9_HIGHBITDEPTH