/*
 * H.26L/H.264/AVC/JVT/14496-10/... motion vector prediction
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 motion vector prediction.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#ifndef AVCODEC_H264_MVPRED_H
#define AVCODEC_H264_MVPRED_H

#include "internal.h"
#include "avcodec.h"
#include "h264.h"

#include <assert.h>

static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C,
                                              int i, int list, int part_width)
{
    const int topright_ref = h->ref_cache[list][i - 8 + part_width];
    MpegEncContext *s      = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if (FRAME_MBAFF) {
#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)                              \
        const int xy = XY, y4 = Y4;                                     \
        const int mb_type = mb_types[xy + (y4 >> 2) * s->mb_stride];    \
        if (!USES_LIST(mb_type, list))                                  \
            return LIST_NOT_USED;                                       \
        mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \
        h->mv_cache[list][scan8[0] - 2][0] = mv[0];                     \
        h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP;               \
        return s->current_picture_ptr->f.ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP;
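
        /* Added note: MV_OP / REF_OP carry the field<->frame conversion.
         * With a frame-coded current MB and a field-coded left neighbour the
         * neighbour's vertical MV is in field units and its reference index
         * counts fields, hence "* 2" / ">> 1"; the opposite pairing uses
         * "/ 2" / "<< 1". */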

        if (topright_ref == PART_NOT_AVAILABLE
            && i >= scan8[0] + 8 && (i & 7) == 4
            && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) {
            const uint32_t *mb_types = s->current_picture_ptr->f.mb_type;
            const int16_t *mv;
            AV_ZERO32(h->mv_cache[list][scan8[0] - 2]);
            *C = h->mv_cache[list][scan8[0] - 2];

            if (!MB_FIELD && IS_INTERLACED(h->left_type[0])) {
                SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + s->mb_stride,
                            (s->mb_y & 1) * 2 + (i >> 5));
            }
            if (MB_FIELD && !IS_INTERLACED(h->left_type[0])) {
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3);
            }
        }
#undef SET_DIAG_MV
    }

    if (topright_ref != PART_NOT_AVAILABLE) {
        *C = h->mv_cache[list][i - 8 + part_width];
        return topright_ref;
    } else {
        tprintf(s->avctx, "topright MV not available\n");

        *C = h->mv_cache[list][i - 8 - 1];
        return h->ref_cache[list][i - 8 - 1];
    }
}

/**
 * Get the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_motion(H264Context *const h, int n,
                                         int part_width, int list, int ref,
                                         int *const mx, int *const my)
{
    const int index8       = scan8[n];
    const int top_ref      = h->ref_cache[list][index8 - 8];
    const int left_ref     = h->ref_cache[list][index8 - 1];
    const int16_t *const A = h->mv_cache[list][index8 - 1];
    const int16_t *const B = h->mv_cache[list][index8 - 8];
    const int16_t *C;
    int diagonal_ref, match_count;

    assert(part_width == 1 || part_width == 2 || part_width == 4);
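
    /* Added note: this implements the standard H.264 luma MV prediction.
     * If exactly one of left (A), top (B) and diagonal (C) uses the same
     * reference picture, its MV is copied; otherwise the component-wise
     * median of A, B and C is taken (e.g. A = (4,0), B = (0,4), C = (16,2)
     * predicts (4,2)). A alone is used when only the left neighbour is
     * available. */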

    diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count  = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if (match_count > 1) { //most common
        *mx = mid_pred(A[0], B[0], C[0]);
        *my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
        } else if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
        } else {
            *mx = C[0];
            *my = C[1];
        }
    } else {
        if (top_ref      == PART_NOT_AVAILABLE &&
            diagonal_ref == PART_NOT_AVAILABLE &&
            left_ref     != PART_NOT_AVAILABLE) {
            *mx = A[0];
            *my = A[1];
        } else {
            *mx = mid_pred(A[0], B[0], C[0]);
            *my = mid_pred(A[1], B[1], C[1]);
        }
    }

    tprintf(h->s.avctx,
            "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n",
            top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref,
            A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
}

/**
 * Get the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
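
/* Added note: for 16x8 and 8x16 partitions the standard tries one directional
 * neighbour first (top for the upper 16x8 partition, left for the lower one;
 * left for the left 8x16 partition, top-right/diagonal for the right one) and
 * only falls back to the median predictor in pred_motion() when that
 * neighbour uses a different reference. */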
static av_always_inline void pred_16x8_motion(H264Context *const h,
                                              int n, int list, int ref,
                                              int *const mx, int *const my)
{
    if (n == 0) {
        const int top_ref      = h->ref_cache[list][scan8[0] - 8];
        const int16_t *const B = h->mv_cache[list][scan8[0] - 8];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);

        if (top_ref == ref) {
            *mx = B[0];
            *my = B[1];
            return;
        }
    } else {
        const int left_ref     = h->ref_cache[list][scan8[8] - 1];
        const int16_t *const A = h->mv_cache[list][scan8[8] - 1];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
            return;
        }
    }

    //RARE
    pred_motion(h, n, 4, list, ref, mx, my);
}

/**
 * Get the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static av_always_inline void pred_8x16_motion(H264Context *const h,
                                              int n, int list, int ref,
                                              int *const mx, int *const my)
{
    if (n == 0) {
        const int left_ref     = h->ref_cache[list][scan8[0] - 1];
        const int16_t *const A = h->mv_cache[list][scan8[0] - 1];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        if (left_ref == ref) {
            *mx = A[0];
            *my = A[1];
            return;
        }
    } else {
        const int16_t *C;
        int diagonal_ref;

        diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if (diagonal_ref == ref) {
            *mx = C[0];
            *my = C[1];
            return;
        }
    }

    //RARE
    pred_motion(h, n, 2, list, ref, mx, my);
}

#define FIX_MV_MBAFF(type, refn, mvn, idx)      \
    if (FRAME_MBAFF) {                          \
        if (MB_FIELD) {                         \
            if (!IS_INTERLACED(type)) {         \
                refn <<= 1;                     \
                AV_COPY32(mvbuf[idx], mvn);     \
                mvbuf[idx][1] /= 2;             \
                mvn = mvbuf[idx];               \
            }                                   \
        } else {                                \
            if (IS_INTERLACED(type)) {          \
                refn >>= 1;                     \
                AV_COPY32(mvbuf[idx], mvn);     \
                mvbuf[idx][1] <<= 1;            \
                mvn = mvbuf[idx];               \
            }                                   \
        }                                       \
    }
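
/* Added note: FIX_MV_MBAFF rescales a neighbour's reference index and
 * vertical MV when, in an MBAFF frame, the neighbour's field/frame coding
 * differs from the current macroblock; the MV is copied into the local
 * mvbuf scratch array so the stored motion_val data is not modified. */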

static av_always_inline void pred_pskip_motion(H264Context *const h)
{
    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 };
    DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
    MpegEncContext *const s = &h->s;
    int8_t *ref     = s->current_picture.f.ref_index[0];
    int16_t(*mv)[2] = s->current_picture.f.motion_val[0];
    int top_ref, left_ref, diagonal_ref, match_count, mx, my;
    const int16_t *A, *B, *C;
    int b_stride = h->b_stride;

    fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);

    /* To avoid doing an entire fill_decode_caches, we inline the relevant
     * parts here.
     * FIXME: this is a partial duplicate of the logic in fill_decode_caches,
     * but it's faster this way. Is there a way to avoid this duplication?
     */
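    /* Added note: per the P_SKIP rules, the predicted MV is forced to zero
     * when the left or top neighbour is unavailable, or when either of them
     * references picture 0 with a zero MV; the "goto zeromv" shortcuts below
     * implement exactly that. */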
    if (USES_LIST(h->left_type[LTOP], 0)) {
        left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)];
        A        = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]];
        FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
        if (!(left_ref | AV_RN32A(A)))
            goto zeromv;
    } else if (h->left_type[LTOP]) {
        left_ref = LIST_NOT_USED;
        A        = zeromv;
    } else {
        goto zeromv;
    }

    if (USES_LIST(h->top_type, 0)) {
        top_ref = ref[4 * h->top_mb_xy + 2];
        B       = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride];
        FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
        if (!(top_ref | AV_RN32A(B)))
            goto zeromv;
    } else if (h->top_type) {
        top_ref = LIST_NOT_USED;
        B       = zeromv;
    } else {
        goto zeromv;
    }

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n",
            top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if (USES_LIST(h->topright_type, 0)) {
        diagonal_ref = ref[4 * h->topright_mb_xy + 2];
        C            = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride];
        FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
    } else if (h->topright_type) {
        diagonal_ref = LIST_NOT_USED;
        C            = zeromv;
    } else {
        if (USES_LIST(h->topleft_type, 0)) {
            diagonal_ref = ref[4 * h->topleft_mb_xy + 1 +
                               (h->topleft_partition & 2)];
            C            = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride +
                              (h->topleft_partition & 2 * b_stride)];
            FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
        } else if (h->topleft_type) {
            diagonal_ref = LIST_NOT_USED;
            C            = zeromv;
        } else {
            diagonal_ref = PART_NOT_AVAILABLE;
            C            = zeromv;
        }
    }

    match_count = !diagonal_ref + !top_ref + !left_ref;
    tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
    if (match_count > 1) {
        mx = mid_pred(A[0], B[0], C[0]);
        my = mid_pred(A[1], B[1], C[1]);
    } else if (match_count == 1) {
        if (!left_ref) {
            mx = A[0];
            my = A[1];
        } else if (!top_ref) {
            mx = B[0];
            my = B[1];
        } else {
            mx = C[0];
            my = C[1];
        }
    } else {
        mx = mid_pred(A[0], B[0], C[0]);
        my = mid_pred(A[1], B[1], C[1]);
    }

    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4);
    return;

zeromv:
    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
    return;
}

static void fill_decode_neighbors(H264Context *h, int mb_type)
{
    MpegEncContext *const s = &h->s;
    const int mb_xy         = h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
    static const uint8_t left_block_options[4][32] = {
        { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 },
        { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 },
        { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 },
        { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }
    };
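
    /* Added (approximate) note: each row of left_block_options describes one
     * frame/field pairing of the current MB and its left neighbour in MBAFF:
     * the leading entries select the neighbour's 4x4 blocks for MV/ref and
     * intra-mode data, the entries from offset 8 onwards select
     * non_zero_count entries. Row 0 is the normal same-parity layout; the
     * other rows are chosen below when the pairing differs. */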

    h->topleft_partition = -1;

    top_xy = mb_xy - (s->mb_stride << MB_FIELD);

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy    = top_xy - 1;
    topright_xy   = top_xy + 1;
    left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
    h->left_block = left_block_options[0];
    if (FRAME_MBAFF) {
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
        const int curr_mb_field_flag = IS_INTERLACED(mb_type);
        if (s->mb_y & 1) {
            if (left_mb_field_flag != curr_mb_field_flag) {
                left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1;
                if (curr_mb_field_flag) {
                    left_xy[LBOT] += s->mb_stride;
                    h->left_block  = left_block_options[3];
                } else {
                    topleft_xy += s->mb_stride;
                    /* take top left mv from the middle of the mb, as opposed
                     * to all other modes which use the bottom right partition */
                    h->topleft_partition = 0;
                    h->left_block        = left_block_options[1];
                }
            }
        } else {
            if (curr_mb_field_flag) {
                topleft_xy  += s->mb_stride & (((s->current_picture.f.mb_type[top_xy - 1] >> 7) & 1) - 1);
                topright_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy + 1] >> 7) & 1) - 1);
                top_xy      += s->mb_stride & (((s->current_picture.f.mb_type[top_xy]     >> 7) & 1) - 1);
            }
            if (left_mb_field_flag != curr_mb_field_flag) {
                if (curr_mb_field_flag) {
                    left_xy[LBOT] += s->mb_stride;
                    h->left_block  = left_block_options[3];
                } else {
                    h->left_block = left_block_options[2];
                }
            }
        }
    }

    h->topleft_mb_xy    = topleft_xy;
    h->top_mb_xy        = top_xy;
    h->topright_mb_xy   = topright_xy;
    h->left_mb_xy[LTOP] = left_xy[LTOP];
    h->left_mb_xy[LBOT] = left_xy[LBOT];
    //FIXME do we need all in the context?

    h->topleft_type    = s->current_picture.f.mb_type[topleft_xy];
    h->top_type        = s->current_picture.f.mb_type[top_xy];
    h->topright_type   = s->current_picture.f.mb_type[topright_xy];
    h->left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
    h->left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];

    if (FMO) {
        if (h->slice_table[topleft_xy] != h->slice_num)
            h->topleft_type = 0;
        if (h->slice_table[top_xy] != h->slice_num)
            h->top_type = 0;
        if (h->slice_table[left_xy[LTOP]] != h->slice_num)
            h->left_type[LTOP] = h->left_type[LBOT] = 0;
    } else {
        if (h->slice_table[topleft_xy] != h->slice_num) {
            h->topleft_type = 0;
            if (h->slice_table[top_xy] != h->slice_num)
                h->top_type = 0;
            if (h->slice_table[left_xy[LTOP]] != h->slice_num)
                h->left_type[LTOP] = h->left_type[LBOT] = 0;
        }
    }
    if (h->slice_table[topright_xy] != h->slice_num)
        h->topright_type = 0;
}

static void fill_decode_caches(H264Context *h, int mb_type)
{
    MpegEncContext *const s = &h->s;
    int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
    int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
    const uint8_t *left_block = h->left_block;
    int i;
    uint8_t *nnz;
    uint8_t *nnz_cache;

    topleft_xy      = h->topleft_mb_xy;
    top_xy          = h->top_mb_xy;
    topright_xy     = h->topright_mb_xy;
    left_xy[LTOP]   = h->left_mb_xy[LTOP];
    left_xy[LBOT]   = h->left_mb_xy[LBOT];
    topleft_type    = h->topleft_type;
    top_type        = h->top_type;
    topright_type   = h->topright_type;
    left_type[LTOP] = h->left_type[LTOP];
    left_type[LBOT] = h->left_type[LBOT];

    if (!IS_SKIP(mb_type)) {
        if (IS_INTRA(mb_type)) {
            int type_mask = h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
            h->topleft_samples_available      =
                h->top_samples_available      =
                    h->left_samples_available = 0xFFFF;
            h->topright_samples_available     = 0xEEEA;

            if (!(top_type & type_mask)) {
                h->topleft_samples_available  = 0xB3FF;
                h->top_samples_available      = 0x33FF;
                h->topright_samples_available = 0x26EA;
            }
            if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) {
                if (IS_INTERLACED(mb_type)) {
                    if (!(left_type[LTOP] & type_mask)) {
                        h->topleft_samples_available &= 0xDFFF;
                        h->left_samples_available    &= 0x5FFF;
                    }
                    if (!(left_type[LBOT] & type_mask)) {
                        h->topleft_samples_available &= 0xFF5F;
                        h->left_samples_available    &= 0xFF5F;
                    }
                } else {
                    int left_typei = s->current_picture.f.mb_type[left_xy[LTOP] + s->mb_stride];

                    assert(left_xy[LTOP] == left_xy[LBOT]);
                    if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) {
                        h->topleft_samples_available &= 0xDF5F;
                        h->left_samples_available    &= 0x5F5F;
                    }
                }
            } else {
                if (!(left_type[LTOP] & type_mask)) {
                    h->topleft_samples_available &= 0xDF5F;
                    h->left_samples_available    &= 0x5F5F;
                }
            }

            if (!(topleft_type & type_mask))
                h->topleft_samples_available &= 0x7FFF;

            if (!(topright_type & type_mask))
                h->topright_samples_available &= 0xFBFF;

            if (IS_INTRA4x4(mb_type)) {
                if (IS_INTRA4x4(top_type)) {
                    AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                } else {
                    h->intra4x4_pred_mode_cache[4 + 8 * 0] =
                    h->intra4x4_pred_mode_cache[5 + 8 * 0] =
                    h->intra4x4_pred_mode_cache[6 + 8 * 0] =
                    h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
                }
                for (i = 0; i < 2; i++) {
                    if (IS_INTRA4x4(left_type[LEFT(i)])) {
                        int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
                        h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
                        h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
                    } else {
                        h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
                        h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
                    }
                }
            }
        }

        /* FIXME: constraint_intra_pred & partitioning & nnz
         * (let us hope this is just a typo in the spec) */
        nnz_cache = h->non_zero_count_cache;
        if (top_type) {
            nnz = h->non_zero_count[top_xy];
            AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]);
            if (!s->chroma_y_shift) {
                AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 7]);
                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]);
            } else {
                AV_COPY32(&nnz_cache[4 + 8 * 5], &nnz[4 * 5]);
                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]);
            }
        } else {
            uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
            AV_WN32A(&nnz_cache[4 + 8 * 0], top_empty);
            AV_WN32A(&nnz_cache[4 + 8 * 5], top_empty);
            AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty);
        }

        for (i = 0; i < 2; i++) {
            if (left_type[LEFT(i)]) {
                nnz = h->non_zero_count[left_xy[LEFT(i)]];
                nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]];
                nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]];
                if (CHROMA444) {
                    nnz_cache[3 + 8 * 6 + 2 * 8 * i]  = nnz[left_block[8 + 0 + 2 * i] + 4 * 4];
                    nnz_cache[3 + 8 * 7 + 2 * 8 * i]  = nnz[left_block[8 + 1 + 2 * i] + 4 * 4];
                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4];
                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4];
                } else if (CHROMA422) {
                    nnz_cache[3 + 8 * 6 + 2 * 8 * i]  = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4];
                    nnz_cache[3 + 8 * 7 + 2 * 8 * i]  = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4];
                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4];
                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4];
                } else {
                    nnz_cache[3 + 8 * 6 + 8 * i]  = nnz[left_block[8 + 4 + 2 * i]];
                    nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]];
                }
            } else {
                nnz_cache[3 + 8 * 1 + 2 * 8 * i]  =
                nnz_cache[3 + 8 * 2 + 2 * 8 * i]  =
                nnz_cache[3 + 8 * 6 + 2 * 8 * i]  =
                nnz_cache[3 + 8 * 7 + 2 * 8 * i]  =
                nnz_cache[3 + 8 * 11 + 2 * 8 * i] =
                nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC && !IS_INTRA(mb_type) ? 0 : 64;
            }
        }

        if (CABAC) {
            if (top_type)
                h->top_cbp = h->cbp_table[top_xy];
            else
                h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
            if (left_type[LTOP]) {
                h->left_cbp = (h->cbp_table[left_xy[LTOP]] & 0x7F0) |
                              ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) |
                              (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2);
            } else {
                h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
            }
        }
    }

    if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) {
        int list;
        int b_stride = h->b_stride;
        for (list = 0; list < h->list_count; list++) {
            int8_t *ref_cache     = &h->ref_cache[list][scan8[0]];
            int8_t *ref           = s->current_picture.f.ref_index[list];
            int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
            int16_t(*mv)[2]       = s->current_picture.f.motion_val[list];
            if (!USES_LIST(mb_type, list))
                continue;
            assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
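
            /* Added note: ref_cache/mv_cache use the scan8[] layout, an
             * 8-entry-wide grid; the row above scan8[0] (offset -1*8) holds
             * the top neighbours and the column left of it (offset -1) the
             * left neighbours. The code below fills those border entries
             * from the neighbouring macroblocks. */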

            if (USES_LIST(top_type, list)) {
                const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
                AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]);
                ref_cache[0 - 1 * 8] =
                ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2];
                ref_cache[2 - 1 * 8] =
                ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3];
            } else {
                AV_ZERO128(mv_cache[0 - 1 * 8]);
                AV_WN32A(&ref_cache[0 - 1 * 8],
                         ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u);
            }

            if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) {
                for (i = 0; i < 2; i++) {
                    int cache_idx = -1 + i * 2 * 8;
                    if (USES_LIST(left_type[LEFT(i)], list)) {
                        const int b_xy  = h->mb2b_xy[left_xy[LEFT(i)]] + 3;
                        const int b8_xy = 4 * left_xy[LEFT(i)] + 1;
                        AV_COPY32(mv_cache[cache_idx],
                                  mv[b_xy + b_stride * left_block[0 + i * 2]]);
                        AV_COPY32(mv_cache[cache_idx + 8],
                                  mv[b_xy + b_stride * left_block[1 + i * 2]]);
                        ref_cache[cache_idx]     = ref[b8_xy + (left_block[0 + i * 2] & ~1)];
                        ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)];
                    } else {
                        AV_ZERO32(mv_cache[cache_idx]);
                        AV_ZERO32(mv_cache[cache_idx + 8]);
                        ref_cache[cache_idx]     =
                        ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? LIST_NOT_USED
                                                                        : PART_NOT_AVAILABLE;
                    }
                }
            } else {
                if (USES_LIST(left_type[LTOP], list)) {
                    const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                    const int b8_xy = 4 * left_xy[LTOP] + 1;
                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]);
                    ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)];
                } else {
                    AV_ZERO32(mv_cache[-1]);
                    ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED
                                                    : PART_NOT_AVAILABLE;
                }
            }

            if (USES_LIST(topright_type, list)) {
                const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride;
                AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]);
                ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2];
            } else {
                AV_ZERO32(mv_cache[4 - 1 * 8]);
                ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED
                                                     : PART_NOT_AVAILABLE;
            }
            if (ref_cache[4 - 1 * 8] < 0) {
                if (USES_LIST(topleft_type, list)) {
                    const int b_xy  = h->mb2b_xy[topleft_xy] + 3 + b_stride +
                                      (h->topleft_partition & 2 * b_stride);
                    const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2);
                    AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]);
                    ref_cache[-1 - 1 * 8] = ref[b8_xy];
                } else {
                    AV_ZERO32(mv_cache[-1 - 1 * 8]);
                    ref_cache[-1 - 1 * 8] = topleft_type ? LIST_NOT_USED
                                                         : PART_NOT_AVAILABLE;
                }
            }

            if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
                continue;

            if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) {
                uint8_t(*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]];
                uint8_t(*mvd)[2]       = h->mvd_table[list];
                ref_cache[2 + 8 * 0] =
                ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE;
                AV_ZERO32(mv_cache[2 + 8 * 0]);
                AV_ZERO32(mv_cache[2 + 8 * 2]);

                if (CABAC) {
                    if (USES_LIST(top_type, list)) {
                        const int b_xy = h->mb2br_xy[top_xy];
                        AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]);
                    } else {
                        AV_ZERO64(mvd_cache[0 - 1 * 8]);
                    }
                    if (USES_LIST(left_type[LTOP], list)) {
                        const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6;
                        AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]);
                        AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]);
                    } else {
                        AV_ZERO16(mvd_cache[-1 + 0 * 8]);
                        AV_ZERO16(mvd_cache[-1 + 1 * 8]);
                    }
                    if (USES_LIST(left_type[LBOT], list)) {
                        const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6;
                        AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]);
                        AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]);
                    } else {
                        AV_ZERO16(mvd_cache[-1 + 2 * 8]);
                        AV_ZERO16(mvd_cache[-1 + 3 * 8]);
                    }
                    AV_ZERO16(mvd_cache[2 + 8 * 0]);
                    AV_ZERO16(mvd_cache[2 + 8 * 2]);
                    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
                        uint8_t *direct_cache = &h->direct_cache[scan8[0]];
                        uint8_t *direct_table = h->direct_table;
                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1);

                        if (IS_DIRECT(top_type)) {
                            AV_WN32A(&direct_cache[-1 * 8],
                                     0x01010101u * (MB_TYPE_DIRECT2 >> 1));
                        } else if (IS_8X8(top_type)) {
                            int b8_xy = 4 * top_xy;
                            direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2];
                            direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3];
                        } else {
                            AV_WN32A(&direct_cache[-1 * 8],
                                     0x01010101 * (MB_TYPE_16x16 >> 1));
                        }

                        if (IS_DIRECT(left_type[LTOP]))
                            direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1;
                        else if (IS_8X8(left_type[LTOP]))
                            direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)];
                        else
                            direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1;

                        if (IS_DIRECT(left_type[LBOT]))
                            direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1;
                        else if (IS_8X8(left_type[LBOT]))
                            direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)];
                        else
                            direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1;
                    }
                }
            }

            if (FRAME_MBAFF) {
#define MAP_MVS                                         \
    MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type)         \
    MAP_F2F(scan8[0] + 0 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 1 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 2 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 3 - 1 * 8, top_type)             \
    MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type)        \
    MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP])      \
    MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP])      \
    MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT])      \
    MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT])
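
            /* Added note: in an MBAFF frame the cached neighbour data may
             * come from MBs coded in the opposite field/frame mode; MAP_MVS
             * applies MAP_F2F to every neighbouring cache position so that
             * reference indices and vertical MV/MVD components are rescaled
             * to the current MB's units. */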
                if (MB_FIELD) {
#define MAP_F2F(idx, mb_type)                                           \
    if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {      \
        h->ref_cache[list][idx]    <<= 1;                               \
        h->mv_cache[list][idx][1]   /= 2;                               \
        h->mvd_cache[list][idx][1] >>= 1;                               \
    }

                    MAP_MVS
#undef MAP_F2F
                } else {
#define MAP_F2F(idx, mb_type)                                           \
    if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {       \
        h->ref_cache[list][idx]    >>= 1;                               \
        h->mv_cache[list][idx][1]  <<= 1;                               \
        h->mvd_cache[list][idx][1] <<= 1;                               \
    }

                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }

    h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
}

/**
 * Decode a P_SKIP or B_SKIP macroblock.
 */
static void av_unused decode_mb_skip(H264Context *h)
{
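    /* Added note: for P slices the skip MV comes from pred_pskip_motion();
     * for B slices a direct-prediction mb_type is set up and
     * ff_h264_pred_direct_motion() fills the motion data (spatial or
     * temporal direct). */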
    MpegEncContext *const s = &h->s;
    const int mb_xy         = h->mb_xy;
    int mb_type             = 0;

    memset(h->non_zero_count[mb_xy], 0, 48);

    if (MB_FIELD)
        mb_type |= MB_TYPE_INTERLACED;

    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP;
        if (h->direct_spatial_mv_pred) {
            fill_decode_neighbors(h, mb_type);
            fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
        }
        ff_h264_pred_direct_motion(h, &mb_type);
        mb_type |= MB_TYPE_SKIP;
    } else {
        mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP;

        fill_decode_neighbors(h, mb_type);
        pred_pskip_motion(h);
    }

    write_back_motion(h, mb_type);
    s->current_picture.f.mb_type[mb_xy]      = mb_type;
    s->current_picture.f.qscale_table[mb_xy] = s->qscale;
    h->slice_table[mb_xy]                    = h->slice_num;
    h->prev_mb_skipped                       = 1;
}

#endif /* AVCODEC_H264_MVPRED_H */