1 #include "mpeg3private.h"
2 #include "mpeg3protos.h"
7 #define CLIP(x) ((x) >= 0 ? ((x) < 255 ? (x) : 255) : 0)
8 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3)
9 #define USED __attribute__((used))
14 static unsigned long long MMX_128 USED
= 0x80008000800080LL
;
16 int mpeg3_new_slice_buffer(mpeg3_slice_buffer_t
*slice_buffer
)
18 pthread_mutexattr_t mutex_attr
;
20 slice_buffer
->data
= malloc(1024);
21 slice_buffer
->buffer_size
= 0;
22 slice_buffer
->buffer_allocation
= 1024;
23 slice_buffer
->current_position
= 0;
24 slice_buffer
->bits_size
= 0;
25 slice_buffer
->bits
= 0;
26 slice_buffer
->done
= 0;
27 pthread_mutexattr_init(&mutex_attr
);
28 // pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
29 pthread_mutex_init(&(slice_buffer
->completion_lock
), &mutex_attr
);
33 int mpeg3_delete_slice_buffer(mpeg3_slice_buffer_t
*slice_buffer
)
35 free(slice_buffer
->data
);
36 pthread_mutex_destroy(&(slice_buffer
->completion_lock
));
40 int mpeg3_expand_slice_buffer(mpeg3_slice_buffer_t
*slice_buffer
)
43 unsigned char *new_buffer
= malloc(slice_buffer
->buffer_allocation
* 2);
44 for(i
= 0; i
< slice_buffer
->buffer_size
; i
++)
45 new_buffer
[i
] = slice_buffer
->data
[i
];
46 free(slice_buffer
->data
);
47 slice_buffer
->data
= new_buffer
;
48 slice_buffer
->buffer_allocation
*= 2;
52 /* limit coefficients to -2048..2047 */
54 /* move/add 8x8-Block from block[comp] to refframe */
/*
 * Write one 8x8 block, slice->block[comp], into the frame being built in
 * video->newframe[].  The visible code computes a destination pointer (rfp)
 * and a row stride (iincr) from the picture structure, DCT type and chroma
 * format, then either adds the block to the pixels already there or stores
 * the block with a +128 bias; both forms clip to 0..255 in the C variants.
 * MMX and plain C variants of each form are present.
 *
 * NOTE(review): this listing is incomplete -- the parameter list after
 * `slice`, the braces and the conditions selecting between the variants
 * are not visible, so the notes below describe only the visible statements.
 */
56 static inline int mpeg3video_addblock(mpeg3_slice_t
*slice
,
/* Per-block value read from slice->sparse[comp]; the code that tests it is
   not visible here.  NOTE(review): presumably selects the single-value
   (broadcast) asm variants below -- confirm. */
67 int spar
= slice
->sparse
[comp
];
68 /* color component index */
/* Blocks 0..3 map to component 0; higher block numbers alternate between
   components 1 and 2 according to their low bit. */
69 cc
= (comp
< 4) ? 0 : (comp
& 1) + 1;
/* First group of address computations: destination is newframe[0] and the
   row length is coded_picture_width.  Bit 0 of comp selects the right-hand
   8 columns; bit 1 of comp selects the row offset. */
74 if(video
->pict_struct
== FRAME_PICTURE
)
78 /* field DCT coding */
/* Row offset is 0 or 1 and the stride is two picture rows. */
79 rfp
= video
->newframe
[0] +
80 video
->coded_picture_width
* (by
+ ((comp
& 2) >> 1)) + bx
+ ((comp
& 1) << 3);
81 iincr
= (video
->coded_picture_width
<< 1);
85 /* frame DCT coding */
/* Row offset is 0 or 8 and the stride is one picture row. */
86 rfp
= video
->newframe
[0] +
87 video
->coded_picture_width
* (by
+ ((comp
& 2) << 2)) + bx
+ ((comp
& 1) << 3);
88 iincr
= video
->coded_picture_width
;
/* Third form: both the row address and the stride use a doubled width.
   NOTE(review): presumably the non-FRAME_PICTURE branch; the `else` is not
   visible -- confirm. */
94 rfp
= video
->newframe
[0] +
95 (video
->coded_picture_width
<< 1) * (by
+ ((comp
& 2) << 2)) + bx
+ ((comp
& 1) << 3);
96 iincr
= (video
->coded_picture_width
<< 1);
/* Second group of address computations: destination is newframe[cc] and
   the row length is chrom_width. */
103 /* scale coordinates */
/* bx is halved unless the format is 4:4:4; by is halved only for 4:2:0. */
104 if(video
->chroma_format
!= CHROMA444
) bx
>>= 1;
105 if(video
->chroma_format
== CHROMA420
) by
>>= 1;
106 if(video
->pict_struct
== FRAME_PICTURE
)
108 if(dct_type
&& (video
->chroma_format
!= CHROMA420
))
110 /* field DCT coding */
111 rfp
= video
->newframe
[cc
]
112 + video
->chrom_width
* (by
+ ((comp
& 2) >> 1)) + bx
+ (comp
& 8);
113 iincr
= (video
->chrom_width
<< 1);
117 /* frame DCT coding */
118 rfp
= video
->newframe
[cc
]
119 + video
->chrom_width
* (by
+ ((comp
& 2) << 2)) + bx
+ (comp
& 8);
120 iincr
= video
->chrom_width
;
/* Doubled-width form for chroma, mirroring the doubled-width form used
   with newframe[0] above. */
126 rfp
= video
->newframe
[cc
]
127 + (video
->chrom_width
<< 1) * (by
+ ((comp
& 2) << 2)) + bx
+ (comp
& 8);
128 iincr
= (video
->chrom_width
<< 1);
/* Source coefficients/pixels for this block. */
132 bp
= slice
->block
[comp
];
/* MMX variant: replicates the first 16-bit value at bp into four lanes and
   adds it to eight destination bytes (widened to words), packing back with
   unsigned saturation.  Inputs: counter 8, rfp, bp, iincr.  The store and
   loop instructions are not visible in this listing. */
141 __asm__
__volatile__(
142 "movq (%2), %%mm6\n" /* 4 blockvals */
143 "pxor %%mm4, %%mm4\n"
144 "punpcklwd %%mm6, %%mm6\n"
145 "punpcklwd %%mm6, %%mm6\n"
148 "movq (%1), %%mm0\n" /* 8 rindex1 */
149 "movq %%mm0, %%mm2\n"
150 "punpcklbw %%mm4, %%mm0\n"
151 "punpckhbw %%mm4, %%mm2\n"
152 "paddw %%mm6, %%mm0\n"
153 "paddw %%mm6, %%mm2\n"
155 "packuswb %%mm2, %%mm0\n"
158 "leal (%1, %3), %1\n"
161 : "c" (8),"r" (rfp
), "r" (bp
), "r" (iincr
)
/* MMX variant: adds eight 16-bit block values (two 4-word loads at offsets
   0 and 8 from bp) to eight destination bytes, packing with unsigned
   saturation.  Note the operand order differs from the previous asm
   statement: here %1 is bp and %2 is rfp. */
166 __asm__
__volatile__(
167 "pxor %%mm4, %%mm4\n"
171 "movq (%2), %%mm0\n" /* 8 rfp 0 1 2 3 4 5 6 7*/
172 "movq (%1), %%mm6\n" /* 4 blockvals 0 1 2 3 */
174 "movq %%mm0, %%mm2\n"
175 "movq 8(%1), %%mm5\n" /* 4 blockvals 0 1 2 3 */
176 "punpcklbw %%mm4, %%mm0\n" /* 0 2 4 6 */
177 "punpckhbw %%mm4, %%mm2\n" /* 1 3 5 7 */
179 "paddw %%mm6, %%mm0\n"
180 "paddw %%mm5, %%mm2\n"
181 "packuswb %%mm2, %%mm0\n"
189 : "c" (8),"r" (bp
), "r" (rfp
), "r" (iincr
)
/* Plain C variant of the add: for each of 8 rows, every output pixel is the
   block value plus the pixel already in the frame, clipped to 0..255.  The
   per-row pointer advances are not visible in this listing. */
195 for(i
= 0; i
< 8; i
++)
197 rfp
[0] = CLIP(bp
[0] + rfp
[0]);
198 rfp
[1] = CLIP(bp
[1] + rfp
[1]);
199 rfp
[2] = CLIP(bp
[2] + rfp
[2]);
200 rfp
[3] = CLIP(bp
[3] + rfp
[3]);
201 rfp
[4] = CLIP(bp
[4] + rfp
[4]);
202 rfp
[5] = CLIP(bp
[5] + rfp
[5]);
203 rfp
[6] = CLIP(bp
[6] + rfp
[6]);
204 rfp
[7] = CLIP(bp
[7] + rfp
[7]);
/* MMX variant of the store: replicates the first block value, adds the
   constant held in the file-level MMX_128 symbol, packs to bytes with
   unsigned saturation and writes the same 8 bytes to successive rows
   (row stride in %1).  MMX_128 is referenced by name from inside the asm
   string rather than through an operand. */
216 __asm__
__volatile__(
217 "movd (%2), %%mm0\n" /* " 0 0 0 v1" */
218 "punpcklwd %%mm0, %%mm0\n" /* " 0 0 v1 v1" */
219 "punpcklwd %%mm0, %%mm0\n"
220 "paddw MMX_128, %%mm0\n"
221 "packuswb %%mm0, %%mm0\n"
222 "leal (%0,%1,2), %%eax\n"
224 "movq %%mm0, (%0, %1)\n"
225 "movq %%mm0, (%%eax)\n"
226 "leal (%%eax,%1,2), %0\n"
227 "movq %%mm0, (%%eax, %1)\n"
230 "leal (%0,%1,2), %%eax\n"
231 "movq %%mm0, (%0, %1)\n"
233 "movq %%mm0, (%%eax)\n"
234 "movq %%mm0, (%%eax, %1)\n"
236 : "D" (rfp
), "c" (iincr
), "r" (bp
)
/* MMX variant of the store: loads block values four words at a time, adds
   MMX_128 to each group, packs pairs of groups to bytes with unsigned
   saturation and stores rows, advancing the destination by two strides.
   Inputs: counter 4, bp, rfp, iincr. */
241 __asm__
__volatile__(
242 "movq MMX_128,%%mm4\n"
246 "movq 8(%1), %%mm1\n"
247 "paddw %%mm4, %%mm0\n"
249 "movq 16(%1), %%mm2\n"
250 "paddw %%mm4, %%mm1\n"
252 "movq 24(%1), %%mm3\n"
253 "paddw %%mm4, %%mm2\n"
255 "packuswb %%mm1, %%mm0\n"
256 "paddw %%mm4, %%mm3\n"
259 "packuswb %%mm3, %%mm2\n"
263 "movq %%mm2, (%2,%3)\n"
265 "leal (%2,%3,2), %2\n"
268 : "c" (4), "r" (bp
), "r" (rfp
), "r" (iincr
)
/* Plain C variant of the store: for each of 8 rows, every output pixel is
   the block value plus 128, clipped to 0..255; the previous frame contents
   are not read. */
274 for(i
= 0; i
< 8; i
++)
276 rfp
[0] = CLIP(bp
[0] + 128);
277 rfp
[1] = CLIP(bp
[1] + 128);
278 rfp
[2] = CLIP(bp
[2] + 128);
279 rfp
[3] = CLIP(bp
[3] + 128);
280 rfp
[4] = CLIP(bp
[4] + 128);
281 rfp
[5] = CLIP(bp
[5] + 128);
282 rfp
[6] = CLIP(bp
[6] + 128);
283 rfp
[7] = CLIP(bp
[7] + 128);
/*
 * Decode every macroblock of one slice from slice->slice_buffer into the
 * frame owned by slice->video.
 *
 * Visible return paths: 1 whenever slice->fault is found set after a
 * parsing step, and 0 when the next 23 bits of the buffer are all zero.
 *
 * NOTE(review): this listing is incomplete -- loop headers, braces, several
 * declarations (code, comp, bx, by, qs, dc_dct_pred, snr_cbp, ...) and the
 * argument lists of some calls are not visible.  The notes below describe
 * only the visible statements.
 */
291 int mpeg3_decode_slice(mpeg3_slice_t
*slice
)
293 mpeg3video_t
*video
= slice
->video
;
295 int mb_type
, cbp
, motion_type
= 0, dct_type
;
296 int macroblock_address
, mba_inc
, mba_max
;
297 int slice_vert_pos_ext
;
301 int mv_count
, mv_format
, mvscale
;
/* pmv holds the motion vector predictors reset at several points below;
   mv_field_sel holds the field selection set when motion_type is derived. */
302 int pmv
[2][2][2], mv_field_sel
[2][2];
303 int dmv
, dmvector
[2];
305 int stwtype
, stwclass
;
308 mpeg3_slice_buffer_t
*slice_buffer
= slice
->slice_buffer
;
310 /* number of macroblocks per picture */
311 mba_max
= video
->mb_width
* video
->mb_height
;
313 /* field picture has half as many macroblocks as frame */
314 if(video
->pict_struct
!= FRAME_PICTURE
)
317 /* macroblock address */
318 macroblock_address
= 0;
319 /* first macroblock in slice is not skipped */
/* The first 32 bits of the buffer are read as `code`; its low 8 bits are
   used below when the macroblock address is computed. */
323 code
= mpeg3slice_getbits(slice_buffer
, 32);
324 /* decode slice header (may change quant_scale) */
325 slice_vert_pos_ext
= mpeg3video_getslicehdr(slice
, video
);
327 /* reset all DC coefficient and motion vector predictors */
328 dc_dct_pred
[0] = dc_dct_pred
[1] = dc_dct_pred
[2] = 0;
329 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
330 pmv
[0][1][0] = pmv
[0][1][1] = pmv
[1][1][0] = pmv
[1][1][1] = 0;
/* Loop-condition fragment: continue while unread bytes remain in the slice
   buffer.  The enclosing loop header is not visible in this listing. */
333 slice_buffer
->current_position
< slice_buffer
->buffer_size
;
/* Normal end of slice: nothing but zero bits follow. */
339 if(!mpeg3slice_showbits(slice_buffer
, 23)) return 0;
340 /* decode macroblock address increment */
341 mba_inc
= mpeg3video_get_macroblock_address(slice
);
343 if(slice
->fault
) return 1;
347 /* Get the macroblock_address */
/* Row index is (slice_vert_pos_ext << 7) plus the low 8 bits of `code`,
   minus 1; it is scaled by mb_width and the column offset mba_inc - 1 is
   added. */
348 macroblock_address
= ((slice_vert_pos_ext
<< 7) + (code
& 255) - 1) * video
->mb_width
+ mba_inc
- 1;
349 /* first macroblock in slice: not skipped */
354 if(slice
->fault
) return 1;
356 if(macroblock_address
>= mba_max
)
358 /* mba_inc points beyond picture dimensions */
359 /*fprintf(stderr, "mpeg3_decode_slice: too many macroblocks in picture\n"); */
366 mpeg3video_macroblock_modes(slice
,
378 if(slice
->fault
) return 1;
/* A quantiser update is present in the stream: read the 5-bit code. */
380 if(mb_type
& MB_QUANT
)
382 qs
= mpeg3slice_getbits(slice_buffer
, 5);
/* Two visible assignments to quant_scale: one maps qs through the
   non-linear table or doubles it depending on qscale_type, the other uses
   qs directly.  The condition choosing between them is not visible. */
385 slice
->quant_scale
= video
->qscale_type
? mpeg3_non_linear_mquant_table
[qs
] : (qs
<< 1);
387 slice
->quant_scale
= qs
;
389 if(video
->scalable_mode
== SC_DP
)
390 /* make sure quant_scale is valid */
/* NOTE(review): this is a self-assignment and has no effect as written. */
391 slice
->quant_scale
= slice
->quant_scale
;
397 /* decode forward motion vectors */
/* Also taken for intra macroblocks when concealment vectors are enabled. */
398 if((mb_type
& MB_FORWARD
) || ((mb_type
& MB_INTRA
) && video
->conceal_mv
))
401 mpeg3video_motion_vectors(slice
,
409 video
->h_forw_r_size
,
410 video
->v_forw_r_size
,
414 mpeg3video_motion_vector(slice
,
424 if(slice
->fault
) return 1;
426 /* decode backward motion vectors */
427 if(mb_type
& MB_BACKWARD
)
430 mpeg3video_motion_vectors(slice
,
438 video
->h_back_r_size
,
439 video
->v_back_r_size
,
443 mpeg3video_motion_vector(slice
,
454 if(slice
->fault
) return 1;
456 /* remove marker_bit */
457 if((mb_type
& MB_INTRA
) && video
->conceal_mv
)
458 mpeg3slice_flushbit(slice_buffer
);
460 /* macroblock_pattern */
461 if(mb_type
& MB_PATTERN
)
463 cbp
= mpeg3video_get_cbp(slice
);
/* 4:2:2 appends 2 extra pattern bits, 4:4:4 appends 6. */
464 if(video
->chroma_format
== CHROMA422
)
466 /* coded_block_pattern_1 */
467 cbp
= (cbp
<< 2) | mpeg3slice_getbits2(slice_buffer
);
470 if(video
->chroma_format
== CHROMA444
)
472 /* coded_block_pattern_2 */
473 cbp
= (cbp
<< 6) | mpeg3slice_getbits(slice_buffer
, 6);
/* No pattern in the stream: intra macroblocks code all blk_cnt blocks,
   anything else codes none. */
477 cbp
= (mb_type
& MB_INTRA
) ? ((1 << video
->blk_cnt
) - 1) : 0;
479 if(slice
->fault
) return 1;
481 mpeg3video_clearblock(slice
, 0, video
->blk_cnt
);
/* Parse coefficients for each coded block.  Block `comp` is coded when bit
   (blk_cnt - comp - 1) of cbp is set. */
482 for(comp
= 0; comp
< video
->blk_cnt
; comp
++)
484 if(cbp
& (1 << (video
->blk_cnt
- comp
- 1)))
486 if(mb_type
& MB_INTRA
)
/* The condition selecting the mpg2 reader over the plain reader is not
   visible in this listing. */
489 mpeg3video_getmpg2intrablock(slice
, video
, comp
, dc_dct_pred
);
491 mpeg3video_getintrablock(slice
, video
, comp
, dc_dct_pred
);
496 mpeg3video_getmpg2interblock(slice
, video
, comp
);
498 mpeg3video_getinterblock(slice
, video
, comp
);
500 if(slice
->fault
) return 1;
504 /* reset intra_dc predictors */
505 if(!(mb_type
& MB_INTRA
))
506 dc_dct_pred
[0] = dc_dct_pred
[1] = dc_dct_pred
[2] = 0;
508 /* reset motion vector predictors */
509 if((mb_type
& MB_INTRA
) && !video
->conceal_mv
)
511 /* intra mb without concealment motion vectors */
512 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
513 pmv
[0][1][0] = pmv
[0][1][1] = pmv
[1][1][0] = pmv
[1][1][1] = 0;
516 if((video
->pict_type
== P_TYPE
) && !(mb_type
& (MB_FORWARD
| MB_INTRA
)))
518 /* non-intra mb without forward mv in a P picture */
519 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
521 /* derive motion_type */
522 if(video
->pict_struct
== FRAME_PICTURE
)
523 motion_type
= MC_FRAME
;
526 motion_type
= MC_FIELD
;
527 /* predict from field of same parity */
528 mv_field_sel
[0][0] = (video
->pict_struct
== BOTTOM_FIELD
);
534 /* purely spatially predicted macroblock */
535 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
536 pmv
[0][1][0] = pmv
[0][1][1] = pmv
[1][1][0] = pmv
[1][1][1] = 0;
541 /* mba_inc!=1: skipped macroblock */
/* Skipped macroblocks carry no coefficients: the blocks are cleared and
   the predictors reset; further down MB_INTRA is cleared from mb_type. */
542 mpeg3video_clearblock(slice
, 0, video
->blk_cnt
);
544 /* reset intra_dc predictors */
545 dc_dct_pred
[0] = dc_dct_pred
[1] = dc_dct_pred
[2] = 0;
547 /* reset motion vector predictors */
548 if(video
->pict_type
== P_TYPE
)
549 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
551 /* derive motion_type */
552 if(video
->pict_struct
== FRAME_PICTURE
)
553 motion_type
= MC_FRAME
;
556 motion_type
= MC_FIELD
;
557 /* predict from field of same parity */
558 mv_field_sel
[0][0] = mv_field_sel
[0][1] = (video
->pict_struct
== BOTTOM_FIELD
);
561 /* skipped I are spatial-only predicted, */
562 /* skipped P and B are temporal-only predicted */
563 stwtype
= (video
->pict_type
== I_TYPE
) ? 8 : 0;
566 mb_type
&= ~MB_INTRA
;
574 /* pixel coordinates of top left corner of current macroblock */
/* Macroblocks are 16x16 and numbered row by row, mb_width per row. */
575 bx
= 16 * (macroblock_address
% video
->mb_width
);
576 by
= 16 * (macroblock_address
/ video
->mb_width
);
578 /* motion compensation */
579 if(!(mb_type
& MB_INTRA
))
580 mpeg3video_reconstruct(video
,
590 /* copy or add block data into picture */
591 for(comp
= 0; comp
< video
->blk_cnt
; comp
++)
/* Only blocks flagged in cbp or snr_cbp are inverse-transformed here. */
593 if((cbp
| snr_cbp
) & (1 << (video
->blk_cnt
- 1 - comp
)))
/* Two inverse-DCT entry points are visible; the condition choosing
   between them is not. */
597 IDCT_mmx(slice
->block
[comp
]);
600 mpeg3video_idct_conversion(slice
->block
[comp
]);
/* The last visible argument is 1 for non-intra macroblocks, 0 for intra. */
602 mpeg3video_addblock(slice
,
608 (mb_type
& MB_INTRA
) == 0);
612 /* advance to next macroblock */
613 macroblock_address
++;
/*
 * Worker routine started by mpeg3_new_slice_decoder() via pthread_create().
 * Visible behaviour: wait on slice->input_lock, then, holding
 * video->slice_lock, scan video->slice_buffers[] from current_buffer toward
 * last_buffer in steps of buffer_step for an entry whose `done` flag is
 * clear; claim it by setting `done`, release both locks, decode it with
 * mpeg3_decode_slice() and unlock that buffer's completion_lock.  When no
 * buffer is claimed, slice->completion_lock and video->slice_lock are
 * released.  slice->output_lock is unlocked at the end.
 *
 * NOTE(review): this listing is incomplete -- the outer loop, the
 * declaration and updates of `result`, and the braces are not visible, so
 * the exact nesting of the statements below cannot be confirmed here.
 */
620 void mpeg3_slice_loop(mpeg3_slice_t
*slice
)
622 mpeg3video_t
*video
= slice
->video
;
/* input_lock is created already locked by mpeg3_new_slice_decoder(), so
   this call blocks until some other code unlocks it. */
627 pthread_mutex_lock(&(slice
->input_lock
));
631 /* Get a buffer to decode */
/* slice_lock is held while the slice_buffers[] `done` flags are examined
   and updated. */
633 pthread_mutex_lock(&(video
->slice_lock
));
/* Positive step: scan upward while current_buffer <= last_buffer. */
634 if(slice
->buffer_step
> 0)
636 while(slice
->current_buffer
<= slice
->last_buffer
)
/* NOTE(review): the second half of this test repeats the enclosing while
   condition; it looks redundant unless current_buffer changes in lines not
   visible here. */
638 if(!video
->slice_buffers
[slice
->current_buffer
].done
&&
639 slice
->current_buffer
<= slice
->last_buffer
)
644 slice
->current_buffer
+= slice
->buffer_step
;
/* Otherwise: scan downward while current_buffer >= last_buffer (the step
   is added, so it is expected to be negative on this path). */
649 while(slice
->current_buffer
>= slice
->last_buffer
)
651 if(!video
->slice_buffers
[slice
->current_buffer
].done
&&
652 slice
->current_buffer
>= slice
->last_buffer
)
657 slice
->current_buffer
+= slice
->buffer_step
;
/* A buffer was found and its index is inside the array: claim it, drop the
   locks, then decode outside the locks. */
662 if(!result
&& slice
->current_buffer
>= 0 && slice
->current_buffer
< video
->total_slice_buffers
)
664 slice
->slice_buffer
= &(video
->slice_buffers
[slice
->current_buffer
]);
665 slice
->slice_buffer
->done
= 1;
666 pthread_mutex_unlock(&(video
->slice_lock
));
667 pthread_mutex_unlock(&(slice
->input_lock
));
/* The return value of the decode call is not used. */
668 mpeg3_decode_slice(slice
);
/* NOTE(review): the matching lock of this buffer's completion_lock is not
   visible in this function -- presumably taken by the code that queued the
   buffer; confirm which thread owns it. */
669 pthread_mutex_unlock(&(slice
->slice_buffer
->completion_lock
));
672 /* Finished with all */
674 pthread_mutex_unlock(&(slice
->completion_lock
));
675 pthread_mutex_unlock(&(video
->slice_lock
));
679 pthread_mutex_unlock(&(slice
->output_lock
));
/*
 * Set up one slice decoder: record the owning video object, create the
 * input, output and completion locks -- each one is locked immediately
 * after it is created -- and start a thread running mpeg3_slice_loop()
 * with `slice` as its argument.
 *
 * NOTE(review): this listing is incomplete -- the declaration of `attr`,
 * the return statement and any other initialisation are not visible.
 */
683 int mpeg3_new_slice_decoder(void *video
, mpeg3_slice_t
*slice
)
686 pthread_mutexattr_t mutex_attr
;
688 slice
->video
= video
;
/* One attribute object, left at its defaults, is shared by all three
   mutexes.  NOTE(review): no pthread_mutexattr_destroy() call is visible. */
690 pthread_mutexattr_init(&mutex_attr
);
691 // pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
/* Each lock starts out held, so the first lock attempt by another thread
   (e.g. input_lock in mpeg3_slice_loop) blocks until it is released. */
692 pthread_mutex_init(&(slice
->input_lock
), &mutex_attr
);
693 pthread_mutex_lock(&(slice
->input_lock
));
694 pthread_mutex_init(&(slice
->output_lock
), &mutex_attr
);
695 pthread_mutex_lock(&(slice
->output_lock
));
696 pthread_mutex_init(&(slice
->completion_lock
), &mutex_attr
);
697 pthread_mutex_lock(&(slice
->completion_lock
));
699 pthread_attr_init(&attr
);
/* The thread routine is passed through a (void*) cast and the result of
   pthread_create() is not checked in the visible code. */
700 pthread_create(&(slice
->tid
), &attr
, (void*)mpeg3_slice_loop
, slice
);
/*
 * Shut down one slice decoder: release input_lock so the worker blocked on
 * it in mpeg3_slice_loop() can run, wait for the worker thread to exit,
 * then destroy the input and output locks.
 *
 * NOTE(review): this listing is incomplete -- statements before the unlock
 * and after the last destroy are not visible.  In particular no destroy of
 * slice->completion_lock (created in mpeg3_new_slice_decoder) is visible
 * here; confirm whether it is released elsewhere.
 */
705 int mpeg3_delete_slice_decoder(mpeg3_slice_t
*slice
)
/* Wake the worker thread. */
708 pthread_mutex_unlock(&(slice
->input_lock
));
/* The thread's exit value is not collected (second argument is 0). */
709 pthread_join(slice
->tid
, 0);
710 pthread_mutex_destroy(&(slice
->input_lock
));
711 pthread_mutex_destroy(&(slice
->output_lock
));