r553: Modern gccs require __attribute__((used)) for variables used only in assembly.
[cinelerra_cv/mob.git] / libmpeg3 / video / slice.c
blobe67b263ce152a29357c8250f70d7329fca4b9a3e
1 #include "mpeg3private.h"
2 #include "mpeg3protos.h"
4 #include <pthread.h>
5 #include <stdlib.h>
/* Clamp x to the range 0..255.  The argument is evaluated more than once, */
/* so it must not have side effects. */
7 #define CLIP(x) ((x) >= 0 ? ((x) < 255 ? (x) : 255) : 0)
/* USED expands to __attribute__((used)) on GCC 3.3 and later and to nothing */
/* elsewhere.  It marks objects that are referenced only from inline assembly. */
8 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3)
9 #define USED __attribute__((used))
10 #else
11 #define USED
12 #endif
/* Four 16-bit lanes each holding 0x0080 (128).  The MMX code in */
/* mpeg3video_addblock refers to this object by its symbol name and adds it */
/* with paddw; the scalar code adds the literal 128 instead. */
14 static unsigned long long MMX_128 USED = 0x80008000800080LL;
16 int mpeg3_new_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
18 pthread_mutexattr_t mutex_attr;
20 slice_buffer->data = malloc(1024);
21 slice_buffer->buffer_size = 0;
22 slice_buffer->buffer_allocation = 1024;
23 slice_buffer->current_position = 0;
24 slice_buffer->bits_size = 0;
25 slice_buffer->bits = 0;
26 slice_buffer->done = 0;
27 pthread_mutexattr_init(&mutex_attr);
28 // pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
29 pthread_mutex_init(&(slice_buffer->completion_lock), &mutex_attr);
30 return 0;
33 int mpeg3_delete_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
35 free(slice_buffer->data);
36 pthread_mutex_destroy(&(slice_buffer->completion_lock));
37 return 0;
40 int mpeg3_expand_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
42 int i;
43 unsigned char *new_buffer = malloc(slice_buffer->buffer_allocation * 2);
44 for(i = 0; i < slice_buffer->buffer_size; i++)
45 new_buffer[i] = slice_buffer->data[i];
46 free(slice_buffer->data);
47 slice_buffer->data = new_buffer;
48 slice_buffer->buffer_allocation *= 2;
49 return 0;
52 /* limit coefficients to -2048..2047 */
54 /* move/add 8x8-Block from block[comp] to refframe */
/*
 * Transfer one 8x8 block of 16-bit values, slice->block[comp], into the
 * output picture video->newframe[].
 *
 * slice    - supplies block[comp] and the per-block flag sparse[comp]
 * video    - supplies the destination planes, their widths, pict_struct,
 *            chroma_format and the have_mmx switch
 * comp     - block index inside the macroblock; values below 4 go to plane 0,
 *            all others to plane (comp & 1) + 1
 * bx, by   - pixel coordinates of the macroblock (scaled below for chroma)
 * dct_type - nonzero selects the field-DCT row layout in frame pictures
 * addflag  - nonzero: add the block to the pixels already in the frame;
 *            zero: overwrite the pixels with block value + 128
 *
 * Every result is saturated to 0..255.  Always returns 0.
 */
56 static inline int mpeg3video_addblock(mpeg3_slice_t *slice,
57 mpeg3video_t *video,
58 int comp,
59 int bx,
60 int by,
61 int dct_type,
62 int addflag)
64 int cc, i, iincr;
65 unsigned char *rfp;
66 short *bp;
/* spar is only consulted by the MMX paths; the scalar loops ignore it. */
/* NOTE(review): presumably set when only the first value of the block is */
/* significant - confirm where sparse[] is written. */
67 int spar = slice->sparse[comp];
68 /* color component index */
69 cc = (comp < 4) ? 0 : (comp & 1) + 1;
/* The branches below pick rfp (address of the first destination pixel) and */
/* iincr (distance in bytes between successive destination rows). */
71 if(cc == 0)
73 /* luminance */
74 if(video->pict_struct == FRAME_PICTURE)
76 if(dct_type)
78 /* field DCT coding */
/* Rows are interleaved: start on row by or by + 1 and step two picture rows. */
79 rfp = video->newframe[0] +
80 video->coded_picture_width * (by + ((comp & 2) >> 1)) + bx + ((comp & 1) << 3);
81 iincr = (video->coded_picture_width << 1);
83 else
85 /* frame DCT coding */
/* (comp & 2) << 2 moves the lower pair of blocks down 8 rows, */
/* (comp & 1) << 3 moves the right-hand blocks right 8 pixels. */
86 rfp = video->newframe[0] +
87 video->coded_picture_width * (by + ((comp & 2) << 2)) + bx + ((comp & 1) << 3);
88 iincr = video->coded_picture_width;
91 else
93 /* field picture */
/* Same block layout as frame DCT, but every row step is two picture rows. */
94 rfp = video->newframe[0] +
95 (video->coded_picture_width << 1) * (by + ((comp & 2) << 2)) + bx + ((comp & 1) << 3);
96 iincr = (video->coded_picture_width << 1);
99 else
101 /* chrominance */
103 /* scale coordinates */
/* Halve bx unless the format is 4:4:4; halve by only for 4:2:0. */
104 if(video->chroma_format != CHROMA444) bx >>= 1;
105 if(video->chroma_format == CHROMA420) by >>= 1;
106 if(video->pict_struct == FRAME_PICTURE)
108 if(dct_type && (video->chroma_format != CHROMA420))
110 /* field DCT coding */
111 rfp = video->newframe[cc]
112 + video->chrom_width * (by + ((comp & 2) >> 1)) + bx + (comp & 8);
113 iincr = (video->chrom_width << 1);
115 else
117 /* frame DCT coding */
118 rfp = video->newframe[cc]
119 + video->chrom_width * (by + ((comp & 2) << 2)) + bx + (comp & 8);
120 iincr = video->chrom_width;
123 else
125 /* field picture */
126 rfp = video->newframe[cc]
127 + (video->chrom_width << 1) * (by + ((comp & 2) << 2)) + bx + (comp & 8);
128 iincr = (video->chrom_width << 1);
132 bp = slice->block[comp];
/* addflag != 0: frame pixel = saturate(frame pixel + block value). */
134 if(addflag)
136 #ifdef HAVE_MMX
137 if(video->have_mmx)
/* NOTE(review): as far as visible here, all four asm statements in this */
/* function write to registers that are bound to input operands (ecx through */
/* "loop", the pointer registers through leal/addl) and declare no memory or */
/* MMX clobbers.  GCC's extended-asm rules do not permit modifying inputs - */
/* confirm against the complete file before changing anything.  The 32-bit */
/* leal/addl on pointer operands also look i386-only - confirm. */
139 if(spar)
/* Sparse add: the first block value is broadcast to four 16-bit lanes and */
/* added to all 8 pixels of each of the 8 rows (ecx counts the rows). */
141 __asm__ __volatile__(
142 "movq (%2), %%mm6\n" /* 4 blockvals */
143 "pxor %%mm4, %%mm4\n"
144 "punpcklwd %%mm6, %%mm6\n"
145 "punpcklwd %%mm6, %%mm6\n"
146 ".align 8\n"
147 "1:"
148 "movq (%1), %%mm0\n" /* 8 rindex1 */
149 "movq %%mm0, %%mm2\n"
150 "punpcklbw %%mm4, %%mm0\n"
151 "punpckhbw %%mm4, %%mm2\n"
152 "paddw %%mm6, %%mm0\n"
153 "paddw %%mm6, %%mm2\n"
155 "packuswb %%mm2, %%mm0\n"
156 "movq %%mm0, (%1)\n"
158 "leal (%1, %3), %1\n"
159 "loop 1b\n"
160 : /* scr dest */
161 : "c" (8),"r" (rfp), "r" (bp), "r" (iincr)
164 else
/* Full add: per row, widen 8 pixels to 16 bits, add the row's 8 block */
/* values (16 bytes of bp), pack back with unsigned saturation and store. */
166 __asm__ __volatile__(
167 "pxor %%mm4, %%mm4\n"
169 ".align 8\n"
170 "1:"
171 "movq (%2), %%mm0\n" /* 8 rfp 0 1 2 3 4 5 6 7*/
172 "movq (%1), %%mm6\n" /* 4 blockvals 0 1 2 3 */
174 "movq %%mm0, %%mm2\n"
175 "movq 8(%1), %%mm5\n" /* 4 blockvals 0 1 2 3 */
176 "punpcklbw %%mm4, %%mm0\n" /* 0 2 4 6 */
177 "punpckhbw %%mm4, %%mm2\n" /* 1 3 5 7 */
179 "paddw %%mm6, %%mm0\n"
180 "paddw %%mm5, %%mm2\n"
181 "packuswb %%mm2, %%mm0\n"
183 "addl $16, %1\n"
184 "movq %%mm0, (%2)\n"
186 "leal (%2,%3), %2\n"
187 "loop 1b\n"
188 : /* scr dest */
189 : "c" (8),"r" (bp), "r" (rfp), "r" (iincr)
193 else
194 #endif
/* Scalar add, used when have_mmx is zero or HAVE_MMX is not defined. */
195 for(i = 0; i < 8; i++)
197 rfp[0] = CLIP(bp[0] + rfp[0]);
198 rfp[1] = CLIP(bp[1] + rfp[1]);
199 rfp[2] = CLIP(bp[2] + rfp[2]);
200 rfp[3] = CLIP(bp[3] + rfp[3]);
201 rfp[4] = CLIP(bp[4] + rfp[4]);
202 rfp[5] = CLIP(bp[5] + rfp[5]);
203 rfp[6] = CLIP(bp[6] + rfp[6]);
204 rfp[7] = CLIP(bp[7] + rfp[7]);
205 rfp += iincr;
206 bp += 8;
/* addflag == 0: frame pixel = saturate(block value + 128). */
209 else
211 #ifdef HAVE_MMX
212 if(video->have_mmx)
214 if(spar)
/* Sparse store: (first block value + 128) is saturated to one byte, */
/* replicated across 8 bytes and written to the block's rows. */
/* NOTE(review): the stores visible here cover the rows at 1..7 times iincr */
/* from rfp; no store to (%0) before the first address update is visible. */
/* Check the complete file to see whether the first row is really skipped. */
216 __asm__ __volatile__(
217 "movd (%2), %%mm0\n" /* " 0 0 0 v1" */
218 "punpcklwd %%mm0, %%mm0\n" /* " 0 0 v1 v1" */
219 "punpcklwd %%mm0, %%mm0\n"
220 "paddw MMX_128, %%mm0\n"
221 "packuswb %%mm0, %%mm0\n"
222 "leal (%0,%1,2), %%eax\n"
224 "movq %%mm0, (%0, %1)\n"
225 "movq %%mm0, (%%eax)\n"
226 "leal (%%eax,%1,2), %0\n"
227 "movq %%mm0, (%%eax, %1)\n"
229 "movq %%mm0, (%0)\n"
230 "leal (%0,%1,2), %%eax\n"
231 "movq %%mm0, (%0, %1)\n"
233 "movq %%mm0, (%%eax)\n"
234 "movq %%mm0, (%%eax, %1)\n"
236 : "D" (rfp), "c" (iincr), "r" (bp)
237 : "eax");
239 else
/* Full store: each of the 4 iterations converts 16 block values (32 bytes */
/* of bp) and writes two rows, then advances the destination by 2 * iincr. */
241 __asm__ __volatile__(
242 "movq MMX_128,%%mm4\n"
243 ".align 8\n"
244 "1:"
245 "movq (%1), %%mm0\n"
246 "movq 8(%1), %%mm1\n"
247 "paddw %%mm4, %%mm0\n"
249 "movq 16(%1), %%mm2\n"
250 "paddw %%mm4, %%mm1\n"
252 "movq 24(%1), %%mm3\n"
253 "paddw %%mm4, %%mm2\n"
255 "packuswb %%mm1, %%mm0\n"
256 "paddw %%mm4, %%mm3\n"
258 "addl $32, %1\n"
259 "packuswb %%mm3, %%mm2\n"
261 "movq %%mm0, (%2)\n"
263 "movq %%mm2, (%2,%3)\n"
265 "leal (%2,%3,2), %2\n"
266 "loop 1b\n"
268 : "c" (4), "r" (bp), "r" (rfp), "r" (iincr)
272 else
273 #endif
/* Scalar store, used when have_mmx is zero or HAVE_MMX is not defined. */
274 for(i = 0; i < 8; i++)
276 rfp[0] = CLIP(bp[0] + 128);
277 rfp[1] = CLIP(bp[1] + 128);
278 rfp[2] = CLIP(bp[2] + 128);
279 rfp[3] = CLIP(bp[3] + 128);
280 rfp[4] = CLIP(bp[4] + 128);
281 rfp[5] = CLIP(bp[5] + 128);
282 rfp[6] = CLIP(bp[6] + 128);
283 rfp[7] = CLIP(bp[7] + 128);
284 rfp+= iincr;
285 bp += 8;
288 return 0;
/*
 * Decode the macroblocks held in slice->slice_buffer and write the
 * reconstructed pixels into the frames of slice->video.
 *
 * The loop runs until the buffer's current_position reaches buffer_size.
 * Returns 0 when the slice ends normally, and 1 when slice->fault is raised
 * by a helper or the macroblock address reaches the macroblock count of the
 * picture.
 */
291 int mpeg3_decode_slice(mpeg3_slice_t *slice)
293 mpeg3video_t *video = slice->video;
294 int comp;
295 int mb_type, cbp, motion_type = 0, dct_type;
296 int macroblock_address, mba_inc, mba_max;
297 int slice_vert_pos_ext;
298 unsigned int code;
299 int bx, by;
300 int dc_dct_pred[3];
301 int mv_count, mv_format, mvscale;
302 int pmv[2][2][2], mv_field_sel[2][2];
303 int dmv, dmvector[2];
304 int qs;
305 int stwtype, stwclass;
306 int snr_cbp;
307 int i;
308 mpeg3_slice_buffer_t *slice_buffer = slice->slice_buffer;
310 /* number of macroblocks per picture */
311 mba_max = video->mb_width * video->mb_height;
313 /* field picture has half as many macroblocks as frame */
314 if(video->pict_struct != FRAME_PICTURE)
315 mba_max >>= 1;
317 /* macroblock address */
318 macroblock_address = 0;
319 /* first macroblock in slice is not skipped */
320 mba_inc = 0;
321 slice->fault = 0;
/* First 32 bits of the buffer; only the low 8 bits are used later, as the */
/* slice's vertical position.  Presumably this is the slice start code - */
/* TODO confirm. */
323 code = mpeg3slice_getbits(slice_buffer, 32);
324 /* decode slice header (may change quant_scale) */
325 slice_vert_pos_ext = mpeg3video_getslicehdr(slice, video);
327 /* reset all DC coefficient and motion vector predictors */
328 dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
329 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
330 pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
/* One iteration per macroblock; i == 0 identifies the first one. */
332 for(i = 0;
333 slice_buffer->current_position < slice_buffer->buffer_size;
334 i++)
/* mba_inc == 0 means the previous address increment has been used up and a */
/* new one must be read from the stream. */
336 if(mba_inc == 0)
338 /* Done */
339 if(!mpeg3slice_showbits(slice_buffer, 23)) return 0;
340 /* decode macroblock address increment */
341 mba_inc = mpeg3video_get_macroblock_address(slice);
343 if(slice->fault) return 1;
345 if(i == 0)
347 /* Get the macroblock_address */
/* Row = (slice_vert_pos_ext << 7) + low byte of code - 1; the column comes */
/* from the first address increment. */
348 macroblock_address = ((slice_vert_pos_ext << 7) + (code & 255) - 1) * video->mb_width + mba_inc - 1;
349 /* first macroblock in slice: not skipped */
350 mba_inc = 1;
354 if(slice->fault) return 1;
/* NOTE(review): only the upper bound is checked; a negative */
/* macroblock_address (for example when (code & 255) is 0) is not rejected */
/* here and would give negative bx/by below - confirm whether the inputs */
/* rule this out. */
356 if(macroblock_address >= mba_max)
358 /* mba_inc points beyond picture dimensions */
359 /*fprintf(stderr, "mpeg3_decode_slice: too many macroblocks in picture\n"); */
360 return 1;
363 /* not skipped */
364 if(mba_inc == 1)
366 mpeg3video_macroblock_modes(slice,
367 video,
368 &mb_type,
369 &stwtype,
370 &stwclass,
371 &motion_type,
372 &mv_count,
373 &mv_format,
374 &dmv,
375 &mvscale,
376 &dct_type);
378 if(slice->fault) return 1;
380 if(mb_type & MB_QUANT)
/* A new 5-bit quantiser code: for MPEG-2 it is mapped through the */
/* non-linear table or doubled, depending on qscale_type; otherwise it is */
/* used as is. */
382 qs = mpeg3slice_getbits(slice_buffer, 5);
384 if(video->mpeg2)
385 slice->quant_scale = video->qscale_type ? mpeg3_non_linear_mquant_table[qs] : (qs << 1);
386 else
387 slice->quant_scale = qs;
389 if(video->scalable_mode == SC_DP)
390 /* make sure quant_scale is valid */
/* NOTE(review): this self-assignment has no effect. */
391 slice->quant_scale = slice->quant_scale;
394 /* motion vectors */
397 /* decode forward motion vectors */
398 if((mb_type & MB_FORWARD) || ((mb_type & MB_INTRA) && video->conceal_mv))
400 if(video->mpeg2)
401 mpeg3video_motion_vectors(slice,
402 video,
403 pmv,
404 dmvector,
405 mv_field_sel,
407 mv_count,
408 mv_format,
409 video->h_forw_r_size,
410 video->v_forw_r_size,
411 dmv,
412 mvscale);
413 else
414 mpeg3video_motion_vector(slice,
415 video,
416 pmv[0][0],
417 dmvector,
418 video->forw_r_size,
419 video->forw_r_size,
422 video->full_forw);
424 if(slice->fault) return 1;
426 /* decode backward motion vectors */
427 if(mb_type & MB_BACKWARD)
429 if(video->mpeg2)
430 mpeg3video_motion_vectors(slice,
431 video,
432 pmv,
433 dmvector,
434 mv_field_sel,
436 mv_count,
437 mv_format,
438 video->h_back_r_size,
439 video->v_back_r_size,
441 mvscale);
442 else
443 mpeg3video_motion_vector(slice,
444 video,
445 pmv[0][1],
446 dmvector,
447 video->back_r_size,
448 video->back_r_size,
451 video->full_back);
454 if(slice->fault) return 1;
456 /* remove marker_bit */
457 if((mb_type & MB_INTRA) && video->conceal_mv)
458 mpeg3slice_flushbit(slice_buffer);
460 /* macroblock_pattern */
/* cbp gets one bit per block of the macroblock; the bit for block comp is */
/* 1 << (blk_cnt - 1 - comp). */
461 if(mb_type & MB_PATTERN)
463 cbp = mpeg3video_get_cbp(slice);
464 if(video->chroma_format == CHROMA422)
466 /* coded_block_pattern_1 */
467 cbp = (cbp << 2) | mpeg3slice_getbits2(slice_buffer);
469 else
470 if(video->chroma_format == CHROMA444)
472 /* coded_block_pattern_2 */
473 cbp = (cbp << 6) | mpeg3slice_getbits(slice_buffer, 6);
476 else
/* No pattern in the stream: every block is coded for intra macroblocks, */
/* none otherwise. */
477 cbp = (mb_type & MB_INTRA) ? ((1 << video->blk_cnt) - 1) : 0;
479 if(slice->fault) return 1;
480 /* decode blocks */
481 mpeg3video_clearblock(slice, 0, video->blk_cnt);
482 for(comp = 0; comp < video->blk_cnt; comp++)
484 if(cbp & (1 << (video->blk_cnt - comp - 1)))
486 if(mb_type & MB_INTRA)
488 if(video->mpeg2)
489 mpeg3video_getmpg2intrablock(slice, video, comp, dc_dct_pred);
490 else
491 mpeg3video_getintrablock(slice, video, comp, dc_dct_pred);
493 else
495 if(video->mpeg2)
496 mpeg3video_getmpg2interblock(slice, video, comp);
497 else
498 mpeg3video_getinterblock(slice, video, comp);
500 if(slice->fault) return 1;
504 /* reset intra_dc predictors */
505 if(!(mb_type & MB_INTRA))
506 dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
508 /* reset motion vector predictors */
509 if((mb_type & MB_INTRA) && !video->conceal_mv)
511 /* intra mb without concealment motion vectors */
512 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
513 pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
516 if((video->pict_type == P_TYPE) && !(mb_type & (MB_FORWARD | MB_INTRA)))
518 /* non-intra mb without forward mv in a P picture */
519 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
521 /* derive motion_type */
522 if(video->pict_struct == FRAME_PICTURE)
523 motion_type = MC_FRAME;
524 else
526 motion_type = MC_FIELD;
527 /* predict from field of same parity */
/* NOTE(review): only mv_field_sel[0][0] is set here, while the */
/* skipped-macroblock branch further down sets both [0][0] and [0][1] - */
/* confirm that the difference is intended. */
528 mv_field_sel[0][0] = (video->pict_struct == BOTTOM_FIELD);
532 if(stwclass == 4)
534 /* purely spatially predicted macroblock */
535 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
536 pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
539 else
541 /* mba_inc!=1: skipped macroblock */
542 mpeg3video_clearblock(slice, 0, video->blk_cnt);
544 /* reset intra_dc predictors */
545 dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
547 /* reset motion vector predictors */
548 if(video->pict_type == P_TYPE)
549 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
551 /* derive motion_type */
552 if(video->pict_struct == FRAME_PICTURE)
553 motion_type = MC_FRAME;
554 else
556 motion_type = MC_FIELD;
557 /* predict from field of same parity */
558 mv_field_sel[0][0] = mv_field_sel[0][1] = (video->pict_struct == BOTTOM_FIELD);
561 /* skipped I are spatial-only predicted, */
562 /* skipped P and B are temporal-only predicted */
563 stwtype = (video->pict_type == I_TYPE) ? 8 : 0;
565 /* clear MB_INTRA */
/* mb_type keeps the value from the previous macroblock apart from this bit. */
566 mb_type &= ~MB_INTRA;
568 /* no block data */
569 cbp = 0;
/* snr_cbp is never set to anything else in this function, so the test */
/* below depends on cbp alone. */
572 snr_cbp = 0;
574 /* pixel coordinates of top left corner of current macroblock */
575 bx = 16 * (macroblock_address % video->mb_width);
576 by = 16 * (macroblock_address / video->mb_width);
578 /* motion compensation */
579 if(!(mb_type & MB_INTRA))
580 mpeg3video_reconstruct(video,
581 bx,
582 by,
583 mb_type,
584 motion_type,
585 pmv,
586 mv_field_sel,
587 dmvector,
588 stwtype);
590 /* copy or add block data into picture */
591 for(comp = 0; comp < video->blk_cnt; comp++)
593 if((cbp | snr_cbp) & (1 << (video->blk_cnt - 1 - comp)))
/* Inverse DCT of the coded block, in place. */
595 #ifdef HAVE_MMX
596 if(video->have_mmx)
597 IDCT_mmx(slice->block[comp]);
598 else
599 #endif
600 mpeg3video_idct_conversion(slice->block[comp]);
/* The last argument is addflag: non-intra blocks are added to the */
/* prediction, intra blocks overwrite the frame. */
602 mpeg3video_addblock(slice,
603 video,
604 comp,
605 bx,
606 by,
607 dct_type,
608 (mb_type & MB_INTRA) == 0);
612 /* advance to next macroblock */
613 macroblock_address++;
614 mba_inc--;
617 return 0;
620 void mpeg3_slice_loop(mpeg3_slice_t *slice)
622 mpeg3video_t *video = slice->video;
623 int result = 1;
625 while(!slice->done)
627 pthread_mutex_lock(&(slice->input_lock));
629 if(!slice->done)
631 /* Get a buffer to decode */
632 result = 1;
633 pthread_mutex_lock(&(video->slice_lock));
634 if(slice->buffer_step > 0)
636 while(slice->current_buffer <= slice->last_buffer)
638 if(!video->slice_buffers[slice->current_buffer].done &&
639 slice->current_buffer <= slice->last_buffer)
641 result = 0;
642 break;
644 slice->current_buffer += slice->buffer_step;
647 else
649 while(slice->current_buffer >= slice->last_buffer)
651 if(!video->slice_buffers[slice->current_buffer].done &&
652 slice->current_buffer >= slice->last_buffer)
654 result = 0;
655 break;
657 slice->current_buffer += slice->buffer_step;
661 /* Got one */
662 if(!result && slice->current_buffer >= 0 && slice->current_buffer < video->total_slice_buffers)
664 slice->slice_buffer = &(video->slice_buffers[slice->current_buffer]);
665 slice->slice_buffer->done = 1;
666 pthread_mutex_unlock(&(video->slice_lock));
667 pthread_mutex_unlock(&(slice->input_lock));
668 mpeg3_decode_slice(slice);
669 pthread_mutex_unlock(&(slice->slice_buffer->completion_lock));
671 else
672 /* Finished with all */
674 pthread_mutex_unlock(&(slice->completion_lock));
675 pthread_mutex_unlock(&(video->slice_lock));
679 pthread_mutex_unlock(&(slice->output_lock));
683 int mpeg3_new_slice_decoder(void *video, mpeg3_slice_t *slice)
685 pthread_attr_t attr;
686 pthread_mutexattr_t mutex_attr;
688 slice->video = video;
689 slice->done = 0;
690 pthread_mutexattr_init(&mutex_attr);
691 // pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
692 pthread_mutex_init(&(slice->input_lock), &mutex_attr);
693 pthread_mutex_lock(&(slice->input_lock));
694 pthread_mutex_init(&(slice->output_lock), &mutex_attr);
695 pthread_mutex_lock(&(slice->output_lock));
696 pthread_mutex_init(&(slice->completion_lock), &mutex_attr);
697 pthread_mutex_lock(&(slice->completion_lock));
699 pthread_attr_init(&attr);
700 pthread_create(&(slice->tid), &attr, (void*)mpeg3_slice_loop, slice);
702 return 0;
705 int mpeg3_delete_slice_decoder(mpeg3_slice_t *slice)
707 slice->done = 1;
708 pthread_mutex_unlock(&(slice->input_lock));
709 pthread_join(slice->tid, 0);
710 pthread_mutex_destroy(&(slice->input_lock));
711 pthread_mutex_destroy(&(slice->output_lock));
712 return 0;