r1006: configure: Use libx264_pic instead of libx264 if available.
[cinelerra_cv/mob.git] / cinelerra / maskengine.C
blob0f861b12e7eabe9d886da22af7d82ec415a5111a
#include "bcsignals.h"
#include "condition.h"
#include "clip.h"
#include "maskauto.h"
#include "maskautos.h"
#include "maskengine.h"
#include "mutex.h"
#include "vframe.h"

#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include "feather.h"
/**
 * Returns the time elapsed since *start_time, in microseconds.
 *
 * The current time is read with gettimeofday() and *start_time is
 * subtracted from it field by field.
 *
 * @param start_time  Earlier timestamp in gettimeofday() format.
 *                    It is only read, never modified.
 * @return  (now - *start_time) expressed in microseconds.
 */
int64_t get_difference(struct timeval *start_time)
{
	struct timeval new_time;

	gettimeofday(&new_time, 0);

	new_time.tv_usec -= start_time->tv_usec;
	new_time.tv_sec -= start_time->tv_sec;
	// The microsecond field went negative: borrow one second.
	if(new_time.tv_usec < 0)
	{
		new_time.tv_usec += 1000000;
		new_time.tv_sec--;
	}

	// Widen before multiplying so the product cannot overflow a 32 bit time_t.
	return (int64_t)new_time.tv_sec * 1000000 + 
		(int64_t)new_time.tv_usec;
}
39 MaskPackage::MaskPackage()
43 MaskPackage::~MaskPackage()
51 MaskUnit::MaskUnit(MaskEngine *engine)
52  : LoadClient(engine)
54         this->engine = engine;
55         row_spans_h = 0;
56         row_spans = 0;
60 MaskUnit::~MaskUnit()
62         if (row_spans)
63         {
64                 for (int i = 0; i < row_spans_h; i++) 
65                         free(row_spans[i]);
66                 delete [] row_spans;
67         }
/* Square of x. Only defined here when no earlier header already provided SQR. */
/* The argument is expanded twice, so do not pass expressions with side effects. */
#if !defined(SQR)
#define SQR(x) ((x) * (x))
#endif
89 inline void MaskUnit::draw_line_clamped(
90         int draw_x1, 
91         int draw_y1, 
92         int draw_x2, 
93         int draw_y2,
94         int w,
95         int h,
96         int hoffset)
98 //printf("MaskUnit::draw_line_clamped 1 %d %d %d %d\n", x1, y1, x2, y2);
99         if (draw_y1 == draw_y2) return; 
101         if(draw_y2 < draw_y1)
102         { /* change the order */
103                 int tmp;
104                 tmp = draw_x1;
105                 draw_x1 = draw_x2;
106                 draw_x2 = tmp;
107                 tmp = draw_y1;
108                 draw_y1 = draw_y2;
109                 draw_y2 = tmp;
110         }
112         float slope = ((float)draw_x2 - draw_x1) / ((float)draw_y2 - draw_y1); 
113         w--;
114         for(int y_i = draw_y1; y_i < draw_y2; y_i++) 
115         { 
116                 if (y_i >= h) 
117                         return; // since y gets larger, there is no point in continuing
118                 else if(y_i >= 0) 
119                 { 
120                         int x = (int)(slope * (y_i - draw_y1) + draw_x1); 
121                         int x_i = CLIP(x, 0, w); 
123                         /* now insert into span in order */
124                         short *span = row_spans[y_i + hoffset]; 
125                         if (span[0] >= span[1]) { /* do the reallocation */
126                                 span[1] *= 2;
127                                 span = row_spans[y_i + hoffset] = (short *) realloc (span, span[1] * sizeof(short)); /* be careful! row_spans has to be updated! */
128                         };
130                         short index = 2;
131                         while (index < span[0]  && span[index] < x_i)
132                                 index++;
133                         for (int j = span[0]; j > index; j--) {       // move forward
134                                 span[j] = span[j-1];
135                         }
136                         span[index] = x_i;
137                         span[0] ++;
138                 } 
139         } 
142 template<class T>
143 void MaskUnit::blur_strip(float *val_p, 
144         float *val_m, 
145         float *dst, 
146         float *src, 
147         int size,
148         T max)
150         float *sp_p = src;
151         float *sp_m = src + size - 1;
152         float *vp = val_p;
153         float *vm = val_m + size - 1;
154         float initial_p = sp_p[0];
155         float initial_m = sp_m[0];
157 //printf("MaskUnit::blur_strip %d\n", size);
158         for(int k = 0; k < size; k++)
159         {
160                 int terms = (k < 4) ? k : 4;
161                 int l;
162                 for(l = 0; l <= terms; l++)
163                 {
164                         *vp += n_p[l] * sp_p[-l] - d_p[l] * vp[-l];
165                         *vm += n_m[l] * sp_m[l] - d_m[l] * vm[l];
166                 }
168                 for( ; l <= 4; l++)
169                 {
170                         *vp += (n_p[l] - bd_p[l]) * initial_p;
171                         *vm += (n_m[l] - bd_m[l]) * initial_m;
172                 }
173                 sp_p++;
174                 sp_m--;
175                 vp++;
176                 vm--;
177         }
179         for(int i = 0; i < size; i++)
180         {
181                 float sum = val_p[i] + val_m[i];
182                 CLAMP(sum, 0, max);
183                 dst[i] = sum;
184         }
189 int MaskUnit::do_feather_2(VFrame *output,
190         VFrame *input, 
191         float feather, 
192         int start_out, 
193         int end_out)
195         
196         int fint = (int)feather;
197         DO_FEATHER_N(unsigned char, uint32_t, 0xffff, fint);
202 void MaskUnit::do_feather(VFrame *output,
203         VFrame *input, 
204         float feather, 
205         int start_out, 
206         int end_out)
208 //printf("MaskUnit::do_feather %f\n", feather);
209 // Get constants
210         double constants[8];
211         double div;
212         double std_dev = sqrt(-(double)(feather * feather) / (2 * log(1.0 / 255.0)));
213         div = sqrt(2 * M_PI) * std_dev;
214         constants[0] = -1.783 / std_dev;
215         constants[1] = -1.723 / std_dev;
216         constants[2] = 0.6318 / std_dev;
217         constants[3] = 1.997  / std_dev;
218         constants[4] = 1.6803 / div;
219         constants[5] = 3.735 / div;
220         constants[6] = -0.6803 / div;
221         constants[7] = -0.2598 / div;
223         n_p[0] = constants[4] + constants[6];
224         n_p[1] = exp(constants[1]) *
225                                 (constants[7] * sin(constants[3]) -
226                                 (constants[6] + 2 * constants[4]) * cos(constants[3])) +
227                                 exp(constants[0]) *
228                                 (constants[5] * sin(constants[2]) -
229                                 (2 * constants[6] + constants[4]) * cos(constants[2]));
231         n_p[2] = 2 * exp(constants[0] + constants[1]) *
232                                 ((constants[4] + constants[6]) * cos(constants[3]) * 
233                                 cos(constants[2]) - constants[5] * 
234                                 cos(constants[3]) * sin(constants[2]) -
235                                 constants[7] * cos(constants[2]) * sin(constants[3])) +
236                                 constants[6] * exp(2 * constants[0]) +
237                                 constants[4] * exp(2 * constants[1]);
239         n_p[3] = exp(constants[1] + 2 * constants[0]) *
240                                 (constants[7] * sin(constants[3]) - 
241                                 constants[6] * cos(constants[3])) +
242                                 exp(constants[0] + 2 * constants[1]) *
243                                 (constants[5] * sin(constants[2]) - constants[4] * 
244                                 cos(constants[2]));
245         n_p[4] = 0.0;
247         d_p[0] = 0.0;
248         d_p[1] = -2 * exp(constants[1]) * cos(constants[3]) -
249                                 2 * exp(constants[0]) * cos(constants[2]);
251         d_p[2] = 4 * cos(constants[3]) * cos(constants[2]) * 
252                                 exp(constants[0] + constants[1]) +
253                                 exp(2 * constants[1]) + exp (2 * constants[0]);
255         d_p[3] = -2 * cos(constants[2]) * exp(constants[0] + 2 * constants[1]) -
256                                 2 * cos(constants[3]) * exp(constants[1] + 2 * constants[0]);
258         d_p[4] = exp(2 * constants[0] + 2 * constants[1]);
260         for(int i = 0; i < 5; i++) d_m[i] = d_p[i];
262         n_m[0] = 0.0;
263         for(int i = 1; i <= 4; i++)
264                 n_m[i] = n_p[i] - d_p[i] * n_p[0];
266         double sum_n_p, sum_n_m, sum_d;
267         double a, b;
269         sum_n_p = 0.0;
270         sum_n_m = 0.0;
271         sum_d = 0.0;
272         for(int i = 0; i < 5; i++)
273         {
274                 sum_n_p += n_p[i];
275                 sum_n_m += n_m[i];
276                 sum_d += d_p[i];
277         }
279         a = sum_n_p / (1 + sum_d);
280         b = sum_n_m / (1 + sum_d);
282         for(int i = 0; i < 5; i++)
283         {
284                 bd_p[i] = d_p[i] * a;
285                 bd_m[i] = d_m[i] * b;
286         }
309 #define DO_FEATHER(type, max) \
310 { \
311         int frame_w = input->get_w(); \
312         int frame_h = input->get_h(); \
313         int size = MAX(frame_w, frame_h); \
314         float *src = new float[size]; \
315         float *dst = new float[size]; \
316         float *val_p = new float[size]; \
317         float *val_m = new float[size]; \
318         int start_in = start_out - (int)feather; \
319         int end_in = end_out + (int)feather; \
320         if(start_in < 0) start_in = 0; \
321         if(end_in > frame_h) end_in = frame_h; \
322         int strip_size = end_in - start_in; \
323         type **in_rows = (type**)input->get_rows(); \
324         type **out_rows = (type**)output->get_rows(); \
325         int j; \
327 /* printf("DO_FEATHER 1\n"); */ \
328         for(j = 0; j < frame_w; j++) \
329         { \
330 /* printf("DO_FEATHER 1.1 %d\n", j); */ \
331                 bzero(val_p, sizeof(float) * (end_in - start_in)); \
332                 bzero(val_m, sizeof(float) * (end_in - start_in)); \
333                 for(int l = 0, k = start_in; k < end_in; l++, k++) \
334                 { \
335                         src[l] = (float)in_rows[k][j]; \
336                 } \
338                 blur_strip(val_p, val_m, dst, src, strip_size, max); \
340                 for(int l = start_out - start_in, k = start_out; k < end_out; l++, k++) \
341                 { \
342                         out_rows[k][j] = (type)dst[l]; \
343                 } \
344         } \
346         for(j = start_out; j < end_out; j++) \
347         { \
348 /* printf("DO_FEATHER 2 %d\n", j); */ \
349                 bzero(val_p, sizeof(float) * frame_w); \
350                 bzero(val_m, sizeof(float) * frame_w); \
351                 for(int k = 0; k < frame_w; k++) \
352                 { \
353                         src[k] = (float)out_rows[j][k]; \
354                 } \
356                 blur_strip(val_p, val_m, dst, src, frame_w, max); \
358                 for(int k = 0; k < frame_w; k++) \
359                 { \
360                         out_rows[j][k] = (type)dst[k]; \
361                 } \
362         } \
364 /* printf("DO_FEATHER 3\n"); */ \
366         delete [] src; \
367         delete [] dst; \
368         delete [] val_p; \
369         delete [] val_m; \
370 /* printf("DO_FEATHER 4\n"); */ \
380 //printf("do_feather %d\n", frame->get_color_model());
381         switch(input->get_color_model())
382         {
383                 case BC_A8:
384                         DO_FEATHER(unsigned char, 0xff);
385                         break;
386                 
387                 case BC_A16:
388                         DO_FEATHER(uint16_t, 0xffff);
389                         break;
390                 
391                 case BC_A_FLOAT:
392                         DO_FEATHER(float, 1.0f);
393                         break;
394         }
401 void MaskUnit::process_package(LoadPackage *package)
403         MaskPackage *ptr = (MaskPackage*)package;
405         int start_row = SHRT_MIN;         // part for which mask exists
406         int end_row;
407         if(engine->recalculate)
408         {
409                 VFrame *mask;
410                 if(engine->feather > 0) 
411                         mask = engine->temp_mask;
412                 else
413                         mask = engine->mask;
415 SET_TRACE
416 // Generated oversampling frame
417                 int mask_w = mask->get_w();
418                 int mask_h = mask->get_h();
419                 int mask_color_model = mask->get_color_model();
420                 int oversampled_package_w = mask_w * OVERSAMPLE;
421                 int oversampled_package_h = (ptr->row2 - ptr->row1) * OVERSAMPLE;
422 //printf("MaskUnit::process_package 1\n");
424 SET_TRACE
426                 int local_first_nonempty_rowspan = SHRT_MIN;
427                 int local_last_nonempty_rowspan = SHRT_MIN;
429                 if (!row_spans || row_spans_h != mask_h * OVERSAMPLE) {
430                         int i;  
431                         if (row_spans) {   /* size change */
432                                 for (i = 0; i < row_spans_h; i++) 
433                                         free(row_spans[i]);
434                                 delete [] row_spans;
435                         }
436                         row_spans_h = mask_h * OVERSAMPLE;
437                         row_spans = new short *[mask_h * OVERSAMPLE]; 
438                         for (i= 0; i<mask_h * OVERSAMPLE; i++) {
439                                 /* we use malloc so we can use realloc */
440                                 row_spans[i] = (short *)malloc(sizeof(short) * NUM_SPANS);
441                                 /* [0] is initialized later */
442                                 row_spans[i][1] = NUM_SPANS;
443                         }
444                 }
446 SET_TRACE
447 //printf("MaskUnit::process_package 1 %d\n", engine->point_sets.total);
449 SET_TRACE
451 // Draw bezier curves onto span buffer
452 //struct timeval start_time;
453 //gettimeofday(&start_time, 0);
455                 for(int k = 0; k < engine->point_sets.total; k++)
456                 {
457                         int old_x, old_y;
458                         old_x = SHRT_MIN; // sentinel
459                         ArrayList<MaskPoint*> *points = engine->point_sets.values[k];
461                         if(points->total < 2) continue;
462 //printf("MaskUnit::process_package 2 %d %d\n", k, points->total);
463                         for (int i = ptr->row1 * OVERSAMPLE; i < ptr->row2 * OVERSAMPLE; i++) 
464                                 row_spans[i][0] = 2; /* initialize to zero */ 
465                         (ptr->row1*OVERSAMPLE, ptr->row2*OVERSAMPLE); // init just my rows
466                         for(int i = 0; i < points->total; i++)
467                         {
468                                 MaskPoint *point1 = points->values[i];
469                                 MaskPoint *point2 = (i >= points->total - 1) ? 
470                                         points->values[0] : 
471                                         points->values[i + 1];
473                                 float x0 = point1->x;
474                                 float y0 = point1->y;
475                                 float x1 = point1->x + point1->control_x2;
476                                 float y1 = point1->y + point1->control_y2;
477                                 float x2 = point2->x + point2->control_x1;
478                                 float y2 = point2->y + point2->control_y1;
479                                 float x3 = point2->x;
480                                 float y3 = point2->y;
482                                 // possible optimization here... since these coordinates are bounding box for curve
483                                 // we can continue with next curve if they are out of our range
485                                 // forward differencing bezier curves implementation taken from GPL code at
486                                 // http://cvs.sourceforge.net/viewcvs.py/guliverkli/guliverkli/src/subtitles/Rasterizer.cpp?rev=1.3
490                                 float cx3, cx2, cx1, cx0, cy3, cy2, cy1, cy0;
493                                 // [-1 +3 -3 +1]
494                                 // [+3 -6 +3  0]
495                                 // [-3 +3  0  0]
496                                 // [+1  0  0  0]
498                                 cx3 = (-  x0 + 3*x1 - 3*x2 + x3) * OVERSAMPLE;
499                                 cx2 = ( 3*x0 - 6*x1 + 3*x2) * OVERSAMPLE;
500                                 cx1 = (-3*x0 + 3*x1) * OVERSAMPLE;
501                                 cx0 = (   x0) * OVERSAMPLE;
503                                 cy3 = (-  y0 + 3*y1 - 3*y2 + y3) * OVERSAMPLE;
504                                 cy2 = ( 3*y0 - 6*y1 + 3*y2) * OVERSAMPLE;
505                                 cy1 = (-3*y0 + 3*y1) * OVERSAMPLE;
506                                 cy0 = (   y0 - ptr->row1) * OVERSAMPLE;
508                                 float maxaccel1 = fabs(2*cy2) + fabs(6*cy3);
509                                 float maxaccel2 = fabs(2*cx2) + fabs(6*cx3);
511                                 float maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
512                                 float h = 1.0;
514                                 if(maxaccel > 8.0 * OVERSAMPLE) h = sqrt((8.0 * OVERSAMPLE) / maxaccel);
516                                 for(float t = 0.0; t < 1.0; t += h)
517                                 {
518                                         int x = (int) (cx0 + t*(cx1 + t*(cx2 + t*cx3)));
519                                         int y = (int) (cy0 + t*(cy1 + t*(cy2 + t*cy3)));
521                                         if (old_x != SHRT_MIN) 
522                                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
523                                         old_x = x;
524                                         old_y = y;
525                                 }
527                                 int x = (int)(x3 * OVERSAMPLE);
528                                 int y = (int)((y3 - ptr->row1) * OVERSAMPLE);
529                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
530                                 old_x = (int)x;
531                                 old_y = (int)y;
532                 
533                         }
534 //printf("MaskUnit::process_package 1\n");
536                         // Now we have ordered spans ready!
537                         //printf("Segment : %i , row1: %i\n", oversampled_package_h, ptr->row1);
538                         uint16_t value;
539                         if (mask_color_model == BC_A8)
540                                 value = (int)((float)engine->value / 100 * 0xff);
541                         else
542                                 value = (int)((float)engine->value / 100 * 0xffff);     // also for BC_A_FLOAT
544                         /* Scaneline sampling, inspired by Graphics gems I, page 81 */
545                         for (int i = ptr->row1; i < ptr->row2; i++) 
546                         {
547                                 short min_x = SHRT_MAX;
548                                 short max_x = SHRT_MIN;
549                                 int j;                          /* universal counter for 0..OVERSAMPLE-1 */
550                                 short *span;                    /* current span - set inside loops with j */
551                                 short span_p[OVERSAMPLE];       /* pointers to current positions in spans */
552                                 #define P (span_p[j])           /* current span pointer */
553                                 #define MAXP (span[0])          /* current span length */
554                                 int num_empty_spans = 0;
555                                 /* get the initial span pointers ready */
556                                 for (j = 0; j < OVERSAMPLE; j++)
557                                 {       
558                                         span = row_spans[j + i * OVERSAMPLE];
559                                         P = 2;              /* starting pointers to spans */
560                                                 /* hypotetical hypotetical fix goes here: take care that there is maximum one empty span for every subpixel */ 
561                                         if (MAXP != 2) {                                        /* if span is not empty */
562                                                 if (span[2] < min_x) min_x = span[2];           /* take start of the first span */
563                                                 if (span[MAXP-1] > max_x) max_x = span[MAXP-1]; /* and end of last */
564                                         } else              
565                                         {       /* span is empty */
566                                                 num_empty_spans ++;     
567                                         }       
568                                 }
569                                 if (num_empty_spans == OVERSAMPLE)
570                                         continue; /* no work for us here */
571                                 else 
572                                 {       /* if we have engaged first nonempty rowspan... remember it to speed up mask applying */
573                                         if (local_first_nonempty_rowspan < 0 || i < local_first_nonempty_rowspan) 
574                                                 local_first_nonempty_rowspan = i;  
575                                         if (i > local_last_nonempty_rowspan) local_last_nonempty_rowspan = i;
576                                 }
577                                 /* we have some pixels to fill, do coverage calculation for span */
579                                 void *output_row = (unsigned char*)mask->get_rows()[i];
580                                 min_x = min_x / OVERSAMPLE;
581                                 max_x = (max_x + OVERSAMPLE - 1) / OVERSAMPLE;
582                                 
583                                 /* printf("row %i, pixel range: %i %i, spans0: %i\n", i, min_x, max_x, row_spans[i*OVERSAMPLE][0]-2); */
585                                 /* this is not a full loop, since we jump trough h if possible */
586                                 for (int h = min_x; h <= max_x; h++) 
587                                 {
588                                         short pixelleft = h * OVERSAMPLE;  /* leftmost subpixel of pixel*/
589                                         short pixelright = pixelleft + OVERSAMPLE - 1; /* rightmost subpixel of pixel */
590                                         uint32_t coverage = 0;
591                                         int num_left = 0;               /* number of spans that have start left of the next pixel */
592                                         short right_end = SHRT_MAX;     /* leftmost end of any span - right end of a full scanline */
593                                         short right_start = SHRT_MAX;   /* leftmost start of any span - left end of empty scanline */
595                                         for (j=0; j< OVERSAMPLE; j++) 
596                                         {       
597                                                 char chg = 1;
598                                                 span = row_spans[j + i * OVERSAMPLE];
599                                                 while (P < MAXP && chg)
600                                                 {
601                                                 //      printf("Sp: %i %i\n", span[P], span[P+1]);
602                                                         if (span[P] == span[P+1])           /* ignore empty spans */
603                                                         {
604                                                                 P +=2;
605                                                                 continue;
606                                                         }
607                                                         if (span[P] <= pixelright)          /* if span start is before the end of pixel */
608                                                                 coverage += MIN(span[P+1], pixelright)  /* 'clip' the span to pixel */
609                                                                           - MAX(span[P], pixelleft) + 1;
610                                                         if (span[P+1] <= pixelright) 
611                                                                 P += 2;
612                                                         else 
613                                                                 chg = 0;
614                                                 } 
615                                                 if (P == MAXP) 
616                                                         num_left = -OVERSAMPLE; /* just take care that num_left cannot equal OVERSAMPLE or zero again */
617                                                 else    
618                                                 { 
619                                                         if (span[P] <= pixelright)  /* if span starts before subpixel in the pixel on the right */
620                                                         {    /* useful for determining filled space till next non-fully-filled pixel */
621                                                                 num_left ++;                                            
622                                                                 if (span[P+1] < right_end) right_end = span[P+1]; 
623                                                         } else 
624                                                         {    /* useful for determining empty space till next non-empty pixel */
625                                                                 if (span[P] < right_start) right_start = span[P]; 
626                                                         }
627                                                 }
628                                         }
629                                         // calculate coverage
630                                         coverage *= value;
631                                         coverage /= OVERSAMPLE * OVERSAMPLE;
633                                         // when we have multiple masks the highest coverage wins
634                                         switch (mask_color_model)
635                                         {
636                                         case BC_A8:
637                                                 if (((unsigned char *) output_row)[h] < coverage)
638                                                         ((unsigned char*)output_row)[h] = coverage;
639                                                 break;
640                                         case BC_A16:
641                                                 if (((uint16_t *) output_row)[h] < coverage)
642                                                         ((uint16_t *) output_row)[h] = coverage;
643                                                 break;
644                                         case BC_A_FLOAT:
645                                                 if (((float *) output_row)[h] < coverage/float(0xffff))
646                                                         ((float *) output_row)[h] = coverage/float(0xffff);
647                                                 break;
648                                         }
649                                         /* possible optimization: do joining of multiple masks by span logics, not by bitmap logics*/
650                                         
651                                         if (num_left == OVERSAMPLE) 
652                                         {
653                                                 /* all current spans start more left than next pixel */
654                                                 /* this means we can probably (if lucky) draw a longer horizontal line */
655                                                 right_end = (right_end / OVERSAMPLE) - 1; /* last fully covered pixel */
656                                                 if (right_end > h)
657                                                 {
658                                                         if (mask_color_model == BC_A8) 
659                                                                 memset((char *)output_row + h + 1, value, right_end - h);
660                                                         else {
661                                                                 /* we are fucked, since there is no 16bit memset */
662                                                                 if (mask_color_model == BC_A16) {
663                                                                         for (int z = h +1; z <= right_end; z++)
664                                                                                 ((uint16_t *) output_row)[z] =  value;
665                                                                 } else {
666                                                                         for (int z = h +1; z <= right_end; z++)
667                                                                                 ((float *) output_row)[z] =  value/float(0xffff);
668                                                                 }
669                                                         }
670                                                         h = right_end;  
671                                                 }
672                                         } else 
673                                         if (num_left == 0) 
674                                         {
675                                                 /* all current spans start right of next pixel */ 
676                                                 /* this means we can probably (if lucky) skip some pixels */
677                                                 right_start = (right_start / OVERSAMPLE) - 1; /* last fully empty pixel */
678                                                 if (right_start > h)
679                                                 {
680                                                         h = right_start;
681                                                 }
682                                         }
683                                 }
684                         }
685                 }
686                 engine->protect_data.lock();
687                 if (local_first_nonempty_rowspan < engine->first_nonempty_rowspan)
688                         engine->first_nonempty_rowspan = local_first_nonempty_rowspan;
689                 if (local_last_nonempty_rowspan > engine->last_nonempty_rowspan)
690                         engine->last_nonempty_rowspan = local_last_nonempty_rowspan;
691                 engine->protect_data.unlock();
692         
694 //              int64_t dif= get_difference(&start_time);
695 //              printf("diff: %lli\n", dif);
696         }       /* END OF RECALCULATION! */
698 SET_TRACE
700         /* possible optimization: this could be useful for do_feather also */
702         // Feather polygon
703         if(engine->recalculate && engine->feather > 0) 
704         {       
705                 /* first take care that all packages are already drawn onto mask */
706                 pthread_mutex_lock(&engine->stage1_finished_mutex);
707                 engine->stage1_finished_count ++;
708                 if (engine->stage1_finished_count == engine->get_total_packages())
709                 {
710                         // let others pass
711                         pthread_cond_broadcast(&engine->stage1_finished_cond);
712                 }
713                 else
714                 {
715                         // wait until all are finished
716                         while (engine->stage1_finished_count < engine->get_total_packages())
717                                 pthread_cond_wait(&engine->stage1_finished_cond, &engine->stage1_finished_mutex);
718                 }
719                 pthread_mutex_unlock(&engine->stage1_finished_mutex);
720                 
721                 /* now do the feather */
722 //printf("MaskUnit::process_package 3 %f\n", engine->feather);
724         struct timeval start_time;
725         gettimeofday(&start_time, 0);
727         /* 
728         {
729         // EXPERIMENTAL CODE to find out how values between old and new do_feather map
730         // create a testcase and find out the closest match between do_feather_2 at 3 and do_feather
731         //                      2       3       4       5       6       7       8       10      13      15
732         // do_feather_2         3       5       7       9       11      13      15      19      25      29
733         // do_feather_1         2.683   3.401   4.139   4.768   5.315   5.819   6.271   7.093   8.170   8.844           
734         // diff                         0.718   0.738   0.629   0.547   0.504   0.452
735         // {(2,2.683),(3,3.401),(4,4.139),(5,4.768),(6,5.315),(7,5.819),(8,6.271),(10,7.093),(13,8.170),(15,8.844)}
736         // use http://mss.math.vanderbilt.edu/cgi-bin/MSSAgent/~pscrooke/MSS/fitpoly.def
737         // for calculating the coefficients
739                 VFrame *df2 = new VFrame (*engine->mask);
740                 VFrame *one_sample = new VFrame(*engine->mask);
741                 do_feather_2(df2, 
742                         engine->temp_mask, 
743                         25, 
744                         ptr->row1, 
745                         ptr->row2);
746                 float ftmp;
747                 for (ftmp = 8.15; ftmp <8.18; ftmp += 0.001) 
748                 {
749                         do_feather(one_sample, 
750                         engine->temp_mask, 
751                         ftmp, 
752                         ptr->row1, 
753                         ptr->row2);
754                         double squarediff = 0;
755                         for (int i=0; i< engine->mask->get_h(); i++)
756                                 for (int j = 0; j< engine->mask->get_w(); j++)
757                                 {
758                                         double v1= ((unsigned char *)one_sample->get_rows()[i])[j];
759                                         double v2= ((unsigned char *)df2->get_rows()[i])[j];
760                                         squarediff += (v1-v2)*(v1-v2);
761                                 }
762                         squarediff = sqrt(squarediff);
763                         printf("for value 3: ftmp: %2.3f, squarediff: %f\n", ftmp, squarediff);
764                 }
765         }
766         */      
767         
768                 int done = 0;
769                 done = do_feather_2(engine->mask,        // try if we have super fast implementation ready
770                                 engine->temp_mask,
771                                 engine->feather * 2 - 1, 
772                                 ptr->row1, 
773                                 ptr->row2);
774                 if (done) {
775                         engine->realfeather = engine->feather;
776                 }
777                 if (!done)
778                 {
779                 //      printf("not done\n");
780                         float feather = engine->feather;
781                         engine->realfeather = 0.878441 + 0.988534*feather - 0.0490204 *feather*feather  + 0.0012359 *feather*feather*feather;
782                         do_feather(engine->mask, 
783                                 engine->temp_mask, 
784                                 engine->realfeather, 
785                                 ptr->row1, 
786                                 ptr->row2); 
787                 }
788                 int64_t dif= get_difference(&start_time);
789                 printf("diff: %lli\n", dif);
790         } else
791         if (engine->feather <= 0) {
792                 engine->realfeather = 0;
793         }
794         start_row = MAX (ptr->row1, engine->first_nonempty_rowspan - (int)ceil(engine->realfeather)); 
795         end_row = MIN (ptr->row2, engine->last_nonempty_rowspan + 1 + (int)ceil(engine->realfeather));
799 // Apply mask
802 /* use the info about first and last column that are coloured from rowspan!  */
803 /* possible optimisation: also remember total spans */
804 /* possible optimisation: lookup table for  X * (max - *mask_row) / max, where max is known and mask_row and X are variables */
/*
 * Attenuates engine->output by the coverage stored in engine->mask for the
 * rows start_row .. end_row - 1 and the first mask_w pixels of each row.
 *
 *   type        sample type shared by the output frame and the mask
 *   max         value of a fully set sample of that type
 *   components  samples per output pixel; the mask has one sample per pixel
 *   do_yuv      non-zero pulls the two chroma samples towards (max + 1) / 2
 *               instead of towards zero (3-component case only)
 *
 * With 4 components only the sample at index 3 is scaled; otherwise all
 * three samples are scaled by (max - coverage) / max.
 * Expects engine, start_row, end_row and mask_w in the expanding scope.
 */
#define APPLY_MASK_SUBTRACT_ALPHA(type, max, components, do_yuv) \
{ \
	const type chroma_mid = (max + 1) / 2; \
	for(int row = start_row; row < end_row; row++) \
	{ \
		type *pixel = (type*)engine->output->get_rows()[row]; \
		const type *coverage = (type*)engine->mask->get_rows()[row]; \
		for(int col = 0; col < mask_w; col++, pixel += components, coverage++) \
		{ \
			if(components != 4) \
			{ \
				pixel[0] = pixel[0] * (max - *coverage) / max; \
				pixel[1] = pixel[1] * (max - *coverage) / max; \
				pixel[2] = pixel[2] * (max - *coverage) / max; \
				if(do_yuv) \
				{ \
					pixel[1] += chroma_mid * *coverage / max; \
					pixel[2] += chroma_mid * *coverage / max; \
				} \
			} \
			else \
			{ \
				pixel[3] = pixel[3] * (max - *coverage) / max; \
			} \
		} \
	} \
}
/*
 * Multiplies engine->output by the coverage stored in engine->mask for the
 * rows ptr->row1 .. ptr->row2 - 1 and the first mask_w pixels of each row.
 *
 *   type        sample type shared by the output frame and the mask
 *   max         value of a fully set sample of that type
 *   components  samples per output pixel; the mask has one sample per pixel
 *   do_yuv      non-zero pulls the two chroma samples towards (max + 1) / 2
 *               instead of towards zero (3-component case only)
 *
 * With 4 components only the sample at index 3 is scaled (the row pointer
 * is advanced by 3 once, so *output_row is that sample for every pixel).
 * With 3 components every sample is scaled by coverage / max.  The first
 * sample used to be computed from output_row[3], i.e. from the following
 * pixel (and from beyond the row for its last pixel); it now scales
 * output_row[0] like the other two samples.
 *
 * Expects engine, ptr and mask_w in the expanding scope.
 *
 * NOTE(review): for uint16_t the products are evaluated in int after
 * integral promotion and can exceed INT_MAX where int is 32 bits wide -
 * consider widening the intermediate.
 */
#define APPLY_MASK_MULTIPLY_ALPHA(type, max, components, do_yuv) \
{ \
	type chroma_offset = (max + 1) / 2; \
	for(int i = ptr->row1; i < ptr->row2; i++) \
	{ \
		type *output_row = (type*)engine->output->get_rows()[i]; \
		type *mask_row = (type*)engine->mask->get_rows()[i]; \
		if(components == 4) output_row += 3; \
		for(int j = mask_w; j != 0; j--) \
		{ \
			if(components == 4) \
			{ \
				*output_row = *output_row * *mask_row / max; \
			} \
			else \
			{ \
				output_row[0] = output_row[0] * *mask_row / max; \
				output_row[1] = output_row[1] * *mask_row / max; \
				output_row[2] = output_row[2] * *mask_row / max; \
				if(do_yuv) \
				{ \
					output_row[1] += chroma_offset * (max - *mask_row) / max; \
					output_row[2] += chroma_offset * (max - *mask_row) / max; \
				} \
			} \
			output_row += components; \
			mask_row += 1; \
		} \
	} \
}
874 //struct timeval start_time;
875 //gettimeofday(&start_time, 0);
877 //printf("MaskUnit::process_package 1 %d\n", engine->mode);
878         int mask_w = engine->mask->get_w();
879         switch(engine->mode)
880         {
881                 case MASK_MULTIPLY_ALPHA:
882                         switch(engine->output->get_color_model())
883                         {
884                                 case BC_RGB888:
885                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 0);
886                                         break;
887                                 case BC_YUV888:
888                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 1);
889                                         break;
890                                 case BC_YUVA8888:
891                                 case BC_RGBA8888:
892                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 4, 0);
893                                         break;
894                                 case BC_RGB161616:
895                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 0);
896                                         break;
897                                 case BC_YUV161616:
898                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 1);
899                                         break;
900                                 case BC_YUVA16161616:
901                                 case BC_RGBA16161616:
902                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 4, 0);
903                                         break;
904                                 case BC_RGB_FLOAT:
905                                         APPLY_MASK_MULTIPLY_ALPHA(float, 1.0f, 3, 0);
906                                         break;
907                                 case BC_RGBA_FLOAT:
908                                         APPLY_MASK_MULTIPLY_ALPHA(float, 1.0f, 4, 0);
909                                         break;
910                         }
911                         break;
913                 case MASK_SUBTRACT_ALPHA:
914                         switch(engine->output->get_color_model())
915                         {
916                                 case BC_RGB888:
917                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 0);
918                                         break;
919                                 case BC_YUV888:
920                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 1);
921                                         break;
922                                 case BC_YUVA8888:
923                                 case BC_RGBA8888:
924                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 4, 0);
925                                         break;
926                                 case BC_RGB161616:
927                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 0);
928                                         break;
929                                 case BC_YUV161616:
930                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 1);
931                                         break;
932                                 case BC_YUVA16161616:
933                                 case BC_RGBA16161616:
934                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 4, 0);
935                                         break;
936                                 case BC_RGB_FLOAT:
937                                         APPLY_MASK_SUBTRACT_ALPHA(float, 1.0f, 3, 0);
938                                         break;
939                                 case BC_RGBA_FLOAT:
940                                         APPLY_MASK_SUBTRACT_ALPHA(float, 1.0f, 4, 0);
941                                         break;
942                         }
943                         break;
944         }
945 //      int64_t dif= get_difference(&start_time);
946 //      printf("diff: %lli\n", dif);
947 //printf("diff2: %lli\n", get_difference(&start_time));
948 //printf("MaskUnit::process_package 4 %d\n", get_package_number());
955 MaskEngine::MaskEngine(int cpus)
956  : LoadServer(cpus, cpus )      /* these two HAVE to be the same, since packages communicate  */
957 // : LoadServer(1, 2)
959         mask = 0;
960         pthread_mutex_init(&stage1_finished_mutex, NULL);
961         pthread_cond_init(&stage1_finished_cond, NULL);
964 MaskEngine::~MaskEngine()
966         pthread_cond_destroy(&stage1_finished_cond);
967         pthread_mutex_destroy(&stage1_finished_mutex);
968         if(mask) 
969         {
970                 delete mask;
971                 delete temp_mask;
972         }
974         for(int i = 0; i < point_sets.total; i++)
975         {
976                 ArrayList<MaskPoint*> *points = point_sets.values[i];
977                 points->remove_all_objects();
978         }
979         point_sets.remove_all_objects();
982 int MaskEngine::points_equivalent(ArrayList<MaskPoint*> *new_points, 
983         ArrayList<MaskPoint*> *points)
985 //printf("MaskEngine::points_equivalent %d %d\n", new_points->total, points->total);
986         if(new_points->total != points->total) return 0;
987         
988         for(int i = 0; i < new_points->total; i++)
989         {
990                 if(!(*new_points->values[i] == *points->values[i])) return 0;
991         }
992         
993         return 1;
996 void MaskEngine::do_mask(VFrame *output, 
997         int64_t start_position,
998         double frame_rate,
999         double project_frame_rate,
1000         MaskAutos *keyframe_set, 
1001         int direction,
1002         int before_plugins)
1004         int64_t start_position_project = (int64_t)(start_position *
1005                 project_frame_rate / 
1006                 frame_rate);
1007         Auto *current = 0;
1008         MaskAuto *default_auto = (MaskAuto*)keyframe_set->default_auto;
1009         MaskAuto *keyframe = (MaskAuto*)keyframe_set->get_prev_auto(start_position_project, 
1010                 direction,
1011                 current);
1012         
1013         if (keyframe->apply_before_plugins != before_plugins)
1014                 return;
1017         int total_points = 0;
1018         for(int i = 0; i < keyframe->masks.total; i++)
1019         {
1020                 SubMask *mask = keyframe->get_submask(i);
1021                 int submask_points = mask->points.total;
1022                 if(submask_points > 1) total_points += submask_points;
1023         }
1025 //printf("MaskEngine::do_mask 1 %d %d\n", total_points, keyframe->value);
1026 // Ignore certain masks
1027         if(total_points < 2 || 
1028                 (keyframe->value == 0 && default_auto->mode == MASK_SUBTRACT_ALPHA))
1029         {
1030                 return;
1031         }
1033 // Fake certain masks
1034         if(keyframe->value == 0 && default_auto->mode == MASK_MULTIPLY_ALPHA)
1035         {
1036                 output->clear_frame();
1037                 return;
1038         }
1040 //printf("MaskEngine::do_mask 1\n");
1042         int new_color_model = 0;
1043         recalculate = 0;
1045         switch(output->get_color_model())
1046         {
1047                 case BC_RGB_FLOAT:
1048                 case BC_RGBA_FLOAT:
1049                         new_color_model = BC_A_FLOAT;
1050                         break;
1052                 case BC_RGB888:
1053                 case BC_RGBA8888:
1054                 case BC_YUV888:
1055                 case BC_YUVA8888:
1056                         new_color_model = BC_A8;
1057                         break;
1059                 case BC_RGB161616:
1060                 case BC_RGBA16161616:
1061                 case BC_YUV161616:
1062                 case BC_YUVA16161616:
1063                         new_color_model = BC_A16;
1064                         break;
1065         }
1067 // Determine if recalculation is needed
1068 SET_TRACE
1070         if(mask && 
1071                 (mask->get_w() != output->get_w() ||
1072                 mask->get_h() != output->get_h() ||
1073                 mask->get_color_model() != new_color_model))
1074         {
1075                 delete mask;
1076                 delete temp_mask;
1077                 mask = 0;
1078                 recalculate = 1;
1079         }
1081         if(!recalculate)
1082         {
1083                 if(point_sets.total != keyframe_set->total_submasks(start_position_project, 
1084                         direction))
1085                         recalculate = 1;
1086         }
1088         if(!recalculate)
1089         {
1090                 for(int i = 0; 
1091                         i < keyframe_set->total_submasks(start_position_project, 
1092                                 direction) && !recalculate; 
1093                         i++)
1094                 {
1095                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1096                         keyframe_set->get_points(new_points, 
1097                                 i, 
1098                                 start_position_project, 
1099                                 direction);
1100                         if(!points_equivalent(new_points, point_sets.values[i])) recalculate = 1;
1101                         new_points->remove_all_objects();
1102                         delete new_points;
1103                 }
1104         }
1106         if(recalculate ||
1107                 !EQUIV(keyframe->feather, feather) ||
1108                 !EQUIV(keyframe->value, value))
1109         {
1110                 recalculate = 1;
1111                 if(!mask) 
1112                 {
1113                         mask = new VFrame(0, 
1114                                         output->get_w(), 
1115                                         output->get_h(),
1116                                         new_color_model);
1117                         temp_mask = new VFrame(0, 
1118                                         output->get_w(), 
1119                                         output->get_h(),
1120                                         new_color_model);
1121                 }
1122                 if(keyframe->feather > 0)
1123                         temp_mask->clear_frame();
1124                 else
1125                         mask->clear_frame();
1127                 for(int i = 0; i < point_sets.total; i++)
1128                 {
1129                         ArrayList<MaskPoint*> *points = point_sets.values[i];
1130                         points->remove_all_objects();
1131                 }
1132                 point_sets.remove_all_objects();
1134                 for(int i = 0; 
1135                         i < keyframe_set->total_submasks(start_position_project, 
1136                                 direction); 
1137                         i++)
1138                 {
1139                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1140                         keyframe_set->get_points(new_points, 
1141                                 i, 
1142                                 start_position_project, 
1143                                 direction);
1144                         point_sets.append(new_points);
1145                 }
1146         }
1150         this->output = output;
1151         this->mode = default_auto->mode;
1152         this->feather = keyframe->feather;
1153         this->value = keyframe->value;
1156 // Run units
1157 SET_TRACE
1158         process_packages();
1159 SET_TRACE
1164 void MaskEngine::init_packages()
1166 SET_TRACE
1167 //printf("MaskEngine::init_packages 1\n");
1168         int division = (int)((float)output->get_h() / (get_total_packages()) + 0.5);
1169         if(division < 1) division = 1;
1171         stage1_finished_count = 0;
1172         if (recalculate) {
1173                 last_nonempty_rowspan = SHRT_MIN;
1174                 first_nonempty_rowspan = SHRT_MAX;
1175         }
1176 SET_TRACE
1177 // Always a multiple of 2 packages exist
1178         for(int i = 0; i < get_total_packages(); i++)
1179         {
1180                 MaskPackage *pkg = (MaskPackage*)get_package(i);
1181                 pkg->row1 = division * i;
1182                 pkg->row2 = MIN (division * i + division, output->get_h());
1183                 
1184                 if(i == get_total_packages() - 1)  // last package
1185                 {
1186                         pkg->row2 = pkg->row2 = output->get_h();
1187                 }
1189         }
1190 SET_TRACE
1191 //printf("MaskEngine::init_packages 2\n");
1194 LoadClient* MaskEngine::new_client()
1196         return new MaskUnit(this);
1199 LoadPackage* MaskEngine::new_package()
1201         return new MaskPackage;