r870: Merge 2.1:
[cinelerra_cv.git] / cinelerra / overlayframe.C
blob64400cee292b8926bd9684a4fb7d4bddf3d2570f
1 #include <math.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5 #include <stdlib.h>
6 #include <unistd.h>
8 #include "clip.h"
9 #include "edl.inc"
10 #include "mutex.h"
11 #include "overlayframe.h"
12 #include "units.h"
13 #include "vframe.h"
15 // Easy abstraction of the float and int types.  Most of these are never used
16 // but GCC expects them.
// Magnitude of a signed 32 bit value.
static int my_abs(int32_t x)
{
        return (x < 0) ? -x : x;
}
// Magnitude of an unsigned 32 bit value, which is the value itself.
// The result keeps the argument's type so that values above INT_MAX are not
// turned into negative numbers by a conversion to int.
static uint32_t my_abs(uint32_t x)
{
        return x;
}
// Magnitude of a signed 64 bit value.
// The result keeps the full 64 bit width; returning int would truncate
// magnitudes that do not fit in 32 bits.
static int64_t my_abs(int64_t x)
{
        return llabs(x);
}
// Magnitude of an unsigned 64 bit value, which is the value itself.
// The result keeps the argument's type so that nothing is lost in a
// narrowing conversion to int.
static uint64_t my_abs(uint64_t x)
{
        return x;
}
// Magnitude of a single precision value.
static float my_abs(float x)
{
        const float magnitude = fabsf(x);
        return magnitude;
}
45 OverlayFrame::OverlayFrame(int cpus)
47         temp_frame = 0;
48         blend_engine = 0;
49         scale_engine = 0;
50         scaletranslate_engine = 0;
51         translate_engine = 0;
52         this->cpus = cpus;
55 OverlayFrame::~OverlayFrame()
57         if(temp_frame) delete temp_frame;
58         if(scale_engine) delete scale_engine;
59         if(translate_engine) delete translate_engine;
60         if(blend_engine) delete blend_engine;
61         if(scaletranslate_engine) delete scaletranslate_engine;
71 // Verification: 
73 // (255 * 255 + 0 * 0) / 255 = 255
74 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
76 // (65535 * 65535 + 0 * 0) / 65535 = 65535
77 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
80 // Permutation 4 U
// BLEND_3: blend one 3 component pixel into output[0], output[1], output[2]
// in place.
//
// Macro parameters:
//   max           - full scale component value.  It is the divisor used when
//                   mixing with opacity / transparency and the upper bound
//                   passed to CLIP.
//   temp_type     - type of the intermediate r, g, b values.
//   type          - type the results are cast to when they are clipped and
//                   stored in output[].
//   chroma_offset - when nonzero, components 2 and 3 are handled as values
//                   centred on this offset: DIVIDE and MULTIPLY keep whichever
//                   of input / output lies farther from it, and SUBTRACT,
//                   ADDITION and MAX remove the offset before combining.
//
// The expansion site must provide mode, opacity, transparency, input1,
// input2, input3 and output.
//
// Every mode except TRANSFER_REPLACE mixes its result with the existing
// output as (result * opacity + output * transparency) / max.
// TRANSFER_REPLACE stores the input components unchanged.
// There is no default case, so r, g and b are left unassigned for a mode
// that is not listed.
//
// Results are clipped to 0 ... max unless sizeof(type) is 4.
// NOTE(review): the sizeof(type) test presumably singles out float
// components - confirm against the callers.
// NOTE(review): the mixing formula presumably expects
// opacity + transparency == max - confirm against the callers.
82 #define BLEND_3(max, temp_type, type, chroma_offset) \
83 { \
84         temp_type r, g, b; \
85  \
86 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
87         switch(mode) \
88         { \
89                 case TRANSFER_DIVIDE: \
90                         r = input1 ? (((temp_type)output[0] * max) / input1) : max; \
91                         if(chroma_offset) \
92                         { \
93                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
94                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
95                         } \
96                         else \
97                         { \
98                                 g = input2 ? (temp_type)output[1] * max / (temp_type)input2 : max; \
99                                 b = input3 ? (temp_type)output[2] * max / (temp_type)input3 : max; \
100                         } \
101                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
102                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
103                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
104                         break; \
105                 case TRANSFER_MULTIPLY: \
106                         r = ((temp_type)input1 * output[0]) / max; \
107                         if(chroma_offset) \
108                         { \
109                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
110                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
111                         } \
112                         else \
113                         { \
114                                 g = (temp_type)input2 * (temp_type)output[1] / max; \
115                                 b = (temp_type)input3 * (temp_type)output[2] / max; \
116                         } \
117                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
118                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
119                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
120                         break; \
121                 case TRANSFER_SUBTRACT: \
122                         r = (temp_type)output[0] - (temp_type)input1; \
123                         g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
124                                 ((temp_type)input2 - (temp_type)chroma_offset) + \
125                                 (temp_type)chroma_offset; \
126                         b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
127                                 ((temp_type)input3 - (temp_type)chroma_offset) + \
128                                 (temp_type)chroma_offset; \
129                         if(r < 0) r = 0; \
130                         if(g < 0) g = 0; \
131                         if(b < 0) b = 0; \
132                         r = (r * opacity + output[0] * transparency) / max; \
133                         g = (g * opacity + output[1] * transparency) / max; \
134                         b = (b * opacity + output[2] * transparency) / max; \
135                         break; \
136                 case TRANSFER_ADDITION: \
137                         r = (temp_type)input1 + output[0]; \
138                         g = ((temp_type)input2 - chroma_offset) + \
139                                 ((temp_type)output[1] - chroma_offset) + \
140                                 (temp_type)chroma_offset; \
141                         b = ((temp_type)input3 - chroma_offset) + \
142                                 ((temp_type)output[2] - chroma_offset) + \
143                                 (temp_type)chroma_offset; \
144                         r = (r * opacity + output[0] * transparency) / max; \
145                         g = (g * opacity + output[1] * transparency) / max; \
146                         b = (b * opacity + output[2] * transparency) / max; \
147                         break; \
148                 case TRANSFER_MAX: \
149                 { \
150                         r = (temp_type)MAX(input1, output[0]); \
151                         temp_type g1 = ((temp_type)input2 - chroma_offset); \
152                         if(g1 < 0) g1 = -g1; \
153                         temp_type g2 = ((temp_type)output[1] - chroma_offset); \
154                         if(g2 < 0) g2 = -g2; \
155                         if(g1 > g2) \
156                                 g = input2; \
157                         else \
158                                 g = output[1]; \
159                         temp_type b1 = ((temp_type)input3 - chroma_offset); \
160                         if(b1 < 0) b1 = -b1; \
161                         temp_type b2 = ((temp_type)output[2] - chroma_offset); \
162                         if(b2 < 0) b2 = -b2; \
163                         if(b1 > b2) \
164                                 b = input3; \
165                         else \
166                                 b = output[2]; \
167                         r = (r * opacity + output[0] * transparency) / max; \
168                         g = (g * opacity + output[1] * transparency) / max; \
169                         b = (b * opacity + output[2] * transparency) / max; \
170                         break; \
171                 } \
172                 case TRANSFER_REPLACE: \
173                         r = input1; \
174                         g = input2; \
175                         b = input3; \
176                         break; \
177                 case TRANSFER_NORMAL: \
178                         r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
179                         g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
180                         b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
181                         break; \
182         } \
184         if(sizeof(type) != 4) \
185         { \
186                 output[0] = (type)CLIP(r, 0, max); \
187                 output[1] = (type)CLIP(g, 0, max); \
188                 output[2] = (type)CLIP(b, 0, max); \
189         } \
190         else \
191         { \
192                 output[0] = r; \
193                 output[1] = g; \
194                 output[2] = b; \
195         } \
202 // Blending equations are drastically different for 3 and 4 components
// BLEND_4: blend one 4 component pixel (three colour components plus alpha)
// into output[0] ... output[3] in place.
//
// Macro parameters:
//   max           - full scale component value.  Mixed results are divided by
//                   it twice and it is the upper bound passed to CLIP.
//   temp_type     - type of the intermediate values and of the local copies
//                   output1 ... output4 taken from output[].
//   type          - type the results are cast to when they are stored.
//   chroma_offset - when nonzero, components 2 and 3 are handled as values
//                   centred on this offset.
//
// The expansion site must provide mode, opacity, input1 ... input4 and
// output.
//
// The input's own alpha scales the blend:
//   pixel_opacity      = opacity * input4
//   pixel_transparency = max * max - pixel_opacity
// so every mixed colour component is
//   (result * pixel_opacity + output * pixel_transparency) / max / max.
//
// The stored alpha is the larger of input4 and output4 for every mode except
// TRANSFER_REPLACE, which copies all four input components unchanged.
// There is no default case, so r, g, b and a are left unassigned for a mode
// that is not listed.
//
// Colour results are clipped to 0 ... max unless sizeof(type) is 4; alpha is
// stored without clipping in both branches.
// NOTE(review): the sizeof(type) test presumably singles out float
// components - confirm against the callers.
203 #define BLEND_4(max, temp_type, type, chroma_offset) \
204 { \
205         temp_type r, g, b, a; \
206         temp_type pixel_opacity, pixel_transparency; \
207         temp_type output1 = output[0]; \
208         temp_type output2 = output[1]; \
209         temp_type output3 = output[2]; \
210         temp_type output4 = output[3]; \
212         pixel_opacity = opacity * input4; \
213         pixel_transparency = (temp_type)max * max - pixel_opacity; \
215         switch(mode) \
216         { \
217                 case TRANSFER_DIVIDE: \
218                         r = input1 ? (((temp_type)output1 * max) / input1) : max; \
219                         if(chroma_offset) \
220                         { \
221                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
222                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
223                         } \
224                         else \
225                         { \
226                                 g = input2 ? (temp_type)output2 * max / (temp_type)input2 : max; \
227                                 b = input3 ? (temp_type)output3 * max / (temp_type)input3 : max; \
228                         } \
229                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
230                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
231                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
232                         a = input4 > output4 ? input4 : output4; \
233                         break; \
234                 case TRANSFER_MULTIPLY: \
235                         r = ((temp_type)input1 * output1) / max; \
236                         if(chroma_offset) \
237                         { \
238                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
239                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
240                         } \
241                         else \
242                         { \
243                                 g = (temp_type)input2 * (temp_type)output2 / max; \
244                                 b = (temp_type)input3 * (temp_type)output3 / max; \
245                         } \
246                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
247                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
248                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
249                         a = input4 > output4 ? input4 : output4; \
250                         break; \
251                 case TRANSFER_SUBTRACT: \
252                         r = (temp_type)output1 - input1; \
253                         g = ((temp_type)output2 - chroma_offset) - \
254                                 ((temp_type)input2 - (temp_type)chroma_offset) + \
255                                 (temp_type)chroma_offset; \
256                         b = ((temp_type)output3 - chroma_offset) - \
257                                 ((temp_type)input3 - (temp_type)chroma_offset) + \
258                                 (temp_type)chroma_offset; \
259                         if(r < 0) r = 0; \
260                         if(g < 0) g = 0; \
261                         if(b < 0) b = 0; \
262                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
263                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
264                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
265                         a = input4 > output4 ? input4 : output4; \
266                         break; \
267                 case TRANSFER_ADDITION: \
268                         r = (temp_type)input1 + output1; \
269                         g = ((temp_type)input2 - chroma_offset) + \
270                                 ((temp_type)output2 - chroma_offset) + \
271                                 chroma_offset; \
272                         b = ((temp_type)input3 - chroma_offset) + \
273                                 ((temp_type)output3 - chroma_offset) + \
274                                 chroma_offset; \
275                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
276                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
277                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
278                         a = input4 > output4 ? input4 : output4; \
279                         break; \
280                 case TRANSFER_MAX: \
281                 { \
282                         r = (temp_type)MAX(input1, output1); \
283                         temp_type g1 = ((temp_type)input2 - chroma_offset); \
284                         if(g1 < 0) g1 = -g1; \
285                         temp_type g2 = ((temp_type)output2 - chroma_offset); \
286                         if(g2 < 0) g2 = -g2; \
287                         if(g1 > g2) \
288                                 g = input2; \
289                         else \
290                                 g = output2; \
291                         temp_type b1 = ((temp_type)input3 - chroma_offset); \
292                         if(b1 < 0) b1 = -b1; \
293                         temp_type b2 = ((temp_type)output3 - chroma_offset); \
294                         if(b2 < 0) b2 = -b2; \
295                         if(b1 > b2) \
296                                 b = input3; \
297                         else \
298                                 b = output3; \
299                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
300                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
301                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
302                         a = input4 > output4 ? input4 : output4; \
303                         break; \
304                 } \
305                 case TRANSFER_REPLACE: \
306                         r = input1; \
307                         g = input2; \
308                         b = input3; \
309                         a = input4; \
310                         break; \
311                 case TRANSFER_NORMAL: \
312                         r = (input1 * pixel_opacity + \
313                                 output1 * pixel_transparency) / max / max; \
314                         g = ((input2 - chroma_offset) * pixel_opacity + \
315                                 (output2 - chroma_offset) * pixel_transparency) \
316                                 / max / max + \
317                                 chroma_offset; \
318                         b = ((input3 - chroma_offset) * pixel_opacity + \
319                                 (output3 - chroma_offset) * pixel_transparency) \
320                                 / max / max + \
321                                 chroma_offset; \
322                         a = input4 > output4 ? input4 : output4; \
323                         break; \
324         } \
326         if(sizeof(type) != 4) \
327         { \
328                 output[0] = (type)CLIP(r, 0, max); \
329                 output[1] = (type)CLIP(g, 0, max); \
330                 output[2] = (type)CLIP(b, 0, max); \
331                 output[3] = (type)a; \
332         } \
333         else \
334         { \
335                 output[0] = r; \
336                 output[1] = g; \
337                 output[2] = b; \
338                 output[3] = a; \
339         } \
344 // Bicubic algorithm using multiprocessors
345 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
347 // Nearest neighbor algorithm using multiprocessors for blending
348 // input -> scale + translate -> blend -> output
351 int OverlayFrame::overlay(VFrame *output, 
352         VFrame *input, 
353         float in_x1, 
354         float in_y1, 
355         float in_x2, 
356         float in_y2, 
357         float out_x1, 
358         float out_y1, 
359         float out_x2, 
360         float out_y2, 
361         float alpha,       // 0 - 1
362         int mode,
363         int interpolation_type)
365         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
366         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
375         if(isnan(in_x1) ||
376                 isnan(in_y1) ||
377                 isnan(in_x2) ||
378                 isnan(in_y2) ||
379                 isnan(out_x1) ||
380                 isnan(out_y1) ||
381                 isnan(out_x2) ||
382                 isnan(out_y2)) return 1;
383 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
384 // in_y1,
385 // in_x2,
386 // in_y2,
387 // out_x1,
388 // out_y1,
389 // out_x2,
390 // out_y2,
391 // out_x2 - out_x1, 
392 // out_y2 - out_y1);
394 // Limit values
395         if(in_x1 < 0)
396         {
397                 out_x1 += -in_x1 * w_scale;
398                 in_x1 = 0;
399         }
400         else
401         if(in_x1 >= input->get_w())
402         {
403                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
404                 in_x1 = input->get_w();
405         }
407         if(in_y1 < 0)
408         {
409                 out_y1 += -in_y1 * h_scale;
410                 in_y1 = 0;
411         }
412         else
413         if(in_y1 >= input->get_h())
414         {
415                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
416                 in_y1 = input->get_h();
417         }
419         if(in_x2 < 0)
420         {
421                 out_x2 += -in_x2 * w_scale;
422                 in_x2 = 0;
423         }
424         else
425         if(in_x2 >= input->get_w())
426         {
427                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
428                 in_x2 = input->get_w();
429         }
431         if(in_y2 < 0)
432         {
433                 out_y2 += -in_y2 * h_scale;
434                 in_y2 = 0;
435         }
436         else
437         if(in_y2 >= input->get_h())
438         {
439                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
440                 in_y2 = input->get_h();
441         }
443         if(out_x1 < 0)
444         {
445                 in_x1 += -out_x1 / w_scale;
446                 out_x1 = 0;
447         }
448         else
449         if(out_x1 >= output->get_w())
450         {
451                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
452                 out_x1 = output->get_w();
453         }
455         if(out_y1 < 0)
456         {
457                 in_y1 += -out_y1 / h_scale;
458                 out_y1 = 0;
459         }
460         else
461         if(out_y1 >= output->get_h())
462         {
463                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
464                 out_y1 = output->get_h();
465         }
467         if(out_x2 < 0)
468         {
469                 in_x2 += -out_x2 / w_scale;
470                 out_x2 = 0;
471         }
472         else
473         if(out_x2 >= output->get_w())
474         {
475                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
476                 out_x2 = output->get_w();
477         }
479         if(out_y2 < 0)
480         {
481                 in_y2 += -out_y2 / h_scale;
482                 out_y2 = 0;
483         }
484         else
485         if(out_y2 >= output->get_h())
486         {
487                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
488                 out_y2 = output->get_h();
489         }
500         float in_w = in_x2 - in_x1;
501         float in_h = in_y2 - in_y1;
502         float out_w = out_x2 - out_x1;
503         float out_h = out_y2 - out_y1;
504 // Input for translation operation
505         VFrame *translation_input = input;
508         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
511 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
512 //                      in_y1,
513 //                      in_x2,
514 //                      in_y2,
515 //                      out_x1,
516 //                      out_y1,
517 //                      out_x2,
518 //                      out_y2);
524 // ****************************************************************************
525 // Transfer to temp buffer by scaling nearest integer boundaries
526 // ****************************************************************************
527         if(interpolation_type != NEAREST_NEIGHBOR &&
528                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
529         {
530 // Create integer boundaries for interpolation
531                 float in_x1_float = in_x1;
532                 float in_y1_float = in_y1;
533                 float in_x2_float = MIN(in_x2, input->get_w());
534                 float in_y2_float = MIN(in_y2, input->get_h());
535                 int out_x1_int = (int)out_x1;
536                 int out_y1_int = (int)out_y1;
537                 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
538                 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
540 // Dimensions of temp frame.  Integer boundaries scaled.
541                 int temp_w = (out_x2_int - out_x1_int);
542                 int temp_h = (out_y2_int - out_y1_int);
543                 VFrame *scale_output;
547 #define NO_TRANSLATION1 \
548         (EQUIV(in_x1, 0) && \
549         EQUIV(in_y1, 0) && \
550         EQUIV(out_x1, 0) && \
551         EQUIV(out_y1, 0) && \
552         EQUIV(in_x2, in_x2_float) && \
553         EQUIV(in_y2, in_y2_float) && \
554         EQUIV(out_x2, temp_w) && \
555         EQUIV(out_y2, temp_h))
558 #define NO_BLEND \
559         (EQUIV(alpha, 1) && \
560         (mode == TRANSFER_REPLACE || \
561         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
567 // Prepare destination for operation
569 // No translation and no blending.  The blending operation is built into the
570 // translation unit but not the scaling unit.
571 // input -> output
572                 if(NO_TRANSLATION1 &&
573                         NO_BLEND)
574                 {
575 // printf("OverlayFrame::overlay input -> output\n");
577                         scale_output = output;
578                         translation_input = 0;
579                 }
580                 else
581 // If translation or blending
582 // input -> nearest integer boundary temp
583                 {
584                         if(temp_frame && 
585                                 (temp_frame->get_w() != temp_w ||
586                                         temp_frame->get_h() != temp_h))
587                         {
588                                 delete temp_frame;
589                                 temp_frame = 0;
590                         }
592                         if(!temp_frame)
593                         {
594                                 temp_frame = new VFrame(0,
595                                         temp_w,
596                                         temp_h,
597                                         input->get_color_model(),
598                                         -1);
599                         }
600 //printf("OverlayFrame::overlay input -> temp\n");
603                         temp_frame->clear_frame();
605 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
606 //      temp_w, temp_h);
607                         scale_output = temp_frame;
608                         translation_input = scale_output;
610 // Adjust input coordinates to reflect new scaled coordinates.
611                         in_x1 = 0;
612                         in_y1 = 0;
613                         in_x2 = temp_w;
614                         in_y2 = temp_h;
615                 }
619 //printf("Overlay 1\n");
621 // Scale input -> scale_output
622                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
623                 scale_engine->scale_output = scale_output;
624                 scale_engine->scale_input = input;
625                 scale_engine->w_scale = w_scale;
626                 scale_engine->h_scale = h_scale;
627                 scale_engine->in_x1_float = in_x1_float;
628                 scale_engine->in_y1_float = in_y1_float;
629                 scale_engine->out_w_int = temp_w;
630                 scale_engine->out_h_int = temp_h;
631                 scale_engine->interpolation_type = interpolation_type;
632 //printf("Overlay 2\n");
634 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
635                 scale_engine->process_packages();
636 //printf("OverlayFrame::overlay ScaleEngine 2\n");
640         }
642 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
643 //      in_x1, 
644 //      in_y1, 
645 //      in_x2, 
646 //      in_y2, 
647 //      out_x1, 
648 //      out_y1, 
649 //      out_x2, 
650 //      out_y2);
656 #define NO_TRANSLATION2 \
657         (EQUIV(in_x1, 0) && \
658         EQUIV(in_y1, 0) && \
659         EQUIV(in_x2, translation_input->get_w()) && \
660         EQUIV(in_y2, translation_input->get_h()) && \
661         EQUIV(out_x1, 0) && \
662         EQUIV(out_y1, 0) && \
663         EQUIV(out_x2, output->get_w()) && \
664         EQUIV(out_y2, output->get_h())) \
666 #define NO_SCALE \
667         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
668         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
670         
673 //printf("OverlayFrame::overlay 4 %d\n", mode);
678         if(translation_input)
679         {
680 // Direct copy
681                 if( NO_TRANSLATION2 &&
682                         NO_SCALE &&
683                         NO_BLEND)
684                 {
685 //printf("OverlayFrame::overlay direct copy\n");
686                         output->copy_from(translation_input);
687                 }
688                 else
689 // Blend only
690                 if( NO_TRANSLATION2 &&
691                         NO_SCALE)
692                 {
693                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
696                         blend_engine->output = output;
697                         blend_engine->input = translation_input;
698                         blend_engine->alpha = alpha;
699                         blend_engine->mode = mode;
701                         blend_engine->process_packages();
702                 }
703                 else
704 // Scale and translate using nearest neighbor
705 // Translation is exactly on integer boundaries
706                 if(interpolation_type == NEAREST_NEIGHBOR ||
707                         EQUIV(in_x1, (int)in_x1) &&
708                         EQUIV(in_y1, (int)in_y1) &&
709                         EQUIV(in_x2, (int)in_x2) &&
710                         EQUIV(in_y2, (int)in_y2) &&
712                         EQUIV(out_x1, (int)out_x1) &&
713                         EQUIV(out_y1, (int)out_y1) &&
714                         EQUIV(out_x2, (int)out_x2) &&
715                         EQUIV(out_y2, (int)out_y2))
716                 {
717 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
718                         if(!scaletranslate_engine) scaletranslate_engine = 
719                                 new ScaleTranslateEngine(this, cpus);
722                         scaletranslate_engine->output = output;
723                         scaletranslate_engine->input = translation_input;
724 // Input for Scaletranslate is subpixel precise!
725                         scaletranslate_engine->in_x1 = in_x1;
726                         scaletranslate_engine->in_y1 = in_y1;
727                         scaletranslate_engine->in_x2 = in_x2;
728                         scaletranslate_engine->in_y2 = in_y2;
729                         scaletranslate_engine->out_x1 = (int)out_x1;
730                         scaletranslate_engine->out_y1 = (int)out_y1;
731                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
732                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
733                         scaletranslate_engine->alpha = alpha;
734                         scaletranslate_engine->mode = mode;
736                         scaletranslate_engine->process_packages();
737                 }
738                 else
739 // Fractional translation
740                 {
741 // Use fractional translation
742 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
743 //      in_x1, 
744 //      in_y1, 
745 //      in_x2, 
746 //      in_y2, 
747 //      out_x1, 
748 //      out_y1, 
749 //      out_x2, 
750 //      out_y2);
752 //printf("Overlay 3\n");
753                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
754                         translate_engine->translate_output = output;
755                         translate_engine->translate_input = translation_input;
756                         translate_engine->translate_in_x1 = in_x1;
757                         translate_engine->translate_in_y1 = in_y1;
758                         translate_engine->translate_in_x2 = in_x2;
759                         translate_engine->translate_in_y2 = in_y2;
760                         translate_engine->translate_out_x1 = out_x1;
761                         translate_engine->translate_out_y1 = out_y1;
762                         translate_engine->translate_out_x2 = out_x2;
763                         translate_engine->translate_out_y2 = out_y2;
764                         translate_engine->translate_alpha = alpha;
765                         translate_engine->translate_mode = mode;
766 //printf("Overlay 4\n");
768 //printf("OverlayFrame::overlay 5 %d\n", mode);
769                         translate_engine->process_packages();
771                 }
772         }
773 //printf("OverlayFrame::overlay 2\n");
775         return 0;
// Default constructor of the scaling work package.  No constructor
// statements are visible in this view of the file.
784 ScalePackage::ScalePackage()
791 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
792  : LoadClient(server)
794         this->overlay = overlay;
795         this->engine = server;
// Destructor of the scaling worker.  No destructor statements are visible in
// this view of the file.
798 ScaleUnit::~ScaleUnit()
804 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
805         float scale,
806         int in_pixel1, 
807         int out_total,
808         int in_total)
810         table = new bilinear_table_t[out_total];
811         bzero(table, sizeof(bilinear_table_t) * out_total);
812 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
813         for(int i = 0; i < out_total; i++)
814         {
815                 float out_start = i;
816                 float in_start = out_start * scale;
817                 float out_end = i + 1;
818                 float in_end = out_end * scale;
819                 bilinear_table_t *entry = table + i;
820 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
822 // Store input fraction.  Using scale to normalize these didn't work.
823                 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
824                 entry->input_fraction2 = 1.0 /* / scale */;
825                 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
827                 if(in_end >= in_total - in_pixel1)
828                 {
829                         in_end = in_total - in_pixel1 - 1;
830                         
831                         int difference = (int)in_end - (int)in_start - 1;
832                         if(difference < 0) difference = 0;
833                         entry->input_fraction3 = 1.0 - 
834                                 entry->input_fraction1 - 
835                                 entry->input_fraction2 * difference;
836                 }
838 // Store input pixels
839                 entry->input_pixel1 = (int)in_start;
840                 entry->input_pixel2 = (int)in_end;
842 // Normalize for middle pixels
843                 if(entry->input_pixel2 > entry->input_pixel1 + 1)
844                 {
845                         float total = entry->input_fraction1 + 
846                                 entry->input_fraction2 * 
847                                 (entry->input_pixel2 - entry->input_pixel1 - 1) + 
848                                 entry->input_fraction3;
849                         entry->input_fraction1 /= total;
850                         entry->input_fraction2 /= total;
851                         entry->input_fraction3 /= total;
852                 }
853                 else
854                 {
855                         float total = entry->input_fraction1 +
856                                 entry->input_fraction3;
857                         entry->input_fraction1 /= total;
858                         entry->input_fraction3 /= total;
859                 }
861 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n", 
862 // i,
863 // entry->input_pixel1, 
864 // entry->input_pixel2,
865 // entry->input_fraction1,
866 // entry->input_fraction2,
867 // entry->input_fraction3,
868 // entry->input_fraction1 + 
869 //      entry->input_fraction2 * 
870 //      (entry->input_pixel2 - entry->input_pixel1 - 1) + 
871 //      entry->input_fraction3);
874 // Sanity check
875                 if(entry->input_pixel1 > entry->input_pixel2)
876                 {
877                         entry->input_pixel1 = entry->input_pixel2;
878                         entry->input_fraction1 = 0;
879                 }
881 // Get total fraction of output pixel used
882 //              if(entry->input_pixel2 > entry->input_pixel1)
883                 entry->total_fraction = 
884                         entry->input_fraction1 +
885                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
886                         entry->input_fraction3;
887                 entry->input_pixel1 += in_pixel1;
888                 entry->input_pixel2 += in_pixel1;
889         }
892 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
893         float scale,
894         float in_pixel1, 
895         int out_total,
896         int in_total)
898         table = new bilinear_table_t[out_total];
899         bzero(table, sizeof(bilinear_table_t) * out_total);
901         for(int i = 0; i < out_total; i++)
902         {
903                 bilinear_table_t *entry = table + i;
904                 float in_pixel = i * scale + in_pixel1;
905                 entry->input_pixel1 = (int)floor(in_pixel);
906                 entry->input_pixel2 = entry->input_pixel1 + 1;
908                 if(in_pixel - in_pixel1 <= in_total)
909                 {
910                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
911                 }
912                 else
913                 {
914                         entry->input_fraction3 = 0;
915                         entry->input_pixel2 = 0;
916                 }
918                 if(in_pixel - in_pixel1 >= 0)
919                 {
920                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
921                 }
922                 else
923                 {
924                         entry->input_fraction1 = 0;
925                         entry->input_pixel1 = (int)in_pixel1;
926                 }
928                 if(entry->input_pixel2 >= in_total)
929                 {
930                         entry->input_pixel2 = entry->input_pixel1;
931                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
932                 }
934                 entry->total_fraction = 
935                         entry->input_fraction1 + 
936                         entry->input_fraction3;
937 // 
938 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
939 // entry->input_pixel1,
940 // entry->input_pixel2,
941 // entry->input_fraction1,
942 // entry->input_fraction2,
943 // entry->input_fraction3);
944         }
947 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
949         printf("ScaleUnit::dump_bilinear\n");
950         for(int i = 0; i < total; i++)
951         {
952                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n", 
953                         i,
954                         table[i].input_pixel1,
955                         table[i].input_pixel2,
956                         table[i].input_fraction1,
957                         table[i].input_fraction2,
958                         table[i].input_fraction3,
959                         table[i].total_fraction);
960         }
// Add one input row's weighted contribution to the running sums of the
// current output pixel.
//
// The expanding scope must provide in_rows, x_entry, the sums temp_f1 ..
// temp_f4 and the weights input_scale1 (first pixel of the span),
// input_scale2 (every middle pixel) and input_scale3 (last pixel).
// `row` is the index into in_rows.  temp_f4 is touched only when
// components is 4.
#define PIXEL_REDUCE_MACRO(type, components, row) \
{ \
	type *reduce_pixel = in_rows[row] + x_entry->input_pixel1 * components; \
	type *reduce_last = in_rows[row] + x_entry->input_pixel2 * components; \
 \
/* First pixel of the span */ \
	temp_f1 += input_scale1 * reduce_pixel[0]; \
	temp_f2 += input_scale1 * reduce_pixel[1]; \
	temp_f3 += input_scale1 * reduce_pixel[2]; \
	if(components == 4) temp_f4 += input_scale1 * reduce_pixel[3]; \
 \
/* Last pixel of the span.  Added even when it is the first pixel too. */ \
	temp_f1 += input_scale3 * reduce_last[0]; \
	temp_f2 += input_scale3 * reduce_last[1]; \
	temp_f3 += input_scale3 * reduce_last[2]; \
	if(components == 4) temp_f4 += input_scale3 * reduce_last[3]; \
 \
/* Pixels strictly between the first and the last */ \
	reduce_pixel += components; \
	while(reduce_pixel < reduce_last) \
	{ \
		temp_f1 += input_scale2 * reduce_pixel[0]; \
		temp_f2 += input_scale2 * reduce_pixel[1]; \
		temp_f3 += input_scale2 * reduce_pixel[2]; \
		if(components == 4) temp_f4 += input_scale2 * reduce_pixel[3]; \
		reduce_pixel += components; \
	} \
}
// Bilinear reduction and suboptimal enlargement.
// Very high quality.
//
// Expands inside ScaleUnit::process_package() and uses its locals input,
// output, pkg, scale_w, scale_h, in_x1_float, in_y1_float, out_w_int and
// out_h_int.  Each axis gets a weight table from tabulate_reduction() when
// its scale is below 1 and from tabulate_enlarge() otherwise.  Every output
// pixel in rows pkg->out_row1 .. pkg->out_row2 - 1 is the weighted sum of
// the input rectangle the two table entries describe.
//
// max         largest channel value; sums are clamped to it unless it is 1.0.
// type        channel storage type.
// components  channels per pixel, 3 or 4.
#define BILINEAR_REDUCE(max, type, components) \
{ \
	bilinear_table_t *x_table, *y_table; \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
 \
	if(scale_w < 1) \
		tabulate_reduction(x_table, \
			1.0 / scale_w, \
			(int)in_x1_float, \
			out_w_int, \
			input->get_w()); \
	else \
		tabulate_enlarge(x_table, \
			1.0 / scale_w, \
			in_x1_float, \
			out_w_int, \
			input->get_w()); \
 \
	if(scale_h < 1) \
		tabulate_reduction(y_table, \
			1.0 / scale_h, \
			(int)in_y1_float, \
			out_h_int, \
			input->get_h()); \
	else \
		tabulate_enlarge(y_table, \
			1.0 / scale_h, \
			in_y1_float, \
			out_h_int, \
			input->get_h()); \
/* dump_bilinear(y_table, out_h_int); */ \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		type *out_row = out_rows[i + pkg->out_row1]; \
/* The y table covers the whole output, so index it by absolute row */ \
		bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			bilinear_table_t *x_entry = &x_table[j]; \
/* Load rounding factors: .5 for every type whose size is not 4 bytes */ \
			float temp_f1; \
			float temp_f2; \
			float temp_f3; \
			float temp_f4; \
			if(sizeof(type) != 4) \
				temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
			else \
				temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
 \
/* First row */ \
			float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
			float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
			float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 \
/* Last row */ \
			input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 \
/* Middle rows.  The loop bounds alone decide whether any exist.  This */ \
/* pass used to be skipped when the package held a single output row, */ \
/* which dropped the middle input rows from that row's sum. */ \
			input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
			for(int k = y_entry->input_pixel1 + 1; \
				k < y_entry->input_pixel2; \
				k++) \
			{ \
				PIXEL_REDUCE_MACRO(type, components, k) \
			} \
 \
/* Clamp everything except the 1.0 (float) case */ \
			if(max != 1.0) \
			{ \
				if(temp_f1 > max) temp_f1 = max; \
				if(temp_f2 > max) temp_f2 = max; \
				if(temp_f3 > max) temp_f3 = max; \
				if(components == 4) if(temp_f4 > max) temp_f4 = max; \
			} \
 \
			out_row[j * components    ] = (type)temp_f1; \
			out_row[j * components + 1] = (type)temp_f2; \
			out_row[j * components + 2] = (type)temp_f3; \
			if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
		} \
	} \
 \
	delete [] x_table; \
	delete [] y_table; \
}
// Bilinear enlargement.  Only 2 input pixels per axis take part in each
// output pixel.
//
// Expands inside ScaleUnit::process_package() and uses its locals input,
// output, pkg, scale_w, scale_h, in_x1_float, in_y1_float and out_w_int.
// tabulate_blinear_f() supplies, per output column and per output row of
// the package, the two clamped input coordinates and the blend weights.
// The result is cast to `type` without adding a rounding term.
//
// max         not referenced by this macro.
// type        channel storage type.
// components  channels per pixel, 3 or 4.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
	int *table_int_x1, *table_int_y1; \
	int *table_int_x2, *table_int_y2; \
	float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
 \
/* Columns: the full output width */ \
	tabulate_blinear_f(table_int_x1, \
		table_int_x2, \
		table_frac_x_f, \
		table_antifrac_x_f, \
		k_x, \
		0, \
		out_w_int, \
		in_x1_float, \
		in_w_int); \
/* Rows: only this package's rows, so the y tables are indexed from 0 */ \
	tabulate_blinear_f(table_int_y1, \
		table_int_y2, \
		table_frac_y_f, \
		table_antifrac_y_f, \
		k_y, \
		pkg->out_row1, \
		pkg->out_row2, \
		in_y1_float, \
		in_h_int); \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		float a_f = table_frac_y_f[i]; \
		float anti_a_f = table_antifrac_y_f[i]; \
		type *in_row1 = in_rows[table_int_y1[i]]; \
		type *in_row2 = in_rows[table_int_y2[i]]; \
		type *out_row = out_rows[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			int i_x1 = table_int_x1[j]; \
			int i_x2 = table_int_x2[j]; \
			float b_f = table_frac_x_f[j]; \
			float anti_b_f = table_antifrac_x_f[j]; \
 \
/* Same blend for every channel */ \
			for(int chan = 0; chan < components; chan++) \
			{ \
				float top_left = in_row1[i_x1 * components + chan]; \
				float top_right = in_row1[i_x2 * components + chan]; \
				float bottom_left = in_row2[i_x1 * components + chan]; \
				float bottom_right = in_row2[i_x2 * components + chan]; \
 \
				out_row[j * components + chan] = \
					(type)(anti_a_f * (anti_b_f * top_left + \
					b_f * top_right) + \
					a_f * (anti_b_f * bottom_left + \
					b_f * bottom_right)); \
			} \
		} \
	} \
 \
	delete [] table_int_x1; \
	delete [] table_int_x2; \
	delete [] table_int_y1; \
	delete [] table_int_y2; \
	delete [] table_frac_x_f; \
	delete [] table_antifrac_x_f; \
	delete [] table_frac_y_f; \
	delete [] table_antifrac_y_f; \
}
// Bicubic scaling with a 4x4 kernel.
//
// Expands inside ScaleUnit::process_package() and uses its locals input,
// output, pkg, scale_w, scale_h, in_x1_float, in_y1_float, out_w_int and
// out_h_int.  tabulate_bcubic_f() supplies 4 coefficients and 4 clamped
// input coordinates per output column and per output row.  The row tables
// cover the whole output height and are indexed by absolute row.  The
// result is cast to `type` without adding a rounding term.
//
// max         not referenced by this macro.
// type        channel storage type.
// components  channels per pixel, 3 or 4.
#define BICUBIC(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	float *bspline_x_f, *bspline_y_f; \
	int *in_x_table, *in_y_table; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
 \
	tabulate_bcubic_f(bspline_x_f, \
		in_x_table, \
		k_x, \
		in_x1_float, \
		out_w_int, \
		in_w_int, \
		-1); \
 \
	tabulate_bcubic_f(bspline_y_f, \
		in_y_table, \
		k_y, \
		in_y1_float, \
		out_h_int, \
		in_h_int, \
		1); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			float output1_f = 0; \
			float output2_f = 0; \
			float output3_f = 0; \
			float output4_f = 0; \
			int table_y = i * 4; \
 \
/* Kernel: 4 rows by 4 columns */ \
			for(int m = 0; m < 4; m++, table_y++) \
			{ \
				float r1_f = bspline_y_f[table_y]; \
				type *in_row = in_rows[in_y_table[table_y]]; \
				int table_x = j * 4; \
 \
				for(int n = 0; n < 4; n++, table_x++) \
				{ \
					float r2_f = bspline_x_f[table_x]; \
					float r_square_f = r1_f * r2_f; \
					type *in_pixel = in_row + in_x_table[table_x] * components; \
 \
					output1_f += r_square_f * in_pixel[0]; \
					output2_f += r_square_f * in_pixel[1]; \
					output3_f += r_square_f * in_pixel[2]; \
					if(components == 4) \
						output4_f += r_square_f * in_pixel[3]; \
				} \
			} \
 \
			out_rows[i][j * components] = (type)output1_f; \
			out_rows[i][j * components + 1] = (type)output2_f; \
			out_rows[i][j * components + 2] = (type)output3_f; \
			if(components == 4) \
				out_rows[i][j * components + 3] = (type)output4_f; \
		} \
	} \
 \
	delete [] bspline_x_f; \
	delete [] bspline_y_f; \
	delete [] in_x_table; \
	delete [] in_y_table; \
}
// Pow function is not thread safe in Compaq C, so the cube is spelled out.
// The argument is evaluated three times.
#define CUBE(v) ((v) * (v) * (v))
1319 float ScaleUnit::cubic_bspline(float x)
1321         float a, b, c, d;
1323         if((x + 2.0F) <= 0.0F) 
1324         {
1325         a = 0.0F;
1326         }
1327         else 
1328         {
1329         a = CUBE(x + 2.0F);
1330         }
1333         if((x + 1.0F) <= 0.0F) 
1334         {
1335         b = 0.0F;
1336         }
1337         else 
1338         {
1339         b = CUBE(x + 1.0F);
1340         }    
1342         if(x <= 0) 
1343         {
1344         c = 0.0F;
1345         }
1346         else 
1347         {
1348         c = CUBE(x);
1349         }  
1351         if((x - 1.0F) <= 0.0F) 
1352         {
1353         d = 0.0F;
1354         }
1355         else 
1356         {
1357         d = CUBE(x - 1.0F);
1358         }
1361         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1365 void ScaleUnit::tabulate_bcubic_f(float* &coef_table, 
1366         int* &coord_table,
1367         float scale,
1368         float start, 
1369         int pixels,
1370         int total_pixels,
1371         float coefficient)
1373         coef_table = new float[pixels * 4];
1374         coord_table = new int[pixels * 4];
1375         for(int i = 0, j = 0; i < pixels; i++)
1376         {
1377                 float f_x = (float)i * scale + start;
1378                 float a = f_x - floor(f_x);
1379                 
1380                 for(float m = -1; m < 3; m++)
1381                 {
1382                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1383                         coord_table[j] = (int)(f_x + m);
1384                         CLAMP(coord_table[j], 0, total_pixels - 1);
1385                         j++;
1386                 }
1387                 
1388         }
1391 void ScaleUnit::tabulate_bcubic_i(int* &coef_table, 
1392         int* &coord_table,
1393         float scale,
1394         int start, 
1395         int pixels,
1396         int total_pixels,
1397         float coefficient)
1399         coef_table = new int[pixels * 4];
1400         coord_table = new int[pixels * 4];
1401         for(int i = 0, j = 0; i < pixels; i++)
1402         {
1403                 float f_x = (float)i * scale + start;
1404                 float a = f_x - floor(f_x);
1405                 
1406                 for(float m = -1; m < 3; m++)
1407                 {
1408                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1409                         coord_table[j] = (int)(f_x + m);
1410                         CLAMP(coord_table[j], 0, total_pixels - 1);
1411                         j++;
1412                 }
1413                 
1414         }
1417 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1418                 int* &table_int2,
1419                 float* &table_frac,
1420                 float* &table_antifrac,
1421                 float scale,
1422                 int pixel1,
1423                 int pixel2,
1424                 float start,
1425                 int total_pixels)
1427         table_int1 = new int[pixel2 - pixel1];
1428         table_int2 = new int[pixel2 - pixel1];
1429         table_frac = new float[pixel2 - pixel1];
1430         table_antifrac = new float[pixel2 - pixel1];
1432         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1433         {
1434                 float f_x = (float)i * scale + start;
1435                 int i_x = (int)floor(f_x);
1436                 float a = (f_x - floor(f_x));
1438                 table_int1[j] = i_x;
1439                 table_int2[j] = i_x + 1;
1440                 CLAMP(table_int1[j], 0, total_pixels - 1);
1441                 CLAMP(table_int2[j], 0, total_pixels - 1);
1442                 table_frac[j] = a;
1443                 table_antifrac[j] = 1.0F - a;
1444 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1445         }
1448 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1449                 int* &table_int2,
1450                 int* &table_frac,
1451                 int* &table_antifrac,
1452                 float scale,
1453                 int pixel1,
1454                 int pixel2,
1455                 float start,
1456                 int total_pixels)
1458         table_int1 = new int[pixel2 - pixel1];
1459         table_int2 = new int[pixel2 - pixel1];
1460         table_frac = new int[pixel2 - pixel1];
1461         table_antifrac = new int[pixel2 - pixel1];
1463         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1464         {
1465                 double f_x = (float)i * scale + start;
1466                 int i_x = (int)floor(f_x);
1467                 float a = (f_x - floor(f_x));
1469                 table_int1[j] = i_x;
1470                 table_int2[j] = i_x + 1;
1471                 CLAMP(table_int1[j], 0, total_pixels - 1);
1472                 CLAMP(table_int2[j], 0, total_pixels - 1);
1473                 table_frac[j] = (int)(a * 0xffff);
1474                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1475 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1476         }
1479 void ScaleUnit::process_package(LoadPackage *package)
1481         ScalePackage *pkg = (ScalePackage*)package;
1483 //printf("ScaleUnit::process_package 1\n");
1484 // Arguments for macros
1485         VFrame *output = engine->scale_output;
1486         VFrame *input = engine->scale_input;
1487         float scale_w = engine->w_scale;
1488         float scale_h = engine->h_scale;
1489         float in_x1_float = engine->in_x1_float;
1490         float in_y1_float = engine->in_y1_float;
1491         int out_h_int = engine->out_h_int;
1492         int out_w_int = engine->out_w_int;
1493         int do_yuv = 
1494                 (input->get_color_model() == BC_YUV888 ||
1495                 input->get_color_model() == BC_YUVA8888 ||
1496                 input->get_color_model() == BC_YUV161616 ||
1497                 input->get_color_model() == BC_YUVA16161616);
1499 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1500         if(engine->interpolation_type == CUBIC_CUBIC || 
1501                 (engine->interpolation_type == CUBIC_LINEAR 
1502                         && engine->w_scale > 1 && 
1503                         engine->h_scale > 1))
1504         {
1505                 switch(engine->scale_input->get_color_model())
1506                 {
1507                         case BC_RGB_FLOAT:
1508                                 BICUBIC(1.0, float, 3);
1509                                 break;
1511                         case BC_RGBA_FLOAT:
1512                                 BICUBIC(1.0, float, 4);
1513                                 break;
1515                         case BC_RGB888:
1516                         case BC_YUV888:
1517                                 BICUBIC(0xff, unsigned char, 3);
1518                                 break;
1520                         case BC_RGBA8888:
1521                         case BC_YUVA8888:
1522                                 BICUBIC(0xff, unsigned char, 4);
1523                                 break;
1525                         case BC_RGB161616:
1526                         case BC_YUV161616:
1527                                 BICUBIC(0xffff, uint16_t, 3);
1528                                 break;
1530                         case BC_RGBA16161616:
1531                         case BC_YUVA16161616:
1532                                 BICUBIC(0xffff, uint16_t, 4);
1533                                 break;
1534                 }
1535         }
1536         else
1537 // Perform bilinear scaling input -> scale_output
1538         if(engine->w_scale > 1 && 
1539                 engine->h_scale > 1)
1540         {
1541                 switch(engine->scale_input->get_color_model())
1542                 {
1543                         case BC_RGB_FLOAT:
1544                                 BILINEAR_ENLARGE(1.0, float, 3);
1545                                 break;
1547                         case BC_RGBA_FLOAT:
1548                                 BILINEAR_ENLARGE(1.0, float, 4);
1549                                 break;
1551                         case BC_RGB888:
1552                         case BC_YUV888:
1553                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1554                                 break;
1556                         case BC_RGBA8888:
1557                         case BC_YUVA8888:
1558                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1559                                 break;
1561                         case BC_RGB161616:
1562                         case BC_YUV161616:
1563                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1564                                 break;
1566                         case BC_RGBA16161616:
1567                         case BC_YUVA16161616:
1568                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1569                                 break;
1570                 }
1571         }
1572         else
1573 // Bilinear reduction
1574         {
1575                 switch(engine->scale_input->get_color_model())
1576                 {
1577                         case BC_RGB_FLOAT:
1578                                 BILINEAR_REDUCE(1.0, float, 3);
1579                                 break;
1580                         case BC_RGBA_FLOAT:
1581                                 BILINEAR_REDUCE(1.0, float, 4);
1582                                 break;
1583                         case BC_RGB888:
1584                         case BC_YUV888:
1585                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1586                                 break;
1588                         case BC_RGBA8888:
1589                         case BC_YUVA8888:
1590                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1591                                 break;
1593                         case BC_RGB161616:
1594                         case BC_YUV161616:
1595                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1596                                 break;
1598                         case BC_RGBA16161616:
1599                         case BC_YUVA16161616:
1600                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1601                                 break;
1602                 }
1603         }
1604 //printf("ScaleUnit::process_package 3\n");
1620 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1621  : LoadServer(cpus, cpus)
1623         this->overlay = overlay;
1626 ScaleEngine::~ScaleEngine()
1630 void ScaleEngine::init_packages()
1632         for(int i = 0; i < get_total_packages(); i++)
1633         {
1634                 ScalePackage *package = (ScalePackage*)get_package(i);
1635                 package->out_row1 = out_h_int / get_total_packages() * i;
1636                 package->out_row2 = package->out_row1 + out_h_int / get_total_packages();
1638                 if(i >= get_total_packages() - 1)
1639                         package->out_row2 = out_h_int;
1640         }
1643 LoadClient* ScaleEngine::new_client()
1645         return new ScaleUnit(this, overlay);
1648 LoadPackage* ScaleEngine::new_package()
1650         return new ScalePackage;
1665 TranslatePackage::TranslatePackage()
1671 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1672  : LoadClient(server)
1674         this->overlay = overlay;
1675         this->engine = server;
1678 TranslateUnit::~TranslateUnit()
1684 void TranslateUnit::translation_array_f(transfer_table_f* &table, 
1685         float out_x1, 
1686         float out_x2,
1687         float in_x1,
1688         float in_x2,
1689         int in_total, 
1690         int out_total, 
1691         int &out_x1_int,
1692         int &out_x2_int)
1694         int out_w_int;
1695         float offset = out_x1 - in_x1;
1696 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1698         out_x1_int = (int)out_x1;
1699         out_x2_int = MIN((int)ceil(out_x2), out_total);
1700         out_w_int = out_x2_int - out_x1_int;
1702         table = new transfer_table_f[out_w_int];
1703         bzero(table, sizeof(transfer_table_f) * out_w_int);
1706 // printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f scale=%f %f\n", 
1707 // in_x1, 
1708 // in_x2, 
1709 // out_x1, 
1710 // out_x2,
1711 // in_x2 - in_x1,
1712 // out_x2 - out_x1);
1713 // 
1715         float in_x = in_x1;
1716         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1717         {
1718                 transfer_table_f *entry = &table[out_x - out_x1_int];
1720                 entry->in_x1 = (int)in_x;
1721                 entry->in_x2 = (int)in_x + 1;
1723 // Get fraction of output pixel to fill
1724                 entry->output_fraction = 1;
1726                 if(out_x1 > out_x)
1727                 {
1728                         entry->output_fraction -= out_x1 - out_x;
1729                 }
1731                 if(out_x2 < out_x + 1)
1732                 {
1733                         entry->output_fraction = (out_x2 - out_x);
1734                 }
1736 // Advance in_x until out_x_fraction is filled
1737                 float out_x_fraction = entry->output_fraction;
1738                 float in_x_fraction = floor(in_x + 1) - in_x;
1740                 if(out_x_fraction <= in_x_fraction)
1741                 {
1742                         entry->in_fraction1 = out_x_fraction;
1743                         entry->in_fraction2 = 0.0;
1744                         in_x += out_x_fraction;
1745                 }
1746                 else
1747                 {
1748                         entry->in_fraction1 = in_x_fraction;
1749                         in_x += out_x_fraction;
1750                         entry->in_fraction2 = in_x - floor(in_x);
1751                 }
1753 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1754                 if(entry->in_x2 >= in_total)
1755                 {
1756                         entry->in_x2 = in_total - 1;
1757                         entry->in_fraction2 = 0.0;
1758                 }
1759                 
1760                 if(entry->in_x1 >= in_total)
1761                 {
1762                         entry->in_x1 = in_total - 1;
1763                         entry->in_fraction1 = 0.0;
1764                 }
1765 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1766 //      out_x, 
1767 //      entry->in_x1, 
1768 //      entry->in_x2, 
1769 //      entry->in_fraction1, 
1770 //      entry->in_fraction2, 
1771 //      entry->output_fraction);
1772         }
1776 void TranslateUnit::translation_array_i(transfer_table_i* &table, 
1777         float out_x1, 
1778         float out_x2,
1779         float in_x1,
1780         float in_x2,
1781         int in_total, 
1782         int out_total, 
1783         int &out_x1_int,
1784         int &out_x2_int)
1786         int out_w_int;
1787         float offset = out_x1 - in_x1;
1789         out_x1_int = (int)out_x1;
1790         out_x2_int = MIN((int)ceil(out_x2), out_total);
1791         out_w_int = out_x2_int - out_x1_int;
1793         table = new transfer_table_i[out_w_int];
1794         bzero(table, sizeof(transfer_table_i) * out_w_int);
1797 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1799         float in_x = in_x1;
1800         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1801         {
1802                 transfer_table_i *entry = &table[out_x - out_x1_int];
1804                 entry->in_x1 = (int)in_x;
1805                 entry->in_x2 = (int)in_x + 1;
1807 // Get fraction of output pixel to fill
1808                 entry->output_fraction = 0x10000;
1810                 if(out_x1 > out_x)
1811                 {
1812                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1813                 }
1815                 if(out_x2 < out_x + 1)
1816                 {
1817                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1818                 }
1820 // Advance in_x until out_x_fraction is filled
1821                 int out_x_fraction = entry->output_fraction;
1822                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1824                 if(out_x_fraction <= in_x_fraction)
1825                 {
1826                         entry->in_fraction1 = out_x_fraction;
1827                         entry->in_fraction2 = 0;
1828                         in_x += (float)out_x_fraction / 0x10000;
1829                 }
1830                 else
1831                 {
1832                         entry->in_fraction1 = in_x_fraction;
1833                         in_x += (float)out_x_fraction / 0x10000;
1834                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1835                 }
1837 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1838                 if(entry->in_x2 >= in_total)
1839                 {
1840                         entry->in_x2 = in_total - 1;
1841                         entry->in_fraction2 = 0;
1842                 }
1844                 if(entry->in_x1 >= in_total)
1845                 {
1846                         entry->in_x1 = in_total - 1;
1847                         entry->in_fraction1 = 0;
1848                 }
1849 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1850 //      out_x, 
1851 //      entry->in_x1, 
1852 //      entry->in_x2, 
1853 //      entry->in_fraction1, 
1854 //      entry->in_fraction2, 
1855 //      entry->output_fraction);
1856         }
// TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Sub-pixel translation of one band of rows followed by a blend into the
// output frame.
//   max            largest channel value (0xff, 0xffff or 1.0 at the call sites)
//   temp_type      type of the intermediate values handed to BLEND_3 / BLEND_4
//   type           storage type of one channel
//   components     channels per pixel, 3 or 4
//   chroma_offset  0 for RGB; non-zero for YUV, where it is scaled by the
//                  uncovered weight and added to channels 1 and 2
//
// Names taken from the expansion scope: input, output, alpha, row1, row2,
// x_table_f, y_table_f, out_x1_int, out_x2_int, out_y1_int, plus whatever
// BLEND_3 / BLEND_4 read (mode at least).
//
// For every output pixel the x and y table entries give two source columns,
// two source rows and their weights; the four source samples are combined
// with the products of those weights.  sizeof(type) != 4 selects the
// non-float instantiations, which add 0.5 before truncating.  The per pixel
// opacity is master_opacity scaled by the x and y output fractions, so
// partially covered edge pixels are blended more weakly.
//
// Inside the inner loop `output` is redeclared as a pointer to the current
// output pixel and hides the frame pointer of the same name.
// master_transparency is not referenced anywhere in this macro's own text.
1892 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1893 { \
1895         type **in_rows = (type**)input->get_rows(); \
1896         type **out_rows = (type**)output->get_rows(); \
1899         temp_type master_opacity; \
1900         if(sizeof(type) != 4) \
1901                 master_opacity = (temp_type)(alpha * max + 0.5); \
1902         else \
1903                 master_opacity = (temp_type)(alpha * max); \
1904         temp_type master_transparency = max - master_opacity; \
1905         float round = 0.0; \
1906         if(sizeof(type) != 4) \
1907                 round = 0.5; \
1910         for(int i = row1; i < row2; i++) \
1911         { \
1912                 int in_y1; \
1913                 int in_y2; \
1914                 float y_fraction1_f; \
1915                 float y_fraction2_f; \
1916                 float y_output_fraction_f; \
1917                 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1918                 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1919                 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1920                 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1921                 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1922                 type *in_row1 = in_rows[(in_y1)]; \
1923                 type *in_row2 = in_rows[(in_y2)]; \
1924                 type *out_row = out_rows[i]; \
1926                 for(int j = out_x1_int; j < out_x2_int; j++) \
1927                 { \
1928                         int in_x1; \
1929                         int in_x2; \
1930                         float x_fraction1_f; \
1931                         float x_fraction2_f; \
1932                         float x_output_fraction_f; \
1933                         in_x1 = x_table_f[j - out_x1_int].in_x1; \
1934                         in_x2 = x_table_f[j - out_x1_int].in_x2; \
1935                         x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1936                         x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1937                         x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1938                         type *output = &out_row[j * components]; \
1939                         temp_type input1, input2, input3, input4; \
1941                         float fraction1 = x_fraction1_f * y_fraction1_f; \
1942                         float fraction2 = x_fraction2_f * y_fraction1_f; \
1943                         float fraction3 = x_fraction1_f * y_fraction2_f; \
1944                         float fraction4 = x_fraction2_f * y_fraction2_f; \
1946                         input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1947                                 in_row1[in_x2 * components] * fraction2 +  \
1948                                 in_row2[in_x1 * components] * fraction3 +  \
1949                                 in_row2[in_x2 * components] * fraction4 + round); \
1951 /* Add chroma to fractional pixels */ \
1952                         if(chroma_offset) \
1953                         { \
1954                                 float extra_chroma = (1.0F - \
1955                                         fraction1 - \
1956                                         fraction2 - \
1957                                         fraction3 - \
1958                                         fraction4) * chroma_offset; \
1959                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1960                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1961                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1962                                         in_row2[in_x2 * components + 1] * fraction4 + \
1963                                         extra_chroma + round); \
1964                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1965                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1966                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1967                                         in_row2[in_x2 * components + 2] * fraction4 +  \
1968                                         extra_chroma + round); \
1969                         } \
1970                         else \
1971                         { \
1972                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1973                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1974                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1975                                         in_row2[in_x2 * components + 1] * fraction4 + round); \
1976                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1977                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1978                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1979                                         in_row2[in_x2 * components + 2] * fraction4 + round); \
1980                         } \
1982                         if(components == 4) \
1983                                 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1984                                         in_row1[in_x2 * components + 3] * fraction2 +  \
1985                                         in_row2[in_x1 * components + 3] * fraction3 +  \
1986                                         in_row2[in_x2 * components + 3] * fraction4 + round); \
1988                         temp_type opacity; \
1989                         if(sizeof(type) != 4) \
1990                                 opacity = (temp_type)(master_opacity *  \
1991                                         y_output_fraction_f *  \
1992                                         x_output_fraction_f + 0.5); \
1993                         else \
1994                                 opacity = (temp_type)(master_opacity *  \
1995                                         y_output_fraction_f *  \
1996                                         x_output_fraction_f); \
1997                         temp_type transparency = max - opacity; \
1999 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
2001                         if(components == 3) \
2002                         { \
2003                                 BLEND_3(max, temp_type, type, chroma_offset); \
2004                         } \
2005                         else \
2006                         { \
2007                                 BLEND_4(max, temp_type, type, chroma_offset); \
2008                         } \
2009                 } \
2010         } \
// Processes one band of output rows (pkg->out_row1 up to, not including,
// pkg->out_row2) of a translation job.  The job parameters are read from
// the engine's translate_* members, the x and y lookup tables are built,
// the TRANSLATE macro matching the input colour model is expanded, and the
// tables are freed again.
2013 void TranslateUnit::process_package(LoadPackage *package)
2015         TranslatePackage *pkg = (TranslatePackage*)package;
// Integer output span; filled in by translation_array_f() below.
2016         int out_y1_int; 
2017         int out_y2_int; 
2018         int out_x1_int; 
2019         int out_x2_int; 
// The locals below are referenced by name from inside TRANSLATE (and the
// BLEND_* macros it expands), so their names must not change.
2022 // Variables for TRANSLATE
2023         VFrame *input = engine->translate_input;
2024         VFrame *output = engine->translate_output;
2025         float in_x1 = engine->translate_in_x1;
2026         float in_y1 = engine->translate_in_y1;
2027         float in_x2 = engine->translate_in_x2;
2028         float in_y2 = engine->translate_in_y2;
2029         float out_x1 = engine->translate_out_x1;
2030         float out_y1 = engine->translate_out_y1;
2031         float out_x2 = engine->translate_out_x2;
2032         float out_y2 = engine->translate_out_y2;
2033         float alpha = engine->translate_alpha;
2034         int row1 = pkg->out_row1;
2035         int row2 = pkg->out_row2;
2036         int mode = engine->translate_mode;
2037         int in_total_x = input->get_w();
2038         int in_total_y = input->get_h();
// do_yuv is not referenced in this function or in TRANSLATE's own text.
2039         int do_yuv = 
2040                 (engine->translate_input->get_color_model() == BC_YUV888 ||
2041                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
2042                 engine->translate_input->get_color_model() == BC_YUV161616 ||
2043                 engine->translate_input->get_color_model() == BC_YUVA16161616);
// Only the float tables are built, used and freed here; x_table_i and
// y_table_i are declared but never assigned in this function.
2045         transfer_table_f *x_table_f; 
2046         transfer_table_f *y_table_f; 
2047         transfer_table_i *x_table_i; 
2048         transfer_table_i *y_table_i; 
// Horizontal table, clipped against the input and output widths.
2050         translation_array_f(x_table_f,  
2051                 out_x1,  
2052                 out_x2, 
2053                 in_x1, 
2054                 in_x2, 
2055                 in_total_x,  
2056                 output->get_w(),  
2057                 out_x1_int, 
2058                 out_x2_int); 
// Vertical table, clipped against the input and output heights.  Every
// unit builds the table for the whole span, not just for its own band.
2059         translation_array_f(y_table_f,  
2060                 out_y1,  
2061                 out_y2, 
2062                 in_y1, 
2063                 in_y2, 
2064                 in_total_y,  
2065                 output->get_h(),  
2066                 out_y1_int, 
2067                 out_y2_int); 
2068 //      printf("TranslateUnit::process_package 1 %d\n", mode);
2069 //      Timer a;
2070 //      a.update();
// RGB models use unsigned intermediates and chroma_offset 0; YUV models use
// signed intermediates and the mid-scale chroma offset (0x80 / 0x8000).
// There is no default case: a colour model not listed here only builds and
// frees the tables.
2072         switch(engine->translate_input->get_color_model())
2073         {
2074                 case BC_RGB888:
2075                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2076                         break;
2078                 case BC_RGBA8888:
2079                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2080                         break;
2082                 case BC_RGB_FLOAT:
2083                         TRANSLATE(1.0, float, float, 3, 0);
2084                         break;
2086                 case BC_RGBA_FLOAT:
2087                         TRANSLATE(1.0, float, float, 4, 0);
2088                         break;
2090                 case BC_RGB161616:
2091                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2092                         break;
2094                 case BC_RGBA16161616:
2095                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2096                         break;
2098                 case BC_YUV888:
2099                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2100                         break;
2102                 case BC_YUVA8888:
2103                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2104                         break;
2106                 case BC_YUV161616:
2107                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2108                         break;
2110                 case BC_YUVA16161616:
2111                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2112                         break;
2113         }
2114 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// The tables were allocated with new[] inside translation_array_f().
2116         delete [] x_table_f; 
2117         delete [] y_table_f; 
2129 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2130  : LoadServer(cpus, cpus)
2132         this->overlay = overlay;
2135 TranslateEngine::~TranslateEngine()
2139 void TranslateEngine::init_packages()
2141         int out_y1_int = (int)translate_out_y1;
2142         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2143         int out_h = out_y2_int - out_y1_int;
2145         for(int i = 0; i < get_total_packages(); i++)
2146         {
2147                 TranslatePackage *package = (TranslatePackage*)get_package(i);
2148                 package->out_row1 = (int)(out_y1_int + out_h / 
2149                         get_total_packages() * 
2150                         i);
2151                 package->out_row2 = (int)((float)package->out_row1 + 
2152                         out_h / 
2153                         get_total_packages());
2154                 if(i >= get_total_packages() - 1)
2155                         package->out_row2 = out_y2_int;
2156         }
2159 LoadClient* TranslateEngine::new_client()
2161         return new TranslateUnit(this, overlay);
2164 LoadPackage* TranslateEngine::new_package()
2166         return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Scales and translates one band of rows without interpolation: every
// output pixel takes exactly one source pixel, which is then blended into
// the output with BLEND_3 / BLEND_4.
//   max            largest channel value (0xff, 0xffff or 1.0 at the call sites)
//   temp_type      type of the intermediate values handed to the blend macro
//   type           storage type of one channel
//   components     channels per pixel, 3 or 4
//   chroma_offset  forwarded to the blend macro
//
// Names taken from the expansion scope: pkg, alpha, in_rows, out_rows,
// y_table, out_y1, out_x1, out_w, in_x1, in_x2, x_table, plus whatever the
// blend macros read (mode at least).
//
// The source row always comes from y_table.  When the output width differs
// from in_x2 - in_x1 the source column comes from x_table; otherwise the
// source row is walked linearly starting at in_x1 and x_table is not read.
// opacity is computed once from alpha; sizeof(type) != 4 selects the
// non-float instantiations, which round instead of truncating.
2176 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2177 { \
2178         temp_type opacity; \
2179         if(sizeof(type) != 4) \
2180                 opacity = (temp_type)(alpha * max + 0.5); \
2181         else \
2182                 opacity = (temp_type)(alpha * max); \
2183         temp_type transparency = max - opacity; \
2185         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2186         { \
2187                 int in_y = y_table[i - out_y1]; \
2188                 type *in_row = (type*)in_rows[in_y]; \
2189                 type *output = (type*)out_rows[i] + out_x1 * components; \
2191 /* X direction is scaled and requires a table lookup */ \
2192                 if(out_w != in_x2 - in_x1) \
2193                 { \
2194                         for(int j = 0; j < out_w; j++) \
2195                         { \
2196                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2197                                 temp_type input1, input2, input3, input4; \
2198          \
2199                                 input1 = in_row_plus_x[0]; \
2200                                 input2 = in_row_plus_x[1]; \
2201                                 input3 = in_row_plus_x[2]; \
2202                                 if(components == 4) \
2203                                         input4 = in_row_plus_x[3]; \
2204          \
2205                                 if(components == 3) \
2206                                 { \
2207                                         BLEND_3(max, temp_type, type, chroma_offset); \
2208                                 } \
2209                                 else \
2210                                 { \
2211                                         BLEND_4(max, temp_type, type, chroma_offset); \
2212                                 } \
2213                                 output += components; \
2214                         } \
2215                 } \
2216                 else \
2217 /* X direction is not scaled */ \
2218                 { \
2219                         in_row += in_x1 * components; \
2220                         for(int j = 0; j < out_w; j++) \
2221                         { \
2222                                 temp_type input1, input2, input3, input4; \
2223          \
2224                                 input1 = in_row[0]; \
2225                                 input2 = in_row[1]; \
2226                                 input3 = in_row[2]; \
2227                                 if(components == 4) \
2228                                         input4 = in_row[3]; \
2229          \
2230                                 if(components == 3) \
2231                                 { \
2232                                         BLEND_3(max, temp_type, type, chroma_offset); \
2233                                 } \
2234                                 else \
2235                                 { \
2236                                         BLEND_4(max, temp_type, type, chroma_offset); \
2237                                 } \
2238                                 in_row += components; \
2239                                 output += components; \
2240                         } \
2241                 } \
2242         } \
2247 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2248  : LoadClient(server)
2250         this->overlay = overlay;
2251         this->scale_translate = server;
2254 ScaleTranslateUnit::~ScaleTranslateUnit()
2258 void ScaleTranslateUnit::scale_array_f(int* &table, 
2259         int out_x1, 
2260         int out_x2,
2261         float in_x1,
2262         float in_x2)
2264         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2266         table = new int[(int)out_x2 - out_x1];
2267         
2268         for(int i = 0; i < out_x2 - out_x1; i++)
2269                 table[i] = (int)((float)i / scale + in_x1);
// Processes one band of output rows (pkg->out_row1 up to, not including,
// pkg->out_row2) of a nearest neighbour scale + translate job.  Builds the
// source index tables, then either copies whole lines (TRANSFER_REPLACE
// without horizontal scaling) or expands SCALE_TRANSLATE for the input
// colour model, and finally frees the tables.
2272 void ScaleTranslateUnit::process_package(LoadPackage *package)
2274         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
// The locals below are referenced by name from inside SCALE_TRANSLATE (and
// the BLEND_* macros it expands), so their names must not change.  The
// input coordinates are truncated to int here; the tables further down are
// built from the engine's untruncated values.
2276 // Args for NEAREST_NEIGHBOR_MACRO
2277         VFrame *output = scale_translate->output;
2278         VFrame *input = scale_translate->input;
2279         int in_x1 = (int)scale_translate->in_x1;
2280         int in_y1 = (int)scale_translate->in_y1;
2281         int in_x2 = (int)scale_translate->in_x2;
2282         int in_y2 = (int)scale_translate->in_y2;
2283         int out_x1 = scale_translate->out_x1;
2284         int out_y1 = scale_translate->out_y1;
2285         int out_x2 = scale_translate->out_x2;
2286         int out_y2 = scale_translate->out_y2;
2287         float alpha = scale_translate->alpha;
2288         int mode = scale_translate->mode;
2289         int out_w = out_x2 - out_x1;
// x_table stays 0 unless the width changes; y_table is always built.
2291         int *x_table = 0;
2292         int *y_table;
2293         unsigned char **in_rows = input->get_rows();
2294         unsigned char **out_rows = output->get_rows();
2296 //      Timer a;
2297 //      a.update();
2298 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
// Same condition as inside SCALE_TRANSLATE: the column table is only
// needed, and only read, when the x direction is scaled.
2299         if(out_w != in_x2 - in_x1)
2300         {
2301                 scale_array_f(x_table, 
2302                         out_x1, 
2303                         out_x2,
2304                         scale_translate->in_x1,
2305                         scale_translate->in_x2);
2306         }
2307         scale_array_f(y_table, 
2308                 out_y1, 
2309                 out_y2,
2310                 scale_translate->in_y1,
2311                 scale_translate->in_y2);
// Fast path: each output line is one memcpy from the source line chosen
// by y_table, so vertical scaling is still honoured.
2314         if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1)) 
2315         {
2316 // if we have transfer replace and x direction is not scaled, PARTY!
2317                 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2318                 int line_len = out_w * bytes_per_pixel;
2319                 int in_start_byte = in_x1 * bytes_per_pixel;
2320                 int out_start_byte = out_x1 * bytes_per_pixel;
2321                 for(int i = pkg->out_row1; i < pkg->out_row2; i++) 
2322                 {
2323                         memcpy (out_rows[i] + out_start_byte, 
2324                                 in_rows[y_table[i - out_y1]] + in_start_byte , 
2325                                 line_len);
2326                 }
2328         } 
2329         else
// General path.  RGB models use unsigned intermediates and chroma_offset 0;
// YUV models use signed intermediates and 0x80 / 0x8000.  There is no
// default case: an unlisted colour model leaves the output untouched.
2330         switch(input->get_color_model())
2331         {
2332                 case BC_RGB888:
2333                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2334                         break;
2336                 case BC_RGB_FLOAT:
2337                         SCALE_TRANSLATE(1.0, float, float, 3, 0);
2338                         break;
2340                 case BC_YUV888:
2341                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2342                         break;
2344                 case BC_RGBA8888:
2345                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2346                         break;
2348                 case BC_RGBA_FLOAT:
2349                         SCALE_TRANSLATE(1.0, float, float, 4, 0);
2350                         break;
2352                 case BC_YUVA8888:
2353                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2354                         break;
2357                 case BC_RGB161616:
2358                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2359                         break;
2361                 case BC_YUV161616:
2362                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2363                         break;
2365                 case BC_RGBA16161616:
2366                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2367                         break;
2369                 case BC_YUVA16161616:
2370                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2371                         break;
2372         }
2373         
2374 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// Both tables were allocated with new[] inside scale_array_f().
2375         if(x_table)
2376                 delete [] x_table;
2377         delete [] y_table;
2389 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2390  : LoadServer(cpus, cpus)
2392         this->overlay = overlay;
2395 ScaleTranslateEngine::~ScaleTranslateEngine()
2399 void ScaleTranslateEngine::init_packages()
2401         int out_h = out_y2 - out_y1;
2403         for(int i = 0; i < get_total_packages(); i++)
2404         {
2405                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)get_package(i);
2406                 package->out_row1 = (int)(out_y1 + out_h / 
2407                         get_total_packages() * 
2408                         i);
2409                 package->out_row2 = (int)((float)package->out_row1 + 
2410                         out_h / 
2411                         get_total_packages());
2412                 if(i >= get_total_packages() - 1)
2413                         package->out_row2 = out_y2;
2414         }
2417 LoadClient* ScaleTranslateEngine::new_client()
2419         return new ScaleTranslateUnit(this, overlay);
2422 LoadPackage* ScaleTranslateEngine::new_package()
2424         return new ScaleTranslatePackage;
2428 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
//
// Blends one band of input rows onto the same rows of the output, pixel for
// pixel, with no scaling or translation.  Note that the parameter order
// differs from TRANSLATE / SCALE_TRANSLATE: temp_type and type come first.
//   temp_type      type of the intermediate values handed to BLEND_3 / BLEND_4
//   type           storage type of one channel
//   max            largest channel value
//   components     channels per pixel, 3 or 4
//   chroma_offset  forwarded to the blend macro
//
// Names taken from the expansion scope: input, output, pkg, alpha, plus
// whatever the blend macros read.  Inside the row loop `output` is
// redeclared as a pointer to the current output pixel.  The width comes
// from the input frame; h is assigned but not referenced in this macro's
// own text.  opacity is computed once from alpha; sizeof(type) != 4 selects
// the instantiations that round instead of truncating.
2459 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2460 { \
2461         temp_type opacity; \
2462         if(sizeof(type) != 4) \
2463                 opacity = (temp_type)(alpha * max + 0.5); \
2464         else \
2465                 opacity = (temp_type)(alpha * max); \
2466         temp_type transparency = max - opacity; \
2468         type** output_rows = (type**)output->get_rows(); \
2469         type** input_rows = (type**)input->get_rows(); \
2470         int w = input->get_w(); \
2471         int h = input->get_h(); \
2473         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2474         { \
2475                 type* in_row = input_rows[i]; \
2476                 type* output = output_rows[i]; \
2478                 for(int j = 0; j < w; j++) \
2479                 { \
2480                         temp_type input1, input2, input3, input4; \
2481                         input1 = in_row[0]; \
2482                         input2 = in_row[1]; \
2483                         input3 = in_row[2]; \
2484                         if(components == 4) input4 = in_row[3]; \
2487                         if(components == 3) \
2488                         { \
2489                                 BLEND_3(max, temp_type, type, chroma_offset); \
2490                         } \
2491                         else \
2492                         { \
2493                                 BLEND_4(max, temp_type, type, chroma_offset); \
2494                         } \
2496                         in_row += components; \
2497                         output += components; \
2498                 } \
2499         } \
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
//
// Replace-mode shortcut: copies rows pkg->out_row1 up to, not including,
// pkg->out_row2 from the input frame to the same rows of the output frame,
// one memcpy per row.
//   type        storage type of one channel
//   components  channels per pixel
//
// Names taken from the expansion scope: input, output (get_rows(), get_w())
// and pkg (out_row1, out_row2).  The line length is derived from the input
// width.  The unused local h and its get_h() call were removed.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
	int line_len = w * sizeof(type) * components; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		memcpy(output_rows[i], input_rows[i], line_len); \
	} \
}
// components is always 4
//
// BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset)
//
// Normal-mode blend of one band of 4 channel rows onto the same rows of the
// output, with no scaling or translation.
//   temp_type      type used for the intermediate products
//   type           storage type of one channel
//   max            largest channel value
//   chroma_offset  subtracted from channels 1 and 2 before weighting and
//                  added back afterwards; 0 leaves them untouched
//
// Names taken from the expansion scope: input, output (get_rows(), get_w()),
// pkg (out_row1, out_row2) and alpha.
//
// Per pixel the weight of the input is opacity * input alpha, on a scale of
// max * max; the output keeps the remaining weight, and the sum is divided
// by max twice.  The output alpha becomes the larger of the two alphas.
// Inside the row loop `output` is redeclared as a pointer to the current
// output pixel.  The unused locals transparency, r, g, b and h were removed.
#define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
{ \
	temp_type opacity = (temp_type)(alpha * max + 0.5); \
	temp_type max_squared = ((temp_type)max) * max; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) \
		{ \
			temp_type pixel_opacity, pixel_transparency; \
			pixel_opacity = opacity * in_row[3]; \
			pixel_transparency = (temp_type)max_squared - pixel_opacity; \
 \
			output[0] = ((temp_type)in_row[0] * pixel_opacity + \
				(temp_type)output[0] * pixel_transparency) / max / max; \
			output[1] = (((temp_type)in_row[1] - chroma_offset) * pixel_opacity + \
				((temp_type)output[1] - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			output[2] = (((temp_type)in_row[2] - chroma_offset) * pixel_opacity + \
				((temp_type)output[2] - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
 \
			in_row += 4; \
			output += 4; \
		} \
	} \
}
// components is always 3
// TRANSFER_NORMAL for integer frames without an alpha channel.  The global
// opacity is converted to a fixed-point weight out of (1 << bits), where
// bits is the width of `type`, and every component is blended as
// (in * opacity + out * transparency) >> bits.
// Expands against `alpha`, `output`, `input` and `pkg` from the enclosing
// scope.
//   temp_type     - integer type used for the intermediate products
//   type          - storage type of one colour component
//   max           - not used by this macro
//   chroma_offset - not used by this macro
#define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        const int bits = sizeof(type) * 8; \
        temp_type opacity = (temp_type)(alpha * ((temp_type)1 << bits) + 0.5); \
        temp_type transparency = ((temp_type)1 << bits) - opacity; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
/* Every component gets the same weights, so a row is walked as 3 * width values. */ \
        int w = input->get_w() * 3; \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                type* in_row = input_rows[i]; \
                type* out_row = output_rows[i]; \
 \
                for(int j = 0; j < w; j++) /* w = 3x width! */ \
                { \
                        *out_row = ((temp_type)*in_row * opacity + *out_row * transparency) >> bits; \
                        in_row++; \
                        out_row++; \
                } \
        } \
}
2591 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2592  : LoadClient(server)
2594         this->overlay = overlay;
2595         this->blend_engine = server;
2598 BlendUnit::~BlendUnit()
2602 void BlendUnit::process_package(LoadPackage *package)
2604         BlendPackage *pkg = (BlendPackage*)package;
2607         VFrame *output = blend_engine->output;
2608         VFrame *input = blend_engine->input;
2609         float alpha = blend_engine->alpha;
2610         int mode = blend_engine->mode;
2612         if (mode == TRANSFER_REPLACE) 
2613         {
2614                 switch(input->get_color_model())
2615                 {
2616                         case BC_RGB_FLOAT:
2617                                 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2618                                 break;
2619                         case BC_RGBA_FLOAT:
2620                                 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2621                                 break;
2622                         case BC_RGB888:
2623                         case BC_YUV888:
2624                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2625                                 break;
2626                         case BC_RGBA8888:
2627                         case BC_YUVA8888:
2628                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2629                                 break;
2630                         case BC_RGB161616:
2631                         case BC_YUV161616:
2632                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2633                                 break;
2634                         case BC_RGBA16161616:
2635                         case BC_YUVA16161616:
2636                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2637                                 break;
2638                 }
2639         }
2640         else
2641         if (mode == TRANSFER_NORMAL) 
2642         {
2643                 switch(input->get_color_model())
2644                 {
2645                         case BC_RGB_FLOAT:
2646                         {
2647                                 float opacity = alpha;
2648                                 float transparency = 1.0 - alpha;
2650                                 float** output_rows = (float**)output->get_rows();
2651                                 float** input_rows = (float**)input->get_rows();
2652                                 int w = input->get_w() * 3;
2653                                 int h = input->get_h();
2655                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2656                                 {
2657                                         float* in_row = input_rows[i];
2658                                         float* output = output_rows[i];
2659 /* w = 3x width! */
2660                                         for(int j = 0; j < w; j++) 
2661                                         {
2662                                                 *output = *in_row * opacity + *output * transparency;
2663                                                 in_row++;
2664                                                 output++;
2665                                         }
2666                                 }
2667                                 break;
2668                         }
2669                         case BC_RGBA_FLOAT:
2670                         {
2671                                 float opacity = alpha;
2672                                 float transparency = 1.0 - alpha;
2673                         
2674                                 float** output_rows = (float**)output->get_rows();
2675                                 float** input_rows = (float**)input->get_rows();
2676                                 int w = input->get_w();
2677                                 int h = input->get_h();
2678                         
2679                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2680                                 {
2681                                         float* in_row = input_rows[i];
2682                                         float* output = output_rows[i];
2683                         
2684                                         for(int j = 0; j < w; j++)
2685                                         {
2686                                                 float pixel_opacity, pixel_transparency;
2687                                                 pixel_opacity = opacity * in_row[3];
2688                                                 pixel_transparency = 1.0 - pixel_opacity;
2689                                         
2690                                         
2691                                                 output[0] = in_row[0] * pixel_opacity +
2692                                                         output[0] * pixel_transparency;
2693                                                 output[1] = in_row[1] * pixel_opacity +
2694                                                         output[1] * pixel_transparency;
2695                                                 output[2] = in_row[2] * pixel_opacity +
2696                                                         output[2] * pixel_transparency;
2697                                                 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
2699                                                 in_row += 4;
2700                                                 output += 4;
2701                                         }
2702                                 }
2703                                 break;
2704                         }
2705                         case BC_RGB888:
2706                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2707                                 break;
2708                         case BC_YUV888:
2709                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2710                                 break;
2711                         case BC_RGBA8888:
2712                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2713                                 break;
2714                         case BC_YUVA8888:
2715                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2716                                 break;
2717                         case BC_RGB161616:
2718                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2719                                 break;
2720                         case BC_YUV161616:
2721                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2722                                 break;
2723                         case BC_RGBA16161616:
2724                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2725                                 break;
2726                         case BC_YUVA16161616:
2727                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2728                                 break;
2729                 }
2730         }
2731         else
2732         switch(input->get_color_model())
2733         {
2734                 case BC_RGB_FLOAT:
2735                         BLEND_ONLY(float, float, 1.0, 3, 0);
2736                         break;
2737                 case BC_RGBA_FLOAT:
2738                         BLEND_ONLY(float, float, 1.0, 4, 0);
2739                         break;
2740                 case BC_RGB888:
2741                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0);
2742                         break;
2743                 case BC_YUV888:
2744                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2745                         break;
2746                 case BC_RGBA8888:
2747                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0);
2748                         break;
2749                 case BC_YUVA8888:
2750                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2751                         break;
2752                 case BC_RGB161616:
2753                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0);
2754                         break;
2755                 case BC_YUV161616:
2756                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2757                         break;
2758                 case BC_RGBA16161616:
2759                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0);
2760                         break;
2761                 case BC_YUVA16161616:
2762                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2763                         break;
2764         }
2769 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2770  : LoadServer(cpus, cpus)
2772         this->overlay = overlay;
2775 BlendEngine::~BlendEngine()
2779 void BlendEngine::init_packages()
2781         for(int i = 0; i < get_total_packages(); i++)
2782         {
2783                 BlendPackage *package = (BlendPackage*)get_package(i);
2784                 package->out_row1 = (int)(input->get_h() / 
2785                         get_total_packages() * 
2786                         i);
2787                 package->out_row2 = (int)((float)package->out_row1 +
2788                         input->get_h() / 
2789                         get_total_packages());
2791                 if(i >= get_total_packages() - 1)
2792                         package->out_row2 = input->get_h();
2793         }
2796 LoadClient* BlendEngine::new_client()
2798         return new BlendUnit(this, overlay);
2801 LoadPackage* BlendEngine::new_package()
2803         return new BlendPackage;
2807 BlendPackage::BlendPackage()