r370: Heroine Virtual's official release 1.2.1
[cinelerra_cv/mob.git] / hvirtual / cinelerra / overlayframe.C
blobeacf17b3e2835025763009b686024df7514efd05
1 #include <math.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5 #include <stdlib.h>
6 #include <unistd.h>
8 #include "clip.h"
9 #include "edl.inc"
10 #include "mutex.h"
11 #include "overlayframe.h"
12 #include "vframe.h"
// Type-generic absolute value used by the blend macros, which call my_abs()
// on whatever temp_type they were instantiated with.  Most of these overloads
// are never used but GCC expects them to exist.
//
// Each overload returns the same type it receives.  Returning plain int from
// the 64 bit and unsigned overloads would truncate large magnitudes or turn
// large unsigned values negative.

// Signed 32 bit magnitude.
static int32_t my_abs(int32_t x)
{
	return abs(x);
}

// Unsigned values are already their own magnitude.
static uint32_t my_abs(uint32_t x)
{
	return x;
}

// Signed 64 bit magnitude; the result keeps the full 64 bit range.
static int64_t my_abs(int64_t x)
{
	return (int64_t)llabs((long long)x);
}

// Unsigned values are already their own magnitude.
static uint64_t my_abs(uint64_t x)
{
	return x;
}

// Single precision magnitude.
static float my_abs(float x)
{
	return fabsf(x);
}
44 OverlayFrame::OverlayFrame(int cpus)
46         temp_frame = 0;
47         blend_engine = 0;
48         scale_engine = 0;
49         scaletranslate_engine = 0;
50         translate_engine = 0;
51         this->cpus = cpus;
54 OverlayFrame::~OverlayFrame()
56         if(temp_frame) delete temp_frame;
57         if(scale_engine) delete scale_engine;
58         if(translate_engine) delete translate_engine;
59         if(blend_engine) delete blend_engine;
60         if(scaletranslate_engine) delete scaletranslate_engine;
70 // Verification: 
72 // (255 * 255 + 0 * 0) / 255 = 255
73 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
75 // (65535 * 65535 + 0 * 0) / 65535 = 65535
76 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
79 // Branch prediction 4 U
// Blend one 3 component pixel into output[0..2] in place.
// The expansion site must provide input1, input2, input3, output, opacity,
// transparency and mode.
//   max           - largest channel value; also the divisor of the opacity mix
//   temp_type     - type used for the intermediate r, g, b values
//   type          - channel storage type; results are clipped to 0..max
//                   unless sizeof(type) == 4
//   chroma_offset - when nonzero, components 2 and 3 take the chroma branches
// NOTE(review): the switch has no default case, so r, g and b are stored
// uninitialized if mode matches none of the TRANSFER_* cases.
81 #define BLEND_3(max, temp_type, type, chroma_offset) \
82 { \
83         temp_type r, g, b; \
84  \
85 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
86         switch(mode) \
87         { \
/* Input divided by output; a zero divisor yields max. */ \
88                 case TRANSFER_DIVIDE: \
89                         r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
90                         if(chroma_offset) \
91                         { \
/* Keep whichever of input/output lies farther from chroma_offset. */ \
92                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
93                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
94                         } \
95                         else \
96                         { \
97                                 g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
98                                 b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
99                         } \
/* Mix the mode result with the existing output by opacity. */ \
100                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
101                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
102                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
103                         break; \
/* Input times output, renormalized by max. */ \
104                 case TRANSFER_MULTIPLY: \
105                         r = ((temp_type)input1 * output[0]) / max; \
106                         if(chroma_offset) \
107                         { \
/* Same farther-from-chroma_offset selection as TRANSFER_DIVIDE. */ \
108                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
109                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
110                         } \
111                         else \
112                         { \
113                                 g = (temp_type)input2 * (temp_type)output[1] / max; \
114                                 b = (temp_type)input3 * (temp_type)output[2] / max; \
115                         } \
116                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
117                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
118                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
119                         break; \
/* Input minus output; chroma_offset is removed from output components 2 and 3 first. */ \
120                 case TRANSFER_SUBTRACT: \
121                         r = (temp_type)input1 - output[0]; \
122                         g = (temp_type)input2 - ((temp_type)output[1] - chroma_offset); \
123                         b = (temp_type)input3 - ((temp_type)output[2] - chroma_offset); \
124                         r = (r * opacity + output[0] * transparency) / max; \
125                         g = (g * opacity + output[1] * transparency) / max; \
126                         b = (b * opacity + output[2] * transparency) / max; \
127                         break; \
/* Input plus output; chroma_offset is removed from input components 2 and 3 first. */ \
128                 case TRANSFER_ADDITION: \
129                         r = (temp_type)input1 + output[0]; \
130                         g = (temp_type)input2 - chroma_offset + output[1]; \
131                         b = (temp_type)input3 - chroma_offset + output[2]; \
132                         r = (r * opacity + output[0] * transparency) / max; \
133                         g = (g * opacity + output[1] * transparency) / max; \
134                         b = (b * opacity + output[2] * transparency) / max; \
135                         break; \
/* Input copied as is; opacity and transparency are not applied. */ \
136                 case TRANSFER_REPLACE: \
137                         r = input1; \
138                         g = input2; \
139                         b = input3; \
140                         break; \
/* Plain opacity weighted mix of input and output. */ \
141                 case TRANSFER_NORMAL: \
142                         r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
143                         g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
144                         b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
145                         break; \
146         } \
/* Channel types whose size is not 4 bytes are clipped to 0..max; 4 byte types are stored unclipped. */ \
148         if(sizeof(type) != 4) \
149         { \
150                 output[0] = (type)CLIP(r, 0, max); \
151                 output[1] = (type)CLIP(g, 0, max); \
152                 output[2] = (type)CLIP(b, 0, max); \
153         } \
154         else \
155         { \
156                 output[0] = r; \
157                 output[1] = g; \
158                 output[2] = b; \
159         } \
166 // Blending equations are drastically different for 3 and 4 components
// Blend one 4 component pixel into output[0..3] in place.
// The expansion site must provide input1..input4, output, opacity and mode.
//   max           - largest channel value
//   temp_type     - type used for all intermediate values
//   type          - channel storage type; the first three results are clipped
//                   to 0..max unless sizeof(type) == 4
//   chroma_offset - when nonzero, components 2 and 3 take the chroma branches
// The fourth input component scales the opacity, so the mix weights are in
// units of max * max and every mix is divided by max twice.
// NOTE(review): the switch has no default case, so r, g, b and a are stored
// uninitialized if mode matches none of the TRANSFER_* cases.
167 #define BLEND_4(max, temp_type, type, chroma_offset) \
168 { \
169         temp_type r, g, b, a; \
170         temp_type pixel_opacity, pixel_transparency; \
171         temp_type output1 = output[0]; \
172         temp_type output2 = output[1]; \
173         temp_type output3 = output[2]; \
174         temp_type output4 = output[3]; \
/* Per pixel weights: opacity scaled by the input's fourth component, and its complement against max * max. */ \
176         pixel_opacity = opacity * input4; \
177         pixel_transparency = (temp_type)max * max - pixel_opacity; \
179         switch(mode) \
180         { \
/* Input divided by output; a zero divisor yields max. */ \
181                 case TRANSFER_DIVIDE: \
182                         r = output1 ? (((temp_type)input1 * max) / output1) : max; \
183                         if(chroma_offset) \
184                         { \
/* Keep whichever of input/output lies farther from chroma_offset. */ \
185                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
186                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
187                         } \
188                         else \
189                         { \
190                                 g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
191                                 b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
192                         } \
193                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
194                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
195                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
/* Resulting fourth component is the larger of the two. */ \
196                         a = input4 > output4 ? input4 : output4; \
197                         break; \
/* Input times output, renormalized by max. */ \
198                 case TRANSFER_MULTIPLY: \
199                         r = ((temp_type)input1 * output1) / max; \
200                         if(chroma_offset) \
201                         { \
202                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
203                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
204                         } \
205                         else \
206                         { \
207                                 g = (temp_type)input2 * (temp_type)output2 / max; \
208                                 b = (temp_type)input3 * (temp_type)output3 / max; \
209                         } \
210                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
211                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
212                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
213                         a = input4 > output4 ? input4 : output4; \
214                         break; \
/* Input minus output; chroma_offset is removed from output components 2 and 3 first. */ \
215                 case TRANSFER_SUBTRACT: \
216                         r = (temp_type)input1 - output1; \
217                         g = (temp_type)input2 - ((temp_type)output2 - chroma_offset); \
218                         b = (temp_type)input3 - ((temp_type)output3 - chroma_offset); \
219                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
220                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
221                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
222                         a = input4 > output4 ? input4 : output4; \
223                         break; \
/* Input plus output; chroma_offset is removed from input components 2 and 3 first. */ \
224                 case TRANSFER_ADDITION: \
225                         r = (temp_type)input1 + output1; \
226                         g = (temp_type)input2 - chroma_offset + output2; \
227                         b = (temp_type)input3 - chroma_offset + output3; \
228                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
229                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
230                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
231                         a = input4 > output4 ? input4 : output4; \
232                         break; \
/* All four input components copied as is; no weighting applied. */ \
233                 case TRANSFER_REPLACE: \
234                         r = input1; \
235                         g = input2; \
236                         b = input3; \
237                         a = input4; \
238                         break; \
/* Weighted mix; components 2 and 3 are mixed relative to chroma_offset, which is added back afterwards. */ \
239                 case TRANSFER_NORMAL: \
240                         r = (input1 * pixel_opacity + \
241                                 output1 * pixel_transparency) / max / max; \
242                         g = ((input2 - chroma_offset) * pixel_opacity + \
243                                 (output2 - chroma_offset) * pixel_transparency) \
244                                 / max / max + \
245                                 chroma_offset; \
246                         b = ((input3 - chroma_offset) * pixel_opacity + \
247                                 (output3 - chroma_offset) * pixel_transparency) \
248                                 / max / max + \
249                                 chroma_offset; \
250                         a = input4 > output4 ? input4 : output4; \
251                         break; \
252         } \
/* Channel types whose size is not 4 bytes get r, g, b clipped to 0..max; the fourth component is never clipped. */ \
254         if(sizeof(type) != 4) \
255         { \
256                 output[0] = (type)CLIP(r, 0, max); \
257                 output[1] = (type)CLIP(g, 0, max); \
258                 output[2] = (type)CLIP(b, 0, max); \
259                 output[3] = (type)a; \
260         } \
261         else \
262         { \
263                 output[0] = r; \
264                 output[1] = g; \
265                 output[2] = b; \
266                 output[3] = a; \
267         } \
272 // Bicubic algorithm using multiprocessors
273 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
275 // Nearest neighbor algorithm using multiprocessors for blending
276 // input -> scale + translate -> blend -> output
// Composite a rectangle of input onto a rectangle of output.
//   in_x1, in_y1, in_x2, in_y2     - source rectangle in input coordinates
//   out_x1, out_y1, out_x2, out_y2 - destination rectangle in output coordinates
//   alpha, mode                    - handed to whichever engine does the blending
//   interpolation_type             - NEAREST_NEIGHBOR skips the scaling pass
// Both rectangles are clipped to their frames; each clip moves the matching
// edge of the other rectangle by the scale factor so the mapping is kept.
// Returns 1 if any coordinate is NaN, otherwise 0.  0 is also returned when
// the clipped rectangles are empty and nothing is drawn.
279 int OverlayFrame::overlay(VFrame *output, 
280         VFrame *input, 
281         float in_x1, 
282         float in_y1, 
283         float in_x2, 
284         float in_y2, 
285         float out_x1, 
286         float out_y1, 
287         float out_x2, 
288         float out_y2, 
289         float alpha,       // 0 - 1
290         int mode,
291         int interpolation_type)
// Scale factors come from the unclipped rectangles.
// NOTE(review): computed before the NaN check below and without a guard
// against in_x2 == in_x1 or in_y2 == in_y1.
293         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
294         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
296         if(isnan(in_x1) ||
297                 isnan(in_y1) ||
298                 isnan(in_x2) ||
299                 isnan(in_y2) ||
300                 isnan(out_x1) ||
301                 isnan(out_y1) ||
302                 isnan(out_x2) ||
303                 isnan(out_y2)) return 1;
304 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f\n", in_x1,
305 //                      in_y1,
306 //                      in_x2,
307 //                      in_y2,
308 //                      out_x1,
309 //                      out_y1,
310 //                      out_x2,
311 //                      out_y2);
313 // Limit values
// Clip each source edge to the input frame and shift the matching output
// edge by the clipped distance multiplied by the scale.
314         if(in_x1 < 0)
315         {
316                 out_x1 += -in_x1 * w_scale;
317                 in_x1 = 0;
318         }
319         else
320         if(in_x1 >= input->get_w())
321         {
322                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
323                 in_x1 = input->get_w();
324         }
326         if(in_y1 < 0)
327         {
328                 out_y1 += -in_y1 * h_scale;
329                 in_y1 = 0;
330         }
331         else
332         if(in_y1 >= input->get_h())
333         {
334                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
335                 in_y1 = input->get_h();
336         }
338         if(in_x2 < 0)
339         {
340                 out_x2 += -in_x2 * w_scale;
341                 in_x2 = 0;
342         }
343         else
344         if(in_x2 >= input->get_w())
345         {
346                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
347                 in_x2 = input->get_w();
348         }
350         if(in_y2 < 0)
351         {
352                 out_y2 += -in_y2 * h_scale;
353                 in_y2 = 0;
354         }
355         else
356         if(in_y2 >= input->get_h())
357         {
358                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
359                 in_y2 = input->get_h();
360         }
// Clip each destination edge to the output frame and shift the matching
// input edge by the clipped distance divided by the scale.
362         if(out_x1 < 0)
363         {
364                 in_x1 += -out_x1 / w_scale;
365                 out_x1 = 0;
366         }
367         else
368         if(out_x1 >= output->get_w())
369         {
370                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
371                 out_x1 = output->get_w();
372         }
374         if(out_y1 < 0)
375         {
376                 in_y1 += -out_y1 / h_scale;
377                 out_y1 = 0;
378         }
379         else
380         if(out_y1 >= output->get_h())
381         {
382                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
383                 out_y1 = output->get_h();
384         }
386         if(out_x2 < 0)
387         {
388                 in_x2 += -out_x2 / w_scale;
389                 out_x2 = 0;
390         }
391         else
392         if(out_x2 >= output->get_w())
393         {
394                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
395                 out_x2 = output->get_w();
396         }
398         if(out_y2 < 0)
399         {
400                 in_y2 += -out_y2 / h_scale;
401                 out_y2 = 0;
402         }
403         else
404         if(out_y2 >= output->get_h())
405         {
406                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
407                 out_y2 = output->get_h();
408         }
// Sizes of the clipped rectangles; an empty one means there is nothing to draw.
419         float in_w = in_x2 - in_x1;
420         float in_h = in_y2 - in_y1;
421         float out_w = out_x2 - out_x1;
422         float out_h = out_y2 - out_y1;
423 // Input for translation operation
424         VFrame *translation_input = input;
427         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
430 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
431 //                      in_y1,
432 //                      in_x2,
433 //                      in_y2,
434 //                      out_x1,
435 //                      out_y1,
436 //                      out_x2,
437 //                      out_y2);
443 // ****************************************************************************
444 // Transfer to temp buffer by scaling nearest integer boundaries
445 // ****************************************************************************
// Only taken for interpolated modes when the scale differs from 1 on at
// least one axis.
446         if(interpolation_type != NEAREST_NEIGHBOR &&
447                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
448         {
449 // Create integer boundaries for interpolation
450                 int in_x1_int = (int)in_x1;
451                 int in_y1_int = (int)in_y1;
452                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
453                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
455 // Dimensions of temp frame.  Integer boundaries scaled.
456                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
457                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
458                 VFrame *scale_output;
// True when the source starts at the origin, ends on integer boundaries and
// the destination covers exactly the scaled size starting at the origin.
462 #define NO_TRANSLATION1 \
463         (EQUIV(in_x1, 0) && \
464         EQUIV(in_y1, 0) && \
465         EQUIV(out_x1, 0) && \
466         EQUIV(out_y1, 0) && \
467         EQUIV(in_x2, in_x2_int) && \
468         EQUIV(in_y2, in_y2_int) && \
469         EQUIV(out_x2, temp_w) && \
470         EQUIV(out_y2, temp_h))
// True when alpha is 1 and the mode is REPLACE, or NORMAL on a 3 component
// color model.
473 #define NO_BLEND \
474         (EQUIV(alpha, 1) && \
475         (mode == TRANSFER_REPLACE || \
476         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
482 // Prepare destination for operation
484 // No translation and no blending.  The blending operation is built into the
485 // translation unit but not the scaling unit.
486 // input -> output
487                 if(NO_TRANSLATION1 &&
488                         NO_BLEND)
489                 {
490 // printf("OverlayFrame::overlay input -> output\n");
// Scale straight into output; clearing translation_input skips the second stage.
492                         scale_output = output;
493                         translation_input = 0;
494                 }
495                 else
496 // If translation or blending
497 // input -> nearest integer boundary temp
498                 {
// The cached temp frame is reused only when its dimensions still match.
499                         if(temp_frame && 
500                                 (temp_frame->get_w() != temp_w ||
501                                         temp_frame->get_h() != temp_h))
502                         {
503                                 delete temp_frame;
504                                 temp_frame = 0;
505                         }
507                         if(!temp_frame)
508                         {
509                                 temp_frame = new VFrame(0,
510                                         temp_w,
511                                         temp_h,
512                                         input->get_color_model(),
513                                         -1);
514                         }
515 //printf("OverlayFrame::overlay input -> temp\n");
518                         temp_frame->clear_frame();
520 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
521 //      temp_w, temp_h);
522                         scale_output = temp_frame;
523                         translation_input = scale_output;
525 // Adjust input coordinates to reflect new scaled coordinates.
526                         in_x1 = (in_x1 - in_x1_int) * w_scale;
527                         in_y1 = (in_y1 - in_y1_int) * h_scale;
528                         in_x2 = (in_x2 - in_x1_int) * w_scale;
529                         in_y2 = (in_y2 - in_y1_int) * h_scale;
530                 }
534 //printf("Overlay 1\n");
536 // Scale input -> scale_output
// The engine is created on first use and kept for later calls.
537                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
538                 scale_engine->scale_output = scale_output;
539                 scale_engine->scale_input = input;
540                 scale_engine->w_scale = w_scale;
541                 scale_engine->h_scale = h_scale;
542                 scale_engine->in_x1_int = in_x1_int;
543                 scale_engine->in_y1_int = in_y1_int;
544                 scale_engine->out_w_int = temp_w;
545                 scale_engine->out_h_int = temp_h;
546                 scale_engine->interpolation_type = interpolation_type;
547 //printf("Overlay 2\n");
549 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
550                 scale_engine->process_packages();
551 //printf("OverlayFrame::overlay ScaleEngine 2\n");
555         }
557 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
558 //      in_x1, 
559 //      in_y1, 
560 //      in_x2, 
561 //      in_y2, 
562 //      out_x1, 
563 //      out_y1, 
564 //      out_x2, 
565 //      out_y2);
// True when the source covers all of translation_input and the destination
// covers all of output.
571 #define NO_TRANSLATION2 \
572         (EQUIV(in_x1, 0) && \
573         EQUIV(in_y1, 0) && \
574         EQUIV(in_x2, translation_input->get_w()) && \
575         EQUIV(in_y2, translation_input->get_h()) && \
576         EQUIV(out_x1, 0) && \
577         EQUIV(out_y1, 0) && \
578         EQUIV(out_x2, output->get_w()) && \
579         EQUIV(out_y2, output->get_h())) \
581 #define NO_SCALE \
582         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
583         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
585         
588 //printf("OverlayFrame::overlay 4 %d\n", mode);
// translation_input is 0 when the scaler already wrote directly into output.
593         if(translation_input)
594         {
595 // Direct copy
596                 if( NO_TRANSLATION2 &&
597                         NO_SCALE &&
598                         NO_BLEND)
599                 {
600 //printf("OverlayFrame::overlay direct copy\n");
601                         output->copy_from(translation_input);
602                 }
603                 else
604 // Blend only
605                 if( NO_TRANSLATION2 &&
606                         NO_SCALE)
607                 {
608                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
611                         blend_engine->output = output;
612                         blend_engine->input = translation_input;
613                         blend_engine->alpha = alpha;
614                         blend_engine->mode = mode;
616                         blend_engine->process_packages();
617                 }
618                 else
619 // Scale and translate using nearest neighbor
620 // Translation is exactly on integer boundaries
// && binds tighter than ||, so this reads as
// NEAREST_NEIGHBOR || (all eight coordinates are integral).
621                 if(interpolation_type == NEAREST_NEIGHBOR ||
622                         EQUIV(in_x1, (int)in_x1) &&
623                         EQUIV(in_y1, (int)in_y1) &&
624                         EQUIV(in_x2, (int)in_x2) &&
625                         EQUIV(in_y2, (int)in_y2) &&
627                         EQUIV(out_x1, (int)out_x1) &&
628                         EQUIV(out_y1, (int)out_y1) &&
629                         EQUIV(out_x2, (int)out_x2) &&
630                         EQUIV(out_y2, (int)out_y2))
631                 {
632 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
633                         if(!scaletranslate_engine) scaletranslate_engine = 
634                                 new ScaleTranslateEngine(this, cpus);
637                         scaletranslate_engine->output = output;
638                         scaletranslate_engine->input = translation_input;
639                         scaletranslate_engine->in_x1 = (int)in_x1;
640                         scaletranslate_engine->in_y1 = (int)in_y1;
641 // we need to do this mumbo-jumbo in order to get numerical stability
642 // other option would be to round all the coordinates
// The second edge is the truncated first edge plus the truncated extent,
// rather than the second edge truncated on its own.
643                         scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1);
644                         scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
645                         scaletranslate_engine->out_x1 = (int)out_x1;
646                         scaletranslate_engine->out_y1 = (int)out_y1;
647                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
648                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
649                         scaletranslate_engine->alpha = alpha;
650                         scaletranslate_engine->mode = mode;
652                         scaletranslate_engine->process_packages();
653                 }
654                 else
655 // Fractional translation
656                 {
657 // Use fractional translation
658 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
659 //      in_x1, 
660 //      in_y1, 
661 //      in_x2, 
662 //      in_y2, 
663 //      out_x1, 
664 //      out_y1, 
665 //      out_x2, 
666 //      out_y2);
668 //printf("Overlay 3\n");
// Coordinates are passed on as floats, unlike the integer path above.
669                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
670                         translate_engine->translate_output = output;
671                         translate_engine->translate_input = translation_input;
672                         translate_engine->translate_in_x1 = in_x1;
673                         translate_engine->translate_in_y1 = in_y1;
674                         translate_engine->translate_in_x2 = in_x2;
675                         translate_engine->translate_in_y2 = in_y2;
676                         translate_engine->translate_out_x1 = out_x1;
677                         translate_engine->translate_out_y1 = out_y1;
678                         translate_engine->translate_out_x2 = out_x2;
679                         translate_engine->translate_out_y2 = out_y2;
680                         translate_engine->translate_alpha = alpha;
681                         translate_engine->translate_mode = mode;
682 //printf("Overlay 4\n");
684 //printf("OverlayFrame::overlay 5 %d\n", mode);
685                         translate_engine->process_packages();
687                 }
688         }
689 //printf("OverlayFrame::overlay 2\n");
691         return 0;
// Default constructor of the scaling work package.
// NOTE(review): no body is visible in this listing.
700 ScalePackage::ScalePackage()
707 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
708  : LoadClient(server)
710         this->overlay = overlay;
711         this->engine = server;
// Destructor.
// NOTE(review): no body is visible in this listing.
714 ScaleUnit::~ScaleUnit()
720 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
721         float scale,
722         int in_pixel1, 
723         int out_total,
724         int in_total)
726         table = new bilinear_table_t[out_total];
727         bzero(table, sizeof(bilinear_table_t) * out_total);
728 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
729         for(int i = 0; i < out_total; i++)
730         {
731                 float out_start = i;
732                 float in_start = out_start * scale;
733                 float out_end = i + 1;
734                 float in_end = out_end * scale;
735                 bilinear_table_t *entry = table + i;
736 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
738 // Store input fraction
739                 entry->input_fraction1 = (floor(in_start + 1) - in_start) / scale;
740                 entry->input_fraction2 = 1.0 / scale;
741                 entry->input_fraction3 = (in_end - floor(in_end)) / scale;
743                 if(in_end >= in_total - in_pixel1)
744                 {
745                         in_end = in_total - in_pixel1 - 1;
746                         
747                         int difference = (int)in_end - (int)in_start - 1;
748                         if(difference < 0) difference = 0;
749                         entry->input_fraction3 = 1.0 - 
750                                 entry->input_fraction1 - 
751                                 entry->input_fraction2 * difference;
752                 }
754 // Store input pixels
755                 entry->input_pixel1 = (int)in_start;
756                 entry->input_pixel2 = (int)in_end;
758 // printf("ScaleUnit::tabulate_reduction 1 %d %d %f %f  %f\n", 
759 // entry->input_pixel1, 
760 // entry->input_pixel2,
761 // entry->input_fraction1,
762 // entry->input_fraction2,
763 // entry->input_fraction3);
766 // Sanity check
767                 if(entry->input_pixel1 > entry->input_pixel2)
768                 {
769                         entry->input_pixel1 = entry->input_pixel2;
770                         entry->input_fraction1 = 0;
771                 }
773 // Get total fraction of output pixel used
774 //              if(entry->input_pixel2 > entry->input_pixel1)
775                 entry->total_fraction = 
776                         entry->input_fraction1 +
777                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
778                         entry->input_fraction3;
779                 entry->input_pixel1 += in_pixel1;
780                 entry->input_pixel2 += in_pixel1;
781         }
784 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
785         float scale,
786         int in_pixel1, 
787         int out_total,
788         int in_total)
790         table = new bilinear_table_t[out_total];
791         bzero(table, sizeof(bilinear_table_t) * out_total);
793         for(int i = 0; i < out_total; i++)
794         {
795                 bilinear_table_t *entry = table + i;
796                 float in_pixel = i * scale;
797                 entry->input_pixel1 = (int)floor(in_pixel);
798                 entry->input_pixel2 = entry->input_pixel1 + 1;
800                 if(in_pixel <= in_total)
801                 {
802                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
803                 }
804                 else
805                 {
806                         entry->input_fraction3 = 0;
807                         entry->input_pixel2 = 0;
808                 }
810                 if(in_pixel >= 0)
811                 {
812                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
813                 }
814                 else
815                 {
816                         entry->input_fraction1 = 0;
817                         entry->input_pixel1 = 0;
818                 }
820                 if(entry->input_pixel2 >= in_total - in_pixel1)
821                 {
822                         entry->input_pixel2 = entry->input_pixel1;
823                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
824                 }
826                 entry->total_fraction = 
827                         entry->input_fraction1 + 
828                         entry->input_fraction3;
829                 entry->input_pixel1 += in_pixel1;
830                 entry->input_pixel2 += in_pixel1;
831 // 
832 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
833 // entry->input_pixel1,
834 // entry->input_pixel2,
835 // entry->input_fraction1,
836 // entry->input_fraction2,
837 // entry->input_fraction3);
838         }
841 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
843         printf("ScaleUnit::dump_bilinear\n");
844         for(int i = 0; i < total; i++)
845         {
846                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n", 
847                         i,
848                         table[i].input_pixel1,
849                         table[i].input_pixel2,
850                         table[i].input_fraction1,
851                         table[i].input_fraction2,
852                         table[i].input_fraction3,
853                         table[i].total_fraction);
854         }
// Accumulate one input row's contribution to the output pixel being built.
//
// Expands inside BILINEAR_REDUCE and relies on names from the expansion
// site: in_rows, x_entry, input_scale1 .. input_scale3 and the accumulators
// temp_f1 .. temp_f4.
//   type       - component storage type
//   components - components per pixel; the 4th accumulator is only touched
//                when this is 4
//   row        - index into in_rows of the input row to read
#define PIXEL_REDUCE_MACRO(type, components, row) \
{ \
	type *row_base = in_rows[row]; \
	type *first_pixel = row_base + x_entry->input_pixel1 * components; \
	type *last_pixel = row_base + x_entry->input_pixel2 * components; \
 \
/* First pixel of the span, weighted by input_scale1 */ \
	temp_f1 += input_scale1 * first_pixel[0]; \
	temp_f2 += input_scale1 * first_pixel[1]; \
	temp_f3 += input_scale1 * first_pixel[2]; \
	if(components == 4) temp_f4 += input_scale1 * first_pixel[3]; \
 \
/* Last pixel of the span, weighted by input_scale3.  Added unconditionally, */ \
/* so it is counted even when it is the same pixel as the first. */ \
	temp_f1 += input_scale3 * last_pixel[0]; \
	temp_f2 += input_scale3 * last_pixel[1]; \
	temp_f3 += input_scale3 * last_pixel[2]; \
	if(components == 4) temp_f4 += input_scale3 * last_pixel[3]; \
 \
/* Pixels strictly between the first and the last, weighted by input_scale2 */ \
	for(type *middle = first_pixel + components; \
		middle < last_pixel; \
		middle += components) \
	{ \
		temp_f1 += input_scale2 * middle[0]; \
		temp_f2 += input_scale2 * middle[1]; \
		temp_f3 += input_scale2 * middle[2]; \
		if(components == 4) temp_f4 += input_scale2 * middle[3]; \
	} \
}
// Bilinear reduction and suboptimal enlargement.
// Very high quality.
//
// Expands inside ScaleUnit::process_package and uses its locals: pkg, input,
// output, scale_w, scale_h, in_x1_int, in_y1_int, out_w_int and out_h_int.
//   max        - largest component value; results are capped to it unless it
//                is 1.0
//   type       - component storage type
//   components - components per pixel (3 or 4)
//
// Every output pixel is a weighted sum over a rectangle of input pixels.  The
// x and y tables give, per output column and row, the first and last input
// pixel (input_pixel1, input_pixel2) and the weights of the first, middle and
// last pixels (input_fraction1, input_fraction2, input_fraction3).
//
// The middle input rows are always accumulated.  They used to be skipped
// whenever the package held a single output row, which dropped part of the
// weighted sum for that row.
#define BILINEAR_REDUCE(max, type, components) \
{ \
	bilinear_table_t *x_table, *y_table; \
	const int out_h = pkg->out_row2 - pkg->out_row1; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
/* Rounding factor: .5 unless the component type is 4 bytes wide */ \
	const float rounding = (sizeof(type) != 4) ? .5 : 0; \
 \
/* Horizontal table */ \
	if(scale_w < 1) \
	{ \
		tabulate_reduction(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
	} \
	else \
	{ \
		tabulate_enlarge(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
	} \
 \
/* Vertical table.  Built for every output row, not just this package's */ \
	if(scale_h < 1) \
	{ \
		tabulate_reduction(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
	} \
	else \
	{ \
		tabulate_enlarge(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
	} \
/* dump_bilinear(y_table, out_h_int); */ \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		type *out_pixel = out_rows[i + pkg->out_row1]; \
		bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++, out_pixel += components) \
		{ \
			bilinear_table_t *x_entry = &x_table[j]; \
/* Load rounding factors */ \
			float temp_f1 = rounding; \
			float temp_f2 = rounding; \
			float temp_f3 = rounding; \
			float temp_f4 = rounding; \
			float input_scale1, input_scale2, input_scale3; \
 \
/* First row */ \
			input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 \
/* Last row */ \
			input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 \
/* Middle rows.  The loop is empty when there are none. */ \
			input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
			for(int k = y_entry->input_pixel1 + 1; \
				k < y_entry->input_pixel2; \
				k++) \
			{ \
				PIXEL_REDUCE_MACRO(type, components, k) \
			} \
 \
/* Cap at the largest component value */ \
			if(max != 1.0) \
			{ \
				if(temp_f1 > max) temp_f1 = max; \
				if(temp_f2 > max) temp_f2 = max; \
				if(temp_f3 > max) temp_f3 = max; \
				if(components == 4) if(temp_f4 > max) temp_f4 = max; \
			} \
 \
			out_pixel[0] = (type)temp_f1; \
			out_pixel[1] = (type)temp_f2; \
			out_pixel[2] = (type)temp_f3; \
			if(components == 4) out_pixel[3] = (type)temp_f4; \
		} \
	} \
 \
	delete [] x_table; \
	delete [] y_table; \
}
// Only 2 input pixels
//
// Bilinear enlargement: every output pixel blends the 2 x 2 input pixels
// selected by the tables from tabulate_blinear_f.  Expands inside
// ScaleUnit::process_package and uses its locals: pkg, input, output,
// scale_w, scale_h, in_x1_int, in_y1_int and out_w_int.
//   max        - unused here; kept so all scaling macros share a signature
//   type       - component storage type
//   components - components per pixel (3 or 4)
//
// No rounding factor is added, so the cast to an integer type truncates.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
	const float k_y = 1.0 / scale_h; \
	const float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	const int out_h = pkg->out_row2 - pkg->out_row1; \
	const int in_h_int = input->get_h(); \
	const int in_w_int = input->get_w(); \
	int *table_int_x1, *table_int_x2; \
	int *table_int_y1, *table_int_y2; \
	float *table_frac_x_f, *table_antifrac_x_f; \
	float *table_frac_y_f, *table_antifrac_y_f; \
 \
/* Columns are tabulated for the whole output width */ \
	tabulate_blinear_f(table_int_x1, \
		table_int_x2, \
		table_frac_x_f, \
		table_antifrac_x_f, \
		k_x, \
		0, \
		out_w_int, \
		in_x1_int, \
		in_w_int); \
/* Rows are tabulated for this package only, so row tables are indexed from 0 */ \
	tabulate_blinear_f(table_int_y1, \
		table_int_y2, \
		table_frac_y_f, \
		table_antifrac_y_f, \
		k_y, \
		pkg->out_row1, \
		pkg->out_row2, \
		in_y1_int, \
		in_h_int); \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		const float a_f = table_frac_y_f[i]; \
		const float anti_a_f = table_antifrac_y_f[i]; \
		type *in_row1 = in_rows[table_int_y1[i]]; \
		type *in_row2 = in_rows[table_int_y2[i]]; \
		type *out_pixel = out_rows[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++, out_pixel += components) \
		{ \
			const float b_f = table_frac_x_f[j]; \
			const float anti_b_f = table_antifrac_x_f[j]; \
/* The four neighbours: upper left, upper right, lower left, lower right */ \
			type *pixel1 = in_row1 + table_int_x1[j] * components; \
			type *pixel2 = in_row1 + table_int_x2[j] * components; \
			type *pixel3 = in_row2 + table_int_x1[j] * components; \
			type *pixel4 = in_row2 + table_int_x2[j] * components; \
 \
			for(int c = 0; c < components; c++) \
			{ \
				out_pixel[c] = (type)( \
					anti_a_f * (anti_b_f * (float)pixel1[c] + \
						b_f * (float)pixel2[c]) + \
					a_f * (anti_b_f * (float)pixel3[c] + \
						b_f * (float)pixel4[c])); \
			} \
		} \
	} \
 \
	delete [] table_int_x1; \
	delete [] table_int_x2; \
	delete [] table_int_y1; \
	delete [] table_int_y2; \
	delete [] table_frac_x_f; \
	delete [] table_antifrac_x_f; \
	delete [] table_frac_y_f; \
	delete [] table_antifrac_y_f; \
}
// Bicubic scaling: every output pixel is the weighted sum of a 4 x 4 block
// of input pixels.  Weights and input coordinates come from
// tabulate_bcubic_f, 4 consecutive table slots per output column and row.
//
// Expands inside ScaleUnit::process_package and uses its locals: pkg, input,
// output, scale_w, scale_h, in_x1_int, in_y1_int, out_w_int and out_h_int.
//   max        - unused here; kept so all scaling macros share a signature
//   type       - component storage type
//   components - components per pixel (3 or 4)
//
// No rounding factor is added, so the cast to an integer type truncates.
#define BICUBIC(max, type, components) \
{ \
	const float k_y = 1.0 / scale_h; \
	const float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	float *bspline_x_f, *bspline_y_f; \
	int *in_x_table, *in_y_table; \
	const int in_h_int = input->get_h(); \
	const int in_w_int = input->get_w(); \
 \
	tabulate_bcubic_f(bspline_x_f, \
		in_x_table, \
		k_x, \
		in_x1_int, \
		out_w_int, \
		in_w_int, \
		-1); \
 \
/* Built for every output row, so it is indexed by absolute row number */ \
	tabulate_bcubic_f(bspline_y_f, \
		in_y_table, \
		k_y, \
		in_y1_int, \
		out_h_int, \
		in_h_int, \
		1); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		const float *y_weights = bspline_y_f + i * 4; \
		const int *y_coords = in_y_table + i * 4; \
		type *out_pixel = out_rows[i]; \
 \
		for(int j = 0; j < out_w_int; j++, out_pixel += components) \
		{ \
			const float *x_weights = bspline_x_f + j * 4; \
			const int *x_coords = in_x_table + j * 4; \
			float output1_f = 0; \
			float output2_f = 0; \
			float output3_f = 0; \
			float output4_f = 0; \
 \
/* Kernel */ \
			for(int m = 0; m < 4; m++) \
			{ \
				const float r1_f = y_weights[m]; \
				type *in_row = in_rows[y_coords[m]]; \
 \
				for(int n = 0; n < 4; n++) \
				{ \
					const float r_square_f = r1_f * x_weights[n]; \
					type *in_pixel = in_row + x_coords[n] * components; \
 \
					output1_f += r_square_f * in_pixel[0]; \
					output2_f += r_square_f * in_pixel[1]; \
					output3_f += r_square_f * in_pixel[2]; \
					if(components == 4) \
						output4_f += r_square_f * in_pixel[3]; \
				} \
			} \
 \
			out_pixel[0] = (type)output1_f; \
			out_pixel[1] = (type)output2_f; \
			out_pixel[2] = (type)output3_f; \
			if(components == 4) \
				out_pixel[3] = (type)output4_f; \
		} \
	} \
 \
	delete [] bspline_x_f; \
	delete [] bspline_y_f; \
	delete [] in_x_table; \
	delete [] in_y_table; \
}
// The pow function is not thread safe in Compaq C, so cubes are spelled out
// as multiplications.  The argument is evaluated three times.
#define CUBE(v) ((v) * (v) * (v))
1205 float ScaleUnit::cubic_bspline(float x)
1207         float a, b, c, d;
1209         if((x + 2.0F) <= 0.0F) 
1210         {
1211         a = 0.0F;
1212         }
1213         else 
1214         {
1215         a = CUBE(x + 2.0F);
1216         }
1219         if((x + 1.0F) <= 0.0F) 
1220         {
1221         b = 0.0F;
1222         }
1223         else 
1224         {
1225         b = CUBE(x + 1.0F);
1226         }    
1228         if(x <= 0) 
1229         {
1230         c = 0.0F;
1231         }
1232         else 
1233         {
1234         c = CUBE(x);
1235         }  
1237         if((x - 1.0F) <= 0.0F) 
1238         {
1239         d = 0.0F;
1240         }
1241         else 
1242         {
1243         d = CUBE(x - 1.0F);
1244         }
1247         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1251 void ScaleUnit::tabulate_bcubic_f(float* &coef_table, 
1252         int* &coord_table,
1253         float scale,
1254         int start, 
1255         int pixels,
1256         int total_pixels,
1257         float coefficient)
1259         coef_table = new float[pixels * 4];
1260         coord_table = new int[pixels * 4];
1261         for(int i = 0, j = 0; i < pixels; i++)
1262         {
1263                 float f_x = (float)i * scale;
1264                 float a = f_x - floor(f_x);
1265                 
1266                 for(float m = -1; m < 3; m++)
1267                 {
1268                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1269                         coord_table[j] = (int)(start + (int)f_x + m);
1270                         CLAMP(coord_table[j], 0, total_pixels - 1);
1271                         j++;
1272                 }
1273                 
1274         }
1277 void ScaleUnit::tabulate_bcubic_i(int* &coef_table, 
1278         int* &coord_table,
1279         float scale,
1280         int start, 
1281         int pixels,
1282         int total_pixels,
1283         float coefficient)
1285         coef_table = new int[pixels * 4];
1286         coord_table = new int[pixels * 4];
1287         for(int i = 0, j = 0; i < pixels; i++)
1288         {
1289                 float f_x = (float)i * scale;
1290                 float a = f_x - floor(f_x);
1291                 
1292                 for(float m = -1; m < 3; m++)
1293                 {
1294                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1295                         coord_table[j] = (int)(start + (int)f_x + m);
1296                         CLAMP(coord_table[j], 0, total_pixels - 1);
1297                         j++;
1298                 }
1299                 
1300         }
1303 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1304                 int* &table_int2,
1305                 float* &table_frac,
1306                 float* &table_antifrac,
1307                 float scale,
1308                 int pixel1,
1309                 int pixel2,
1310                 int start,
1311                 int total_pixels)
1313         table_int1 = new int[pixel2 - pixel1];
1314         table_int2 = new int[pixel2 - pixel1];
1315         table_frac = new float[pixel2 - pixel1];
1316         table_antifrac = new float[pixel2 - pixel1];
1318         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1319         {
1320                 float f_x = (float)i * scale;
1321                 int i_x = (int)floor(f_x);
1322                 float a = (f_x - floor(f_x));
1324                 table_int1[j] = i_x + start;
1325                 table_int2[j] = i_x + start + 1;
1326                 CLAMP(table_int1[j], 0, total_pixels - 1);
1327                 CLAMP(table_int2[j], 0, total_pixels - 1);
1328                 table_frac[j] = a;
1329                 table_antifrac[j] = 1.0F - a;
1330 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1331         }
1334 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1335                 int* &table_int2,
1336                 int* &table_frac,
1337                 int* &table_antifrac,
1338                 float scale,
1339                 int pixel1,
1340                 int pixel2,
1341                 int start,
1342                 int total_pixels)
1344         table_int1 = new int[pixel2 - pixel1];
1345         table_int2 = new int[pixel2 - pixel1];
1346         table_frac = new int[pixel2 - pixel1];
1347         table_antifrac = new int[pixel2 - pixel1];
1349         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1350         {
1351                 double f_x = (float)i * scale;
1352                 int i_x = (int)floor(f_x);
1353                 float a = (f_x - floor(f_x));
1355                 table_int1[j] = i_x + start;
1356                 table_int2[j] = i_x + start + 1;
1357                 CLAMP(table_int1[j], 0, total_pixels - 1);
1358                 CLAMP(table_int2[j], 0, total_pixels - 1);
1359                 table_frac[j] = (int)(a * 0xffff);
1360                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1361 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1362         }
1365 void ScaleUnit::process_package(LoadPackage *package)
1367         ScalePackage *pkg = (ScalePackage*)package;
1369 //printf("ScaleUnit::process_package 1\n");
1370 // Arguments for macros
1371         VFrame *output = engine->scale_output;
1372         VFrame *input = engine->scale_input;
1373         float scale_w = engine->w_scale;
1374         float scale_h = engine->h_scale;
1375         int in_x1_int = engine->in_x1_int;
1376         int in_y1_int = engine->in_y1_int;
1377         int out_h_int = engine->out_h_int;
1378         int out_w_int = engine->out_w_int;
1379         int do_yuv = 
1380                 (input->get_color_model() == BC_YUV888 ||
1381                 input->get_color_model() == BC_YUVA8888 ||
1382                 input->get_color_model() == BC_YUV161616 ||
1383                 input->get_color_model() == BC_YUVA16161616);
1385 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1386         if(engine->interpolation_type == CUBIC_CUBIC || 
1387                 (engine->interpolation_type == CUBIC_LINEAR 
1388                         && engine->w_scale > 1 && 
1389                         engine->h_scale > 1))
1390         {
1391                 switch(engine->scale_input->get_color_model())
1392                 {
1393                         case BC_RGB_FLOAT:
1394                                 BICUBIC(1.0, float, 3);
1395                                 break;
1397                         case BC_RGBA_FLOAT:
1398                                 BICUBIC(1.0, float, 4);
1399                                 break;
1401                         case BC_RGB888:
1402                         case BC_YUV888:
1403                                 BICUBIC(0xff, unsigned char, 3);
1404                                 break;
1406                         case BC_RGBA8888:
1407                         case BC_YUVA8888:
1408                                 BICUBIC(0xff, unsigned char, 4);
1409                                 break;
1411                         case BC_RGB161616:
1412                         case BC_YUV161616:
1413                                 BICUBIC(0xffff, uint16_t, 3);
1414                                 break;
1416                         case BC_RGBA16161616:
1417                         case BC_YUVA16161616:
1418                                 BICUBIC(0xffff, uint16_t, 4);
1419                                 break;
1420                 }
1421         }
1422         else
1423 // Perform bilinear scaling input -> scale_output
1424         if(engine->w_scale > 1 && 
1425                 engine->h_scale > 1)
1426         {
1427                 switch(engine->scale_input->get_color_model())
1428                 {
1429                         case BC_RGB_FLOAT:
1430                                 BILINEAR_ENLARGE(1.0, float, 3);
1431                                 break;
1433                         case BC_RGBA_FLOAT:
1434                                 BILINEAR_ENLARGE(1.0, float, 4);
1435                                 break;
1437                         case BC_RGB888:
1438                         case BC_YUV888:
1439                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1440                                 break;
1442                         case BC_RGBA8888:
1443                         case BC_YUVA8888:
1444                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1445                                 break;
1447                         case BC_RGB161616:
1448                         case BC_YUV161616:
1449                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1450                                 break;
1452                         case BC_RGBA16161616:
1453                         case BC_YUVA16161616:
1454                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1455                                 break;
1456                 }
1457         }
1458         else
1459 // Bilinear reduction
1460         {
1461                 switch(engine->scale_input->get_color_model())
1462                 {
1463                         case BC_RGB_FLOAT:
1464                                 BILINEAR_REDUCE(1.0, float, 3);
1465                                 break;
1466                         case BC_RGBA_FLOAT:
1467                                 BILINEAR_REDUCE(1.0, float, 4);
1468                                 break;
1469                         case BC_RGB888:
1470                         case BC_YUV888:
1471                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1472                                 break;
1474                         case BC_RGBA8888:
1475                         case BC_YUVA8888:
1476                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1477                                 break;
1479                         case BC_RGB161616:
1480                         case BC_YUV161616:
1481                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1482                                 break;
1484                         case BC_RGBA16161616:
1485                         case BC_YUVA16161616:
1486                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1487                                 break;
1488                 }
1489         }
1490 //printf("ScaleUnit::process_package 3\n");
1506 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1507  : LoadServer(cpus, cpus)
1509         this->overlay = overlay;
1512 ScaleEngine::~ScaleEngine()
1516 void ScaleEngine::init_packages()
1518         for(int i = 0; i < total_packages; i++)
1519         {
1520                 ScalePackage *package = (ScalePackage*)packages[i];
1521                 package->out_row1 = out_h_int / total_packages * i;
1522                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1524                 if(i >= total_packages - 1)
1525                         package->out_row2 = out_h_int;
1526         }
1529 LoadClient* ScaleEngine::new_client()
1531         return new ScaleUnit(this, overlay);
1534 LoadPackage* ScaleEngine::new_package()
1536         return new ScalePackage;
1551 TranslatePackage::TranslatePackage()
1557 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1558  : LoadClient(server)
1560         this->overlay = overlay;
1561         this->engine = server;
1564 TranslateUnit::~TranslateUnit()
1570 void TranslateUnit::translation_array_f(transfer_table_f* &table, 
1571         float out_x1, 
1572         float out_x2,
1573         float in_x1,
1574         float in_x2,
1575         int in_total, 
1576         int out_total, 
1577         int &out_x1_int,
1578         int &out_x2_int)
1580         int out_w_int;
1581         float offset = out_x1 - in_x1;
1582 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1584         out_x1_int = (int)out_x1;
1585         out_x2_int = MIN((int)ceil(out_x2), out_total);
1586         out_w_int = out_x2_int - out_x1_int;
1588         table = new transfer_table_f[out_w_int];
1589         bzero(table, sizeof(transfer_table_f) * out_w_int);
1592 //printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1594         float in_x = in_x1;
1595         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1596         {
1597                 transfer_table_f *entry = &table[out_x - out_x1_int];
1599                 entry->in_x1 = (int)in_x;
1600                 entry->in_x2 = (int)in_x + 1;
1602 // Get fraction of output pixel to fill
1603                 entry->output_fraction = 1;
1605                 if(out_x1 > out_x)
1606                 {
1607                         entry->output_fraction -= out_x1 - out_x;
1608                 }
1610                 if(out_x2 < out_x + 1)
1611                 {
1612                         entry->output_fraction = (out_x2 - out_x);
1613                 }
1615 // Advance in_x until out_x_fraction is filled
1616                 float out_x_fraction = entry->output_fraction;
1617                 float in_x_fraction = floor(in_x + 1) - in_x;
1619                 if(out_x_fraction <= in_x_fraction)
1620                 {
1621                         entry->in_fraction1 = out_x_fraction;
1622                         entry->in_fraction2 = 0.0;
1623                         in_x += out_x_fraction;
1624                 }
1625                 else
1626                 {
1627                         entry->in_fraction1 = in_x_fraction;
1628                         in_x += out_x_fraction;
1629                         entry->in_fraction2 = in_x - floor(in_x);
1630                 }
1632 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1633                 if(entry->in_x2 >= in_total)
1634                 {
1635                         entry->in_x2 = in_total - 1;
1636                         entry->in_fraction2 = 0.0;
1637                 }
1638                 
1639                 if(entry->in_x1 >= in_total)
1640                 {
1641                         entry->in_x1 = in_total - 1;
1642                         entry->in_fraction1 = 0.0;
1643                 }
1644 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1645 //      out_x, 
1646 //      entry->in_x1, 
1647 //      entry->in_x2, 
1648 //      entry->in_fraction1, 
1649 //      entry->in_fraction2, 
1650 //      entry->output_fraction);
1651         }
1655 void TranslateUnit::translation_array_i(transfer_table_i* &table, 
1656         float out_x1, 
1657         float out_x2,
1658         float in_x1,
1659         float in_x2,
1660         int in_total, 
1661         int out_total, 
1662         int &out_x1_int,
1663         int &out_x2_int)
1665         int out_w_int;
1666         float offset = out_x1 - in_x1;
1668         out_x1_int = (int)out_x1;
1669         out_x2_int = MIN((int)ceil(out_x2), out_total);
1670         out_w_int = out_x2_int - out_x1_int;
1672         table = new transfer_table_i[out_w_int];
1673         bzero(table, sizeof(transfer_table_i) * out_w_int);
1676 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1678         float in_x = in_x1;
1679         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1680         {
1681                 transfer_table_i *entry = &table[out_x - out_x1_int];
1683                 entry->in_x1 = (int)in_x;
1684                 entry->in_x2 = (int)in_x + 1;
1686 // Get fraction of output pixel to fill
1687                 entry->output_fraction = 0x10000;
1689                 if(out_x1 > out_x)
1690                 {
1691                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1692                 }
1694                 if(out_x2 < out_x + 1)
1695                 {
1696                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1697                 }
1699 // Advance in_x until out_x_fraction is filled
1700                 int out_x_fraction = entry->output_fraction;
1701                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1703                 if(out_x_fraction <= in_x_fraction)
1704                 {
1705                         entry->in_fraction1 = out_x_fraction;
1706                         entry->in_fraction2 = 0;
1707                         in_x += (float)out_x_fraction / 0x10000;
1708                 }
1709                 else
1710                 {
1711                         entry->in_fraction1 = in_x_fraction;
1712                         in_x += (float)out_x_fraction / 0x10000;
1713                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1714                 }
1716 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1717                 if(entry->in_x2 >= in_total)
1718                 {
1719                         entry->in_x2 = in_total - 1;
1720                         entry->in_fraction2 = 0;
1721                 }
1723                 if(entry->in_x1 >= in_total)
1724                 {
1725                         entry->in_x1 = in_total - 1;
1726                         entry->in_fraction1 = 0;
1727                 }
1728 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1729 //      out_x, 
1730 //      entry->in_x1, 
1731 //      entry->in_x2, 
1732 //      entry->in_fraction1, 
1733 //      entry->in_fraction2, 
1734 //      entry->output_fraction);
1735         }
// TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Body of the translation loop, expanded once per color model.
//   max           - full scale channel value; master_opacity is alpha * max.
//   temp_type     - type of the intermediate values handed to BLEND_3/BLEND_4.
//   type          - storage type of one channel in the frame rows.
//   components    - channels per pixel; 4 additionally interpolates input4.
//   chroma_offset - when nonzero, extra_chroma is added to the 2nd and 3rd
//                   channels in proportion to the uncovered weight
//                   (1.0 - fraction1 - fraction2 - fraction3 - fraction4).
//
// Names read from the expanding scope: input, output, alpha, row1, row2,
// x_table_f, y_table_f, out_x1_int, out_x2_int, out_y1_int, plus whatever
// BLEND_3/BLEND_4 read.
//
// For each output row i in [row1, row2) and column j in
// [out_x1_int, out_x2_int) the four neighbouring input samples (two rows by
// two columns taken from the tables) are weighted by the products of the x
// and y fractions.  0.5 is added before truncation unless sizeof(type) is 4.
// The per pixel opacity is master_opacity scaled by both output fractions.
// Inside the loops the macro declares its own in_x1, in_x2, in_y1, in_y2 and
// output, which hide the variables of the same name in the expanding scope.
1771 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1772 { \
1774         type **in_rows = (type**)input->get_rows(); \
1775         type **out_rows = (type**)output->get_rows(); \
1778         temp_type master_opacity; \
1779         if(sizeof(type) != 4) \
1780                 master_opacity = (temp_type)(alpha * max + 0.5); \
1781         else \
1782                 master_opacity = (temp_type)(alpha * max); \
1783         temp_type master_transparency = max - master_opacity; \
1784         float round = 0.0; \
1785         if(sizeof(type) != 4) \
1786                 round = 0.5; \
1789         for(int i = row1; i < row2; i++) \
1790         { \
1791                 int in_y1; \
1792                 int in_y2; \
1793                 float y_fraction1_f; \
1794                 float y_fraction2_f; \
1795                 float y_output_fraction_f; \
1796                 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1797                 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1798                 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1799                 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1800                 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1801                 type *in_row1 = in_rows[(in_y1)]; \
1802                 type *in_row2 = in_rows[(in_y2)]; \
1803                 type *out_row = out_rows[i]; \
1805                 for(int j = out_x1_int; j < out_x2_int; j++) \
1806                 { \
1807                         int in_x1; \
1808                         int in_x2; \
1809                         float x_fraction1_f; \
1810                         float x_fraction2_f; \
1811                         float x_output_fraction_f; \
1812                         in_x1 = x_table_f[j - out_x1_int].in_x1; \
1813                         in_x2 = x_table_f[j - out_x1_int].in_x2; \
1814                         x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1815                         x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1816                         x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1817                         type *output = &out_row[j * components]; \
1818                         temp_type input1, input2, input3, input4; \
1820                         float fraction1 = x_fraction1_f * y_fraction1_f; \
1821                         float fraction2 = x_fraction2_f * y_fraction1_f; \
1822                         float fraction3 = x_fraction1_f * y_fraction2_f; \
1823                         float fraction4 = x_fraction2_f * y_fraction2_f; \
1825                         input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1826                                 in_row1[in_x2 * components] * fraction2 +  \
1827                                 in_row2[in_x1 * components] * fraction3 +  \
1828                                 in_row2[in_x2 * components] * fraction4 + round); \
1830 /* Add chroma to fractional pixels */ \
1831                         if(chroma_offset) \
1832                         { \
1833                                 float extra_chroma = (1.0F - \
1834                                         fraction1 - \
1835                                         fraction2 - \
1836                                         fraction3 - \
1837                                         fraction4) * chroma_offset; \
1838                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1839                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1840                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1841                                         in_row2[in_x2 * components + 1] * fraction4 + \
1842                                         extra_chroma + round); \
1843                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1844                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1845                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1846                                         in_row2[in_x2 * components + 2] * fraction4 +  \
1847                                         extra_chroma + round); \
1848                         } \
1849                         else \
1850                         { \
1851                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1852                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1853                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1854                                         in_row2[in_x2 * components + 1] * fraction4 + round); \
1855                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1856                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1857                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1858                                         in_row2[in_x2 * components + 2] * fraction4 + round); \
1859                         } \
1861                         if(components == 4) \
1862                                 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1863                                         in_row1[in_x2 * components + 3] * fraction2 +  \
1864                                         in_row2[in_x1 * components + 3] * fraction3 +  \
1865                                         in_row2[in_x2 * components + 3] * fraction4 + round); \
1867                         temp_type opacity; \
1868                         if(sizeof(type) != 4) \
1869                                 opacity = (temp_type)(master_opacity *  \
1870                                         y_output_fraction_f *  \
1871                                         x_output_fraction_f + 0.5); \
1872                         else \
1873                                 opacity = (temp_type)(master_opacity *  \
1874                                         y_output_fraction_f *  \
1875                                         x_output_fraction_f); \
1876                         temp_type transparency = max - opacity; \
1878 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
1880                         if(components == 3) \
1881                         { \
1882                                 BLEND_3(max, temp_type, type, chroma_offset); \
1883                         } \
1884                         else \
1885                         { \
1886                                 BLEND_4(max, temp_type, type, chroma_offset); \
1887                         } \
1888                 } \
1889         } \
1892 void TranslateUnit::process_package(LoadPackage *package)
1894         TranslatePackage *pkg = (TranslatePackage*)package;
1895         int out_y1_int; 
1896         int out_y2_int; 
1897         int out_x1_int; 
1898         int out_x2_int; 
1901 // Variables for TRANSLATE
1902         VFrame *input = engine->translate_input;
1903         VFrame *output = engine->translate_output;
1904         float in_x1 = engine->translate_in_x1;
1905         float in_y1 = engine->translate_in_y1;
1906         float in_x2 = engine->translate_in_x2;
1907         float in_y2 = engine->translate_in_y2;
1908         float out_x1 = engine->translate_out_x1;
1909         float out_y1 = engine->translate_out_y1;
1910         float out_x2 = engine->translate_out_x2;
1911         float out_y2 = engine->translate_out_y2;
1912         float alpha = engine->translate_alpha;
1913         int row1 = pkg->out_row1;
1914         int row2 = pkg->out_row2;
1915         int mode = engine->translate_mode;
1916         int in_total_x = input->get_w();
1917         int in_total_y = input->get_h();
1918         int do_yuv = 
1919                 (engine->translate_input->get_color_model() == BC_YUV888 ||
1920                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1921                 engine->translate_input->get_color_model() == BC_YUV161616 ||
1922                 engine->translate_input->get_color_model() == BC_YUVA16161616);
1924         transfer_table_f *x_table_f; 
1925         transfer_table_f *y_table_f; 
1926         transfer_table_i *x_table_i; 
1927         transfer_table_i *y_table_i; 
1929         translation_array_f(x_table_f,  
1930                 out_x1,  
1931                 out_x2, 
1932                 in_x1, 
1933                 in_x2, 
1934                 in_total_x,  
1935                 output->get_w(),  
1936                 out_x1_int, 
1937                 out_x2_int); 
1938         translation_array_f(y_table_f,  
1939                 out_y1,  
1940                 out_y2, 
1941                 in_y1, 
1942                 in_y2, 
1943                 in_total_y,  
1944                 output->get_h(),  
1945                 out_y1_int, 
1946                 out_y2_int); 
1947 //      printf("TranslateUnit::process_package 1 %d\n", mode);
1948 //      Timer a;
1949 //      a.update();
1951         switch(engine->translate_input->get_color_model())
1952         {
1953                 case BC_RGB888:
1954                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
1955                         break;
1957                 case BC_RGBA8888:
1958                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
1959                         break;
1961                 case BC_RGB_FLOAT:
1962                         TRANSLATE(1.0, float, float, 3, 0);
1963                         break;
1965                 case BC_RGBA_FLOAT:
1966                         TRANSLATE(1.0, float, float, 4, 0);
1967                         break;
1969                 case BC_RGB161616:
1970                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
1971                         break;
1973                 case BC_RGBA16161616:
1974                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
1975                         break;
1977                 case BC_YUV888:
1978                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
1979                         break;
1981                 case BC_YUVA8888:
1982                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
1983                         break;
1985                 case BC_YUV161616:
1986                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
1987                         break;
1989                 case BC_YUVA16161616:
1990                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
1991                         break;
1992         }
1993 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
1995         delete [] x_table_f; 
1996         delete [] y_table_f; 
2008 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2009  : LoadServer(cpus, cpus)
2011         this->overlay = overlay;
2014 TranslateEngine::~TranslateEngine()
2018 void TranslateEngine::init_packages()
2020         int out_y1_int = (int)translate_out_y1;
2021         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2022         int out_h = out_y2_int - out_y1_int;
2024         for(int i = 0; i < total_packages; i++)
2025         {
2026                 TranslatePackage *package = (TranslatePackage*)packages[i];
2027                 package->out_row1 = (int)(out_y1_int + out_h / 
2028                         total_packages * 
2029                         i);
2030                 package->out_row2 = (int)((float)package->out_row1 + 
2031                         out_h / 
2032                         total_packages);
2033                 if(i >= total_packages - 1)
2034                         package->out_row2 = out_y2_int;
2035         }
2038 LoadClient* TranslateEngine::new_client()
2040         return new TranslateUnit(this, overlay);
2043 LoadPackage* TranslateEngine::new_package()
2045         return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Body of the table lookup scale and translate loop, expanded once per
// color model.
//   max           - full scale channel value; opacity is alpha * max, with
//                   0.5 added before truncation unless sizeof(type) is 4.
//   temp_type     - type of the values handed to BLEND_3/BLEND_4.
//   type          - storage type of one channel in the frame rows.
//   components    - channels per pixel; 4 additionally loads input4.
//   chroma_offset - passed through to BLEND_3/BLEND_4.
//
// Names read from the expanding scope: pkg, alpha, in_rows, out_rows,
// x_table, y_table, in_x1, in_x2, out_x1, out_y1, out_w, plus whatever
// BLEND_3/BLEND_4 read.
//
// For each row i in [pkg->out_row1, pkg->out_row2) the input row is
// y_table[i - out_y1], starting at column in_x1, and the output starts at
// column out_x1.  When out_w differs from in_x2 - in_x1 each input pixel is
// found through x_table[j]; otherwise input and output advance together.
2055 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2056 { \
2057         temp_type opacity; \
2058         if(sizeof(type) != 4) \
2059                 opacity = (temp_type)(alpha * max + 0.5); \
2060         else \
2061                 opacity = (temp_type)(alpha * max); \
2062         temp_type transparency = max - opacity; \
2064         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2065         { \
2066                 int in_y = y_table[i - out_y1]; \
2067                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2068                 type *output = (type*)out_rows[i] + out_x1 * components; \
2070 /* X direction is scaled and requires a table lookup */ \
2071                 if(out_w != in_x2 - in_x1) \
2072                 { \
2073                         for(int j = 0; j < out_w; j++) \
2074                         { \
2075                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2076                                 temp_type input1, input2, input3, input4; \
2077          \
2078                                 input1 = in_row_plus_x[0]; \
2079                                 input2 = in_row_plus_x[1]; \
2080                                 input3 = in_row_plus_x[2]; \
2081                                 if(components == 4) \
2082                                         input4 = in_row_plus_x[3]; \
2083          \
2084                                 if(components == 3) \
2085                                 { \
2086                                         BLEND_3(max, temp_type, type, chroma_offset); \
2087                                 } \
2088                                 else \
2089                                 { \
2090                                         BLEND_4(max, temp_type, type, chroma_offset); \
2091                                 } \
2092                                 output += components; \
2093                         } \
2094                 } \
2095                 else \
2096 /* X direction is not scaled */ \
2097                 { \
2098                         for(int j = 0; j < out_w; j++) \
2099                         { \
2100                                 temp_type input1, input2, input3, input4; \
2101          \
2102                                 input1 = in_row[0]; \
2103                                 input2 = in_row[1]; \
2104                                 input3 = in_row[2]; \
2105                                 if(components == 4) \
2106                                         input4 = in_row[3]; \
2107          \
2108                                 if(components == 3) \
2109                                 { \
2110                                         BLEND_3(max, temp_type, type, chroma_offset); \
2111                                 } \
2112                                 else \
2113                                 { \
2114                                         BLEND_4(max, temp_type, type, chroma_offset); \
2115                                 } \
2116                                 in_row += components; \
2117                                 output += components; \
2118                         } \
2119                 } \
2120         } \
2125 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2126  : LoadClient(server)
2128         this->overlay = overlay;
2129         this->scale_translate = server;
2132 ScaleTranslateUnit::~ScaleTranslateUnit()
2136 void ScaleTranslateUnit::scale_array(int* &table, 
2137         int out_x1, 
2138         int out_x2,
2139         int in_x1,
2140         int in_x2,
2141         int is_x)
2143         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2145         table = new int[out_x2 - out_x1];
2146         
2147         if(!is_x)
2148         {
2149                 for(int i = 0; i < out_x2 - out_x1; i++)
2150                 {
2151                         table[i] = (int)((float)i / scale + in_x1);
2152                 }
2153         }
2154         else
2155         {       
2156                 for(int i = 0; i < out_x2 - out_x1; i++)
2157                 {
2158                         table[i] = (int)((float)i / scale);
2159                 }
2160         }
2164 void ScaleTranslateUnit::process_package(LoadPackage *package)
2166         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2168 // Args for NEAREST_NEIGHBOR_MACRO
2169         VFrame *output = scale_translate->output;
2170         VFrame *input = scale_translate->input;
2171         int in_x1 = scale_translate->in_x1;
2172         int in_y1 = scale_translate->in_y1;
2173         int in_x2 = scale_translate->in_x2;
2174         int in_y2 = scale_translate->in_y2;
2175         int out_x1 = scale_translate->out_x1;
2176         int out_y1 = scale_translate->out_y1;
2177         int out_x2 = scale_translate->out_x2;
2178         int out_y2 = scale_translate->out_y2;
2179         float alpha = scale_translate->alpha;
2180         int mode = scale_translate->mode;
2181         int out_w = out_x2 - out_x1;
2183         int *x_table;
2184         int *y_table;
2185         unsigned char **in_rows = input->get_rows();
2186         unsigned char **out_rows = output->get_rows();
2188 //      Timer a;
2189 //      a.update();
2190 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2191         if(out_w != in_x2 - in_x1)
2192         {
2193                 scale_array(x_table, 
2194                         out_x1, 
2195                         out_x2,
2196                         in_x1,
2197                         in_x2,
2198                         1);
2199         }
2200         scale_array(y_table, 
2201                 out_y1, 
2202                 out_y2,
2203                 in_y1,
2204                 in_y2,
2205                 0);
2208         if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1)) 
2209         {
2210 // if we have transfer replace and x direction is not scaled, PARTY!
2211                 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2212                 int line_len = out_w * bytes_per_pixel;
2213                 int in_start_byte = in_x1 * bytes_per_pixel;
2214                 int out_start_byte = out_x1 * bytes_per_pixel;
2215                 for(int i = pkg->out_row1; i < pkg->out_row2; i++) 
2216                 {
2217                         memcpy (out_rows[i] + out_start_byte, 
2218                                 in_rows[y_table[i - out_y1]] + in_start_byte , 
2219                                 line_len);
2220                 }
2222         } 
2223         else
2224         switch(input->get_color_model())
2225         {
2226                 case BC_RGB888:
2227                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2228                         break;
2230                 case BC_RGB_FLOAT:
2231                         SCALE_TRANSLATE(1.0, float, float, 3, 0);
2232                         break;
2234                 case BC_YUV888:
2235                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2236                         break;
2238                 case BC_RGBA8888:
2239                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2240                         break;
2242                 case BC_RGBA_FLOAT:
2243                         SCALE_TRANSLATE(1.0, float, float, 4, 0);
2244                         break;
2246                 case BC_YUVA8888:
2247                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2248                         break;
2251                 case BC_RGB161616:
2252                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2253                         break;
2255                 case BC_YUV161616:
2256                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2257                         break;
2259                 case BC_RGBA16161616:
2260                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2261                         break;
2263                 case BC_YUVA16161616:
2264                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2265                         break;
2266         }
2267         
2268 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2269         if(out_x2 - out_x1 != in_x2 - in_x1)
2270                 delete [] x_table;
2271         delete [] y_table;
2283 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2284  : LoadServer(cpus, cpus)
2286         this->overlay = overlay;
2289 ScaleTranslateEngine::~ScaleTranslateEngine()
2293 void ScaleTranslateEngine::init_packages()
2295         int out_h = out_y2 - out_y1;
2297         for(int i = 0; i < total_packages; i++)
2298         {
2299                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2300                 package->out_row1 = (int)(out_y1 + out_h / 
2301                         total_packages * 
2302                         i);
2303                 package->out_row2 = (int)((float)package->out_row1 + 
2304                         out_h / 
2305                         total_packages);
2306                 if(i >= total_packages - 1)
2307                         package->out_row2 = out_y2;
2308         }
2311 LoadClient* ScaleTranslateEngine::new_client()
2313         return new ScaleTranslateUnit(this, overlay);
2316 LoadPackage* ScaleTranslateEngine::new_package()
2318         return new ScaleTranslatePackage;
2322 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
//
// Blend the input frame onto the output frame pixel for pixel, without
// scaling or translation, for rows [pkg->out_row1, pkg->out_row2).
//   temp_type     - type of the values handed to BLEND_3/BLEND_4.
//   type          - storage type of one channel in the frame rows.
//   max           - full scale channel value; opacity is alpha * max, with
//                   0.5 added before truncation unless sizeof(type) is 4.
//   components    - channels per pixel; 4 additionally loads input4.
//   chroma_offset - passed through to BLEND_3/BLEND_4.
//
// Names read from the expanding scope: input, output, alpha, pkg, plus
// whatever BLEND_3/BLEND_4 read.  Inside the row loop "output" is redeclared
// as a pointer to the current output pixel.
#define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
{ \
	temp_type opacity; \
	if(sizeof(type) != 4) \
		opacity = (temp_type)(alpha * max + 0.5); \
	else \
		opacity = (temp_type)(alpha * max); \
	temp_type transparency = max - opacity; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) \
		{ \
			temp_type input1, input2, input3, input4; \
			input1 = in_row[0]; \
			input2 = in_row[1]; \
			input3 = in_row[2]; \
			if(components == 4) input4 = in_row[3]; \
 \
			if(components == 3) \
			{ \
				BLEND_3(max, temp_type, type, chroma_offset); \
			} \
			else \
			{ \
				BLEND_4(max, temp_type, type, chroma_offset); \
			} \
 \
			in_row += components; \
			output += components; \
		} \
	} \
}
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
//
// Copy rows [pkg->out_row1, pkg->out_row2) of the input frame over the same
// rows of the output frame.  Each row copy is
// input->get_w() * sizeof(type) * components bytes long.
// Names read from the expanding scope: input, output, pkg.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
	int line_len = w * sizeof(type) * components; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		memcpy(output_rows[i], input_rows[i], line_len); \
	} \
}
// components is always 4
//
// BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset)
//
// Normal mode blend of a 4 channel input frame onto the output frame for
// rows [pkg->out_row1, pkg->out_row2).
//   temp_type     - integer type wide enough for channel * max * max.
//   type          - storage type of one channel in the frame rows.
//   max           - full scale channel value.
//   chroma_offset - subtracted from the 2nd and 3rd channels before weighting
//                   and added back afterwards.
//
// The weight of an input pixel is opacity * its own 4th channel, on a scale
// where max * max is fully opaque; the first three channels are the weighted
// mix of input and output divided by max twice.  The 4th output channel
// becomes the larger of the input and output values.
// Names read from the expanding scope: input, output, alpha, pkg.
#define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
{ \
	temp_type opacity = (temp_type)(alpha * max + 0.5); \
	temp_type max_squared = ((temp_type)max) * max; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) \
		{ \
			temp_type pixel_opacity, pixel_transparency; \
			pixel_opacity = opacity * in_row[3]; \
			pixel_transparency = max_squared - pixel_opacity; \
 \
			output[0] = ((temp_type)in_row[0] * pixel_opacity + \
				(temp_type)output[0] * pixel_transparency) / max / max; \
			output[1] = (((temp_type)in_row[1] - chroma_offset) * pixel_opacity + \
				((temp_type)output[1] - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			output[2] = (((temp_type)in_row[2] - chroma_offset) * pixel_opacity + \
				((temp_type)output[2] - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
 \
			in_row += 4; \
			output += 4; \
		} \
	} \
}
// components is always 3
//
// BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset)
//
// Normal mode blend of a 3 channel input frame onto the output frame for
// rows [pkg->out_row1, pkg->out_row2).
//   temp_type     - integer type wide enough for channel << bits.
//   type          - storage type of one channel in the frame rows.
//   max,
//   chroma_offset - accepted but not used by this macro.
//
// opacity and transparency add up to 1 << bits, where bits is the width of
// type, so the weighted sum is scaled back with a shift instead of a divide.
// Every channel is treated alike, so the inner loop walks 3 * width values.
// Names read from the expanding scope: input, output, alpha, pkg.
#define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
{ \
	const int bits = sizeof(type) * 8; \
	temp_type opacity = (temp_type)(alpha * ((temp_type)1 << bits) + 0.5); \
	temp_type transparency = ((temp_type)1 << bits) - opacity; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w() * 3; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) /* w = 3x width! */ \
		{ \
			*output = ((temp_type)*in_row * opacity + *output * transparency) >> bits; \
			in_row ++; \
			output ++; \
		} \
	} \
}
2485 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2486  : LoadClient(server)
2488         this->overlay = overlay;
2489         this->blend_engine = server;
2492 BlendUnit::~BlendUnit()
2496 void BlendUnit::process_package(LoadPackage *package)
2498         BlendPackage *pkg = (BlendPackage*)package;
2501         VFrame *output = blend_engine->output;
2502         VFrame *input = blend_engine->input;
2503         float alpha = blend_engine->alpha;
2504         int mode = blend_engine->mode;
2506         if (mode == TRANSFER_REPLACE) 
2507         {
2508                 switch(input->get_color_model())
2509                 {
2510                         case BC_RGB_FLOAT:
2511                                 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2512                                 break;
2513                         case BC_RGBA_FLOAT:
2514                                 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2515                                 break;
2516                         case BC_RGB888:
2517                         case BC_YUV888:
2518                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2519                                 break;
2520                         case BC_RGBA8888:
2521                         case BC_YUVA8888:
2522                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2523                                 break;
2524                         case BC_RGB161616:
2525                         case BC_YUV161616:
2526                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2527                                 break;
2528                         case BC_RGBA16161616:
2529                         case BC_YUVA16161616:
2530                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2531                                 break;
2532                 }
2533         } 
2534         else
2535         if (mode == TRANSFER_NORMAL) 
2536         {
2537                 switch(input->get_color_model())
2538                 {
2539                         case BC_RGB_FLOAT:
2540                         {
2541                                 float opacity = alpha;
2542                                 float transparency = 1.0 - alpha;
2544                                 float** output_rows = (float**)output->get_rows();
2545                                 float** input_rows = (float**)input->get_rows();
2546                                 int w = input->get_w() * 3;
2547                                 int h = input->get_h();
2549                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2550                                 {
2551                                         float* in_row = input_rows[i];
2552                                         float* output = output_rows[i];
2553 /* w = 3x width! */
2554                                         for(int j = 0; j < w; j++) 
2555                                         {
2556                                                 *output = *in_row * opacity + *output * transparency;
2557                                                 in_row++;
2558                                                 output++;
2559                                         }
2560                                 }
2561                                 break;
2562                         }
2563                         case BC_RGBA_FLOAT:
2564                         {
2565                                 float opacity = alpha;
2566                                 float transparency = 1.0 - alpha;
2567                         
2568                                 float** output_rows = (float**)output->get_rows();
2569                                 float** input_rows = (float**)input->get_rows();
2570                                 int w = input->get_w();
2571                                 int h = input->get_h();
2572                         
2573                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2574                                 {
2575                                         float* in_row = input_rows[i];
2576                                         float* output = output_rows[i];
2577                         
2578                                         for(int j = 0; j < w; j++)
2579                                         {
2580                                                 float pixel_opacity, pixel_transparency;
2581                                                 pixel_opacity = opacity * in_row[3];
2582                                                 pixel_transparency = 1.0 - pixel_opacity;
2583                                         
2584                                         
2585                                                 output[0] = in_row[0] * pixel_opacity +
2586                                                         output[0] * pixel_transparency;
2587                                                 output[1] = in_row[1] * pixel_opacity +
2588                                                         output[1] * pixel_transparency;
2589                                                 output[2] = in_row[2] * pixel_opacity +
2590                                                         output[2] * pixel_transparency;
2591                                                 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
2593                                                 in_row += 4;
2594                                                 output += 4;
2595                                         }
2596                                 }
2597                                 break;
2598                         }
2599                         case BC_RGB888:
2600                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2601                                 break;
2602                         case BC_YUV888:
2603                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2604                                 break;
2605                         case BC_RGBA8888:
2606                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2607                                 break;
2608                         case BC_YUVA8888:
2609                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2610                                 break;
2611                         case BC_RGB161616:
2612                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2613                                 break;
2614                         case BC_YUV161616:
2615                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2616                                 break;
2617                         case BC_RGBA16161616:
2618                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2619                                 break;
2620                         case BC_YUVA16161616:
2621                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2622                                 break;
2623                 }
2624         }
2625         else
2626         switch(input->get_color_model())
2627         {
2628                 case BC_RGB_FLOAT:
2629                         BLEND_ONLY(float, float, 1.0, 3, 0);
2630                         break;
2631                 case BC_RGBA_FLOAT:
2632                         BLEND_ONLY(float, float, 1.0, 4, 0);
2633                         break;
2634                 case BC_RGB888:
2635                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2636                         break;
2637                 case BC_YUV888:
2638                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2639                         break;
2640                 case BC_RGBA8888:
2641                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2642                         break;
2643                 case BC_YUVA8888:
2644                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2645                         break;
2646                 case BC_RGB161616:
2647                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2648                         break;
2649                 case BC_YUV161616:
2650                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2651                         break;
2652                 case BC_RGBA16161616:
2653                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2654                         break;
2655                 case BC_YUVA16161616:
2656                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2657                         break;
2658         }
2663 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2664  : LoadServer(cpus, cpus)
2666         this->overlay = overlay;
2669 BlendEngine::~BlendEngine()
2673 void BlendEngine::init_packages()
2675         for(int i = 0; i < total_packages; i++)
2676         {
2677                 BlendPackage *package = (BlendPackage*)packages[i];
2678                 package->out_row1 = (int)(input->get_h() / 
2679                         total_packages * 
2680                         i);
2681                 package->out_row2 = (int)((float)package->out_row1 +
2682                         input->get_h() / 
2683                         total_packages);
2685                 if(i >= total_packages - 1)
2686                         package->out_row2 = input->get_h();
2687         }
2690 LoadClient* BlendEngine::new_client()
2692         return new BlendUnit(this, overlay);
2695 LoadPackage* BlendEngine::new_package()
2697         return new BlendPackage;
2701 BlendPackage::BlendPackage()