11 #include "overlayframe.h"
14 // Easy abstraction of the float and int types. Most of these are never used
15 // but GCC expects them.
// The blend macros below call my_abs() on values cast to their temp_type
// argument, so one overload is declared per candidate arithmetic type.
// NOTE(review): the four integer overloads are declared to return int, also for
// the 64-bit parameter types -- confirm the result can never be truncated.
16 static int my_abs(int32_t x)
21 static int my_abs(uint32_t x)
26 static int my_abs(int64_t x)
31 static int my_abs(uint64_t x)
36 static float my_abs(float x)
// Construct the overlay helper.  The visible initialisation sets the
// nearest-neighbour scale/translate engine pointer to null; overlay() allocates
// that engine the first time it is needed.
44 OverlayFrame::OverlayFrame(int cpus)
49 scaletranslate_engine = 0;
// Release the temporary frame and every engine that was created.
// Deleting a null pointer is a no-op in C++, so the guards below are redundant
// but harmless.
54 OverlayFrame::~OverlayFrame()
56 if(temp_frame) delete temp_frame;
57 if(scale_engine) delete scale_engine;
58 if(translate_engine) delete translate_engine;
59 if(blend_engine) delete blend_engine;
60 if(scaletranslate_engine) delete scaletranslate_engine;
72 // (255 * 255 + 0 * 0) / 255 = 255
73 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
75 // (65535 * 65535 + 0 * 0) / 65535 = 65535
76 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
79 // Branch prediction 4 U
// Blend one 3-component pixel into output[0..2] according to the transfer mode.
//   max           - full-scale component value; divisor of the opacity mix
//   temp_type     - arithmetic type the operands are cast to while blending
//   type          - storage type of the output components
//   chroma_offset - bias removed from the 2nd and 3rd components before they
//                   are compared, subtracted or added
// The expansion expects input1..input3, output, opacity and transparency to be
// declared at the point of use.  Each case forms r, g and b for its mode; the
// visible cases then mix them with the existing output as
// (value * opacity + output * transparency) / max.
// When sizeof(type) != 4 the results are clipped to 0..max before being stored.
81 #define BLEND_3(max, temp_type, type, chroma_offset) \
85 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
88 case TRANSFER_DIVIDE: \
89 r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
92 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
93 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
97 g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
98 b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
100 r = (r * opacity + (temp_type)output[0] * transparency) / max; \
101 g = (g * opacity + (temp_type)output[1] * transparency) / max; \
102 b = (b * opacity + (temp_type)output[2] * transparency) / max; \
104 case TRANSFER_MULTIPLY: \
105 r = ((temp_type)input1 * output[0]) / max; \
108 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
109 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
113 g = (temp_type)input2 * (temp_type)output[1] / max; \
114 b = (temp_type)input3 * (temp_type)output[2] / max; \
116 r = (r * opacity + (temp_type)output[0] * transparency) / max; \
117 g = (g * opacity + (temp_type)output[1] * transparency) / max; \
118 b = (b * opacity + (temp_type)output[2] * transparency) / max; \
120 case TRANSFER_SUBTRACT: \
121 r = (temp_type)input1 - output[0]; \
122 g = (temp_type)input2 - ((temp_type)output[1] - chroma_offset); \
123 b = (temp_type)input3 - ((temp_type)output[2] - chroma_offset); \
124 r = (r * opacity + output[0] * transparency) / max; \
125 g = (g * opacity + output[1] * transparency) / max; \
126 b = (b * opacity + output[2] * transparency) / max; \
128 case TRANSFER_ADDITION: \
129 r = (temp_type)input1 + output[0]; \
130 g = (temp_type)input2 - chroma_offset + output[1]; \
131 b = (temp_type)input3 - chroma_offset + output[2]; \
132 r = (r * opacity + output[0] * transparency) / max; \
133 g = (g * opacity + output[1] * transparency) / max; \
134 b = (b * opacity + output[2] * transparency) / max; \
136 case TRANSFER_REPLACE: \
141 case TRANSFER_NORMAL: \
142 r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
143 g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
144 b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
148 if(sizeof(type) != 4) \
150 output[0] = (type)CLIP(r, 0, max); \
151 output[1] = (type)CLIP(g, 0, max); \
152 output[2] = (type)CLIP(b, 0, max); \
166 // Blending equations are drastically different for 3 and 4 components
// Blend one 4-component pixel into output[0..3].  Parameters are the same as
// for BLEND_3.  The expansion expects input1..input4, output and opacity to be
// declared at the point of use.
// The source alpha is folded into the mix:
//   pixel_opacity      = opacity * input4
//   pixel_transparency = max * max - pixel_opacity
// so the visible colour mixes divide by max twice.  The resulting alpha is the
// larger of the input and output alpha and is stored without clipping.
// In the TRANSFER_NORMAL case chroma_offset is subtracted from the 2nd and 3rd
// components before they are weighted.
// When sizeof(type) != 4 the colour results are clipped to 0..max on store.
167 #define BLEND_4(max, temp_type, type, chroma_offset) \
169 temp_type r, g, b, a; \
170 temp_type pixel_opacity, pixel_transparency; \
171 temp_type output1 = output[0]; \
172 temp_type output2 = output[1]; \
173 temp_type output3 = output[2]; \
174 temp_type output4 = output[3]; \
176 pixel_opacity = opacity * input4; \
177 pixel_transparency = (temp_type)max * max - pixel_opacity; \
181 case TRANSFER_DIVIDE: \
182 r = output1 ? (((temp_type)input1 * max) / output1) : max; \
185 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
186 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
190 g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
191 b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
193 r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
194 g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
195 b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
196 a = input4 > output4 ? input4 : output4; \
198 case TRANSFER_MULTIPLY: \
199 r = ((temp_type)input1 * output1) / max; \
202 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
203 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
207 g = (temp_type)input2 * (temp_type)output2 / max; \
208 b = (temp_type)input3 * (temp_type)output3 / max; \
210 r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
211 g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
212 b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
213 a = input4 > output4 ? input4 : output4; \
215 case TRANSFER_SUBTRACT: \
216 r = (temp_type)input1 - output1; \
217 g = (temp_type)input2 - ((temp_type)output2 - chroma_offset); \
218 b = (temp_type)input3 - ((temp_type)output3 - chroma_offset); \
219 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
220 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
221 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
222 a = input4 > output4 ? input4 : output4; \
224 case TRANSFER_ADDITION: \
225 r = (temp_type)input1 + output1; \
226 g = (temp_type)input2 - chroma_offset + output2; \
227 b = (temp_type)input3 - chroma_offset + output3; \
228 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
229 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
230 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
231 a = input4 > output4 ? input4 : output4; \
233 case TRANSFER_REPLACE: \
239 case TRANSFER_NORMAL: \
240 r = (input1 * pixel_opacity + \
241 output1 * pixel_transparency) / max / max; \
242 g = ((input2 - chroma_offset) * pixel_opacity + \
243 (output2 - chroma_offset) * pixel_transparency) \
246 b = ((input3 - chroma_offset) * pixel_opacity + \
247 (output3 - chroma_offset) * pixel_transparency) \
250 a = input4 > output4 ? input4 : output4; \
254 if(sizeof(type) != 4) \
256 output[0] = (type)CLIP(r, 0, max); \
257 output[1] = (type)CLIP(g, 0, max); \
258 output[2] = (type)CLIP(b, 0, max); \
259 output[3] = (type)a; \
272 // Bicubic algorithm using multiprocessors
273 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
275 // Nearest neighbor algorithm using multiprocessors for blending
276 // input -> scale + translate -> blend -> output
// Composite a rectangle of input onto a rectangle of output.
// Stages visible in this function:
//   1. derive w_scale / h_scale from the requested rectangles and return 1 when
//      a coordinate is NaN;
//   2. clamp the input rectangle to the input frame and the output rectangle to
//      the output frame, moving the matching edge of the other rectangle by the
//      scaled distance;
//   3. return 0 when either clamped rectangle is empty;
//   4. for interpolated modes with a non-unity scale, run ScaleEngine either
//      straight into output or into temp_frame;
//   5. pass the remaining frame to a direct copy, BlendEngine,
//      ScaleTranslateEngine or TranslateEngine.
// Every engine is created on first use and kept for later calls.
// NOTE(review): the scales are computed before any validation, so a zero-width
// or zero-height input rectangle divides by zero here -- confirm callers never
// pass one.
279 int OverlayFrame::overlay(VFrame *output,
289 float alpha, // 0 - 1
291 int interpolation_type)
293 float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
294 float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
303 isnan(out_y2)) return 1;
304 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f\n", in_x1,
// Clamp the input rectangle to the input frame; each adjustment moves the
// matching output edge by the same distance scaled into output space.
316 out_x1 += -in_x1 * w_scale;
320 if(in_x1 >= input->get_w())
322 out_x1 -= (in_x1 - input->get_w()) * w_scale;
323 in_x1 = input->get_w();
328 out_y1 += -in_y1 * h_scale;
332 if(in_y1 >= input->get_h())
334 out_y1 -= (in_y1 - input->get_h()) * h_scale;
335 in_y1 = input->get_h();
340 out_x2 += -in_x2 * w_scale;
344 if(in_x2 >= input->get_w())
346 out_x2 -= (in_x2 - input->get_w()) * w_scale;
347 in_x2 = input->get_w();
352 out_y2 += -in_y2 * h_scale;
356 if(in_y2 >= input->get_h())
358 out_y2 -= (in_y2 - input->get_h()) * h_scale;
359 in_y2 = input->get_h();
// Clamp the output rectangle to the output frame; each adjustment moves the
// matching input edge by the same distance scaled back into input space.
364 in_x1 += -out_x1 / w_scale;
368 if(out_x1 >= output->get_w())
370 in_x1 -= (out_x1 - output->get_w()) / w_scale;
371 out_x1 = output->get_w();
376 in_y1 += -out_y1 / h_scale;
380 if(out_y1 >= output->get_h())
382 in_y1 -= (out_y1 - output->get_h()) / h_scale;
383 out_y1 = output->get_h();
388 in_x2 += -out_x2 / w_scale;
392 if(out_x2 >= output->get_w())
394 in_x2 -= (out_x2 - output->get_w()) / w_scale;
395 out_x2 = output->get_w();
400 in_y2 += -out_y2 / h_scale;
404 if(out_y2 >= output->get_h())
406 in_y2 -= (out_y2 - output->get_h()) / h_scale;
407 out_y2 = output->get_h();
419 float in_w = in_x2 - in_x1;
420 float in_h = in_y2 - in_y1;
421 float out_w = out_x2 - out_x1;
422 float out_h = out_y2 - out_y1;
423 // Input for translation operation
424 VFrame *translation_input = input;
// Nothing is left to draw once a clamped rectangle has no area.
427 if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
430 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
443 // ****************************************************************************
444 // Transfer to temp buffer by scaling nearest integer boundaries
445 // ****************************************************************************
446 if(interpolation_type != NEAREST_NEIGHBOR &&
447 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
449 // Create integer boundaries for interpolation
450 int in_x1_int = (int)in_x1;
451 int in_y1_int = (int)in_y1;
452 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
453 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
455 // Dimensions of temp frame. Integer boundaries scaled.
456 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
457 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
458 VFrame *scale_output;
462 #define NO_TRANSLATION1 \
463 (EQUIV(in_x1, 0) && \
465 EQUIV(out_x1, 0) && \
466 EQUIV(out_y1, 0) && \
467 EQUIV(in_x2, in_x2_int) && \
468 EQUIV(in_y2, in_y2_int) && \
469 EQUIV(out_x2, temp_w) && \
470 EQUIV(out_y2, temp_h))
474 (EQUIV(alpha, 1) && \
475 (mode == TRANSFER_REPLACE || \
476 (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
482 // Prepare destination for operation
484 // No translation and no blending. The blending operation is built into the
485 // translation unit but not the scaling unit.
487 if(NO_TRANSLATION1 &&
490 // printf("OverlayFrame::overlay input -> output\n");
492 scale_output = output;
493 translation_input = 0;
496 // If translation or blending
497 // input -> nearest integer boundary temp
500 (temp_frame->get_w() != temp_w ||
501 temp_frame->get_h() != temp_h))
509 temp_frame = new VFrame(0,
512 input->get_color_model(),
515 //printf("OverlayFrame::overlay input -> temp\n");
518 temp_frame->clear_frame();
520 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
522 scale_output = temp_frame;
523 translation_input = scale_output;
525 // Adjust input coordinates to reflect new scaled coordinates.
526 in_x1 = (in_x1 - in_x1_int) * w_scale;
527 in_y1 = (in_y1 - in_y1_int) * h_scale;
528 in_x2 = (in_x2 - in_x1_int) * w_scale;
529 in_y2 = (in_y2 - in_y1_int) * h_scale;
534 //printf("Overlay 1\n");
536 // Scale input -> scale_output
537 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
538 scale_engine->scale_output = scale_output;
539 scale_engine->scale_input = input;
540 scale_engine->w_scale = w_scale;
541 scale_engine->h_scale = h_scale;
542 scale_engine->in_x1_int = in_x1_int;
543 scale_engine->in_y1_int = in_y1_int;
544 scale_engine->out_w_int = temp_w;
545 scale_engine->out_h_int = temp_h;
546 scale_engine->interpolation_type = interpolation_type;
547 //printf("Overlay 2\n");
549 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
550 scale_engine->process_packages();
551 //printf("OverlayFrame::overlay ScaleEngine 2\n");
557 // printf("OverlayFrame::overlay 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
571 #define NO_TRANSLATION2 \
572 (EQUIV(in_x1, 0) && \
574 EQUIV(in_x2, translation_input->get_w()) && \
575 EQUIV(in_y2, translation_input->get_h()) && \
576 EQUIV(out_x1, 0) && \
577 EQUIV(out_y1, 0) && \
578 EQUIV(out_x2, output->get_w()) && \
579 EQUIV(out_y2, output->get_h())) \
582 (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
583 EQUIV(out_y2 - out_y1, in_y2 - in_y1))
588 //printf("OverlayFrame::overlay 4 %d\n", mode);
// translation_input was set to 0 above when the scaler already wrote into output.
593 if(translation_input)
596 if( NO_TRANSLATION2 &&
600 //printf("OverlayFrame::overlay direct copy\n");
601 output->copy_from(translation_input);
605 if( NO_TRANSLATION2 &&
608 if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
611 blend_engine->output = output;
612 blend_engine->input = translation_input;
613 blend_engine->alpha = alpha;
614 blend_engine->mode = mode;
616 blend_engine->process_packages();
619 // Scale and translate using nearest neighbor
620 // Translation is exactly on integer boundaries
621 if(interpolation_type == NEAREST_NEIGHBOR ||
622 EQUIV(in_x1, (int)in_x1) &&
623 EQUIV(in_y1, (int)in_y1) &&
624 EQUIV(in_x2, (int)in_x2) &&
625 EQUIV(in_y2, (int)in_y2) &&
627 EQUIV(out_x1, (int)out_x1) &&
628 EQUIV(out_y1, (int)out_y1) &&
629 EQUIV(out_x2, (int)out_x2) &&
630 EQUIV(out_y2, (int)out_y2))
632 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
633 if(!scaletranslate_engine) scaletranslate_engine =
634 new ScaleTranslateEngine(this, cpus);
637 scaletranslate_engine->output = output;
638 scaletranslate_engine->input = translation_input;
639 scaletranslate_engine->in_x1 = (int)in_x1;
640 scaletranslate_engine->in_y1 = (int)in_y1;
641 // we need to do this mumbo-jumbo in order to get numerical stability
642 // other option would be to round all the coordinates
643 scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1);
644 scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
645 scaletranslate_engine->out_x1 = (int)out_x1;
646 scaletranslate_engine->out_y1 = (int)out_y1;
647 scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
648 scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
649 scaletranslate_engine->alpha = alpha;
650 scaletranslate_engine->mode = mode;
652 scaletranslate_engine->process_packages();
655 // Fractional translation
657 // Use fractional translation
658 // printf("OverlayFrame::overlay temp -> output %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
668 //printf("Overlay 3\n");
669 if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
670 translate_engine->translate_output = output;
671 translate_engine->translate_input = translation_input;
672 translate_engine->translate_in_x1 = in_x1;
673 translate_engine->translate_in_y1 = in_y1;
674 translate_engine->translate_in_x2 = in_x2;
675 translate_engine->translate_in_y2 = in_y2;
676 translate_engine->translate_out_x1 = out_x1;
677 translate_engine->translate_out_y1 = out_y1;
678 translate_engine->translate_out_x2 = out_x2;
679 translate_engine->translate_out_y2 = out_y2;
680 translate_engine->translate_alpha = alpha;
681 translate_engine->translate_mode = mode;
682 //printf("Overlay 4\n");
684 //printf("OverlayFrame::overlay 5 %d\n", mode);
685 translate_engine->process_packages();
689 //printf("OverlayFrame::overlay 2\n");
// Work package for ScaleUnit; ScaleEngine::init_packages() fills in its
// out_row1 / out_row2 row range.
700 ScalePackage::ScalePackage()
// Worker created by ScaleEngine; stores back-pointers to the overlay and to the
// engine that owns it.
707 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
710 this->overlay = overlay;
711 this->engine = server;
// Destructor of the scaling worker.
714 ScaleUnit::~ScaleUnit()
// Build the per-output-pixel weight table used when shrinking along one axis.
// table is allocated here with new[] (out_total zeroed entries) and handed back
// through the reference; freeing it is left to the caller.
// For output pixel i the span from out_start to i + 1 is multiplied by scale to
// obtain the input span in_start..in_end.  Each entry records
//   input_pixel1 / input_pixel2 - first and last input pixel touched
//   input_fraction1 - weight of the partially covered first pixel
//   input_fraction2 - weight of every pixel between the first and the last
//   input_fraction3 - weight of the partially covered last pixel
//   total_fraction  - sum of the weights that will be applied
// Both pixel indices are shifted by in_pixel1 at the end.
720 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
726 table = new bilinear_table_t[out_total];
727 bzero(table, sizeof(bilinear_table_t) * out_total);
728 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
729 for(int i = 0; i < out_total; i++)
732 float in_start = out_start * scale;
733 float out_end = i + 1;
734 float in_end = out_end * scale;
735 bilinear_table_t *entry = table + i;
736 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
738 // Store input fraction
739 entry->input_fraction1 = (floor(in_start + 1) - in_start) / scale;
740 entry->input_fraction2 = 1.0 / scale;
741 entry->input_fraction3 = (in_end - floor(in_end)) / scale;
// The span runs past the last available input pixel: stop at that pixel and
// give it whatever weight remains so that the weights still add up to 1.
743 if(in_end >= in_total - in_pixel1)
745 in_end = in_total - in_pixel1 - 1;
747 int difference = (int)in_end - (int)in_start - 1;
748 if(difference < 0) difference = 0;
749 entry->input_fraction3 = 1.0 -
750 entry->input_fraction1 -
751 entry->input_fraction2 * difference;
754 // Store input pixels
755 entry->input_pixel1 = (int)in_start;
756 entry->input_pixel2 = (int)in_end;
758 // printf("ScaleUnit::tabulate_reduction 1 %d %d %f %f %f\n",
759 // entry->input_pixel1,
760 // entry->input_pixel2,
761 // entry->input_fraction1,
762 // entry->input_fraction2,
763 // entry->input_fraction3);
// After clamping the first pixel can lie beyond the last one; collapse both
// onto the last pixel and drop the first-pixel weight.
767 if(entry->input_pixel1 > entry->input_pixel2)
769 entry->input_pixel1 = entry->input_pixel2;
770 entry->input_fraction1 = 0;
773 // Get total fraction of output pixel used
774 // if(entry->input_pixel2 > entry->input_pixel1)
775 entry->total_fraction =
776 entry->input_fraction1 +
777 entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
778 entry->input_fraction3;
779 entry->input_pixel1 += in_pixel1;
780 entry->input_pixel2 += in_pixel1;
// Build the two-tap weight table used when an axis is not being reduced.
// table is allocated here with new[] (out_total zeroed entries); freeing it is
// left to the caller.
// For output pixel i the source position is i * scale; input_pixel1 is its
// floor and input_pixel2 the following pixel.  input_fraction3 weights
// input_pixel2 and input_fraction1 weights input_pixel1; input_fraction2 is
// left at zero by the visible code.  When input_pixel2 would fall outside the
// available range it is folded back onto input_pixel1.  Both indices are
// shifted by in_pixel1 at the end.
784 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
790 table = new bilinear_table_t[out_total];
791 bzero(table, sizeof(bilinear_table_t) * out_total);
793 for(int i = 0; i < out_total; i++)
795 bilinear_table_t *entry = table + i;
796 float in_pixel = i * scale;
797 entry->input_pixel1 = (int)floor(in_pixel);
798 entry->input_pixel2 = entry->input_pixel1 + 1;
800 if(in_pixel <= in_total)
802 entry->input_fraction3 = in_pixel - entry->input_pixel1;
806 entry->input_fraction3 = 0;
807 entry->input_pixel2 = 0;
812 entry->input_fraction1 = entry->input_pixel2 - in_pixel;
816 entry->input_fraction1 = 0;
817 entry->input_pixel1 = 0;
820 if(entry->input_pixel2 >= in_total - in_pixel1)
822 entry->input_pixel2 = entry->input_pixel1;
823 entry->input_fraction3 = 1.0 - entry->input_fraction1;
826 entry->total_fraction =
827 entry->input_fraction1 +
828 entry->input_fraction3;
829 entry->input_pixel1 += in_pixel1;
830 entry->input_pixel2 += in_pixel1;
832 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
833 // entry->input_pixel1,
834 // entry->input_pixel2,
835 // entry->input_fraction1,
836 // entry->input_fraction2,
837 // entry->input_fraction3);
// Debugging aid: print a heading and then one line per table entry (pixel
// indices, the three fractions and their total) to stdout.
//   table - table built by tabulate_reduction() or tabulate_enlarge()
//   total - number of entries to print
841 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
843 printf("ScaleUnit::dump_bilinear\n");
844 for(int i = 0; i < total; i++)
846 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
848 table[i].input_pixel1,
849 table[i].input_pixel2,
850 table[i].input_fraction1,
851 table[i].input_fraction2,
852 table[i].input_fraction3,
853 table[i].total_fraction);
// Accumulate the weighted contribution of one input row into temp_f1..temp_f4.
//   type       - component storage type of the rows
//   components - components per pixel; the 4th is only touched when this is 4
//   row        - index into in_rows of the row to read
// The pixel at x_entry->input_pixel1 is weighted by input_scale1, the pixel at
// x_entry->input_pixel2 by input_scale3 and every pixel strictly between them
// by input_scale2.  in_rows, x_entry, temp_f1..4 and input_scale1..3 must be in
// scope where the macro is expanded.
857 #define PIXEL_REDUCE_MACRO(type, components, row) \
859 type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
860 type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
862 /* Do first pixel */ \
863 temp_f1 += input_scale1 * input_row[0]; \
864 temp_f2 += input_scale1 * input_row[1]; \
865 temp_f3 += input_scale1 * input_row[2]; \
866 if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
868 /* Do last pixel */ \
869 /* if(input_row < input_end) */\
871 temp_f1 += input_scale3 * input_end[0]; \
872 temp_f2 += input_scale3 * input_end[1]; \
873 temp_f3 += input_scale3 * input_end[2]; \
874 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
877 /* Do middle pixels */ \
878 for(input_row += components; input_row < input_end; input_row += components) \
880 temp_f1 += input_scale2 * input_row[0]; \
881 temp_f2 += input_scale2 * input_row[1]; \
882 temp_f3 += input_scale2 * input_row[2]; \
883 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
887 // Bilinear reduction and suboptimal enlargement.
888 // Very high quality.
//   max        - full-scale component value; results above it are clamped
//   type       - component storage type
//   components - components per pixel (3 or 4)
// An x and a y weight table are built with tabulate_reduction() or
// tabulate_enlarge().  For every output pixel of the package's row range the
// accumulators start at .5 when sizeof(type) != 4 and at 0 otherwise, then
// PIXEL_REDUCE_MACRO adds the first input row (y fraction1), the last input row
// (y fraction3) and every row in between (y fraction2).
889 #define BILINEAR_REDUCE(max, type, components) \
891 bilinear_table_t *x_table, *y_table; \
892 int out_h = pkg->out_row2 - pkg->out_row1; \
893 type **in_rows = (type**)input->get_rows(); \
894 type **out_rows = (type**)output->get_rows(); \
897 tabulate_reduction(x_table, \
903 tabulate_enlarge(x_table, \
910 tabulate_reduction(y_table, \
916 tabulate_enlarge(y_table, \
921 /* dump_bilinear(y_table, out_h_int); */\
923 for(int i = 0; i < out_h; i++) \
925 type *out_row = out_rows[i + pkg->out_row1]; \
926 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
927 /*printf("BILINEAR_REDUCE 2 %d %d %d\n", i, y_entry->input_pixel1, y_entry->input_pixel2); */\
929 for(int j = 0; j < out_w_int; j++) \
931 bilinear_table_t *x_entry = &x_table[j]; \
932 /* Load rounding factors */ \
937 if(sizeof(type) != 4) \
938 temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
940 temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
943 float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
944 float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
945 float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
946 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
951 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
952 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
953 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
954 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
959 input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
960 input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
961 input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
962 for(int k = y_entry->input_pixel1 + 1; \
963 k < y_entry->input_pixel2; \
966 PIXEL_REDUCE_MACRO(type, components, k) \
973 if(temp_f1 > max) temp_f1 = max; \
974 if(temp_f2 > max) temp_f2 = max; \
975 if(temp_f3 > max) temp_f3 = max; \
976 if(components == 4) if(temp_f4 > max) temp_f4 = max; \
978 out_row[j * components ] = (type)temp_f1; \
979 out_row[j * components + 1] = (type)temp_f2; \
980 out_row[j * components + 2] = (type)temp_f3; \
981 if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
983 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
992 // Only 2 input pixels
//   max        - full-scale component value (not referenced in the visible lines)
//   type       - component storage type
//   components - components per pixel (3 or 4)
// tabulate_blinear_f() supplies, per axis, the two source coordinates and the
// frac / antifrac weights of every output pixel.  Each output component is the
// weighted sum of the four surrounding source components: anti_a_f and a_f
// weight the two source rows, anti_b_f and b_f the two source columns.
// The eight tables obtained from tabulate_blinear_f() are freed at the end.
993 #define BILINEAR_ENLARGE(max, type, components) \
995 /*printf("BILINEAR_ENLARGE 1\n");*/ \
996 float k_y = 1.0 / scale_h; \
997 float k_x = 1.0 / scale_w; \
998 type **in_rows = (type**)input->get_rows(); \
999 type **out_rows = (type**)output->get_rows(); \
1000 int out_h = pkg->out_row2 - pkg->out_row1; \
1001 int in_h_int = input->get_h(); \
1002 int in_w_int = input->get_w(); \
1003 int *table_int_x1, *table_int_y1; \
1004 int *table_int_x2, *table_int_y2; \
1005 float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
1006 int *table_frac_x_i, *table_antifrac_x_i, *table_frac_y_i, *table_antifrac_y_i; \
1008 tabulate_blinear_f(table_int_x1, \
1011 table_antifrac_x_f, \
1017 tabulate_blinear_f(table_int_y1, \
1020 table_antifrac_y_f, \
1027 for(int i = 0; i < out_h; i++) \
1029 int i_y1 = table_int_y1[i]; \
1030 int i_y2 = table_int_y2[i]; \
1034 uint64_t anti_a_i; \
1035 a_f = table_frac_y_f[i]; \
1036 anti_a_f = table_antifrac_y_f[i]; \
1037 type *in_row1 = in_rows[i_y1]; \
1038 type *in_row2 = in_rows[i_y2]; \
1039 type *out_row = out_rows[i + pkg->out_row1]; \
1041 for(int j = 0; j < out_w_int; j++) \
1043 int i_x1 = table_int_x1[j]; \
1044 int i_x2 = table_int_x2[j]; \
1045 float output1r, output1g, output1b, output1a; \
1046 float output2r, output2g, output2b, output2a; \
1047 float output3r, output3g, output3b, output3a; \
1048 float output4r, output4g, output4b, output4a; \
1051 b_f = table_frac_x_f[j]; \
1052 anti_b_f = table_antifrac_x_f[j]; \
1054 output1r = in_row1[i_x1 * components]; \
1055 output1g = in_row1[i_x1 * components + 1]; \
1056 output1b = in_row1[i_x1 * components + 2]; \
1057 if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
1059 output2r = in_row1[i_x2 * components]; \
1060 output2g = in_row1[i_x2 * components + 1]; \
1061 output2b = in_row1[i_x2 * components + 2]; \
1062 if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
1064 output3r = in_row2[i_x1 * components]; \
1065 output3g = in_row2[i_x1 * components + 1]; \
1066 output3b = in_row2[i_x1 * components + 2]; \
1067 if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
1069 output4r = in_row2[i_x2 * components]; \
1070 output4g = in_row2[i_x2 * components + 1]; \
1071 output4b = in_row2[i_x2 * components + 2]; \
1072 if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
1074 out_row[j * components] = \
1075 (type)(anti_a_f * (anti_b_f * output1r + \
1077 a_f * (anti_b_f * output3r + \
1079 out_row[j * components + 1] = \
1080 (type)(anti_a_f * (anti_b_f * output1g + \
1082 a_f * ((anti_b_f * output3g) + \
1084 out_row[j * components + 2] = \
1085 (type)(anti_a_f * ((anti_b_f * output1b) + \
1086 (b_f * output2b)) + \
1087 a_f * ((anti_b_f * output3b) + \
1089 if(components == 4) \
1090 out_row[j * components + 3] = \
1091 (type)(anti_a_f * ((anti_b_f * output1a) + \
1092 (b_f * output2a)) + \
1093 a_f * ((anti_b_f * output3a) + \
1099 delete [] table_int_x1; \
1100 delete [] table_int_x2; \
1101 delete [] table_int_y1; \
1102 delete [] table_int_y2; \
1103 delete [] table_frac_x_f; \
1104 delete [] table_antifrac_x_f; \
1105 delete [] table_frac_y_f; \
1106 delete [] table_antifrac_y_f; \
1108 /*printf("BILINEAR_ENLARGE 2\n");*/ \
// Bicubic scaling of the package's output rows.
//   max        - full-scale component value (not referenced in the visible lines)
//   type       - component storage type
//   components - components per pixel (3 or 4)
// tabulate_bcubic_f() supplies four spline weights and four source coordinates
// per output pixel for each axis.  Every output component is the sum over the
// 4 x 4 neighbourhood of (y weight * x weight * source component).
// The weight and coordinate tables are freed at the end.
1112 #define BICUBIC(max, type, components) \
1114 float k_y = 1.0 / scale_h; \
1115 float k_x = 1.0 / scale_w; \
1116 type **in_rows = (type**)input->get_rows(); \
1117 type **out_rows = (type**)output->get_rows(); \
1118 float *bspline_x_f, *bspline_y_f; \
1119 int *bspline_x_i, *bspline_y_i; \
1120 int *in_x_table, *in_y_table; \
1121 int in_h_int = input->get_h(); \
1122 int in_w_int = input->get_w(); \
1124 tabulate_bcubic_f(bspline_x_f, \
1132 tabulate_bcubic_f(bspline_y_f, \
1140 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1142 for(int j = 0; j < out_w_int; j++) \
1144 int i_x = (int)(k_x * j); \
1145 float output1_f, output2_f, output3_f, output4_f; \
1146 uint64_t output1_i, output2_i, output3_i, output4_i; \
1150 if(components == 4) \
1152 int table_y = i * 4; \
1155 for(int m = -1; m < 3; m++) \
1159 r1_f = bspline_y_f[table_y]; \
1160 int y = in_y_table[table_y]; \
1161 int table_x = j * 4; \
1163 for(int n = -1; n < 3; n++) \
1167 r2_f = bspline_x_f[table_x]; \
1168 int x = in_x_table[table_x]; \
1170 uint64_t r_square_i; \
1171 r_square_f = r1_f * r2_f; \
1172 output1_f += r_square_f * in_rows[y][x * components]; \
1173 output2_f += r_square_f * in_rows[y][x * components + 1]; \
1174 output3_f += r_square_f * in_rows[y][x * components + 2]; \
1175 if(components == 4) \
1176 output4_f += r_square_f * in_rows[y][x * components + 3]; \
1184 out_rows[i][j * components] = (type)output1_f; \
1185 out_rows[i][j * components + 1] = (type)output2_f; \
1186 out_rows[i][j * components + 2] = (type)output3_f; \
1187 if(components == 4) \
1188 out_rows[i][j * components + 3] = (type)output4_f; \
1193 delete [] bspline_x_f; \
1194 delete [] bspline_y_f; \
1195 delete [] in_x_table; \
1196 delete [] in_y_table; \
1202 // The pow function is not thread safe in Compaq C, so cube by plain multiplication.
// The argument is evaluated three times; do not pass an expression with side effects.
1203 #define CUBE(x) ((x) * (x) * (x))
// Evaluate the cubic B-spline weight for offset x.
// The visible tests compare shifted copies of x (x + 2, x + 1, x - 1) against
// zero to pick the terms a, b, c and d; the result is
// (a - 4b + 6c - 4d) / 6.
1205 float ScaleUnit::cubic_bspline(float x)
1209 if((x + 2.0F) <= 0.0F)
1219 if((x + 1.0F) <= 0.0F)
1237 if((x - 1.0F) <= 0.0F)
1247 return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
// Build the floating point bicubic tables for one axis.
// coef_table and coord_table are allocated here with new[], four entries per
// output pixel; freeing them is left to the caller.
// For output pixel i the source position is i * scale with fractional part a.
// Tap m (-1..2) stores the spline weight cubic_bspline(coefficient * (m - a))
// and the source coordinate start + (int)f_x + m clamped to 0..total_pixels - 1.
1251 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1259 coef_table = new float[pixels * 4];
1260 coord_table = new int[pixels * 4];
1261 for(int i = 0, j = 0; i < pixels; i++)
1263 float f_x = (float)i * scale;
1264 float a = f_x - floor(f_x);
1266 for(float m = -1; m < 3; m++)
1268 coef_table[j] = cubic_bspline(coefficient * (m - a));
1269 coord_table[j] = (int)(start + (int)f_x + m);
1270 CLAMP(coord_table[j], 0, total_pixels - 1);
// Fixed point variant of tabulate_bcubic_f(): identical layout, but every
// spline weight is multiplied by 0x10000 and truncated to int.
// coef_table and coord_table are allocated here with new[], four entries per
// output pixel; freeing them is left to the caller.
1277 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1285 coef_table = new int[pixels * 4];
1286 coord_table = new int[pixels * 4];
1287 for(int i = 0, j = 0; i < pixels; i++)
1289 float f_x = (float)i * scale;
1290 float a = f_x - floor(f_x);
1292 for(float m = -1; m < 3; m++)
1294 coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1295 coord_table[j] = (int)(start + (int)f_x + m);
1296 CLAMP(coord_table[j], 0, total_pixels - 1);
// Build the floating point bilinear tables for output pixels pixel1..pixel2 - 1.
// All four tables are allocated here with new[] (pixel2 - pixel1 entries each);
// freeing them is left to the caller.
// For output pixel i the source position is i * scale.  table_int1 holds its
// floor plus start, table_int2 the following pixel, both clamped to
// 0..total_pixels - 1; table_antifrac holds 1 minus the fractional part.
1303 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1306 float* &table_antifrac,
1313 table_int1 = new int[pixel2 - pixel1];
1314 table_int2 = new int[pixel2 - pixel1];
1315 table_frac = new float[pixel2 - pixel1];
1316 table_antifrac = new float[pixel2 - pixel1];
1318 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1320 float f_x = (float)i * scale;
1321 int i_x = (int)floor(f_x);
1322 float a = (f_x - floor(f_x));
1324 table_int1[j] = i_x + start;
1325 table_int2[j] = i_x + start + 1;
1326 CLAMP(table_int1[j], 0, total_pixels - 1);
1327 CLAMP(table_int2[j], 0, total_pixels - 1);
1329 table_antifrac[j] = 1.0F - a;
1330 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
// Fixed point variant of tabulate_blinear_f(): same coordinate tables, with the
// weights stored as scaled integers.
// All four tables are allocated here with new[] (pixel2 - pixel1 entries each);
// freeing them is left to the caller.
// NOTE(review): table_frac is scaled by 0xffff but table_antifrac by 0x10000,
// so the two weights do not always add up to the same total -- confirm this
// asymmetry is intended.
1334 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1337 int* &table_antifrac,
1344 table_int1 = new int[pixel2 - pixel1];
1345 table_int2 = new int[pixel2 - pixel1];
1346 table_frac = new int[pixel2 - pixel1];
1347 table_antifrac = new int[pixel2 - pixel1];
1349 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1351 double f_x = (float)i * scale;
1352 int i_x = (int)floor(f_x);
1353 float a = (f_x - floor(f_x));
1355 table_int1[j] = i_x + start;
1356 table_int2[j] = i_x + start + 1;
1357 CLAMP(table_int1[j], 0, total_pixels - 1);
1358 CLAMP(table_int2[j], 0, total_pixels - 1);
1359 table_frac[j] = (int)(a * 0xffff);
1360 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1361 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
// Scale the row range described by package from engine->scale_input into
// engine->scale_output.
// The locals below carry the names the scaling macros expect.  Selection:
//   - BICUBIC for CUBIC_CUBIC, or for CUBIC_LINEAR when both scales exceed 1;
//   - otherwise BILINEAR_ENLARGE when both scales exceed 1;
//   - otherwise BILINEAR_REDUCE.
// Within each branch the macro is instantiated per colour model with the
// matching (max, component type, component count).
1365 void ScaleUnit::process_package(LoadPackage *package)
1367 ScalePackage *pkg = (ScalePackage*)package;
1369 //printf("ScaleUnit::process_package 1\n");
1370 // Arguments for macros
1371 VFrame *output = engine->scale_output;
1372 VFrame *input = engine->scale_input;
1373 float scale_w = engine->w_scale;
1374 float scale_h = engine->h_scale;
1375 int in_x1_int = engine->in_x1_int;
1376 int in_y1_int = engine->in_y1_int;
1377 int out_h_int = engine->out_h_int;
1378 int out_w_int = engine->out_w_int;
1380 (input->get_color_model() == BC_YUV888 ||
1381 input->get_color_model() == BC_YUVA8888 ||
1382 input->get_color_model() == BC_YUV161616 ||
1383 input->get_color_model() == BC_YUVA16161616);
1385 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1386 if(engine->interpolation_type == CUBIC_CUBIC ||
1387 (engine->interpolation_type == CUBIC_LINEAR
1388 && engine->w_scale > 1 &&
1389 engine->h_scale > 1))
1391 switch(engine->scale_input->get_color_model())
1394 BICUBIC(1.0, float, 3);
1398 BICUBIC(1.0, float, 4);
1403 BICUBIC(0xff, unsigned char, 3);
1408 BICUBIC(0xff, unsigned char, 4);
1413 BICUBIC(0xffff, uint16_t, 3);
1416 case BC_RGBA16161616:
1417 case BC_YUVA16161616:
1418 BICUBIC(0xffff, uint16_t, 4);
1423 // Perform bilinear scaling input -> scale_output
1424 if(engine->w_scale > 1 &&
1425 engine->h_scale > 1)
1427 switch(engine->scale_input->get_color_model())
1430 BILINEAR_ENLARGE(1.0, float, 3);
1434 BILINEAR_ENLARGE(1.0, float, 4);
1439 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1444 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1449 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1452 case BC_RGBA16161616:
1453 case BC_YUVA16161616:
1454 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1459 // Bilinear reduction
1461 switch(engine->scale_input->get_color_model())
1464 BILINEAR_REDUCE(1.0, float, 3);
1467 BILINEAR_REDUCE(1.0, float, 4);
1471 BILINEAR_REDUCE(0xff, unsigned char, 3);
1476 BILINEAR_REDUCE(0xff, unsigned char, 4);
1481 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1484 case BC_RGBA16161616:
1485 case BC_YUVA16161616:
1486 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1490 //printf("ScaleUnit::process_package 3\n");
1506 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1507 : LoadServer(cpus, cpus)
1509 this->overlay = overlay;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
1512 ScaleEngine::~ScaleEngine()
1516 void ScaleEngine::init_packages()
1518 for(int i = 0; i < total_packages; i++)
1520 ScalePackage *package = (ScalePackage*)packages[i];
1521 package->out_row1 = out_h_int / total_packages * i;
1522 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1524 if(i >= total_packages - 1)
1525 package->out_row2 = out_h_int;
1529 LoadClient* ScaleEngine::new_client()
1531 return new ScaleUnit(this, overlay);
1534 LoadPackage* ScaleEngine::new_package()
1536 return new ScalePackage;
// Constructor of the translation work package. Its body is not shown in this
// view. TranslateEngine::init_packages() assigns out_row1 / out_row2.
1551 TranslatePackage::TranslatePackage()
1557 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1558 : LoadClient(server)
1560 this->overlay = overlay;
1561 this->engine = server;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
1564 TranslateUnit::~TranslateUnit()
// Build the floating point lookup table used to resample one axis with
// sub-pixel placement. One entry is produced for every integer output
// position in [out_x1_int, out_x2_int). Each entry records
//   in_x1, in_x2               the two source positions that are read,
//   in_fraction1, in_fraction2 the weight given to each of them,
//   output_fraction            how much of the output pixel is covered.
// table is allocated here with new[] and zero-filled;
// TranslateUnit::process_package() releases its tables with delete [].
// out_x1_int, out_x2_int and out_w_int are written by this function.
// NOTE(review): most of the parameter list, the initialisation of in_x, any
// use of `offset`, and some branch keywords sit on lines not shown in this
// view; the comments below describe only the visible statements.
1570 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1581 float offset = out_x1 - in_x1;
1582 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
// Integer output span: start truncated, end rounded up and clamped to
// out_total.
1584 out_x1_int = (int)out_x1;
1585 out_x2_int = MIN((int)ceil(out_x2), out_total);
1586 out_w_int = out_x2_int - out_x1_int;
// NOTE(review): nothing visible guards against out_w_int <= 0 before the
// allocation -- confirm callers always pass a non-empty span.
1588 table = new transfer_table_f[out_w_int];
1589 bzero(table, sizeof(transfer_table_f) * out_w_int);
1592 //printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1595 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1597 transfer_table_f *entry = &table[out_x - out_x1_int];
// Source pixel holding in_x and its right-hand neighbour.
1599 entry->in_x1 = (int)in_x;
1600 entry->in_x2 = (int)in_x + 1;
1602 // Get fraction of output pixel to fill
1603 entry->output_fraction = 1;
// Remove the part of the pixel lying before out_x1. The condition guarding
// this line is not shown; presumably it applies to the first pixel only.
1607 entry->output_fraction -= out_x1 - out_x;
// Output ends inside this pixel: coverage becomes the distance from the pixel
// start to out_x2 (this replaces the value computed above).
1610 if(out_x2 < out_x + 1)
1612 entry->output_fraction = (out_x2 - out_x);
1615 // Advance in_x until out_x_fraction is filled
1616 float out_x_fraction = entry->output_fraction;
// Part of the current source pixel that lies to the right of in_x.
1617 float in_x_fraction = floor(in_x + 1) - in_x;
// The current source pixel alone can supply the whole output fraction.
1619 if(out_x_fraction <= in_x_fraction)
1621 entry->in_fraction1 = out_x_fraction;
1622 entry->in_fraction2 = 0.0;
1623 in_x += out_x_fraction;
// Otherwise (branch keyword not shown) the rest of the current source pixel
// is used up and the remainder is taken from the next one.
1627 entry->in_fraction1 = in_x_fraction;
1628 in_x += out_x_fraction;
1629 entry->in_fraction2 = in_x - floor(in_x);
1632 // Clip in_x and zero out fraction. This doesn't work for YUV.
1633 if(entry->in_x2 >= in_total)
1635 entry->in_x2 = in_total - 1;
1636 entry->in_fraction2 = 0.0;
1639 if(entry->in_x1 >= in_total)
1641 entry->in_x1 = in_total - 1;
1642 entry->in_fraction1 = 0.0;
1644 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1648 // entry->in_fraction1,
1649 // entry->in_fraction2,
1650 // entry->output_fraction);
// Integer counterpart of translation_array_f: builds the per-axis lookup
// table with all fractions stored as integers scaled by 0x10000, so 0x10000
// stands for a full pixel (the float version stores 1 in the same place).
// One entry is produced for every integer output position in
// [out_x1_int, out_x2_int); table is allocated with new[] and zero-filled,
// and out_x1_int, out_x2_int and out_w_int are written.
// NOTE(review): most of the parameter list, the initialisation of in_x, any
// use of `offset`, and some branch keywords sit on lines not shown in this
// view. No visible line of TranslateUnit::process_package() calls this
// function.
1655 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1666 float offset = out_x1 - in_x1;
// Integer output span: start truncated, end rounded up and clamped to
// out_total.
1668 out_x1_int = (int)out_x1;
1669 out_x2_int = MIN((int)ceil(out_x2), out_total);
1670 out_w_int = out_x2_int - out_x1_int;
// NOTE(review): nothing visible guards against out_w_int <= 0 before the
// allocation -- confirm callers always pass a non-empty span.
1672 table = new transfer_table_i[out_w_int];
1673 bzero(table, sizeof(transfer_table_i) * out_w_int);
1676 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1679 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1681 transfer_table_i *entry = &table[out_x - out_x1_int];
// Source pixel holding in_x and its right-hand neighbour.
1683 entry->in_x1 = (int)in_x;
1684 entry->in_x2 = (int)in_x + 1;
1686 // Get fraction of output pixel to fill
1687 entry->output_fraction = 0x10000;
// Remove the part of the pixel lying before out_x1. The condition guarding
// this line is not shown; presumably it applies to the first pixel only.
1691 entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
// Output ends inside this pixel: coverage becomes the distance from the pixel
// start to out_x2 (this replaces the value computed above).
1694 if(out_x2 < out_x + 1)
1696 entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1699 // Advance in_x until out_x_fraction is filled
1700 int out_x_fraction = entry->output_fraction;
// Part of the current source pixel to the right of in_x, in 0x10000 units.
1701 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
// The current source pixel alone can supply the whole output fraction.
1703 if(out_x_fraction <= in_x_fraction)
1705 entry->in_fraction1 = out_x_fraction;
1706 entry->in_fraction2 = 0;
// in_x itself stays a float, so the fraction is converted back before adding.
1707 in_x += (float)out_x_fraction / 0x10000;
// Otherwise (branch keyword not shown) the rest of the current source pixel
// is used up and the remainder is taken from the next one.
1711 entry->in_fraction1 = in_x_fraction;
1712 in_x += (float)out_x_fraction / 0x10000;
1713 entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1716 // Clip in_x and zero out fraction. This doesn't work for YUV.
1717 if(entry->in_x2 >= in_total)
1719 entry->in_x2 = in_total - 1;
1720 entry->in_fraction2 = 0;
1723 if(entry->in_x1 >= in_total)
1725 entry->in_x1 = in_total - 1;
1726 entry->in_fraction1 = 0;
1728 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1732 // entry->in_fraction1,
1733 // entry->in_fraction2,
1734 // entry->output_fraction);
// TRANSLATE(max, temp_type, type, components, chroma_offset)
// Statement macro expanded in TranslateUnit::process_package(). It reads
// input, output, alpha, row1, row2, x_table_f, y_table_f, out_x1_int,
// out_x2_int and out_y1_int from the expanding scope.
//   max           full-scale sample value (0xff, 0xffff or 1.0 in the calls)
//   temp_type     type used for intermediate values and opacities
//   type          sample type stored in the frame rows
//   components    samples per pixel; the fourth channel is read only when 4
//   chroma_offset offset handed on to BLEND_3 / BLEND_4 and used to form
//                 extra_chroma for channels 2 and 3
// For every output pixel in rows [row1, row2) and columns
// [out_x1_int, out_x2_int) the macro
//   - fetches the two source rows / columns and their weights from y_table_f
//     and x_table_f,
//   - forms four weights as the products of the x and y fractions,
//   - sets each inputN to the weighted sum of the four neighbouring source
//     samples plus `round`,
//   - scales master_opacity by the x and y output fractions to get the
//     per-pixel opacity, and
//   - invokes BLEND_3 or BLEND_4 depending on components.
// When sizeof(type) != 4, 0.5 is added before converting the opacities to
// temp_type; the 4-byte case converts without it.
// NOTE(review): several lines of the macro are not shown in this view: the
// body of the `round` adjustment, the middle of the extra_chroma expression
// and the condition selecting the chroma variant of input2 / input3.
// Presumably round becomes 0.5 for integer types and extra_chroma is
// chroma_offset times the uncovered weight -- confirm against the full macro.
1771 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1774 type **in_rows = (type**)input->get_rows(); \
1775 type **out_rows = (type**)output->get_rows(); \
1778 temp_type master_opacity; \
1779 if(sizeof(type) != 4) \
1780 master_opacity = (temp_type)(alpha * max + 0.5); \
1782 master_opacity = (temp_type)(alpha * max); \
1783 temp_type master_transparency = max - master_opacity; \
1784 float round = 0.0; \
1785 if(sizeof(type) != 4) \
1789 for(int i = row1; i < row2; i++) \
1793 float y_fraction1_f; \
1794 float y_fraction2_f; \
1795 float y_output_fraction_f; \
1796 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1797 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1798 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1799 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1800 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1801 type *in_row1 = in_rows[(in_y1)]; \
1802 type *in_row2 = in_rows[(in_y2)]; \
1803 type *out_row = out_rows[i]; \
1805 for(int j = out_x1_int; j < out_x2_int; j++) \
1809 float x_fraction1_f; \
1810 float x_fraction2_f; \
1811 float x_output_fraction_f; \
1812 in_x1 = x_table_f[j - out_x1_int].in_x1; \
1813 in_x2 = x_table_f[j - out_x1_int].in_x2; \
1814 x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1815 x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1816 x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1817 type *output = &out_row[j * components]; \
1818 temp_type input1, input2, input3, input4; \
1820 float fraction1 = x_fraction1_f * y_fraction1_f; \
1821 float fraction2 = x_fraction2_f * y_fraction1_f; \
1822 float fraction3 = x_fraction1_f * y_fraction2_f; \
1823 float fraction4 = x_fraction2_f * y_fraction2_f; \
1825 input1 = (type)(in_row1[in_x1 * components] * fraction1 + \
1826 in_row1[in_x2 * components] * fraction2 + \
1827 in_row2[in_x1 * components] * fraction3 + \
1828 in_row2[in_x2 * components] * fraction4 + round); \
1830 /* Add chroma to fractional pixels */ \
1833 float extra_chroma = (1.0F - \
1837 fraction4) * chroma_offset; \
1838 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 + \
1839 in_row1[in_x2 * components + 1] * fraction2 + \
1840 in_row2[in_x1 * components + 1] * fraction3 + \
1841 in_row2[in_x2 * components + 1] * fraction4 + \
1842 extra_chroma + round); \
1843 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 + \
1844 in_row1[in_x2 * components + 2] * fraction2 + \
1845 in_row2[in_x1 * components + 2] * fraction3 + \
1846 in_row2[in_x2 * components + 2] * fraction4 + \
1847 extra_chroma + round); \
1851 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 + \
1852 in_row1[in_x2 * components + 1] * fraction2 + \
1853 in_row2[in_x1 * components + 1] * fraction3 + \
1854 in_row2[in_x2 * components + 1] * fraction4 + round); \
1855 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 + \
1856 in_row1[in_x2 * components + 2] * fraction2 + \
1857 in_row2[in_x1 * components + 2] * fraction3 + \
1858 in_row2[in_x2 * components + 2] * fraction4 + round); \
1861 if(components == 4) \
1862 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 + \
1863 in_row1[in_x2 * components + 3] * fraction2 + \
1864 in_row2[in_x1 * components + 3] * fraction3 + \
1865 in_row2[in_x2 * components + 3] * fraction4 + round); \
1867 temp_type opacity; \
1868 if(sizeof(type) != 4) \
1869 opacity = (temp_type)(master_opacity * \
1870 y_output_fraction_f * \
1871 x_output_fraction_f + 0.5); \
1873 opacity = (temp_type)(master_opacity * \
1874 y_output_fraction_f * \
1875 x_output_fraction_f); \
1876 temp_type transparency = max - opacity; \
1878 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
1880 if(components == 3) \
1882 BLEND_3(max, temp_type, type, chroma_offset); \
1886 BLEND_4(max, temp_type, type, chroma_offset); \
// Translate and blend one package of output rows.
// Copies the engine's translation settings into locals named as the TRANSLATE
// macro expects, builds the x and y float lookup tables with
// translation_array_f(), expands TRANSLATE for the input frame's color model,
// and finally frees both tables.
// package: the LoadPackage handed to this unit; it is cast to
//          TranslatePackage and supplies the row range [out_row1, out_row2).
// In the TRANSLATE calls the RGB models use unsigned temporaries and a chroma
// offset of 0, the YUV models use signed temporaries and a chroma offset of
// half the sample range (0x80 or 0x8000).
// NOTE(review): the case labels and the arguments of both
// translation_array_f() calls are not shown in this view.
1892 void TranslateUnit::process_package(LoadPackage *package)
1894 TranslatePackage *pkg = (TranslatePackage*)package;
1901 // Variables for TRANSLATE
1902 VFrame *input = engine->translate_input;
1903 VFrame *output = engine->translate_output;
1904 float in_x1 = engine->translate_in_x1;
1905 float in_y1 = engine->translate_in_y1;
1906 float in_x2 = engine->translate_in_x2;
1907 float in_y2 = engine->translate_in_y2;
1908 float out_x1 = engine->translate_out_x1;
1909 float out_y1 = engine->translate_out_y1;
1910 float out_x2 = engine->translate_out_x2;
1911 float out_y2 = engine->translate_out_y2;
1912 float alpha = engine->translate_alpha;
1913 int row1 = pkg->out_row1;
1914 int row2 = pkg->out_row2;
1915 int mode = engine->translate_mode;
1916 int in_total_x = input->get_w();
1917 int in_total_y = input->get_h();
// Non-zero when the input color model is one of the four YUV variants.
// The start of this statement (the variable being set) is not shown here.
1919 (engine->translate_input->get_color_model() == BC_YUV888 ||
1920 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1921 engine->translate_input->get_color_model() == BC_YUV161616 ||
1922 engine->translate_input->get_color_model() == BC_YUVA16161616);
1924 transfer_table_f *x_table_f;
1925 transfer_table_f *y_table_f;
// NOTE(review): no visible line assigns, reads or frees the two integer
// tables below -- confirm whether they are still needed.
1926 transfer_table_i *x_table_i;
1927 transfer_table_i *y_table_i;
1929 translation_array_f(x_table_f,
1938 translation_array_f(y_table_f,
1947 // printf("TranslateUnit::process_package 1 %d\n", mode);
1951 switch(engine->translate_input->get_color_model())
1954 TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
1958 TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
1962 TRANSLATE(1.0, float, float, 3, 0);
1966 TRANSLATE(1.0, float, float, 4, 0);
1970 TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
1973 case BC_RGBA16161616:
1974 TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
1978 TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
1982 TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
1986 TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
1989 case BC_YUVA16161616:
1990 TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
1993 // printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// The tables were allocated with new[] inside translation_array_f().
1995 delete [] x_table_f;
1996 delete [] y_table_f;
2008 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2009 : LoadServer(cpus, cpus)
2011 this->overlay = overlay;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
2014 TranslateEngine::~TranslateEngine()
// Divide the output rows touched by the translation among the packages.
// The row span starts at translate_out_y1 truncated to an integer and ends at
// translate_out_y2 rounded up, clamped to the output frame height. Each
// package receives out_row1 / out_row2 inside that span, and the last
// package's out_row2 is forced to the end of the span so no row is left out.
// NOTE(review): the tails of the out_row1 and out_row2 expressions sit on
// lines not shown in this view.
2018 void TranslateEngine::init_packages()
2020 int out_y1_int = (int)translate_out_y1;
2021 int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2022 int out_h = out_y2_int - out_y1_int;
2024 for(int i = 0; i < total_packages; i++)
2026 TranslatePackage *package = (TranslatePackage*)packages[i];
2027 package->out_row1 = (int)(out_y1_int + out_h /
2030 package->out_row2 = (int)((float)package->out_row1 +
2033 if(i >= total_packages - 1)
2034 package->out_row2 = out_y2_int;
2038 LoadClient* TranslateEngine::new_client()
2040 return new TranslateUnit(this, overlay);
2043 LoadPackage* TranslateEngine::new_package()
2045 return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
// Statement macro expanded in ScaleTranslateUnit::process_package(). It reads
// alpha, pkg, in_rows, out_rows, x_table, y_table, in_x1, in_x2, out_x1,
// out_y1 and out_w from the expanding scope.
//   max           full-scale sample value (0xff, 0xffff or 1.0 in the calls)
//   temp_type     type used for the opacity and the inputN temporaries
//   type          sample type stored in the frame rows
//   components    samples per pixel; input4 is read only when this is 4
//   chroma_offset passed through to BLEND_3 / BLEND_4
// For each output row of the package the source row comes from y_table. If
// the output width differs from the source width, each output pixel takes the
// source pixel x_table[j] selects; otherwise source and output pointers
// advance together. No interpolation is done: the chosen source samples are
// copied into input1..input4 and handed to BLEND_3 or BLEND_4.
// The opacity is alpha * max, with 0.5 added before conversion when
// sizeof(type) != 4.
2055 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2057 temp_type opacity; \
2058 if(sizeof(type) != 4) \
2059 opacity = (temp_type)(alpha * max + 0.5); \
2061 opacity = (temp_type)(alpha * max); \
2062 temp_type transparency = max - opacity; \
2064 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2066 int in_y = y_table[i - out_y1]; \
2067 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2068 type *output = (type*)out_rows[i] + out_x1 * components; \
2070 /* X direction is scaled and requires a table lookup */ \
2071 if(out_w != in_x2 - in_x1) \
2073 for(int j = 0; j < out_w; j++) \
2075 type *in_row_plus_x = in_row + x_table[j] * components; \
2076 temp_type input1, input2, input3, input4; \
2078 input1 = in_row_plus_x[0]; \
2079 input2 = in_row_plus_x[1]; \
2080 input3 = in_row_plus_x[2]; \
2081 if(components == 4) \
2082 input4 = in_row_plus_x[3]; \
2084 if(components == 3) \
2086 BLEND_3(max, temp_type, type, chroma_offset); \
2090 BLEND_4(max, temp_type, type, chroma_offset); \
2092 output += components; \
2096 /* X direction is not scaled */ \
2098 for(int j = 0; j < out_w; j++) \
2100 temp_type input1, input2, input3, input4; \
2102 input1 = in_row[0]; \
2103 input2 = in_row[1]; \
2104 input3 = in_row[2]; \
2105 if(components == 4) \
2106 input4 = in_row[3]; \
2108 if(components == 3) \
2110 BLEND_3(max, temp_type, type, chroma_offset); \
2114 BLEND_4(max, temp_type, type, chroma_offset); \
2116 in_row += components; \
2117 output += components; \
2125 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2126 : LoadClient(server)
2128 this->overlay = overlay;
2129 this->scale_translate = server;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
2132 ScaleTranslateUnit::~ScaleTranslateUnit()
// Build a nearest-neighbour index table for one axis: table[i] is the source
// index used for output index i, computed as i divided by the output/input
// size ratio and truncated to an integer. The table is allocated here with
// new[] and holds out_x2 - out_x1 entries.
// Two fill loops are visible: one adds in_x1 to every entry, the other does
// not. NOTE(review): the remaining parameters and the condition choosing
// between the loops are not shown in this view.
// NOTE(review): `scale` divides by (in_x2 - in_x1); a zero-width source range
// would divide by zero -- confirm callers never pass one.
2136 void ScaleTranslateUnit::scale_array(int* &table,
2143 float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2145 table = new int[out_x2 - out_x1];
2149 for(int i = 0; i < out_x2 - out_x1; i++)
2151 table[i] = (int)((float)i / scale + in_x1);
2156 for(int i = 0; i < out_x2 - out_x1; i++)
2158 table[i] = (int)((float)i / scale);
// Nearest-neighbour scale, translate and blend for one package of rows.
// Copies the engine's settings into locals named as the SCALE_TRANSLATE macro
// expects, builds the index tables with scale_array(), and then either
//   - copies whole row segments with memcpy when the mode is TRANSFER_REPLACE
//     and the width is unchanged, or
//   - expands SCALE_TRANSLATE for the input frame's color model.
// package: the LoadPackage handed to this unit; it is cast to
//          ScaleTranslatePackage and supplies the row range.
// NOTE(review): the declarations of x_table / y_table, the scale_array()
// arguments, the case labels, the memcpy length argument and the table
// clean-up after the final `if` are on lines not shown in this view.
2164 void ScaleTranslateUnit::process_package(LoadPackage *package)
2166 ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2168 // Args for NEAREST_NEIGHBOR_MACRO
2169 VFrame *output = scale_translate->output;
2170 VFrame *input = scale_translate->input;
2171 int in_x1 = scale_translate->in_x1;
2172 int in_y1 = scale_translate->in_y1;
2173 int in_x2 = scale_translate->in_x2;
2174 int in_y2 = scale_translate->in_y2;
2175 int out_x1 = scale_translate->out_x1;
2176 int out_y1 = scale_translate->out_y1;
2177 int out_x2 = scale_translate->out_x2;
2178 int out_y2 = scale_translate->out_y2;
2179 float alpha = scale_translate->alpha;
2180 int mode = scale_translate->mode;
2181 int out_w = out_x2 - out_x1;
2185 unsigned char **in_rows = input->get_rows();
2186 unsigned char **out_rows = output->get_rows();
2190 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
// The x table is only needed when the width actually changes.
2191 if(out_w != in_x2 - in_x1)
2193 scale_array(x_table,
2200 scale_array(y_table,
2208 if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1))
2210 // if we have transfer replace and x direction is not scaled, PARTY!
// Byte offsets and length of the row segment to copy.
2211 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2212 int line_len = out_w * bytes_per_pixel;
2213 int in_start_byte = in_x1 * bytes_per_pixel;
2214 int out_start_byte = out_x1 * bytes_per_pixel;
2215 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2217 memcpy (out_rows[i] + out_start_byte,
2218 in_rows[y_table[i - out_y1]] + in_start_byte ,
2224 switch(input->get_color_model())
2227 SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2231 SCALE_TRANSLATE(1.0, float, float, 3, 0);
2235 SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2239 SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2243 SCALE_TRANSLATE(1.0, float, float, 4, 0);
2247 SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2252 SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2256 SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2259 case BC_RGBA16161616:
2260 SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2263 case BC_YUVA16161616:
2264 SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2268 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// Same condition under which x_table was built above.
2269 if(out_x2 - out_x1 != in_x2 - in_x1)
2283 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2284 : LoadServer(cpus, cpus)
2286 this->overlay = overlay;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
2289 ScaleTranslateEngine::~ScaleTranslateEngine()
// Divide the output rows [out_y1, out_y2) among the packages. Each package
// receives out_row1 / out_row2 inside that span, and the last package's
// out_row2 is forced to out_y2 so no row is left out.
// NOTE(review): the tails of the out_row1 and out_row2 expressions sit on
// lines not shown in this view.
2293 void ScaleTranslateEngine::init_packages()
2295 int out_h = out_y2 - out_y1;
2297 for(int i = 0; i < total_packages; i++)
2299 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2300 package->out_row1 = (int)(out_y1 + out_h /
2303 package->out_row2 = (int)((float)package->out_row1 +
2306 if(i >= total_packages - 1)
2307 package->out_row2 = out_y2;
2311 LoadClient* ScaleTranslateEngine::new_client()
2313 return new ScaleTranslateUnit(this, overlay);
2316 LoadPackage* ScaleTranslateEngine::new_package()
2318 return new ScaleTranslatePackage;
// Constructor of the scale/translate work package. Its body is not shown in
// this view. ScaleTranslateEngine::init_packages() assigns out_row1 /
// out_row2.
2322 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
// Generic blend of two frames with no scaling or translation; expanded in
// BlendUnit::process_package() for modes without a dedicated fast path. It
// reads alpha, input, output and pkg from the expanding scope.
//   temp_type     type used for the opacity and the inputN temporaries
//   type          sample type stored in the frame rows
//   max           full-scale sample value
//   components    samples per pixel; input4 is read only when this is 4
//   chroma_offset passed through to BLEND_3 / BLEND_4
// Note that the argument order differs from BLEND_3 / BLEND_4, which take max
// first.
// For every row of the package and every pixel across the input width, the
// input samples are copied into input1..input4, BLEND_3 or BLEND_4 is invoked,
// and both row pointers advance by one pixel.
// The opacity is alpha * max, with 0.5 added before conversion when
// sizeof(type) != 4.
2353 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2355 temp_type opacity; \
2356 if(sizeof(type) != 4) \
2357 opacity = (temp_type)(alpha * max + 0.5); \
2359 opacity = (temp_type)(alpha * max); \
2360 temp_type transparency = max - opacity; \
2362 type** output_rows = (type**)output->get_rows(); \
2363 type** input_rows = (type**)input->get_rows(); \
2364 int w = input->get_w(); \
2365 int h = input->get_h(); \
2367 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2369 type* in_row = input_rows[i]; \
2370 type* output = output_rows[i]; \
2372 for(int j = 0; j < w; j++) \
2374 temp_type input1, input2, input3, input4; \
2375 input1 = in_row[0]; \
2376 input2 = in_row[1]; \
2377 input3 = in_row[2]; \
2378 if(components == 4) input4 = in_row[3]; \
2381 if(components == 3) \
2383 BLEND_3(max, temp_type, type, chroma_offset); \
2387 BLEND_4(max, temp_type, type, chroma_offset); \
2390 in_row += components; \
2391 output += components; \
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
// Fast path for TRANSFER_REPLACE: each row of the package is copied from the
// input frame to the output frame with a single memcpy. It reads input,
// output and pkg from the expanding scope.
//   type        sample type stored in the frame rows
//   components  samples per pixel
// line_len is the row length in bytes: input width * sizeof(type) *
// components. h is set but not used on any visible line.
2397 #define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
2400 type** output_rows = (type**)output->get_rows(); \
2401 type** input_rows = (type**)input->get_rows(); \
2402 int w = input->get_w(); \
2403 int h = input->get_h(); \
2404 int line_len = w * sizeof(type) * components; \
2406 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2408 memcpy(output_rows[i], input_rows[i], line_len); \
// BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset)
// Fast path for TRANSFER_NORMAL on integer frames with an alpha channel. It
// reads alpha, input, output and pkg from the expanding scope.
//   temp_type     type used for the opacity arithmetic
//   type          sample type stored in the frame rows
//   max           full-scale sample value
//   chroma_offset subtracted from channels 1 and 2 before they are blended
// Per pixel, the global opacity is multiplied by the source alpha
// (in_row[3]), giving an opacity on a max * max scale; the transparency is
// max_squared minus that value. Channel 0 is blended and divided by max
// twice. The destination alpha becomes the larger of source and destination
// alpha.
// NOTE(review): the ends of the channel 1 and channel 2 expressions and the
// pointer increments are on lines not shown in this view; presumably they
// divide by max twice and add chroma_offset back -- confirm.
2412 // components is always 4
2413 #define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
2415 temp_type opacity = (temp_type)(alpha * max + 0.5); \
2416 temp_type transparency = max - opacity; \
2417 temp_type max_squared = ((temp_type)max) * max; \
2419 type** output_rows = (type**)output->get_rows(); \
2420 type** input_rows = (type**)input->get_rows(); \
2421 int w = input->get_w(); \
2422 int h = input->get_h(); \
2424 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2426 type* in_row = input_rows[i]; \
2427 type* output = output_rows[i]; \
2429 for(int j = 0; j < w; j++) \
2431 temp_type pixel_opacity, pixel_transparency; \
2432 pixel_opacity = opacity * in_row[3]; \
2433 pixel_transparency = (temp_type)max_squared - pixel_opacity; \
2437 output[0] = ((temp_type)in_row[0] * pixel_opacity + \
2438 (temp_type)output[0] * pixel_transparency) / max / max; \
2439 output[1] = (((temp_type)in_row[1] - chroma_offset) * pixel_opacity + \
2440 ((temp_type)output[1] - chroma_offset) * pixel_transparency) \
2443 output[2] = (((temp_type)in_row[2] - chroma_offset) * pixel_opacity + \
2444 ((temp_type)output[2] - chroma_offset) * pixel_transparency) \
2447 output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
// BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset)
// Fast path for TRANSFER_NORMAL on integer frames without an alpha channel.
// It reads alpha, input, output and pkg from the expanding scope.
//   temp_type     type used for the opacity arithmetic
//   type          sample type stored in the frame rows
//   max, chroma_offset  not referenced on any visible line of the macro
// The opacity is a fixed-point value with `bits` (= 8 * sizeof(type))
// fractional bits, so 1 << bits means fully opaque and the weighted sum is
// brought back to sample range with a right shift instead of a division.
// All samples of a row are treated alike, which is why w is three times the
// pixel width and the inner loop handles one sample per iteration.
// NOTE(review): the pointer increments of the inner loop are on lines not
// shown in this view.
2457 // components is always 3
2458 #define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
2460 const int bits = sizeof(type) * 8; \
2461 temp_type opacity = (temp_type)(alpha * ((temp_type)1 << bits) + 0.5); \
2462 temp_type transparency = ((temp_type)1 << bits) - opacity; \
2464 type** output_rows = (type**)output->get_rows(); \
2465 type** input_rows = (type**)input->get_rows(); \
2466 int w = input->get_w() * 3; \
2467 int h = input->get_h(); \
2469 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2471 type* in_row = input_rows[i]; \
2472 type* output = output_rows[i]; \
2474 for(int j = 0; j < w; j++) /* w = 3x width! */ \
2476 *output = ((temp_type)*in_row * opacity + *output * transparency) >> bits; \
2485 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2486 : LoadClient(server)
2488 this->overlay = overlay;
2489 this->blend_engine = server;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
2492 BlendUnit::~BlendUnit()
// Blend one package of rows of blend_engine->input onto blend_engine->output
// when no scaling or translation is involved.
// Three groups of code are visible, each dispatching on the input color
// model:
//   - TRANSFER_REPLACE: rows are copied with BLEND_ONLY_TRANSFER_REPLACE;
//   - TRANSFER_NORMAL:  float frames are blended inline, integer frames via
//     BLEND_ONLY_3_NORMAL / BLEND_ONLY_4_NORMAL;
//   - a final switch using the generic BLEND_ONLY macro.
// package: the LoadPackage handed to this unit; it is cast to BlendPackage
//          and supplies the row range [out_row1, out_row2).
// NOTE(review): the case labels, the `else` keywords linking the three groups
// and the pointer increments of the inline float loops are on lines not shown
// in this view; presumably the final switch handles every remaining mode.
2496 void BlendUnit::process_package(LoadPackage *package)
2498 BlendPackage *pkg = (BlendPackage*)package;
2501 VFrame *output = blend_engine->output;
2502 VFrame *input = blend_engine->input;
2503 float alpha = blend_engine->alpha;
2504 int mode = blend_engine->mode;
2506 if (mode == TRANSFER_REPLACE)
2508 switch(input->get_color_model())
2511 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2514 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2518 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2522 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2526 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2528 case BC_RGBA16161616:
2529 case BC_YUVA16161616:
2530 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2535 if (mode == TRANSFER_NORMAL)
2537 switch(input->get_color_model())
// Float, three samples per pixel: every sample is blended the same way, so
// the row is walked sample by sample (w is three times the pixel width).
2541 float opacity = alpha;
2542 float transparency = 1.0 - alpha;
2544 float** output_rows = (float**)output->get_rows();
2545 float** input_rows = (float**)input->get_rows();
2546 int w = input->get_w() * 3;
2547 int h = input->get_h();
2549 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2551 float* in_row = input_rows[i];
2552 float* output = output_rows[i];
2554 for(int j = 0; j < w; j++)
2556 *output = *in_row * opacity + *output * transparency;
// Float, four samples per pixel: the global opacity is scaled by the source
// alpha for each pixel, and the destination alpha becomes the larger of the
// source and destination alpha.
2565 float opacity = alpha;
2566 float transparency = 1.0 - alpha;
2568 float** output_rows = (float**)output->get_rows();
2569 float** input_rows = (float**)input->get_rows();
2570 int w = input->get_w();
2571 int h = input->get_h();
2573 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2575 float* in_row = input_rows[i];
2576 float* output = output_rows[i];
2578 for(int j = 0; j < w; j++)
2580 float pixel_opacity, pixel_transparency;
2581 pixel_opacity = opacity * in_row[3];
2582 pixel_transparency = 1.0 - pixel_opacity;
2585 output[0] = in_row[0] * pixel_opacity +
2586 output[0] * pixel_transparency;
2587 output[1] = in_row[1] * pixel_opacity +
2588 output[1] * pixel_transparency;
2589 output[2] = in_row[2] * pixel_opacity +
2590 output[2] * pixel_transparency;
2591 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
// Integer frames: unsigned temporaries with chroma offset 0, or signed
// temporaries with a chroma offset of half the sample range.
2600 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2603 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2606 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2609 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2612 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2615 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2617 case BC_RGBA16161616:
2618 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2620 case BC_YUVA16161616:
2621 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
// Generic path through BLEND_ONLY.
2626 switch(input->get_color_model())
2629 BLEND_ONLY(float, float, 1.0, 3, 0);
2632 BLEND_ONLY(float, float, 1.0, 4, 0);
2635 BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2638 BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2641 BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2644 BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2647 BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2650 BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2652 case BC_RGBA16161616:
2653 BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2655 case BC_YUVA16161616:
2656 BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2663 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2664 : LoadServer(cpus, cpus)
2666 this->overlay = overlay;
// Destructor. Its body is not shown in this view; no cleanup statement is
// visible here.
2669 BlendEngine::~BlendEngine()
// Divide the rows of the input frame among the packages. Each package
// receives out_row1 / out_row2, and the last package's out_row2 is forced to
// the input height so no row is left out.
// NOTE(review): the tails of the out_row1 and out_row2 expressions sit on
// lines not shown in this view.
2673 void BlendEngine::init_packages()
2675 for(int i = 0; i < total_packages; i++)
2677 BlendPackage *package = (BlendPackage*)packages[i];
2678 package->out_row1 = (int)(input->get_h() /
2681 package->out_row2 = (int)((float)package->out_row1 +
2685 if(i >= total_packages - 1)
2686 package->out_row2 = input->get_h();
2690 LoadClient* BlendEngine::new_client()
2692 return new BlendUnit(this, overlay);
2695 LoadPackage* BlendEngine::new_package()
2697 return new BlendPackage;
2701 BlendPackage::BlendPackage()