hvirtual/cinelerra/overlayframe.C

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdint.h>
   5 #include <stdlib.h>
   6 #include <unistd.h>
   7
   8 #include "clip.h"
   9 #include "edl.inc"
  10 #include "mutex.h"
  11 #include "overlayframe.h"
  12 #include "vframe.h"
  13
// Overloads that give the blend macros a single abs function for the float
// and int types.  Most of these are never used but GCC expects them.
  16 static int my_abs(int32_t x)
  17 {
  18         return abs(x);
  19 }
  20
  21 static int my_abs(uint32_t x)
  22 {
  23         return x;
  24 }
  25
  26 static int my_abs(int64_t x)
  27 {
  28         return llabs(x);
  29 }
  30
  31 static int my_abs(uint64_t x)
  32 {
  33         return x;
  34 }
  35
  36 static float my_abs(float x)
  37 {
  38         return fabsf(x);
  39 }
  40
  41
  42
  43
  44 OverlayFrame::OverlayFrame(int cpus)
  45 {
  46         temp_frame = 0;
  47         blend_engine = 0;
  48         scale_engine = 0;
  49         scaletranslate_engine = 0;
  50         translate_engine = 0;
  51         this->cpus = cpus;
  52 }
  53
  54 OverlayFrame::~OverlayFrame()
  55 {
  56         if(temp_frame) delete temp_frame;
  57         if(scale_engine) delete scale_engine;
  58         if(translate_engine) delete translate_engine;
  59         if(blend_engine) delete blend_engine;
  60         if(scaletranslate_engine) delete scaletranslate_engine;
  61 }
  62
  63
  64
  65
  66
  67
  68
  69
  70 // Verification:
  71
  72 // (255 * 255 + 0 * 0) / 255 = 255
  73 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
  74
  75 // (65535 * 65535 + 0 * 0) / 65535 = 65535
  76 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
  77
  78
  79 // Branch prediction 4 U
  80
// BLEND_3: blend one 3 component pixel into output[0..2].
//
// The macro expands to a block which reads the following names from the
// scope it is expanded in:
//   input1, input2, input3  - the three components of the source pixel
//   output                  - pointer to the destination pixel, read and written
//   opacity, transparency   - blend weights; every weighted sum is divided by
//                             max (NOTE(review): presumably
//                             opacity + transparency == max, confirm at the
//                             expansion sites)
//   mode                    - one of the TRANSFER_* constants
//
// Macro arguments:
//   max           - value the weighted sums are divided by and the upper
//                   clamp limit
//   temp_type     - type of the intermediate r, g, b values
//   type          - type the results are cast to when stored
//   chroma_offset - subtracted from the 2nd and 3rd components in the
//                   subtract and addition modes.  When nonzero, divide and
//                   multiply keep whichever of input/output is further from
//                   chroma_offset for those two components instead of
//                   dividing or multiplying them (presumably the YUV case).
//
// TRANSFER_REPLACE copies the input and ignores opacity.  A mode which is
// not listed in the switch leaves r, g and b unassigned.
//
// Results are clamped to 0 - max with CLIP unless sizeof(type) is 4, in
// which case they are stored unclamped (presumably the float pixel formats).
#define BLEND_3(max, temp_type, type, chroma_offset) \
{ \
        temp_type r, g, b; \
 \
/* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
        switch(mode) \
        { \
                case TRANSFER_DIVIDE: \
                        r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
                        if(chroma_offset) \
                        { \
                                g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
                                b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
                        } \
                        else \
                        { \
                                g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
                                b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
                        } \
                        r = (r * opacity + (temp_type)output[0] * transparency) / max; \
                        g = (g * opacity + (temp_type)output[1] * transparency) / max; \
                        b = (b * opacity + (temp_type)output[2] * transparency) / max; \
                        break; \
                case TRANSFER_MULTIPLY: \
                        r = ((temp_type)input1 * output[0]) / max; \
                        if(chroma_offset) \
                        { \
                                g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
                                b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
                        } \
                        else \
                        { \
                                g = (temp_type)input2 * (temp_type)output[1] / max; \
                                b = (temp_type)input3 * (temp_type)output[2] / max; \
                        } \
                        r = (r * opacity + (temp_type)output[0] * transparency) / max; \
                        g = (g * opacity + (temp_type)output[1] * transparency) / max; \
                        b = (b * opacity + (temp_type)output[2] * transparency) / max; \
                        break; \
                case TRANSFER_SUBTRACT: \
                        r = (temp_type)input1 - output[0]; \
                        g = (temp_type)input2 - ((temp_type)output[1] - chroma_offset); \
                        b = (temp_type)input3 - ((temp_type)output[2] - chroma_offset); \
                        r = (r * opacity + output[0] * transparency) / max; \
                        g = (g * opacity + output[1] * transparency) / max; \
                        b = (b * opacity + output[2] * transparency) / max; \
                        break; \
                case TRANSFER_ADDITION: \
                        r = (temp_type)input1 + output[0]; \
                        g = (temp_type)input2 - chroma_offset + output[1]; \
                        b = (temp_type)input3 - chroma_offset + output[2]; \
                        r = (r * opacity + output[0] * transparency) / max; \
                        g = (g * opacity + output[1] * transparency) / max; \
                        b = (b * opacity + output[2] * transparency) / max; \
                        break; \
                case TRANSFER_REPLACE: \
                        r = input1; \
                        g = input2; \
                        b = input3; \
                        break; \
                case TRANSFER_NORMAL: \
                        r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
                        g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
                        b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
                        break; \
        } \
 \
        if(sizeof(type) != 4) \
        { \
                output[0] = (type)CLIP(r, 0, max); \
                output[1] = (type)CLIP(g, 0, max); \
                output[2] = (type)CLIP(b, 0, max); \
        } \
        else \
        { \
                output[0] = r; \
                output[1] = g; \
                output[2] = b; \
        } \
}
 161
 162
 163
 164
 165
 166 // Blending equations are drastically different for 3 and 4 components
// BLEND_4: blend one 4 component pixel (3 color components plus alpha) into
// output[0..3].
//
// The macro expands to a block which reads the following names from the
// scope it is expanded in:
//   input1 .. input4 - components of the source pixel, input4 being its alpha
//   output           - pointer to the destination pixel, read and written
//   opacity          - overall blend weight
//   mode             - one of the TRANSFER_* constants
//
// Macro arguments have the same meaning as the identically named names used
// in the body: max is the divisor and clamp limit, temp_type the type of the
// intermediates, type the stored type, chroma_offset the value the 2nd and
// 3rd components are taken relative to.
//
// The per pixel weight is opacity * input4 and its complement is
// max * max - opacity * input4, so every weighted sum is divided by max
// twice.
//
// Output alpha is the larger of input4 and the existing output alpha, except
// for TRANSFER_REPLACE which copies all four input components and ignores
// opacity.  A mode which is not listed in the switch leaves r, g, b and a
// unassigned.
//
// Color results are clamped to 0 - max with CLIP unless sizeof(type) is 4
// (presumably the float pixel formats); alpha is stored without clamping in
// both cases.
#define BLEND_4(max, temp_type, type, chroma_offset) \
{ \
        temp_type r, g, b, a; \
        temp_type pixel_opacity, pixel_transparency; \
        temp_type output1 = output[0]; \
        temp_type output2 = output[1]; \
        temp_type output3 = output[2]; \
        temp_type output4 = output[3]; \
 \
        pixel_opacity = opacity * input4; \
        pixel_transparency = (temp_type)max * max - pixel_opacity; \
 \
        switch(mode) \
        { \
                case TRANSFER_DIVIDE: \
                        r = output1 ? (((temp_type)input1 * max) / output1) : max; \
                        if(chroma_offset) \
                        { \
                                g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
                                b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
                        } \
                        else \
                        { \
                                g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
                                b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
                        } \
                        r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_MULTIPLY: \
                        r = ((temp_type)input1 * output1) / max; \
                        if(chroma_offset) \
                        { \
                                g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
                                b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
                        } \
                        else \
                        { \
                                g = (temp_type)input2 * (temp_type)output2 / max; \
                                b = (temp_type)input3 * (temp_type)output3 / max; \
                        } \
                        r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_SUBTRACT: \
                        r = (temp_type)input1 - output1; \
                        g = (temp_type)input2 - ((temp_type)output2 - chroma_offset); \
                        b = (temp_type)input3 - ((temp_type)output3 - chroma_offset); \
                        r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_ADDITION: \
                        r = (temp_type)input1 + output1; \
                        g = (temp_type)input2 - chroma_offset + output2; \
                        b = (temp_type)input3 - chroma_offset + output3; \
                        r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
                        g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
                        b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
                case TRANSFER_REPLACE: \
                        r = input1; \
                        g = input2; \
                        b = input3; \
                        a = input4; \
                        break; \
                case TRANSFER_NORMAL: \
                        r = (input1 * pixel_opacity + \
                                output1 * pixel_transparency) / max / max; \
                        g = ((input2 - chroma_offset) * pixel_opacity + \
                                (output2 - chroma_offset) * pixel_transparency) \
                                / max / max + \
                                chroma_offset; \
                        b = ((input3 - chroma_offset) * pixel_opacity + \
                                (output3 - chroma_offset) * pixel_transparency) \
                                / max / max + \
                                chroma_offset; \
                        a = input4 > output4 ? input4 : output4; \
                        break; \
        } \
 \
        if(sizeof(type) != 4) \
        { \
                output[0] = (type)CLIP(r, 0, max); \
                output[1] = (type)CLIP(g, 0, max); \
                output[2] = (type)CLIP(b, 0, max); \
                output[3] = (type)a; \
        } \
        else \
        { \
                output[0] = r; \
                output[1] = g; \
                output[2] = b; \
                output[3] = a; \
        } \
}
 269
 270
 271
 272 // Bicubic algorithm using multiprocessors
 273 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
 274
 275 // Nearest neighbor algorithm using multiprocessors for blending
 276 // input -> scale + translate -> blend -> output
 277
 278
 279 int OverlayFrame::overlay(VFrame *output,
 280         VFrame *input,
 281         float in_x1,
 282         float in_y1,
 283         float in_x2,
 284         float in_y2,
 285         float out_x1,
 286         float out_y1,
 287         float out_x2,
 288         float out_y2,
 289         float alpha,       // 0 - 1
 290         int mode,
 291         int interpolation_type)
 292 {
 293         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
 294         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
 295
 296         if(isnan(in_x1) ||
 297                 isnan(in_y1) ||
 298                 isnan(in_x2) ||
 299                 isnan(in_y2) ||
 300                 isnan(out_x1) ||
 301                 isnan(out_y1) ||
 302                 isnan(out_x2) ||
 303                 isnan(out_y2)) return 1;
 304 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f\n", in_x1,
 305 //                      in_y1,
 306 //                      in_x2,
 307 //                      in_y2,
 308 //                      out_x1,
 309 //                      out_y1,
 310 //                      out_x2,
 311 //                      out_y2);
 312
 313 // Limit values
 314         if(in_x1 < 0)
 315         {
 316                 out_x1 += -in_x1 * w_scale;
 317                 in_x1 = 0;
 318         }
 319         else
 320         if(in_x1 >= input->get_w())
 321         {
 322                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
 323                 in_x1 = input->get_w();
 324         }
 325
 326         if(in_y1 < 0)
 327         {
 328                 out_y1 += -in_y1 * h_scale;
 329                 in_y1 = 0;
 330         }
 331         else
 332         if(in_y1 >= input->get_h())
 333         {
 334                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
 335                 in_y1 = input->get_h();
 336         }
 337
 338         if(in_x2 < 0)
 339         {
 340                 out_x2 += -in_x2 * w_scale;
 341                 in_x2 = 0;
 342         }
 343         else
 344         if(in_x2 >= input->get_w())
 345         {
 346                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
 347                 in_x2 = input->get_w();
 348         }
 349
 350         if(in_y2 < 0)
 351         {
 352                 out_y2 += -in_y2 * h_scale;
 353                 in_y2 = 0;
 354         }
 355         else
 356         if(in_y2 >= input->get_h())
 357         {
 358                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
 359                 in_y2 = input->get_h();
 360         }
 361
 362         if(out_x1 < 0)
 363         {
 364                 in_x1 += -out_x1 / w_scale;
 365                 out_x1 = 0;
 366         }
 367         else
 368         if(out_x1 >= output->get_w())
 369         {
 370                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
 371                 out_x1 = output->get_w();
 372         }
 373
 374         if(out_y1 < 0)
 375         {
 376                 in_y1 += -out_y1 / h_scale;
 377                 out_y1 = 0;
 378         }
 379         else
 380         if(out_y1 >= output->get_h())
 381         {
 382                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
 383                 out_y1 = output->get_h();
 384         }
 385
 386         if(out_x2 < 0)
 387         {
 388                 in_x2 += -out_x2 / w_scale;
 389                 out_x2 = 0;
 390         }
 391         else
 392         if(out_x2 >= output->get_w())
 393         {
 394                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
 395                 out_x2 = output->get_w();
 396         }
 397
 398         if(out_y2 < 0)
 399         {
 400                 in_y2 += -out_y2 / h_scale;
 401                 out_y2 = 0;
 402         }
 403         else
 404         if(out_y2 >= output->get_h())
 405         {
 406                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
 407                 out_y2 = output->get_h();
 408         }
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419         float in_w = in_x2 - in_x1;
 420         float in_h = in_y2 - in_y1;
 421         float out_w = out_x2 - out_x1;
 422         float out_h = out_y2 - out_y1;
 423 // Input for translation operation
 424         VFrame *translation_input = input;
 425
 426
 427         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
 428
 429
 430 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
 431 //                      in_y1,
 432 //                      in_x2,
 433 //                      in_y2,
 434 //                      out_x1,
 435 //                      out_y1,
 436 //                      out_x2,
 437 //                      out_y2);
 438
 439
 440
 441
 442
 443 // ****************************************************************************
 444 // Transfer to temp buffer by scaling nearest integer boundaries
 445 // ****************************************************************************
 446         if(interpolation_type != NEAREST_NEIGHBOR &&
 447                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
 448         {
 449 // Create integer boundaries for interpolation
 450                 int in_x1_int = (int)in_x1;
 451                 int in_y1_int = (int)in_y1;
 452                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
 453                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
 454
 455 // Dimensions of temp frame.  Integer boundaries scaled.
 456                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
 457                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
 458                 VFrame *scale_output;
 459
 460
 461
 462 #define NO_TRANSLATION1 \
 463         (EQUIV(in_x1, 0) && \
 464         EQUIV(in_y1, 0) && \
 465         EQUIV(out_x1, 0) && \
 466         EQUIV(out_y1, 0) && \
 467         EQUIV(in_x2, in_x2_int) && \
 468         EQUIV(in_y2, in_y2_int) && \
 469         EQUIV(out_x2, temp_w) && \
 470         EQUIV(out_y2, temp_h))
 471
 472
 473 #define NO_BLEND \
 474         (EQUIV(alpha, 1) && \
 475         (mode == TRANSFER_REPLACE || \
 476         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
 477
 478
 479
 480
 481
 482 // Prepare destination for operation
 483
 484 // No translation and no blending.  The blending operation is built into the
 485 // translation unit but not the scaling unit.
 486 // input -> output
 487                 if(NO_TRANSLATION1 &&
 488                         NO_BLEND)
 489                 {
 490 // printf("OverlayFrame::overlay input -> output\n");
 491
 492                         scale_output = output;
 493                         translation_input = 0;
 494                 }
 495                 else
 496 // If translation or blending
 497 // input -> nearest integer boundary temp
 498                 {
 499                         if(temp_frame &&
 500                                 (temp_frame->get_w() != temp_w ||
 501                                         temp_frame->get_h() != temp_h))
 502                         {
 503                                 delete temp_frame;
 504                                 temp_frame = 0;
 505                         }
 506
 507                         if(!temp_frame)
 508                         {
 509                                 temp_frame = new VFrame(0,
 510                                         temp_w,
 511                                         temp_h,
 512                                         input->get_color_model(),
 513                                         -1);
 514                         }
 515 //printf("OverlayFrame::overlay input -> temp\n");
 516
 517
 518                         temp_frame->clear_frame();
 519
 520 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
 521 //      temp_w, temp_h);
 522                         scale_output = temp_frame;
 523                         translation_input = scale_output;
 524
 525 // Adjust input coordinates to reflect new scaled coordinates.
 526                         in_x1 = (in_x1 - in_x1_int) * w_scale;
 527                         in_y1 = (in_y1 - in_y1_int) * h_scale;
 528                         in_x2 = (in_x2 - in_x1_int) * w_scale;
 529                         in_y2 = (in_y2 - in_y1_int) * h_scale;
 530                 }
 531
 532
 533
 534 //printf("Overlay 1\n");
 535
 536 // Scale input -> scale_output
 537                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
 538                 scale_engine->scale_output = scale_output;
 539                 scale_engine->scale_input = input;
 540                 scale_engine->w_scale = w_scale;
 541                 scale_engine->h_scale = h_scale;
 542                 scale_engine->in_x1_int = in_x1_int;
 543                 scale_engine->in_y1_int = in_y1_int;
 544                 scale_engine->out_w_int = temp_w;
 545                 scale_engine->out_h_int = temp_h;
 546                 scale_engine->interpolation_type = interpolation_type;
 547 //printf("Overlay 2\n");
 548
 549 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
 550                 scale_engine->process_packages();
 551 //printf("OverlayFrame::overlay ScaleEngine 2\n");
 552
 553
 554
 555         }
 556
 557 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 558 //      in_x1,
 559 //      in_y1,
 560 //      in_x2,
 561 //      in_y2,
 562 //      out_x1,
 563 //      out_y1,
 564 //      out_x2,
 565 //      out_y2);
 566
 567
 568
 569
 570
 571 #define NO_TRANSLATION2 \
 572         (EQUIV(in_x1, 0) && \
 573         EQUIV(in_y1, 0) && \
 574         EQUIV(in_x2, translation_input->get_w()) && \
 575         EQUIV(in_y2, translation_input->get_h()) && \
 576         EQUIV(out_x1, 0) && \
 577         EQUIV(out_y1, 0) && \
 578         EQUIV(out_x2, output->get_w()) && \
 579         EQUIV(out_y2, output->get_h())) \
 580
 581 #define NO_SCALE \
 582         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
 583         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
 584
 585
 586
 587
 588 //printf("OverlayFrame::overlay 4 %d\n", mode);
 589
 590
 591
 592
 593         if(translation_input)
 594         {
 595 // Direct copy
 596                 if( NO_TRANSLATION2 &&
 597                         NO_SCALE &&
 598                         NO_BLEND)
 599                 {
 600 //printf("OverlayFrame::overlay direct copy\n");
 601                         output->copy_from(translation_input);
 602                 }
 603                 else
 604 // Blend only
 605                 if( NO_TRANSLATION2 &&
 606                         NO_SCALE)
 607                 {
 608                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
 609
 610
 611                         blend_engine->output = output;
 612                         blend_engine->input = translation_input;
 613                         blend_engine->alpha = alpha;
 614                         blend_engine->mode = mode;
 615
 616                         blend_engine->process_packages();
 617                 }
 618                 else
 619 // Scale and translate using nearest neighbor
 620 // Translation is exactly on integer boundaries
 621                 if(interpolation_type == NEAREST_NEIGHBOR ||
 622                         EQUIV(in_x1, (int)in_x1) &&
 623                         EQUIV(in_y1, (int)in_y1) &&
 624                         EQUIV(in_x2, (int)in_x2) &&
 625                         EQUIV(in_y2, (int)in_y2) &&
 626
 627                         EQUIV(out_x1, (int)out_x1) &&
 628                         EQUIV(out_y1, (int)out_y1) &&
 629                         EQUIV(out_x2, (int)out_x2) &&
 630                         EQUIV(out_y2, (int)out_y2))
 631                 {
 632 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
 633                         if(!scaletranslate_engine) scaletranslate_engine =
 634                                 new ScaleTranslateEngine(this, cpus);
 635
 636
 637                         scaletranslate_engine->output = output;
 638                         scaletranslate_engine->input = translation_input;
 639                         scaletranslate_engine->in_x1 = (int)in_x1;
 640                         scaletranslate_engine->in_y1 = (int)in_y1;
 641 // we need to do this mumbo-jumbo in order to get numerical stability
 642 // other option would be to round all the coordinates
 643                         scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1);
 644                         scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
 645                         scaletranslate_engine->out_x1 = (int)out_x1;
 646                         scaletranslate_engine->out_y1 = (int)out_y1;
 647                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
 648                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
 649                         scaletranslate_engine->alpha = alpha;
 650                         scaletranslate_engine->mode = mode;
 651
 652                         scaletranslate_engine->process_packages();
 653                 }
 654                 else
 655 // Fractional translation
 656                 {
 657 // Use fractional translation
 658 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 659 //      in_x1,
 660 //      in_y1,
 661 //      in_x2,
 662 //      in_y2,
 663 //      out_x1,
 664 //      out_y1,
 665 //      out_x2,
 666 //      out_y2);
 667
 668 //printf("Overlay 3\n");
 669                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
 670                         translate_engine->translate_output = output;
 671                         translate_engine->translate_input = translation_input;
 672                         translate_engine->translate_in_x1 = in_x1;
 673                         translate_engine->translate_in_y1 = in_y1;
 674                         translate_engine->translate_in_x2 = in_x2;
 675                         translate_engine->translate_in_y2 = in_y2;
 676                         translate_engine->translate_out_x1 = out_x1;
 677                         translate_engine->translate_out_y1 = out_y1;
 678                         translate_engine->translate_out_x2 = out_x2;
 679                         translate_engine->translate_out_y2 = out_y2;
 680                         translate_engine->translate_alpha = alpha;
 681                         translate_engine->translate_mode = mode;
 682 //printf("Overlay 4\n");
 683
 684 //printf("OverlayFrame::overlay 5 %d\n", mode);
 685                         translate_engine->process_packages();
 686
 687                 }
 688         }
 689 //printf("OverlayFrame::overlay 2\n");
 690
 691         return 0;
 692 }
 693
 694
 695
 696
 697
 698
 699
 700 ScalePackage::ScalePackage()
 701 {
 702 }
 703
 704
 705
 706
 707 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
 708  : LoadClient(server)
 709 {
 710         this->overlay = overlay;
 711         this->engine = server;
 712 }
 713
 714 ScaleUnit::~ScaleUnit()
 715 {
 716 }
 717
 718
 719
 720 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
 721         float scale,
 722         int in_pixel1,
 723         int out_total,
 724         int in_total)
 725 {
 726         table = new bilinear_table_t[out_total];
 727         bzero(table, sizeof(bilinear_table_t) * out_total);
 728 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
 729         for(int i = 0; i < out_total; i++)
 730         {
 731                 float out_start = i;
 732                 float in_start = out_start * scale;
 733                 float out_end = i + 1;
 734                 float in_end = out_end * scale;
 735                 bilinear_table_t *entry = table + i;
 736 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
 737
 738 // Store input fraction
 739                 entry->input_fraction1 = (floor(in_start + 1) - in_start) / scale;
 740                 entry->input_fraction2 = 1.0 / scale;
 741                 entry->input_fraction3 = (in_end - floor(in_end)) / scale;
 742
 743                 if(in_end >= in_total - in_pixel1)
 744                 {
 745                         in_end = in_total - in_pixel1 - 1;
 746
 747                         int difference = (int)in_end - (int)in_start - 1;
 748                         if(difference < 0) difference = 0;
 749                         entry->input_fraction3 = 1.0 -
 750                                 entry->input_fraction1 -
 751                                 entry->input_fraction2 * difference;
 752                 }
 753
 754 // Store input pixels
 755                 entry->input_pixel1 = (int)in_start;
 756                 entry->input_pixel2 = (int)in_end;
 757
 758 // printf("ScaleUnit::tabulate_reduction 1 %d %d %f %f  %f\n",
 759 // entry->input_pixel1,
 760 // entry->input_pixel2,
 761 // entry->input_fraction1,
 762 // entry->input_fraction2,
 763 // entry->input_fraction3);
 764
 765
 766 // Sanity check
 767                 if(entry->input_pixel1 > entry->input_pixel2)
 768                 {
 769                         entry->input_pixel1 = entry->input_pixel2;
 770                         entry->input_fraction1 = 0;
 771                 }
 772
 773 // Get total fraction of output pixel used
 774 //              if(entry->input_pixel2 > entry->input_pixel1)
 775                 entry->total_fraction =
 776                         entry->input_fraction1 +
 777                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
 778                         entry->input_fraction3;
 779                 entry->input_pixel1 += in_pixel1;
 780                 entry->input_pixel2 += in_pixel1;
 781         }
 782 }
 783
 784 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
 785         float scale,
 786         int in_pixel1,
 787         int out_total,
 788         int in_total)
 789 {
 790         table = new bilinear_table_t[out_total];
 791         bzero(table, sizeof(bilinear_table_t) * out_total);
 792
 793         for(int i = 0; i < out_total; i++)
 794         {
 795                 bilinear_table_t *entry = table + i;
 796                 float in_pixel = i * scale;
 797                 entry->input_pixel1 = (int)floor(in_pixel);
 798                 entry->input_pixel2 = entry->input_pixel1 + 1;
 799
 800                 if(in_pixel <= in_total)
 801                 {
 802                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
 803                 }
 804                 else
 805                 {
 806                         entry->input_fraction3 = 0;
 807                         entry->input_pixel2 = 0;
 808                 }
 809
 810                 if(in_pixel >= 0)
 811                 {
 812                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
 813                 }
 814                 else
 815                 {
 816                         entry->input_fraction1 = 0;
 817                         entry->input_pixel1 = 0;
 818                 }
 819
 820                 if(entry->input_pixel2 >= in_total - in_pixel1)
 821                 {
 822                         entry->input_pixel2 = entry->input_pixel1;
 823                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
 824                 }
 825
 826                 entry->total_fraction =
 827                         entry->input_fraction1 +
 828                         entry->input_fraction3;
 829                 entry->input_pixel1 += in_pixel1;
 830                 entry->input_pixel2 += in_pixel1;
 831 //
 832 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
 833 // entry->input_pixel1,
 834 // entry->input_pixel2,
 835 // entry->input_fraction1,
 836 // entry->input_fraction2,
 837 // entry->input_fraction3);
 838         }
 839 }
 840
 841 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
 842 {
 843         printf("ScaleUnit::dump_bilinear\n");
 844         for(int i = 0; i < total; i++)
 845         {
 846                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
 847                         i,
 848                         table[i].input_pixel1,
 849                         table[i].input_pixel2,
 850                         table[i].input_fraction1,
 851                         table[i].input_fraction2,
 852                         table[i].input_fraction3,
 853                         table[i].total_fraction);
 854         }
 855 }
 856
 857 #define PIXEL_REDUCE_MACRO(type, components, row) \
 858 { \
 859         type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
 860         type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
 861  \
 862 /* Do first pixel */ \
 863         temp_f1 += input_scale1 * input_row[0]; \
 864         temp_f2 += input_scale1 * input_row[1]; \
 865         temp_f3 += input_scale1 * input_row[2]; \
 866         if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
 867  \
 868 /* Do last pixel */ \
 869 /*      if(input_row < input_end) */\
 870         { \
 871                 temp_f1 += input_scale3 * input_end[0]; \
 872                 temp_f2 += input_scale3 * input_end[1]; \
 873                 temp_f3 += input_scale3 * input_end[2]; \
 874                 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
 875         } \
 876  \
 877 /* Do middle pixels */ \
 878         for(input_row += components; input_row < input_end; input_row += components) \
 879         { \
 880                 temp_f1 += input_scale2 * input_row[0]; \
 881                 temp_f2 += input_scale2 * input_row[1]; \
 882                 temp_f3 += input_scale2 * input_row[2]; \
 883                 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
 884         } \
 885 }
 886
 887 // Bilinear reduction and suboptimal enlargement.
 888 // Very high quality.
 889 #define BILINEAR_REDUCE(max, type, components) \
 890 { \
 891         bilinear_table_t *x_table, *y_table; \
 892         int out_h = pkg->out_row2 - pkg->out_row1; \
 893         type **in_rows = (type**)input->get_rows(); \
 894         type **out_rows = (type**)output->get_rows(); \
 895  \
 896         if(scale_w < 1) \
 897                 tabulate_reduction(x_table, \
 898                         1.0 / scale_w, \
 899                         in_x1_int, \
 900                         out_w_int, \
 901                         input->get_w()); \
 902         else \
 903                 tabulate_enlarge(x_table, \
 904                         1.0 / scale_w, \
 905                         in_x1_int, \
 906                         out_w_int, \
 907                         input->get_w()); \
 908  \
 909         if(scale_h < 1) \
 910                 tabulate_reduction(y_table, \
 911                         1.0 / scale_h, \
 912                         in_y1_int, \
 913                         out_h_int, \
 914                         input->get_h()); \
 915         else \
 916                 tabulate_enlarge(y_table, \
 917                         1.0 / scale_h, \
 918                         in_y1_int, \
 919                         out_h_int, \
 920                         input->get_h()); \
 921 /* dump_bilinear(y_table, out_h_int); */\
 922  \
 923         for(int i = 0; i < out_h; i++) \
 924         { \
 925                 type *out_row = out_rows[i + pkg->out_row1]; \
 926                 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 927 /*printf("BILINEAR_REDUCE 2 %d %d %d\n", i, y_entry->input_pixel1, y_entry->input_pixel2); */\
 928  \
 929                 for(int j = 0; j < out_w_int; j++) \
 930                 { \
 931                         bilinear_table_t *x_entry = &x_table[j]; \
 932 /* Load rounding factors */ \
 933                         float temp_f1; \
 934                         float temp_f2; \
 935                         float temp_f3; \
 936                         float temp_f4; \
 937                         if(sizeof(type) != 4) \
 938                                 temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
 939                         else \
 940                                 temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
 941  \
 942 /* First row */ \
 943                         float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
 944                         float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
 945                         float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
 946                         PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 947  \
 948 /* Last row */ \
 949                         if(out_h) \
 950                         { \
 951                                 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
 952                                 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
 953                                 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
 954                                 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 955  \
 956 /* Middle rows */ \
 957                                 if(out_h > 1) \
 958                                 { \
 959                                         input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
 960                                         input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
 961                                         input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
 962                                         for(int k = y_entry->input_pixel1 + 1; \
 963                                                 k < y_entry->input_pixel2; \
 964                                                 k++) \
 965                                         { \
 966                                                 PIXEL_REDUCE_MACRO(type, components, k) \
 967                                         } \
 968                                 } \
 969                         } \
 970  \
 971                         if(max != 1.0) \
 972                         { \
 973                                 if(temp_f1 > max) temp_f1 = max; \
 974                                 if(temp_f2 > max) temp_f2 = max; \
 975                                 if(temp_f3 > max) temp_f3 = max; \
 976                                 if(components == 4) if(temp_f4 > max) temp_f4 = max; \
 977                         } \
 978                         out_row[j * components    ] = (type)temp_f1; \
 979                         out_row[j * components + 1] = (type)temp_f2; \
 980                         out_row[j * components + 2] = (type)temp_f3; \
 981                         if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
 982                 } \
 983 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
 984         } \
 985  \
 986         delete [] x_table; \
 987         delete [] y_table; \
 988 }
 989
 990
 991
 992 // Only 2 input pixels
 993 #define BILINEAR_ENLARGE(max, type, components) \
 994 { \
 995 /*printf("BILINEAR_ENLARGE 1\n");*/ \
 996         float k_y = 1.0 / scale_h; \
 997         float k_x = 1.0 / scale_w; \
 998         type **in_rows = (type**)input->get_rows(); \
 999         type **out_rows = (type**)output->get_rows(); \
1000         int out_h = pkg->out_row2 - pkg->out_row1; \
1001         int in_h_int = input->get_h(); \
1002         int in_w_int = input->get_w(); \
1003         int *table_int_x1, *table_int_y1; \
1004         int *table_int_x2, *table_int_y2; \
1005         float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
1006         int *table_frac_x_i, *table_antifrac_x_i, *table_frac_y_i, *table_antifrac_y_i; \
1007  \
1008         tabulate_blinear_f(table_int_x1,  \
1009                 table_int_x2,  \
1010                 table_frac_x_f,  \
1011                 table_antifrac_x_f,  \
1012                 k_x,  \
1013                 0,  \
1014                 out_w_int, \
1015                 in_x1_int,  \
1016                 in_w_int); \
1017         tabulate_blinear_f(table_int_y1,  \
1018                 table_int_y2,  \
1019                 table_frac_y_f,  \
1020                 table_antifrac_y_f,  \
1021                 k_y,  \
1022                 pkg->out_row1,  \
1023                 pkg->out_row2,  \
1024                 in_y1_int, \
1025                 in_h_int); \
1026  \
1027         for(int i = 0; i < out_h; i++) \
1028         { \
1029                 int i_y1 = table_int_y1[i]; \
1030                 int i_y2 = table_int_y2[i]; \
1031                 float a_f; \
1032         float anti_a_f; \
1033                 uint64_t a_i; \
1034         uint64_t anti_a_i; \
1035                 a_f = table_frac_y_f[i]; \
1036         anti_a_f = table_antifrac_y_f[i]; \
1037                 type *in_row1 = in_rows[i_y1]; \
1038                 type *in_row2 = in_rows[i_y2]; \
1039                 type *out_row = out_rows[i + pkg->out_row1]; \
1040  \
1041                 for(int j = 0; j < out_w_int; j++) \
1042                 { \
1043                         int i_x1 = table_int_x1[j]; \
1044                         int i_x2 = table_int_x2[j]; \
1045                         float output1r, output1g, output1b, output1a; \
1046                         float output2r, output2g, output2b, output2a; \
1047                         float output3r, output3g, output3b, output3a; \
1048                         float output4r, output4g, output4b, output4a; \
1049                         float b_f; \
1050                         float anti_b_f; \
1051                         b_f = table_frac_x_f[j]; \
1052                         anti_b_f = table_antifrac_x_f[j]; \
1053 \
1054                 output1r = in_row1[i_x1 * components]; \
1055                 output1g = in_row1[i_x1 * components + 1]; \
1056                 output1b = in_row1[i_x1 * components + 2]; \
1057                 if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
1058 \
1059                 output2r = in_row1[i_x2 * components]; \
1060                 output2g = in_row1[i_x2 * components + 1]; \
1061                 output2b = in_row1[i_x2 * components + 2]; \
1062                 if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
1063 \
1064                 output3r = in_row2[i_x1 * components]; \
1065                 output3g = in_row2[i_x1 * components + 1]; \
1066                 output3b = in_row2[i_x1 * components + 2]; \
1067                 if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
1068 \
1069                 output4r = in_row2[i_x2 * components]; \
1070                 output4g = in_row2[i_x2 * components + 1]; \
1071                 output4b = in_row2[i_x2 * components + 2]; \
1072                 if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
1073 \
1074                         out_row[j * components] =  \
1075                                 (type)(anti_a_f * (anti_b_f * output1r +  \
1076                                 b_f * output2r) +  \
1077                 a_f * (anti_b_f * output3r +  \
1078                                 b_f * output4r)); \
1079                         out_row[j * components + 1] =   \
1080                                 (type)(anti_a_f * (anti_b_f * output1g +  \
1081                                 b_f * output2g) +  \
1082                 a_f * ((anti_b_f * output3g) +  \
1083                                 b_f * output4g)); \
1084                         out_row[j * components + 2] =   \
1085                                 (type)(anti_a_f * ((anti_b_f * output1b) +  \
1086                                 (b_f * output2b)) +  \
1087                 a_f * ((anti_b_f * output3b) +  \
1088                                 b_f * output4b)); \
1089                         if(components == 4) \
1090                                 out_row[j * components + 3] =   \
1091                                         (type)(anti_a_f * ((anti_b_f * output1a) +  \
1092                                         (b_f * output2a)) +  \
1093                         a_f * ((anti_b_f * output3a) +  \
1094                                         b_f * output4a)); \
1095                 } \
1096         } \
1097  \
1098  \
1099         delete [] table_int_x1; \
1100         delete [] table_int_x2; \
1101         delete [] table_int_y1; \
1102         delete [] table_int_y2; \
1103         delete [] table_frac_x_f; \
1104         delete [] table_antifrac_x_f; \
1105         delete [] table_frac_y_f; \
1106         delete [] table_antifrac_y_f; \
1107  \
1108 /*printf("BILINEAR_ENLARGE 2\n");*/ \
1109 }
1110
1111
// Scaling with a 4 x 4 kernel.  Expands inside ScaleUnit::process_package and
// uses its locals pkg, input, output, scale_w, scale_h, in_x1_int, in_y1_int,
// out_w_int and out_h_int.  type is the sample type and components the number
// of samples per pixel (3 or 4); max is not referenced.
//
// tabulate_bcubic_f supplies four weights and four input indices for every
// output column and for every output row of the whole frame.  Each output
// sample of rows pkg->out_row1 .. pkg->out_row2 - 1 is the sum over the 16
// taps of (row weight * column weight * input sample), converted to type by
// a plain cast.
#define BICUBIC(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	float *bspline_x_f, *bspline_y_f; \
	int *in_x_table, *in_y_table; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
 \
	tabulate_bcubic_f(bspline_x_f, \
		in_x_table, \
		k_x, \
		in_x1_int, \
		out_w_int, \
		in_w_int, \
		-1); \
 \
	tabulate_bcubic_f(bspline_y_f, \
		in_y_table, \
		k_y, \
		in_y1_int, \
		out_h_int, \
		in_h_int, \
		1); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
/* Four taps per output row, indexed by absolute output row */ \
		float *y_weights = bspline_y_f + i * 4; \
		int *y_coords = in_y_table + i * 4; \
		type *dst = out_rows[i]; \
 \
		for(int j = 0; j < out_w_int; j++, dst += components) \
		{ \
			float *x_weights = bspline_x_f + j * 4; \
			int *x_coords = in_x_table + j * 4; \
			float sums[4] = { 0, 0, 0, 0 }; \
 \
/* Kernel */ \
			for(int ky = 0; ky < 4; ky++) \
			{ \
				type *src_row = in_rows[y_coords[ky]]; \
 \
				for(int kx = 0; kx < 4; kx++) \
				{ \
					float weight = y_weights[ky] * x_weights[kx]; \
					type *src = src_row + x_coords[kx] * components; \
 \
					for(int c = 0; c < components; c++) \
						sums[c] += weight * src[c]; \
				} \
			} \
 \
			for(int c = 0; c < components; c++) \
				dst[c] = (type)sums[c]; \
		} \
	} \
 \
	delete [] bspline_x_f; \
	delete [] bspline_y_f; \
	delete [] in_x_table; \
	delete [] in_y_table; \
}
1198
1199
1200
1201
1202 // Pow function is not thread safe in Compaqt C
1203 #define CUBE(x) ((x) * (x) * (x))
1204
1205 float ScaleUnit::cubic_bspline(float x)
1206 {
1207         float a, b, c, d;
1208
1209         if((x + 2.0F) <= 0.0F)
1210         {
1211         a = 0.0F;
1212         }
1213         else
1214         {
1215         a = CUBE(x + 2.0F);
1216         }
1217
1218
1219         if((x + 1.0F) <= 0.0F)
1220         {
1221         b = 0.0F;
1222         }
1223         else
1224         {
1225         b = CUBE(x + 1.0F);
1226         }
1227
1228         if(x <= 0)
1229         {
1230         c = 0.0F;
1231         }
1232         else
1233         {
1234         c = CUBE(x);
1235         }
1236
1237         if((x - 1.0F) <= 0.0F)
1238         {
1239         d = 0.0F;
1240         }
1241         else
1242         {
1243         d = CUBE(x - 1.0F);
1244         }
1245
1246
1247         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1248 }
1249
1250
1251 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1252         int* &coord_table,
1253         float scale,
1254         int start,
1255         int pixels,
1256         int total_pixels,
1257         float coefficient)
1258 {
1259         coef_table = new float[pixels * 4];
1260         coord_table = new int[pixels * 4];
1261         for(int i = 0, j = 0; i < pixels; i++)
1262         {
1263                 float f_x = (float)i * scale;
1264                 float a = f_x - floor(f_x);
1265
1266                 for(float m = -1; m < 3; m++)
1267                 {
1268                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1269                         coord_table[j] = (int)(start + (int)f_x + m);
1270                         CLAMP(coord_table[j], 0, total_pixels - 1);
1271                         j++;
1272                 }
1273
1274         }
1275 }
1276
1277 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1278         int* &coord_table,
1279         float scale,
1280         int start,
1281         int pixels,
1282         int total_pixels,
1283         float coefficient)
1284 {
1285         coef_table = new int[pixels * 4];
1286         coord_table = new int[pixels * 4];
1287         for(int i = 0, j = 0; i < pixels; i++)
1288         {
1289                 float f_x = (float)i * scale;
1290                 float a = f_x - floor(f_x);
1291
1292                 for(float m = -1; m < 3; m++)
1293                 {
1294                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1295                         coord_table[j] = (int)(start + (int)f_x + m);
1296                         CLAMP(coord_table[j], 0, total_pixels - 1);
1297                         j++;
1298                 }
1299
1300         }
1301 }
1302
1303 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1304                 int* &table_int2,
1305                 float* &table_frac,
1306                 float* &table_antifrac,
1307                 float scale,
1308                 int pixel1,
1309                 int pixel2,
1310                 int start,
1311                 int total_pixels)
1312 {
1313         table_int1 = new int[pixel2 - pixel1];
1314         table_int2 = new int[pixel2 - pixel1];
1315         table_frac = new float[pixel2 - pixel1];
1316         table_antifrac = new float[pixel2 - pixel1];
1317
1318         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1319         {
1320                 float f_x = (float)i * scale;
1321                 int i_x = (int)floor(f_x);
1322                 float a = (f_x - floor(f_x));
1323
1324                 table_int1[j] = i_x + start;
1325                 table_int2[j] = i_x + start + 1;
1326                 CLAMP(table_int1[j], 0, total_pixels - 1);
1327                 CLAMP(table_int2[j], 0, total_pixels - 1);
1328                 table_frac[j] = a;
1329                 table_antifrac[j] = 1.0F - a;
1330 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1331         }
1332 }
1333
1334 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1335                 int* &table_int2,
1336                 int* &table_frac,
1337                 int* &table_antifrac,
1338                 float scale,
1339                 int pixel1,
1340                 int pixel2,
1341                 int start,
1342                 int total_pixels)
1343 {
1344         table_int1 = new int[pixel2 - pixel1];
1345         table_int2 = new int[pixel2 - pixel1];
1346         table_frac = new int[pixel2 - pixel1];
1347         table_antifrac = new int[pixel2 - pixel1];
1348
1349         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1350         {
1351                 double f_x = (float)i * scale;
1352                 int i_x = (int)floor(f_x);
1353                 float a = (f_x - floor(f_x));
1354
1355                 table_int1[j] = i_x + start;
1356                 table_int2[j] = i_x + start + 1;
1357                 CLAMP(table_int1[j], 0, total_pixels - 1);
1358                 CLAMP(table_int2[j], 0, total_pixels - 1);
1359                 table_frac[j] = (int)(a * 0xffff);
1360                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1361 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1362         }
1363 }
1364
1365 void ScaleUnit::process_package(LoadPackage *package)
1366 {
1367         ScalePackage *pkg = (ScalePackage*)package;
1368
1369 //printf("ScaleUnit::process_package 1\n");
1370 // Arguments for macros
1371         VFrame *output = engine->scale_output;
1372         VFrame *input = engine->scale_input;
1373         float scale_w = engine->w_scale;
1374         float scale_h = engine->h_scale;
1375         int in_x1_int = engine->in_x1_int;
1376         int in_y1_int = engine->in_y1_int;
1377         int out_h_int = engine->out_h_int;
1378         int out_w_int = engine->out_w_int;
1379         int do_yuv =
1380                 (input->get_color_model() == BC_YUV888 ||
1381                 input->get_color_model() == BC_YUVA8888 ||
1382                 input->get_color_model() == BC_YUV161616 ||
1383                 input->get_color_model() == BC_YUVA16161616);
1384
1385 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1386         if(engine->interpolation_type == CUBIC_CUBIC ||
1387                 (engine->interpolation_type == CUBIC_LINEAR
1388                         && engine->w_scale > 1 &&
1389                         engine->h_scale > 1))
1390         {
1391                 switch(engine->scale_input->get_color_model())
1392                 {
1393                         case BC_RGB_FLOAT:
1394                                 BICUBIC(1.0, float, 3);
1395                                 break;
1396
1397                         case BC_RGBA_FLOAT:
1398                                 BICUBIC(1.0, float, 4);
1399                                 break;
1400
1401                         case BC_RGB888:
1402                         case BC_YUV888:
1403                                 BICUBIC(0xff, unsigned char, 3);
1404                                 break;
1405
1406                         case BC_RGBA8888:
1407                         case BC_YUVA8888:
1408                                 BICUBIC(0xff, unsigned char, 4);
1409                                 break;
1410
1411                         case BC_RGB161616:
1412                         case BC_YUV161616:
1413                                 BICUBIC(0xffff, uint16_t, 3);
1414                                 break;
1415
1416                         case BC_RGBA16161616:
1417                         case BC_YUVA16161616:
1418                                 BICUBIC(0xffff, uint16_t, 4);
1419                                 break;
1420                 }
1421         }
1422         else
1423 // Perform bilinear scaling input -> scale_output
1424         if(engine->w_scale > 1 &&
1425                 engine->h_scale > 1)
1426         {
1427                 switch(engine->scale_input->get_color_model())
1428                 {
1429                         case BC_RGB_FLOAT:
1430                                 BILINEAR_ENLARGE(1.0, float, 3);
1431                                 break;
1432
1433                         case BC_RGBA_FLOAT:
1434                                 BILINEAR_ENLARGE(1.0, float, 4);
1435                                 break;
1436
1437                         case BC_RGB888:
1438                         case BC_YUV888:
1439                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1440                                 break;
1441
1442                         case BC_RGBA8888:
1443                         case BC_YUVA8888:
1444                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1445                                 break;
1446
1447                         case BC_RGB161616:
1448                         case BC_YUV161616:
1449                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1450                                 break;
1451
1452                         case BC_RGBA16161616:
1453                         case BC_YUVA16161616:
1454                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1455                                 break;
1456                 }
1457         }
1458         else
1459 // Bilinear reduction
1460         {
1461                 switch(engine->scale_input->get_color_model())
1462                 {
1463                         case BC_RGB_FLOAT:
1464                                 BILINEAR_REDUCE(1.0, float, 3);
1465                                 break;
1466                         case BC_RGBA_FLOAT:
1467                                 BILINEAR_REDUCE(1.0, float, 4);
1468                                 break;
1469                         case BC_RGB888:
1470                         case BC_YUV888:
1471                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1472                                 break;
1473
1474                         case BC_RGBA8888:
1475                         case BC_YUVA8888:
1476                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1477                                 break;
1478
1479                         case BC_RGB161616:
1480                         case BC_YUV161616:
1481                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1482                                 break;
1483
1484                         case BC_RGBA16161616:
1485                         case BC_YUVA16161616:
1486                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1487                                 break;
1488                 }
1489         }
1490 //printf("ScaleUnit::process_package 3\n");
1491
1492 }
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1507  : LoadServer(cpus, cpus)
1508 {
1509         this->overlay = overlay;
1510 }
1511
1512 ScaleEngine::~ScaleEngine()
1513 {
1514 }
1515
1516 void ScaleEngine::init_packages()
1517 {
1518         for(int i = 0; i < total_packages; i++)
1519         {
1520                 ScalePackage *package = (ScalePackage*)packages[i];
1521                 package->out_row1 = out_h_int / total_packages * i;
1522                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1523
1524                 if(i >= total_packages - 1)
1525                         package->out_row2 = out_h_int;
1526         }
1527 }
1528
1529 LoadClient* ScaleEngine::new_client()
1530 {
1531         return new ScaleUnit(this, overlay);
1532 }
1533
1534 LoadPackage* ScaleEngine::new_package()
1535 {
1536         return new ScalePackage;
1537 }
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551 TranslatePackage::TranslatePackage()
1552 {
1553 }
1554
1555
1556
1557 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1558  : LoadClient(server)
1559 {
1560         this->overlay = overlay;
1561         this->engine = server;
1562 }
1563
1564 TranslateUnit::~TranslateUnit()
1565 {
1566 }
1567
1568
1569
1570 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1571         float out_x1,
1572         float out_x2,
1573         float in_x1,
1574         float in_x2,
1575         int in_total,
1576         int out_total,
1577         int &out_x1_int,
1578         int &out_x2_int)
1579 {
1580         int out_w_int;
1581         float offset = out_x1 - in_x1;
1582 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1583
1584         out_x1_int = (int)out_x1;
1585         out_x2_int = MIN((int)ceil(out_x2), out_total);
1586         out_w_int = out_x2_int - out_x1_int;
1587
1588         table = new transfer_table_f[out_w_int];
1589         bzero(table, sizeof(transfer_table_f) * out_w_int);
1590
1591
1592 //printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1593
1594         float in_x = in_x1;
1595         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1596         {
1597                 transfer_table_f *entry = &table[out_x - out_x1_int];
1598
1599                 entry->in_x1 = (int)in_x;
1600                 entry->in_x2 = (int)in_x + 1;
1601
1602 // Get fraction of output pixel to fill
1603                 entry->output_fraction = 1;
1604
1605                 if(out_x1 > out_x)
1606                 {
1607                         entry->output_fraction -= out_x1 - out_x;
1608                 }
1609
1610                 if(out_x2 < out_x + 1)
1611                 {
1612                         entry->output_fraction = (out_x2 - out_x);
1613                 }
1614
1615 // Advance in_x until out_x_fraction is filled
1616                 float out_x_fraction = entry->output_fraction;
1617                 float in_x_fraction = floor(in_x + 1) - in_x;
1618
1619                 if(out_x_fraction <= in_x_fraction)
1620                 {
1621                         entry->in_fraction1 = out_x_fraction;
1622                         entry->in_fraction2 = 0.0;
1623                         in_x += out_x_fraction;
1624                 }
1625                 else
1626                 {
1627                         entry->in_fraction1 = in_x_fraction;
1628                         in_x += out_x_fraction;
1629                         entry->in_fraction2 = in_x - floor(in_x);
1630                 }
1631
1632 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1633                 if(entry->in_x2 >= in_total)
1634                 {
1635                         entry->in_x2 = in_total - 1;
1636                         entry->in_fraction2 = 0.0;
1637                 }
1638
1639                 if(entry->in_x1 >= in_total)
1640                 {
1641                         entry->in_x1 = in_total - 1;
1642                         entry->in_fraction1 = 0.0;
1643                 }
1644 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1645 //      out_x,
1646 //      entry->in_x1,
1647 //      entry->in_x2,
1648 //      entry->in_fraction1,
1649 //      entry->in_fraction2,
1650 //      entry->output_fraction);
1651         }
1652 }
1653
1654
1655 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1656         float out_x1,
1657         float out_x2,
1658         float in_x1,
1659         float in_x2,
1660         int in_total,
1661         int out_total,
1662         int &out_x1_int,
1663         int &out_x2_int)
1664 {
1665         int out_w_int;
1666         float offset = out_x1 - in_x1;
1667
1668         out_x1_int = (int)out_x1;
1669         out_x2_int = MIN((int)ceil(out_x2), out_total);
1670         out_w_int = out_x2_int - out_x1_int;
1671
1672         table = new transfer_table_i[out_w_int];
1673         bzero(table, sizeof(transfer_table_i) * out_w_int);
1674
1675
1676 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1677
1678         float in_x = in_x1;
1679         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1680         {
1681                 transfer_table_i *entry = &table[out_x - out_x1_int];
1682
1683                 entry->in_x1 = (int)in_x;
1684                 entry->in_x2 = (int)in_x + 1;
1685
1686 // Get fraction of output pixel to fill
1687                 entry->output_fraction = 0x10000;
1688
1689                 if(out_x1 > out_x)
1690                 {
1691                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1692                 }
1693
1694                 if(out_x2 < out_x + 1)
1695                 {
1696                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1697                 }
1698
1699 // Advance in_x until out_x_fraction is filled
1700                 int out_x_fraction = entry->output_fraction;
1701                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1702
1703                 if(out_x_fraction <= in_x_fraction)
1704                 {
1705                         entry->in_fraction1 = out_x_fraction;
1706                         entry->in_fraction2 = 0;
1707                         in_x += (float)out_x_fraction / 0x10000;
1708                 }
1709                 else
1710                 {
1711                         entry->in_fraction1 = in_x_fraction;
1712                         in_x += (float)out_x_fraction / 0x10000;
1713                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1714                 }
1715
1716 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1717                 if(entry->in_x2 >= in_total)
1718                 {
1719                         entry->in_x2 = in_total - 1;
1720                         entry->in_fraction2 = 0;
1721                 }
1722
1723                 if(entry->in_x1 >= in_total)
1724                 {
1725                         entry->in_x1 = in_total - 1;
1726                         entry->in_fraction1 = 0;
1727                 }
1728 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1729 //      out_x,
1730 //      entry->in_x1,
1731 //      entry->in_x2,
1732 //      entry->in_fraction1,
1733 //      entry->in_fraction2,
1734 //      entry->output_fraction);
1735         }
1736 }
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
// TRANSLATE: sub-pixel translation of `input` onto `output` for the output
// rows [row1, row2).  Every output pixel is a weighted sum of up to four
// source pixels and is then composited with BLEND_3 or BLEND_4.
//
// Macro arguments:
//   max            full scale component value (0xff, 0xffff or 1.0 at the
//                  call sites in TranslateUnit::process_package)
//   temp_type      arithmetic type of the interpolated values and opacity
//   type           stored component type
//   components     components per pixel, 3 or 4
//   chroma_offset  0x80 / 0x8000 for the YUV call sites, 0 otherwise
//
// Names taken from the expanding scope: input, output, alpha, row1, row2,
// x_table_f, y_table_f, out_x1_int, out_x2_int, out_y1_int.
// BLEND_3/BLEND_4 are defined elsewhere in this file and switch on `mode`;
// presumably they also consume input1..input4, opacity, transparency and the
// inner `output` pointer -- confirm against their definitions before renaming.
1771 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1772 { \
1773  \
1774         type **in_rows = (type**)input->get_rows(); \
1775         type **out_rows = (type**)output->get_rows(); \
1776  \
1777  \
/* sizeof(type) == 4 is the float case at the call sites: no 0.5 rounding there */ \
1778         temp_type master_opacity; \
1779         if(sizeof(type) != 4) \
1780                 master_opacity = (temp_type)(alpha * max + 0.5); \
1781         else \
1782                 master_opacity = (temp_type)(alpha * max); \
/* master_transparency is not referenced again in this macro body */ \
1783         temp_type master_transparency = max - master_opacity; \
1784         float round = 0.0; \
1785         if(sizeof(type) != 4) \
1786                 round = 0.5; \
1787  \
1788  \
1789         for(int i = row1; i < row2; i++) \
1790         { \
/* Source rows and weights for this output row; the table is indexed from out_y1_int */ \
1791                 int in_y1; \
1792                 int in_y2; \
1793                 float y_fraction1_f; \
1794                 float y_fraction2_f; \
1795                 float y_output_fraction_f; \
1796                 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1797                 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1798                 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1799                 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1800                 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1801                 type *in_row1 = in_rows[(in_y1)]; \
1802                 type *in_row2 = in_rows[(in_y2)]; \
1803                 type *out_row = out_rows[i]; \
1804  \
1805                 for(int j = out_x1_int; j < out_x2_int; j++) \
1806                 { \
/* Source columns and weights for this output column */ \
1807                         int in_x1; \
1808                         int in_x2; \
1809                         float x_fraction1_f; \
1810                         float x_fraction2_f; \
1811                         float x_output_fraction_f; \
1812                         in_x1 = x_table_f[j - out_x1_int].in_x1; \
1813                         in_x2 = x_table_f[j - out_x1_int].in_x2; \
1814                         x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1815                         x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1816                         x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
/* From here on `output` is the destination pixel and hides the VFrame of the same name */ \
1817                         type *output = &out_row[j * components]; \
1818                         temp_type input1, input2, input3, input4; \
1819  \
/* Weight of each of the four source pixels: product of its x and y fraction */ \
1820                         float fraction1 = x_fraction1_f * y_fraction1_f; \
1821                         float fraction2 = x_fraction2_f * y_fraction1_f; \
1822                         float fraction3 = x_fraction1_f * y_fraction2_f; \
1823                         float fraction4 = x_fraction2_f * y_fraction2_f; \
1824  \
1825                         input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1826                                 in_row1[in_x2 * components] * fraction2 +  \
1827                                 in_row2[in_x1 * components] * fraction3 +  \
1828                                 in_row2[in_x2 * components] * fraction4 + round); \
1829  \
1830 /* Add chroma to fractional pixels */ \
/* The weight missing from 1.0 is filled with chroma_offset on components 1 and 2 */ \
1831                         if(chroma_offset) \
1832                         { \
1833                                 float extra_chroma = (1.0F - \
1834                                         fraction1 - \
1835                                         fraction2 - \
1836                                         fraction3 - \
1837                                         fraction4) * chroma_offset; \
1838                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1839                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1840                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1841                                         in_row2[in_x2 * components + 1] * fraction4 + \
1842                                         extra_chroma + round); \
1843                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1844                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1845                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1846                                         in_row2[in_x2 * components + 2] * fraction4 +  \
1847                                         extra_chroma + round); \
1848                         } \
1849                         else \
1850                         { \
1851                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1852                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1853                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1854                                         in_row2[in_x2 * components + 1] * fraction4 + round); \
1855                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1856                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1857                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1858                                         in_row2[in_x2 * components + 2] * fraction4 + round); \
1859                         } \
1860  \
/* input4 is only assigned for 4 component pixels */ \
1861                         if(components == 4) \
1862                                 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1863                                         in_row1[in_x2 * components + 3] * fraction2 +  \
1864                                         in_row2[in_x1 * components + 3] * fraction3 +  \
1865                                         in_row2[in_x2 * components + 3] * fraction4 + round); \
1866  \
/* Scale the opacity by the fraction of this output pixel that is covered */ \
1867                         temp_type opacity; \
1868                         if(sizeof(type) != 4) \
1869                                 opacity = (temp_type)(master_opacity *  \
1870                                         y_output_fraction_f *  \
1871                                         x_output_fraction_f + 0.5); \
1872                         else \
1873                                 opacity = (temp_type)(master_opacity *  \
1874                                         y_output_fraction_f *  \
1875                                         x_output_fraction_f); \
1876                         temp_type transparency = max - opacity; \
1877  \
1878 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
1879  \
1880                         if(components == 3) \
1881                         { \
1882                                 BLEND_3(max, temp_type, type, chroma_offset); \
1883                         } \
1884                         else \
1885                         { \
1886                                 BLEND_4(max, temp_type, type, chroma_offset); \
1887                         } \
1888                 } \
1889         } \
1890 }
1891
1892 void TranslateUnit::process_package(LoadPackage *package)
1893 {
1894         TranslatePackage *pkg = (TranslatePackage*)package;
1895         int out_y1_int;
1896         int out_y2_int;
1897         int out_x1_int;
1898         int out_x2_int;
1899
1900
1901 // Variables for TRANSLATE
1902         VFrame *input = engine->translate_input;
1903         VFrame *output = engine->translate_output;
1904         float in_x1 = engine->translate_in_x1;
1905         float in_y1 = engine->translate_in_y1;
1906         float in_x2 = engine->translate_in_x2;
1907         float in_y2 = engine->translate_in_y2;
1908         float out_x1 = engine->translate_out_x1;
1909         float out_y1 = engine->translate_out_y1;
1910         float out_x2 = engine->translate_out_x2;
1911         float out_y2 = engine->translate_out_y2;
1912         float alpha = engine->translate_alpha;
1913         int row1 = pkg->out_row1;
1914         int row2 = pkg->out_row2;
1915         int mode = engine->translate_mode;
1916         int in_total_x = input->get_w();
1917         int in_total_y = input->get_h();
1918         int do_yuv =
1919                 (engine->translate_input->get_color_model() == BC_YUV888 ||
1920                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1921                 engine->translate_input->get_color_model() == BC_YUV161616 ||
1922                 engine->translate_input->get_color_model() == BC_YUVA16161616);
1923
1924         transfer_table_f *x_table_f;
1925         transfer_table_f *y_table_f;
1926         transfer_table_i *x_table_i;
1927         transfer_table_i *y_table_i;
1928
1929         translation_array_f(x_table_f,
1930                 out_x1,
1931                 out_x2,
1932                 in_x1,
1933                 in_x2,
1934                 in_total_x,
1935                 output->get_w(),
1936                 out_x1_int,
1937                 out_x2_int);
1938         translation_array_f(y_table_f,
1939                 out_y1,
1940                 out_y2,
1941                 in_y1,
1942                 in_y2,
1943                 in_total_y,
1944                 output->get_h(),
1945                 out_y1_int,
1946                 out_y2_int);
1947 //      printf("TranslateUnit::process_package 1 %d\n", mode);
1948 //      Timer a;
1949 //      a.update();
1950
1951         switch(engine->translate_input->get_color_model())
1952         {
1953                 case BC_RGB888:
1954                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
1955                         break;
1956
1957                 case BC_RGBA8888:
1958                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
1959                         break;
1960
1961                 case BC_RGB_FLOAT:
1962                         TRANSLATE(1.0, float, float, 3, 0);
1963                         break;
1964
1965                 case BC_RGBA_FLOAT:
1966                         TRANSLATE(1.0, float, float, 4, 0);
1967                         break;
1968
1969                 case BC_RGB161616:
1970                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
1971                         break;
1972
1973                 case BC_RGBA16161616:
1974                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
1975                         break;
1976
1977                 case BC_YUV888:
1978                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
1979                         break;
1980
1981                 case BC_YUVA8888:
1982                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
1983                         break;
1984
1985                 case BC_YUV161616:
1986                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
1987                         break;
1988
1989                 case BC_YUVA16161616:
1990                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
1991                         break;
1992         }
1993 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
1994
1995         delete [] x_table_f;
1996         delete [] y_table_f;
1997 }
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2009  : LoadServer(cpus, cpus)
2010 {
2011         this->overlay = overlay;
2012 }
2013
2014 TranslateEngine::~TranslateEngine()
2015 {
2016 }
2017
2018 void TranslateEngine::init_packages()
2019 {
2020         int out_y1_int = (int)translate_out_y1;
2021         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2022         int out_h = out_y2_int - out_y1_int;
2023
2024         for(int i = 0; i < total_packages; i++)
2025         {
2026                 TranslatePackage *package = (TranslatePackage*)packages[i];
2027                 package->out_row1 = (int)(out_y1_int + out_h /
2028                         total_packages *
2029                         i);
2030                 package->out_row2 = (int)((float)package->out_row1 +
2031                         out_h /
2032                         total_packages);
2033                 if(i >= total_packages - 1)
2034                         package->out_row2 = out_y2_int;
2035         }
2036 }
2037
2038 LoadClient* TranslateEngine::new_client()
2039 {
2040         return new TranslateUnit(this, overlay);
2041 }
2042
2043 LoadPackage* TranslateEngine::new_package()
2044 {
2045         return new TranslatePackage;
2046 }
2047
2048
2049
2050
2051
2052
2053
2054
// SCALE_TRANSLATE: scale and translate with table lookups for the output rows
// [pkg->out_row1, pkg->out_row2).  One source pixel is read per output pixel
// (no interpolation) and composited with BLEND_3 or BLEND_4.
//
// Macro arguments:
//   max            full scale component value (0xff, 0xffff or 1.0 at the
//                  call sites in ScaleTranslateUnit::process_package)
//   temp_type      arithmetic type handed to the blend macros
//   type           stored component type
//   components     components per pixel, 3 or 4
//   chroma_offset  0x80 / 0x8000 for the YUV call sites, 0 otherwise
//
// Names taken from the expanding scope: alpha, pkg, in_rows, out_rows,
// x_table, y_table, in_x1, in_x2, out_x1, out_y1, out_w.
// BLEND_3/BLEND_4 are defined elsewhere in this file and switch on `mode`;
// presumably they also consume input1..input4, opacity, transparency and
// `output` -- confirm against their definitions before renaming.
2055 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2056 { \
/* sizeof(type) == 4 is the float case at the call sites: no 0.5 rounding there */ \
2057         temp_type opacity; \
2058         if(sizeof(type) != 4) \
2059                 opacity = (temp_type)(alpha * max + 0.5); \
2060         else \
2061                 opacity = (temp_type)(alpha * max); \
2062         temp_type transparency = max - opacity; \
2063  \
2064         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2065         { \
/* y_table is indexed from out_y1; both row pointers start at the left edge of the region */ \
2066                 int in_y = y_table[i - out_y1]; \
2067                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2068                 type *output = (type*)out_rows[i] + out_x1 * components; \
2069  \
2070 /* X direction is scaled and requires a table lookup */ \
2071                 if(out_w != in_x2 - in_x1) \
2072                 { \
2073                         for(int j = 0; j < out_w; j++) \
2074                         { \
/* x_table entries are added to in_row, which is already offset by in_x1 */ \
2075                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2076                                 temp_type input1, input2, input3, input4; \
2077          \
2078                                 input1 = in_row_plus_x[0]; \
2079                                 input2 = in_row_plus_x[1]; \
2080                                 input3 = in_row_plus_x[2]; \
2081                                 if(components == 4) \
2082                                         input4 = in_row_plus_x[3]; \
2083          \
2084                                 if(components == 3) \
2085                                 { \
2086                                         BLEND_3(max, temp_type, type, chroma_offset); \
2087                                 } \
2088                                 else \
2089                                 { \
2090                                         BLEND_4(max, temp_type, type, chroma_offset); \
2091                                 } \
2092                                 output += components; \
2093                         } \
2094                 } \
2095                 else \
2096 /* X direction is not scaled */ \
2097                 { \
/* Source and destination advance in step; x_table is not read on this path */ \
2098                         for(int j = 0; j < out_w; j++) \
2099                         { \
2100                                 temp_type input1, input2, input3, input4; \
2101          \
2102                                 input1 = in_row[0]; \
2103                                 input2 = in_row[1]; \
2104                                 input3 = in_row[2]; \
2105                                 if(components == 4) \
2106                                         input4 = in_row[3]; \
2107          \
2108                                 if(components == 3) \
2109                                 { \
2110                                         BLEND_3(max, temp_type, type, chroma_offset); \
2111                                 } \
2112                                 else \
2113                                 { \
2114                                         BLEND_4(max, temp_type, type, chroma_offset); \
2115                                 } \
2116                                 in_row += components; \
2117                                 output += components; \
2118                         } \
2119                 } \
2120         } \
2121 }
2122
2123
2124
2125 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2126  : LoadClient(server)
2127 {
2128         this->overlay = overlay;
2129         this->scale_translate = server;
2130 }
2131
2132 ScaleTranslateUnit::~ScaleTranslateUnit()
2133 {
2134 }
2135
2136 void ScaleTranslateUnit::scale_array(int* &table,
2137         int out_x1,
2138         int out_x2,
2139         int in_x1,
2140         int in_x2,
2141         int is_x)
2142 {
2143         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2144
2145         table = new int[out_x2 - out_x1];
2146
2147         if(!is_x)
2148         {
2149                 for(int i = 0; i < out_x2 - out_x1; i++)
2150                 {
2151                         table[i] = (int)((float)i / scale + in_x1);
2152                 }
2153         }
2154         else
2155         {
2156                 for(int i = 0; i < out_x2 - out_x1; i++)
2157                 {
2158                         table[i] = (int)((float)i / scale);
2159                 }
2160         }
2161 }
2162
2163
2164 void ScaleTranslateUnit::process_package(LoadPackage *package)
2165 {
2166         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2167
2168 // Args for NEAREST_NEIGHBOR_MACRO
2169         VFrame *output = scale_translate->output;
2170         VFrame *input = scale_translate->input;
2171         int in_x1 = scale_translate->in_x1;
2172         int in_y1 = scale_translate->in_y1;
2173         int in_x2 = scale_translate->in_x2;
2174         int in_y2 = scale_translate->in_y2;
2175         int out_x1 = scale_translate->out_x1;
2176         int out_y1 = scale_translate->out_y1;
2177         int out_x2 = scale_translate->out_x2;
2178         int out_y2 = scale_translate->out_y2;
2179         float alpha = scale_translate->alpha;
2180         int mode = scale_translate->mode;
2181         int out_w = out_x2 - out_x1;
2182
2183         int *x_table;
2184         int *y_table;
2185         unsigned char **in_rows = input->get_rows();
2186         unsigned char **out_rows = output->get_rows();
2187
2188 //      Timer a;
2189 //      a.update();
2190 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2191         if(out_w != in_x2 - in_x1)
2192         {
2193                 scale_array(x_table,
2194                         out_x1,
2195                         out_x2,
2196                         in_x1,
2197                         in_x2,
2198                         1);
2199         }
2200         scale_array(y_table,
2201                 out_y1,
2202                 out_y2,
2203                 in_y1,
2204                 in_y2,
2205                 0);
2206
2207
2208         if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1))
2209         {
2210 // if we have transfer replace and x direction is not scaled, PARTY!
2211                 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2212                 int line_len = out_w * bytes_per_pixel;
2213                 int in_start_byte = in_x1 * bytes_per_pixel;
2214                 int out_start_byte = out_x1 * bytes_per_pixel;
2215                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2216                 {
2217                         memcpy (out_rows[i] + out_start_byte,
2218                                 in_rows[y_table[i - out_y1]] + in_start_byte ,
2219                                 line_len);
2220                 }
2221
2222         }
2223         else
2224         switch(input->get_color_model())
2225         {
2226                 case BC_RGB888:
2227                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2228                         break;
2229
2230                 case BC_RGB_FLOAT:
2231                         SCALE_TRANSLATE(1.0, float, float, 3, 0);
2232                         break;
2233
2234                 case BC_YUV888:
2235                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2236                         break;
2237
2238                 case BC_RGBA8888:
2239                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2240                         break;
2241
2242                 case BC_RGBA_FLOAT:
2243                         SCALE_TRANSLATE(1.0, float, float, 4, 0);
2244                         break;
2245
2246                 case BC_YUVA8888:
2247                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2248                         break;
2249
2250
2251                 case BC_RGB161616:
2252                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2253                         break;
2254
2255                 case BC_YUV161616:
2256                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2257                         break;
2258
2259                 case BC_RGBA16161616:
2260                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2261                         break;
2262
2263                 case BC_YUVA16161616:
2264                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2265                         break;
2266         }
2267
2268 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2269         if(out_x2 - out_x1 != in_x2 - in_x1)
2270                 delete [] x_table;
2271         delete [] y_table;
2272
2273 };
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2284  : LoadServer(cpus, cpus)
2285 {
2286         this->overlay = overlay;
2287 }
2288
2289 ScaleTranslateEngine::~ScaleTranslateEngine()
2290 {
2291 }
2292
2293 void ScaleTranslateEngine::init_packages()
2294 {
2295         int out_h = out_y2 - out_y1;
2296
2297         for(int i = 0; i < total_packages; i++)
2298         {
2299                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2300                 package->out_row1 = (int)(out_y1 + out_h /
2301                         total_packages *
2302                         i);
2303                 package->out_row2 = (int)((float)package->out_row1 +
2304                         out_h /
2305                         total_packages);
2306                 if(i >= total_packages - 1)
2307                         package->out_row2 = out_y2;
2308         }
2309 }
2310
2311 LoadClient* ScaleTranslateEngine::new_client()
2312 {
2313         return new ScaleTranslateUnit(this, overlay);
2314 }
2315
2316 LoadPackage* ScaleTranslateEngine::new_package()
2317 {
2318         return new ScaleTranslatePackage;
2319 }
2320
2321
2322 ScaleTranslatePackage::ScaleTranslatePackage()
2323 {
2324 }
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
// BLEND_ONLY: composite `input` onto `output` pixel for pixel, without scaling
// or translation, for the rows [pkg->out_row1, pkg->out_row2).
//
// Macro arguments (note the order differs from TRANSLATE/SCALE_TRANSLATE):
//   temp_type      arithmetic type handed to the blend macros
//   type           stored component type
//   max            full scale component value
//   components     components per pixel, 3 or 4
//   chroma_offset  forwarded to BLEND_3/BLEND_4
//
// Names taken from the expanding scope: alpha, input, output, pkg.
// BLEND_3/BLEND_4 are defined elsewhere in this file and switch on `mode`;
// presumably they also consume input1..input4, opacity, transparency and the
// inner `output` pointer -- confirm against their definitions before renaming.
2353 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2354 { \
/* No 0.5 rounding when sizeof(type) == 4; presumably that is the float pixel case */ \
2355         temp_type opacity; \
2356         if(sizeof(type) != 4) \
2357                 opacity = (temp_type)(alpha * max + 0.5); \
2358         else \
2359                 opacity = (temp_type)(alpha * max); \
2360         temp_type transparency = max - opacity; \
2361  \
2362         type** output_rows = (type**)output->get_rows(); \
2363         type** input_rows = (type**)input->get_rows(); \
/* The row length comes from the input frame; h is not referenced in this macro body */ \
2364         int w = input->get_w(); \
2365         int h = input->get_h(); \
2366  \
2367         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2368         { \
/* The same row index is used for both frames; `output` now hides the VFrame of the same name */ \
2369                 type* in_row = input_rows[i]; \
2370                 type* output = output_rows[i]; \
2371  \
2372                 for(int j = 0; j < w; j++) \
2373                 { \
2374                         temp_type input1, input2, input3, input4; \
2375                         input1 = in_row[0]; \
2376                         input2 = in_row[1]; \
2377                         input3 = in_row[2]; \
2378                         if(components == 4) input4 = in_row[3]; \
2379  \
2380  \
2381                         if(components == 3) \
2382                         { \
2383                                 BLEND_3(max, temp_type, type, chroma_offset); \
2384                         } \
2385                         else \
2386                         { \
2387                                 BLEND_4(max, temp_type, type, chroma_offset); \
2388                         } \
2389  \
2390                         in_row += components; \
2391                         output += components; \
2392                 } \
2393         } \
2394 }
2395
2396
// BLEND_ONLY_TRANSFER_REPLACE: copy the rows [pkg->out_row1, pkg->out_row2)
// of `input` over the same rows of `output`, one memcpy per row.
//
//   type        stored component type
//   components  components per pixel
//
// Names taken from the expanding scope: input, output, pkg.  The row length
// comes from the input frame.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
        type** replace_dst_rows = (type**)output->get_rows(); \
        type** replace_src_rows = (type**)input->get_rows(); \
        const size_t replace_row_bytes = \
                (size_t)input->get_w() * sizeof(type) * components; \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                memcpy(replace_dst_rows[i], replace_src_rows[i], replace_row_bytes); \
        } \
}
2411
// components is always 4
//
// BLEND_ONLY_4_NORMAL: normal blend of 4 component pixels for the rows
// [pkg->out_row1, pkg->out_row2).  Each source pixel is weighted by
// opacity * its own alpha (scaled against max * max), components 1 and 2 are
// blended relative to chroma_offset, and the destination alpha becomes the
// larger of the source and destination alpha.
//
//   temp_type      arithmetic type of the weights and intermediate sums
//   type           stored component type
//   max            full scale component value
//   chroma_offset  offset removed from components 1 and 2 before blending and
//                  added back afterwards
//
// Names taken from the expanding scope: alpha, input, output, pkg.
#define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        temp_type opacity = (temp_type)(alpha * max + 0.5); \
        temp_type max_squared = ((temp_type)max) * max; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
        int w = input->get_w(); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                const type* src = input_rows[i]; \
                type* dst = output_rows[i]; \
 \
                for(int j = 0; j < w; j++, src += 4, dst += 4) \
                { \
/* Weights of the source and destination pixel; they add up to max * max */ \
                        temp_type src_weight = opacity * src[3]; \
                        temp_type dst_weight = (temp_type)max_squared - src_weight; \
 \
                        dst[0] = ((temp_type)src[0] * src_weight + \
                                (temp_type)dst[0] * dst_weight) / max / max; \
                        dst[1] = (((temp_type)src[1] - chroma_offset) * src_weight + \
                                ((temp_type)dst[1] - chroma_offset) * dst_weight) \
                                / max / max + \
                                chroma_offset; \
                        dst[2] = (((temp_type)src[2] - chroma_offset) * src_weight + \
                                ((temp_type)dst[2] - chroma_offset) * dst_weight) \
                                / max / max + \
                                chroma_offset; \
                        dst[3] = (type)(src[3] > dst[3] ? src[3] : dst[3]); \
                } \
        } \
}
2454
2455
2456
// components is always 3
//
// BLEND_ONLY_3_NORMAL: normal blend of 3 component pixels for the rows
// [pkg->out_row1, pkg->out_row2).  Without an alpha channel every component
// is mixed with the same weight, so a row is processed as one flat run of
// 3 * width values.  Opacity is scaled to 1 << (bits of type), which turns
// the final division into a shift.
//
//   temp_type      arithmetic type of the weights and intermediate sums
//   type           stored component type
//   max            not referenced in this macro body
//   chroma_offset  not referenced in this macro body
//
// Names taken from the expanding scope: alpha, input, output, pkg.
#define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        const int bits = sizeof(type) * 8; \
        const temp_type one = (temp_type)1 << bits; \
        temp_type opacity = (temp_type)(alpha * one + 0.5); \
        temp_type transparency = one - opacity; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
        const int row_values = input->get_w() * 3; \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                const type* src = input_rows[i]; \
                type* dst = output_rows[i]; \
 \
                for(int j = 0; j < row_values; j++) \
                { \
                        dst[j] = ((temp_type)src[j] * opacity + dst[j] * transparency) >> bits; \
                } \
        } \
}
2482
2483
2484
2485 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2486  : LoadClient(server)
2487 {
2488         this->overlay = overlay;
2489         this->blend_engine = server;
2490 }
2491
2492 BlendUnit::~BlendUnit()
2493 {
2494 }
2495
2496 void BlendUnit::process_package(LoadPackage *package)
2497 {
2498         BlendPackage *pkg = (BlendPackage*)package;
2499
2500
2501         VFrame *output = blend_engine->output;
2502         VFrame *input = blend_engine->input;
2503         float alpha = blend_engine->alpha;
2504         int mode = blend_engine->mode;
2505
2506         if (mode == TRANSFER_REPLACE)
2507         {
2508                 switch(input->get_color_model())
2509                 {
2510                         case BC_RGB_FLOAT:
2511                                 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2512                                 break;
2513                         case BC_RGBA_FLOAT:
2514                                 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2515                                 break;
2516                         case BC_RGB888:
2517                         case BC_YUV888:
2518                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2519                                 break;
2520                         case BC_RGBA8888:
2521                         case BC_YUVA8888:
2522                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2523                                 break;
2524                         case BC_RGB161616:
2525                         case BC_YUV161616:
2526                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2527                                 break;
2528                         case BC_RGBA16161616:
2529                         case BC_YUVA16161616:
2530                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2531                                 break;
2532                 }
2533         }
2534         else
2535         if (mode == TRANSFER_NORMAL)
2536         {
2537                 switch(input->get_color_model())
2538                 {
2539                         case BC_RGB_FLOAT:
2540                         {
2541                                 float opacity = alpha;
2542                                 float transparency = 1.0 - alpha;
2543
2544                                 float** output_rows = (float**)output->get_rows();
2545                                 float** input_rows = (float**)input->get_rows();
2546                                 int w = input->get_w() * 3;
2547                                 int h = input->get_h();
2548
2549                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2550                                 {
2551                                         float* in_row = input_rows[i];
2552                                         float* output = output_rows[i];
2553 /* w = 3x width! */
2554                                         for(int j = 0; j < w; j++)
2555                                         {
2556                                                 *output = *in_row * opacity + *output * transparency;
2557                                                 in_row++;
2558                                                 output++;
2559                                         }
2560                                 }
2561                                 break;
2562                         }
2563                         case BC_RGBA_FLOAT:
2564                         {
2565                                 float opacity = alpha;
2566                                 float transparency = 1.0 - alpha;
2567
2568                                 float** output_rows = (float**)output->get_rows();
2569                                 float** input_rows = (float**)input->get_rows();
2570                                 int w = input->get_w();
2571                                 int h = input->get_h();
2572
2573                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2574                                 {
2575                                         float* in_row = input_rows[i];
2576                                         float* output = output_rows[i];
2577
2578                                         for(int j = 0; j < w; j++)
2579                                         {
2580                                                 float pixel_opacity, pixel_transparency;
2581                                                 pixel_opacity = opacity * in_row[3];
2582                                                 pixel_transparency = 1.0 - pixel_opacity;
2583
2584
2585                                                 output[0] = in_row[0] * pixel_opacity +
2586                                                         output[0] * pixel_transparency;
2587                                                 output[1] = in_row[1] * pixel_opacity +
2588                                                         output[1] * pixel_transparency;
2589                                                 output[2] = in_row[2] * pixel_opacity +
2590                                                         output[2] * pixel_transparency;
2591                                                 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
2592
2593                                                 in_row += 4;
2594                                                 output += 4;
2595                                         }
2596                                 }
2597                                 break;
2598                         }
2599                         case BC_RGB888:
2600                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2601                                 break;
2602                         case BC_YUV888:
2603                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2604                                 break;
2605                         case BC_RGBA8888:
2606                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2607                                 break;
2608                         case BC_YUVA8888:
2609                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2610                                 break;
2611                         case BC_RGB161616:
2612                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2613                                 break;
2614                         case BC_YUV161616:
2615                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2616                                 break;
2617                         case BC_RGBA16161616:
2618                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2619                                 break;
2620                         case BC_YUVA16161616:
2621                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2622                                 break;
2623                 }
2624         }
2625         else
2626         switch(input->get_color_model())
2627         {
2628                 case BC_RGB_FLOAT:
2629                         BLEND_ONLY(float, float, 1.0, 3, 0);
2630                         break;
2631                 case BC_RGBA_FLOAT:
2632                         BLEND_ONLY(float, float, 1.0, 4, 0);
2633                         break;
2634                 case BC_RGB888:
2635                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2636                         break;
2637                 case BC_YUV888:
2638                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2639                         break;
2640                 case BC_RGBA8888:
2641                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2642                         break;
2643                 case BC_YUVA8888:
2644                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2645                         break;
2646                 case BC_RGB161616:
2647                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2648                         break;
2649                 case BC_YUV161616:
2650                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2651                         break;
2652                 case BC_RGBA16161616:
2653                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2654                         break;
2655                 case BC_YUVA16161616:
2656                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2657                         break;
2658         }
2659 }
2660
2661
2662
2663 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2664  : LoadServer(cpus, cpus)
2665 {
2666         this->overlay = overlay;
2667 }
2668
2669 BlendEngine::~BlendEngine()
2670 {
2671 }
2672
2673 void BlendEngine::init_packages()
2674 {
2675         for(int i = 0; i < total_packages; i++)
2676         {
2677                 BlendPackage *package = (BlendPackage*)packages[i];
2678                 package->out_row1 = (int)(input->get_h() /
2679                         total_packages *
2680                         i);
2681                 package->out_row2 = (int)((float)package->out_row1 +
2682                         input->get_h() /
2683                         total_packages);
2684
2685                 if(i >= total_packages - 1)
2686                         package->out_row2 = input->get_h();
2687         }
2688 }
2689
2690 LoadClient* BlendEngine::new_client()
2691 {
2692         return new BlendUnit(this, overlay);
2693 }
2694
2695 LoadPackage* BlendEngine::new_package()
2696 {
2697         return new BlendPackage;
2698 }
2699
2700
2701 BlendPackage::BlendPackage()
2702 {
2703 }
2704
2705