cinelerra/overlayframe.C

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdint.h>
   5 #include <stdlib.h>
   6 #include <unistd.h>
   7
   8 #include "clip.h"
   9 #include "edl.inc"
  10 #include "mutex.h"
  11 #include "overlayframe.h"
  12 #include "units.h"
  13 #include "vframe.h"
  14
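// my_abs() overloads so the blend macros can take an absolute value for
// whichever temp_type they are expanded with.  Most of these are never used
// at run time, but every expansion must still compile, so GCC needs them all.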
  17 static int my_abs(int32_t x)
  18 {
  19         return abs(x);
  20 }
  21
  22 static int my_abs(uint32_t x)
  23 {
  24         return x;
  25 }
  26
  27 static int my_abs(int64_t x)
  28 {
  29         return llabs(x);
  30 }
  31
  32 static int my_abs(uint64_t x)
  33 {
  34         return x;
  35 }
  36
  37 static float my_abs(float x)
  38 {
  39         return fabsf(x);
  40 }
  41
  42
  43
  44
  45 OverlayFrame::OverlayFrame(int cpus)
  46 {
  47         temp_frame = 0;
  48         blend_engine = 0;
  49         scale_engine = 0;
  50         scaletranslate_engine = 0;
  51         translate_engine = 0;
  52         this->cpus = cpus;
  53 }
  54
  55 OverlayFrame::~OverlayFrame()
  56 {
  57         if(temp_frame) delete temp_frame;
  58         if(scale_engine) delete scale_engine;
  59         if(translate_engine) delete translate_engine;
  60         if(blend_engine) delete blend_engine;
  61         if(scaletranslate_engine) delete scaletranslate_engine;
  62 }
  63
  64
  65
  66
  67
  68
  69
  70
  71 // Verification:
  72
  73 // (255 * 255 + 0 * 0) / 255 = 255
  74 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
  75
  76 // (65535 * 65535 + 0 * 0) / 65535 = 65535
  77 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
  78
  79
  80 // Permutation 4 U
  81
  82 #define BLEND_3(max, temp_type, type, chroma_offset) \
  83 { \
  84         temp_type r, g, b; \
  85  \
  86 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
  87         switch(mode) \
  88         { \
  89                 case TRANSFER_DIVIDE: \
  90                         r = input1 ? (((temp_type)output[0] * max) / input1) : max; \
  91                         if(chroma_offset) \
  92                         { \
  93                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
  94                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
  95                         } \
  96                         else \
  97                         { \
  98                                 g = input2 ? (temp_type)output[1] * max / (temp_type)input2 : max; \
  99                                 b = input3 ? (temp_type)output[2] * max / (temp_type)input3 : max; \
 100                         } \
 101                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
 102                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
 103                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
 104                         break; \
 105                 case TRANSFER_MULTIPLY: \
 106                         r = ((temp_type)input1 * output[0]) / max; \
 107                         if(chroma_offset) \
 108                         { \
 109                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
 110                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
 111                         } \
 112                         else \
 113                         { \
 114                                 g = (temp_type)input2 * (temp_type)output[1] / max; \
 115                                 b = (temp_type)input3 * (temp_type)output[2] / max; \
 116                         } \
 117                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
 118                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
 119                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
 120                         break; \
 121                 case TRANSFER_SUBTRACT: \
 122                         r = (temp_type)output[0] - (temp_type)input1; \
 123                         g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
 124                                 ((temp_type)input2 - (temp_type)chroma_offset) + \
 125                                 (temp_type)chroma_offset; \
 126                         b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
 127                                 ((temp_type)input3 - (temp_type)chroma_offset) + \
 128                                 (temp_type)chroma_offset; \
 129                         if(r < 0) r = 0; \
 130                         if(g < 0) g = 0; \
 131                         if(b < 0) b = 0; \
 132                         r = (r * opacity + output[0] * transparency) / max; \
 133                         g = (g * opacity + output[1] * transparency) / max; \
 134                         b = (b * opacity + output[2] * transparency) / max; \
 135                         break; \
 136                 case TRANSFER_ADDITION: \
 137                         r = (temp_type)input1 + output[0]; \
 138                         g = ((temp_type)input2 - chroma_offset) + \
 139                                 ((temp_type)output[1] - chroma_offset) + \
 140                                 (temp_type)chroma_offset; \
 141                         b = ((temp_type)input3 - chroma_offset) + \
 142                                 ((temp_type)output[2] - chroma_offset) + \
 143                                 (temp_type)chroma_offset; \
 144                         r = (r * opacity + output[0] * transparency) / max; \
 145                         g = (g * opacity + output[1] * transparency) / max; \
 146                         b = (b * opacity + output[2] * transparency) / max; \
 147                         break; \
 148                 case TRANSFER_MAX: \
 149                 { \
 150                         r = (temp_type)MAX(input1, output[0]); \
 151                         temp_type g1 = ((temp_type)input2 - chroma_offset); \
 152                         if(g1 < 0) g1 = -g1; \
 153                         temp_type g2 = ((temp_type)output[1] - chroma_offset); \
 154                         if(g2 < 0) g2 = -g2; \
 155                         if(g1 > g2) \
 156                                 g = input2; \
 157                         else \
 158                                 g = output[1]; \
 159                         temp_type b1 = ((temp_type)input3 - chroma_offset); \
 160                         if(b1 < 0) b1 = -b1; \
 161                         temp_type b2 = ((temp_type)output[2] - chroma_offset); \
 162                         if(b2 < 0) b2 = -b2; \
 163                         if(b1 > b2) \
 164                                 b = input3; \
 165                         else \
 166                                 b = output[2]; \
 167                         r = (r * opacity + output[0] * transparency) / max; \
 168                         g = (g * opacity + output[1] * transparency) / max; \
 169                         b = (b * opacity + output[2] * transparency) / max; \
 170                         break; \
 171                 } \
 172                 case TRANSFER_REPLACE: \
 173                         r = input1; \
 174                         g = input2; \
 175                         b = input3; \
 176                         break; \
 177                 case TRANSFER_NORMAL: \
 178                         r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
 179                         g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
 180                         b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
 181                         break; \
 182         } \
 183  \
 184         if(sizeof(type) != 4) \
 185         { \
 186                 output[0] = (type)CLIP(r, 0, max); \
 187                 output[1] = (type)CLIP(g, 0, max); \
 188                 output[2] = (type)CLIP(b, 0, max); \
 189         } \
 190         else \
 191         { \
 192                 output[0] = r; \
 193                 output[1] = g; \
 194                 output[2] = b; \
 195         } \
 196 }
 197
 198
 199
 200
 201
 202 // Blending equations are drastically different for 3 and 4 components
 203 #define BLEND_4(max, temp_type, type, chroma_offset) \
 204 { \
 205         temp_type r, g, b, a; \
 206         temp_type pixel_opacity, pixel_transparency; \
 207         temp_type output1 = output[0]; \
 208         temp_type output2 = output[1]; \
 209         temp_type output3 = output[2]; \
 210         temp_type output4 = output[3]; \
 211  \
 212         pixel_opacity = opacity * input4; \
 213         pixel_transparency = (temp_type)max * max - pixel_opacity; \
 214  \
 215         switch(mode) \
 216         { \
 217                 case TRANSFER_DIVIDE: \
 218                         r = input1 ? (((temp_type)output1 * max) / input1) : max; \
 219                         if(chroma_offset) \
 220                         { \
 221                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
 222                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
 223                         } \
 224                         else \
 225                         { \
 226                                 g = input2 ? (temp_type)output2 * max / (temp_type)input2 : max; \
 227                                 b = input3 ? (temp_type)output3 * max / (temp_type)input3 : max; \
 228                         } \
 229                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
 230                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
 231                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
 232                         a = input4 > output4 ? input4 : output4; \
 233                         break; \
 234                 case TRANSFER_MULTIPLY: \
 235                         r = ((temp_type)input1 * output1) / max; \
 236                         if(chroma_offset) \
 237                         { \
 238                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
 239                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
 240                         } \
 241                         else \
 242                         { \
 243                                 g = (temp_type)input2 * (temp_type)output2 / max; \
 244                                 b = (temp_type)input3 * (temp_type)output3 / max; \
 245                         } \
 246                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
 247                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
 248                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
 249                         a = input4 > output4 ? input4 : output4; \
 250                         break; \
 251                 case TRANSFER_SUBTRACT: \
 252                         r = (temp_type)output1 - input1; \
 253                         g = ((temp_type)output2 - chroma_offset) - \
 254                                 ((temp_type)input2 - (temp_type)chroma_offset) + \
 255                                 (temp_type)chroma_offset; \
 256                         b = ((temp_type)output3 - chroma_offset) - \
 257                                 ((temp_type)input3 - (temp_type)chroma_offset) + \
 258                                 (temp_type)chroma_offset; \
 259                         if(r < 0) r = 0; \
 260                         if(g < 0) g = 0; \
 261                         if(b < 0) b = 0; \
 262                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
 263                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
 264                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
 265                         a = input4 > output4 ? input4 : output4; \
 266                         break; \
 267                 case TRANSFER_ADDITION: \
 268                         r = (temp_type)input1 + output1; \
 269                         g = ((temp_type)input2 - chroma_offset) + \
 270                                 ((temp_type)output2 - chroma_offset) + \
 271                                 chroma_offset; \
 272                         b = ((temp_type)input3 - chroma_offset) + \
 273                                 ((temp_type)output3 - chroma_offset) + \
 274                                 chroma_offset; \
 275                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
 276                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
 277                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
 278                         a = input4 > output4 ? input4 : output4; \
 279                         break; \
 280                 case TRANSFER_MAX: \
 281                 { \
 282                         r = (temp_type)MAX(input1, output1); \
 283                         temp_type g1 = ((temp_type)input2 - chroma_offset); \
 284                         if(g1 < 0) g1 = -g1; \
 285                         temp_type g2 = ((temp_type)output2 - chroma_offset); \
 286                         if(g2 < 0) g2 = -g2; \
 287                         if(g1 > g2) \
 288                                 g = input2; \
 289                         else \
 290                                 g = output2; \
 291                         temp_type b1 = ((temp_type)input3 - chroma_offset); \
 292                         if(b1 < 0) b1 = -b1; \
 293                         temp_type b2 = ((temp_type)output3 - chroma_offset); \
 294                         if(b2 < 0) b2 = -b2; \
 295                         if(b1 > b2) \
 296                                 b = input3; \
 297                         else \
 298                                 b = output3; \
 299                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
 300                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
 301                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
 302                         a = input4 > output4 ? input4 : output4; \
 303                         break; \
 304                 } \
 305                 case TRANSFER_REPLACE: \
 306                         r = input1; \
 307                         g = input2; \
 308                         b = input3; \
 309                         a = input4; \
 310                         break; \
 311                 case TRANSFER_NORMAL: \
 312                         r = (input1 * pixel_opacity + \
 313                                 output1 * pixel_transparency) / max / max; \
 314                         g = ((input2 - chroma_offset) * pixel_opacity + \
 315                                 (output2 - chroma_offset) * pixel_transparency) \
 316                                 / max / max + \
 317                                 chroma_offset; \
 318                         b = ((input3 - chroma_offset) * pixel_opacity + \
 319                                 (output3 - chroma_offset) * pixel_transparency) \
 320                                 / max / max + \
 321                                 chroma_offset; \
 322                         a = input4 > output4 ? input4 : output4; \
 323                         break; \
 324         } \
 325  \
 326         if(sizeof(type) != 4) \
 327         { \
 328                 output[0] = (type)CLIP(r, 0, max); \
 329                 output[1] = (type)CLIP(g, 0, max); \
 330                 output[2] = (type)CLIP(b, 0, max); \
 331                 output[3] = (type)a; \
 332         } \
 333         else \
 334         { \
 335                 output[0] = r; \
 336                 output[1] = g; \
 337                 output[2] = b; \
 338                 output[3] = a; \
 339         } \
 340 }
 341
 342
 343
 344 // Bicubic algorithm using multiprocessors
 345 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
 346
 347 // Nearest neighbor algorithm using multiprocessors for blending
 348 // input -> scale + translate -> blend -> output
 349
 350
 351 int OverlayFrame::overlay(VFrame *output,
 352         VFrame *input,
 353         float in_x1,
 354         float in_y1,
 355         float in_x2,
 356         float in_y2,
 357         float out_x1,
 358         float out_y1,
 359         float out_x2,
 360         float out_y2,
 361         float alpha,       // 0 - 1
 362         int mode,
 363         int interpolation_type)
 364 {
 365         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
 366         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
 367
 368
 369
 370
 371
 372
 373
 374
 375         if(isnan(in_x1) ||
 376                 isnan(in_y1) ||
 377                 isnan(in_x2) ||
 378                 isnan(in_y2) ||
 379                 isnan(out_x1) ||
 380                 isnan(out_y1) ||
 381                 isnan(out_x2) ||
 382                 isnan(out_y2)) return 1;
 383 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
 384 // in_y1,
 385 // in_x2,
 386 // in_y2,
 387 // out_x1,
 388 // out_y1,
 389 // out_x2,
 390 // out_y2,
 391 // out_x2 - out_x1,
 392 // out_y2 - out_y1);
 393
 394 // Limit values
 395         if(in_x1 < 0)
 396         {
 397                 out_x1 += -in_x1 * w_scale;
 398                 in_x1 = 0;
 399         }
 400         else
 401         if(in_x1 >= input->get_w())
 402         {
 403                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
 404                 in_x1 = input->get_w();
 405         }
 406
 407         if(in_y1 < 0)
 408         {
 409                 out_y1 += -in_y1 * h_scale;
 410                 in_y1 = 0;
 411         }
 412         else
 413         if(in_y1 >= input->get_h())
 414         {
 415                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
 416                 in_y1 = input->get_h();
 417         }
 418
 419         if(in_x2 < 0)
 420         {
 421                 out_x2 += -in_x2 * w_scale;
 422                 in_x2 = 0;
 423         }
 424         else
 425         if(in_x2 >= input->get_w())
 426         {
 427                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
 428                 in_x2 = input->get_w();
 429         }
 430
 431         if(in_y2 < 0)
 432         {
 433                 out_y2 += -in_y2 * h_scale;
 434                 in_y2 = 0;
 435         }
 436         else
 437         if(in_y2 >= input->get_h())
 438         {
 439                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
 440                 in_y2 = input->get_h();
 441         }
 442
 443         if(out_x1 < 0)
 444         {
 445                 in_x1 += -out_x1 / w_scale;
 446                 out_x1 = 0;
 447         }
 448         else
 449         if(out_x1 >= output->get_w())
 450         {
 451                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
 452                 out_x1 = output->get_w();
 453         }
 454
 455         if(out_y1 < 0)
 456         {
 457                 in_y1 += -out_y1 / h_scale;
 458                 out_y1 = 0;
 459         }
 460         else
 461         if(out_y1 >= output->get_h())
 462         {
 463                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
 464                 out_y1 = output->get_h();
 465         }
 466
 467         if(out_x2 < 0)
 468         {
 469                 in_x2 += -out_x2 / w_scale;
 470                 out_x2 = 0;
 471         }
 472         else
 473         if(out_x2 >= output->get_w())
 474         {
 475                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
 476                 out_x2 = output->get_w();
 477         }
 478
 479         if(out_y2 < 0)
 480         {
 481                 in_y2 += -out_y2 / h_scale;
 482                 out_y2 = 0;
 483         }
 484         else
 485         if(out_y2 >= output->get_h())
 486         {
 487                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
 488                 out_y2 = output->get_h();
 489         }
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500         float in_w = in_x2 - in_x1;
 501         float in_h = in_y2 - in_y1;
 502         float out_w = out_x2 - out_x1;
 503         float out_h = out_y2 - out_y1;
 504 // Input for translation operation
 505         VFrame *translation_input = input;
 506
 507
 508         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
 509
 510
 511 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
 512 //                      in_y1,
 513 //                      in_x2,
 514 //                      in_y2,
 515 //                      out_x1,
 516 //                      out_y1,
 517 //                      out_x2,
 518 //                      out_y2);
 519
 520
 521
 522
 523
 524 // ****************************************************************************
 525 // Transfer to temp buffer by scaling nearest integer boundaries
 526 // ****************************************************************************
 527         if(interpolation_type != NEAREST_NEIGHBOR &&
 528                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
 529         {
 530 // Create integer boundaries for interpolation
 531                 float in_x1_float = in_x1;
 532                 float in_y1_float = in_y1;
 533                 float in_x2_float = MIN(in_x2, input->get_w());
 534                 float in_y2_float = MIN(in_y2, input->get_h());
 535                 int out_x1_int = (int)out_x1;
 536                 int out_y1_int = (int)out_y1;
 537                 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
 538                 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
 539
 540 // Dimensions of temp frame.  Integer boundaries scaled.
 541                 int temp_w = (out_x2_int - out_x1_int);
 542                 int temp_h = (out_y2_int - out_y1_int);
 543                 VFrame *scale_output;
 544
 545
 546
 547 #define NO_TRANSLATION1 \
 548         (EQUIV(in_x1, 0) && \
 549         EQUIV(in_y1, 0) && \
 550         EQUIV(out_x1, 0) && \
 551         EQUIV(out_y1, 0) && \
 552         EQUIV(in_x2, in_x2_float) && \
 553         EQUIV(in_y2, in_y2_float) && \
 554         EQUIV(out_x2, temp_w) && \
 555         EQUIV(out_y2, temp_h))
 556
 557
 558 #define NO_BLEND \
 559         (EQUIV(alpha, 1) && \
 560         (mode == TRANSFER_REPLACE || \
 561         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
 562
 563
 564
 565
 566
 567 // Prepare destination for operation
 568
 569 // No translation and no blending.  The blending operation is built into the
 570 // translation unit but not the scaling unit.
 571 // input -> output
 572                 if(NO_TRANSLATION1 &&
 573                         NO_BLEND)
 574                 {
 575 // printf("OverlayFrame::overlay input -> output\n");
 576
 577                         scale_output = output;
 578                         translation_input = 0;
 579                 }
 580                 else
 581 // If translation or blending
 582 // input -> nearest integer boundary temp
 583                 {
 584                         if(temp_frame &&
 585                                 (temp_frame->get_w() != temp_w ||
 586                                         temp_frame->get_h() != temp_h))
 587                         {
 588                                 delete temp_frame;
 589                                 temp_frame = 0;
 590                         }
 591
 592                         if(!temp_frame)
 593                         {
 594                                 temp_frame = new VFrame(0,
 595                                         temp_w,
 596                                         temp_h,
 597                                         input->get_color_model(),
 598                                         -1);
 599                         }
 600 //printf("OverlayFrame::overlay input -> temp\n");
 601
 602
 603                         temp_frame->clear_frame();
 604
 605 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
 606 //      temp_w, temp_h);
 607                         scale_output = temp_frame;
 608                         translation_input = scale_output;
 609
 610 // Adjust input coordinates to reflect new scaled coordinates.
 611                         in_x1 = 0;
 612                         in_y1 = 0;
 613                         in_x2 = temp_w;
 614                         in_y2 = temp_h;
 615                 }
 616
 617
 618
 619 //printf("Overlay 1\n");
 620
 621 // Scale input -> scale_output
 622                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
 623                 scale_engine->scale_output = scale_output;
 624                 scale_engine->scale_input = input;
 625                 scale_engine->w_scale = w_scale;
 626                 scale_engine->h_scale = h_scale;
 627                 scale_engine->in_x1_float = in_x1_float;
 628                 scale_engine->in_y1_float = in_y1_float;
 629                 scale_engine->out_w_int = temp_w;
 630                 scale_engine->out_h_int = temp_h;
 631                 scale_engine->interpolation_type = interpolation_type;
 632 //printf("Overlay 2\n");
 633
 634 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
 635                 scale_engine->process_packages();
 636 //printf("OverlayFrame::overlay ScaleEngine 2\n");
 637
 638
 639
 640         }
 641
 642 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 643 //      in_x1,
 644 //      in_y1,
 645 //      in_x2,
 646 //      in_y2,
 647 //      out_x1,
 648 //      out_y1,
 649 //      out_x2,
 650 //      out_y2);
 651
 652
 653
 654
 655
 656 #define NO_TRANSLATION2 \
 657         (EQUIV(in_x1, 0) && \
 658         EQUIV(in_y1, 0) && \
 659         EQUIV(in_x2, translation_input->get_w()) && \
 660         EQUIV(in_y2, translation_input->get_h()) && \
 661         EQUIV(out_x1, 0) && \
 662         EQUIV(out_y1, 0) && \
 663         EQUIV(out_x2, output->get_w()) && \
 664         EQUIV(out_y2, output->get_h())) \
 665
 666 #define NO_SCALE \
 667         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
 668         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
 669
 670
 671
 672
 673 //printf("OverlayFrame::overlay 4 %d\n", mode);
 674
 675
 676
 677
 678         if(translation_input)
 679         {
 680 // Direct copy
 681                 if( NO_TRANSLATION2 &&
 682                         NO_SCALE &&
 683                         NO_BLEND)
 684                 {
 685 //printf("OverlayFrame::overlay direct copy\n");
 686                         output->copy_from(translation_input);
 687                 }
 688                 else
 689 // Blend only
 690                 if( NO_TRANSLATION2 &&
 691                         NO_SCALE)
 692                 {
 693                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
 694
 695
 696                         blend_engine->output = output;
 697                         blend_engine->input = translation_input;
 698                         blend_engine->alpha = alpha;
 699                         blend_engine->mode = mode;
 700
 701                         blend_engine->process_packages();
 702                 }
 703                 else
 704 // Scale and translate using nearest neighbor
 705 // Translation is exactly on integer boundaries
 706                 if(interpolation_type == NEAREST_NEIGHBOR ||
 707                         EQUIV(in_x1, (int)in_x1) &&
 708                         EQUIV(in_y1, (int)in_y1) &&
 709                         EQUIV(in_x2, (int)in_x2) &&
 710                         EQUIV(in_y2, (int)in_y2) &&
 711
 712                         EQUIV(out_x1, (int)out_x1) &&
 713                         EQUIV(out_y1, (int)out_y1) &&
 714                         EQUIV(out_x2, (int)out_x2) &&
 715                         EQUIV(out_y2, (int)out_y2))
 716                 {
 717 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
 718                         if(!scaletranslate_engine) scaletranslate_engine =
 719                                 new ScaleTranslateEngine(this, cpus);
 720
 721
 722                         scaletranslate_engine->output = output;
 723                         scaletranslate_engine->input = translation_input;
 724 // Input for Scaletranslate is subpixel precise!
 725                         scaletranslate_engine->in_x1 = in_x1;
 726                         scaletranslate_engine->in_y1 = in_y1;
 727                         scaletranslate_engine->in_x2 = in_x2;
 728                         scaletranslate_engine->in_y2 = in_y2;
 729                         scaletranslate_engine->out_x1 = (int)out_x1;
 730                         scaletranslate_engine->out_y1 = (int)out_y1;
 731                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
 732                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
 733                         scaletranslate_engine->alpha = alpha;
 734                         scaletranslate_engine->mode = mode;
 735
 736                         scaletranslate_engine->process_packages();
 737                 }
 738                 else
 739 // Fractional translation
 740                 {
 741 // Use fractional translation
 742 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 743 //      in_x1,
 744 //      in_y1,
 745 //      in_x2,
 746 //      in_y2,
 747 //      out_x1,
 748 //      out_y1,
 749 //      out_x2,
 750 //      out_y2);
 751
 752 //printf("Overlay 3\n");
 753                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
 754                         translate_engine->translate_output = output;
 755                         translate_engine->translate_input = translation_input;
 756                         translate_engine->translate_in_x1 = in_x1;
 757                         translate_engine->translate_in_y1 = in_y1;
 758                         translate_engine->translate_in_x2 = in_x2;
 759                         translate_engine->translate_in_y2 = in_y2;
 760                         translate_engine->translate_out_x1 = out_x1;
 761                         translate_engine->translate_out_y1 = out_y1;
 762                         translate_engine->translate_out_x2 = out_x2;
 763                         translate_engine->translate_out_y2 = out_y2;
 764                         translate_engine->translate_alpha = alpha;
 765                         translate_engine->translate_mode = mode;
 766 //printf("Overlay 4\n");
 767
 768 //printf("OverlayFrame::overlay 5 %d\n", mode);
 769                         translate_engine->process_packages();
 770
 771                 }
 772         }
 773 //printf("OverlayFrame::overlay 2\n");
 774
 775         return 0;
 776 }
 777
 778
 779
 780
 781
 782
 783
 784 ScalePackage::ScalePackage()
 785 {
 786 }
 787
 788
 789
 790
 791 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
 792  : LoadClient(server)
 793 {
 794         this->overlay = overlay;
 795         this->engine = server;
 796 }
 797
 798 ScaleUnit::~ScaleUnit()
 799 {
 800 }
 801
 802
 803
 804 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
 805         float scale,
 806         int in_pixel1,
 807         int out_total,
 808         int in_total)
 809 {
 810         table = new bilinear_table_t[out_total];
 811         bzero(table, sizeof(bilinear_table_t) * out_total);
 812 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
 813         for(int i = 0; i < out_total; i++)
 814         {
 815                 float out_start = i;
 816                 float in_start = out_start * scale;
 817                 float out_end = i + 1;
 818                 float in_end = out_end * scale;
 819                 bilinear_table_t *entry = table + i;
 820 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
 821
 822 // Store input fraction.  Using scale to normalize these didn't work.
 823                 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
 824                 entry->input_fraction2 = 1.0 /* / scale */;
 825                 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
 826
 827                 if(in_end >= in_total - in_pixel1)
 828                 {
 829                         in_end = in_total - in_pixel1 - 1;
 830
 831                         int difference = (int)in_end - (int)in_start - 1;
 832                         if(difference < 0) difference = 0;
 833                         entry->input_fraction3 = 1.0 -
 834                                 entry->input_fraction1 -
 835                                 entry->input_fraction2 * difference;
 836                 }
 837
 838 // Store input pixels
 839                 entry->input_pixel1 = (int)in_start;
 840                 entry->input_pixel2 = (int)in_end;
 841
 842 // Normalize for middle pixels
 843                 if(entry->input_pixel2 > entry->input_pixel1 + 1)
 844                 {
 845                         float total = entry->input_fraction1 +
 846                                 entry->input_fraction2 *
 847                                 (entry->input_pixel2 - entry->input_pixel1 - 1) +
 848                                 entry->input_fraction3;
 849                         entry->input_fraction1 /= total;
 850                         entry->input_fraction2 /= total;
 851                         entry->input_fraction3 /= total;
 852                 }
 853                 else
 854                 {
 855                         float total = entry->input_fraction1 +
 856                                 entry->input_fraction3;
 857                         entry->input_fraction1 /= total;
 858                         entry->input_fraction3 /= total;
 859                 }
 860
 861 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n",
 862 // i,
 863 // entry->input_pixel1,
 864 // entry->input_pixel2,
 865 // entry->input_fraction1,
 866 // entry->input_fraction2,
 867 // entry->input_fraction3,
 868 // entry->input_fraction1 +
 869 //      entry->input_fraction2 *
 870 //      (entry->input_pixel2 - entry->input_pixel1 - 1) +
 871 //      entry->input_fraction3);
 872
 873
 874 // Sanity check
 875                 if(entry->input_pixel1 > entry->input_pixel2)
 876                 {
 877                         entry->input_pixel1 = entry->input_pixel2;
 878                         entry->input_fraction1 = 0;
 879                 }
 880
 881 // Get total fraction of output pixel used
 882 //              if(entry->input_pixel2 > entry->input_pixel1)
 883                 entry->total_fraction =
 884                         entry->input_fraction1 +
 885                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
 886                         entry->input_fraction3;
 887                 entry->input_pixel1 += in_pixel1;
 888                 entry->input_pixel2 += in_pixel1;
 889         }
 890 }
 891
 892 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
 893         float scale,
 894         float in_pixel1,
 895         int out_total,
 896         int in_total)
 897 {
 898         table = new bilinear_table_t[out_total];
 899         bzero(table, sizeof(bilinear_table_t) * out_total);
 900
 901         for(int i = 0; i < out_total; i++)
 902         {
 903                 bilinear_table_t *entry = table + i;
 904                 float in_pixel = i * scale + in_pixel1;
 905                 entry->input_pixel1 = (int)floor(in_pixel);
 906                 entry->input_pixel2 = entry->input_pixel1 + 1;
 907
 908                 if(in_pixel - in_pixel1 <= in_total)
 909                 {
 910                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
 911                 }
 912                 else
 913                 {
 914                         entry->input_fraction3 = 0;
 915                         entry->input_pixel2 = 0;
 916                 }
 917
 918                 if(in_pixel - in_pixel1 >= 0)
 919                 {
 920                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
 921                 }
 922                 else
 923                 {
 924                         entry->input_fraction1 = 0;
 925                         entry->input_pixel1 = (int)in_pixel1;
 926                 }
 927
 928                 if(entry->input_pixel2 >= in_total)
 929                 {
 930                         entry->input_pixel2 = entry->input_pixel1;
 931                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
 932                 }
 933
 934                 entry->total_fraction =
 935                         entry->input_fraction1 +
 936                         entry->input_fraction3;
 937 //
 938 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
 939 // entry->input_pixel1,
 940 // entry->input_pixel2,
 941 // entry->input_fraction1,
 942 // entry->input_fraction2,
 943 // entry->input_fraction3);
 944         }
 945 }
 946
 947 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
 948 {
 949         printf("ScaleUnit::dump_bilinear\n");
 950         for(int i = 0; i < total; i++)
 951         {
 952                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
 953                         i,
 954                         table[i].input_pixel1,
 955                         table[i].input_pixel2,
 956                         table[i].input_fraction1,
 957                         table[i].input_fraction2,
 958                         table[i].input_fraction3,
 959                         table[i].total_fraction);
 960         }
 961 }
 962
 963 #define PIXEL_REDUCE_MACRO(type, components, row) \
 964 { \
 965         type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
 966         type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
 967  \
 968 /* Do first pixel */ \
 969         temp_f1 += input_scale1 * input_row[0]; \
 970         temp_f2 += input_scale1 * input_row[1]; \
 971         temp_f3 += input_scale1 * input_row[2]; \
 972         if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
 973  \
 974 /* Do last pixel */ \
 975 /*      if(input_row < input_end) */\
 976         { \
 977                 temp_f1 += input_scale3 * input_end[0]; \
 978                 temp_f2 += input_scale3 * input_end[1]; \
 979                 temp_f3 += input_scale3 * input_end[2]; \
 980                 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
 981         } \
 982  \
 983 /* Do middle pixels */ \
 984         for(input_row += components; input_row < input_end; input_row += components) \
 985         { \
 986                 temp_f1 += input_scale2 * input_row[0]; \
 987                 temp_f2 += input_scale2 * input_row[1]; \
 988                 temp_f3 += input_scale2 * input_row[2]; \
 989                 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
 990         } \
 991 }
 992
 993 // Bilinear reduction and suboptimal enlargement.
 994 // Very high quality.
 995 #define BILINEAR_REDUCE(max, type, components) \
 996 { \
 997         bilinear_table_t *x_table, *y_table; \
 998         int out_h = pkg->out_row2 - pkg->out_row1; \
 999         type **in_rows = (type**)input->get_rows(); \
1000         type **out_rows = (type**)output->get_rows(); \
1001  \
1002         if(scale_w < 1) \
1003                 tabulate_reduction(x_table, \
1004                         1.0 / scale_w, \
1005                         (int)in_x1_float, \
1006                         out_w_int, \
1007                         input->get_w()); \
1008         else \
1009                 tabulate_enlarge(x_table, \
1010                         1.0 / scale_w, \
1011                         in_x1_float, \
1012                         out_w_int, \
1013                         input->get_w()); \
1014  \
1015         if(scale_h < 1) \
1016                 tabulate_reduction(y_table, \
1017                         1.0 / scale_h, \
1018                         (int)in_y1_float, \
1019                         out_h_int, \
1020                         input->get_h()); \
1021         else \
1022                 tabulate_enlarge(y_table, \
1023                         1.0 / scale_h, \
1024                         in_y1_float, \
1025                         out_h_int, \
1026                         input->get_h()); \
1027 /* dump_bilinear(y_table, out_h_int); */\
1028  \
1029         for(int i = 0; i < out_h; i++) \
1030         { \
1031                 type *out_row = out_rows[i + pkg->out_row1]; \
1032                 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
1033 /* printf("BILINEAR_REDUCE 2 %d %d %d %f %f %f\n", */ \
1034 /* i, */ \
1035 /* y_entry->input_pixel1, */ \
1036 /* y_entry->input_pixel2, */ \
1037 /* y_entry->input_fraction1, */ \
1038 /* y_entry->input_fraction2, */ \
1039 /* y_entry->input_fraction3); */ \
1040  \
1041                 for(int j = 0; j < out_w_int; j++) \
1042                 { \
1043                         bilinear_table_t *x_entry = &x_table[j]; \
1044 /* Load rounding factors */ \
1045                         float temp_f1; \
1046                         float temp_f2; \
1047                         float temp_f3; \
1048                         float temp_f4; \
1049                         if(sizeof(type) != 4) \
1050                                 temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
1051                         else \
1052                                 temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
1053  \
1054 /* First row */ \
1055                         float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
1056                         float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
1057                         float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
1058                         PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
1059  \
1060 /* Last row */ \
1061                         if(out_h) \
1062                         { \
1063                                 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
1064                                 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
1065                                 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
1066                                 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
1067  \
1068 /* Middle rows */ \
1069                                 if(out_h > 1) \
1070                                 { \
1071                                         input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
1072                                         input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
1073                                         input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
1074                                         for(int k = y_entry->input_pixel1 + 1; \
1075                                                 k < y_entry->input_pixel2; \
1076                                                 k++) \
1077                                         { \
1078                                                 PIXEL_REDUCE_MACRO(type, components, k) \
1079                                         } \
1080                                 } \
1081                         } \
1082  \
1083  \
1084                         if(max != 1.0) \
1085                         { \
1086                                 if(temp_f1 > max) temp_f1 = max; \
1087                                 if(temp_f2 > max) temp_f2 = max; \
1088                                 if(temp_f3 > max) temp_f3 = max; \
1089                                 if(components == 4) if(temp_f4 > max) temp_f4 = max; \
1090                         } \
1091  \
1092                         out_row[j * components    ] = (type)temp_f1; \
1093                         out_row[j * components + 1] = (type)temp_f2; \
1094                         out_row[j * components + 2] = (type)temp_f3; \
1095                         if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
1096                 } \
1097 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
1098         } \
1099  \
1100         delete [] x_table; \
1101         delete [] y_table; \
1102 }
1103
1104
1105
// Only 2 input pixels
//
// Expands to a block that fills output rows pkg->out_row1 up to (but not
// including) pkg->out_row2.  Every output pixel blends the 2x2 group of input
// pixels selected by the tables from tabulate_blinear_f().  Results are
// stored with a plain cast to the channel type.
//
//   max        - not used by this macro.
//   type       - channel storage type.
//   components - channels per pixel; the fourth channel is only processed
//                when this is 4.
//
// Names taken from the expansion scope: pkg, input, output, scale_w, scale_h,
// in_x1_float, in_y1_float and out_w_int.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
        float k_y = 1.0 / scale_h; \
        float k_x = 1.0 / scale_w; \
        type **in_rows = (type**)input->get_rows(); \
        type **out_rows = (type**)output->get_rows(); \
        int out_h = pkg->out_row2 - pkg->out_row1; \
        int in_h_int = input->get_h(); \
        int in_w_int = input->get_w(); \
        int *table_int_x1, *table_int_y1; \
        int *table_int_x2, *table_int_y2; \
        float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
 \
/* Column tables cover the whole output width */ \
        tabulate_blinear_f(table_int_x1, \
                table_int_x2, \
                table_frac_x_f, \
                table_antifrac_x_f, \
                k_x, \
                0, \
                out_w_int, \
                in_x1_float, \
                in_w_int); \
/* Row tables cover only this package's rows and are indexed from 0 */ \
        tabulate_blinear_f(table_int_y1, \
                table_int_y2, \
                table_frac_y_f, \
                table_antifrac_y_f, \
                k_y, \
                pkg->out_row1, \
                pkg->out_row2, \
                in_y1_float, \
                in_h_int); \
 \
        for(int i = 0; i < out_h; i++) \
        { \
                int i_y1 = table_int_y1[i]; \
                int i_y2 = table_int_y2[i]; \
                float a_f = table_frac_y_f[i]; \
                float anti_a_f = table_antifrac_y_f[i]; \
                type *in_row1 = in_rows[i_y1]; \
                type *in_row2 = in_rows[i_y2]; \
                type *out_row = out_rows[i + pkg->out_row1]; \
 \
                for(int j = 0; j < out_w_int; j++) \
                { \
                        int i_x1 = table_int_x1[j]; \
                        int i_x2 = table_int_x2[j]; \
                        float output1r, output1g, output1b, output1a; \
                        float output2r, output2g, output2b, output2a; \
                        float output3r, output3g, output3b, output3a; \
                        float output4r, output4g, output4b, output4a; \
                        float b_f = table_frac_x_f[j]; \
                        float anti_b_f = table_antifrac_x_f[j]; \
 \
/* Pixel 1: first row, first column */ \
                        output1r = in_row1[i_x1 * components]; \
                        output1g = in_row1[i_x1 * components + 1]; \
                        output1b = in_row1[i_x1 * components + 2]; \
                        if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
 \
/* Pixel 2: first row, second column */ \
                        output2r = in_row1[i_x2 * components]; \
                        output2g = in_row1[i_x2 * components + 1]; \
                        output2b = in_row1[i_x2 * components + 2]; \
                        if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
 \
/* Pixel 3: second row, first column */ \
                        output3r = in_row2[i_x1 * components]; \
                        output3g = in_row2[i_x1 * components + 1]; \
                        output3b = in_row2[i_x1 * components + 2]; \
                        if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
 \
/* Pixel 4: second row, second column */ \
                        output4r = in_row2[i_x2 * components]; \
                        output4g = in_row2[i_x2 * components + 1]; \
                        output4b = in_row2[i_x2 * components + 2]; \
                        if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
 \
/* Blend across the columns within each row, then across the two rows */ \
                        out_row[j * components] = \
                                (type)(anti_a_f * (anti_b_f * output1r + \
                                b_f * output2r) + \
                                a_f * (anti_b_f * output3r + \
                                b_f * output4r)); \
                        out_row[j * components + 1] = \
                                (type)(anti_a_f * (anti_b_f * output1g + \
                                b_f * output2g) + \
                                a_f * (anti_b_f * output3g + \
                                b_f * output4g)); \
                        out_row[j * components + 2] = \
                                (type)(anti_a_f * (anti_b_f * output1b + \
                                b_f * output2b) + \
                                a_f * (anti_b_f * output3b + \
                                b_f * output4b)); \
                        if(components == 4) \
                                out_row[j * components + 3] = \
                                        (type)(anti_a_f * (anti_b_f * output1a + \
                                        b_f * output2a) + \
                                        a_f * (anti_b_f * output3a + \
                                        b_f * output4a)); \
                } \
        } \
 \
        delete [] table_int_x1; \
        delete [] table_int_x2; \
        delete [] table_int_y1; \
        delete [] table_int_y2; \
        delete [] table_frac_x_f; \
        delete [] table_antifrac_x_f; \
        delete [] table_frac_y_f; \
        delete [] table_antifrac_y_f; \
}
1224
1225
// Expands to a block that fills output rows pkg->out_row1 up to (but not
// including) pkg->out_row2.  Every output pixel is the sum of a 4x4 group of
// input pixels, each weighted by the product of its row coefficient and its
// column coefficient from tabulate_bcubic_f().  Results are stored with a
// plain cast to the channel type.
//
//   max        - not used by this macro.
//   type       - channel storage type.
//   components - channels per pixel; the fourth channel is only processed
//                when this is 4.
//
// Names taken from the expansion scope: pkg, input, output, scale_w, scale_h,
// in_x1_float, in_y1_float, out_w_int and out_h_int.
#define BICUBIC(max, type, components) \
{ \
        float k_y = 1.0 / scale_h; \
        float k_x = 1.0 / scale_w; \
        type **in_rows = (type**)input->get_rows(); \
        type **out_rows = (type**)output->get_rows(); \
        float *bspline_x_f, *bspline_y_f; \
        int *in_x_table, *in_y_table; \
        int in_h_int = input->get_h(); \
        int in_w_int = input->get_w(); \
 \
/* Four coefficients and four clamped coordinates per output column */ \
        tabulate_bcubic_f(bspline_x_f, \
                in_x_table, \
                k_x, \
                in_x1_float, \
                out_w_int, \
                in_w_int, \
                -1); \
 \
/* Same for every output row; indexed by absolute output row below */ \
        tabulate_bcubic_f(bspline_y_f, \
                in_y_table, \
                k_y, \
                in_y1_float, \
                out_h_int, \
                in_h_int, \
                1); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                for(int j = 0; j < out_w_int; j++) \
                { \
                        float output1_f, output2_f, output3_f, output4_f; \
                        output1_f = 0; \
                        output2_f = 0; \
                        output3_f = 0; \
                        if(components == 4) \
                                output4_f = 0; \
                        int table_y = i * 4; \
 \
/* Kernel: m walks the four rows, n the four columns */ \
                        for(int m = -1; m < 3; m++) \
                        { \
                                float r1_f = bspline_y_f[table_y]; \
                                int y = in_y_table[table_y]; \
                                int table_x = j * 4; \
 \
                                for(int n = -1; n < 3; n++) \
                                { \
                                        float r2_f = bspline_x_f[table_x]; \
                                        int x = in_x_table[table_x]; \
                                        float r_square_f = r1_f * r2_f; \
                                        output1_f += r_square_f * in_rows[y][x * components]; \
                                        output2_f += r_square_f * in_rows[y][x * components + 1]; \
                                        output3_f += r_square_f * in_rows[y][x * components + 2]; \
                                        if(components == 4) \
                                                output4_f += r_square_f * in_rows[y][x * components + 3]; \
 \
                                        table_x++; \
                                } \
                                table_y++; \
                        } \
 \
                        out_rows[i][j * components] = (type)output1_f; \
                        out_rows[i][j * components + 1] = (type)output2_f; \
                        out_rows[i][j * components + 2] = (type)output3_f; \
                        if(components == 4) \
                                out_rows[i][j * components + 3] = (type)output4_f; \
                } \
        } \
 \
        delete [] bspline_x_f; \
        delete [] bspline_y_f; \
        delete [] in_x_table; \
        delete [] in_y_table; \
}
1312
1313
1314
1315
1316 // Pow function is not thread safe in Compaqt C
1317 #define CUBE(x) ((x) * (x) * (x))
1318
1319 float ScaleUnit::cubic_bspline(float x)
1320 {
1321         float a, b, c, d;
1322
1323         if((x + 2.0F) <= 0.0F)
1324         {
1325         a = 0.0F;
1326         }
1327         else
1328         {
1329         a = CUBE(x + 2.0F);
1330         }
1331
1332
1333         if((x + 1.0F) <= 0.0F)
1334         {
1335         b = 0.0F;
1336         }
1337         else
1338         {
1339         b = CUBE(x + 1.0F);
1340         }
1341
1342         if(x <= 0)
1343         {
1344         c = 0.0F;
1345         }
1346         else
1347         {
1348         c = CUBE(x);
1349         }
1350
1351         if((x - 1.0F) <= 0.0F)
1352         {
1353         d = 0.0F;
1354         }
1355         else
1356         {
1357         d = CUBE(x - 1.0F);
1358         }
1359
1360
1361         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1362 }
1363
1364
1365 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1366         int* &coord_table,
1367         float scale,
1368         float start,
1369         int pixels,
1370         int total_pixels,
1371         float coefficient)
1372 {
1373         coef_table = new float[pixels * 4];
1374         coord_table = new int[pixels * 4];
1375         for(int i = 0, j = 0; i < pixels; i++)
1376         {
1377                 float f_x = (float)i * scale + start;
1378                 float a = f_x - floor(f_x);
1379
1380                 for(float m = -1; m < 3; m++)
1381                 {
1382                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1383                         coord_table[j] = (int)(f_x + m);
1384                         CLAMP(coord_table[j], 0, total_pixels - 1);
1385                         j++;
1386                 }
1387
1388         }
1389 }
1390
1391 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1392         int* &coord_table,
1393         float scale,
1394         int start,
1395         int pixels,
1396         int total_pixels,
1397         float coefficient)
1398 {
1399         coef_table = new int[pixels * 4];
1400         coord_table = new int[pixels * 4];
1401         for(int i = 0, j = 0; i < pixels; i++)
1402         {
1403                 float f_x = (float)i * scale + start;
1404                 float a = f_x - floor(f_x);
1405
1406                 for(float m = -1; m < 3; m++)
1407                 {
1408                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1409                         coord_table[j] = (int)(f_x + m);
1410                         CLAMP(coord_table[j], 0, total_pixels - 1);
1411                         j++;
1412                 }
1413
1414         }
1415 }
1416
1417 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1418                 int* &table_int2,
1419                 float* &table_frac,
1420                 float* &table_antifrac,
1421                 float scale,
1422                 int pixel1,
1423                 int pixel2,
1424                 float start,
1425                 int total_pixels)
1426 {
1427         table_int1 = new int[pixel2 - pixel1];
1428         table_int2 = new int[pixel2 - pixel1];
1429         table_frac = new float[pixel2 - pixel1];
1430         table_antifrac = new float[pixel2 - pixel1];
1431
1432         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1433         {
1434                 float f_x = (float)i * scale + start;
1435                 int i_x = (int)floor(f_x);
1436                 float a = (f_x - floor(f_x));
1437
1438                 table_int1[j] = i_x;
1439                 table_int2[j] = i_x + 1;
1440                 CLAMP(table_int1[j], 0, total_pixels - 1);
1441                 CLAMP(table_int2[j], 0, total_pixels - 1);
1442                 table_frac[j] = a;
1443                 table_antifrac[j] = 1.0F - a;
1444 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1445         }
1446 }
1447
1448 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1449                 int* &table_int2,
1450                 int* &table_frac,
1451                 int* &table_antifrac,
1452                 float scale,
1453                 int pixel1,
1454                 int pixel2,
1455                 float start,
1456                 int total_pixels)
1457 {
1458         table_int1 = new int[pixel2 - pixel1];
1459         table_int2 = new int[pixel2 - pixel1];
1460         table_frac = new int[pixel2 - pixel1];
1461         table_antifrac = new int[pixel2 - pixel1];
1462
1463         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1464         {
1465                 double f_x = (float)i * scale + start;
1466                 int i_x = (int)floor(f_x);
1467                 float a = (f_x - floor(f_x));
1468
1469                 table_int1[j] = i_x;
1470                 table_int2[j] = i_x + 1;
1471                 CLAMP(table_int1[j], 0, total_pixels - 1);
1472                 CLAMP(table_int2[j], 0, total_pixels - 1);
1473                 table_frac[j] = (int)(a * 0xffff);
1474                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1475 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1476         }
1477 }
1478
1479 void ScaleUnit::process_package(LoadPackage *package)
1480 {
1481         ScalePackage *pkg = (ScalePackage*)package;
1482
1483 //printf("ScaleUnit::process_package 1\n");
1484 // Arguments for macros
1485         VFrame *output = engine->scale_output;
1486         VFrame *input = engine->scale_input;
1487         float scale_w = engine->w_scale;
1488         float scale_h = engine->h_scale;
1489         float in_x1_float = engine->in_x1_float;
1490         float in_y1_float = engine->in_y1_float;
1491         int out_h_int = engine->out_h_int;
1492         int out_w_int = engine->out_w_int;
1493         int do_yuv =
1494                 (input->get_color_model() == BC_YUV888 ||
1495                 input->get_color_model() == BC_YUVA8888 ||
1496                 input->get_color_model() == BC_YUV161616 ||
1497                 input->get_color_model() == BC_YUVA16161616);
1498
1499 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1500         if(engine->interpolation_type == CUBIC_CUBIC ||
1501                 (engine->interpolation_type == CUBIC_LINEAR
1502                         && engine->w_scale > 1 &&
1503                         engine->h_scale > 1))
1504         {
1505                 switch(engine->scale_input->get_color_model())
1506                 {
1507                         case BC_RGB_FLOAT:
1508                                 BICUBIC(1.0, float, 3);
1509                                 break;
1510
1511                         case BC_RGBA_FLOAT:
1512                                 BICUBIC(1.0, float, 4);
1513                                 break;
1514
1515                         case BC_RGB888:
1516                         case BC_YUV888:
1517                                 BICUBIC(0xff, unsigned char, 3);
1518                                 break;
1519
1520                         case BC_RGBA8888:
1521                         case BC_YUVA8888:
1522                                 BICUBIC(0xff, unsigned char, 4);
1523                                 break;
1524
1525                         case BC_RGB161616:
1526                         case BC_YUV161616:
1527                                 BICUBIC(0xffff, uint16_t, 3);
1528                                 break;
1529
1530                         case BC_RGBA16161616:
1531                         case BC_YUVA16161616:
1532                                 BICUBIC(0xffff, uint16_t, 4);
1533                                 break;
1534                 }
1535         }
1536         else
1537 // Perform bilinear scaling input -> scale_output
1538         if(engine->w_scale > 1 &&
1539                 engine->h_scale > 1)
1540         {
1541                 switch(engine->scale_input->get_color_model())
1542                 {
1543                         case BC_RGB_FLOAT:
1544                                 BILINEAR_ENLARGE(1.0, float, 3);
1545                                 break;
1546
1547                         case BC_RGBA_FLOAT:
1548                                 BILINEAR_ENLARGE(1.0, float, 4);
1549                                 break;
1550
1551                         case BC_RGB888:
1552                         case BC_YUV888:
1553                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1554                                 break;
1555
1556                         case BC_RGBA8888:
1557                         case BC_YUVA8888:
1558                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1559                                 break;
1560
1561                         case BC_RGB161616:
1562                         case BC_YUV161616:
1563                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1564                                 break;
1565
1566                         case BC_RGBA16161616:
1567                         case BC_YUVA16161616:
1568                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1569                                 break;
1570                 }
1571         }
1572         else
1573 // Bilinear reduction
1574         {
1575                 switch(engine->scale_input->get_color_model())
1576                 {
1577                         case BC_RGB_FLOAT:
1578                                 BILINEAR_REDUCE(1.0, float, 3);
1579                                 break;
1580                         case BC_RGBA_FLOAT:
1581                                 BILINEAR_REDUCE(1.0, float, 4);
1582                                 break;
1583                         case BC_RGB888:
1584                         case BC_YUV888:
1585                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1586                                 break;
1587
1588                         case BC_RGBA8888:
1589                         case BC_YUVA8888:
1590                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1591                                 break;
1592
1593                         case BC_RGB161616:
1594                         case BC_YUV161616:
1595                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1596                                 break;
1597
1598                         case BC_RGBA16161616:
1599                         case BC_YUVA16161616:
1600                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1601                                 break;
1602                 }
1603         }
1604 //printf("ScaleUnit::process_package 3\n");
1605
1606 }
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1621  : LoadServer(cpus, cpus)
1622 {
1623         this->overlay = overlay;
1624 }
1625
1626 ScaleEngine::~ScaleEngine()
1627 {
1628 }
1629
1630 void ScaleEngine::init_packages()
1631 {
1632         for(int i = 0; i < get_total_packages(); i++)
1633         {
1634                 ScalePackage *package = (ScalePackage*)get_package(i);
1635                 package->out_row1 = out_h_int / get_total_packages() * i;
1636                 package->out_row2 = package->out_row1 + out_h_int / get_total_packages();
1637
1638                 if(i >= get_total_packages() - 1)
1639                         package->out_row2 = out_h_int;
1640         }
1641 }
1642
1643 LoadClient* ScaleEngine::new_client()
1644 {
1645         return new ScaleUnit(this, overlay);
1646 }
1647
1648 LoadPackage* ScaleEngine::new_package()
1649 {
1650         return new ScalePackage;
1651 }
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665 TranslatePackage::TranslatePackage()
1666 {
1667 }
1668
1669
1670
1671 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1672  : LoadClient(server)
1673 {
1674         this->overlay = overlay;
1675         this->engine = server;
1676 }
1677
1678 TranslateUnit::~TranslateUnit()
1679 {
1680 }
1681
1682
1683
1684 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1685         float out_x1,
1686         float out_x2,
1687         float in_x1,
1688         float in_x2,
1689         int in_total,
1690         int out_total,
1691         int &out_x1_int,
1692         int &out_x2_int)
1693 {
1694         int out_w_int;
1695         float offset = out_x1 - in_x1;
1696 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1697
1698         out_x1_int = (int)out_x1;
1699         out_x2_int = MIN((int)ceil(out_x2), out_total);
1700         out_w_int = out_x2_int - out_x1_int;
1701
1702         table = new transfer_table_f[out_w_int];
1703         bzero(table, sizeof(transfer_table_f) * out_w_int);
1704
1705
1706 // printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f scale=%f %f\n",
1707 // in_x1,
1708 // in_x2,
1709 // out_x1,
1710 // out_x2,
1711 // in_x2 - in_x1,
1712 // out_x2 - out_x1);
1713 //
1714
1715         float in_x = in_x1;
1716         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1717         {
1718                 transfer_table_f *entry = &table[out_x - out_x1_int];
1719
1720                 entry->in_x1 = (int)in_x;
1721                 entry->in_x2 = (int)in_x + 1;
1722
1723 // Get fraction of output pixel to fill
1724                 entry->output_fraction = 1;
1725
1726                 if(out_x1 > out_x)
1727                 {
1728                         entry->output_fraction -= out_x1 - out_x;
1729                 }
1730
1731                 if(out_x2 < out_x + 1)
1732                 {
1733                         entry->output_fraction = (out_x2 - out_x);
1734                 }
1735
1736 // Advance in_x until out_x_fraction is filled
1737                 float out_x_fraction = entry->output_fraction;
1738                 float in_x_fraction = floor(in_x + 1) - in_x;
1739
1740                 if(out_x_fraction <= in_x_fraction)
1741                 {
1742                         entry->in_fraction1 = out_x_fraction;
1743                         entry->in_fraction2 = 0.0;
1744                         in_x += out_x_fraction;
1745                 }
1746                 else
1747                 {
1748                         entry->in_fraction1 = in_x_fraction;
1749                         in_x += out_x_fraction;
1750                         entry->in_fraction2 = in_x - floor(in_x);
1751                 }
1752
1753 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1754                 if(entry->in_x2 >= in_total)
1755                 {
1756                         entry->in_x2 = in_total - 1;
1757                         entry->in_fraction2 = 0.0;
1758                 }
1759
1760                 if(entry->in_x1 >= in_total)
1761                 {
1762                         entry->in_x1 = in_total - 1;
1763                         entry->in_fraction1 = 0.0;
1764                 }
1765 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1766 //      out_x,
1767 //      entry->in_x1,
1768 //      entry->in_x2,
1769 //      entry->in_fraction1,
1770 //      entry->in_fraction2,
1771 //      entry->output_fraction);
1772         }
1773 }
1774
1775
1776 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1777         float out_x1,
1778         float out_x2,
1779         float in_x1,
1780         float in_x2,
1781         int in_total,
1782         int out_total,
1783         int &out_x1_int,
1784         int &out_x2_int)
1785 {
1786         int out_w_int;
1787         float offset = out_x1 - in_x1;
1788
1789         out_x1_int = (int)out_x1;
1790         out_x2_int = MIN((int)ceil(out_x2), out_total);
1791         out_w_int = out_x2_int - out_x1_int;
1792
1793         table = new transfer_table_i[out_w_int];
1794         bzero(table, sizeof(transfer_table_i) * out_w_int);
1795
1796
1797 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1798
1799         float in_x = in_x1;
1800         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1801         {
1802                 transfer_table_i *entry = &table[out_x - out_x1_int];
1803
1804                 entry->in_x1 = (int)in_x;
1805                 entry->in_x2 = (int)in_x + 1;
1806
1807 // Get fraction of output pixel to fill
1808                 entry->output_fraction = 0x10000;
1809
1810                 if(out_x1 > out_x)
1811                 {
1812                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1813                 }
1814
1815                 if(out_x2 < out_x + 1)
1816                 {
1817                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1818                 }
1819
1820 // Advance in_x until out_x_fraction is filled
1821                 int out_x_fraction = entry->output_fraction;
1822                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1823
1824                 if(out_x_fraction <= in_x_fraction)
1825                 {
1826                         entry->in_fraction1 = out_x_fraction;
1827                         entry->in_fraction2 = 0;
1828                         in_x += (float)out_x_fraction / 0x10000;
1829                 }
1830                 else
1831                 {
1832                         entry->in_fraction1 = in_x_fraction;
1833                         in_x += (float)out_x_fraction / 0x10000;
1834                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1835                 }
1836
1837 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1838                 if(entry->in_x2 >= in_total)
1839                 {
1840                         entry->in_x2 = in_total - 1;
1841                         entry->in_fraction2 = 0;
1842                 }
1843
1844                 if(entry->in_x1 >= in_total)
1845                 {
1846                         entry->in_x1 = in_total - 1;
1847                         entry->in_fraction1 = 0;
1848                 }
1849 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1850 //      out_x,
1851 //      entry->in_x1,
1852 //      entry->in_x2,
1853 //      entry->in_fraction1,
1854 //      entry->in_fraction2,
1855 //      entry->output_fraction);
1856         }
1857 }
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
// Weighted sum of the four input samples of one channel for the output pixel
// currently being built by TRANSLATE.  Relies on src_row1/src_row2,
// src_x1/src_x2 and weight1..weight4 being in scope at the point of use.
#define TRANSLATE_TAPS(components, channel) \
        (src_row1[src_x1 * components + channel] * weight1 + \
        src_row1[src_x2 * components + channel] * weight2 + \
        src_row2[src_x1 * components + channel] * weight3 + \
        src_row2[src_x2 * components + channel] * weight4)

// Translate `input` onto `output` with sub-pixel accuracy and blend the result.
//
//   max            largest channel value (0xff, 0xffff or 1.0)
//   temp_type      type used for intermediate blend arithmetic
//   type           storage type of one channel
//   components     channels per pixel (3 or 4)
//   chroma_offset  zero level of the chroma channels, or 0 for RGB
//
// Expects these names in the enclosing scope: input, output (VFrame*), alpha,
// mode, row1, row2, x_table_f, y_table_f, out_x1_int, out_x2_int, out_y1_int.
// BLEND_3 / BLEND_4 are expanded with output, input1..input4, opacity and
// transparency declared here.
#define TRANSLATE(max, temp_type, type, components, chroma_offset) \
{ \
        type **src_rows = (type**)input->get_rows(); \
        type **dst_rows = (type**)output->get_rows(); \
 \
/* The 4 byte sample type is float, which is not rounded; narrower integer */ \
/* samples get 0.5 added before truncation. */ \
        const int integer_samples = (sizeof(type) != 4); \
        const float rounding = integer_samples ? 0.5 : 0.0; \
        const temp_type master_opacity = integer_samples ? \
                (temp_type)(alpha * max + 0.5) : \
                (temp_type)(alpha * max); \
 \
        for(int i = row1; i < row2; i++) \
        { \
/* Vertical lookup: the two input rows and their weights for this output row */ \
                const transfer_table_f *y_entry = &y_table_f[i - out_y1_int]; \
                const int src_y1 = y_entry->in_x1; \
                const int src_y2 = y_entry->in_x2; \
                const float y_weight1 = y_entry->in_fraction1; \
                const float y_weight2 = y_entry->in_fraction2; \
                const float y_coverage = y_entry->output_fraction; \
                type *src_row1 = src_rows[src_y1]; \
                type *src_row2 = src_rows[src_y2]; \
                type *dst_row = dst_rows[i]; \
 \
                for(int j = out_x1_int; j < out_x2_int; j++) \
                { \
/* Horizontal lookup: the two input columns and their weights */ \
                        const transfer_table_f *x_entry = &x_table_f[j - out_x1_int]; \
                        const int src_x1 = x_entry->in_x1; \
                        const int src_x2 = x_entry->in_x2; \
                        const float x_weight1 = x_entry->in_fraction1; \
                        const float x_weight2 = x_entry->in_fraction2; \
                        const float x_coverage = x_entry->output_fraction; \
                        type *output = &dst_row[j * components]; \
                        temp_type input1, input2, input3, input4; \
 \
/* Bilinear weights of the four neighbouring input samples */ \
                        const float weight1 = x_weight1 * y_weight1; \
                        const float weight2 = x_weight2 * y_weight1; \
                        const float weight3 = x_weight1 * y_weight2; \
                        const float weight4 = x_weight2 * y_weight2; \
 \
                        input1 = (type)(TRANSLATE_TAPS(components, 0) + rounding); \
 \
/* Add chroma to fractional pixels */ \
                        if(chroma_offset) \
                        { \
                                const float extra_chroma = (1.0F - \
                                        weight1 - \
                                        weight2 - \
                                        weight3 - \
                                        weight4) * chroma_offset; \
                                input2 = (type)(TRANSLATE_TAPS(components, 1) + \
                                        extra_chroma + rounding); \
                                input3 = (type)(TRANSLATE_TAPS(components, 2) + \
                                        extra_chroma + rounding); \
                        } \
                        else \
                        { \
                                input2 = (type)(TRANSLATE_TAPS(components, 1) + rounding); \
                                input3 = (type)(TRANSLATE_TAPS(components, 2) + rounding); \
                        } \
 \
/* input4 is only assigned, and only read by BLEND_4, when an alpha channel exists */ \
                        if(components == 4) \
                                input4 = (type)(TRANSLATE_TAPS(components, 3) + rounding); \
 \
/* Partially covered edge pixels are blended with proportionally less opacity */ \
                        temp_type opacity = integer_samples ? \
                                (temp_type)(master_opacity * \
                                        y_coverage * \
                                        x_coverage + 0.5) : \
                                (temp_type)(master_opacity * \
                                        y_coverage * \
                                        x_coverage); \
                        temp_type transparency = max - opacity; \
 \
                        if(components == 3) \
                        { \
                                BLEND_3(max, temp_type, type, chroma_offset); \
                        } \
                        else \
                        { \
                                BLEND_4(max, temp_type, type, chroma_offset); \
                        } \
                } \
        } \
}
2012
2013 void TranslateUnit::process_package(LoadPackage *package)
2014 {
2015         TranslatePackage *pkg = (TranslatePackage*)package;
2016         int out_y1_int;
2017         int out_y2_int;
2018         int out_x1_int;
2019         int out_x2_int;
2020
2021
2022 // Variables for TRANSLATE
2023         VFrame *input = engine->translate_input;
2024         VFrame *output = engine->translate_output;
2025         float in_x1 = engine->translate_in_x1;
2026         float in_y1 = engine->translate_in_y1;
2027         float in_x2 = engine->translate_in_x2;
2028         float in_y2 = engine->translate_in_y2;
2029         float out_x1 = engine->translate_out_x1;
2030         float out_y1 = engine->translate_out_y1;
2031         float out_x2 = engine->translate_out_x2;
2032         float out_y2 = engine->translate_out_y2;
2033         float alpha = engine->translate_alpha;
2034         int row1 = pkg->out_row1;
2035         int row2 = pkg->out_row2;
2036         int mode = engine->translate_mode;
2037         int in_total_x = input->get_w();
2038         int in_total_y = input->get_h();
2039         int do_yuv =
2040                 (engine->translate_input->get_color_model() == BC_YUV888 ||
2041                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
2042                 engine->translate_input->get_color_model() == BC_YUV161616 ||
2043                 engine->translate_input->get_color_model() == BC_YUVA16161616);
2044
2045         transfer_table_f *x_table_f;
2046         transfer_table_f *y_table_f;
2047         transfer_table_i *x_table_i;
2048         transfer_table_i *y_table_i;
2049
2050         translation_array_f(x_table_f,
2051                 out_x1,
2052                 out_x2,
2053                 in_x1,
2054                 in_x2,
2055                 in_total_x,
2056                 output->get_w(),
2057                 out_x1_int,
2058                 out_x2_int);
2059         translation_array_f(y_table_f,
2060                 out_y1,
2061                 out_y2,
2062                 in_y1,
2063                 in_y2,
2064                 in_total_y,
2065                 output->get_h(),
2066                 out_y1_int,
2067                 out_y2_int);
2068 //      printf("TranslateUnit::process_package 1 %d\n", mode);
2069 //      Timer a;
2070 //      a.update();
2071
2072         switch(engine->translate_input->get_color_model())
2073         {
2074                 case BC_RGB888:
2075                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2076                         break;
2077
2078                 case BC_RGBA8888:
2079                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2080                         break;
2081
2082                 case BC_RGB_FLOAT:
2083                         TRANSLATE(1.0, float, float, 3, 0);
2084                         break;
2085
2086                 case BC_RGBA_FLOAT:
2087                         TRANSLATE(1.0, float, float, 4, 0);
2088                         break;
2089
2090                 case BC_RGB161616:
2091                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2092                         break;
2093
2094                 case BC_RGBA16161616:
2095                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2096                         break;
2097
2098                 case BC_YUV888:
2099                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2100                         break;
2101
2102                 case BC_YUVA8888:
2103                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2104                         break;
2105
2106                 case BC_YUV161616:
2107                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2108                         break;
2109
2110                 case BC_YUVA16161616:
2111                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2112                         break;
2113         }
2114 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2115
2116         delete [] x_table_f;
2117         delete [] y_table_f;
2118 }
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2130  : LoadServer(cpus, cpus)
2131 {
2132         this->overlay = overlay;
2133 }
2134
2135 TranslateEngine::~TranslateEngine()
2136 {
2137 }
2138
2139 void TranslateEngine::init_packages()
2140 {
2141         int out_y1_int = (int)translate_out_y1;
2142         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2143         int out_h = out_y2_int - out_y1_int;
2144
2145         for(int i = 0; i < get_total_packages(); i++)
2146         {
2147                 TranslatePackage *package = (TranslatePackage*)get_package(i);
2148                 package->out_row1 = (int)(out_y1_int + out_h /
2149                         get_total_packages() *
2150                         i);
2151                 package->out_row2 = (int)((float)package->out_row1 +
2152                         out_h /
2153                         get_total_packages());
2154                 if(i >= get_total_packages() - 1)
2155                         package->out_row2 = out_y2_int;
2156         }
2157 }
2158
2159 LoadClient* TranslateEngine::new_client()
2160 {
2161         return new TranslateUnit(this, overlay);
2162 }
2163
2164 LoadPackage* TranslateEngine::new_package()
2165 {
2166         return new TranslatePackage;
2167 }
2168
2169
2170
2171
2172
2173
2174
2175
// Fetch one input pixel starting at `source` into input1..input4 and blend it
// into the pixel `output` points at.  Helper for SCALE_TRANSLATE, which
// supplies output, opacity and transparency.
#define SCALE_TRANSLATE_PIXEL(max, temp_type, type, components, chroma_offset, source) \
{ \
        temp_type input1, input2, input3, input4; \
 \
        input1 = (source)[0]; \
        input2 = (source)[1]; \
        input3 = (source)[2]; \
/* The fourth channel exists only in 4 component colour models */ \
        if(components == 4) \
                input4 = (source)[3]; \
 \
        if(components == 3) \
        { \
                BLEND_3(max, temp_type, type, chroma_offset); \
        } \
        else \
        { \
                BLEND_4(max, temp_type, type, chroma_offset); \
        } \
}

// Nearest neighbour scale and translate, followed by a blend into the output.
//
//   max            largest channel value (0xff, 0xffff or 1.0)
//   temp_type      type used for intermediate blend arithmetic
//   type           storage type of one channel
//   components     channels per pixel (3 or 4)
//   chroma_offset  zero level of the chroma channels, or 0 for RGB
//
// Expects these names in the enclosing scope: pkg, alpha, mode, in_rows,
// out_rows, x_table, y_table, in_x1, in_x2, out_x1, out_y1, out_w.
#define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
{ \
/* 4 byte samples are float and are not rounded */ \
        temp_type opacity = (sizeof(type) != 4) ? \
                (temp_type)(alpha * max + 0.5) : \
                (temp_type)(alpha * max); \
        temp_type transparency = max - opacity; \
/* Same for every row: does the X direction need the lookup table? */ \
        const int x_is_scaled = (out_w != in_x2 - in_x1); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                type *in_row = (type*)in_rows[y_table[i - out_y1]]; \
                type *output = (type*)out_rows[i] + out_x1 * components; \
 \
                if(x_is_scaled) \
                { \
/* X direction is scaled and requires a table lookup */ \
                        for(int j = 0; j < out_w; j++, output += components) \
                        { \
                                SCALE_TRANSLATE_PIXEL(max, temp_type, type, components, chroma_offset, \
                                        in_row + x_table[j] * components); \
                        } \
                } \
                else \
                { \
/* X direction is not scaled: walk input and output in step */ \
                        type *in_pixel = in_row + in_x1 * components; \
                        for(int j = 0; j < out_w; j++, in_pixel += components, output += components) \
                        { \
                                SCALE_TRANSLATE_PIXEL(max, temp_type, type, components, chroma_offset, \
                                        in_pixel); \
                        } \
                } \
        } \
}
2244
2245
2246
2247 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2248  : LoadClient(server)
2249 {
2250         this->overlay = overlay;
2251         this->scale_translate = server;
2252 }
2253
2254 ScaleTranslateUnit::~ScaleTranslateUnit()
2255 {
2256 }
2257
2258 void ScaleTranslateUnit::scale_array_f(int* &table,
2259         int out_x1,
2260         int out_x2,
2261         float in_x1,
2262         float in_x2)
2263 {
2264         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2265
2266         table = new int[(int)out_x2 - out_x1];
2267
2268         for(int i = 0; i < out_x2 - out_x1; i++)
2269                 table[i] = (int)((float)i / scale + in_x1);
2270 }
2271
2272 void ScaleTranslateUnit::process_package(LoadPackage *package)
2273 {
2274         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2275
2276 // Args for NEAREST_NEIGHBOR_MACRO
2277         VFrame *output = scale_translate->output;
2278         VFrame *input = scale_translate->input;
2279         int in_x1 = (int)scale_translate->in_x1;
2280         int in_y1 = (int)scale_translate->in_y1;
2281         int in_x2 = (int)scale_translate->in_x2;
2282         int in_y2 = (int)scale_translate->in_y2;
2283         int out_x1 = scale_translate->out_x1;
2284         int out_y1 = scale_translate->out_y1;
2285         int out_x2 = scale_translate->out_x2;
2286         int out_y2 = scale_translate->out_y2;
2287         float alpha = scale_translate->alpha;
2288         int mode = scale_translate->mode;
2289         int out_w = out_x2 - out_x1;
2290
2291         int *x_table = 0;
2292         int *y_table;
2293         unsigned char **in_rows = input->get_rows();
2294         unsigned char **out_rows = output->get_rows();
2295
2296 //      Timer a;
2297 //      a.update();
2298 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2299         if(out_w != in_x2 - in_x1)
2300         {
2301                 scale_array_f(x_table,
2302                         out_x1,
2303                         out_x2,
2304                         scale_translate->in_x1,
2305                         scale_translate->in_x2);
2306         }
2307         scale_array_f(y_table,
2308                 out_y1,
2309                 out_y2,
2310                 scale_translate->in_y1,
2311                 scale_translate->in_y2);
2312
2313
2314         if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1))
2315         {
2316 // if we have transfer replace and x direction is not scaled, PARTY!
2317                 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2318                 int line_len = out_w * bytes_per_pixel;
2319                 int in_start_byte = in_x1 * bytes_per_pixel;
2320                 int out_start_byte = out_x1 * bytes_per_pixel;
2321                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2322                 {
2323                         memcpy (out_rows[i] + out_start_byte,
2324                                 in_rows[y_table[i - out_y1]] + in_start_byte ,
2325                                 line_len);
2326                 }
2327
2328         }
2329         else
2330         switch(input->get_color_model())
2331         {
2332                 case BC_RGB888:
2333                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2334                         break;
2335
2336                 case BC_RGB_FLOAT:
2337                         SCALE_TRANSLATE(1.0, float, float, 3, 0);
2338                         break;
2339
2340                 case BC_YUV888:
2341                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2342                         break;
2343
2344                 case BC_RGBA8888:
2345                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2346                         break;
2347
2348                 case BC_RGBA_FLOAT:
2349                         SCALE_TRANSLATE(1.0, float, float, 4, 0);
2350                         break;
2351
2352                 case BC_YUVA8888:
2353                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2354                         break;
2355
2356
2357                 case BC_RGB161616:
2358                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2359                         break;
2360
2361                 case BC_YUV161616:
2362                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2363                         break;
2364
2365                 case BC_RGBA16161616:
2366                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2367                         break;
2368
2369                 case BC_YUVA16161616:
2370                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2371                         break;
2372         }
2373
2374 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2375         if(x_table)
2376                 delete [] x_table;
2377         delete [] y_table;
2378
2379 };
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2390  : LoadServer(cpus, cpus)
2391 {
2392         this->overlay = overlay;
2393 }
2394
2395 ScaleTranslateEngine::~ScaleTranslateEngine()
2396 {
2397 }
2398
2399 void ScaleTranslateEngine::init_packages()
2400 {
2401         int out_h = out_y2 - out_y1;
2402
2403         for(int i = 0; i < get_total_packages(); i++)
2404         {
2405                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)get_package(i);
2406                 package->out_row1 = (int)(out_y1 + out_h /
2407                         get_total_packages() *
2408                         i);
2409                 package->out_row2 = (int)((float)package->out_row1 +
2410                         out_h /
2411                         get_total_packages());
2412                 if(i >= get_total_packages() - 1)
2413                         package->out_row2 = out_y2;
2414         }
2415 }
2416
2417 LoadClient* ScaleTranslateEngine::new_client()
2418 {
2419         return new ScaleTranslateUnit(this, overlay);
2420 }
2421
2422 LoadPackage* ScaleTranslateEngine::new_package()
2423 {
2424         return new ScaleTranslatePackage;
2425 }
2426
2427
2428 ScaleTranslatePackage::ScaleTranslatePackage()
2429 {
2430 }
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
// Generic blend of rows pkg->out_row1 .. pkg->out_row2 - 1 of `input` onto the
// same rows of `output`: BLEND_3 is expanded for every pixel when
// components == 3, BLEND_4 otherwise.
// Needs `alpha`, `input`, `output` and `pkg` in the expanding scope.
// BLEND_3 / BLEND_4 read variables from this scope too (the visible part of
// BLEND_3 switches on `mode`, which is not a macro argument).  Presumably
// they consume input1..input4, output, opacity and transparency - confirm
// against their definitions - so none of the local names here may change.
#define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
{ \
/* alpha scaled to the sample range.  Sample types whose size is not 4 get */ \
/* + 0.5 before the conversion; 4 byte samples (float in the visible */ \
/* callers) are converted as they are. */ \
        temp_type opacity = (sizeof(type) != 4) ? \
                (temp_type)(alpha * max + 0.5) : \
                (temp_type)(alpha * max); \
        temp_type transparency = max - opacity; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
        int w = input->get_w(); \
        int h = input->get_h(); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                type* in_row = input_rows[i]; \
                type* output = output_rows[i]; \
 \
                for(int j = 0; j < w; j++) \
                { \
/* Source pixel; input4 is only loaded for 4 component pixels. */ \
                        temp_type input1 = in_row[0]; \
                        temp_type input2 = in_row[1]; \
                        temp_type input3 = in_row[2]; \
                        temp_type input4; \
                        if(components == 4) input4 = in_row[3]; \
 \
                        if(components != 3) \
                        { \
                                BLEND_4(max, temp_type, type, chroma_offset); \
                        } \
                        else \
                        { \
                                BLEND_3(max, temp_type, type, chroma_offset); \
                        } \
 \
                        in_row += components; \
                        output += components; \
                } \
        } \
}
2501
2502
// Row copy used for TRANSFER_REPLACE: every row of the package
// (pkg->out_row1 .. pkg->out_row2 - 1) is memcpy'd from `input` to `output`,
// input->get_w() pixels of `components` samples each.
// Needs `input`, `output` and `pkg` in the expanding scope.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
/* Bytes per row, kept in size_t: that is what sizeof yields and what */ \
/* memcpy takes, so there is no detour through int. */ \
        const size_t line_len = (size_t)input->get_w() * sizeof(type) * components; \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                memcpy(output_rows[i], input_rows[i], line_len); \
        } \
}
2517
// TRANSFER_NORMAL blend for pixels with exactly four samples (the visible
// callers use it for the 8 and 16 bit RGBA / YUVA models).
// Every input pixel is weighted by alpha * its own fourth sample; samples 1
// and 2 are blended relative to chroma_offset, and the fourth output sample
// becomes the larger of the input and output values.
// Needs `alpha`, `input`, `output` and `pkg` in the expanding scope.
#define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        const temp_type opacity = (temp_type)(alpha * max + 0.5); \
        const temp_type max_squared = ((temp_type)max) * max; \
 \
        type** dst_rows = (type**)output->get_rows(); \
        type** src_rows = (type**)input->get_rows(); \
        const int width = input->get_w(); \
 \
        for(int row = pkg->out_row1; row < pkg->out_row2; row++) \
        { \
                const type* src = src_rows[row]; \
                type* dst = dst_rows[row]; \
 \
                for(int col = 0; col < width; col++, src += 4, dst += 4) \
                { \
/* Both weights are scaled by max * max, hence the two divisions by max. */ \
                        const temp_type pixel_opacity = opacity * src[3]; \
                        const temp_type pixel_transparency = max_squared - pixel_opacity; \
 \
                        dst[0] = ((temp_type)src[0] * pixel_opacity + \
                                (temp_type)dst[0] * pixel_transparency) / max / max; \
                        dst[1] = (((temp_type)src[1] - chroma_offset) * pixel_opacity + \
                                ((temp_type)dst[1] - chroma_offset) * pixel_transparency) \
                                / max / max + \
                                chroma_offset; \
                        dst[2] = (((temp_type)src[2] - chroma_offset) * pixel_opacity + \
                                ((temp_type)dst[2] - chroma_offset) * pixel_transparency) \
                                / max / max + \
                                chroma_offset; \
                        dst[3] = (type)(src[3] > dst[3] ? src[3] : dst[3]); \
                } \
        } \
}
2560
2561
2562
// TRANSFER_NORMAL blend for pixels with exactly three samples.  All samples
// are treated alike, so each row is walked as 3 * width independent samples:
//   out = (in * opacity + out * transparency) >> bits
// with opacity + transparency == 1 << bits, bits being the sample width.
// max and chroma_offset are accepted for symmetry with the other BLEND_ONLY
// macros but are not used by the body.
// Needs `alpha`, `input`, `output` and `pkg` in the expanding scope.
#define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        const int bits = sizeof(type) * 8; \
        const temp_type full_scale = (temp_type)1 << bits; \
        const temp_type opacity = (temp_type)(alpha * full_scale + 0.5); \
        const temp_type transparency = full_scale - opacity; \
 \
        type** dst_rows = (type**)output->get_rows(); \
        type** src_rows = (type**)input->get_rows(); \
/* Number of samples per row, i.e. three times the pixel width. */ \
        const int samples = input->get_w() * 3; \
 \
        for(int row = pkg->out_row1; row < pkg->out_row2; row++) \
        { \
                const type* src = src_rows[row]; \
                type* dst = dst_rows[row]; \
 \
                for(int k = 0; k < samples; k++) \
                { \
                        dst[k] = ((temp_type)src[k] * opacity + dst[k] * transparency) >> bits; \
                } \
        } \
}
2588
2589
2590
2591 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2592  : LoadClient(server)
2593 {
2594         this->overlay = overlay;
2595         this->blend_engine = server;
2596 }
2597
2598 BlendUnit::~BlendUnit()
2599 {
2600 }
2601
2602 void BlendUnit::process_package(LoadPackage *package)
2603 {
2604         BlendPackage *pkg = (BlendPackage*)package;
2605
2606
2607         VFrame *output = blend_engine->output;
2608         VFrame *input = blend_engine->input;
2609         float alpha = blend_engine->alpha;
2610         int mode = blend_engine->mode;
2611
2612         if (mode == TRANSFER_REPLACE)
2613         {
2614                 switch(input->get_color_model())
2615                 {
2616                         case BC_RGB_FLOAT:
2617                                 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2618                                 break;
2619                         case BC_RGBA_FLOAT:
2620                                 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2621                                 break;
2622                         case BC_RGB888:
2623                         case BC_YUV888:
2624                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2625                                 break;
2626                         case BC_RGBA8888:
2627                         case BC_YUVA8888:
2628                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2629                                 break;
2630                         case BC_RGB161616:
2631                         case BC_YUV161616:
2632                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2633                                 break;
2634                         case BC_RGBA16161616:
2635                         case BC_YUVA16161616:
2636                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2637                                 break;
2638                 }
2639         }
2640         else
2641         if (mode == TRANSFER_NORMAL)
2642         {
2643                 switch(input->get_color_model())
2644                 {
2645                         case BC_RGB_FLOAT:
2646                         {
2647                                 float opacity = alpha;
2648                                 float transparency = 1.0 - alpha;
2649
2650                                 float** output_rows = (float**)output->get_rows();
2651                                 float** input_rows = (float**)input->get_rows();
2652                                 int w = input->get_w() * 3;
2653                                 int h = input->get_h();
2654
2655                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2656                                 {
2657                                         float* in_row = input_rows[i];
2658                                         float* output = output_rows[i];
2659 /* w = 3x width! */
2660                                         for(int j = 0; j < w; j++)
2661                                         {
2662                                                 *output = *in_row * opacity + *output * transparency;
2663                                                 in_row++;
2664                                                 output++;
2665                                         }
2666                                 }
2667                                 break;
2668                         }
2669                         case BC_RGBA_FLOAT:
2670                         {
2671                                 float opacity = alpha;
2672                                 float transparency = 1.0 - alpha;
2673
2674                                 float** output_rows = (float**)output->get_rows();
2675                                 float** input_rows = (float**)input->get_rows();
2676                                 int w = input->get_w();
2677                                 int h = input->get_h();
2678
2679                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2680                                 {
2681                                         float* in_row = input_rows[i];
2682                                         float* output = output_rows[i];
2683
2684                                         for(int j = 0; j < w; j++)
2685                                         {
2686                                                 float pixel_opacity, pixel_transparency;
2687                                                 pixel_opacity = opacity * in_row[3];
2688                                                 pixel_transparency = 1.0 - pixel_opacity;
2689
2690
2691                                                 output[0] = in_row[0] * pixel_opacity +
2692                                                         output[0] * pixel_transparency;
2693                                                 output[1] = in_row[1] * pixel_opacity +
2694                                                         output[1] * pixel_transparency;
2695                                                 output[2] = in_row[2] * pixel_opacity +
2696                                                         output[2] * pixel_transparency;
2697                                                 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
2698
2699                                                 in_row += 4;
2700                                                 output += 4;
2701                                         }
2702                                 }
2703                                 break;
2704                         }
2705                         case BC_RGB888:
2706                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2707                                 break;
2708                         case BC_YUV888:
2709                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2710                                 break;
2711                         case BC_RGBA8888:
2712                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2713                                 break;
2714                         case BC_YUVA8888:
2715                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2716                                 break;
2717                         case BC_RGB161616:
2718                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2719                                 break;
2720                         case BC_YUV161616:
2721                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2722                                 break;
2723                         case BC_RGBA16161616:
2724                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2725                                 break;
2726                         case BC_YUVA16161616:
2727                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2728                                 break;
2729                 }
2730         }
2731         else
2732         switch(input->get_color_model())
2733         {
2734                 case BC_RGB_FLOAT:
2735                         BLEND_ONLY(float, float, 1.0, 3, 0);
2736                         break;
2737                 case BC_RGBA_FLOAT:
2738                         BLEND_ONLY(float, float, 1.0, 4, 0);
2739                         break;
2740                 case BC_RGB888:
2741                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0);
2742                         break;
2743                 case BC_YUV888:
2744                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2745                         break;
2746                 case BC_RGBA8888:
2747                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0);
2748                         break;
2749                 case BC_YUVA8888:
2750                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2751                         break;
2752                 case BC_RGB161616:
2753                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0);
2754                         break;
2755                 case BC_YUV161616:
2756                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2757                         break;
2758                 case BC_RGBA16161616:
2759                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0);
2760                         break;
2761                 case BC_YUVA16161616:
2762                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2763                         break;
2764         }
2765 }
2766
2767
2768
2769 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2770  : LoadServer(cpus, cpus)
2771 {
2772         this->overlay = overlay;
2773 }
2774
2775 BlendEngine::~BlendEngine()
2776 {
2777 }
2778
2779 void BlendEngine::init_packages()
2780 {
2781         for(int i = 0; i < get_total_packages(); i++)
2782         {
2783                 BlendPackage *package = (BlendPackage*)get_package(i);
2784                 package->out_row1 = (int)(input->get_h() /
2785                         get_total_packages() *
2786                         i);
2787                 package->out_row2 = (int)((float)package->out_row1 +
2788                         input->get_h() /
2789                         get_total_packages());
2790
2791                 if(i >= get_total_packages() - 1)
2792                         package->out_row2 = input->get_h();
2793         }
2794 }
2795
2796 LoadClient* BlendEngine::new_client()
2797 {
2798         return new BlendUnit(this, overlay);
2799 }
2800
2801 LoadPackage* BlendEngine::new_package()
2802 {
2803         return new BlendPackage;
2804 }
2805
2806
2807 BlendPackage::BlendPackage()
2808 {
2809 }
2810
2811