gfx/wr/swgl/src/gl.cc

   1 /* This Source Code Form is subject to the terms of the Mozilla Public
   2  * License, v. 2.0. If a copy of the MPL was not distributed with this
   3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   4
   5 #include <stdlib.h>
   6 #include <stdint.h>
   7 #include <string.h>
   8 #include <assert.h>
   9 #include <stdio.h>
  10 #include <math.h>
  11
  12 #ifdef __MACH__
  13 #  include <mach/mach.h>
  14 #  include <mach/mach_time.h>
  15 #else
  16 #  include <time.h>
  17 #endif
  18
  19 #ifdef NDEBUG
  20 #  define debugf(...)
  21 #else
  22 #  define debugf(...) printf(__VA_ARGS__)
  23 #endif
  24
  25 // #define PRINT_TIMINGS
  26
  27 #ifdef _WIN32
  28 #  define ALWAYS_INLINE __forceinline
  29 #  define NO_INLINE __declspec(noinline)
  30
  31 // Including Windows.h brings a huge amount of namespace polution so just
  32 // define a couple of things manually
  33 typedef int BOOL;
  34 #  define WINAPI __stdcall
  35 #  define DECLSPEC_IMPORT __declspec(dllimport)
  36 #  define WINBASEAPI DECLSPEC_IMPORT
  37 typedef unsigned long DWORD;
  38 typedef long LONG;
  39 typedef __int64 LONGLONG;
  40 #  define DUMMYSTRUCTNAME
  41
  42 typedef union _LARGE_INTEGER {
  43   struct {
  44     DWORD LowPart;
  45     LONG HighPart;
  46   } DUMMYSTRUCTNAME;
  47   struct {
  48     DWORD LowPart;
  49     LONG HighPart;
  50   } u;
  51   LONGLONG QuadPart;
  52 } LARGE_INTEGER;
  53 extern "C" {
  54 WINBASEAPI BOOL WINAPI
  55 QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);
  56
  57 WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
  58 }
  59
  60 #else
  61 // GCC is slower when dealing with always_inline, especially in debug builds.
  62 // When using Clang, use always_inline more aggressively.
  63 #  if defined(__clang__) || defined(NDEBUG)
  64 #    define ALWAYS_INLINE __attribute__((always_inline)) inline
  65 #  else
  66 #    define ALWAYS_INLINE inline
  67 #  endif
  68 #  define NO_INLINE __attribute__((noinline))
  69 #endif
  70
  71 // Some functions may cause excessive binary bloat if inlined in debug or with
  72 // GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
  73 #if defined(__clang__) && defined(NDEBUG)
  74 #  define PREFER_INLINE ALWAYS_INLINE
  75 #else
  76 #  define PREFER_INLINE inline
  77 #endif
  78
  79 #define UNREACHABLE __builtin_unreachable()
  80
  81 #define UNUSED [[maybe_unused]]
  82
  83 #define FALLTHROUGH [[fallthrough]]
  84
  85 #if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN)
  86 #  define IMPLICIT __attribute__((annotate("moz_implicit")))
  87 #else
  88 #  define IMPLICIT
  89 #endif
  90
  91 #include "gl_defs.h"
  92 #include "glsl.h"
  93 #include "program.h"
  94 #include "texture.h"
  95
  96 using namespace glsl;
  97
  98 typedef ivec2_scalar IntPoint;
  99
 100 struct IntRect {
 101   int x0;
 102   int y0;
 103   int x1;
 104   int y1;
 105
 106   IntRect() : x0(0), y0(0), x1(0), y1(0) {}
 107   IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
 108   IntRect(IntPoint origin, IntPoint size)
 109       : x0(origin.x),
 110         y0(origin.y),
 111         x1(origin.x + size.x),
 112         y1(origin.y + size.y) {}
 113
 114   int width() const { return x1 - x0; }
 115   int height() const { return y1 - y0; }
 116   bool is_empty() const { return width() <= 0 || height() <= 0; }
 117
 118   IntPoint origin() const { return IntPoint(x0, y0); }
 119
 120   bool same_size(const IntRect& o) const {
 121     return width() == o.width() && height() == o.height();
 122   }
 123
 124   bool contains(const IntRect& o) const {
 125     return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
 126   }
 127
 128   IntRect& intersect(const IntRect& o) {
 129     x0 = max(x0, o.x0);
 130     y0 = max(y0, o.y0);
 131     x1 = min(x1, o.x1);
 132     y1 = min(y1, o.y1);
 133     return *this;
 134   }
 135
 136   IntRect intersection(const IntRect& o) {
 137     IntRect result = *this;
 138     result.intersect(o);
 139     return result;
 140   }
 141
 142   // Scale from source-space to dest-space, optionally rounding inward
 143   IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
 144                  bool roundIn = false) {
 145     x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
 146     y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
 147     x1 = (x1 * dstWidth) / srcWidth;
 148     y1 = (y1 * dstHeight) / srcHeight;
 149     return *this;
 150   }
 151
 152   // Flip the rect's Y coords around inflection point at Y=offset
 153   void invert_y(int offset) {
 154     y0 = offset - y0;
 155     y1 = offset - y1;
 156     swap(y0, y1);
 157   }
 158
 159   IntRect& offset(const IntPoint& o) {
 160     x0 += o.x;
 161     y0 += o.y;
 162     x1 += o.x;
 163     y1 += o.y;
 164     return *this;
 165   }
 166
 167   IntRect operator+(const IntPoint& o) const {
 168     return IntRect(*this).offset(o);
 169   }
 170   IntRect operator-(const IntPoint& o) const {
 171     return IntRect(*this).offset(-o);
 172   }
 173 };
 174
 175 typedef vec2_scalar Point2D;
 176 typedef vec4_scalar Point3D;
 177
 178 struct IntRange {
 179   int start;
 180   int end;
 181
 182   int len() const { return end - start; }
 183
 184   IntRange intersect(IntRange r) const {
 185     return {max(start, r.start), min(end, r.end)};
 186   }
 187 };
 188
 189 struct FloatRange {
 190   float start;
 191   float end;
 192
 193   float clip(float x) const { return clamp(x, start, end); }
 194
 195   FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }
 196
 197   FloatRange merge(FloatRange r) const {
 198     return {min(start, r.start), max(end, r.end)};
 199   }
 200
 201   IntRange round() const {
 202     return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
 203   }
 204
 205   IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
 206 };
 207
 208 template <typename P>
 209 static inline FloatRange x_range(P p0, P p1) {
 210   return {min(p0.x, p1.x), max(p0.x, p1.x)};
 211 }
 212
 213 struct VertexAttrib {
 214   size_t size = 0;  // in bytes
 215   GLenum type = 0;
 216   bool normalized = false;
 217   GLsizei stride = 0;
 218   GLuint offset = 0;
 219   bool enabled = false;
 220   GLuint divisor = 0;
 221   int vertex_array = 0;
 222   int vertex_buffer = 0;
 223   char* buf = nullptr;  // XXX: this can easily dangle
 224   size_t buf_size = 0;  // this will let us bounds check
 225
 226   // Mark the buffer as invalid so we don't accidentally use stale data.
 227   void disable() {
 228     enabled = false;
 229     buf = nullptr;
 230     buf_size = 0;
 231   }
 232 };
 233
 234 static int bytes_for_internal_format(GLenum internal_format) {
 235   switch (internal_format) {
 236     case GL_RGBA32F:
 237       return 4 * 4;
 238     case GL_RGBA32I:
 239       return 4 * 4;
 240     case GL_RGBA8:
 241     case GL_BGRA8:
 242     case GL_RGBA:
 243       return 4;
 244     case GL_R8:
 245     case GL_RED:
 246       return 1;
 247     case GL_RG8:
 248     case GL_RG:
 249       return 2;
 250     case GL_DEPTH_COMPONENT:
 251     case GL_DEPTH_COMPONENT16:
 252     case GL_DEPTH_COMPONENT24:
 253     case GL_DEPTH_COMPONENT32:
 254       return 4;
 255     case GL_RGB_RAW_422_APPLE:
 256       return 2;
 257     case GL_R16:
 258       return 2;
 259     case GL_RG16:
 260       return 4;
 261     default:
 262       debugf("internal format: %x\n", internal_format);
 263       assert(0);
 264       return 0;
 265   }
 266 }
 267
 268 static inline int aligned_stride(int row_bytes) { return (row_bytes + 3) & ~3; }
 269
 270 static TextureFormat gl_format_to_texture_format(int type) {
 271   switch (type) {
 272     case GL_RGBA32F:
 273       return TextureFormat::RGBA32F;
 274     case GL_RGBA32I:
 275       return TextureFormat::RGBA32I;
 276     case GL_RGBA8:
 277       return TextureFormat::RGBA8;
 278     case GL_R8:
 279       return TextureFormat::R8;
 280     case GL_RG8:
 281       return TextureFormat::RG8;
 282     case GL_R16:
 283       return TextureFormat::R16;
 284     case GL_RG16:
 285       return TextureFormat::RG16;
 286     case GL_RGB_RAW_422_APPLE:
 287       return TextureFormat::YUY2;
 288     default:
 289       assert(0);
 290       return TextureFormat::RGBA8;
 291   }
 292 }
 293
 294 struct Query {
 295   uint64_t value = 0;
 296 };
 297
 298 struct Buffer {
 299   char* buf = nullptr;
 300   size_t size = 0;
 301   size_t capacity = 0;
 302
 303   // Returns true if re-allocation succeeded, false otherwise...
 304   bool allocate(size_t new_size) {
 305     // If the size remains unchanged, don't allocate anything.
 306     if (new_size == size) {
 307       return true;
 308     }
 309     // If the new size is within the existing capacity of the buffer, just
 310     // reuse the existing buffer.
 311     if (new_size <= capacity) {
 312       size = new_size;
 313       return true;
 314     }
 315     // Otherwise we need to reallocate the buffer to hold up to the requested
 316     // larger size.
 317     char* new_buf = (char*)realloc(buf, new_size);
 318     assert(new_buf);
 319     if (!new_buf) {
 320       // If we fail, null out the buffer rather than leave around the old
 321       // allocation state.
 322       cleanup();
 323       return false;
 324     }
 325     // The reallocation succeeded, so install the buffer.
 326     buf = new_buf;
 327     size = new_size;
 328     capacity = new_size;
 329     return true;
 330   }
 331
 332   void cleanup() {
 333     if (buf) {
 334       free(buf);
 335       buf = nullptr;
 336       size = 0;
 337       capacity = 0;
 338     }
 339   }
 340
 341   ~Buffer() { cleanup(); }
 342 };
 343
 344 struct Framebuffer {
 345   GLuint color_attachment = 0;
 346   GLuint depth_attachment = 0;
 347 };
 348
 349 struct Renderbuffer {
 350   GLuint texture = 0;
 351
 352   void on_erase();
 353 };
 354
 355 TextureFilter gl_filter_to_texture_filter(int type) {
 356   switch (type) {
 357     case GL_NEAREST:
 358       return TextureFilter::NEAREST;
 359     case GL_NEAREST_MIPMAP_LINEAR:
 360       return TextureFilter::NEAREST;
 361     case GL_NEAREST_MIPMAP_NEAREST:
 362       return TextureFilter::NEAREST;
 363     case GL_LINEAR:
 364       return TextureFilter::LINEAR;
 365     case GL_LINEAR_MIPMAP_LINEAR:
 366       return TextureFilter::LINEAR;
 367     case GL_LINEAR_MIPMAP_NEAREST:
 368       return TextureFilter::LINEAR;
 369     default:
 370       assert(0);
 371       return TextureFilter::NEAREST;
 372   }
 373 }
 374
 375 struct Texture {
 376   GLenum internal_format = 0;
 377   int width = 0;
 378   int height = 0;
 379   char* buf = nullptr;
 380   size_t buf_size = 0;
 381   uint32_t buf_stride = 0;
 382   uint8_t buf_bpp = 0;
 383   GLenum min_filter = GL_NEAREST;
 384   GLenum mag_filter = GL_LINEAR;
 385   // The number of active locks on this texture. If this texture has any active
 386   // locks, we need to disallow modifying or destroying the texture as it may
 387   // be accessed by other threads where modifications could lead to races.
 388   int32_t locked = 0;
 389   // When used as an attachment of a framebuffer, rendering to the texture
 390   // behaves as if it is located at the given offset such that the offset is
 391   // subtracted from all transformed vertexes after the viewport is applied.
 392   IntPoint offset;
 393
 394   enum FLAGS {
 395     // If the buffer is internally-allocated by SWGL
 396     SHOULD_FREE = 1 << 1,
 397     // If the buffer has been cleared to initialize it. Currently this is only
 398     // utilized by depth buffers which need to know when depth runs have reset
 399     // to a valid row state. When unset, the depth runs may contain garbage.
 400     CLEARED = 1 << 2,
 401   };
 402   int flags = SHOULD_FREE;
 403   bool should_free() const { return bool(flags & SHOULD_FREE); }
 404   bool cleared() const { return bool(flags & CLEARED); }
 405
 406   void set_flag(int flag, bool val) {
 407     if (val) {
 408       flags |= flag;
 409     } else {
 410       flags &= ~flag;
 411     }
 412   }
 413   void set_should_free(bool val) {
 414     // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
 415     // might accidentally mistakenly realloc an externally allocated buffer as
 416     // if it were an internally allocated one.
 417     assert(!buf);
 418     set_flag(SHOULD_FREE, val);
 419   }
 420   void set_cleared(bool val) { set_flag(CLEARED, val); }
 421
 422   // Delayed-clearing state. When a clear of an FB is requested, we don't
 423   // immediately clear each row, as the rows may be subsequently overwritten
 424   // by draw calls, allowing us to skip the work of clearing the affected rows
 425   // either fully or partially. Instead, we keep a bit vector of rows that need
 426   // to be cleared later and save the value they need to be cleared with so
 427   // that we can clear these rows individually when they are touched by draws.
 428   // This currently only works for 2D textures, but not on texture arrays.
 429   int delay_clear = 0;
 430   uint32_t clear_val = 0;
 431   uint32_t* cleared_rows = nullptr;
 432
 433   void init_depth_runs(uint32_t z);
 434   void fill_depth_runs(uint32_t z, const IntRect& scissor);
 435
 436   void enable_delayed_clear(uint32_t val) {
 437     delay_clear = height;
 438     clear_val = val;
 439     if (!cleared_rows) {
 440       cleared_rows = new uint32_t[(height + 31) / 32];
 441     }
 442     memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
 443     if (height & 31) {
 444       cleared_rows[height / 32] = ~0U << (height & 31);
 445     }
 446   }
 447
 448   void disable_delayed_clear() {
 449     if (cleared_rows) {
 450       delete[] cleared_rows;
 451       cleared_rows = nullptr;
 452       delay_clear = 0;
 453     }
 454   }
 455
 456   int bpp() const { return buf_bpp; }
 457   int compute_bpp() const { return bytes_for_internal_format(internal_format); }
 458
 459   size_t stride() const { return buf_stride; }
 460   size_t compute_stride(int bpp, int width) const {
 461     return aligned_stride(bpp * width);
 462   }
 463
 464   // Set an external backing buffer of this texture.
 465   void set_buffer(void* new_buf, size_t new_stride) {
 466     assert(!should_free());
 467     // Ensure that the supplied stride is at least as big as the row data and
 468     // is aligned to the smaller of either the BPP or word-size. We need to at
 469     // least be able to sample data from within a row and sample whole pixels
 470     // of smaller formats without risking unaligned access.
 471     int new_bpp = compute_bpp();
 472     assert(new_stride >= size_t(new_bpp * width) &&
 473            new_stride % min(new_bpp, sizeof(uint32_t)) == 0);
 474
 475     buf = (char*)new_buf;
 476     buf_size = 0;
 477     buf_bpp = new_bpp;
 478     buf_stride = new_stride;
 479   }
 480
 481   // Returns true if re-allocation succeeded, false otherwise...
 482   bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
 483     assert(!locked);  // Locked textures shouldn't be reallocated
 484     // If we get here, some GL API call that invalidates the texture was used.
 485     // Mark the buffer as not-cleared to signal this.
 486     set_cleared(false);
 487     // Check if there is either no buffer currently or if we forced validation
 488     // of the buffer size because some dimension might have changed.
 489     if ((!buf || force) && should_free()) {
 490       // Compute the buffer's BPP and stride, since they may have changed.
 491       int new_bpp = compute_bpp();
 492       size_t new_stride = compute_stride(new_bpp, width);
 493       // Compute new size based on the maximum potential stride, rather than
 494       // the current stride, to hopefully avoid reallocations when size would
 495       // otherwise change too much...
 496       size_t max_stride = compute_stride(new_bpp, max(width, min_width));
 497       size_t size = max_stride * max(height, min_height);
 498       if ((!buf && size > 0) || size > buf_size) {
 499         // Allocate with a SIMD register-sized tail of padding at the end so we
 500         // can safely read or write past the end of the texture with SIMD ops.
 501         // Currently only the flat Z-buffer texture needs this padding due to
 502         // full-register loads and stores in check_depth and discard_depth. In
 503         // case some code in the future accidentally uses a linear filter on a
 504         // texture with less than 2 pixels per row, we also add this padding
 505         // just to be safe. All other texture types and use-cases should be
 506         // safe to omit padding.
 507         size_t padding =
 508             internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
 509                 ? sizeof(Float)
 510                 : 0;
 511         char* new_buf = (char*)realloc(buf, size + padding);
 512         assert(new_buf);
 513         if (!new_buf) {
 514           // Allocation failed, so ensure we don't leave stale buffer state.
 515           cleanup();
 516           return false;
 517         }
 518         // Successfully reallocated the buffer, so go ahead and set it.
 519         buf = new_buf;
 520         buf_size = size;
 521       }
 522       // Set the BPP and stride in case they changed.
 523       buf_bpp = new_bpp;
 524       buf_stride = new_stride;
 525     }
 526     // Allocation succeeded or nothing changed...
 527     return true;
 528   }
 529
 530   void cleanup() {
 531     assert(!locked);  // Locked textures shouldn't be destroyed
 532     if (buf) {
 533       // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
 534       // regardless of whether we internally allocated it. This will prevent us
 535       // from wrongly treating buf as having been internally allocated for when
 536       // we go to realloc if it actually was externally allocted.
 537       if (should_free()) {
 538         free(buf);
 539       }
 540       buf = nullptr;
 541       buf_size = 0;
 542       buf_bpp = 0;
 543       buf_stride = 0;
 544     }
 545     disable_delayed_clear();
 546   }
 547
 548   ~Texture() { cleanup(); }
 549
 550   IntRect bounds() const { return IntRect{0, 0, width, height}; }
 551   IntRect offset_bounds() const { return bounds() + offset; }
 552
 553   // Find the valid sampling bounds relative to the requested region
 554   IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
 555     IntRect bb = bounds().intersect(req) - req.origin();
 556     if (invertY) bb.invert_y(req.height());
 557     return bb;
 558   }
 559
 560   // Get a pointer for sampling at the given offset
 561   char* sample_ptr(int x, int y) const {
 562     return buf + y * stride() + x * bpp();
 563   }
 564
 565   // Get a pointer for sampling the requested region and limit to the provided
 566   // sampling bounds
 567   char* sample_ptr(const IntRect& req, const IntRect& bounds,
 568                    bool invertY = false) const {
 569     // Offset the sample pointer by the clamped bounds
 570     int x = req.x0 + bounds.x0;
 571     // Invert the Y offset if necessary
 572     int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
 573     return sample_ptr(x, y);
 574   }
 575 };
 576
 577 // The last vertex attribute is reserved as a null attribute in case a vertex
 578 // attribute is used without being set.
 579 #define MAX_ATTRIBS 17
 580 #define NULL_ATTRIB 16
 581 struct VertexArray {
 582   VertexAttrib attribs[MAX_ATTRIBS];
 583   int max_attrib = -1;
 584   // The GL spec defines element array buffer binding to be part of VAO state.
 585   GLuint element_array_buffer_binding = 0;
 586
 587   void validate();
 588 };
 589
 590 struct Shader {
 591   GLenum type = 0;
 592   ProgramLoader loader = nullptr;
 593 };
 594
 595 struct Program {
 596   ProgramImpl* impl = nullptr;
 597   VertexShaderImpl* vert_impl = nullptr;
 598   FragmentShaderImpl* frag_impl = nullptr;
 599   bool deleted = false;
 600
 601   ~Program() { delete impl; }
 602 };
 603
 604 // clang-format off
 605 // Fully-expand GL defines while ignoring more than 4 suffixes
 606 #define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
 607 // Generate a blend key enum symbol
 608 #define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
 609 #define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
 610 #define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
 611 #define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
 612
 613 // Utility macro to easily generate similar code for all implemented blend modes
 614 #define FOR_EACH_BLEND_KEY(macro)                                              \
 615   macro(GL_ONE, GL_ZERO, 0, 0)                                                 \
 616   macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)  \
 617   macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                  \
 618   macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0)                                 \
 619   macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE)                      \
 620   macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0)                                 \
 621   macro(GL_ZERO, GL_SRC_COLOR, 0, 0)                                           \
 622   macro(GL_ONE, GL_ONE, 0, 0)                                                  \
 623   macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA)                        \
 624   macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE)                       \
 625   macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0)                       \
 626   macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0)                                 \
 627   macro(GL_MIN, 0, 0, 0)                                                       \
 628   macro(GL_MAX, 0, 0, 0)                                                       \
 629   macro(GL_MULTIPLY_KHR, 0, 0, 0)                                              \
 630   macro(GL_SCREEN_KHR, 0, 0, 0)                                                \
 631   macro(GL_OVERLAY_KHR, 0, 0, 0)                                               \
 632   macro(GL_DARKEN_KHR, 0, 0, 0)                                                \
 633   macro(GL_LIGHTEN_KHR, 0, 0, 0)                                               \
 634   macro(GL_COLORDODGE_KHR, 0, 0, 0)                                            \
 635   macro(GL_COLORBURN_KHR, 0, 0, 0)                                             \
 636   macro(GL_HARDLIGHT_KHR, 0, 0, 0)                                             \
 637   macro(GL_SOFTLIGHT_KHR, 0, 0, 0)                                             \
 638   macro(GL_DIFFERENCE_KHR, 0, 0, 0)                                            \
 639   macro(GL_EXCLUSION_KHR, 0, 0, 0)                                             \
 640   macro(GL_HSL_HUE_KHR, 0, 0, 0)                                               \
 641   macro(GL_HSL_SATURATION_KHR, 0, 0, 0)                                        \
 642   macro(GL_HSL_COLOR_KHR, 0, 0, 0)                                             \
 643   macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0)                                        \
 644   macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0)                                       \
 645   macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)
 646
 647 #define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
 648 #define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
 649 #define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
 650 #define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
 651 enum BlendKey : uint8_t {
 652   FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
 653   FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
 654   FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
 655   FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
 656   BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
 657   MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
 658   AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
 659   AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
 660 };
 661 // clang-format on
 662
 663 const size_t MAX_TEXTURE_UNITS = 16;
 664
 665 template <typename T>
 666 static inline bool unlink(T& binding, T n) {
 667   if (binding == n) {
 668     binding = 0;
 669     return true;
 670   }
 671   return false;
 672 }
 673
 674 template <typename O>
 675 struct ObjectStore {
 676   O** objects = nullptr;
 677   size_t size = 0;
 678   // reserve object 0 as null
 679   size_t first_free = 1;
 680   O invalid;
 681
 682   ~ObjectStore() {
 683     if (objects) {
 684       for (size_t i = 0; i < size; i++) delete objects[i];
 685       free(objects);
 686     }
 687   }
 688
 689   bool grow(size_t i) {
 690     size_t new_size = size ? size : 8;
 691     while (new_size <= i) new_size += new_size / 2;
 692     O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
 693     assert(new_objects);
 694     if (!new_objects) return false;
 695     while (size < new_size) new_objects[size++] = nullptr;
 696     objects = new_objects;
 697     return true;
 698   }
 699
 700   void insert(size_t i, const O& o) {
 701     if (i >= size && !grow(i)) return;
 702     if (!objects[i]) objects[i] = new O(o);
 703   }
 704
 705   size_t next_free() {
 706     size_t i = first_free;
 707     while (i < size && objects[i]) i++;
 708     first_free = i;
 709     return i;
 710   }
 711
 712   size_t insert(const O& o = O()) {
 713     size_t i = next_free();
 714     insert(i, o);
 715     return i;
 716   }
 717
 718   O& operator[](size_t i) {
 719     insert(i, O());
 720     return i < size ? *objects[i] : invalid;
 721   }
 722
 723   O* find(size_t i) const { return i < size ? objects[i] : nullptr; }
 724
 725   template <typename T>
 726   void on_erase(T*, ...) {}
 727   template <typename T>
 728   void on_erase(T* o, decltype(&T::on_erase)) {
 729     o->on_erase();
 730   }
 731
 732   bool erase(size_t i) {
 733     if (i < size && objects[i]) {
 734       on_erase(objects[i], nullptr);
 735       delete objects[i];
 736       objects[i] = nullptr;
 737       if (i < first_free) first_free = i;
 738       return true;
 739     }
 740     return false;
 741   }
 742
 743   O** begin() const { return objects; }
 744   O** end() const { return &objects[size]; }
 745 };
 746
 747 struct Context {
 748   int32_t references = 1;
 749
 750   ObjectStore<Query> queries;
 751   ObjectStore<Buffer> buffers;
 752   ObjectStore<Texture> textures;
 753   ObjectStore<VertexArray> vertex_arrays;
 754   ObjectStore<Framebuffer> framebuffers;
 755   ObjectStore<Renderbuffer> renderbuffers;
 756   ObjectStore<Shader> shaders;
 757   ObjectStore<Program> programs;
 758
 759   GLenum last_error = GL_NO_ERROR;
 760
 761   IntRect viewport = {0, 0, 0, 0};
 762
 763   bool blend = false;
 764   GLenum blendfunc_srgb = GL_ONE;
 765   GLenum blendfunc_drgb = GL_ZERO;
 766   GLenum blendfunc_sa = GL_ONE;
 767   GLenum blendfunc_da = GL_ZERO;
 768   GLenum blend_equation = GL_FUNC_ADD;
 769   V8<uint16_t> blendcolor = 0;
 770   BlendKey blend_key = BLEND_KEY_NONE;
 771
 772   bool depthtest = false;
 773   bool depthmask = true;
 774   GLenum depthfunc = GL_LESS;
 775
 776   bool scissortest = false;
 777   IntRect scissor = {0, 0, 0, 0};
 778
 779   GLfloat clearcolor[4] = {0, 0, 0, 0};
 780   GLdouble cleardepth = 1;
 781
 782   int unpack_row_length = 0;
 783
 784   int shaded_rows = 0;
 785   int shaded_pixels = 0;
 786
 787   struct TextureUnit {
 788     GLuint texture_2d_binding = 0;
 789     GLuint texture_rectangle_binding = 0;
 790
 791     void unlink(GLuint n) {
 792       ::unlink(texture_2d_binding, n);
 793       ::unlink(texture_rectangle_binding, n);
 794     }
 795   };
 796   TextureUnit texture_units[MAX_TEXTURE_UNITS];
 797   int active_texture_unit = 0;
 798
 799   GLuint current_program = 0;
 800
 801   GLuint current_vertex_array = 0;
 802   bool validate_vertex_array = true;
 803
 804   GLuint pixel_pack_buffer_binding = 0;
 805   GLuint pixel_unpack_buffer_binding = 0;
 806   GLuint array_buffer_binding = 0;
 807   GLuint time_elapsed_query = 0;
 808   GLuint samples_passed_query = 0;
 809   GLuint renderbuffer_binding = 0;
 810   GLuint draw_framebuffer_binding = 0;
 811   GLuint read_framebuffer_binding = 0;
 812   GLuint unknown_binding = 0;
 813
 814   GLuint& get_binding(GLenum name) {
 815     switch (name) {
 816       case GL_PIXEL_PACK_BUFFER:
 817         return pixel_pack_buffer_binding;
 818       case GL_PIXEL_UNPACK_BUFFER:
 819         return pixel_unpack_buffer_binding;
 820       case GL_ARRAY_BUFFER:
 821         return array_buffer_binding;
 822       case GL_ELEMENT_ARRAY_BUFFER:
 823         return vertex_arrays[current_vertex_array].element_array_buffer_binding;
 824       case GL_TEXTURE_2D:
 825         return texture_units[active_texture_unit].texture_2d_binding;
 826       case GL_TEXTURE_RECTANGLE:
 827         return texture_units[active_texture_unit].texture_rectangle_binding;
 828       case GL_TIME_ELAPSED:
 829         return time_elapsed_query;
 830       case GL_SAMPLES_PASSED:
 831         return samples_passed_query;
 832       case GL_RENDERBUFFER:
 833         return renderbuffer_binding;
 834       case GL_DRAW_FRAMEBUFFER:
 835         return draw_framebuffer_binding;
 836       case GL_READ_FRAMEBUFFER:
 837         return read_framebuffer_binding;
 838       default:
 839         debugf("unknown binding %x\n", name);
 840         assert(false);
 841         return unknown_binding;
 842     }
 843   }
 844
 845   Texture& get_texture(sampler2D, int unit) {
 846     return textures[texture_units[unit].texture_2d_binding];
 847   }
 848
 849   Texture& get_texture(isampler2D, int unit) {
 850     return textures[texture_units[unit].texture_2d_binding];
 851   }
 852
 853   Texture& get_texture(sampler2DRect, int unit) {
 854     return textures[texture_units[unit].texture_rectangle_binding];
 855   }
 856
 857   IntRect apply_scissor(IntRect bb,
 858                         const IntPoint& origin = IntPoint(0, 0)) const {
 859     return scissortest ? bb.intersect(scissor - origin) : bb;
 860   }
 861
 862   IntRect apply_scissor(const Texture& t) const {
 863     return apply_scissor(t.bounds(), t.offset);
 864   }
 865 };
 866 static Context* ctx = nullptr;
 867 static VertexShaderImpl* vertex_shader = nullptr;
 868 static FragmentShaderImpl* fragment_shader = nullptr;
 869 static BlendKey blend_key = BLEND_KEY_NONE;
 870
 871 static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
 872
 873 template <typename S>
 874 static inline void init_filter(S* s, Texture& t) {
 875   // If the width is not at least 2 pixels, then we can't safely sample the end
 876   // of the row with a linear filter. In that case, just punt to using nearest
 877   // filtering instead.
 878   s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
 879                            : TextureFilter::NEAREST;
 880 }
 881
 882 template <typename S>
 883 static inline void init_sampler(S* s, Texture& t) {
 884   prepare_texture(t);
 885   s->width = t.width;
 886   s->height = t.height;
 887   s->stride = t.stride();
 888   int bpp = t.bpp();
 889   if (bpp >= 4)
 890     s->stride /= 4;
 891   else if (bpp == 2)
 892     s->stride /= 2;
 893   else
 894     assert(bpp == 1);
 895   // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
 896   // uint16_t* for formats with bpp < 4.
 897   s->buf = (uint32_t*)t.buf;
 898   s->format = gl_format_to_texture_format(t.internal_format);
 899 }
 900
 901 template <typename S>
 902 static inline void null_sampler(S* s) {
 903   // For null texture data, just make the sampler provide a 1x1 buffer that is
 904   // transparent black. Ensure buffer holds at least a SIMD vector of zero data
 905   // for SIMD padding of unaligned loads.
 906   static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
 907   s->width = 1;
 908   s->height = 1;
 909   s->stride = s->width;
 910   s->buf = (uint32_t*)zeroBuf;
 911   s->format = TextureFormat::RGBA8;
 912 }
 913
 914 template <typename S>
 915 static inline void null_filter(S* s) {
 916   s->filter = TextureFilter::NEAREST;
 917 }
 918
 919 template <typename S>
 920 S* lookup_sampler(S* s, int texture) {
 921   Texture& t = ctx->get_texture(s, texture);
 922   if (!t.buf) {
 923     null_sampler(s);
 924     null_filter(s);
 925   } else {
 926     init_sampler(s, t);
 927     init_filter(s, t);
 928   }
 929   return s;
 930 }
 931
 932 template <typename S>
 933 S* lookup_isampler(S* s, int texture) {
 934   Texture& t = ctx->get_texture(s, texture);
 935   if (!t.buf) {
 936     null_sampler(s);
 937   } else {
 938     init_sampler(s, t);
 939   }
 940   return s;
 941 }
 942
 943 int bytes_per_type(GLenum type) {
 944   switch (type) {
 945     case GL_INT:
 946       return 4;
 947     case GL_FLOAT:
 948       return 4;
 949     case GL_UNSIGNED_SHORT:
 950       return 2;
 951     case GL_UNSIGNED_BYTE:
 952       return 1;
 953     default:
 954       assert(0);
 955       return 0;
 956   }
 957 }
 958
 959 template <typename S, typename C>
 960 static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
 961   typedef typename ElementType<S>::ty elem_type;
 962   S scalar = {0};
 963   const C* src = reinterpret_cast<const C*>(buf);
 964   if (normalized) {
 965     const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
 966     for (size_t i = 0; i < size / sizeof(C); i++) {
 967       put_nth_component(scalar, i, elem_type(src[i]) * scale);
 968     }
 969   } else {
 970     for (size_t i = 0; i < size / sizeof(C); i++) {
 971       put_nth_component(scalar, i, elem_type(src[i]));
 972     }
 973   }
 974   return scalar;
 975 }
 976
 977 template <typename S>
 978 static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
 979   if (sizeof(S) <= va.size) {
 980     return *reinterpret_cast<const S*>(src);
 981   }
 982   if (va.type == GL_UNSIGNED_SHORT) {
 983     return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
 984   }
 985   if (va.type == GL_UNSIGNED_BYTE) {
 986     return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
 987   }
 988   assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
 989   S scalar = {0};
 990   memcpy(&scalar, src, va.size);
 991   return scalar;
 992 }
 993
 994 template <typename T>
 995 void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
 996                  int count) {
 997   typedef decltype(force_scalar(attrib)) scalar_type;
 998   // If no buffer is available, just use a zero default.
 999   if (!va.buf_size) {
1000     attrib = T(scalar_type{0});
1001   } else if (va.divisor != 0) {
1002     char* src = (char*)va.buf + va.stride * instance + va.offset;
1003     assert(src + va.size <= va.buf + va.buf_size);
1004     attrib = T(load_attrib_scalar<scalar_type>(va, src));
1005   } else {
1006     // Specialized for WR's primitive vertex order/winding.
1007     if (!count) return;
1008     assert(count >= 2 && count <= 4);
1009     char* src = (char*)va.buf + va.stride * start + va.offset;
1010     switch (count) {
1011       case 2: {
1012         // Lines must be indexed at offsets 0, 1.
1013         // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
1014         scalar_type lanes[2] = {
1015             load_attrib_scalar<scalar_type>(va, src),
1016             load_attrib_scalar<scalar_type>(va, src + va.stride)};
1017         attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
1018         break;
1019       }
1020       case 3: {
1021         // Triangles must be indexed at offsets 0, 1, 2.
1022         // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
1023         scalar_type lanes[3] = {
1024             load_attrib_scalar<scalar_type>(va, src),
1025             load_attrib_scalar<scalar_type>(va, src + va.stride),
1026             load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
1027         attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
1028         break;
1029       }
1030       default:
1031         // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
1032         // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
1033         // that the points form a convex path that can be traversed by the
1034         // rasterizer.
1035         attrib = (T){load_attrib_scalar<scalar_type>(va, src),
1036                      load_attrib_scalar<scalar_type>(va, src + va.stride),
1037                      load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
1038                      load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
1039         break;
1040     }
1041   }
1042 }
1043
1044 template <typename T>
1045 void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
1046                       int count) {
1047   typedef decltype(force_scalar(attrib)) scalar_type;
1048   // If no buffer is available, just use a zero default.
1049   if (!va.buf_size) {
1050     attrib = T{0};
1051     return;
1052   }
1053   char* src = nullptr;
1054   if (va.divisor != 0) {
1055     src = (char*)va.buf + va.stride * instance + va.offset;
1056   } else {
1057     if (!count) return;
1058     src = (char*)va.buf + va.stride * start + va.offset;
1059   }
1060   assert(src + va.size <= va.buf + va.buf_size);
1061   attrib = T(load_attrib_scalar<scalar_type>(va, src));
1062 }
1063
1064 void setup_program(GLuint program) {
1065   if (!program) {
1066     vertex_shader = nullptr;
1067     fragment_shader = nullptr;
1068     return;
1069   }
1070   Program& p = ctx->programs[program];
1071   assert(p.impl);
1072   assert(p.vert_impl);
1073   assert(p.frag_impl);
1074   vertex_shader = p.vert_impl;
1075   fragment_shader = p.frag_impl;
1076 }
1077
1078 extern ProgramLoader load_shader(const char* name);
1079
1080 extern "C" {
1081
1082 void UseProgram(GLuint program) {
1083   if (ctx->current_program && program != ctx->current_program) {
1084     auto* p = ctx->programs.find(ctx->current_program);
1085     if (p && p->deleted) {
1086       ctx->programs.erase(ctx->current_program);
1087     }
1088   }
1089   ctx->current_program = program;
1090   setup_program(program);
1091 }
1092
1093 void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
1094   ctx->viewport = IntRect{x, y, x + width, y + height};
1095 }
1096
1097 void Enable(GLenum cap) {
1098   switch (cap) {
1099     case GL_BLEND:
1100       ctx->blend = true;
1101       break;
1102     case GL_DEPTH_TEST:
1103       ctx->depthtest = true;
1104       break;
1105     case GL_SCISSOR_TEST:
1106       ctx->scissortest = true;
1107       break;
1108   }
1109 }
1110
1111 void Disable(GLenum cap) {
1112   switch (cap) {
1113     case GL_BLEND:
1114       ctx->blend = false;
1115       break;
1116     case GL_DEPTH_TEST:
1117       ctx->depthtest = false;
1118       break;
1119     case GL_SCISSOR_TEST:
1120       ctx->scissortest = false;
1121       break;
1122   }
1123 }
1124
1125 // Report the last error generated and clear the error status.
1126 GLenum GetError() {
1127   GLenum error = ctx->last_error;
1128   ctx->last_error = GL_NO_ERROR;
1129   return error;
1130 }
1131
1132 // Sets the error status to out-of-memory to indicate that a buffer
1133 // or texture re-allocation failed.
1134 static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; }
1135
1136 static const char* const extensions[] = {
1137     "GL_ARB_blend_func_extended",
1138     "GL_ARB_clear_texture",
1139     "GL_ARB_copy_image",
1140     "GL_ARB_draw_instanced",
1141     "GL_ARB_explicit_attrib_location",
1142     "GL_ARB_instanced_arrays",
1143     "GL_ARB_invalidate_subdata",
1144     "GL_ARB_texture_storage",
1145     "GL_EXT_timer_query",
1146     "GL_KHR_blend_equation_advanced",
1147     "GL_KHR_blend_equation_advanced_coherent",
1148     "GL_APPLE_rgb_422",
1149 };
1150
1151 void GetIntegerv(GLenum pname, GLint* params) {
1152   assert(params);
1153   switch (pname) {
1154     case GL_MAX_TEXTURE_UNITS:
1155     case GL_MAX_TEXTURE_IMAGE_UNITS:
1156       params[0] = MAX_TEXTURE_UNITS;
1157       break;
1158     case GL_MAX_TEXTURE_SIZE:
1159       params[0] = 1 << 15;
1160       break;
1161     case GL_MAX_ARRAY_TEXTURE_LAYERS:
1162       params[0] = 0;
1163       break;
1164     case GL_READ_FRAMEBUFFER_BINDING:
1165       params[0] = ctx->read_framebuffer_binding;
1166       break;
1167     case GL_DRAW_FRAMEBUFFER_BINDING:
1168       params[0] = ctx->draw_framebuffer_binding;
1169       break;
1170     case GL_PIXEL_PACK_BUFFER_BINDING:
1171       params[0] = ctx->pixel_pack_buffer_binding;
1172       break;
1173     case GL_PIXEL_UNPACK_BUFFER_BINDING:
1174       params[0] = ctx->pixel_unpack_buffer_binding;
1175       break;
1176     case GL_NUM_EXTENSIONS:
1177       params[0] = sizeof(extensions) / sizeof(extensions[0]);
1178       break;
1179     case GL_MAJOR_VERSION:
1180       params[0] = 3;
1181       break;
1182     case GL_MINOR_VERSION:
1183       params[0] = 2;
1184       break;
1185     case GL_MIN_PROGRAM_TEXEL_OFFSET:
1186       params[0] = 0;
1187       break;
1188     case GL_MAX_PROGRAM_TEXEL_OFFSET:
1189       params[0] = MAX_TEXEL_OFFSET;
1190       break;
1191     default:
1192       debugf("unhandled glGetIntegerv parameter %x\n", pname);
1193       assert(false);
1194   }
1195 }
1196
1197 void GetBooleanv(GLenum pname, GLboolean* params) {
1198   assert(params);
1199   switch (pname) {
1200     case GL_DEPTH_WRITEMASK:
1201       params[0] = ctx->depthmask;
1202       break;
1203     default:
1204       debugf("unhandled glGetBooleanv parameter %x\n", pname);
1205       assert(false);
1206   }
1207 }
1208
1209 const char* GetString(GLenum name) {
1210   switch (name) {
1211     case GL_VENDOR:
1212       return "Mozilla Gfx";
1213     case GL_RENDERER:
1214       return "Software WebRender";
1215     case GL_VERSION:
1216       return "3.2";
1217     case GL_SHADING_LANGUAGE_VERSION:
1218       return "1.50";
1219     default:
1220       debugf("unhandled glGetString parameter %x\n", name);
1221       assert(false);
1222       return nullptr;
1223   }
1224 }
1225
1226 const char* GetStringi(GLenum name, GLuint index) {
1227   switch (name) {
1228     case GL_EXTENSIONS:
1229       if (index >= sizeof(extensions) / sizeof(extensions[0])) {
1230         return nullptr;
1231       }
1232       return extensions[index];
1233     default:
1234       debugf("unhandled glGetStringi parameter %x\n", name);
1235       assert(false);
1236       return nullptr;
1237   }
1238 }
1239
1240 GLenum remap_blendfunc(GLenum rgb, GLenum a) {
1241   switch (a) {
1242     case GL_SRC_ALPHA:
1243       if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
1244       break;
1245     case GL_ONE_MINUS_SRC_ALPHA:
1246       if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
1247       break;
1248     case GL_DST_ALPHA:
1249       if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
1250       break;
1251     case GL_ONE_MINUS_DST_ALPHA:
1252       if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
1253       break;
1254     case GL_CONSTANT_ALPHA:
1255       if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
1256       break;
1257     case GL_ONE_MINUS_CONSTANT_ALPHA:
1258       if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
1259       break;
1260     case GL_SRC_COLOR:
1261       if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
1262       break;
1263     case GL_ONE_MINUS_SRC_COLOR:
1264       if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
1265       break;
1266     case GL_DST_COLOR:
1267       if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
1268       break;
1269     case GL_ONE_MINUS_DST_COLOR:
1270       if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
1271       break;
1272     case GL_CONSTANT_COLOR:
1273       if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
1274       break;
1275     case GL_ONE_MINUS_CONSTANT_COLOR:
1276       if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
1277       break;
1278     case GL_SRC1_ALPHA:
1279       if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
1280       break;
1281     case GL_ONE_MINUS_SRC1_ALPHA:
1282       if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
1283       break;
1284     case GL_SRC1_COLOR:
1285       if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
1286       break;
1287     case GL_ONE_MINUS_SRC1_COLOR:
1288       if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
1289       break;
1290   }
1291   return a;
1292 }
1293
1294 // Generate a hashed blend key based on blend func and equation state. This
1295 // allows all the blend state to be processed down to a blend key that can be
1296 // dealt with inside a single switch statement.
1297 static void hash_blend_key() {
1298   GLenum srgb = ctx->blendfunc_srgb;
1299   GLenum drgb = ctx->blendfunc_drgb;
1300   GLenum sa = ctx->blendfunc_sa;
1301   GLenum da = ctx->blendfunc_da;
1302   GLenum equation = ctx->blend_equation;
1303 #define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
1304   // Basic non-separate blend funcs used the two argument form
1305   int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
1306   // Separate alpha blend funcs use the 4 argument hash
1307   if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
1308   // Any other blend equation than the default func_add ignores the func and
1309   // instead generates a one-argument hash based on the equation
1310   if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
1311   switch (hash) {
1312 #define MAP_BLEND_KEY(...)                   \
1313   case HASH_BLEND_KEY(__VA_ARGS__):          \
1314     ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
1315     break;
1316     FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
1317     default:
1318       debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
1319              sa, da, equation);
1320       assert(false);
1321       break;
1322   }
1323 }
1324
1325 void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
1326   ctx->blendfunc_srgb = srgb;
1327   ctx->blendfunc_drgb = drgb;
1328   sa = remap_blendfunc(srgb, sa);
1329   da = remap_blendfunc(drgb, da);
1330   ctx->blendfunc_sa = sa;
1331   ctx->blendfunc_da = da;
1332
1333   hash_blend_key();
1334 }
1335
1336 void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
1337   I32 c = round_pixel((Float){b, g, r, a});
1338   ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
1339 }
1340
1341 void BlendEquation(GLenum mode) {
1342   assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
1343          (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
1344   if (mode != ctx->blend_equation) {
1345     ctx->blend_equation = mode;
1346     hash_blend_key();
1347   }
1348 }
1349
1350 void DepthMask(GLboolean flag) { ctx->depthmask = flag; }
1351
1352 void DepthFunc(GLenum func) {
1353   switch (func) {
1354     case GL_LESS:
1355     case GL_LEQUAL:
1356       break;
1357     default:
1358       assert(false);
1359   }
1360   ctx->depthfunc = func;
1361 }
1362
1363 void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
1364   ctx->scissor = IntRect{x, y, x + width, y + height};
1365 }
1366
1367 void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
1368   ctx->clearcolor[0] = r;
1369   ctx->clearcolor[1] = g;
1370   ctx->clearcolor[2] = b;
1371   ctx->clearcolor[3] = a;
1372 }
1373
1374 void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }
1375
1376 void ActiveTexture(GLenum texture) {
1377   assert(texture >= GL_TEXTURE0);
1378   assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
1379   ctx->active_texture_unit =
1380       clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
1381 }
1382
1383 void GenQueries(GLsizei n, GLuint* result) {
1384   for (int i = 0; i < n; i++) {
1385     Query q;
1386     result[i] = ctx->queries.insert(q);
1387   }
1388 }
1389
1390 void DeleteQuery(GLuint n) {
1391   if (n && ctx->queries.erase(n)) {
1392     unlink(ctx->time_elapsed_query, n);
1393     unlink(ctx->samples_passed_query, n);
1394   }
1395 }
1396
1397 void GenBuffers(int n, GLuint* result) {
1398   for (int i = 0; i < n; i++) {
1399     Buffer b;
1400     result[i] = ctx->buffers.insert(b);
1401   }
1402 }
1403
1404 void DeleteBuffer(GLuint n) {
1405   if (n && ctx->buffers.erase(n)) {
1406     unlink(ctx->pixel_pack_buffer_binding, n);
1407     unlink(ctx->pixel_unpack_buffer_binding, n);
1408     unlink(ctx->array_buffer_binding, n);
1409   }
1410 }
1411
1412 void GenVertexArrays(int n, GLuint* result) {
1413   for (int i = 0; i < n; i++) {
1414     VertexArray v;
1415     result[i] = ctx->vertex_arrays.insert(v);
1416   }
1417 }
1418
1419 void DeleteVertexArray(GLuint n) {
1420   if (n && ctx->vertex_arrays.erase(n)) {
1421     unlink(ctx->current_vertex_array, n);
1422   }
1423 }
1424
1425 GLuint CreateShader(GLenum type) {
1426   Shader s;
1427   s.type = type;
1428   return ctx->shaders.insert(s);
1429 }
1430
1431 void ShaderSourceByName(GLuint shader, char* name) {
1432   Shader& s = ctx->shaders[shader];
1433   s.loader = load_shader(name);
1434   if (!s.loader) {
1435     debugf("unknown shader %s\n", name);
1436   }
1437 }
1438
1439 void AttachShader(GLuint program, GLuint shader) {
1440   Program& p = ctx->programs[program];
1441   Shader& s = ctx->shaders[shader];
1442   if (s.type == GL_VERTEX_SHADER) {
1443     if (!p.impl && s.loader) p.impl = s.loader();
1444   } else if (s.type == GL_FRAGMENT_SHADER) {
1445     if (!p.impl && s.loader) p.impl = s.loader();
1446   } else {
1447     assert(0);
1448   }
1449 }
1450
1451 void DeleteShader(GLuint n) {
1452   if (n) ctx->shaders.erase(n);
1453 }
1454
1455 GLuint CreateProgram() {
1456   Program p;
1457   return ctx->programs.insert(p);
1458 }
1459
1460 void DeleteProgram(GLuint n) {
1461   if (!n) return;
1462   if (ctx->current_program == n) {
1463     if (auto* p = ctx->programs.find(n)) {
1464       p->deleted = true;
1465     }
1466   } else {
1467     ctx->programs.erase(n);
1468   }
1469 }
1470
1471 void LinkProgram(GLuint program) {
1472   Program& p = ctx->programs[program];
1473   assert(p.impl);
1474   if (!p.impl) {
1475     return;
1476   }
1477   assert(p.impl->interpolants_size() <= sizeof(Interpolants));
1478   if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
1479   if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
1480 }
1481
1482 GLint GetLinkStatus(GLuint program) {
1483   if (auto* p = ctx->programs.find(program)) {
1484     return p->impl ? 1 : 0;
1485   }
1486   return 0;
1487 }
1488
1489 void BindAttribLocation(GLuint program, GLuint index, char* name) {
1490   Program& p = ctx->programs[program];
1491   assert(p.impl);
1492   if (!p.impl) {
1493     return;
1494   }
1495   p.impl->bind_attrib(name, index);
1496 }
1497
1498 GLint GetAttribLocation(GLuint program, char* name) {
1499   Program& p = ctx->programs[program];
1500   assert(p.impl);
1501   if (!p.impl) {
1502     return -1;
1503   }
1504   return p.impl->get_attrib(name);
1505 }
1506
1507 GLint GetUniformLocation(GLuint program, char* name) {
1508   Program& p = ctx->programs[program];
1509   assert(p.impl);
1510   if (!p.impl) {
1511     return -1;
1512   }
1513   GLint loc = p.impl->get_uniform(name);
1514   // debugf("location: %d\n", loc);
1515   return loc;
1516 }
1517
1518 static uint64_t get_time_value() {
1519 #ifdef __MACH__
1520   return mach_absolute_time();
1521 #elif defined(_WIN32)
1522   LARGE_INTEGER time;
1523   static bool have_frequency = false;
1524   static LARGE_INTEGER frequency;
1525   if (!have_frequency) {
1526     QueryPerformanceFrequency(&frequency);
1527     have_frequency = true;
1528   }
1529   QueryPerformanceCounter(&time);
1530   return time.QuadPart * 1000000000ULL / frequency.QuadPart;
1531 #else
1532   return ({
1533     struct timespec tp;
1534     clock_gettime(CLOCK_MONOTONIC, &tp);
1535     tp.tv_sec * 1000000000ULL + tp.tv_nsec;
1536   });
1537 #endif
1538 }
1539
1540 void BeginQuery(GLenum target, GLuint id) {
1541   ctx->get_binding(target) = id;
1542   Query& q = ctx->queries[id];
1543   switch (target) {
1544     case GL_SAMPLES_PASSED:
1545       q.value = 0;
1546       break;
1547     case GL_TIME_ELAPSED:
1548       q.value = get_time_value();
1549       break;
1550     default:
1551       debugf("unknown query target %x for query %d\n", target, id);
1552       assert(false);
1553   }
1554 }
1555
1556 void EndQuery(GLenum target) {
1557   Query& q = ctx->queries[ctx->get_binding(target)];
1558   switch (target) {
1559     case GL_SAMPLES_PASSED:
1560       break;
1561     case GL_TIME_ELAPSED:
1562       q.value = get_time_value() - q.value;
1563       break;
1564     default:
1565       debugf("unknown query target %x\n", target);
1566       assert(false);
1567   }
1568   ctx->get_binding(target) = 0;
1569 }
1570
1571 void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
1572   Query& q = ctx->queries[id];
1573   switch (pname) {
1574     case GL_QUERY_RESULT:
1575       assert(params);
1576       params[0] = q.value;
1577       break;
1578     default:
1579       assert(false);
1580   }
1581 }
1582
1583 void BindVertexArray(GLuint vertex_array) {
1584   if (vertex_array != ctx->current_vertex_array) {
1585     ctx->validate_vertex_array = true;
1586   }
1587   ctx->current_vertex_array = vertex_array;
1588 }
1589
1590 void BindTexture(GLenum target, GLuint texture) {
1591   ctx->get_binding(target) = texture;
1592 }
1593
1594 void BindBuffer(GLenum target, GLuint buffer) {
1595   ctx->get_binding(target) = buffer;
1596 }
1597
1598 void BindFramebuffer(GLenum target, GLuint fb) {
1599   if (target == GL_FRAMEBUFFER) {
1600     ctx->read_framebuffer_binding = fb;
1601     ctx->draw_framebuffer_binding = fb;
1602   } else {
1603     assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
1604     ctx->get_binding(target) = fb;
1605   }
1606 }
1607
1608 void BindRenderbuffer(GLenum target, GLuint rb) {
1609   ctx->get_binding(target) = rb;
1610 }
1611
1612 void PixelStorei(GLenum name, GLint param) {
1613   if (name == GL_UNPACK_ALIGNMENT) {
1614     assert(param == 1);
1615   } else if (name == GL_UNPACK_ROW_LENGTH) {
1616     ctx->unpack_row_length = param;
1617   }
1618 }
1619
1620 static GLenum remap_internal_format(GLenum format) {
1621   switch (format) {
1622     case GL_DEPTH_COMPONENT:
1623       return GL_DEPTH_COMPONENT24;
1624     case GL_RGBA:
1625       return GL_RGBA8;
1626     case GL_RED:
1627       return GL_R8;
1628     case GL_RG:
1629       return GL_RG8;
1630     case GL_RGB_422_APPLE:
1631       return GL_RGB_RAW_422_APPLE;
1632     default:
1633       return format;
1634   }
1635 }
1636
1637 }  // extern "C"
1638
1639 static bool format_requires_conversion(GLenum external_format,
1640                                        GLenum internal_format) {
1641   switch (external_format) {
1642     case GL_RGBA:
1643       return internal_format == GL_RGBA8;
1644     default:
1645       return false;
1646   }
1647 }
1648
1649 static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
1650                                        int width) {
1651   for (; width >= 4; width -= 4, dest += 4, src += 4) {
1652     U32 p = unaligned_load<U32>(src);
1653     U32 rb = p & 0x00FF00FF;
1654     unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
1655   }
1656   for (; width > 0; width--, dest++, src++) {
1657     uint32_t p = *src;
1658     uint32_t rb = p & 0x00FF00FF;
1659     *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
1660   }
1661 }
1662
1663 static void convert_copy(GLenum external_format, GLenum internal_format,
1664                          uint8_t* dst_buf, size_t dst_stride,
1665                          const uint8_t* src_buf, size_t src_stride,
1666                          size_t width, size_t height) {
1667   switch (external_format) {
1668     case GL_RGBA:
1669       if (internal_format == GL_RGBA8) {
1670         for (; height; height--) {
1671           copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf,
1672                               width);
1673           dst_buf += dst_stride;
1674           src_buf += src_stride;
1675         }
1676         return;
1677       }
1678       break;
1679     default:
1680       break;
1681   }
1682   size_t row_bytes = width * bytes_for_internal_format(internal_format);
1683   for (; height; height--) {
1684     memcpy(dst_buf, src_buf, row_bytes);
1685     dst_buf += dst_stride;
1686     src_buf += src_stride;
1687   }
1688 }
1689
1690 static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
1691                             GLsizei height, void* buf = nullptr,
1692                             GLsizei stride = 0, GLsizei min_width = 0,
1693                             GLsizei min_height = 0) {
1694   GLenum internal_format = remap_internal_format(external_format);
1695   bool changed = false;
1696   if (t.width != width || t.height != height ||
1697       t.internal_format != internal_format) {
1698     changed = true;
1699     t.internal_format = internal_format;
1700     t.width = width;
1701     t.height = height;
1702   }
1703   // If we are changed from an internally managed buffer to an externally
1704   // supplied one or vice versa, ensure that we clean up old buffer state.
1705   // However, if we have to convert the data from a non-native format, then
1706   // always treat it as internally managed since we will need to copy to an
1707   // internally managed native format buffer.
1708   bool should_free = buf == nullptr || format_requires_conversion(
1709                                            external_format, internal_format);
1710   if (t.should_free() != should_free) {
1711     changed = true;
1712     t.cleanup();
1713     t.set_should_free(should_free);
1714   }
1715   // If now an external buffer, explicitly set it...
1716   if (!should_free) {
1717     t.set_buffer(buf, stride);
1718   }
1719   t.disable_delayed_clear();
1720   if (!t.allocate(changed, min_width, min_height)) {
1721     out_of_memory();
1722   }
1723   // If we have a buffer that needs format conversion, then do that now.
1724   if (buf && should_free) {
1725     convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
1726                  (const uint8_t*)buf, stride, width, height);
1727   }
1728 }
1729
1730 extern "C" {
1731
1732 void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
1733                   GLsizei width, GLsizei height) {
1734   assert(levels == 1);
1735   Texture& t = ctx->textures[ctx->get_binding(target)];
1736   set_tex_storage(t, internal_format, width, height);
1737 }
1738
1739 GLenum internal_format_for_data(GLenum format, GLenum ty) {
1740   if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
1741     return GL_R8;
1742   } else if ((format == GL_RGBA || format == GL_BGRA) &&
1743              (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
1744     return GL_RGBA8;
1745   } else if (format == GL_RGBA && ty == GL_FLOAT) {
1746     return GL_RGBA32F;
1747   } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
1748     return GL_RGBA32I;
1749   } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
1750     return GL_RG8;
1751   } else if (format == GL_RGB_422_APPLE &&
1752              ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
1753     return GL_RGB_RAW_422_APPLE;
1754   } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
1755     return GL_R16;
1756   } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) {
1757     return GL_RG16;
1758   } else {
1759     debugf("unknown internal format for format %x, type %x\n", format, ty);
1760     assert(false);
1761     return 0;
1762   }
1763 }
1764
1765 static Buffer* get_pixel_pack_buffer() {
1766   return ctx->pixel_pack_buffer_binding
1767              ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
1768              : nullptr;
1769 }
1770
1771 static void* get_pixel_pack_buffer_data(void* data) {
1772   if (Buffer* b = get_pixel_pack_buffer()) {
1773     return b->buf ? b->buf + (size_t)data : nullptr;
1774   }
1775   return data;
1776 }
1777
1778 static Buffer* get_pixel_unpack_buffer() {
1779   return ctx->pixel_unpack_buffer_binding
1780              ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
1781              : nullptr;
1782 }
1783
1784 static void* get_pixel_unpack_buffer_data(void* data) {
1785   if (Buffer* b = get_pixel_unpack_buffer()) {
1786     return b->buf ? b->buf + (size_t)data : nullptr;
1787   }
1788   return data;
1789 }
1790
1791 void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
1792                    GLsizei width, GLsizei height, GLenum format, GLenum ty,
1793                    void* data) {
1794   if (level != 0) {
1795     assert(false);
1796     return;
1797   }
1798   data = get_pixel_unpack_buffer_data(data);
1799   if (!data) return;
1800   Texture& t = ctx->textures[ctx->get_binding(target)];
1801   IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
1802   prepare_texture(t, &skip);
1803   assert(xoffset + width <= t.width);
1804   assert(yoffset + height <= t.height);
1805   assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
1806   GLsizei row_length =
1807       ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
1808   assert(t.internal_format == internal_format_for_data(format, ty));
1809   int src_bpp = format_requires_conversion(format, t.internal_format)
1810                     ? bytes_for_internal_format(format)
1811                     : t.bpp();
1812   if (!src_bpp || !t.buf) return;
1813   convert_copy(format, t.internal_format,
1814                (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
1815                (const uint8_t*)data, row_length * src_bpp, width, height);
1816 }
1817
1818 void TexImage2D(GLenum target, GLint level, GLint internal_format,
1819                 GLsizei width, GLsizei height, GLint border, GLenum format,
1820                 GLenum ty, void* data) {
1821   if (level != 0) {
1822     assert(false);
1823     return;
1824   }
1825   assert(border == 0);
1826   TexStorage2D(target, 1, internal_format, width, height);
1827   TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
1828 }
1829
1830 void GenerateMipmap(UNUSED GLenum target) {
1831   // TODO: support mipmaps
1832 }
1833
1834 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
1835   Texture& t = ctx->textures[texid];
1836   switch (pname) {
1837     case GL_TEXTURE_WRAP_S:
1838       assert(param == GL_CLAMP_TO_EDGE);
1839       break;
1840     case GL_TEXTURE_WRAP_T:
1841       assert(param == GL_CLAMP_TO_EDGE);
1842       break;
1843     case GL_TEXTURE_MIN_FILTER:
1844       t.min_filter = param;
1845       break;
1846     case GL_TEXTURE_MAG_FILTER:
1847       t.mag_filter = param;
1848       break;
1849     default:
1850       break;
1851   }
1852 }
1853
1854 void TexParameteri(GLenum target, GLenum pname, GLint param) {
1855   SetTextureParameter(ctx->get_binding(target), pname, param);
1856 }
1857
1858 void GenTextures(int n, GLuint* result) {
1859   for (int i = 0; i < n; i++) {
1860     Texture t;
1861     result[i] = ctx->textures.insert(t);
1862   }
1863 }
1864
1865 void DeleteTexture(GLuint n) {
1866   if (n && ctx->textures.erase(n)) {
1867     for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
1868       ctx->texture_units[i].unlink(n);
1869     }
1870   }
1871 }
1872
1873 void GenRenderbuffers(int n, GLuint* result) {
1874   for (int i = 0; i < n; i++) {
1875     Renderbuffer r;
1876     result[i] = ctx->renderbuffers.insert(r);
1877   }
1878 }
1879
1880 void Renderbuffer::on_erase() {
1881   for (auto* fb : ctx->framebuffers) {
1882     if (fb) {
1883       unlink(fb->color_attachment, texture);
1884       unlink(fb->depth_attachment, texture);
1885     }
1886   }
1887   DeleteTexture(texture);
1888 }
1889
1890 void DeleteRenderbuffer(GLuint n) {
1891   if (n && ctx->renderbuffers.erase(n)) {
1892     unlink(ctx->renderbuffer_binding, n);
1893   }
1894 }
1895
1896 void GenFramebuffers(int n, GLuint* result) {
1897   for (int i = 0; i < n; i++) {
1898     Framebuffer f;
1899     result[i] = ctx->framebuffers.insert(f);
1900   }
1901 }
1902
1903 void DeleteFramebuffer(GLuint n) {
1904   if (n && ctx->framebuffers.erase(n)) {
1905     unlink(ctx->read_framebuffer_binding, n);
1906     unlink(ctx->draw_framebuffer_binding, n);
1907   }
1908 }
1909
1910 void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
1911                          GLsizei height) {
1912   // Just refer a renderbuffer to a texture to simplify things for now...
1913   Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
1914   if (!r.texture) {
1915     GenTextures(1, &r.texture);
1916   }
1917   switch (internal_format) {
1918     case GL_DEPTH_COMPONENT:
1919     case GL_DEPTH_COMPONENT16:
1920     case GL_DEPTH_COMPONENT24:
1921     case GL_DEPTH_COMPONENT32:
1922       // Force depth format to 24 bits...
1923       internal_format = GL_DEPTH_COMPONENT24;
1924       break;
1925   }
1926   set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
1927 }
1928
1929 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
1930                          GLsizei stride, GLuint offset) {
1931   // debugf("cva: %d\n", ctx->current_vertex_array);
1932   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1933   if (index >= NULL_ATTRIB) {
1934     assert(0);
1935     return;
1936   }
1937   VertexAttrib& va = v.attribs[index];
1938   va.size = size * bytes_per_type(type);
1939   va.type = type;
1940   va.normalized = normalized;
1941   va.stride = stride;
1942   va.offset = offset;
1943   // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1944   va.vertex_buffer = ctx->array_buffer_binding;
1945   va.vertex_array = ctx->current_vertex_array;
1946   ctx->validate_vertex_array = true;
1947 }
1948
1949 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
1950                           GLuint offset) {
1951   // debugf("cva: %d\n", ctx->current_vertex_array);
1952   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1953   if (index >= NULL_ATTRIB) {
1954     assert(0);
1955     return;
1956   }
1957   VertexAttrib& va = v.attribs[index];
1958   va.size = size * bytes_per_type(type);
1959   va.type = type;
1960   va.normalized = false;
1961   va.stride = stride;
1962   va.offset = offset;
1963   // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1964   va.vertex_buffer = ctx->array_buffer_binding;
1965   va.vertex_array = ctx->current_vertex_array;
1966   ctx->validate_vertex_array = true;
1967 }
1968
1969 void EnableVertexAttribArray(GLuint index) {
1970   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1971   if (index >= NULL_ATTRIB) {
1972     assert(0);
1973     return;
1974   }
1975   VertexAttrib& va = v.attribs[index];
1976   if (!va.enabled) {
1977     ctx->validate_vertex_array = true;
1978   }
1979   va.enabled = true;
1980   v.max_attrib = max(v.max_attrib, (int)index);
1981 }
1982
1983 void DisableVertexAttribArray(GLuint index) {
1984   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1985   if (index >= NULL_ATTRIB) {
1986     assert(0);
1987     return;
1988   }
1989   VertexAttrib& va = v.attribs[index];
1990   if (va.enabled) {
1991     ctx->validate_vertex_array = true;
1992   }
1993   va.disable();
1994 }
1995
1996 void VertexAttribDivisor(GLuint index, GLuint divisor) {
1997   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1998   // Only support divisor being 0 (per-vertex) or 1 (per-instance).
1999   if (index >= NULL_ATTRIB || divisor > 1) {
2000     assert(0);
2001     return;
2002   }
2003   VertexAttrib& va = v.attribs[index];
2004   va.divisor = divisor;
2005 }
2006
2007 void BufferData(GLenum target, GLsizeiptr size, void* data,
2008                 UNUSED GLenum usage) {
2009   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2010   if (size != b.size) {
2011     if (!b.allocate(size)) {
2012       out_of_memory();
2013     }
2014     ctx->validate_vertex_array = true;
2015   }
2016   if (data && b.buf && size <= b.size) {
2017     memcpy(b.buf, data, size);
2018   }
2019 }
2020
2021 void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
2022                    void* data) {
2023   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2024   assert(offset + size <= b.size);
2025   if (data && b.buf && offset + size <= b.size) {
2026     memcpy(&b.buf[offset], data, size);
2027   }
2028 }
2029
2030 void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
2031   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2032   return b.buf;
2033 }
2034
2035 void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
2036                      UNUSED GLbitfield access) {
2037   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2038   if (b.buf && offset >= 0 && length > 0 && offset + length <= b.size) {
2039     return b.buf + offset;
2040   }
2041   return nullptr;
2042 }
2043
2044 GLboolean UnmapBuffer(GLenum target) {
2045   Buffer& b = ctx->buffers[ctx->get_binding(target)];
2046   return b.buf != nullptr;
2047 }
2048
2049 void Uniform1i(GLint location, GLint V0) {
2050   // debugf("tex: %d\n", (int)ctx->textures.size);
2051   if (vertex_shader) {
2052     vertex_shader->set_uniform_1i(location, V0);
2053   }
2054 }
2055 void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
2056   assert(count == 1);
2057   if (vertex_shader) {
2058     vertex_shader->set_uniform_4fv(location, v);
2059   }
2060 }
2061 void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
2062                       const GLfloat* value) {
2063   assert(count == 1);
2064   assert(!transpose);
2065   if (vertex_shader) {
2066     vertex_shader->set_uniform_matrix4fv(location, value);
2067   }
2068 }
2069
2070 void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
2071                           GLuint texture, GLint level) {
2072   assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2073   assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
2074   assert(level == 0);
2075   Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2076   if (attachment == GL_COLOR_ATTACHMENT0) {
2077     fb.color_attachment = texture;
2078   } else if (attachment == GL_DEPTH_ATTACHMENT) {
2079     fb.depth_attachment = texture;
2080   } else {
2081     assert(0);
2082   }
2083 }
2084
2085 void FramebufferRenderbuffer(GLenum target, GLenum attachment,
2086                              GLenum renderbuffertarget, GLuint renderbuffer) {
2087   assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
2088   assert(renderbuffertarget == GL_RENDERBUFFER);
2089   Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
2090   Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
2091   if (attachment == GL_COLOR_ATTACHMENT0) {
2092     fb.color_attachment = rb.texture;
2093   } else if (attachment == GL_DEPTH_ATTACHMENT) {
2094     fb.depth_attachment = rb.texture;
2095   } else {
2096     assert(0);
2097   }
2098 }
2099
2100 }  // extern "C"
2101
2102 static inline Framebuffer* get_framebuffer(GLenum target,
2103                                            bool fallback = false) {
2104   if (target == GL_FRAMEBUFFER) {
2105     target = GL_DRAW_FRAMEBUFFER;
2106   }
2107   Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
2108   if (fallback && !fb) {
2109     // If the specified framebuffer isn't found and a fallback is requested,
2110     // use the default framebuffer.
2111     fb = &ctx->framebuffers[0];
2112   }
2113   return fb;
2114 }
2115
2116 template <typename T>
2117 static inline void fill_n(T* dst, size_t n, T val) {
2118   for (T* end = &dst[n]; dst < end; dst++) *dst = val;
2119 }
2120
2121 #if USE_SSE2
2122 template <>
2123 inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
2124   __asm__ __volatile__("rep stosl\n"
2125                        : "+D"(dst), "+c"(n)
2126                        : "a"(val)
2127                        : "memory", "cc");
2128 }
2129 #endif
2130
2131 static inline uint32_t clear_chunk(uint8_t value) {
2132   return uint32_t(value) * 0x01010101U;
2133 }
2134
2135 static inline uint32_t clear_chunk(uint16_t value) {
2136   return uint32_t(value) | (uint32_t(value) << 16);
2137 }
2138
2139 static inline uint32_t clear_chunk(uint32_t value) { return value; }
2140
2141 template <typename T>
2142 static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
2143   const size_t N = sizeof(uint32_t) / sizeof(T);
2144   // fill any leading unaligned values
2145   if (N > 1) {
2146     size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
2147     if (align <= len) {
2148       fill_n(buf, align, value);
2149       len -= align;
2150       buf += align;
2151     }
2152   }
2153   // fill as many aligned chunks as possible
2154   fill_n((uint32_t*)buf, len / N, chunk);
2155   // fill any remaining values
2156   if (N > 1) {
2157     fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
2158   }
2159 }
2160
2161 template <typename T>
2162 static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
2163                          int skip_end = 0) {
2164   if (!t.buf) return;
2165   skip_start = max(skip_start, bb.x0);
2166   skip_end = max(skip_end, skip_start);
2167   assert(sizeof(T) == t.bpp());
2168   size_t stride = t.stride();
2169   // When clearing multiple full-width rows, collapse them into a single large
2170   // "row" to avoid redundant setup from clearing each row individually. Note
2171   // that we can only safely do this if the stride is tightly packed.
2172   if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
2173       (t.should_free() || stride == t.width * sizeof(T))) {
2174     bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
2175     bb.y1 = bb.y0 + 1;
2176   }
2177   T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
2178   uint32_t chunk = clear_chunk(value);
2179   for (int rows = bb.height(); rows > 0; rows--) {
2180     if (bb.x0 < skip_start) {
2181       clear_row(buf, skip_start - bb.x0, value, chunk);
2182     }
2183     if (skip_end < bb.x1) {
2184       clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
2185     }
2186     buf += stride / sizeof(T);
2187   }
2188 }
2189
2190 template <typename T>
2191 static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
2192                                    int skip_end = 0) {
2193   assert(t.buf != nullptr);
2194   assert(sizeof(T) == t.bpp());
2195   assert(skip_start <= skip_end);
2196   T* buf = (T*)t.sample_ptr(0, y);
2197   uint32_t chunk = clear_chunk((T)t.clear_val);
2198   if (skip_start > 0) {
2199     clear_row<T>(buf, skip_start, t.clear_val, chunk);
2200   }
2201   if (skip_end < t.width) {
2202     clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
2203   }
2204 }
2205
2206 template <typename T>
2207 static void force_clear(Texture& t, const IntRect* skip = nullptr) {
2208   if (!t.delay_clear || !t.cleared_rows) {
2209     return;
2210   }
2211   int y0 = 0;
2212   int y1 = t.height;
2213   int skip_start = 0;
2214   int skip_end = 0;
2215   if (skip) {
2216     y0 = clamp(skip->y0, 0, t.height);
2217     y1 = clamp(skip->y1, y0, t.height);
2218     skip_start = clamp(skip->x0, 0, t.width);
2219     skip_end = clamp(skip->x1, skip_start, t.width);
2220     if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
2221       t.disable_delayed_clear();
2222       return;
2223     }
2224   }
2225   int num_masks = (y1 + 31) / 32;
2226   uint32_t* rows = t.cleared_rows;
2227   for (int i = y0 / 32; i < num_masks; i++) {
2228     uint32_t mask = rows[i];
2229     if (mask != ~0U) {
2230       rows[i] = ~0U;
2231       int start = i * 32;
2232       while (mask) {
2233         int count = __builtin_ctz(mask);
2234         if (count > 0) {
2235           clear_buffer<T>(t, t.clear_val,
2236                           IntRect{0, start, t.width, start + count}, skip_start,
2237                           skip_end);
2238           t.delay_clear -= count;
2239           start += count;
2240           mask >>= count;
2241         }
2242         count = __builtin_ctz(mask + 1);
2243         start += count;
2244         mask >>= count;
2245       }
2246       int count = (i + 1) * 32 - start;
2247       if (count > 0) {
2248         clear_buffer<T>(t, t.clear_val,
2249                         IntRect{0, start, t.width, start + count}, skip_start,
2250                         skip_end);
2251         t.delay_clear -= count;
2252       }
2253     }
2254   }
2255   if (t.delay_clear <= 0) t.disable_delayed_clear();
2256 }
2257
2258 static void prepare_texture(Texture& t, const IntRect* skip) {
2259   if (t.delay_clear) {
2260     switch (t.internal_format) {
2261       case GL_RGBA8:
2262         force_clear<uint32_t>(t, skip);
2263         break;
2264       case GL_R8:
2265         force_clear<uint8_t>(t, skip);
2266         break;
2267       case GL_RG8:
2268         force_clear<uint16_t>(t, skip);
2269         break;
2270       default:
2271         assert(false);
2272         break;
2273     }
2274   }
2275 }
2276
2277 // Setup a clear on a texture. This may either force an immediate clear or
2278 // potentially punt to a delayed clear, if applicable.
2279 template <typename T>
2280 static void request_clear(Texture& t, T value, const IntRect& scissor) {
2281   // If the clear would require a scissor, force clear anything outside
2282   // the scissor, and then immediately clear anything inside the scissor.
2283   if (!scissor.contains(t.offset_bounds())) {
2284     IntRect skip = scissor - t.offset;
2285     force_clear<T>(t, &skip);
2286     clear_buffer<T>(t, value, skip.intersection(t.bounds()));
2287   } else {
2288     // Do delayed clear for 2D texture without scissor.
2289     t.enable_delayed_clear(value);
2290   }
2291 }
2292
2293 template <typename T>
2294 static inline void request_clear(Texture& t, T value) {
2295   // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
2296   // the entire texture bounds.
2297   request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
2298 }
2299
2300 extern "C" {
2301
2302 void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
2303                             void* buf) {
2304   Framebuffer& fb = ctx->framebuffers[0];
2305   if (!fb.color_attachment) {
2306     GenTextures(1, &fb.color_attachment);
2307   }
2308   // If the dimensions or buffer properties changed, we need to reallocate
2309   // the underlying storage for the color buffer texture.
2310   Texture& colortex = ctx->textures[fb.color_attachment];
2311   set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
2312   colortex.offset = IntPoint(x, y);
2313   if (!fb.depth_attachment) {
2314     GenTextures(1, &fb.depth_attachment);
2315   }
2316   // Ensure dimensions of the depth buffer match the color buffer.
2317   Texture& depthtex = ctx->textures[fb.depth_attachment];
2318   set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
2319   depthtex.offset = IntPoint(x, y);
2320 }
2321
2322 void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
2323                      int32_t* height, int32_t* stride) {
2324   Framebuffer* fb = ctx->framebuffers.find(fbo);
2325   if (!fb || !fb->color_attachment) {
2326     return nullptr;
2327   }
2328   Texture& colortex = ctx->textures[fb->color_attachment];
2329   if (flush) {
2330     prepare_texture(colortex);
2331   }
2332   assert(colortex.offset == IntPoint(0, 0));
2333   if (width) {
2334     *width = colortex.width;
2335   }
2336   if (height) {
2337     *height = colortex.height;
2338   }
2339   if (stride) {
2340     *stride = colortex.stride();
2341   }
2342   return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
2343 }
2344
2345 void ResolveFramebuffer(GLuint fbo) {
2346   Framebuffer* fb = ctx->framebuffers.find(fbo);
2347   if (!fb || !fb->color_attachment) {
2348     return;
2349   }
2350   Texture& colortex = ctx->textures[fb->color_attachment];
2351   prepare_texture(colortex);
2352 }
2353
2354 void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
2355                       GLsizei height, GLsizei stride, void* buf,
2356                       GLsizei min_width, GLsizei min_height) {
2357   Texture& t = ctx->textures[texid];
2358   set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
2359                   min_height);
2360 }
2361
2362 GLenum CheckFramebufferStatus(GLenum target) {
2363   Framebuffer* fb = get_framebuffer(target);
2364   if (!fb || !fb->color_attachment) {
2365     return GL_FRAMEBUFFER_UNSUPPORTED;
2366   }
2367   return GL_FRAMEBUFFER_COMPLETE;
2368 }
2369
2370 void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
2371                       GLint zoffset, GLsizei width, GLsizei height,
2372                       GLsizei depth, GLenum format, GLenum type,
2373                       const void* data) {
2374   if (level != 0) {
2375     assert(false);
2376     return;
2377   }
2378   Texture& t = ctx->textures[texture];
2379   assert(!t.locked);
2380   if (width <= 0 || height <= 0 || depth <= 0) {
2381     return;
2382   }
2383   assert(zoffset == 0 && depth == 1);
2384   IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
2385   if (t.internal_format == GL_DEPTH_COMPONENT24) {
2386     uint32_t value = 0xFFFFFF;
2387     switch (format) {
2388       case GL_DEPTH_COMPONENT:
2389         switch (type) {
2390           case GL_DOUBLE:
2391             value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
2392             break;
2393           case GL_FLOAT:
2394             value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
2395             break;
2396           default:
2397             assert(false);
2398             break;
2399         }
2400         break;
2401       default:
2402         assert(false);
2403         break;
2404     }
2405     if (t.cleared() && !scissor.contains(t.offset_bounds())) {
2406       // If we need to scissor the clear and the depth buffer was already
2407       // initialized, then just fill runs for that scissor area.
2408       t.fill_depth_runs(value, scissor);
2409     } else {
2410       // Otherwise, the buffer is either uninitialized or the clear would
2411       // encompass the entire buffer. If uninitialized, we can safely fill
2412       // the entire buffer with any value and thus ignore any scissoring.
2413       t.init_depth_runs(value);
2414     }
2415     return;
2416   }
2417
2418   uint32_t color = 0xFF000000;
2419   switch (type) {
2420     case GL_FLOAT: {
2421       const GLfloat* f = (const GLfloat*)data;
2422       Float v = {0.0f, 0.0f, 0.0f, 1.0f};
2423       switch (format) {
2424         case GL_RGBA:
2425           v.w = f[3];  // alpha
2426           FALLTHROUGH;
2427         case GL_RGB:
2428           v.z = f[2];  // blue
2429           FALLTHROUGH;
2430         case GL_RG:
2431           v.y = f[1];  // green
2432           FALLTHROUGH;
2433         case GL_RED:
2434           v.x = f[0];  // red
2435           break;
2436         default:
2437           assert(false);
2438           break;
2439       }
2440       color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
2441       break;
2442     }
2443     case GL_UNSIGNED_BYTE: {
2444       const GLubyte* b = (const GLubyte*)data;
2445       switch (format) {
2446         case GL_RGBA:
2447           color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24);  // alpha
2448           FALLTHROUGH;
2449         case GL_RGB:
2450           color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16);  // blue
2451           FALLTHROUGH;
2452         case GL_RG:
2453           color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8);  // green
2454           FALLTHROUGH;
2455         case GL_RED:
2456           color = (color & ~0x000000FF) | uint32_t(b[0]);  // red
2457           break;
2458         default:
2459           assert(false);
2460           break;
2461       }
2462       break;
2463     }
2464     default:
2465       assert(false);
2466       break;
2467   }
2468
2469   switch (t.internal_format) {
2470     case GL_RGBA8:
2471       // Clear color needs to swizzle to BGRA.
2472       request_clear<uint32_t>(t,
2473                               (color & 0xFF00FF00) |
2474                                   ((color << 16) & 0xFF0000) |
2475                                   ((color >> 16) & 0xFF),
2476                               scissor);
2477       break;
2478     case GL_R8:
2479       request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
2480       break;
2481     case GL_RG8:
2482       request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
2483       break;
2484     default:
2485       assert(false);
2486       break;
2487   }
2488 }
2489
2490 void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
2491                    const void* data) {
2492   Texture& t = ctx->textures[texture];
2493   IntRect scissor = t.offset_bounds();
2494   ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
2495                    scissor.height(), 1, format, type, data);
2496 }
2497
2498 void Clear(GLbitfield mask) {
2499   Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
2500   if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
2501     Texture& t = ctx->textures[fb.color_attachment];
2502     IntRect scissor = ctx->scissortest
2503                           ? ctx->scissor.intersection(t.offset_bounds())
2504                           : t.offset_bounds();
2505     ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2506                      scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2507                      ctx->clearcolor);
2508   }
2509   if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
2510     Texture& t = ctx->textures[fb.depth_attachment];
2511     IntRect scissor = ctx->scissortest
2512                           ? ctx->scissor.intersection(t.offset_bounds())
2513                           : t.offset_bounds();
2514     ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
2515                      scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
2516                      GL_DOUBLE, &ctx->cleardepth);
2517   }
2518 }
2519
2520 void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
2521                     GLsizei height, GLfloat r, GLfloat g, GLfloat b,
2522                     GLfloat a) {
2523   GLfloat color[] = {r, g, b, a};
2524   Framebuffer& fb = ctx->framebuffers[fbo];
2525   Texture& t = ctx->textures[fb.color_attachment];
2526   IntRect scissor =
2527       IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
2528           t.offset_bounds());
2529   ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
2530                    scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
2531                    color);
2532 }
2533
2534 void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
2535                            const GLenum* attachments) {
2536   Framebuffer* fb = get_framebuffer(target);
2537   if (!fb || num_attachments <= 0 || !attachments) {
2538     return;
2539   }
2540   for (GLsizei i = 0; i < num_attachments; i++) {
2541     switch (attachments[i]) {
2542       case GL_DEPTH_ATTACHMENT: {
2543         Texture& t = ctx->textures[fb->depth_attachment];
2544         t.set_cleared(false);
2545         break;
2546       }
2547       case GL_COLOR_ATTACHMENT0: {
2548         Texture& t = ctx->textures[fb->color_attachment];
2549         t.disable_delayed_clear();
2550         break;
2551       }
2552     }
2553   }
2554 }
2555
2556 void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
2557                 GLenum type, void* data) {
2558   data = get_pixel_pack_buffer_data(data);
2559   if (!data) return;
2560   Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
2561   if (!fb) return;
2562   assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
2563          format == GL_BGRA || format == GL_RG);
2564   Texture& t = ctx->textures[fb->color_attachment];
2565   if (!t.buf) return;
2566   prepare_texture(t);
2567   // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
2568   // width, height, ctx->read_framebuffer_binding, t.internal_format);
2569   x -= t.offset.x;
2570   y -= t.offset.y;
2571   assert(x >= 0 && y >= 0);
2572   assert(x + width <= t.width);
2573   assert(y + height <= t.height);
2574   if (internal_format_for_data(format, type) != t.internal_format) {
2575     debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
2576            internal_format_for_data(format, type));
2577     assert(false);
2578     return;
2579   }
2580   // Only support readback conversions that are reversible
2581   assert(!format_requires_conversion(format, t.internal_format) ||
2582          bytes_for_internal_format(format) == t.bpp());
2583   uint8_t* dest = (uint8_t*)data;
2584   size_t destStride = width * t.bpp();
2585   if (y < 0) {
2586     dest += -y * destStride;
2587     height += y;
2588     y = 0;
2589   }
2590   if (y + height > t.height) {
2591     height = t.height - y;
2592   }
2593   if (x < 0) {
2594     dest += -x * t.bpp();
2595     width += x;
2596     x = 0;
2597   }
2598   if (x + width > t.width) {
2599     width = t.width - x;
2600   }
2601   if (width <= 0 || height <= 0) {
2602     return;
2603   }
2604   convert_copy(format, t.internal_format, dest, destStride,
2605                (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
2606 }
2607
2608 void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
2609                       GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
2610                       GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
2611                       GLint dstY, GLint dstZ, GLsizei srcWidth,
2612                       GLsizei srcHeight, GLsizei srcDepth) {
2613   assert(srcLevel == 0 && dstLevel == 0);
2614   assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
2615   if (srcTarget == GL_RENDERBUFFER) {
2616     Renderbuffer& rb = ctx->renderbuffers[srcName];
2617     srcName = rb.texture;
2618   }
2619   if (dstTarget == GL_RENDERBUFFER) {
2620     Renderbuffer& rb = ctx->renderbuffers[dstName];
2621     dstName = rb.texture;
2622   }
2623   Texture& srctex = ctx->textures[srcName];
2624   if (!srctex.buf) return;
2625   prepare_texture(srctex);
2626   Texture& dsttex = ctx->textures[dstName];
2627   if (!dsttex.buf) return;
2628   assert(!dsttex.locked);
2629   IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
2630   prepare_texture(dsttex, &skip);
2631   assert(srctex.internal_format == dsttex.internal_format);
2632   assert(srcWidth >= 0);
2633   assert(srcHeight >= 0);
2634   assert(srcX + srcWidth <= srctex.width);
2635   assert(srcY + srcHeight <= srctex.height);
2636   assert(dstX + srcWidth <= dsttex.width);
2637   assert(dstY + srcHeight <= dsttex.height);
2638   int bpp = srctex.bpp();
2639   int src_stride = srctex.stride();
2640   int dest_stride = dsttex.stride();
2641   char* dest = dsttex.sample_ptr(dstX, dstY);
2642   char* src = srctex.sample_ptr(srcX, srcY);
2643   for (int y = 0; y < srcHeight; y++) {
2644     memcpy(dest, src, srcWidth * bpp);
2645     dest += dest_stride;
2646     src += src_stride;
2647   }
2648 }
2649
2650 void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
2651                        GLint yoffset, GLint x, GLint y, GLsizei width,
2652                        GLsizei height) {
2653   assert(level == 0);
2654   Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
2655   if (!fb) return;
2656   CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
2657                    ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
2658                    0, width, height, 1);
2659 }
2660
2661 }  // extern "C"
2662
2663 #include "blend.h"
2664 #include "composite.h"
2665 #include "swgl_ext.h"
2666
2667 #pragma GCC diagnostic push
2668 #pragma GCC diagnostic ignored "-Wuninitialized"
2669 #pragma GCC diagnostic ignored "-Wunused-function"
2670 #pragma GCC diagnostic ignored "-Wunused-parameter"
2671 #pragma GCC diagnostic ignored "-Wunused-variable"
2672 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
2673 #ifdef __clang__
2674 #  pragma GCC diagnostic ignored "-Wunused-private-field"
2675 #else
2676 #  pragma GCC diagnostic ignored "-Wunused-but-set-variable"
2677 #endif
2678 #include "load_shader.h"
2679 #pragma GCC diagnostic pop
2680
2681 #include "rasterize.h"
2682
2683 void VertexArray::validate() {
2684   int last_enabled = -1;
2685   for (int i = 0; i <= max_attrib; i++) {
2686     VertexAttrib& attr = attribs[i];
2687     if (attr.enabled) {
2688       // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
2689       Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
2690       attr.buf = vertex_buf.buf;
2691       attr.buf_size = vertex_buf.size;
2692       // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
2693       // attr.offset, attr.divisor);
2694       last_enabled = i;
2695     }
2696   }
2697   max_attrib = last_enabled;
2698 }
2699
2700 extern "C" {
2701
2702 void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
2703                            GLintptr offset, GLsizei instancecount) {
2704   if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
2705       !fragment_shader) {
2706     return;
2707   }
2708
2709   Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
2710   if (!fb.color_attachment) {
2711     return;
2712   }
2713   Texture& colortex = ctx->textures[fb.color_attachment];
2714   if (!colortex.buf) {
2715     return;
2716   }
2717   assert(!colortex.locked);
2718   assert(colortex.internal_format == GL_RGBA8 ||
2719          colortex.internal_format == GL_R8);
2720   Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
2721   if (depthtex.buf) {
2722     assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
2723     assert(colortex.width == depthtex.width &&
2724            colortex.height == depthtex.height);
2725     assert(colortex.offset == depthtex.offset);
2726   }
2727
2728   // debugf("current_vertex_array %d\n", ctx->current_vertex_array);
2729   // debugf("indices size: %d\n", indices_buf.size);
2730   VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
2731   if (ctx->validate_vertex_array) {
2732     ctx->validate_vertex_array = false;
2733     v.validate();
2734   }
2735
2736 #ifdef PRINT_TIMINGS
2737   uint64_t start = get_time_value();
2738 #endif
2739
2740   ctx->shaded_rows = 0;
2741   ctx->shaded_pixels = 0;
2742
2743   vertex_shader->init_batch();
2744
2745   switch (type) {
2746     case GL_UNSIGNED_SHORT:
2747       assert(mode == GL_TRIANGLES);
2748       draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
2749                               depthtex);
2750       break;
2751     case GL_UNSIGNED_INT:
2752       assert(mode == GL_TRIANGLES);
2753       draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
2754                               depthtex);
2755       break;
2756     case GL_NONE:
2757       // Non-standard GL extension - if element type is GL_NONE, then we don't
2758       // use any element buffer and behave as if DrawArrays was called instead.
2759       for (GLsizei instance = 0; instance < instancecount; instance++) {
2760         switch (mode) {
2761           case GL_LINES:
2762             for (GLsizei i = 0; i + 2 <= count; i += 2) {
2763               vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
2764               draw_quad(2, colortex, depthtex);
2765             }
2766             break;
2767           case GL_TRIANGLES:
2768             for (GLsizei i = 0; i + 3 <= count; i += 3) {
2769               vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
2770               draw_quad(3, colortex, depthtex);
2771             }
2772             break;
2773           default:
2774             assert(false);
2775             break;
2776         }
2777       }
2778       break;
2779     default:
2780       assert(false);
2781       break;
2782   }
2783
2784   if (ctx->samples_passed_query) {
2785     Query& q = ctx->queries[ctx->samples_passed_query];
2786     q.value += ctx->shaded_pixels;
2787   }
2788
2789 #ifdef PRINT_TIMINGS
2790   uint64_t end = get_time_value();
2791   printf(
2792       "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
2793       "%fns/pixel)\n",
2794       double(end - start) / (1000. * 1000.),
2795       ctx->programs[ctx->current_program].impl->get_name(), instancecount,
2796       ctx->shaded_pixels, ctx->shaded_rows,
2797       double(ctx->shaded_pixels) / ctx->shaded_rows,
2798       double(end - start) / max(ctx->shaded_pixels, 1));
2799 #endif
2800 }
2801
2802 void Finish() {
2803 #ifdef PRINT_TIMINGS
2804   printf("Finish\n");
2805 #endif
2806 }
2807
2808 void MakeCurrent(Context* c) {
2809   if (ctx == c) {
2810     return;
2811   }
2812   ctx = c;
2813   setup_program(ctx ? ctx->current_program : 0);
2814 }
2815
2816 Context* CreateContext() { return new Context; }
2817
2818 void ReferenceContext(Context* c) {
2819   if (!c) {
2820     return;
2821   }
2822   ++c->references;
2823 }
2824
2825 void DestroyContext(Context* c) {
2826   if (!c) {
2827     return;
2828   }
2829   assert(c->references > 0);
2830   --c->references;
2831   if (c->references > 0) {
2832     return;
2833   }
2834   if (ctx == c) {
2835     MakeCurrent(nullptr);
2836   }
2837   delete c;
2838 }
2839
2840 size_t ReportMemory(Context* ctx, size_t (*size_of_op)(const void*)) {
2841   size_t size = 0;
2842   if (ctx) {
2843     for (auto& t : ctx->textures) {
2844       if (t && t->should_free()) {
2845         size += size_of_op(t->buf);
2846       }
2847     }
2848   }
2849   return size;
2850 }
2851 }  // extern "C"