/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <math.h>

#ifdef __MACH__
#  include <mach/mach.h>
#  include <mach/mach_time.h>
#else
#  include <time.h>
#endif

#ifdef NDEBUG
#  define debugf(...)
#else
#  define debugf(...) printf(__VA_ARGS__)
#endif

// #define PRINT_TIMINGS

#ifdef _WIN32
#  define ALWAYS_INLINE __forceinline
#  define NO_INLINE __declspec(noinline)

// Including Windows.h brings a huge amount of namespace pollution, so just
// define a couple of things manually.
typedef int BOOL;
#  define WINAPI __stdcall
#  define DECLSPEC_IMPORT __declspec(dllimport)
#  define WINBASEAPI DECLSPEC_IMPORT
typedef unsigned long DWORD;
typedef long LONG;
typedef __int64 LONGLONG;
#  define DUMMYSTRUCTNAME

typedef union _LARGE_INTEGER {
  struct {
    DWORD LowPart;
    LONG HighPart;
  } DUMMYSTRUCTNAME;
  struct {
    DWORD LowPart;
    LONG HighPart;
  } u;
  LONGLONG QuadPart;
} LARGE_INTEGER;
extern "C" {
WINBASEAPI BOOL WINAPI
QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);

WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
}

#else
// GCC is slower when dealing with always_inline, especially in debug builds.
// When using Clang, use always_inline more aggressively.
#  if defined(__clang__) || defined(NDEBUG)
#    define ALWAYS_INLINE __attribute__((always_inline)) inline
#  else
#    define ALWAYS_INLINE inline
#  endif
#  define NO_INLINE __attribute__((noinline))
#endif

// Some functions may cause excessive binary bloat if inlined in debug builds
// or with GCC, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
#if defined(__clang__) && defined(NDEBUG)
#  define PREFER_INLINE ALWAYS_INLINE
#else
#  define PREFER_INLINE inline
#endif

#define UNREACHABLE __builtin_unreachable()

#define UNUSED [[maybe_unused]]

#define FALLTHROUGH [[fallthrough]]

#if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN)
#  define IMPLICIT __attribute__((annotate("moz_implicit")))
#else
#  define IMPLICIT
#endif

#include "gl_defs.h"
#include "glsl.h"
#include "program.h"
#include "texture.h"

using namespace glsl;

typedef ivec2_scalar IntPoint;

struct IntRect {
  int x0;
  int y0;
  int x1;
  int y1;

  IntRect() : x0(0), y0(0), x1(0), y1(0) {}
  IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
  IntRect(IntPoint origin, IntPoint size)
      : x0(origin.x),
        y0(origin.y),
        x1(origin.x + size.x),
        y1(origin.y + size.y) {}

  int width() const { return x1 - x0; }
  int height() const { return y1 - y0; }
  bool is_empty() const { return width() <= 0 || height() <= 0; }

  IntPoint origin() const { return IntPoint(x0, y0); }

  bool same_size(const IntRect& o) const {
    return width() == o.width() && height() == o.height();
  }

  bool contains(const IntRect& o) const {
    return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
  }

  IntRect& intersect(const IntRect& o) {
    x0 = max(x0, o.x0);
    y0 = max(y0, o.y0);
    x1 = min(x1, o.x1);
    y1 = min(y1, o.y1);
    return *this;
  }

  IntRect intersection(const IntRect& o) {
    IntRect result = *this;
    result.intersect(o);
    return result;
  }

  // Scale from source-space to dest-space, optionally rounding inward
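  // e.g. {1, 1, 3, 3} scaled from a 3x3 source to a 4x4 dest becomes
  // {1, 1, 4, 4}; with roundIn = true the origin instead rounds inward,
  // giving {2, 2, 4, 4}.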
  IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
                 bool roundIn = false) {
    x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
    y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
    x1 = (x1 * dstWidth) / srcWidth;
    y1 = (y1 * dstHeight) / srcHeight;
    return *this;
  }

  // Flip the rect's Y coords around inflection point at Y=offset
  void invert_y(int offset) {
    y0 = offset - y0;
    y1 = offset - y1;
    swap(y0, y1);
  }

  IntRect& offset(const IntPoint& o) {
    x0 += o.x;
    y0 += o.y;
    x1 += o.x;
    y1 += o.y;
    return *this;
  }

  IntRect operator+(const IntPoint& o) const {
    return IntRect(*this).offset(o);
  }
  IntRect operator-(const IntPoint& o) const {
    return IntRect(*this).offset(-o);
  }
};

typedef vec2_scalar Point2D;
typedef vec4_scalar Point3D;

struct IntRange {
  int start;
  int end;

  int len() const { return end - start; }

  IntRange intersect(IntRange r) const {
    return {max(start, r.start), min(end, r.end)};
  }
};

struct FloatRange {
  float start;
  float end;

  float clip(float x) const { return clamp(x, start, end); }

  FloatRange clip(FloatRange r) const { return {clip(r.start), clip(r.end)}; }

  FloatRange merge(FloatRange r) const {
    return {min(start, r.start), max(end, r.end)};
  }

  IntRange round() const {
    return {int(floor(start + 0.5f)), int(floor(end + 0.5f))};
  }

  IntRange round_out() const { return {int(floor(start)), int(ceil(end))}; }
};

template <typename P>
static inline FloatRange x_range(P p0, P p1) {
  return {min(p0.x, p1.x), max(p0.x, p1.x)};
}

struct VertexAttrib {
  size_t size = 0;  // in bytes
  GLenum type = 0;
  bool normalized = false;
  GLsizei stride = 0;
  GLuint offset = 0;
  bool enabled = false;
  GLuint divisor = 0;
  int vertex_array = 0;
  int vertex_buffer = 0;
  char* buf = nullptr;  // XXX: this can easily dangle
  size_t buf_size = 0;  // this will let us bounds check

  // Mark the buffer as invalid so we don't accidentally use stale data.
  void disable() {
    enabled = false;
    buf = nullptr;
    buf_size = 0;
  }
};

static int bytes_for_internal_format(GLenum internal_format) {
  switch (internal_format) {
    case GL_RGBA32F:
      return 4 * 4;
    case GL_RGBA32I:
      return 4 * 4;
    case GL_RGBA8:
    case GL_BGRA8:
    case GL_RGBA:
      return 4;
    case GL_R8:
    case GL_RED:
      return 1;
    case GL_RG8:
    case GL_RG:
      return 2;
    case GL_DEPTH_COMPONENT:
    case GL_DEPTH_COMPONENT16:
    case GL_DEPTH_COMPONENT24:
    case GL_DEPTH_COMPONENT32:
      return 4;
    case GL_RGB_RAW_422_APPLE:
      return 2;
    case GL_R16:
      return 2;
    case GL_RG16:
      return 4;
    default:
      debugf("internal format: %x\n", internal_format);
      assert(0);
      return 0;
  }
}

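// Rows are padded to a 4-byte boundary, e.g. a 10-byte row occupies a
// 12-byte stride.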
static inline int aligned_stride(int row_bytes) { return (row_bytes + 3) & ~3; }

static TextureFormat gl_format_to_texture_format(int type) {
  switch (type) {
    case GL_RGBA32F:
      return TextureFormat::RGBA32F;
    case GL_RGBA32I:
      return TextureFormat::RGBA32I;
    case GL_RGBA8:
      return TextureFormat::RGBA8;
    case GL_R8:
      return TextureFormat::R8;
    case GL_RG8:
      return TextureFormat::RG8;
    case GL_R16:
      return TextureFormat::R16;
    case GL_RG16:
      return TextureFormat::RG16;
    case GL_RGB_RAW_422_APPLE:
      return TextureFormat::YUY2;
    default:
      assert(0);
      return TextureFormat::RGBA8;
  }
}

struct Query {
  uint64_t value = 0;
};

struct Buffer {
  char* buf = nullptr;
  size_t size = 0;
  size_t capacity = 0;

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(size_t new_size) {
    // If the size remains unchanged, don't allocate anything.
    if (new_size == size) {
      return true;
    }
    // If the new size is within the existing capacity of the buffer, just
    // reuse the existing buffer.
    if (new_size <= capacity) {
      size = new_size;
      return true;
    }
    // Otherwise we need to reallocate the buffer to hold up to the requested
    // larger size.
    char* new_buf = (char*)realloc(buf, new_size);
    assert(new_buf);
    if (!new_buf) {
      // If we fail, null out the buffer rather than leave around the old
      // allocation state.
      cleanup();
      return false;
    }
    // The reallocation succeeded, so install the buffer.
    buf = new_buf;
    size = new_size;
    capacity = new_size;
    return true;
  }

  void cleanup() {
    if (buf) {
      free(buf);
      buf = nullptr;
      size = 0;
      capacity = 0;
    }
  }

  ~Buffer() { cleanup(); }
};

struct Framebuffer {
  GLuint color_attachment = 0;
  GLuint depth_attachment = 0;
};

struct Renderbuffer {
  GLuint texture = 0;

  void on_erase();
};

TextureFilter gl_filter_to_texture_filter(int type) {
  switch (type) {
    case GL_NEAREST:
      return TextureFilter::NEAREST;
    case GL_NEAREST_MIPMAP_LINEAR:
      return TextureFilter::NEAREST;
    case GL_NEAREST_MIPMAP_NEAREST:
      return TextureFilter::NEAREST;
    case GL_LINEAR:
      return TextureFilter::LINEAR;
    case GL_LINEAR_MIPMAP_LINEAR:
      return TextureFilter::LINEAR;
    case GL_LINEAR_MIPMAP_NEAREST:
      return TextureFilter::LINEAR;
    default:
      assert(0);
      return TextureFilter::NEAREST;
  }
}

struct Texture {
  GLenum internal_format = 0;
  int width = 0;
  int height = 0;
  char* buf = nullptr;
  size_t buf_size = 0;
  uint32_t buf_stride = 0;
  uint8_t buf_bpp = 0;
  GLenum min_filter = GL_NEAREST;
  GLenum mag_filter = GL_LINEAR;
  // The number of active locks on this texture. If this texture has any active
  // locks, we need to disallow modifying or destroying the texture as it may
  // be accessed by other threads where modifications could lead to races.
  int32_t locked = 0;
  // When used as an attachment of a framebuffer, rendering to the texture
  // behaves as if it is located at the given offset such that the offset is
  // subtracted from all transformed vertexes after the viewport is applied.
  IntPoint offset;

  enum FLAGS {
    // If the buffer is internally-allocated by SWGL
    SHOULD_FREE = 1 << 1,
    // If the buffer has been cleared to initialize it. Currently this is only
    // utilized by depth buffers which need to know when depth runs have reset
    // to a valid row state. When unset, the depth runs may contain garbage.
    CLEARED = 1 << 2,
  };
  int flags = SHOULD_FREE;
  bool should_free() const { return bool(flags & SHOULD_FREE); }
  bool cleared() const { return bool(flags & CLEARED); }

  void set_flag(int flag, bool val) {
    if (val) {
      flags |= flag;
    } else {
      flags &= ~flag;
    }
  }
  void set_should_free(bool val) {
    // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
    // might mistakenly realloc an externally allocated buffer as if it were an
    // internally allocated one.
    assert(!buf);
    set_flag(SHOULD_FREE, val);
  }
  void set_cleared(bool val) { set_flag(CLEARED, val); }

  // Delayed-clearing state. When a clear of an FB is requested, we don't
  // immediately clear each row, as the rows may be subsequently overwritten
  // by draw calls, allowing us to skip the work of clearing the affected rows
  // either fully or partially. Instead, we keep a bit vector of rows that need
  // to be cleared later and save the value they need to be cleared with so
  // that we can clear these rows individually when they are touched by draws.
  // This currently only works for 2D textures, but not on texture arrays.
  int delay_clear = 0;
  uint32_t clear_val = 0;
  uint32_t* cleared_rows = nullptr;
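  // A set bit in cleared_rows marks a row that no longer needs the delayed
  // clear; enable_delayed_clear() below starts all in-bounds rows unset and
  // pre-marks the out-of-range tail bits of the last word so they are never
  // counted as needing a clear.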

  void init_depth_runs(uint32_t z);
  void fill_depth_runs(uint32_t z, const IntRect& scissor);

  void enable_delayed_clear(uint32_t val) {
    delay_clear = height;
    clear_val = val;
    if (!cleared_rows) {
      cleared_rows = new uint32_t[(height + 31) / 32];
    }
    memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
    if (height & 31) {
      cleared_rows[height / 32] = ~0U << (height & 31);
    }
  }

  void disable_delayed_clear() {
    if (cleared_rows) {
      delete[] cleared_rows;
      cleared_rows = nullptr;
      delay_clear = 0;
    }
  }

  int bpp() const { return buf_bpp; }
  int compute_bpp() const { return bytes_for_internal_format(internal_format); }

  size_t stride() const { return buf_stride; }
  size_t compute_stride(int bpp, int width) const {
    return aligned_stride(bpp * width);
  }

  // Set an external backing buffer of this texture.
  void set_buffer(void* new_buf, size_t new_stride) {
    assert(!should_free());
    // Ensure that the supplied stride is at least as big as the row data and
    // is aligned to the smaller of either the BPP or word-size. We need to at
    // least be able to sample data from within a row and sample whole pixels
    // of smaller formats without risking unaligned access.
    int new_bpp = compute_bpp();
    assert(new_stride >= size_t(new_bpp * width) &&
           new_stride % min(new_bpp, sizeof(uint32_t)) == 0);

    buf = (char*)new_buf;
    buf_size = 0;
    buf_bpp = new_bpp;
    buf_stride = new_stride;
  }

  // Returns true if re-allocation succeeded, false otherwise...
  bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
    assert(!locked);  // Locked textures shouldn't be reallocated
    // If we get here, some GL API call that invalidates the texture was used.
    // Mark the buffer as not-cleared to signal this.
    set_cleared(false);
    // Check if there is either no buffer currently or if we forced validation
    // of the buffer size because some dimension might have changed.
    if ((!buf || force) && should_free()) {
      // Compute the buffer's BPP and stride, since they may have changed.
      int new_bpp = compute_bpp();
      size_t new_stride = compute_stride(new_bpp, width);
      // Compute new size based on the maximum potential stride, rather than
      // the current stride, to hopefully avoid reallocations when size would
      // otherwise change too much...
      size_t max_stride = compute_stride(new_bpp, max(width, min_width));
      size_t size = max_stride * max(height, min_height);
      if ((!buf && size > 0) || size > buf_size) {
        // Allocate with a SIMD register-sized tail of padding at the end so we
        // can safely read or write past the end of the texture with SIMD ops.
        // Currently only the flat Z-buffer texture needs this padding due to
        // full-register loads and stores in check_depth and discard_depth. In
        // case some code in the future accidentally uses a linear filter on a
        // texture with less than 2 pixels per row, we also add this padding
        // just to be safe. All other texture types and use-cases should be
        // safe to omit padding.
        size_t padding =
            internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
                ? sizeof(Float)
                : 0;
        char* new_buf = (char*)realloc(buf, size + padding);
        assert(new_buf);
        if (!new_buf) {
          // Allocation failed, so ensure we don't leave stale buffer state.
          cleanup();
          return false;
        }
        // Successfully reallocated the buffer, so go ahead and set it.
        buf = new_buf;
        buf_size = size;
      }
      // Set the BPP and stride in case they changed.
      buf_bpp = new_bpp;
      buf_stride = new_stride;
    }
    // Allocation succeeded or nothing changed...
    return true;
  }

  void cleanup() {
    assert(!locked);  // Locked textures shouldn't be destroyed
    if (buf) {
      // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
      // regardless of whether we internally allocated it. This prevents us
      // from wrongly treating buf as internally allocated on a later realloc
      // when it was actually externally allocated.
      if (should_free()) {
        free(buf);
      }
      buf = nullptr;
      buf_size = 0;
      buf_bpp = 0;
      buf_stride = 0;
    }
    disable_delayed_clear();
  }

  ~Texture() { cleanup(); }

  IntRect bounds() const { return IntRect{0, 0, width, height}; }
  IntRect offset_bounds() const { return bounds() + offset; }

  // Find the valid sampling bounds relative to the requested region
  IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
    IntRect bb = bounds().intersect(req) - req.origin();
    if (invertY) bb.invert_y(req.height());
    return bb;
  }

  // Get a pointer for sampling at the given offset
  char* sample_ptr(int x, int y) const {
    return buf + y * stride() + x * bpp();
  }

  // Get a pointer for sampling the requested region and limit to the provided
  // sampling bounds
  char* sample_ptr(const IntRect& req, const IntRect& bounds,
                   bool invertY = false) const {
    // Offset the sample pointer by the clamped bounds
    int x = req.x0 + bounds.x0;
    // Invert the Y offset if necessary
    int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
    return sample_ptr(x, y);
  }
};

// The last vertex attribute is reserved as a null attribute in case a vertex
// attribute is used without being set.
#define MAX_ATTRIBS 17
#define NULL_ATTRIB 16
struct VertexArray {
  VertexAttrib attribs[MAX_ATTRIBS];
  int max_attrib = -1;
  // The GL spec defines element array buffer binding to be part of VAO state.
  GLuint element_array_buffer_binding = 0;

  void validate();
};

struct Shader {
  GLenum type = 0;
  ProgramLoader loader = nullptr;
};

struct Program {
  ProgramImpl* impl = nullptr;
  VertexShaderImpl* vert_impl = nullptr;
  FragmentShaderImpl* frag_impl = nullptr;
  bool deleted = false;

  ~Program() { delete impl; }
};

// clang-format off
// Fully-expand GL defines while ignoring more than 4 suffixes
#define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
// Generate a blend key enum symbol
#define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
#define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
#define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
#define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
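// e.g. BLEND_KEY(GL_ONE, GL_ZERO) token-pastes to the enum symbol
// BLEND_GL_ONEGL_ZERO00, with the trailing zeros filling the unused
// suffix slots.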

// Utility macro to easily generate similar code for all implemented blend modes
#define FOR_EACH_BLEND_KEY(macro) \
  macro(GL_ONE, GL_ZERO, 0, 0) \
  macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
  macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE) \
  macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
  macro(GL_ZERO, GL_SRC_COLOR, 0, 0) \
  macro(GL_ONE, GL_ONE, 0, 0) \
  macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
  macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE) \
  macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
  macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0) \
  macro(GL_MIN, 0, 0, 0) \
  macro(GL_MAX, 0, 0, 0) \
  macro(GL_MULTIPLY_KHR, 0, 0, 0) \
  macro(GL_SCREEN_KHR, 0, 0, 0) \
  macro(GL_OVERLAY_KHR, 0, 0, 0) \
  macro(GL_DARKEN_KHR, 0, 0, 0) \
  macro(GL_LIGHTEN_KHR, 0, 0, 0) \
  macro(GL_COLORDODGE_KHR, 0, 0, 0) \
  macro(GL_COLORBURN_KHR, 0, 0, 0) \
  macro(GL_HARDLIGHT_KHR, 0, 0, 0) \
  macro(GL_SOFTLIGHT_KHR, 0, 0, 0) \
  macro(GL_DIFFERENCE_KHR, 0, 0, 0) \
  macro(GL_EXCLUSION_KHR, 0, 0, 0) \
  macro(GL_HSL_HUE_KHR, 0, 0, 0) \
  macro(GL_HSL_SATURATION_KHR, 0, 0, 0) \
  macro(GL_HSL_COLOR_KHR, 0, 0, 0) \
  macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0) \
  macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0) \
  macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)

#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
#define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
enum BlendKey : uint8_t {
  FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
  FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
  BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
  MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
  AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
};
// clang-format on

const size_t MAX_TEXTURE_UNITS = 16;

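// Clear a binding if it currently refers to object n. Returns true if the
// binding was actually cleared.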
template <typename T>
static inline bool unlink(T& binding, T n) {
  if (binding == n) {
    binding = 0;
    return true;
  }
  return false;
}

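// A sparse table of GL objects addressed by non-zero integer handles, e.g.
// `size_t id = store.insert();` hands out the lowest free slot and
// `store.erase(id)` recycles that slot for a later insert.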
template <typename O>
struct ObjectStore {
  O** objects = nullptr;
  size_t size = 0;
  // reserve object 0 as null
  size_t first_free = 1;
  O invalid;

  ~ObjectStore() {
    if (objects) {
      for (size_t i = 0; i < size; i++) delete objects[i];
      free(objects);
    }
  }

  bool grow(size_t i) {
    size_t new_size = size ? size : 8;
    while (new_size <= i) new_size += new_size / 2;
    O** new_objects = (O**)realloc(objects, new_size * sizeof(O*));
    assert(new_objects);
    if (!new_objects) return false;
    while (size < new_size) new_objects[size++] = nullptr;
    objects = new_objects;
    return true;
  }

  void insert(size_t i, const O& o) {
    if (i >= size && !grow(i)) return;
    if (!objects[i]) objects[i] = new O(o);
  }

  size_t next_free() {
    size_t i = first_free;
    while (i < size && objects[i]) i++;
    first_free = i;
    return i;
  }

  size_t insert(const O& o = O()) {
    size_t i = next_free();
    insert(i, o);
    return i;
  }

  O& operator[](size_t i) {
    insert(i, O());
    return i < size ? *objects[i] : invalid;
  }

  O* find(size_t i) const { return i < size ? objects[i] : nullptr; }

  template <typename T>
  void on_erase(T*, ...) {}
  template <typename T>
  void on_erase(T* o, decltype(&T::on_erase)) {
    o->on_erase();
  }

  bool erase(size_t i) {
    if (i < size && objects[i]) {
      on_erase(objects[i], nullptr);
      delete objects[i];
      objects[i] = nullptr;
      if (i < first_free) first_free = i;
      return true;
    }
    return false;
  }

  O** begin() const { return objects; }
  O** end() const { return &objects[size]; }
};

struct Context {
  int32_t references = 1;

  ObjectStore<Query> queries;
  ObjectStore<Buffer> buffers;
  ObjectStore<Texture> textures;
  ObjectStore<VertexArray> vertex_arrays;
  ObjectStore<Framebuffer> framebuffers;
  ObjectStore<Renderbuffer> renderbuffers;
  ObjectStore<Shader> shaders;
  ObjectStore<Program> programs;

  GLenum last_error = GL_NO_ERROR;

  IntRect viewport = {0, 0, 0, 0};

  bool blend = false;
  GLenum blendfunc_srgb = GL_ONE;
  GLenum blendfunc_drgb = GL_ZERO;
  GLenum blendfunc_sa = GL_ONE;
  GLenum blendfunc_da = GL_ZERO;
  GLenum blend_equation = GL_FUNC_ADD;
  V8<uint16_t> blendcolor = 0;
  BlendKey blend_key = BLEND_KEY_NONE;

  bool depthtest = false;
  bool depthmask = true;
  GLenum depthfunc = GL_LESS;

  bool scissortest = false;
  IntRect scissor = {0, 0, 0, 0};

  GLfloat clearcolor[4] = {0, 0, 0, 0};
  GLdouble cleardepth = 1;

  int unpack_row_length = 0;

  int shaded_rows = 0;
  int shaded_pixels = 0;

  struct TextureUnit {
    GLuint texture_2d_binding = 0;
    GLuint texture_rectangle_binding = 0;

    void unlink(GLuint n) {
      ::unlink(texture_2d_binding, n);
      ::unlink(texture_rectangle_binding, n);
    }
  };
  TextureUnit texture_units[MAX_TEXTURE_UNITS];
  int active_texture_unit = 0;

  GLuint current_program = 0;

  GLuint current_vertex_array = 0;
  bool validate_vertex_array = true;

  GLuint pixel_pack_buffer_binding = 0;
  GLuint pixel_unpack_buffer_binding = 0;
  GLuint array_buffer_binding = 0;
  GLuint time_elapsed_query = 0;
  GLuint samples_passed_query = 0;
  GLuint renderbuffer_binding = 0;
  GLuint draw_framebuffer_binding = 0;
  GLuint read_framebuffer_binding = 0;
  GLuint unknown_binding = 0;

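  // Map a GL binding target to the context field that backs it, so that bind
  // and query operations can share a single lookup.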
  GLuint& get_binding(GLenum name) {
    switch (name) {
      case GL_PIXEL_PACK_BUFFER:
        return pixel_pack_buffer_binding;
      case GL_PIXEL_UNPACK_BUFFER:
        return pixel_unpack_buffer_binding;
      case GL_ARRAY_BUFFER:
        return array_buffer_binding;
      case GL_ELEMENT_ARRAY_BUFFER:
        return vertex_arrays[current_vertex_array].element_array_buffer_binding;
      case GL_TEXTURE_2D:
        return texture_units[active_texture_unit].texture_2d_binding;
      case GL_TEXTURE_RECTANGLE:
        return texture_units[active_texture_unit].texture_rectangle_binding;
      case GL_TIME_ELAPSED:
        return time_elapsed_query;
      case GL_SAMPLES_PASSED:
        return samples_passed_query;
      case GL_RENDERBUFFER:
        return renderbuffer_binding;
      case GL_DRAW_FRAMEBUFFER:
        return draw_framebuffer_binding;
      case GL_READ_FRAMEBUFFER:
        return read_framebuffer_binding;
      default:
        debugf("unknown binding %x\n", name);
        assert(false);
        return unknown_binding;
    }
  }

  Texture& get_texture(sampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(isampler2D, int unit) {
    return textures[texture_units[unit].texture_2d_binding];
  }

  Texture& get_texture(sampler2DRect, int unit) {
    return textures[texture_units[unit].texture_rectangle_binding];
  }

  IntRect apply_scissor(IntRect bb,
                        const IntPoint& origin = IntPoint(0, 0)) const {
    return scissortest ? bb.intersect(scissor - origin) : bb;
  }

  IntRect apply_scissor(const Texture& t) const {
    return apply_scissor(t.bounds(), t.offset);
  }
};
static Context* ctx = nullptr;
static VertexShaderImpl* vertex_shader = nullptr;
static FragmentShaderImpl* fragment_shader = nullptr;
static BlendKey blend_key = BLEND_KEY_NONE;

static void prepare_texture(Texture& t, const IntRect* skip = nullptr);

template <typename S>
static inline void init_filter(S* s, Texture& t) {
  // If the width is not at least 2 pixels, then we can't safely sample the end
  // of the row with a linear filter. In that case, just punt to using nearest
  // filtering instead.
  s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
                           : TextureFilter::NEAREST;
}

template <typename S>
static inline void init_sampler(S* s, Texture& t) {
  prepare_texture(t);
  s->width = t.width;
  s->height = t.height;
  s->stride = t.stride();
  int bpp = t.bpp();
  if (bpp >= 4)
    s->stride /= 4;
  else if (bpp == 2)
    s->stride /= 2;
  else
    assert(bpp == 1);
  // Use uint32_t* for easier sampling, but need to cast to uint8_t* or
  // uint16_t* for formats with bpp < 4.
  s->buf = (uint32_t*)t.buf;
  s->format = gl_format_to_texture_format(t.internal_format);
}

template <typename S>
static inline void null_sampler(S* s) {
  // For null texture data, just make the sampler provide a 1x1 buffer that is
  // transparent black. Ensure buffer holds at least a SIMD vector of zero data
  // for SIMD padding of unaligned loads.
  static const uint32_t zeroBuf[sizeof(Float) / sizeof(uint32_t)] = {0};
  s->width = 1;
  s->height = 1;
  s->stride = s->width;
  s->buf = (uint32_t*)zeroBuf;
  s->format = TextureFormat::RGBA8;
}

template <typename S>
static inline void null_filter(S* s) {
  s->filter = TextureFilter::NEAREST;
}

template <typename S>
S* lookup_sampler(S* s, int texture) {
  Texture& t = ctx->get_texture(s, texture);
  if (!t.buf) {
    null_sampler(s);
    null_filter(s);
  } else {
    init_sampler(s, t);
    init_filter(s, t);
  }
  return s;
}

template <typename S>
S* lookup_isampler(S* s, int texture) {
  Texture& t = ctx->get_texture(s, texture);
  if (!t.buf) {
    null_sampler(s);
  } else {
    init_sampler(s, t);
  }
  return s;
}

int bytes_per_type(GLenum type) {
  switch (type) {
    case GL_INT:
      return 4;
    case GL_FLOAT:
      return 4;
    case GL_UNSIGNED_SHORT:
      return 2;
    case GL_UNSIGNED_BYTE:
      return 1;
    default:
      assert(0);
      return 0;
  }
}

template <typename S, typename C>
static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
  typedef typename ElementType<S>::ty elem_type;
  S scalar = {0};
  const C* src = reinterpret_cast<const C*>(buf);
  if (normalized) {
    const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]) * scale);
    }
  } else {
    for (size_t i = 0; i < size / sizeof(C); i++) {
      put_nth_component(scalar, i, elem_type(src[i]));
    }
  }
  return scalar;
}

template <typename S>
static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
  if (sizeof(S) <= va.size) {
    return *reinterpret_cast<const S*>(src);
  }
  if (va.type == GL_UNSIGNED_SHORT) {
    return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
  }
  if (va.type == GL_UNSIGNED_BYTE) {
    return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
  }
  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
  S scalar = {0};
  memcpy(&scalar, src, va.size);
  return scalar;
}

template <typename T>
void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                 int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T(scalar_type{0});
  } else if (va.divisor != 0) {
    char* src = (char*)va.buf + va.stride * instance + va.offset;
    assert(src + va.size <= va.buf + va.buf_size);
    attrib = T(load_attrib_scalar<scalar_type>(va, src));
  } else {
    // Specialized for WR's primitive vertex order/winding.
    if (!count) return;
    assert(count >= 2 && count <= 4);
    char* src = (char*)va.buf + va.stride * start + va.offset;
    switch (count) {
      case 2: {
        // Lines must be indexed at offsets 0, 1.
        // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
        scalar_type lanes[2] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride)};
        attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
        break;
      }
      case 3: {
        // Triangles must be indexed at offsets 0, 1, 2.
        // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
        scalar_type lanes[3] = {
            load_attrib_scalar<scalar_type>(va, src),
            load_attrib_scalar<scalar_type>(va, src + va.stride),
            load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
        break;
      }
      default:
        // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
        // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
        // that the points form a convex path that can be traversed by the
        // rasterizer.
        attrib = (T){load_attrib_scalar<scalar_type>(va, src),
                     load_attrib_scalar<scalar_type>(va, src + va.stride),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
                     load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
        break;
    }
  }
}

template <typename T>
void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
                      int count) {
  typedef decltype(force_scalar(attrib)) scalar_type;
  // If no buffer is available, just use a zero default.
  if (!va.buf_size) {
    attrib = T{0};
    return;
  }
  char* src = nullptr;
  if (va.divisor != 0) {
    src = (char*)va.buf + va.stride * instance + va.offset;
  } else {
    if (!count) return;
    src = (char*)va.buf + va.stride * start + va.offset;
  }
  assert(src + va.size <= va.buf + va.buf_size);
  attrib = T(load_attrib_scalar<scalar_type>(va, src));
}

void setup_program(GLuint program) {
  if (!program) {
    vertex_shader = nullptr;
    fragment_shader = nullptr;
    return;
  }
  Program& p = ctx->programs[program];
  assert(p.impl);
  assert(p.vert_impl);
  assert(p.frag_impl);
  vertex_shader = p.vert_impl;
  fragment_shader = p.frag_impl;
}

extern ProgramLoader load_shader(const char* name);

extern "C" {

void UseProgram(GLuint program) {
  if (ctx->current_program && program != ctx->current_program) {
    auto* p = ctx->programs.find(ctx->current_program);
    if (p && p->deleted) {
      ctx->programs.erase(ctx->current_program);
    }
  }
  ctx->current_program = program;
  setup_program(program);
}

void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->viewport = IntRect{x, y, x + width, y + height};
}

void Enable(GLenum cap) {
  switch (cap) {
    case GL_BLEND:
      ctx->blend = true;
      break;
    case GL_DEPTH_TEST:
      ctx->depthtest = true;
      break;
    case GL_SCISSOR_TEST:
      ctx->scissortest = true;
      break;
  }
}

void Disable(GLenum cap) {
  switch (cap) {
    case GL_BLEND:
      ctx->blend = false;
      break;
    case GL_DEPTH_TEST:
      ctx->depthtest = false;
      break;
    case GL_SCISSOR_TEST:
      ctx->scissortest = false;
      break;
  }
}

// Report the last error generated and clear the error status.
GLenum GetError() {
  GLenum error = ctx->last_error;
  ctx->last_error = GL_NO_ERROR;
  return error;
}

// Sets the error status to out-of-memory to indicate that a buffer
// or texture re-allocation failed.
static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; }

static const char* const extensions[] = {
    "GL_ARB_blend_func_extended",
    "GL_ARB_clear_texture",
    "GL_ARB_copy_image",
    "GL_ARB_draw_instanced",
    "GL_ARB_explicit_attrib_location",
    "GL_ARB_instanced_arrays",
    "GL_ARB_invalidate_subdata",
    "GL_ARB_texture_storage",
    "GL_EXT_timer_query",
    "GL_KHR_blend_equation_advanced",
    "GL_KHR_blend_equation_advanced_coherent",
    "GL_APPLE_rgb_422",
};

void GetIntegerv(GLenum pname, GLint* params) {
  assert(params);
  switch (pname) {
    case GL_MAX_TEXTURE_UNITS:
    case GL_MAX_TEXTURE_IMAGE_UNITS:
      params[0] = MAX_TEXTURE_UNITS;
      break;
    case GL_MAX_TEXTURE_SIZE:
      params[0] = 1 << 15;
      break;
    case GL_MAX_ARRAY_TEXTURE_LAYERS:
      params[0] = 0;
      break;
    case GL_READ_FRAMEBUFFER_BINDING:
      params[0] = ctx->read_framebuffer_binding;
      break;
    case GL_DRAW_FRAMEBUFFER_BINDING:
      params[0] = ctx->draw_framebuffer_binding;
      break;
    case GL_PIXEL_PACK_BUFFER_BINDING:
      params[0] = ctx->pixel_pack_buffer_binding;
      break;
    case GL_PIXEL_UNPACK_BUFFER_BINDING:
      params[0] = ctx->pixel_unpack_buffer_binding;
      break;
    case GL_NUM_EXTENSIONS:
      params[0] = sizeof(extensions) / sizeof(extensions[0]);
      break;
    case GL_MAJOR_VERSION:
      params[0] = 3;
      break;
    case GL_MINOR_VERSION:
      params[0] = 2;
      break;
    case GL_MIN_PROGRAM_TEXEL_OFFSET:
      params[0] = 0;
      break;
    case GL_MAX_PROGRAM_TEXEL_OFFSET:
      params[0] = MAX_TEXEL_OFFSET;
      break;
    default:
      debugf("unhandled glGetIntegerv parameter %x\n", pname);
      assert(false);
  }
}

void GetBooleanv(GLenum pname, GLboolean* params) {
  assert(params);
  switch (pname) {
    case GL_DEPTH_WRITEMASK:
      params[0] = ctx->depthmask;
      break;
    default:
      debugf("unhandled glGetBooleanv parameter %x\n", pname);
      assert(false);
  }
}

const char* GetString(GLenum name) {
  switch (name) {
    case GL_VENDOR:
      return "Mozilla Gfx";
    case GL_RENDERER:
      return "Software WebRender";
    case GL_VERSION:
      return "3.2";
    case GL_SHADING_LANGUAGE_VERSION:
      return "1.50";
    default:
      debugf("unhandled glGetString parameter %x\n", name);
      assert(false);
      return nullptr;
  }
}

const char* GetStringi(GLenum name, GLuint index) {
  switch (name) {
    case GL_EXTENSIONS:
      if (index >= sizeof(extensions) / sizeof(extensions[0])) {
        return nullptr;
      }
      return extensions[index];
    default:
      debugf("unhandled glGetStringi parameter %x\n", name);
      assert(false);
      return nullptr;
  }
}

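// If the alpha blend func is merely the alpha analog of the rgb blend func,
// fold it into the rgb func so the pair can be treated as a non-separate
// blend func when hashing the blend key.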
GLenum remap_blendfunc(GLenum rgb, GLenum a) {
  switch (a) {
    case GL_SRC_ALPHA:
      if (rgb == GL_SRC_COLOR) a = GL_SRC_COLOR;
      break;
    case GL_ONE_MINUS_SRC_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC_COLOR) a = GL_ONE_MINUS_SRC_COLOR;
      break;
    case GL_DST_ALPHA:
      if (rgb == GL_DST_COLOR) a = GL_DST_COLOR;
      break;
    case GL_ONE_MINUS_DST_ALPHA:
      if (rgb == GL_ONE_MINUS_DST_COLOR) a = GL_ONE_MINUS_DST_COLOR;
      break;
    case GL_CONSTANT_ALPHA:
      if (rgb == GL_CONSTANT_COLOR) a = GL_CONSTANT_COLOR;
      break;
    case GL_ONE_MINUS_CONSTANT_ALPHA:
      if (rgb == GL_ONE_MINUS_CONSTANT_COLOR) a = GL_ONE_MINUS_CONSTANT_COLOR;
      break;
    case GL_SRC_COLOR:
      if (rgb == GL_SRC_ALPHA) a = GL_SRC_ALPHA;
      break;
    case GL_ONE_MINUS_SRC_COLOR:
      if (rgb == GL_ONE_MINUS_SRC_ALPHA) a = GL_ONE_MINUS_SRC_ALPHA;
      break;
    case GL_DST_COLOR:
      if (rgb == GL_DST_ALPHA) a = GL_DST_ALPHA;
      break;
    case GL_ONE_MINUS_DST_COLOR:
      if (rgb == GL_ONE_MINUS_DST_ALPHA) a = GL_ONE_MINUS_DST_ALPHA;
      break;
    case GL_CONSTANT_COLOR:
      if (rgb == GL_CONSTANT_ALPHA) a = GL_CONSTANT_ALPHA;
      break;
    case GL_ONE_MINUS_CONSTANT_COLOR:
      if (rgb == GL_ONE_MINUS_CONSTANT_ALPHA) a = GL_ONE_MINUS_CONSTANT_ALPHA;
      break;
    case GL_SRC1_ALPHA:
      if (rgb == GL_SRC1_COLOR) a = GL_SRC1_COLOR;
      break;
    case GL_ONE_MINUS_SRC1_ALPHA:
      if (rgb == GL_ONE_MINUS_SRC1_COLOR) a = GL_ONE_MINUS_SRC1_COLOR;
      break;
    case GL_SRC1_COLOR:
      if (rgb == GL_SRC1_ALPHA) a = GL_SRC1_ALPHA;
      break;
    case GL_ONE_MINUS_SRC1_COLOR:
      if (rgb == GL_ONE_MINUS_SRC1_ALPHA) a = GL_ONE_MINUS_SRC1_ALPHA;
      break;
  }
  return a;
}

// Generate a hashed blend key based on blend func and equation state. This
// allows all the blend state to be processed down to a blend key that can be
// dealt with inside a single switch statement.
static void hash_blend_key() {
  GLenum srgb = ctx->blendfunc_srgb;
  GLenum drgb = ctx->blendfunc_drgb;
  GLenum sa = ctx->blendfunc_sa;
  GLenum da = ctx->blendfunc_da;
  GLenum equation = ctx->blend_equation;
#define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
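  // e.g. plain (GL_ONE, GL_ZERO) hashes to (GL_ONE << 4) | GL_ZERO, which the
  // MAP_BLEND_KEY cases below translate back into BLEND_KEY(GL_ONE, GL_ZERO).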
  // Basic non-separate blend funcs use the two-argument form
  int hash = HASH_BLEND_KEY(srgb, drgb, 0, 0);
  // Separate alpha blend funcs use the 4 argument hash
  if (srgb != sa || drgb != da) hash |= HASH_BLEND_KEY(0, 0, sa, da);
  // Any other blend equation than the default func_add ignores the func and
  // instead generates a one-argument hash based on the equation
  if (equation != GL_FUNC_ADD) hash = HASH_BLEND_KEY(equation, 0, 0, 0);
  switch (hash) {
#define MAP_BLEND_KEY(...) \
  case HASH_BLEND_KEY(__VA_ARGS__): \
    ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
    break;
    FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
    default:
      debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", srgb, drgb,
             sa, da, equation);
      assert(false);
      break;
  }
}

void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
  ctx->blendfunc_srgb = srgb;
  ctx->blendfunc_drgb = drgb;
  sa = remap_blendfunc(srgb, sa);
  da = remap_blendfunc(drgb, da);
  ctx->blendfunc_sa = sa;
  ctx->blendfunc_da = da;

  hash_blend_key();
}

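// Store the blend color as packed 8-bit BGRA duplicated across both halves of
// the u16 vector, so it can be applied to a pair of pixels at a time.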
void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  I32 c = round_pixel((Float){b, g, r, a});
  ctx->blendcolor = CONVERT(c, U16).xyzwxyzw;
}

void BlendEquation(GLenum mode) {
  assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
         (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
  if (mode != ctx->blend_equation) {
    ctx->blend_equation = mode;
    hash_blend_key();
  }
}

void DepthMask(GLboolean flag) { ctx->depthmask = flag; }

void DepthFunc(GLenum func) {
  switch (func) {
    case GL_LESS:
    case GL_LEQUAL:
      break;
    default:
      assert(false);
  }
  ctx->depthfunc = func;
}

void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
  ctx->scissor = IntRect{x, y, x + width, y + height};
}

void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
  ctx->clearcolor[0] = r;
  ctx->clearcolor[1] = g;
  ctx->clearcolor[2] = b;
  ctx->clearcolor[3] = a;
}

void ClearDepth(GLdouble depth) { ctx->cleardepth = depth; }

void ActiveTexture(GLenum texture) {
  assert(texture >= GL_TEXTURE0);
  assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
  ctx->active_texture_unit =
      clamp(int(texture - GL_TEXTURE0), 0, int(MAX_TEXTURE_UNITS - 1));
}

void GenQueries(GLsizei n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    Query q;
    result[i] = ctx->queries.insert(q);
  }
}

void DeleteQuery(GLuint n) {
  if (n && ctx->queries.erase(n)) {
    unlink(ctx->time_elapsed_query, n);
    unlink(ctx->samples_passed_query, n);
  }
}

void GenBuffers(int n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    Buffer b;
    result[i] = ctx->buffers.insert(b);
  }
}

void DeleteBuffer(GLuint n) {
  if (n && ctx->buffers.erase(n)) {
    unlink(ctx->pixel_pack_buffer_binding, n);
    unlink(ctx->pixel_unpack_buffer_binding, n);
    unlink(ctx->array_buffer_binding, n);
  }
}

void GenVertexArrays(int n, GLuint* result) {
  for (int i = 0; i < n; i++) {
    VertexArray v;
    result[i] = ctx->vertex_arrays.insert(v);
  }
}

void DeleteVertexArray(GLuint n) {
  if (n && ctx->vertex_arrays.erase(n)) {
    unlink(ctx->current_vertex_array, n);
  }
}

GLuint CreateShader(GLenum type) {
  Shader s;
  s.type = type;
  return ctx->shaders.insert(s);
}

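// SWGL has no runtime GLSL compiler; shaders are precompiled into the binary
// and resolved by name here via load_shader().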
void ShaderSourceByName(GLuint shader, char* name) {
  Shader& s = ctx->shaders[shader];
  s.loader = load_shader(name);
  if (!s.loader) {
    debugf("unknown shader %s\n", name);
  }
}

void AttachShader(GLuint program, GLuint shader) {
  Program& p = ctx->programs[program];
  Shader& s = ctx->shaders[shader];
  if (s.type == GL_VERTEX_SHADER) {
    if (!p.impl && s.loader) p.impl = s.loader();
  } else if (s.type == GL_FRAGMENT_SHADER) {
    if (!p.impl && s.loader) p.impl = s.loader();
  } else {
    assert(0);
  }
}

void DeleteShader(GLuint n) {
  if (n) ctx->shaders.erase(n);
}

GLuint CreateProgram() {
  Program p;
  return ctx->programs.insert(p);
}

void DeleteProgram(GLuint n) {
  if (!n) return;
  if (ctx->current_program == n) {
    if (auto* p = ctx->programs.find(n)) {
      p->deleted = true;
    }
  } else {
    ctx->programs.erase(n);
  }
}

void LinkProgram(GLuint program) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  assert(p.impl->interpolants_size() <= sizeof(Interpolants));
  if (!p.vert_impl) p.vert_impl = p.impl->get_vertex_shader();
  if (!p.frag_impl) p.frag_impl = p.impl->get_fragment_shader();
}

GLint GetLinkStatus(GLuint program) {
  if (auto* p = ctx->programs.find(program)) {
    return p->impl ? 1 : 0;
  }
  return 0;
}

void BindAttribLocation(GLuint program, GLuint index, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return;
  }
  p.impl->bind_attrib(name, index);
}

GLint GetAttribLocation(GLuint program, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return -1;
  }
  return p.impl->get_attrib(name);
}

GLint GetUniformLocation(GLuint program, char* name) {
  Program& p = ctx->programs[program];
  assert(p.impl);
  if (!p.impl) {
    return -1;
  }
  GLint loc = p.impl->get_uniform(name);
  // debugf("location: %d\n", loc);
  return loc;
}

static uint64_t get_time_value() {
#ifdef __MACH__
  return mach_absolute_time();
#elif defined(_WIN32)
  LARGE_INTEGER time;
  static bool have_frequency = false;
  static LARGE_INTEGER frequency;
  if (!have_frequency) {
    QueryPerformanceFrequency(&frequency);
    have_frequency = true;
  }
  QueryPerformanceCounter(&time);
  return time.QuadPart * 1000000000ULL / frequency.QuadPart;
#else
  return ({
    struct timespec tp;
    clock_gettime(CLOCK_MONOTONIC, &tp);
    tp.tv_sec * 1000000000ULL + tp.tv_nsec;
  });
#endif
}

void BeginQuery(GLenum target, GLuint id) {
  ctx->get_binding(target) = id;
  Query& q = ctx->queries[id];
  switch (target) {
    case GL_SAMPLES_PASSED:
      q.value = 0;
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value();
      break;
    default:
      debugf("unknown query target %x for query %d\n", target, id);
      assert(false);
  }
}

void EndQuery(GLenum target) {
  Query& q = ctx->queries[ctx->get_binding(target)];
  switch (target) {
    case GL_SAMPLES_PASSED:
      break;
    case GL_TIME_ELAPSED:
      q.value = get_time_value() - q.value;
      break;
    default:
      debugf("unknown query target %x\n", target);
      assert(false);
  }
  ctx->get_binding(target) = 0;
}

void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
  Query& q = ctx->queries[id];
  switch (pname) {
    case GL_QUERY_RESULT:
      assert(params);
      params[0] = q.value;
      break;
    default:
      assert(false);
  }
}

void BindVertexArray(GLuint vertex_array) {
  if (vertex_array != ctx->current_vertex_array) {
    ctx->validate_vertex_array = true;
  }
  ctx->current_vertex_array = vertex_array;
}

void BindTexture(GLenum target, GLuint texture) {
  ctx->get_binding(target) = texture;
}

void BindBuffer(GLenum target, GLuint buffer) {
  ctx->get_binding(target) = buffer;
}

void BindFramebuffer(GLenum target, GLuint fb) {
  if (target == GL_FRAMEBUFFER) {
    ctx->read_framebuffer_binding = fb;
    ctx->draw_framebuffer_binding = fb;
  } else {
    assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
    ctx->get_binding(target) = fb;
  }
}

void BindRenderbuffer(GLenum target, GLuint rb) {
  ctx->get_binding(target) = rb;
}

void PixelStorei(GLenum name, GLint param) {
  if (name == GL_UNPACK_ALIGNMENT) {
    assert(param == 1);
  } else if (name == GL_UNPACK_ROW_LENGTH) {
    ctx->unpack_row_length = param;
  }
}

static GLenum remap_internal_format(GLenum format) {
  switch (format) {
    case GL_DEPTH_COMPONENT:
      return GL_DEPTH_COMPONENT24;
    case GL_RGBA:
      return GL_RGBA8;
    case GL_RED:
      return GL_R8;
    case GL_RG:
      return GL_RG8;
    case GL_RGB_422_APPLE:
      return GL_RGB_RAW_422_APPLE;
    default:
      return format;
  }
}

}  // extern "C"

static bool format_requires_conversion(GLenum external_format,
                                       GLenum internal_format) {
  switch (external_format) {
    case GL_RGBA:
      return internal_format == GL_RGBA8;
    default:
      return false;
  }
}

static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
                                       int width) {
  for (; width >= 4; width -= 4, dest += 4, src += 4) {
    U32 p = unaligned_load<U32>(src);
    U32 rb = p & 0x00FF00FF;
    unaligned_store(dest, (p & 0xFF00FF00) | (rb << 16) | (rb >> 16));
  }
  for (; width > 0; width--, dest++, src++) {
    uint32_t p = *src;
    uint32_t rb = p & 0x00FF00FF;
    *dest = (p & 0xFF00FF00) | (rb << 16) | (rb >> 16);
  }
}

static void convert_copy(GLenum external_format, GLenum internal_format,
                         uint8_t* dst_buf, size_t dst_stride,
                         const uint8_t* src_buf, size_t src_stride,
                         size_t width, size_t height) {
  switch (external_format) {
    case GL_RGBA:
      if (internal_format == GL_RGBA8) {
        for (; height; height--) {
          copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf,
                              width);
          dst_buf += dst_stride;
          src_buf += src_stride;
        }
        return;
      }
      break;
    default:
      break;
  }
  size_t row_bytes = width * bytes_for_internal_format(internal_format);
  for (; height; height--) {
    memcpy(dst_buf, src_buf, row_bytes);
    dst_buf += dst_stride;
    src_buf += src_stride;
  }
}

static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
                            GLsizei height, void* buf = nullptr,
                            GLsizei stride = 0, GLsizei min_width = 0,
                            GLsizei min_height = 0) {
  GLenum internal_format = remap_internal_format(external_format);
  bool changed = false;
  if (t.width != width || t.height != height ||
      t.internal_format != internal_format) {
    changed = true;
    t.internal_format = internal_format;
    t.width = width;
    t.height = height;
  }
  // If we changed from an internally managed buffer to an externally supplied
  // one or vice versa, ensure that we clean up old buffer state. However, if
  // we have to convert the data from a non-native format, then always treat it
  // as internally managed since we will need to copy to an internally managed
  // native format buffer.
  bool should_free = buf == nullptr || format_requires_conversion(
                                           external_format, internal_format);
  if (t.should_free() != should_free) {
    changed = true;
    t.cleanup();
    t.set_should_free(should_free);
  }
  // If now an external buffer, explicitly set it...
  if (!should_free) {
    t.set_buffer(buf, stride);
  }
  t.disable_delayed_clear();
  if (!t.allocate(changed, min_width, min_height)) {
    out_of_memory();
  }
  // If we have a buffer that needs format conversion, then do that now.
  if (buf && should_free) {
    convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
                 (const uint8_t*)buf, stride, width, height);
  }
}

extern "C" {

void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
                  GLsizei width, GLsizei height) {
  assert(levels == 1);
  Texture& t = ctx->textures[ctx->get_binding(target)];
  set_tex_storage(t, internal_format, width, height);
}

GLenum internal_format_for_data(GLenum format, GLenum ty) {
  if (format == GL_RED && ty == GL_UNSIGNED_BYTE) {
    return GL_R8;
  } else if ((format == GL_RGBA || format == GL_BGRA) &&
             (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV)) {
    return GL_RGBA8;
  } else if (format == GL_RGBA && ty == GL_FLOAT) {
    return GL_RGBA32F;
  } else if (format == GL_RGBA_INTEGER && ty == GL_INT) {
    return GL_RGBA32I;
  } else if (format == GL_RG && ty == GL_UNSIGNED_BYTE) {
    return GL_RG8;
  } else if (format == GL_RGB_422_APPLE &&
             ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) {
    return GL_RGB_RAW_422_APPLE;
  } else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
    return GL_R16;
  } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) {
    return GL_RG16;
  } else {
    debugf("unknown internal format for format %x, type %x\n", format, ty);
    assert(false);
    return 0;
  }
}

static Buffer* get_pixel_pack_buffer() {
  return ctx->pixel_pack_buffer_binding
             ? &ctx->buffers[ctx->pixel_pack_buffer_binding]
             : nullptr;
}

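// When a pixel pack or unpack buffer is bound, the data pointer passed to the
// GL call is really a byte offset into that buffer.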
1771 static void* get_pixel_pack_buffer_data(void* data) {
1772 if (Buffer* b = get_pixel_pack_buffer()) {
1773 return b->buf ? b->buf + (size_t)data : nullptr;
1775 return data;
1778 static Buffer* get_pixel_unpack_buffer() {
1779 return ctx->pixel_unpack_buffer_binding
1780 ? &ctx->buffers[ctx->pixel_unpack_buffer_binding]
1781 : nullptr;
1784 static void* get_pixel_unpack_buffer_data(void* data) {
1785 if (Buffer* b = get_pixel_unpack_buffer()) {
1786 return b->buf ? b->buf + (size_t)data : nullptr;
1788 return data;
1791 void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
1792 GLsizei width, GLsizei height, GLenum format, GLenum ty,
1793 void* data) {
1794 if (level != 0) {
1795 assert(false);
1796 return;
1798 data = get_pixel_unpack_buffer_data(data);
1799 if (!data) return;
1800 Texture& t = ctx->textures[ctx->get_binding(target)];
1801 IntRect skip = {xoffset, yoffset, xoffset + width, yoffset + height};
1802 prepare_texture(t, &skip);
1803 assert(xoffset + width <= t.width);
1804 assert(yoffset + height <= t.height);
1805 assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
1806 GLsizei row_length =
1807 ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
1808 assert(t.internal_format == internal_format_for_data(format, ty));
1809 int src_bpp = format_requires_conversion(format, t.internal_format)
1810 ? bytes_for_internal_format(format)
1811 : t.bpp();
1812 if (!src_bpp || !t.buf) return;
1813 convert_copy(format, t.internal_format,
1814 (uint8_t*)t.sample_ptr(xoffset, yoffset), t.stride(),
1815 (const uint8_t*)data, row_length * src_bpp, width, height);
1818 void TexImage2D(GLenum target, GLint level, GLint internal_format,
1819 GLsizei width, GLsizei height, GLint border, GLenum format,
1820 GLenum ty, void* data) {
1821 if (level != 0) {
1822 assert(false);
1823 return;
1825 assert(border == 0);
1826 TexStorage2D(target, 1, internal_format, width, height);
1827 TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
1830 void GenerateMipmap(UNUSED GLenum target) {
1831 // TODO: support mipmaps
1834 void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
1835 Texture& t = ctx->textures[texid];
1836 switch (pname) {
1837 case GL_TEXTURE_WRAP_S:
1838 assert(param == GL_CLAMP_TO_EDGE);
1839 break;
1840 case GL_TEXTURE_WRAP_T:
1841 assert(param == GL_CLAMP_TO_EDGE);
1842 break;
1843 case GL_TEXTURE_MIN_FILTER:
1844 t.min_filter = param;
1845 break;
1846 case GL_TEXTURE_MAG_FILTER:
1847 t.mag_filter = param;
1848 break;
1849 default:
1850 break;
1854 void TexParameteri(GLenum target, GLenum pname, GLint param) {
1855 SetTextureParameter(ctx->get_binding(target), pname, param);
1858 void GenTextures(int n, GLuint* result) {
1859 for (int i = 0; i < n; i++) {
1860 Texture t;
1861 result[i] = ctx->textures.insert(t);
1865 void DeleteTexture(GLuint n) {
1866 if (n && ctx->textures.erase(n)) {
1867 for (size_t i = 0; i < MAX_TEXTURE_UNITS; i++) {
1868 ctx->texture_units[i].unlink(n);
1873 void GenRenderbuffers(int n, GLuint* result) {
1874 for (int i = 0; i < n; i++) {
1875 Renderbuffer r;
1876 result[i] = ctx->renderbuffers.insert(r);
1880 void Renderbuffer::on_erase() {
1881 for (auto* fb : ctx->framebuffers) {
1882 if (fb) {
1883 unlink(fb->color_attachment, texture);
1884 unlink(fb->depth_attachment, texture);
1887 DeleteTexture(texture);
1890 void DeleteRenderbuffer(GLuint n) {
1891 if (n && ctx->renderbuffers.erase(n)) {
1892 unlink(ctx->renderbuffer_binding, n);
1896 void GenFramebuffers(int n, GLuint* result) {
1897 for (int i = 0; i < n; i++) {
1898 Framebuffer f;
1899 result[i] = ctx->framebuffers.insert(f);
1903 void DeleteFramebuffer(GLuint n) {
1904 if (n && ctx->framebuffers.erase(n)) {
1905 unlink(ctx->read_framebuffer_binding, n);
1906 unlink(ctx->draw_framebuffer_binding, n);
1910 void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
1911 GLsizei height) {
1912 // Just refer a renderbuffer to a texture to simplify things for now...
1913 Renderbuffer& r = ctx->renderbuffers[ctx->get_binding(target)];
1914 if (!r.texture) {
1915 GenTextures(1, &r.texture);
1917 switch (internal_format) {
1918 case GL_DEPTH_COMPONENT:
1919 case GL_DEPTH_COMPONENT16:
1920 case GL_DEPTH_COMPONENT24:
1921 case GL_DEPTH_COMPONENT32:
1922 // Force depth format to 24 bits...
1923 internal_format = GL_DEPTH_COMPONENT24;
1924 break;
1926 set_tex_storage(ctx->textures[r.texture], internal_format, width, height);
1929 void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
1930 GLsizei stride, GLuint offset) {
1931 // debugf("cva: %d\n", ctx->current_vertex_array);
1932 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1933 if (index >= NULL_ATTRIB) {
1934 assert(0);
1935 return;
1937 VertexAttrib& va = v.attribs[index];
1938 va.size = size * bytes_per_type(type);
1939 va.type = type;
1940 va.normalized = normalized;
1941 va.stride = stride;
1942 va.offset = offset;
1943 // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1944 va.vertex_buffer = ctx->array_buffer_binding;
1945 va.vertex_array = ctx->current_vertex_array;
1946 ctx->validate_vertex_array = true;
1949 void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
1950 GLuint offset) {
1951 // debugf("cva: %d\n", ctx->current_vertex_array);
1952 VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
1953 if (index >= NULL_ATTRIB) {
1954 assert(0);
1955 return;
1957 VertexAttrib& va = v.attribs[index];
1958 va.size = size * bytes_per_type(type);
1959 va.type = type;
1960 va.normalized = false;
1961 va.stride = stride;
1962 va.offset = offset;
1963 // Buffer &vertex_buf = ctx->buffers[ctx->array_buffer_binding];
1964 va.vertex_buffer = ctx->array_buffer_binding;
1965 va.vertex_array = ctx->current_vertex_array;
1966 ctx->validate_vertex_array = true;
void EnableVertexAttribArray(GLuint index) {
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (index >= NULL_ATTRIB) {
    assert(0);
    return;
  }
  VertexAttrib& va = v.attribs[index];
  if (!va.enabled) {
    ctx->validate_vertex_array = true;
  }
  va.enabled = true;
  v.max_attrib = max(v.max_attrib, (int)index);
}

void DisableVertexAttribArray(GLuint index) {
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (index >= NULL_ATTRIB) {
    assert(0);
    return;
  }
  VertexAttrib& va = v.attribs[index];
  if (va.enabled) {
    ctx->validate_vertex_array = true;
  }
  va.disable();
}

void VertexAttribDivisor(GLuint index, GLuint divisor) {
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  // Only support divisor being 0 (per-vertex) or 1 (per-instance).
  if (index >= NULL_ATTRIB || divisor > 1) {
    assert(0);
    return;
  }
  VertexAttrib& va = v.attribs[index];
  va.divisor = divisor;
}

void BufferData(GLenum target, GLsizeiptr size, void* data,
                UNUSED GLenum usage) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  if (size != b.size) {
    if (!b.allocate(size)) {
      out_of_memory();
    }
    ctx->validate_vertex_array = true;
  }
  if (data && b.buf && size <= b.size) {
    memcpy(b.buf, data, size);
  }
}

void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
                   void* data) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  assert(offset + size <= b.size);
  if (data && b.buf && offset + size <= b.size) {
    memcpy(&b.buf[offset], data, size);
  }
}

void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  return b.buf;
}

void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
                     UNUSED GLbitfield access) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  if (b.buf && offset >= 0 && length > 0 && offset + length <= b.size) {
    return b.buf + offset;
  }
  return nullptr;
}

GLboolean UnmapBuffer(GLenum target) {
  Buffer& b = ctx->buffers[ctx->get_binding(target)];
  return b.buf != nullptr;
}

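// Since buffers are plain host allocations, the mapping functions above just
// return bounds-checked pointers into Buffer::buf, and unmapping never needs
// to copy anything back.
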
void Uniform1i(GLint location, GLint V0) {
  // debugf("tex: %d\n", (int)ctx->textures.size);
  if (vertex_shader) {
    vertex_shader->set_uniform_1i(location, V0);
  }
}

void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
  assert(count == 1);
  if (vertex_shader) {
    vertex_shader->set_uniform_4fv(location, v);
  }
}

void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
                      const GLfloat* value) {
  assert(count == 1);
  assert(!transpose);
  if (vertex_shader) {
    vertex_shader->set_uniform_matrix4fv(location, value);
  }
}

void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
                          GLuint texture, GLint level) {
  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
  assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
  assert(level == 0);
  Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
  if (attachment == GL_COLOR_ATTACHMENT0) {
    fb.color_attachment = texture;
  } else if (attachment == GL_DEPTH_ATTACHMENT) {
    fb.depth_attachment = texture;
  } else {
    assert(0);
  }
}

void FramebufferRenderbuffer(GLenum target, GLenum attachment,
                             GLenum renderbuffertarget, GLuint renderbuffer) {
  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
  assert(renderbuffertarget == GL_RENDERBUFFER);
  Framebuffer& fb = ctx->framebuffers[ctx->get_binding(target)];
  Renderbuffer& rb = ctx->renderbuffers[renderbuffer];
  if (attachment == GL_COLOR_ATTACHMENT0) {
    fb.color_attachment = rb.texture;
  } else if (attachment == GL_DEPTH_ATTACHMENT) {
    fb.depth_attachment = rb.texture;
  } else {
    assert(0);
  }
}

} // extern "C"

static inline Framebuffer* get_framebuffer(GLenum target,
                                           bool fallback = false) {
  if (target == GL_FRAMEBUFFER) {
    target = GL_DRAW_FRAMEBUFFER;
  }
  Framebuffer* fb = ctx->framebuffers.find(ctx->get_binding(target));
  if (fallback && !fb) {
    // If the specified framebuffer isn't found and a fallback is requested,
    // use the default framebuffer.
    fb = &ctx->framebuffers[0];
  }
  return fb;
}

template <typename T>
static inline void fill_n(T* dst, size_t n, T val) {
  for (T* end = &dst[n]; dst < end; dst++) *dst = val;
}

#if USE_SSE2
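// On x86, "rep stosl" stores the dword in eax to [edi] n times; the
// "+D"/"+c"/"a" constraints below pin dst, n, and val to the edi/ecx/eax
// registers that the instruction implicitly uses.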
template <>
inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
  __asm__ __volatile__("rep stosl\n"
                       : "+D"(dst), "+c"(n)
                       : "a"(val)
                       : "memory", "cc");
}
#endif

static inline uint32_t clear_chunk(uint8_t value) {
  return uint32_t(value) * 0x01010101U;
}

static inline uint32_t clear_chunk(uint16_t value) {
  return uint32_t(value) | (uint32_t(value) << 16);
}

static inline uint32_t clear_chunk(uint32_t value) { return value; }
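
// Illustrative examples of the broadcast: clear_chunk packs repeated copies
// of a texel value into a 32-bit chunk so rows can be stored a word at a
// time, e.g.
//   clear_chunk(uint8_t(0x7F))    == 0x7F7F7F7F
//   clear_chunk(uint16_t(0xABCD)) == 0xABCDABCD
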
template <typename T>
static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
  const size_t N = sizeof(uint32_t) / sizeof(T);
  // fill any leading unaligned values
  if (N > 1) {
    size_t align = (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
    if (align <= len) {
      fill_n(buf, align, value);
      len -= align;
      buf += align;
    }
  }
  // fill as many aligned chunks as possible
  fill_n((uint32_t*)buf, len / N, chunk);
  // fill any remaining values
  if (N > 1) {
    fill_n(buf + (len & ~(N - 1)), len & (N - 1), value);
  }
}

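// Worked example for clear_row (hypothetical values): with T = uint8_t
// (N == 4), buf at address 0x1003, and len == 10, align == (-0x1003 & 3) == 1,
// so one leading byte is filled individually, then len / N == 2 aligned words
// are stored as chunks, and len & (N - 1) == 1 trailing byte is filled
// individually.
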
template <typename T>
static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
                         int skip_end = 0) {
  if (!t.buf) return;
  skip_start = max(skip_start, bb.x0);
  skip_end = max(skip_end, skip_start);
  assert(sizeof(T) == t.bpp());
  size_t stride = t.stride();
  // When clearing multiple full-width rows, collapse them into a single large
  // "row" to avoid redundant setup from clearing each row individually. Note
  // that we can only safely do this if the stride is tightly packed.
  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
      (t.should_free() || stride == t.width * sizeof(T))) {
    bb.x1 += (stride / sizeof(T)) * (bb.height() - 1);
    bb.y1 = bb.y0 + 1;
  }
  T* buf = (T*)t.sample_ptr(bb.x0, bb.y0);
  uint32_t chunk = clear_chunk(value);
  for (int rows = bb.height(); rows > 0; rows--) {
    if (bb.x0 < skip_start) {
      clear_row(buf, skip_start - bb.x0, value, chunk);
    }
    if (skip_end < bb.x1) {
      clear_row(buf + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
    }
    buf += stride / sizeof(T);
  }
}

template <typename T>
static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
                                   int skip_end = 0) {
  assert(t.buf != nullptr);
  assert(sizeof(T) == t.bpp());
  assert(skip_start <= skip_end);
  T* buf = (T*)t.sample_ptr(0, y);
  uint32_t chunk = clear_chunk((T)t.clear_val);
  if (skip_start > 0) {
    clear_row<T>(buf, skip_start, t.clear_val, chunk);
  }
  if (skip_end < t.width) {
    clear_row<T>(buf + skip_end, t.width - skip_end, t.clear_val, chunk);
  }
}

template <typename T>
static void force_clear(Texture& t, const IntRect* skip = nullptr) {
  if (!t.delay_clear || !t.cleared_rows) {
    return;
  }
  int y0 = 0;
  int y1 = t.height;
  int skip_start = 0;
  int skip_end = 0;
  if (skip) {
    y0 = clamp(skip->y0, 0, t.height);
    y1 = clamp(skip->y1, y0, t.height);
    skip_start = clamp(skip->x0, 0, t.width);
    skip_end = clamp(skip->x1, skip_start, t.width);
    if (skip_start <= 0 && skip_end >= t.width && y0 <= 0 && y1 >= t.height) {
      t.disable_delayed_clear();
      return;
    }
  }
  int num_masks = (y1 + 31) / 32;
  uint32_t* rows = t.cleared_rows;
  for (int i = y0 / 32; i < num_masks; i++) {
    uint32_t mask = rows[i];
    if (mask != ~0U) {
      rows[i] = ~0U;
      int start = i * 32;
      while (mask) {
        int count = __builtin_ctz(mask);
        if (count > 0) {
          clear_buffer<T>(t, t.clear_val,
                          IntRect{0, start, t.width, start + count}, skip_start,
                          skip_end);
          t.delay_clear -= count;
          start += count;
          mask >>= count;
        }
        count = __builtin_ctz(mask + 1);
        start += count;
        mask >>= count;
      }
      int count = (i + 1) * 32 - start;
      if (count > 0) {
        clear_buffer<T>(t, t.clear_val,
                        IntRect{0, start, t.width, start + count}, skip_start,
                        skip_end);
        t.delay_clear -= count;
      }
    }
  }
  if (t.delay_clear <= 0) t.disable_delayed_clear();
}

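// Illustrative walk-through of the mask scan above (hypothetical values): bit
// k of cleared_rows[i] is set when row 32 * i + k has already been written.
// For mask == 0b110, ctz(mask) == 1 clears the single dirty leading row,
// ctz(mask + 1) == 2 then skips past the two already-written rows, and the
// final (i + 1) * 32 - start pass clears the remaining 29 rows of the mask.
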
static void prepare_texture(Texture& t, const IntRect* skip) {
  if (t.delay_clear) {
    switch (t.internal_format) {
      case GL_RGBA8:
        force_clear<uint32_t>(t, skip);
        break;
      case GL_R8:
        force_clear<uint8_t>(t, skip);
        break;
      case GL_RG8:
        force_clear<uint16_t>(t, skip);
        break;
      default:
        assert(false);
        break;
    }
  }
}

// Set up a clear on a texture. This may either force an immediate clear or
// potentially punt to a delayed clear, if applicable.
template <typename T>
static void request_clear(Texture& t, T value, const IntRect& scissor) {
  // If the clear would require a scissor, force-clear anything outside
  // the scissor, and then immediately clear anything inside the scissor.
  if (!scissor.contains(t.offset_bounds())) {
    IntRect skip = scissor - t.offset;
    force_clear<T>(t, &skip);
    clear_buffer<T>(t, value, skip.intersection(t.bounds()));
  } else {
    // Do a delayed clear, since no scissoring is required.
    t.enable_delayed_clear(value);
  }
}

template <typename T>
static inline void request_clear(Texture& t, T value) {
  // If scissoring is enabled, use the scissor rect. Otherwise, just scissor to
  // the entire texture bounds.
  request_clear(t, value, ctx->scissortest ? ctx->scissor : t.offset_bounds());
}

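// In practice the delayed-clear flow is: request_clear() merely records the
// clear value on the texture, and the actual stores happen lazily in
// prepare_texture()/force_clear(), and then only for rows that a later draw
// or readback did not fully overwrite.
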
extern "C" {

void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
                            void* buf) {
  Framebuffer& fb = ctx->framebuffers[0];
  if (!fb.color_attachment) {
    GenTextures(1, &fb.color_attachment);
  }
  // If the dimensions or buffer properties changed, we need to reallocate
  // the underlying storage for the color buffer texture.
  Texture& colortex = ctx->textures[fb.color_attachment];
  set_tex_storage(colortex, GL_RGBA8, width, height, buf, stride);
  colortex.offset = IntPoint(x, y);
  if (!fb.depth_attachment) {
    GenTextures(1, &fb.depth_attachment);
  }
  // Ensure dimensions of the depth buffer match the color buffer.
  Texture& depthtex = ctx->textures[fb.depth_attachment];
  set_tex_storage(depthtex, GL_DEPTH_COMPONENT24, width, height);
  depthtex.offset = IntPoint(x, y);
}

void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
                     int32_t* height, int32_t* stride) {
  Framebuffer* fb = ctx->framebuffers.find(fbo);
  if (!fb || !fb->color_attachment) {
    return nullptr;
  }
  Texture& colortex = ctx->textures[fb->color_attachment];
  if (flush) {
    prepare_texture(colortex);
  }
  assert(colortex.offset == IntPoint(0, 0));
  if (width) {
    *width = colortex.width;
  }
  if (height) {
    *height = colortex.height;
  }
  if (stride) {
    *stride = colortex.stride();
  }
  return colortex.buf ? colortex.sample_ptr(0, 0) : nullptr;
}

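// Passing flush == GL_TRUE above resolves any pending delayed clear so the
// returned pointer sees fully initialized pixels.
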
void ResolveFramebuffer(GLuint fbo) {
  Framebuffer* fb = ctx->framebuffers.find(fbo);
  if (!fb || !fb->color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb->color_attachment];
  prepare_texture(colortex);
}

void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
                      GLsizei height, GLsizei stride, void* buf,
                      GLsizei min_width, GLsizei min_height) {
  Texture& t = ctx->textures[texid];
  set_tex_storage(t, internal_format, width, height, buf, stride, min_width,
                  min_height);
}

GLenum CheckFramebufferStatus(GLenum target) {
  Framebuffer* fb = get_framebuffer(target);
  if (!fb || !fb->color_attachment) {
    return GL_FRAMEBUFFER_UNSUPPORTED;
  }
  return GL_FRAMEBUFFER_COMPLETE;
}

void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
                      GLint zoffset, GLsizei width, GLsizei height,
                      GLsizei depth, GLenum format, GLenum type,
                      const void* data) {
  if (level != 0) {
    assert(false);
    return;
  }
  Texture& t = ctx->textures[texture];
  assert(!t.locked);
  if (width <= 0 || height <= 0 || depth <= 0) {
    return;
  }
  assert(zoffset == 0 && depth == 1);
  IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
  if (t.internal_format == GL_DEPTH_COMPONENT24) {
    uint32_t value = 0xFFFFFF;
    switch (format) {
      case GL_DEPTH_COMPONENT:
        switch (type) {
          case GL_DOUBLE:
            value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
            break;
          case GL_FLOAT:
            value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
            break;
          default:
            assert(false);
            break;
        }
        break;
      default:
        assert(false);
        break;
    }
    if (t.cleared() && !scissor.contains(t.offset_bounds())) {
      // If we need to scissor the clear and the depth buffer was already
      // initialized, then just fill runs for that scissor area.
      t.fill_depth_runs(value, scissor);
    } else {
      // Otherwise, the buffer is either uninitialized or the clear would
      // encompass the entire buffer. If uninitialized, we can safely fill
      // the entire buffer with any value and thus ignore any scissoring.
      t.init_depth_runs(value);
    }
    return;
  }

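  // The color below accumulates in RGBA order with red in the low byte
  // (0xAABBGGRR); for RGBA8 textures it is then swizzled into the BGRA word
  // layout stored internally, e.g. (illustrative value) opaque red 0xFF0000FF
  // becomes 0xFFFF0000.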
  uint32_t color = 0xFF000000;
  switch (type) {
    case GL_FLOAT: {
      const GLfloat* f = (const GLfloat*)data;
      Float v = {0.0f, 0.0f, 0.0f, 1.0f};
      switch (format) {
        case GL_RGBA:
          v.w = f[3]; // alpha
          FALLTHROUGH;
        case GL_RGB:
          v.z = f[2]; // blue
          FALLTHROUGH;
        case GL_RG:
          v.y = f[1]; // green
          FALLTHROUGH;
        case GL_RED:
          v.x = f[0]; // red
          break;
        default:
          assert(false);
          break;
      }
      color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
      break;
    }
    case GL_UNSIGNED_BYTE: {
      const GLubyte* b = (const GLubyte*)data;
      switch (format) {
        case GL_RGBA:
          color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24); // alpha
          FALLTHROUGH;
        case GL_RGB:
          color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16); // blue
          FALLTHROUGH;
        case GL_RG:
          color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8); // green
          FALLTHROUGH;
        case GL_RED:
          color = (color & ~0x000000FF) | uint32_t(b[0]); // red
          break;
        default:
          assert(false);
          break;
      }
      break;
    }
    default:
      assert(false);
      break;
  }

  switch (t.internal_format) {
    case GL_RGBA8:
      // The clear color needs to be swizzled to BGRA.
      request_clear<uint32_t>(t,
                              (color & 0xFF00FF00) |
                                  ((color << 16) & 0xFF0000) |
                                  ((color >> 16) & 0xFF),
                              scissor);
      break;
    case GL_R8:
      request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
      break;
    case GL_RG8:
      request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
      break;
    default:
      assert(false);
      break;
  }
}

void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
                   const void* data) {
  Texture& t = ctx->textures[texture];
  IntRect scissor = t.offset_bounds();
  ClearTexSubImage(texture, level, scissor.x0, scissor.y0, 0, scissor.width(),
                   scissor.height(), 1, format, type, data);
}

void Clear(GLbitfield mask) {
  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
    Texture& t = ctx->textures[fb.color_attachment];
    IntRect scissor = ctx->scissortest
                          ? ctx->scissor.intersection(t.offset_bounds())
                          : t.offset_bounds();
    ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
                     scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
                     ctx->clearcolor);
  }
  if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
    Texture& t = ctx->textures[fb.depth_attachment];
    IntRect scissor = ctx->scissortest
                          ? ctx->scissor.intersection(t.offset_bounds())
                          : t.offset_bounds();
    ClearTexSubImage(fb.depth_attachment, 0, scissor.x0, scissor.y0, 0,
                     scissor.width(), scissor.height(), 1, GL_DEPTH_COMPONENT,
                     GL_DOUBLE, &ctx->cleardepth);
  }
}

void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
                    GLsizei height, GLfloat r, GLfloat g, GLfloat b,
                    GLfloat a) {
  GLfloat color[] = {r, g, b, a};
  Framebuffer& fb = ctx->framebuffers[fbo];
  Texture& t = ctx->textures[fb.color_attachment];
  IntRect scissor =
      IntRect{xoffset, yoffset, xoffset + width, yoffset + height}.intersection(
          t.offset_bounds());
  ClearTexSubImage(fb.color_attachment, 0, scissor.x0, scissor.y0, 0,
                   scissor.width(), scissor.height(), 1, GL_RGBA, GL_FLOAT,
                   color);
}

void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
                           const GLenum* attachments) {
  Framebuffer* fb = get_framebuffer(target);
  if (!fb || num_attachments <= 0 || !attachments) {
    return;
  }
  for (GLsizei i = 0; i < num_attachments; i++) {
    switch (attachments[i]) {
      case GL_DEPTH_ATTACHMENT: {
        Texture& t = ctx->textures[fb->depth_attachment];
        t.set_cleared(false);
        break;
      }
      case GL_COLOR_ATTACHMENT0: {
        Texture& t = ctx->textures[fb->color_attachment];
        t.disable_delayed_clear();
        break;
      }
    }
  }
}

void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
                GLenum type, void* data) {
  data = get_pixel_pack_buffer_data(data);
  if (!data) return;
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
         format == GL_BGRA || format == GL_RG);
  Texture& t = ctx->textures[fb->color_attachment];
  if (!t.buf) return;
  prepare_texture(t);
  // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
  //        width, height, ctx->read_framebuffer_binding, t.internal_format);
  x -= t.offset.x;
  y -= t.offset.y;
  assert(x >= 0 && y >= 0);
  assert(x + width <= t.width);
  assert(y + height <= t.height);
  if (internal_format_for_data(format, type) != t.internal_format) {
    debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
           internal_format_for_data(format, type));
    assert(false);
    return;
  }
  // Only support readback conversions that are reversible
  assert(!format_requires_conversion(format, t.internal_format) ||
         bytes_for_internal_format(format) == t.bpp());
  uint8_t* dest = (uint8_t*)data;
  size_t destStride = width * t.bpp();
  if (y < 0) {
    dest += -y * destStride;
    height += y;
    y = 0;
  }
  if (y + height > t.height) {
    height = t.height - y;
  }
  if (x < 0) {
    dest += -x * t.bpp();
    width += x;
    x = 0;
  }
  if (x + width > t.width) {
    width = t.width - x;
  }
  if (width <= 0 || height <= 0) {
    return;
  }
  convert_copy(format, t.internal_format, dest, destStride,
               (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
}

void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
                      GLint dstY, GLint dstZ, GLsizei srcWidth,
                      GLsizei srcHeight, GLsizei srcDepth) {
  assert(srcLevel == 0 && dstLevel == 0);
  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
  if (srcTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[srcName];
    srcName = rb.texture;
  }
  if (dstTarget == GL_RENDERBUFFER) {
    Renderbuffer& rb = ctx->renderbuffers[dstName];
    dstName = rb.texture;
  }
  Texture& srctex = ctx->textures[srcName];
  if (!srctex.buf) return;
  prepare_texture(srctex);
  Texture& dsttex = ctx->textures[dstName];
  if (!dsttex.buf) return;
  assert(!dsttex.locked);
  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
  prepare_texture(dsttex, &skip);
  assert(srctex.internal_format == dsttex.internal_format);
  assert(srcWidth >= 0);
  assert(srcHeight >= 0);
  assert(srcX + srcWidth <= srctex.width);
  assert(srcY + srcHeight <= srctex.height);
  assert(dstX + srcWidth <= dsttex.width);
  assert(dstY + srcHeight <= dsttex.height);
  int bpp = srctex.bpp();
  int src_stride = srctex.stride();
  int dest_stride = dsttex.stride();
  char* dest = dsttex.sample_ptr(dstX, dstY);
  char* src = srctex.sample_ptr(srcX, srcY);
  for (int y = 0; y < srcHeight; y++) {
    memcpy(dest, src, srcWidth * bpp);
    dest += dest_stride;
    src += src_stride;
  }
}

void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
                       GLint yoffset, GLint x, GLint y, GLsizei width,
                       GLsizei height) {
  assert(level == 0);
  Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
  if (!fb) return;
  CopyImageSubData(fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
                   ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset, yoffset,
                   0, width, height, 1);
}

} // extern "C"

#include "blend.h"
#include "composite.h"
#include "swgl_ext.h"

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#ifdef __clang__
# pragma GCC diagnostic ignored "-Wunused-private-field"
#else
# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
#include "load_shader.h"
#pragma GCC diagnostic pop

#include "rasterize.h"

void VertexArray::validate() {
  int last_enabled = -1;
  for (int i = 0; i <= max_attrib; i++) {
    VertexAttrib& attr = attribs[i];
    if (attr.enabled) {
      // VertexArray &v = ctx->vertex_arrays[attr.vertex_array];
      Buffer& vertex_buf = ctx->buffers[attr.vertex_buffer];
      attr.buf = vertex_buf.buf;
      attr.buf_size = vertex_buf.size;
      // debugf("%d %x %d %d %d %d\n", i, attr.type, attr.size, attr.stride,
      //        attr.offset, attr.divisor);
      last_enabled = i;
    }
  }
  max_attrib = last_enabled;
}

extern "C" {

void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
                           GLintptr offset, GLsizei instancecount) {
  if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
      !fragment_shader) {
    return;
  }

  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
  if (!fb.color_attachment) {
    return;
  }
  Texture& colortex = ctx->textures[fb.color_attachment];
  if (!colortex.buf) {
    return;
  }
  assert(!colortex.locked);
  assert(colortex.internal_format == GL_RGBA8 ||
         colortex.internal_format == GL_R8);
  Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
  if (depthtex.buf) {
    assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
    assert(colortex.width == depthtex.width &&
           colortex.height == depthtex.height);
    assert(colortex.offset == depthtex.offset);
  }

  // debugf("current_vertex_array %d\n", ctx->current_vertex_array);
  // debugf("indices size: %d\n", indices_buf.size);
  VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
  if (ctx->validate_vertex_array) {
    ctx->validate_vertex_array = false;
    v.validate();
  }

#ifdef PRINT_TIMINGS
  uint64_t start = get_time_value();
#endif

  ctx->shaded_rows = 0;
  ctx->shaded_pixels = 0;

  vertex_shader->init_batch();

  switch (type) {
    case GL_UNSIGNED_SHORT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_UNSIGNED_INT:
      assert(mode == GL_TRIANGLES);
      draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
                              depthtex);
      break;
    case GL_NONE:
      // Non-standard GL extension - if the element type is GL_NONE, then we
      // don't use any element buffer and behave as if DrawArrays was called
      // instead.
      for (GLsizei instance = 0; instance < instancecount; instance++) {
        switch (mode) {
          case GL_LINES:
            for (GLsizei i = 0; i + 2 <= count; i += 2) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
              draw_quad(2, colortex, depthtex);
            }
            break;
          case GL_TRIANGLES:
            for (GLsizei i = 0; i + 3 <= count; i += 3) {
              vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
              draw_quad(3, colortex, depthtex);
            }
            break;
          default:
            assert(false);
            break;
        }
      }
      break;
    default:
      assert(false);
      break;
  }

  if (ctx->samples_passed_query) {
    Query& q = ctx->queries[ctx->samples_passed_query];
    q.value += ctx->shaded_pixels;
  }

#ifdef PRINT_TIMINGS
  uint64_t end = get_time_value();
  printf(
      "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
      "%fns/pixel)\n",
      double(end - start) / (1000. * 1000.),
      ctx->programs[ctx->current_program].impl->get_name(), instancecount,
      ctx->shaded_pixels, ctx->shaded_rows,
      double(ctx->shaded_pixels) / ctx->shaded_rows,
      double(end - start) / max(ctx->shaded_pixels, 1));
#endif
}

void Finish() {
#ifdef PRINT_TIMINGS
  printf("Finish\n");
#endif
}

void MakeCurrent(Context* c) {
  if (ctx == c) {
    return;
  }
  ctx = c;
  setup_program(ctx ? ctx->current_program : 0);
}

Context* CreateContext() { return new Context; }

void ReferenceContext(Context* c) {
  if (!c) {
    return;
  }
  ++c->references;
}

void DestroyContext(Context* c) {
  if (!c) {
    return;
  }
  assert(c->references > 0);
  --c->references;
  if (c->references > 0) {
    return;
  }
  if (ctx == c) {
    MakeCurrent(nullptr);
  }
  delete c;
}

size_t ReportMemory(Context* ctx, size_t (*size_of_op)(const void*)) {
  size_t size = 0;
  if (ctx) {
    for (auto& t : ctx->textures) {
      if (t && t->should_free()) {
        size += size_of_op(t->buf);
      }
    }
  }
  return size;
}

} // extern "C"