Bumping gaia.json for 1 gaia revision(s) a=gaia-bump
[gecko.git] / dom / canvas / WebGLElementArrayCache.cpp
blobb778371220211588744b494b053f5bc1547dbe2b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "WebGLElementArrayCache.h"
8 #include "mozilla/Assertions.h"
9 #include "mozilla/MemoryReporting.h"
10 #include "mozilla/MathAlgorithms.h"
12 #include <cstdlib>
13 #include <cstring>
14 #include <limits>
15 #include <algorithm>
17 namespace mozilla {
19 static void
20 UpdateUpperBound(uint32_t* out_upperBound, uint32_t newBound)
22 MOZ_ASSERT(out_upperBound);
23 *out_upperBound = std::max(*out_upperBound, newBound);
27 * WebGLElementArrayCacheTree contains most of the implementation of WebGLElementArrayCache,
28 * which performs WebGL element array buffer validation for drawElements.
30 * Attention: Here lie nontrivial data structures, bug-prone algorithms, and non-canonical tweaks!
31 * Whence the explanatory comments, and compiled unit test.
33 * *** What problem are we solving here? ***
35 * WebGL::DrawElements has to validate that the elements are in range wrt the current vertex attribs.
36 * This boils down to the problem, given an array of integers, of computing the maximum in an arbitrary
37 * sub-array. The naive algorithm has linear complexity; this has been a major performance problem,
38 * see bug 569431. In that bug, we took the approach of caching the max for the whole array, which
39 * does cover most cases (DrawElements typically consumes the whole element array buffer) but doesn't
40 * help in other use cases:
41 * - when doing "partial DrawElements" i.e. consuming only part of the element array buffer
42 * - when doing frequent "partial buffer updates" i.e. bufferSubData calls updating parts of the
43 * element array buffer
45 * *** The solution: a binary tree ***
47 * The solution implemented here is to use a binary tree as the cache data structure. Each tree node
48 * contains the max of its two children nodes. In this way, finding the maximum in any contiguous sub-array
49 * has log complexity instead of linear complexity.
51 * Simplistically, if the element array is
53 * 1 4 3 2
55 * then the corresponding tree is
57 * 4
58 * _/ \_
59 * 4 3
60 * / \ / \
61 * 1 4 3 2
63 * In practice, the bottom-most levels of the tree are both the largest to store (because they
64 * have more nodes), and the least useful performance-wise (because each node in the bottom
65 * levels concerns only few entries in the elements array buffer, it is cheap to compute).
67 * For this reason, we stop the tree a few levels above, so that each tree leaf actually corresponds
68 * to more than one element array entry.
70 * The number of levels that we "drop" is |sSkippedBottomTreeLevels| and the number of element array entries
71 * that each leaf corresponds to, is |sElementsPerLeaf|. This being a binary tree, we have
73 * sElementsPerLeaf = 2 ^ sSkippedBottomTreeLevels.
75 * *** Storage layout of the binary tree ***
77 * We take advantage of the specifics of the situation to avoid generalist tree storage and instead
78 * store the tree entries in a vector, mTreeData.
80 * TreeData is always a vector of length
82 * 2 * (number of leaves).
84 * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the root node,
85 * then at offsets 2..3 is the tree level immediately below the root node, then at offsets 4..7
86 * is the tree level below that, etc.
88 * The figure below illustrates this by writing at each tree node the offset into mTreeData at
89 * which it is stored:
91 * 1
92 * _/ \_
93 * 2 3
94 * / \ / \
95 * 4 5 6 7
96 * ...
98 * Thus, under the convention that the root level is level 0, we see that level N is stored at offsets
100 * [ 2^n .. 2^(n+1) - 1 ]
102 * in mTreeData. Likewise, all the usual tree operations have simple mathematical expressions in
103 * terms of mTreeData offsets, see all the methods such as ParentNode, LeftChildNode, etc.
105 * *** Design constraint: element types aren't known at buffer-update time ***
107 * Note that a key constraint that we're operating under, is that we don't know the types of the elements
108 * by the time WebGL bufferData/bufferSubData methods are called. The type of elements is only
109 * specified in the drawElements call. This means that we may potentially have to store caches for
110 * multiple element types, for the same element array buffer. Since we don't know yet how many
111 * element types we'll eventually support (extensions add more), the concern about memory usage is serious.
112 * This is addressed by sSkippedBottomTreeLevels as explained above. Of course, in the typical
113 * case where each element array buffer is only ever used with one type, this is also addressed
114 * by having WebGLElementArrayCache lazily create trees for each type only upon first use.
116 * Another consequence of this constraint is that when updating the trees, we have to update
117 * all existing trees. So if trees for types uint8_t, uint16_t and uint32_t have ever been constructed for this buffer,
118 * every subsequent update will have to update all trees even if one of the types is never
119 * used again. That's inefficient, but content should not put indices of different types in the
120 * same element array buffer anyways. Different index types can only be consumed in separate
121 * drawElements calls, so nothing particular is to be achieved by lumping them in the same
122 * buffer object.
124 template<typename T>
125 struct WebGLElementArrayCacheTree
127 // A too-high sSkippedBottomTreeLevels would harm the performance of small drawElements calls
128 // A too-low sSkippedBottomTreeLevels would cause undue memory usage.
129 // The current value has been validated by some benchmarking. See bug 732660.
130 static const size_t sSkippedBottomTreeLevels = 3;
131 static const size_t sElementsPerLeaf = 1 << sSkippedBottomTreeLevels;
132 static const size_t sElementsPerLeafMask = sElementsPerLeaf - 1; // sElementsPerLeaf is POT
134 private:
136 // The WebGLElementArrayCache that owns this tree
137 WebGLElementArrayCache& mParent;
139 // The tree's internal data storage. Its length is 2 * (number of leaves)
140 // because of its data layout explained in the above class comment.
141 FallibleTArray<T> mTreeData;
143 public:
144 // Constructor. Takes a reference to the WebGLElementArrayCache that is to be
145 // the parent. Does not initialize the tree. Should be followed by a call
146 // to Update() to attempt initializing the tree.
147 WebGLElementArrayCacheTree(WebGLElementArrayCache& p)
148 : mParent(p)
152 T GlobalMaximum() const {
153 return mTreeData[1];
156 // returns the index of the parent node; if treeIndex=1 (the root node),
157 // the return value is 0.
158 static size_t ParentNode(size_t treeIndex) {
159 MOZ_ASSERT(treeIndex > 1);
160 return treeIndex >> 1;
163 static bool IsRightNode(size_t treeIndex) {
164 MOZ_ASSERT(treeIndex > 1);
165 return treeIndex & 1;
168 static bool IsLeftNode(size_t treeIndex) {
169 MOZ_ASSERT(treeIndex > 1);
170 return !IsRightNode(treeIndex);
173 static size_t SiblingNode(size_t treeIndex) {
174 MOZ_ASSERT(treeIndex > 1);
175 return treeIndex ^ 1;
178 static size_t LeftChildNode(size_t treeIndex) {
179 MOZ_ASSERT(treeIndex);
180 return treeIndex << 1;
183 static size_t RightChildNode(size_t treeIndex) {
184 MOZ_ASSERT(treeIndex);
185 return SiblingNode(LeftChildNode(treeIndex));
188 static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) {
189 MOZ_ASSERT(treeIndex > 1);
190 return treeIndex - distance;
193 static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) {
194 MOZ_ASSERT(treeIndex > 1);
195 return treeIndex + distance;
198 size_t NumLeaves() const {
199 // see class comment for why we the tree storage size is 2 * numLeaves
200 return mTreeData.Length() >> 1;
203 size_t LeafForElement(size_t element) const {
204 size_t leaf = element / sElementsPerLeaf;
205 MOZ_ASSERT(leaf < NumLeaves());
206 return leaf;
209 size_t LeafForByte(size_t byte) const {
210 return LeafForElement(byte / sizeof(T));
213 // Returns the index, into the tree storage, where a given leaf is stored
214 size_t TreeIndexForLeaf(size_t leaf) const {
215 // See above class comment. The tree storage is an array of length 2 * numLeaves.
216 // The leaves are stored in its second half.
217 return leaf + NumLeaves();
220 static size_t LastElementUnderSameLeaf(size_t element) {
221 return element | sElementsPerLeafMask;
224 static size_t FirstElementUnderSameLeaf(size_t element) {
225 return element & ~sElementsPerLeafMask;
228 static size_t NextMultipleOfElementsPerLeaf(size_t numElements) {
229 MOZ_ASSERT(numElements >= 1);
230 return ((numElements - 1) | sElementsPerLeafMask) + 1;
233 bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf,
234 uint32_t* out_upperBound)
236 size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
237 size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
239 while (true) {
240 // given that we tweak these values in nontrivial ways, it doesn't hurt to do
241 // this sanity check
242 MOZ_ASSERT(firstTreeIndex <= lastTreeIndex);
244 // final case where there is only 1 node to validate at the current tree level
245 if (lastTreeIndex == firstTreeIndex) {
246 const T& curData = mTreeData[firstTreeIndex];
247 UpdateUpperBound(out_upperBound, curData);
248 return curData <= maxAllowed;
251 // if the first node at current tree level is a right node, handle it individually
252 // and replace it with its right neighbor, which is a left node
253 if (IsRightNode(firstTreeIndex)) {
254 const T& curData = mTreeData[firstTreeIndex];
255 UpdateUpperBound(out_upperBound, curData);
256 if (curData > maxAllowed)
257 return false;
258 firstTreeIndex = RightNeighborNode(firstTreeIndex);
261 // if the last node at current tree level is a left node, handle it individually
262 // and replace it with its left neighbor, which is a right node
263 if (IsLeftNode(lastTreeIndex)) {
264 const T& curData = mTreeData[lastTreeIndex];
265 UpdateUpperBound(out_upperBound, curData);
266 if (curData > maxAllowed)
267 return false;
268 lastTreeIndex = LeftNeighborNode(lastTreeIndex);
271 // at this point it can happen that firstTreeIndex and lastTreeIndex "crossed" each
272 // other. That happens if firstTreeIndex was a right node and lastTreeIndex was its
273 // right neighor: in that case, both above tweaks happened and as a result, they ended
274 // up being swapped: lastTreeIndex is now the _left_ neighbor of firstTreeIndex.
275 // When that happens, there is nothing left to validate.
276 if (lastTreeIndex == LeftNeighborNode(firstTreeIndex)) {
277 return true;
280 // walk up 1 level
281 firstTreeIndex = ParentNode(firstTreeIndex);
282 lastTreeIndex = ParentNode(lastTreeIndex);
286 // Updates the tree from the parent's buffer contents. Fallible, as it
287 // may have to resize the tree storage.
288 bool Update(size_t firstByte, size_t lastByte);
290 size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
292 return aMallocSizeOf(this) + mTreeData.SizeOfExcludingThis(aMallocSizeOf);
296 // TreeForType: just a template helper to select the right tree object for a given
297 // element type.
298 template<typename T>
299 struct TreeForType {};
301 template<>
302 struct TreeForType<uint8_t>
304 static ScopedDeletePtr<WebGLElementArrayCacheTree<uint8_t>>&
305 Value(WebGLElementArrayCache *b) {
306 return b->mUint8Tree;
310 template<>
311 struct TreeForType<uint16_t>
313 static ScopedDeletePtr<WebGLElementArrayCacheTree<uint16_t>>&
314 Value(WebGLElementArrayCache *b) {
315 return b->mUint16Tree;
319 template<>
320 struct TreeForType<uint32_t>
322 static ScopedDeletePtr<WebGLElementArrayCacheTree<uint32_t>>&
323 Value(WebGLElementArrayCache *b) {
324 return b->mUint32Tree;
328 // Calling this method will 1) update the leaves in this interval
329 // from the raw buffer data, and 2) propagate this update up the tree
330 template<typename T>
331 bool WebGLElementArrayCacheTree<T>::Update(size_t firstByte, size_t lastByte)
333 MOZ_ASSERT(firstByte <= lastByte);
334 MOZ_ASSERT(lastByte < mParent.mBytes.Length());
336 size_t numberOfElements = mParent.mBytes.Length() / sizeof(T);
337 size_t requiredNumLeaves = 0;
338 if (numberOfElements > 0) {
339 // If we didn't require the number of leaves to be a power of two, then
340 // it would just be equal to
342 // ceil(numberOfElements / sElementsPerLeaf)
344 // The way we implement this (division+ceil) operation in integer arithmetic
345 // is as follows:
346 size_t numLeavesNonPOT = (numberOfElements + sElementsPerLeaf - 1) / sElementsPerLeaf;
347 // It only remains to round that up to the next power of two:
348 requiredNumLeaves = RoundUpPow2(numLeavesNonPOT);
351 // Step #0: if needed, resize our tree data storage.
352 if (requiredNumLeaves != NumLeaves()) {
353 // see class comment for why we the tree storage size is 2 * numLeaves
354 if (!mTreeData.SetLength(2 * requiredNumLeaves)) {
355 mTreeData.SetLength(0);
356 return false;
358 MOZ_ASSERT(NumLeaves() == requiredNumLeaves);
360 if (NumLeaves()) {
361 // when resizing, update the whole tree, not just the subset corresponding
362 // to the part of the buffer being updated.
363 memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(T));
364 firstByte = 0;
365 lastByte = mParent.mBytes.Length() - 1;
369 if (NumLeaves() == 0) {
370 return true;
373 lastByte = std::min(lastByte, NumLeaves() * sElementsPerLeaf * sizeof(T) - 1);
374 if (firstByte > lastByte) {
375 return true;
378 size_t firstLeaf = LeafForByte(firstByte);
379 size_t lastLeaf = LeafForByte(lastByte);
381 MOZ_ASSERT(firstLeaf <= lastLeaf && lastLeaf < NumLeaves());
383 size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
384 size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
386 // Step #1: initialize the tree leaves from plain buffer data.
387 // That is, each tree leaf must be set to the max of the |sElementsPerLeaf| corresponding
388 // buffer entries.
389 // condition-less scope to prevent leaking this scope's variables into the code below
391 // treeIndex is the index of the tree leaf we're writing, i.e. the destination index
392 size_t treeIndex = firstTreeIndex;
393 // srcIndex is the index in the source buffer
394 size_t srcIndex = firstLeaf * sElementsPerLeaf;
395 while (treeIndex <= lastTreeIndex) {
396 T m = 0;
397 size_t a = srcIndex;
398 size_t srcIndexNextLeaf = std::min(a + sElementsPerLeaf, numberOfElements);
399 for (; srcIndex < srcIndexNextLeaf; srcIndex++) {
400 m = std::max(m, mParent.Element<T>(srcIndex));
402 mTreeData[treeIndex] = m;
403 treeIndex++;
407 // Step #2: propagate the values up the tree. This is simply a matter of walking up
408 // the tree and setting each node to the max of its two children.
409 while (firstTreeIndex > 1) {
410 // move up 1 level
411 firstTreeIndex = ParentNode(firstTreeIndex);
412 lastTreeIndex = ParentNode(lastTreeIndex);
414 // fast-exit case where only one node is updated at the current level
415 if (firstTreeIndex == lastTreeIndex) {
416 mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]);
417 continue;
420 size_t child = LeftChildNode(firstTreeIndex);
421 size_t parent = firstTreeIndex;
422 while (parent <= lastTreeIndex)
424 T a = mTreeData[child];
425 child = RightNeighborNode(child);
426 T b = mTreeData[child];
427 child = RightNeighborNode(child);
428 mTreeData[parent] = std::max(a, b);
429 parent = RightNeighborNode(parent);
433 return true;
436 WebGLElementArrayCache::WebGLElementArrayCache() {
439 WebGLElementArrayCache::~WebGLElementArrayCache() {
442 bool WebGLElementArrayCache::BufferData(const void* ptr, size_t byteLength) {
443 if (mBytes.Length() != byteLength) {
444 if (!mBytes.SetLength(byteLength)) {
445 mBytes.SetLength(0);
446 return false;
449 MOZ_ASSERT(mBytes.Length() == byteLength);
450 return BufferSubData(0, ptr, byteLength);
453 bool WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr, size_t updateByteLength) {
454 MOZ_ASSERT(pos + updateByteLength <= mBytes.Length());
455 if (!updateByteLength)
456 return true;
457 if (ptr)
458 memcpy(mBytes.Elements() + pos, ptr, updateByteLength);
459 else
460 memset(mBytes.Elements() + pos, 0, updateByteLength);
461 return UpdateTrees(pos, pos + updateByteLength - 1);
464 bool WebGLElementArrayCache::UpdateTrees(size_t firstByte, size_t lastByte)
466 bool result = true;
467 if (mUint8Tree)
468 result &= mUint8Tree->Update(firstByte, lastByte);
469 if (mUint16Tree)
470 result &= mUint16Tree->Update(firstByte, lastByte);
471 if (mUint32Tree)
472 result &= mUint32Tree->Update(firstByte, lastByte);
473 return result;
476 template<typename T>
477 bool
478 WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement,
479 size_t countElements, uint32_t* out_upperBound)
481 *out_upperBound = 0;
483 // if maxAllowed is >= the max T value, then there is no way that a T index could be invalid
484 uint32_t maxTSize = std::numeric_limits<T>::max();
485 if (maxAllowed >= maxTSize) {
486 UpdateUpperBound(out_upperBound, maxTSize);
487 return true;
490 T maxAllowedT(maxAllowed);
492 // integer overflow must have been handled earlier, so we assert that maxAllowedT
493 // is exactly the max allowed value.
494 MOZ_ASSERT(uint32_t(maxAllowedT) == maxAllowed);
496 if (!mBytes.Length() || !countElements)
497 return true;
499 ScopedDeletePtr<WebGLElementArrayCacheTree<T>>& tree = TreeForType<T>::Value(this);
500 if (!tree) {
501 tree = new WebGLElementArrayCacheTree<T>(*this);
502 if (mBytes.Length()) {
503 bool valid = tree->Update(0, mBytes.Length() - 1);
504 if (!valid) {
505 // Do not assert here. This case would happen if an allocation failed.
506 // We've already settled on fallible allocations around here.
507 tree = nullptr;
508 return false;
513 size_t lastElement = firstElement + countElements - 1;
515 // fast exit path when the global maximum for the whole element array buffer
516 // falls in the allowed range
517 T globalMax = tree->GlobalMaximum();
518 if (globalMax <= maxAllowedT)
520 UpdateUpperBound(out_upperBound, globalMax);
521 return true;
524 const T* elements = Elements<T>();
526 // before calling tree->Validate, we have to validate ourselves the boundaries of the elements span,
527 // to round them to the nearest multiple of sElementsPerLeaf.
528 size_t firstElementAdjustmentEnd = std::min(lastElement,
529 tree->LastElementUnderSameLeaf(firstElement));
530 while (firstElement <= firstElementAdjustmentEnd) {
531 const T& curData = elements[firstElement];
532 UpdateUpperBound(out_upperBound, curData);
533 if (curData > maxAllowedT)
534 return false;
535 firstElement++;
537 size_t lastElementAdjustmentEnd = std::max(firstElement,
538 tree->FirstElementUnderSameLeaf(lastElement));
539 while (lastElement >= lastElementAdjustmentEnd) {
540 const T& curData = elements[lastElement];
541 UpdateUpperBound(out_upperBound, curData);
542 if (curData > maxAllowedT)
543 return false;
544 lastElement--;
547 // at this point, for many tiny validations, we're already done.
548 if (firstElement > lastElement)
549 return true;
551 // general case
552 return tree->Validate(maxAllowedT,
553 tree->LeafForElement(firstElement),
554 tree->LeafForElement(lastElement),
555 out_upperBound);
558 bool
559 WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed,
560 size_t firstElement, size_t countElements,
561 uint32_t* out_upperBound)
563 if (type == LOCAL_GL_UNSIGNED_BYTE)
564 return Validate<uint8_t>(maxAllowed, firstElement, countElements, out_upperBound);
565 if (type == LOCAL_GL_UNSIGNED_SHORT)
566 return Validate<uint16_t>(maxAllowed, firstElement, countElements, out_upperBound);
567 if (type == LOCAL_GL_UNSIGNED_INT)
568 return Validate<uint32_t>(maxAllowed, firstElement, countElements, out_upperBound);
570 MOZ_ASSERT(false, "Invalid type.");
571 return false;
574 size_t
575 WebGLElementArrayCache::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
577 size_t uint8TreeSize = mUint8Tree ? mUint8Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
578 size_t uint16TreeSize = mUint16Tree ? mUint16Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
579 size_t uint32TreeSize = mUint32Tree ? mUint32Tree->SizeOfIncludingThis(aMallocSizeOf) : 0;
580 return aMallocSizeOf(this) +
581 mBytes.SizeOfExcludingThis(aMallocSizeOf) +
582 uint8TreeSize +
583 uint16TreeSize +
584 uint32TreeSize;
587 bool
588 WebGLElementArrayCache::BeenUsedWithMultipleTypes() const
590 // C++ Standard ($4.7)
591 // "If the source type is bool, the value false is converted to zero and
592 // the value true is converted to one."
593 const int num_types_used = (mUint8Tree != nullptr) +
594 (mUint16Tree != nullptr) +
595 (mUint32Tree != nullptr);
596 return num_types_used > 1;
599 } // end namespace mozilla