mozglue/baseprofiler/public/leb128iterator.h

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 // LEB128 utilities that can read/write unsigned LEB128 numbers from/to
   8 // iterators.
   9 //
  10 // LEB128 = Little Endian Base 128, where small numbers take few bytes, but
  11 // large numbers are still allowed, which is ideal when serializing numbers that
  12 // are likely to be small.
  13 // Each byte contains 7 bits from the number, starting at the "little end", the
  14 // top bit is 0 for the last byte, 1 otherwise.
  15 // Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc.
  16 //
  17 // Iterators only need to provide:
  18 // - `*it` to return a reference to the next byte to be read from or written to.
  19 // - `++it` to advance the iterator after a byte is written.
  20 //
  21 // The caller must always provide sufficient space to write any number, by:
  22 // - pre-allocating a large enough buffer, or
  23 // - allocating more space when `++it` reaches the end and/or `*it` is invoked
  24 //   after the end, or
  25 // - moving the underlying pointer to an appropriate location (e.g., wrapping
  26 //   around a circular buffer).
  27 // The caller must also provide enough bytes to read a full value (i.e., at
  28 // least one byte should have its top bit unset), and a type large enough to
  29 // hold the stored value.
  30 //
  31 // Note: There are insufficient checks for validity! These functions are
  32 // intended to be used together, i.e., the user should only `ReadULEB128()` from
  33 // a sufficiently-large buffer that the same user filled with `WriteULEB128()`.
  34 // Using with externally-sourced data (e.g., DWARF) is *not* recommended.
  35 //
  36 // https://en.wikipedia.org/wiki/LEB128
  37
  38 #ifndef leb128iterator_h
  39 #define leb128iterator_h
  40
  41 #include "mozilla/Assertions.h"
  42 #include "mozilla/Likely.h"
  43
  44 #include <climits>
  45 #include <cstdint>
  46 #include <limits>
  47 #include <type_traits>
  48
  49 namespace mozilla {
  50
  51 // Number of bytes needed to represent `aValue`.
  52 template <typename T>
  53 constexpr uint_fast8_t ULEB128Size(T aValue) {
  54   static_assert(!std::numeric_limits<T>::is_signed,
  55                 "ULEB128Size only takes unsigned types");
  56   // We need one output byte per 7 bits of non-zero value. So we just remove
  57   // 7 least significant bits at a time until the value becomes zero.
  58   // Note the special case of 0, which still needs 1 output byte; this is done
  59   // by starting the first loop before we check for 0.
  60   uint_fast8_t size = 0;
  61   for (;;) {
  62     size += 1;
  63     aValue >>= 7;
  64     // Expecting small values, so it should be more likely that `aValue == 0`.
  65     if (MOZ_LIKELY(aValue == 0)) {
  66       return size;
  67     }
  68   }
  69 }
  70
  71 // Maximum number of bytes needed to represent any value of type `T`.
  72 template <typename T>
  73 constexpr uint_fast8_t ULEB128MaxSize() {
  74   return ULEB128Size<T>(std::numeric_limits<T>::max());
  75 }
  76
  77 // Write `aValue` in LEB128 to `aIterator`.
  78 // The iterator will be moved past the last byte.
  79 template <typename T, typename It>
  80 void WriteULEB128(T aValue, It& aIterator) {
  81   static_assert(!std::numeric_limits<T>::is_signed,
  82                 "WriteULEB128 only takes unsigned types");
  83   using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
  84   static_assert(sizeof(IteratorValue) == 1,
  85                 "WriteULEB128 expects an iterator to single bytes");
  86   // 0. Don't test for 0 yet, as we want to output one byte for it.
  87   for (;;) {
  88     // 1. Extract the 7 least significant bits.
  89     const uint_fast8_t byte = aValue & 0x7Fu;
  90     // 2. Remove them from `aValue`.
  91     aValue >>= 7;
  92     // 3. Write the 7 bits, and set the 8th bit if `aValue` is not 0 yet
  93     // (meaning there will be more bytes after this one.)
  94     // Expecting small values, so it should be more likely that `aValue == 0`.
  95     // Note: No absolute need to force-cast to IteratorValue, because we have
  96     // only changed the bottom 8 bits above. However the compiler could warn
  97     // about a narrowing conversion from potentially-multibyte uint_fast8_t down
  98     // to whatever single-byte type `*iterator* expects, so we make it explicit.
  99     *aIterator = static_cast<IteratorValue>(
 100         MOZ_LIKELY(aValue == 0) ? byte : (byte | 0x80u));
 101     // 4. Always advance the iterator to the next byte.
 102     ++aIterator;
 103     // 5. We're done if `aValue` is 0.
 104     // Expecting small values, so it should be more likely that `aValue == 0`.
 105     if (MOZ_LIKELY(aValue == 0)) {
 106       return;
 107     }
 108   }
 109 }
 110
 111 // Read an LEB128 value from `aIterator`.
 112 // The iterator will be moved past the last byte.
 113 template <typename T, typename It>
 114 T ReadULEB128(It& aIterator) {
 115   static_assert(!std::numeric_limits<T>::is_signed,
 116                 "ReadULEB128 must return an unsigned type");
 117   using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
 118   static_assert(sizeof(IteratorValue) == 1,
 119                 "ReadULEB128 expects an iterator to single bytes");
 120   // Incoming bits will be added to `result`...
 121   T result = 0;
 122   // ... starting with the least significant bits.
 123   uint_fast8_t shift = 0;
 124   for (;;) {
 125     // 1. Read one byte from the iterator.
 126     // `static_cast` just in case IteratorValue is not implicitly convertible to
 127     // uint_fast8_t. It wouldn't matter if the sign was extended, we're only
 128     // dealing with the bottom 8 bits below.
 129     const uint_fast8_t byte = static_cast<uint_fast8_t>(*aIterator);
 130     // 2. Always advance the iterator.
 131     ++aIterator;
 132     // 3. Extract the 7 bits of value, and shift them in place into `result`.
 133     result |= static_cast<T>(byte & 0x7fu) << shift;
 134     // 4. If the 8th bit is *not* set, this was the last byte.
 135     // Expecting small values, so it should be more likely that the bit is off.
 136     if (MOZ_LIKELY((byte & 0x80u) == 0)) {
 137       return result;
 138     }
 139     // There are more bytes to read.
 140     // 5. Next byte will contain more significant bits above the past 7.
 141     shift += 7;
 142     // Safety check that we're not going to shift by >= than the type size,
 143     // which is Undefined Behavior in C++.
 144     MOZ_ASSERT(shift < CHAR_BIT * sizeof(T));
 145   }
 146 }
 147
 148 // constexpr ULEB128 reader class.
 149 // Mostly useful when dealing with non-trivial byte feeds.
 150 template <typename T>
 151 class ULEB128Reader {
 152   static_assert(!std::numeric_limits<T>::is_signed,
 153                 "ULEB128Reader must handle an unsigned type");
 154
 155  public:
 156   constexpr ULEB128Reader() = default;
 157
 158   // Don't allow copy/assignment, it doesn't make sense for a stateful parser.
 159   constexpr ULEB128Reader(const ULEB128Reader&) = delete;
 160   constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete;
 161
 162   // Feed a byte into the parser.
 163   // Returns true if this was the last byte.
 164   [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) {
 165     MOZ_ASSERT(!IsComplete());
 166     // Extract the 7 bits of value, and shift them in place into the value.
 167     mValue |= static_cast<T>(aByte & 0x7fu) << mShift;
 168     // If the 8th bit is *not* set, this was the last byte.
 169     // Expecting small values, so it should be more likely that the bit is off.
 170     if (MOZ_LIKELY((aByte & 0x80u) == 0)) {
 171       mShift = mCompleteShift;
 172       return true;
 173     }
 174     // There are more bytes to read.
 175     // Next byte will contain more significant bits above the past 7.
 176     mShift += 7;
 177     // Safety check that we're not going to shift by >= than the type size,
 178     // which is Undefined Behavior in C++.
 179     MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T));
 180     return false;
 181   }
 182
 183   constexpr void Reset() {
 184     mValue = 0;
 185     mShift = 0;
 186   }
 187
 188   [[nodiscard]] constexpr bool IsComplete() const {
 189     return mShift == mCompleteShift;
 190   }
 191
 192   [[nodiscard]] constexpr T Value() const {
 193     MOZ_ASSERT(IsComplete());
 194     return mValue;
 195   }
 196
 197  private:
 198   // Special value of `mShift` indicating that parsing is complete.
 199   constexpr static unsigned mCompleteShift = 0x10000u;
 200
 201   T mValue = 0;
 202   unsigned mShift = 0;
 203 };
 204
 205 }  // namespace mozilla
 206
 207 #endif  // leb128iterator_h