Bug 1826564 [wpt PR 39394] - Update mypy, a=testonly
[gecko.git] / mfbt / JSONWriter.h
blobf779ee98378916ec189918dcf5364cfea3bc7c46
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 /* A JSON pretty-printer class. */
9 // A typical JSON-writing library requires you to first build up a data
10 // structure that represents a JSON object and then serialize it (to file, or
11 // somewhere else). This approach makes for a clean API, but building the data
12 // structure takes up memory. Sometimes that isn't desirable, such as when the
13 // JSON data is produced for memory reporting.
15 // The JSONWriter class instead allows JSON data to be written out
16 // incrementally without building up large data structures.
18 // The API is slightly uglier than you would see in a typical JSON-writing
19 // library, but still fairly easy to use. It's possible to generate invalid
20 // JSON with JSONWriter, but typically the most basic testing will identify any
21 // such problems.
23 // Similarly, there are no RAII facilities for automatically closing objects
24 // and arrays. These would be nice if you are generating all your code within
25 // nested functions, but in other cases you'd have to maintain an explicit
26 // stack of RAII objects and manually unwind it, which is no better than just
27 // calling "end" functions. Furthermore, the consequences of forgetting to
28 // close an object or array are obvious and, again, will be identified via
29 // basic testing, unlike other cases where RAII is typically used (e.g. smart
30 // pointers) and the consequences of defects are more subtle.
32 // Importantly, the class does solve the two hard problems of JSON
33 // pretty-printing, which are (a) correctly escaping strings, and (b) adding
34 // appropriate indentation and commas between items.
36 // By default, every property is placed on its own line. However, it is
37 // possible to request that objects and arrays be placed entirely on a single
38 // line, which can reduce output size significantly in some cases.
40 // Strings used (for property names and string property values) are
41 // |Span<const char>| or char arrays (e.g. literals), and can be ASCII or UTF-8.
43 // EXAMPLE
44 // -------
45 // Assume that |MyWriteFunc| is a class that implements |JSONWriteFunc|. The
46 // following code:
48 // JSONWriter w(MakeUnique<MyWriteFunc>());
49 // w.Start();
50 // {
51 // w.NullProperty("null");
52 // w.BoolProperty("bool", true);
53 // w.IntProperty("int", 1);
54 // w.StartArrayProperty("array");
55 // {
56 // w.StringElement("string");
57 // w.StartObjectElement();
58 // {
59 // w.DoubleProperty("double", 3.4);
60 // w.StartArrayProperty("single-line array", w.SingleLineStyle);
61 // {
62 // w.IntElement(1);
63 // w.StartObjectElement(); // SingleLineStyle is inherited from
64 // w.EndObject(); // above for this collection
65 // }
66 // w.EndArray();
67 // }
68 // w.EndObject();
69 // }
70 // w.EndArray();
71 // }
72 // w.End();
74 // will produce pretty-printed output for the following JSON object:
76 // {
77 // "null": null,
78 // "bool": true,
79 // "int": 1,
80 // "array": [
81 // "string",
82 // {
83 // "double": 3.4,
84 // "single-line array": [1, {}]
85 // }
86 // ]
87 // }
89 // The nesting in the example code is obviously optional, but can aid
90 // readability.
92 #ifndef mozilla_JSONWriter_h
93 #define mozilla_JSONWriter_h
95 #include "double-conversion/double-conversion.h"
96 #include "mozilla/Assertions.h"
97 #include "mozilla/IntegerPrintfMacros.h"
98 #include "mozilla/PodOperations.h"
99 #include "mozilla/Span.h"
100 #include "mozilla/Sprintf.h"
101 #include "mozilla/UniquePtr.h"
102 #include "mozilla/Vector.h"
104 #include <utility>
106 namespace mozilla {
108 // A quasi-functor for JSONWriter. We don't use a true functor because that
109 // requires templatizing JSONWriter, and the templatization seeps to lots of
110 // places we don't want it to.
111 class JSONWriteFunc {
112 public:
// Receives the next chunk of output. JSONWriter calls this for every piece of
// text it emits (punctuation, whitespace, property names and values).
113 virtual void Write(const Span<const char>& aStr) = 0;
// Virtual so that an implementation can be destroyed through a JSONWriteFunc
// pointer, e.g. the UniquePtr<JSONWriteFunc> that JSONWriter may hold.
114 virtual ~JSONWriteFunc() = default;
117 // Ideally this would be within |EscapedString| but when compiling with GCC
118 // on Linux that caused link errors, whereas this formulation didn't.
119 namespace detail {
// Escape lookup table, indexed by the byte value taken as uint8_t. A non-zero
// entry is the character that follows the backslash in that byte's two-char
// escape sequence; a zero entry means the byte has no two-char escape.
// Only declared here (`extern`); the definition is not part of this header.
120 extern MFBT_DATA const char gTwoCharEscapes[256];
121 } // namespace detail
123 class JSONWriter {
124 // From http://www.ietf.org/rfc/rfc4627.txt:
126 // "All Unicode characters may be placed within the quotation marks except
127 // for the characters that must be escaped: quotation mark, reverse
128 // solidus, and the control characters (U+0000 through U+001F)."
130 // This implementation uses two-char escape sequences where possible, namely:
132 // \", \\, \b, \f, \n, \r, \t
134 // All control characters not in the above list are represented with a
135 // six-char escape sequence, e.g. '\u000b' (a.k.a. '\v').
// Produces the JSON-escaped form of a string. Construct it from a span and
// read the result through SpanRef(); the result either aliases the caller's
// characters or points into a buffer owned by this object.
137 class EscapedString {
138 // `mStringSpan` initially points at the user-provided string. If that
139 // string needs escaping, `mStringSpan` will point at `mOwnedStr` below.
140 Span<const char> mStringSpan;
141 // String storage in case escaping is actually needed, null otherwise.
142 UniquePtr<char[]> mOwnedStr;
144 void CheckInvariants() const {
145 // Either there was no escaping so `mOwnedStr` is null, or escaping was
146 // needed, in which case `mStringSpan` should point at `mOwnedStr`.
147 MOZ_ASSERT(!mOwnedStr || mStringSpan.data() == mOwnedStr.get());
// Maps the low four bits of `u` to the matching lowercase hexadecimal digit
// ('0'-'9', 'a'-'f'). Any higher bits of `u` are ignored.
//
// A table lookup keeps the result a `char` from start to finish (no implicit
// int-to-char narrowing) and lets the helper be used in constant expressions.
static constexpr char hexDigitToAsciiChar(uint8_t u) {
  return "0123456789abcdef"[u & 0xf];
}
155 public:
156 explicit EscapedString(const Span<const char>& aStr) : mStringSpan(aStr) {
157 // First, see if we need to modify the string.
158 size_t nExtra = 0;
159 for (const char& c : aStr) {
160 // ensure it can't be interpreted as negative
161 uint8_t u = static_cast<uint8_t>(c);
162 if (u == 0) {
163 // Null terminator within the span, assume we may have been given a
164 // span to a buffer that contains a null-terminated string in it.
165 // We need to truncate the Span so that it doesn't include this null
166 // terminator and anything past it; Either we will return it as-is, or
167 // processing should stop there.
168 mStringSpan = mStringSpan.First(&c - mStringSpan.data());
169 break;
171 if (detail::gTwoCharEscapes[u]) {
172 nExtra += 1;
173 } else if (u <= 0x1f) {
174 nExtra += 5;
178 // Note: Don't use `aStr` anymore, as it could contain a null terminator;
179 // use the correctly-sized `mStringSpan` instead.
181 if (nExtra == 0) {
182 // No escapes needed. mStringSpan already points at the original string.
183 CheckInvariants();
184 return;
187 // Escapes are needed. We'll create a new string.
188 mOwnedStr = MakeUnique<char[]>(mStringSpan.Length() + nExtra);
190 size_t i = 0;
191 for (const char c : mStringSpan) {
192 // ensure it can't be interpreted as negative
193 uint8_t u = static_cast<uint8_t>(c);
194 MOZ_ASSERT(u != 0, "Null terminator should have been handled above");
195 if (detail::gTwoCharEscapes[u]) {
196 mOwnedStr[i++] = '\\';
197 mOwnedStr[i++] = detail::gTwoCharEscapes[u];
198 } else if (u <= 0x1f) {
199 mOwnedStr[i++] = '\\';
200 mOwnedStr[i++] = 'u';
201 mOwnedStr[i++] = '0';
202 mOwnedStr[i++] = '0';
203 mOwnedStr[i++] = hexDigitToAsciiChar((u & 0x00f0) >> 4);
204 mOwnedStr[i++] = hexDigitToAsciiChar(u & 0x000f);
205 } else {
206 mOwnedStr[i++] = u;
209 MOZ_ASSERT(i == mStringSpan.Length() + nExtra);
210 mStringSpan = Span<const char>(mOwnedStr.get(), i);
211 CheckInvariants();
// Construction from a bare `const char*` is rejected at compile time; callers
// must supply a `Span<const char>` (presumably so that the length is always
// explicit).
214 explicit EscapedString(const char* aStr) = delete;
216 const Span<const char>& SpanRef() const { return mStringSpan; }
219 public:
220 // Collections (objects and arrays) are printed in a multi-line style by
221 // default. This can be changed to a single-line style if SingleLineStyle is
222 // specified. If a collection is printed in single-line style, every nested
223 // collection within it is also printed in single-line style, even if
224 // multi-line style is requested.
225 // If SingleLineStyle is set in the constructor, all JSON whitespace is
226 // eliminated, including spaces after colons and commas, for the most compact
227 // encoding possible.
228 enum CollectionStyle {
229 MultiLineStyle, // the default
230 SingleLineStyle // no newlines or indentation inside the collection
233 protected:
// Fixed pieces of output text, kept as spans so they can be handed straight
// to JSONWriteFunc::Write().
234 static constexpr Span<const char> scArrayBeginString = MakeStringSpan("[");
235 static constexpr Span<const char> scArrayEndString = MakeStringSpan("]");
236 static constexpr Span<const char> scCommaString = MakeStringSpan(",");
// Passed as the "property name" by the *Element functions, which makes the
// item print without a name.
237 static constexpr Span<const char> scEmptyString = MakeStringSpan("");
238 static constexpr Span<const char> scFalseString = MakeStringSpan("false");
239 static constexpr Span<const char> scNewLineString = MakeStringSpan("\n");
240 static constexpr Span<const char> scNullString = MakeStringSpan("null");
241 static constexpr Span<const char> scObjectBeginString = MakeStringSpan("{");
242 static constexpr Span<const char> scObjectEndString = MakeStringSpan("}");
// Written before and after a property name: an opening quote, then a closing
// quote followed by the colon.
243 static constexpr Span<const char> scPropertyBeginString =
244 MakeStringSpan("\"");
245 static constexpr Span<const char> scPropertyEndString = MakeStringSpan("\":");
// Written on both sides of a string value.
246 static constexpr Span<const char> scQuoteString = MakeStringSpan("\"");
// Used both for indentation (one per nesting level) and as the separator
// after colons and commas.
247 static constexpr Span<const char> scSpaceString = MakeStringSpan(" ");
// Used by Start() and End() for the outermost object.
248 static constexpr Span<const char> scTopObjectBeginString =
249 MakeStringSpan("{");
250 static constexpr Span<const char> scTopObjectEndString = MakeStringSpan("}");
251 static constexpr Span<const char> scTrueString = MakeStringSpan("true");
// Destination for everything this writer emits.
253 JSONWriteFunc& mWriter;
// Set only by the constructor that takes a UniquePtr, in which case it owns
// the object `mWriter` refers to; left null by the by-reference constructor.
254 const UniquePtr<JSONWriteFunc> mMaybeOwnedWriter;
// Per-depth state, indexed by nesting depth. The entries for a new depth are
// created by NewVectorEntries().
255 Vector<bool, 8> mNeedComma; // do we need a comma at depth N?
256 Vector<bool, 8> mNeedNewlines; // do we need newlines at depth N?
257 size_t mDepth; // the current nesting depth
259 void Indent() {
260 for (size_t i = 0; i < mDepth; i++) {
261 mWriter.Write(scSpaceString);
265 // Adds whatever is necessary (maybe a comma, and then a newline and
266 // whitespace) to separate an item (property or element) from what's come
267 // before.
268 void Separator() {
269 if (mNeedComma[mDepth]) {
270 mWriter.Write(scCommaString);
272 if (mDepth > 0 && mNeedNewlines[mDepth]) {
273 mWriter.Write(scNewLineString);
274 Indent();
275 } else if (mNeedComma[mDepth] && mNeedNewlines[0]) {
276 mWriter.Write(scSpaceString);
280 void PropertyNameAndColon(const Span<const char>& aName) {
281 mWriter.Write(scPropertyBeginString);
282 mWriter.Write(EscapedString(aName).SpanRef());
283 mWriter.Write(scPropertyEndString);
284 if (mNeedNewlines[0]) {
285 mWriter.Write(scSpaceString);
289 void Scalar(const Span<const char>& aMaybePropertyName,
290 const Span<const char>& aStringValue) {
291 Separator();
292 if (!aMaybePropertyName.empty()) {
293 PropertyNameAndColon(aMaybePropertyName);
295 mWriter.Write(aStringValue);
296 mNeedComma[mDepth] = true;
299 void QuotedScalar(const Span<const char>& aMaybePropertyName,
300 const Span<const char>& aStringValue) {
301 Separator();
302 if (!aMaybePropertyName.empty()) {
303 PropertyNameAndColon(aMaybePropertyName);
305 mWriter.Write(scQuoteString);
306 mWriter.Write(aStringValue);
307 mWriter.Write(scQuoteString);
308 mNeedComma[mDepth] = true;
311 void NewVectorEntries(bool aNeedNewLines) {
312 // If these tiny allocations OOM we might as well just crash because we
313 // must be in serious memory trouble.
314 MOZ_RELEASE_ASSERT(mNeedComma.resizeUninitialized(mDepth + 1));
315 MOZ_RELEASE_ASSERT(mNeedNewlines.resizeUninitialized(mDepth + 1));
316 mNeedComma[mDepth] = false;
317 mNeedNewlines[mDepth] = aNeedNewLines;
320 void StartCollection(const Span<const char>& aMaybePropertyName,
321 const Span<const char>& aStartChar,
322 CollectionStyle aStyle = MultiLineStyle) {
323 Separator();
324 if (!aMaybePropertyName.empty()) {
325 PropertyNameAndColon(aMaybePropertyName);
327 mWriter.Write(aStartChar);
328 mNeedComma[mDepth] = true;
329 mDepth++;
330 NewVectorEntries(mNeedNewlines[mDepth - 1] && aStyle == MultiLineStyle);
333 // Adds the whitespace and closing char necessary to end a collection.
334 void EndCollection(const Span<const char>& aEndChar) {
335 MOZ_ASSERT(mDepth > 0);
336 if (mNeedNewlines[mDepth]) {
337 mWriter.Write(scNewLineString);
338 mDepth--;
339 Indent();
340 } else {
341 mDepth--;
343 mWriter.Write(aEndChar);
346 public:
347 explicit JSONWriter(JSONWriteFunc& aWriter,
348 CollectionStyle aStyle = MultiLineStyle)
349 : mWriter(aWriter), mNeedComma(), mNeedNewlines(), mDepth(0) {
350 NewVectorEntries(aStyle == MultiLineStyle);
353 explicit JSONWriter(UniquePtr<JSONWriteFunc> aWriter,
354 CollectionStyle aStyle = MultiLineStyle)
355 : mWriter(*aWriter),
356 mMaybeOwnedWriter(std::move(aWriter)),
357 mNeedComma(),
358 mNeedNewlines(),
359 mDepth(0) {
360 MOZ_RELEASE_ASSERT(
361 mMaybeOwnedWriter,
362 "JSONWriter must be given a non-null UniquePtr<JSONWriteFunc>");
363 NewVectorEntries(aStyle == MultiLineStyle);
366 // Returns the JSONWriteFunc passed in at creation, for temporary use. The
367 // JSONWriter object still owns the JSONWriteFunc.
368 JSONWriteFunc& WriteFunc() const { return mWriter; }
370 // For all the following functions, the "Prints:" comment indicates what the
371 // basic output looks like. However, it doesn't indicate the whitespace and
372 // trailing commas, which are automatically added as required.
374 // All property names and string properties are escaped as necessary.
376 // Prints: {
377 void Start(CollectionStyle aStyle = MultiLineStyle) {
378 StartCollection(scEmptyString, scTopObjectBeginString, aStyle);
381 // Prints: } and final newline.
382 void End() {
383 EndCollection(scTopObjectEndString);
384 if (mNeedNewlines[mDepth]) {
385 mWriter.Write(scNewLineString);
389 // Prints: "<aName>": null
390 void NullProperty(const Span<const char>& aName) {
391 Scalar(aName, scNullString);
394 template <size_t N>
395 void NullProperty(const char (&aName)[N]) {
396 // Keep null terminator from literal strings, will be removed by
397 // EscapedString. This way C buffer arrays can be used as well.
398 NullProperty(Span<const char>(aName, N));
401 // Prints: null
402 void NullElement() { NullProperty(scEmptyString); }
404 // Prints: "<aName>": <aBool>
405 void BoolProperty(const Span<const char>& aName, bool aBool) {
406 Scalar(aName, aBool ? scTrueString : scFalseString);
409 template <size_t N>
410 void BoolProperty(const char (&aName)[N], bool aBool) {
411 // Keep null terminator from literal strings, will be removed by
412 // EscapedString. This way C buffer arrays can be used as well.
413 BoolProperty(Span<const char>(aName, N), aBool);
416 // Prints: <aBool>
417 void BoolElement(bool aBool) { BoolProperty(scEmptyString, aBool); }
419 // Prints: "<aName>": <aInt>
420 void IntProperty(const Span<const char>& aName, int64_t aInt) {
421 char buf[64];
422 int len = SprintfLiteral(buf, "%" PRId64, aInt);
423 MOZ_RELEASE_ASSERT(len > 0);
424 Scalar(aName, Span<const char>(buf, size_t(len)));
427 template <size_t N>
428 void IntProperty(const char (&aName)[N], int64_t aInt) {
429 // Keep null terminator from literal strings, will be removed by
430 // EscapedString. This way C buffer arrays can be used as well.
431 IntProperty(Span<const char>(aName, N), aInt);
434 // Prints: <aInt>
435 void IntElement(int64_t aInt) { IntProperty(scEmptyString, aInt); }
437 // Prints: "<aName>": <aDouble>
438 void DoubleProperty(const Span<const char>& aName, double aDouble) {
439 static const size_t buflen = 64;
440 char buf[buflen];
441 const double_conversion::DoubleToStringConverter& converter =
442 double_conversion::DoubleToStringConverter::EcmaScriptConverter();
443 double_conversion::StringBuilder builder(buf, buflen);
444 converter.ToShortest(aDouble, &builder);
445 // TODO: The builder should know the length?!
446 Scalar(aName, MakeStringSpan(builder.Finalize()));
449 template <size_t N>
450 void DoubleProperty(const char (&aName)[N], double aDouble) {
451 // Keep null terminator from literal strings, will be removed by
452 // EscapedString. This way C buffer arrays can be used as well.
453 DoubleProperty(Span<const char>(aName, N), aDouble);
456 // Prints: <aDouble>
457 void DoubleElement(double aDouble) { DoubleProperty(scEmptyString, aDouble); }
459 // Prints: "<aName>": "<aStr>"
460 void StringProperty(const Span<const char>& aName,
461 const Span<const char>& aStr) {
462 QuotedScalar(aName, EscapedString(aStr).SpanRef());
465 template <size_t NN>
466 void StringProperty(const char (&aName)[NN], const Span<const char>& aStr) {
467 // Keep null terminator from literal strings, will be removed by
468 // EscapedString. This way C buffer arrays can be used as well.
469 StringProperty(Span<const char>(aName, NN), aStr);
472 template <size_t SN>
473 void StringProperty(const Span<const char>& aName, const char (&aStr)[SN]) {
474 // Keep null terminator from literal strings, will be removed by
475 // EscapedString. This way C buffer arrays can be used as well.
476 StringProperty(aName, Span<const char>(aStr, SN));
479 template <size_t NN, size_t SN>
480 void StringProperty(const char (&aName)[NN], const char (&aStr)[SN]) {
481 // Keep null terminators from literal strings, will be removed by
482 // EscapedString. This way C buffer arrays can be used as well.
483 StringProperty(Span<const char>(aName, NN), Span<const char>(aStr, SN));
486 // Prints: "<aStr>"
487 void StringElement(const Span<const char>& aStr) {
488 StringProperty(scEmptyString, aStr);
491 template <size_t N>
492 void StringElement(const char (&aName)[N]) {
493 // Keep null terminator from literal strings, will be removed by
494 // EscapedString. This way C buffer arrays can be used as well.
495 StringElement(Span<const char>(aName, N));
498 // Prints: "<aName>": [
499 void StartArrayProperty(const Span<const char>& aName,
500 CollectionStyle aStyle = MultiLineStyle) {
501 StartCollection(aName, scArrayBeginString, aStyle);
504 template <size_t N>
505 void StartArrayProperty(const char (&aName)[N],
506 CollectionStyle aStyle = MultiLineStyle) {
507 // Keep null terminator from literal strings, will be removed by
508 // EscapedString. This way C buffer arrays can be used as well.
509 StartArrayProperty(Span<const char>(aName, N), aStyle);
512 // Prints: [
513 void StartArrayElement(CollectionStyle aStyle = MultiLineStyle) {
514 StartArrayProperty(scEmptyString, aStyle);
517 // Prints: ]
518 void EndArray() { EndCollection(scArrayEndString); }
520 // Prints: "<aName>": {
521 void StartObjectProperty(const Span<const char>& aName,
522 CollectionStyle aStyle = MultiLineStyle) {
523 StartCollection(aName, scObjectBeginString, aStyle);
526 template <size_t N>
527 void StartObjectProperty(const char (&aName)[N],
528 CollectionStyle aStyle = MultiLineStyle) {
529 // Keep null terminator from literal strings, will be removed by
530 // EscapedString. This way C buffer arrays can be used as well.
531 StartObjectProperty(Span<const char>(aName, N), aStyle);
534 // Prints: {
535 void StartObjectElement(CollectionStyle aStyle = MultiLineStyle) {
536 StartObjectProperty(scEmptyString, aStyle);
539 // Prints: }
540 void EndObject() { EndCollection(scObjectEndString); }
543 } // namespace mozilla
545 #endif /* mozilla_JSONWriter_h */