Bug 1795082 - Part 2/2: Drop post-processing from getURL() r=zombie
[gecko.git] / js / public / Printer.h
blob510d9582642e97517413da54cc09d7d0667d9b68
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef js_Printer_h
8 #define js_Printer_h
10 #include "mozilla/Attributes.h"
11 #include "mozilla/glue/Debug.h"
12 #include "mozilla/Range.h"
14 #include <stdarg.h>
15 #include <stddef.h>
16 #include <stdio.h>
17 #include <string.h>
19 #include "js/TypeDecls.h"
20 #include "js/Utility.h"
22 // [SMDOC] *Printer, Sprinter, Fprinter, ...
24 // # Motivation
26 // In many places, we want to have functions which are capable of logging
27 // various data structures. Previously, we had logging functions for each
28 // storage, such as using `fwrite`, `printf` or `snprintf`. In additional cases,
29 // many of these logging options were using a string serializing logging
30 // function, only to discard the allocated string after it had been copied to a
31 // file.
33 // GenericPrinter is an answer to avoid excessive amount of temporary
34 // allocations which are used once, and a way to make logging functions work
35 // independently of the backend they are used with.
37 // # Design
39 // The GenericPrinter implements most of `put`, `printf`, `vprintf` and
40 // `putChar` functions, which are implemented using `put` and `putChar`
41 // functions in the derivative classes. Thus, one does not have to reimplement
42 // `putString` nor `printf` for each printer.
44 // // Logging the value N to whatever printer is provided such as
45 // // a file or a string.
46 // void logN(GenericPrinter& out) {
47 // out.printf("[Logging] %d\n", this->n);
48 // }
50 // The printing functions are infallible, from the logging functions
51 // perspective. If an issue happens while printing, this would be recorded by
52 // the Printer, and this can be tested using `hadOutOfMemory` function by the
53 // owner of the Printer instance.
55 // Even in case of failure, printing functions should remain safe to use. Thus
56 // calling `put` twice in a row is safe even if no check for `hadOutOfMemory` is
57 // performed. This is necessary to simplify the control flow and avoid bubbling
58 // failures up out of logging functions.
60 // Note, being safe to use does not imply correctness. In case of failure the
61 // correctness of the printed characters is no longer guaranteed. One should use
62 // `hadOutOfMemory` function to know if any failure happened which might have
63 // caused incorrect content to be saved. In some cases, such as `Sprinter`,
64 // where the string buffer can be extracted, the returned value would account
65 // for checking `hadOutOfMemory`.
67 // # Implementations
69 // The GenericPrinter is a base class where the derivative classes are providing
70 // different implementations which have their own advantages and disadvantages:
72 // - Fprinter: FILE* printer. Write the content directly to a file.
74 // - Sprinter: System allocator C-string buffer. Write the content to a buffer
75 // which is reallocated as more content is added. The buffer can then be
76 // extracted into a C-string or a JSString, respectively using `release` and
77 // `releaseJS`.
79 // - LSprinter: LifoAlloc C-string rope. Write the content to a list of chunks
80 // in a LifoAlloc buffer; no reallocation occurs, but one should use
81 // `exportInto` to serialize its content to a Sprinter or a Fprinter. This is
82 // useful to avoid reallocation copies, while using an existing LifoAlloc.
84 // - SEprinter: Roughly the same as Fprinter for stderr, except it goes through
85 // printf_stderr, which makes sure the output goes to a useful place: the
86 // Android log or the Windows debug output.
88 // - EscapePrinter: Wrapper around other printers, to escape characters when
89 // necessary.
91 // # Print UTF-16
93 // The GenericPrinter only handles `char` inputs, which is good enough for ASCII
94 // and Latin1 character sets. However, to handle UTF-16, one should use an
95 // EscapePrinter as well as a policy for escaping characters.
97 // One might require different escaping policies based on the escape sequences
98 // and based on the set of accepted characters for the content generated. For
99 // example, JSON does not specify \x<XX> escape sequences.
101 // Today the following escape policies exist:
103 // - StringEscape: Produce C-like escape sequences: \<c>, \x<XX> and \u<XXXX>.
104 // - JSONEscape: Produce JSON escape sequences: \<c> and \u<XXXX>.
106 // An escape policy is defined by 2 functions:
108 // bool isSafeChar(char16_t c):
109 // Returns whether a character can be printed without being escaped.
111 // void convertInto(GenericPrinter& out, char16_t c):
112 // Calls the printer with the escape sequence for the character given as
113 // argument.
115 // To use an escape policy, the printer should be wrapped using an EscapePrinter
116 // as follows:
118 // {
119 // // The escaped string is surrounded by double-quotes, escape the double
120 // // quotes as well.
121 // StringEscape esc('"');
123 // // Wrap our existing `GenericPrinter& out` using the `EscapePrinter`.
124 // EscapePrinter ep(out, esc);
126 // // Append a sequence of characters which might contain UTF-16 characters.
127 // ep.put(chars);
128 // }
131 namespace js {
133 class LifoAlloc;
135 // Generic printf interface, similar to an ostream in the standard library.
137 // This class is useful to make generic printers which can work either with a
138 // file backend, with a buffer allocated with a JSContext or a linked list
139 // of chunks allocated with a LifoAlloc.
//
// Derived classes must implement `put(const char*, size_t)`, which is the only
// pure virtual member. The other virtual members have default implementations
// which a derived class may override.
140 class JS_PUBLIC_API GenericPrinter {
141 protected:
142 bool hadOOM_; // whether reportOutOfMemory() has been called.
// Only reachable from derived classes; starts with no failure recorded.
144 constexpr GenericPrinter() : hadOOM_(false) {}
146 public:
147 // Puts |len| characters from |s| at the current position. This function might
148 // silently fail and the error can be tested using `hadOutOfMemory()`. Calling
149 // this function or any other printing functions after a failure is accepted,
150 // but the outcome would still remain incorrect and `hadOutOfMemory()` would
151 // still report any of the previous errors.
152 virtual void put(const char* s, size_t len) = 0;
// Convenience overload: the length is computed with strlen(), so |s| has to
// be NUL-terminated.
153 inline void put(const char* s) { put(s, strlen(s)); }
155 // Put a mozilla::Span / mozilla::Range of Latin1Char or char16_t characters
156 // in the output.
158 // Note that the char16_t variant is expected to crash unless putChar is
159 // overridden to properly handle the full WTF-16 character set.
160 virtual void put(mozilla::Span<const JS::Latin1Char> str);
161 virtual void put(mozilla::Span<const char16_t> str);
163 // Same as the various put functions but only appending a single character.
// By default the `char` variant forwards to `put(&c, 1)` and the Latin1Char
// variant forwards to the `char` variant after a cast.
165 // Note that the char16_t variant is expected to crash unless putChar is
166 // overridden to properly handle the full WTF-16 character set.
167 virtual inline void putChar(const char c) { put(&c, 1); }
168 virtual inline void putChar(const JS::Latin1Char c) { putChar(char(c)); }
169 virtual inline void putChar(const char16_t c) {
170 MOZ_CRASH("Use an EscapePrinter to handle all characters");
// Presumably appends the characters of the JSString |str|; implemented out of
// line, so the exact behaviour has to be confirmed in the implementation file.
173 virtual void putString(JSContext* cx, JSString* str);
175 // Prints a formatted string into the buffer.
176 void printf(const char* fmt, ...) MOZ_FORMAT_PRINTF(2, 3);
177 void vprintf(const char* fmt, va_list ap) MOZ_FORMAT_PRINTF(2, 0);
179 // In some cases, such as handling JSRopes in a less-quadratic worst-case,
180 // it might be useful to copy content which has already been generated.
182 // If the buffer is back-readable, then this function should return `true`
183 // and `putFromIndex` should be implemented to delegate to a `put` call at
184 // the matching index and the corresponding length. To provide the index
185 // argument of `putFromIndex`, the `index` method should also be implemented
186 // to return the index within the inner buffer used by the printer.
187 virtual bool canPutFromIndex() const { return false; }
189 // Append to the current buffer bytes which have previously been
190 // appended. The default implementation crashes; see `canPutFromIndex`.
191 virtual void putFromIndex(size_t index, size_t length) {
192 MOZ_CRASH("Calls to putFromIndex should be guarded by canPutFromIndex.");
195 // When the printer has a seekable buffer and `canPutFromIndex` returns
196 // `true`, this function can return the `index` of the next character to be
197 // added to the buffer.
199 // This function is monotonic. Thus, if the printer encounters an
200 // Out-Of-Memory issue, then the returned index should be the maximal value
201 // ever returned.
202 virtual size_t index() const { return 0; }
204 // In some printers, this ensures that the content is fully written.
205 virtual void flush() { /* Do nothing */ }
207 // Report that a string operation failed to get the memory it requested.
208 virtual void reportOutOfMemory();
210 // Return true if this printer ran out of memory.
211 virtual bool hadOutOfMemory() const { return hadOOM_; }
214 // Sprintf / JSSprintf, but with unlimited and automatically allocated
215 // buffering.
//
// This is the shared base of Sprinter and JSSprinter: the constructor, the
// destructor and the release functions are protected, so only derived
// classes can create an instance or extract the buffer.
216 class JS_PUBLIC_API StringPrinter : public GenericPrinter {
217 public:
218 // Check that the invariant holds at the entry and exit of a scope.
219 struct InvariantChecker {
220 const StringPrinter* parent;
222 explicit InvariantChecker(const StringPrinter* p) : parent(p) {
223 parent->checkInvariants();
226 ~InvariantChecker() { parent->checkInvariants(); }
// Optional context; the constructor defaults it to nullptr.
229 JSContext* maybeCx;
231 private:
232 static const size_t DefaultSize;
233 #ifdef DEBUG
234 bool initialized; // true if this is initialized, used in debug builds
235 #endif
236 bool shouldReportOOM; // whether to report OOM to the maybeCx
237 char* base; // malloc'd buffer address
238 size_t size; // size of buffer allocated at base
239 ptrdiff_t offset; // offset of next free char in buffer
241 // The arena to be used by jemalloc to allocate the string into. This is
242 // selected by the child classes when calling the constructor. JSStrings have
243 // a different arena than strings which do not belong to the JS engine, and as
244 // such when building a JSString with the intent of avoiding reallocation, the
245 // destination arena has to be selected upfront.
246 arena_id_t arena;
248 private:
// Resize the buffer to |newSize|; the result must be checked.
249 [[nodiscard]] bool realloc_(size_t newSize);
251 protected:
252 // JSContext* parameter is optional and can be omitted if the following
253 // are not used.
254 // * putString method with JSString
255 // * QuoteString function with JSString
256 // * JSONQuoteString function with JSString
258 // If JSContext* parameter is not provided, or shouldReportOOM is false,
259 // the consumer should manually report OOM on any failure.
260 explicit StringPrinter(arena_id_t arena, JSContext* maybeCx = nullptr,
261 bool shouldReportOOM = true);
262 ~StringPrinter();
// Extract the accumulated buffer, either as a C-string or as a JSString.
// Derived classes expose these through their public `release` methods.
264 JS::UniqueChars releaseChars();
265 JSString* releaseJS(JSContext* cx);
267 public:
268 // Initialize this sprinter, returns false on error.
269 [[nodiscard]] bool init();
271 void checkInvariants() const;
273 // Attempt to reserve len + 1 space (for a trailing NUL byte). If the
274 // attempt succeeds, return a pointer to the start of that space and adjust
275 // the internal content. The caller *must* completely fill this space on
276 // success.
277 char* reserve(size_t len);
279 // Puts |len| characters from |s| at the current position. May OOM, which must
280 // be checked by testing the return value of releaseJS() at the end of
281 // printing.
282 virtual void put(const char* s, size_t len) final;
283 using GenericPrinter::put; // pick up |put(const char* s);|
// The buffer is back-readable: `putFromIndex` re-appends |length| bytes that
// start at offset |index| of the existing buffer, and `index()` is the
// current `length()`.
// NOTE(review): the source pointer `base + index` aliases this printer's own
// buffer, so this relies on `put` coping with a source inside `base` even if
// the buffer is reallocated -- confirm in the implementation of `put`.
285 virtual bool canPutFromIndex() const final { return true; }
286 virtual void putFromIndex(size_t index, size_t length) final {
287 MOZ_ASSERT(index <= this->index());
288 MOZ_ASSERT(index + length <= this->index());
289 put(base + index, length);
291 virtual size_t index() const final { return length(); }
293 virtual void putString(JSContext* cx, JSString* str) final;
295 size_t length() const;
297 // When an OOM has already been reported on the Sprinter, this function will
298 // forward this error to the JSContext given in the Sprinter initialization.
300 // If no JSContext had been provided or the Sprinter is configured to not
301 // report OOM, then nothing happens.
302 void forwardOutOfMemory();
// StringPrinter whose buffer is allocated in js::MallocArena. Both the
// JSContext and the OOM-reporting flag are optional and are forwarded
// unchanged to StringPrinter.
305 class JS_PUBLIC_API Sprinter : public StringPrinter {
306 public:
307 explicit Sprinter(JSContext* maybeCx = nullptr, bool shouldReportOOM = true)
308 : StringPrinter(js::MallocArena, maybeCx, shouldReportOOM) {}
309 ~Sprinter() {}
// Extract the buffer as a C-string; public wrapper around the protected
// StringPrinter::releaseChars().
311 JS::UniqueChars release() { return releaseChars(); }
// StringPrinter whose buffer is allocated in js::StringBufferArena. Unlike
// Sprinter, the JSContext argument has no default and OOM reporting is always
// enabled (shouldReportOOM is hard-coded to true).
314 class JS_PUBLIC_API JSSprinter : public StringPrinter {
315 public:
316 explicit JSSprinter(JSContext* cx)
317 : StringPrinter(js::StringBufferArena, cx, true) {}
318 ~JSSprinter() {}
// Extract the buffer as a JSString; public wrapper around the protected
// StringPrinter::releaseJS().
320 JSString* release(JSContext* cx) { return releaseJS(cx); }
323 // Fprinter, print a string directly into a file.
324 class JS_PUBLIC_API Fprinter final : public GenericPrinter {
325 private:
// Destination stream; nullptr while the printer is not initialized (see
// isInitialized()).
326 FILE* file_;
// NOTE(review): presumably records whether init(const char* path) opened the
// file itself, as opposed to being handed an existing FILE* -- confirm in the
// implementation.
327 bool init_;
329 public:
330 explicit Fprinter(FILE* fp);
// Default construction leaves the printer uninitialized; one of the init()
// overloads has to be called before printing.
332 constexpr Fprinter() : file_(nullptr), init_(false) {}
// The destructor is only declared in DEBUG builds.
334 #ifdef DEBUG
335 ~Fprinter();
336 #endif
338 // Initialize this printer, returns false on error.
339 [[nodiscard]] bool init(const char* path);
340 void init(FILE* fp);
341 bool isInitialized() const { return file_ != nullptr; }
342 void flush() override;
343 void finish();
345 // Puts |len| characters from |s| at the current position. Errors may be
346 // detected with hadOutOfMemory() (which will be set for any fwrite() error,
347 // not just OOM.)
348 void put(const char* s, size_t len) override;
349 using GenericPrinter::put; // pick up |put(const char* s);|
352 // SEprinter, print using printf_stderr (goes to Android log, Windows debug,
353 // else just stderr).
354 class SEprinter final : public GenericPrinter {
355 public:
356 constexpr SEprinter() {}
358 // Puts |len| characters from |s| at the current position. Ignores errors.
// NOTE(review): |len| is narrowed to int for the "%.*s" precision, so a length
// above INT_MAX would not be honoured. "%.*s" also stops at the first NUL
// byte, so embedded NULs truncate the output.
359 virtual void put(const char* s, size_t len) override {
360 printf_stderr("%.*s", int(len), s);
362 using GenericPrinter::put; // pick up |put(const char* s);|
365 // LSprinter is similar to Sprinter except that instead of using a
366 // JSContext to allocate strings, it uses a LifoAlloc as a backend for the
367 // allocation of the chunks of the string.
368 class JS_PUBLIC_API LSprinter final : public GenericPrinter {
369 private:
// Header of one link of the chunk list. The characters are not a member:
// chars() points just past the header (this + 1) and end() is |length|
// characters after that.
370 struct Chunk {
371 Chunk* next;
372 size_t length;
374 char* chars() { return reinterpret_cast<char*>(this + 1); }
375 char* end() { return chars() + length; }
378 private:
379 LifoAlloc* alloc_; // LifoAlloc used as a backend of chunk allocations.
// First and last links of the chunk list.
380 Chunk* head_;
381 Chunk* tail_;
// NOTE(review): presumably the number of bytes still free at the end of the
// last chunk -- confirm in the implementation.
382 size_t unused_;
384 public:
385 explicit LSprinter(LifoAlloc* lifoAlloc);
386 ~LSprinter();
388 // Copy the content of the chunks into another printer, such that we can
389 // flush the content of this printer to a file.
390 void exportInto(GenericPrinter& out) const;
392 // Drop the current string, and let its chunks be freed with the LifoAlloc.
393 void clear();
395 // Puts |len| characters from |s| at the current position.
396 virtual void put(const char* s, size_t len) override;
397 using GenericPrinter::put; // pick up |put(const char* s);|
400 // Escaping printers work like any other printer except that any added
401 // characters are checked for escaping sequences. This one would escape a
402 // string such that it can safely be embedded in a JS string.
//
// |Delegate| is the printer receiving the output and |Escape| is the policy
// providing `isSafeChar` and `convertInto`. Both are held by reference, so
// they have to outlive the EscapePrinter.
403 template <typename Delegate, typename Escape>
404 class JS_PUBLIC_API EscapePrinter final : public GenericPrinter {
// Returns the length of the leading run of |s| made only of characters the
// policy considers safe, or |len| if all of them are. Each byte goes through
// uint8_t first so that values >= 0x80 are not sign-extended.
405 size_t lengthOfSafeChars(const char* s, size_t len) {
406 for (size_t i = 0; i < len; i++) {
407 if (!esc.isSafeChar(uint8_t(s[i]))) {
408 return i;
411 return len;
414 private:
415 Delegate& out;
416 Escape& esc;
418 public:
419 EscapePrinter(Delegate& out, Escape& esc) : out(out), esc(esc) {}
420 ~EscapePrinter() {}
422 using GenericPrinter::put;
// Alternates between forwarding a run of safe characters to |out| in a single
// `put` call and escaping the one unsafe character which ended that run,
// until |len| characters are consumed.
423 void put(const char* s, size_t len) override {
424 const char* b = s;
425 while (len) {
426 size_t index = lengthOfSafeChars(b, len);
427 if (index) {
428 out.put(b, index);
429 len -= index;
430 b += index;
432 if (len) {
433 esc.convertInto(out, char16_t(uint8_t(*b)));
434 len -= 1;
435 b += 1;
// Single character variants: a safe character is forwarded as-is to |out|,
// anything else is handed to the policy's `convertInto`.
440 inline void putChar(const char c) override {
441 if (esc.isSafeChar(char16_t(uint8_t(c)))) {
442 out.putChar(char(c));
443 return;
445 esc.convertInto(out, char16_t(uint8_t(c)));
448 inline void putChar(const JS::Latin1Char c) override {
449 if (esc.isSafeChar(char16_t(c))) {
450 out.putChar(char(c));
451 return;
453 esc.convertInto(out, char16_t(c));
// NOTE(review): a safe char16_t is narrowed with char(c), which is only
// lossless if the policy never reports a character wider than a `char` as
// safe -- confirm against the policies' isSafeChar.
456 inline void putChar(const char16_t c) override {
457 if (esc.isSafeChar(c)) {
458 out.putChar(char(c));
459 return;
461 esc.convertInto(out, c);
464 // Forward calls to delegated printer.
465 bool canPutFromIndex() const override { return out.canPutFromIndex(); }
466 void putFromIndex(size_t index, size_t length) final {
467 out.putFromIndex(index, length);
469 size_t index() const final { return out.index(); }
470 void flush() final { out.flush(); }
471 void reportOutOfMemory() final { out.reportOutOfMemory(); }
472 bool hadOutOfMemory() const final { return out.hadOutOfMemory(); }
// Escape policy usable as the |Escape| parameter of EscapePrinter. The
// overview comment at the top of this file describes it as producing JSON
// escape sequences.
475 class JS_PUBLIC_API JSONEscape {
476 public:
// Returns whether |c| can be printed without being escaped.
477 bool isSafeChar(char16_t c);
// Prints the escape sequence for |c| into |out|.
478 void convertInto(GenericPrinter& out, char16_t c);
// Escape policy usable as the |Escape| parameter of EscapePrinter. The
// overview comment at the top of this file describes it as producing C-like
// escape sequences.
481 class JS_PUBLIC_API StringEscape {
482 private:
// Quote character given at construction, '\0' by default. Per the usage
// example at the top of this file, this is the character surrounding the
// escaped string, which then gets escaped as well.
483 const char quote = '\0';
485 public:
486 explicit StringEscape(const char quote = '\0') : quote(quote) {}
// Returns whether |c| can be printed without being escaped.
488 bool isSafeChar(char16_t c);
// Prints the escape sequence for |c| into |out|.
489 void convertInto(GenericPrinter& out, char16_t c);
492 // A GenericPrinter that formats everything at a nested indentation level.
493 class JS_PUBLIC_API IndentedPrinter final : public GenericPrinter {
// Printer receiving the indented output; held by reference, so it has to
// outlive this printer.
494 GenericPrinter& out_;
495 // The number of indents to insert at the beginning of each line.
496 uint32_t indentLevel_;
497 // The number of spaces to insert for each indent.
498 uint32_t indentAmount_;
499 // Whether we have seen a line ending and should insert an indent at the
500 // next line fragment.
501 bool pendingIndent_;
503 // Put an indent to `out_`
504 void putIndent();
505 // Put `s` to `out_`, inserting an indent if we need to
506 void putWithMaybeIndent(const char* s, size_t len);
508 public:
// Defaults to indent level 0 and 2 spaces per indent. No indent is pending
// right after construction.
509 explicit IndentedPrinter(GenericPrinter& out, uint32_t indentLevel = 0,
510 uint32_t indentAmount = 2)
511 : out_(out),
512 indentLevel_(indentLevel),
513 indentAmount_(indentAmount),
514 pendingIndent_(false) {}
516 // Automatically insert and remove an indent for a scope
// The level is incremented on construction and decremented on destruction.
// It is an unsigned value, so callers changing the level by hand inside the
// scope have to keep it from dropping below the increment made here.
517 class AutoIndent {
518 IndentedPrinter& printer_;
520 public:
521 explicit AutoIndent(IndentedPrinter& printer) : printer_(printer) {
522 printer_.setIndentLevel(printer_.indentLevel() + 1);
524 ~AutoIndent() { printer_.setIndentLevel(printer_.indentLevel() - 1); }
527 uint32_t indentLevel() const { return indentLevel_; }
528 void setIndentLevel(uint32_t indentLevel) { indentLevel_ = indentLevel; }
530 virtual void put(const char* s, size_t len) override;
531 using GenericPrinter::put; // pick up |inline void put(const char* s);|
534 // Map escaped code to the letter/symbol escaped with a backslash.
535 extern const char js_EscapeMap[];
537 // Return a C-string containing the chars in str, with any non-printing chars
538 // escaped. If the optional quote parameter is present and is not '\0', quotes
539 // (as specified by the quote argument) are also escaped, and the quote
540 // character is appended at the beginning and end of the result string.
541 // The returned string is guaranteed to contain only ASCII characters.
542 extern JS_PUBLIC_API JS::UniqueChars QuoteString(JSContext* cx, JSString* str,
543 char quote = '\0');
545 // Appends the quoted string to the given Sprinter. Follows the same semantics
546 // as QuoteString from above.
547 extern JS_PUBLIC_API void QuoteString(Sprinter* sp, JSString* str,
548 char quote = '\0');
550 // Appends the JSON quoted string to the given StringPrinter. Unlike
551 // QuoteString, there is no quote character parameter.
552 extern JS_PUBLIC_API void JSONQuoteString(StringPrinter* sp, JSString* str);
554 // Internal implementation code for QuoteString methods above.
555 enum class QuoteTarget { String, JSON };
// Variant working on a range of characters instead of a JSString; |target|
// selects between the two QuoteTarget flavours.
557 template <QuoteTarget target, typename CharT>
558 void JS_PUBLIC_API QuoteString(Sprinter* sp,
559 const mozilla::Range<const CharT>& chars,
560 char quote = '\0');
562 } // namespace js
564 #endif // js_Printer_h