Bug 1728955: part 3) Add logging to `nsBaseClipboard`. r=masayuki
[gecko.git] / dom / base / BodyUtil.cpp
blob94f54de673e748c85151afb1ced017d2f06f1000
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "BodyUtil.h"
9 #include "nsError.h"
10 #include "nsString.h"
11 #include "nsIGlobalObject.h"
12 #include "mozilla/Encoding.h"
14 #include "nsCRT.h"
15 #include "nsCharSeparatedTokenizer.h"
16 #include "nsDOMString.h"
17 #include "nsNetUtil.h"
18 #include "nsReadableUtils.h"
19 #include "nsStreamUtils.h"
20 #include "nsStringStream.h"
21 #include "nsURLHelper.h"
23 #include "js/ArrayBuffer.h" // JS::NewArrayBufferWithContents
24 #include "js/JSON.h"
25 #include "mozilla/ErrorResult.h"
26 #include "mozilla/dom/Exceptions.h"
27 #include "mozilla/dom/FetchUtil.h"
28 #include "mozilla/dom/File.h"
29 #include "mozilla/dom/FormData.h"
30 #include "mozilla/dom/Headers.h"
31 #include "mozilla/dom/Promise.h"
33 namespace mozilla::dom {
35 namespace {
37 // Reads over a CRLF and positions start after it.
38 static bool PushOverLine(nsACString::const_iterator& aStart,
39 const nsACString::const_iterator& aEnd) {
40 if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
41 ++aStart; // advance to after CRLF
42 return true;
45 return false;
48 /**
49 * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
50 * This does not respect any encoding specified per entry, using UTF-8
51 * throughout. This is as the Fetch spec states in the consume body algorithm.
52 * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
53 * unlike Necko we do not have to deal with receiving incomplete chunks of data.
55 * This parser will fail the entire parse on any invalid entry, so it will
56 * never return a partially filled FormData.
57 * The content-disposition header is used to figure out the name and filename
58 * entries. The inclusion of the filename parameter decides if the entry is
59 * inserted into the FormData as a string or a File.
61 * File blobs are copies of the underlying data string since we cannot adopt
62 * char* chunks embedded within the larger body without significant effort.
63 * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
64 * friends to figure out if Fetch ends up copying big blobs to see if this is
65 * worth optimizing.
67 class MOZ_STACK_CLASS FormDataParser {
68 private:
69 RefPtr<FormData> mFormData;
70 nsCString mMimeType;
71 nsCString mData;
73 // Entry state, reset in START_PART.
74 nsCString mName;
75 nsCString mFilename;
76 nsCString mContentType;
78 enum {
79 START_PART,
80 PARSE_HEADER,
81 PARSE_BODY,
82 } mState;
84 nsIGlobalObject* mParentObject;
86 // Reads over a boundary and sets start to the position after the end of the
87 // boundary. Returns false if no boundary is found immediately.
88 bool PushOverBoundary(const nsACString& aBoundaryString,
89 nsACString::const_iterator& aStart,
90 nsACString::const_iterator& aEnd) {
91 // We copy the end iterator to keep the original pointing to the real end
92 // of the string.
93 nsACString::const_iterator end(aEnd);
94 const char* beginning = aStart.get();
95 if (FindInReadable(aBoundaryString, aStart, end)) {
96 // We either should find the body immediately, or after 2 chars with the
97 // 2 chars being '-', everything else is failure.
98 if ((aStart.get() - beginning) == 0) {
99 aStart.advance(aBoundaryString.Length());
100 return true;
103 if ((aStart.get() - beginning) == 2) {
104 if (*(--aStart) == '-' && *(--aStart) == '-') {
105 aStart.advance(aBoundaryString.Length() + 2);
106 return true;
111 return false;
114 bool ParseHeader(nsACString::const_iterator& aStart,
115 nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
116 nsAutoCString headerName, headerValue;
117 if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
118 aWasEmptyHeader)) {
119 return false;
121 if (*aWasEmptyHeader) {
122 return true;
125 if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
126 bool seenFormData = false;
127 for (const nsACString& token :
128 nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
129 if (token.IsEmpty()) {
130 continue;
133 if (token.EqualsLiteral("form-data")) {
134 seenFormData = true;
135 continue;
138 if (seenFormData && StringBeginsWith(token, "name="_ns)) {
139 mName = StringTail(token, token.Length() - 5);
140 mName.Trim(" \"");
141 continue;
144 if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
145 mFilename = StringTail(token, token.Length() - 9);
146 mFilename.Trim(" \"");
147 continue;
151 if (mName.IsVoid()) {
152 // Could not parse a valid entry name.
153 return false;
155 } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
156 mContentType = headerValue;
159 return true;
162 // The end of a body is marked by a CRLF followed by the boundary. So the
163 // CRLF is part of the boundary and not the body, but any prior CRLFs are
164 // part of the body. This will position the iterator at the beginning of the
165 // boundary (after the CRLF).
166 bool ParseBody(const nsACString& aBoundaryString,
167 nsACString::const_iterator& aStart,
168 nsACString::const_iterator& aEnd) {
169 const char* beginning = aStart.get();
171 // Find the boundary marking the end of the body.
172 nsACString::const_iterator end(aEnd);
173 if (!FindInReadable(aBoundaryString, aStart, end)) {
174 return false;
177 // We found a boundary, strip the just prior CRLF, and consider
178 // everything else the body section.
179 if (aStart.get() - beginning < 2) {
180 // Only the first entry can have a boundary right at the beginning. Even
181 // an empty body will have a CRLF before the boundary. So this is
182 // a failure.
183 return false;
186 // Check that there is a CRLF right before the boundary.
187 aStart.advance(-2);
189 // Skip optional hyphens.
190 if (*aStart == '-' && *(aStart.get() + 1) == '-') {
191 if (aStart.get() - beginning < 2) {
192 return false;
195 aStart.advance(-2);
198 if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
199 return false;
202 nsAutoCString body(beginning, aStart.get() - beginning);
204 // Restore iterator to after the \r\n as we promised.
205 // We do not need to handle the extra hyphens case since our boundary
206 // parser in PushOverBoundary()
207 aStart.advance(2);
209 if (!mFormData) {
210 mFormData = new FormData();
213 NS_ConvertUTF8toUTF16 name(mName);
215 if (mFilename.IsVoid()) {
216 ErrorResult rv;
217 mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
218 MOZ_ASSERT(!rv.Failed());
219 } else {
220 // Unfortunately we've to copy the data first since all our strings are
221 // going to free it. We also need fallible alloc, so we can't just use
222 // ToNewCString().
223 char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
224 nsCString::const_iterator bodyIter, bodyEnd;
225 body.BeginReading(bodyIter);
226 body.EndReading(bodyEnd);
227 char* p = copy;
228 while (bodyIter != bodyEnd) {
229 *p++ = *bodyIter++;
231 p = nullptr;
233 RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
234 mParentObject, reinterpret_cast<void*>(copy), body.Length(),
235 NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
236 /* aLastModifiedDate */ 0);
237 if (NS_WARN_IF(!file)) {
238 return false;
241 Optional<nsAString> dummy;
242 ErrorResult rv;
243 mFormData->Append(name, *file, dummy, rv);
244 if (NS_WARN_IF(rv.Failed())) {
245 rv.SuppressException();
246 return false;
250 return true;
253 public:
254 FormDataParser(const nsACString& aMimeType, const nsACString& aData,
255 nsIGlobalObject* aParent)
256 : mMimeType(aMimeType),
257 mData(aData),
258 mState(START_PART),
259 mParentObject(aParent) {}
261 bool Parse() {
262 if (mData.IsEmpty()) {
263 return false;
266 // Determine boundary from mimetype.
267 const char* boundaryId = nullptr;
268 boundaryId = strstr(mMimeType.BeginWriting(), "boundary");
269 if (!boundaryId) {
270 return false;
273 boundaryId = strchr(boundaryId, '=');
274 if (!boundaryId) {
275 return false;
278 // Skip over '='.
279 boundaryId++;
281 char* attrib = (char*)strchr(boundaryId, ';');
282 if (attrib) *attrib = '\0';
284 nsAutoCString boundaryString(boundaryId);
285 if (attrib) *attrib = ';';
287 boundaryString.Trim(" \"");
289 if (boundaryString.Length() == 0) {
290 return false;
293 nsACString::const_iterator start, end;
294 mData.BeginReading(start);
295 // This should ALWAYS point to the end of data.
296 // Helpers make copies.
297 mData.EndReading(end);
299 while (start != end) {
300 switch (mState) {
301 case START_PART:
302 mName.SetIsVoid(true);
303 mFilename.SetIsVoid(true);
304 mContentType = "text/plain"_ns;
306 // MUST start with boundary.
307 if (!PushOverBoundary(boundaryString, start, end)) {
308 return false;
311 if (start != end && *start == '-') {
312 // End of data.
313 if (!mFormData) {
314 mFormData = new FormData();
316 return true;
319 if (!PushOverLine(start, end)) {
320 return false;
322 mState = PARSE_HEADER;
323 break;
325 case PARSE_HEADER:
326 bool emptyHeader;
327 if (!ParseHeader(start, end, &emptyHeader)) {
328 return false;
331 if (emptyHeader && !PushOverLine(start, end)) {
332 return false;
335 mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
336 break;
338 case PARSE_BODY:
339 if (mName.IsVoid()) {
340 NS_WARNING(
341 "No content-disposition header with a valid name was "
342 "found. Failing at body parse.");
343 return false;
346 if (!ParseBody(boundaryString, start, end)) {
347 return false;
350 mState = START_PART;
351 break;
353 default:
354 MOZ_CRASH("Invalid case");
358 MOZ_ASSERT_UNREACHABLE("Should never reach here.");
359 return false;
362 already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
364 } // namespace
366 // static
367 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
368 JS::MutableHandle<JSObject*> aValue,
369 uint32_t aInputLength, uint8_t* aInput,
370 ErrorResult& aRv) {
371 JS::Rooted<JSObject*> arrayBuffer(aCx);
372 arrayBuffer = JS::NewArrayBufferWithContents(aCx, aInputLength,
373 reinterpret_cast<void*>(aInput));
374 if (!arrayBuffer) {
375 JS_ClearPendingException(aCx);
376 aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
377 return;
379 aValue.set(arrayBuffer);
382 // static
383 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
384 const nsString& aMimeType,
385 uint32_t aInputLength,
386 uint8_t* aInput,
387 ErrorResult& aRv) {
388 RefPtr<Blob> blob = Blob::CreateMemoryBlob(
389 aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
391 if (!blob) {
392 aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
393 return nullptr;
395 return blob.forget();
398 // static
399 already_AddRefed<FormData> BodyUtil::ConsumeFormData(nsIGlobalObject* aParent,
400 const nsCString& aMimeType,
401 const nsCString& aStr,
402 ErrorResult& aRv) {
403 constexpr auto formDataMimeType = "multipart/form-data"_ns;
405 // Allow semicolon separated boundary/encoding suffix like
406 // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
407 bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
409 if (isValidFormDataMimeType &&
410 aMimeType.Length() > formDataMimeType.Length()) {
411 isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
414 if (isValidFormDataMimeType) {
415 FormDataParser parser(aMimeType, aStr, aParent);
416 if (!parser.Parse()) {
417 aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
418 return nullptr;
421 RefPtr<FormData> fd = parser.GetFormData();
422 MOZ_ASSERT(fd);
423 return fd.forget();
426 constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
427 bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
429 if (isValidUrlEncodedMimeType &&
430 aMimeType.Length() > urlDataMimeType.Length()) {
431 isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
434 if (isValidUrlEncodedMimeType) {
435 RefPtr<FormData> fd = new FormData(aParent);
436 DebugOnly<bool> status = URLParams::Parse(
437 aStr, [&fd](const nsAString& aName, const nsAString& aValue) {
438 ErrorResult rv;
439 fd->Append(aName, aValue, rv);
440 MOZ_ASSERT(!rv.Failed());
441 return true;
443 MOZ_ASSERT(status);
445 return fd.forget();
448 aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
449 return nullptr;
452 // static
453 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
454 nsString& aText) {
455 nsresult rv =
456 UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
457 if (NS_FAILED(rv)) {
458 return rv;
460 return NS_OK;
463 // static
464 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
465 const nsString& aStr, ErrorResult& aRv) {
466 aRv.MightThrowJSException();
468 JS::Rooted<JS::Value> json(aCx);
469 if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
470 if (!JS_IsExceptionPending(aCx)) {
471 aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
472 return;
475 JS::Rooted<JS::Value> exn(aCx);
476 DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
477 MOZ_ASSERT(gotException);
479 JS_ClearPendingException(aCx);
480 aRv.ThrowJSException(aCx, exn);
481 return;
484 aValue.set(json);
487 } // namespace mozilla::dom