dom/base/BodyUtil.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "BodyUtil.h"
   8
   9 #include "nsError.h"
  10 #include "nsString.h"
  11 #include "nsIGlobalObject.h"
  12 #include "mozilla/Encoding.h"
  13
  14 #include "nsCRT.h"
  15 #include "nsCharSeparatedTokenizer.h"
  16 #include "nsDOMString.h"
  17 #include "nsNetUtil.h"
  18 #include "nsReadableUtils.h"
  19 #include "nsStreamUtils.h"
  20 #include "nsStringStream.h"
  21 #include "nsURLHelper.h"
  22
  23 #include "js/ArrayBuffer.h"  // JS::NewArrayBufferWithContents
  24 #include "js/JSON.h"
  25 #include "mozilla/ErrorResult.h"
  26 #include "mozilla/dom/Exceptions.h"
  27 #include "mozilla/dom/FetchUtil.h"
  28 #include "mozilla/dom/File.h"
  29 #include "mozilla/dom/FormData.h"
  30 #include "mozilla/dom/Headers.h"
  31 #include "mozilla/dom/Promise.h"
  32
  33 namespace mozilla::dom {
  34
  35 namespace {
  36
  37 // Reads over a CRLF and positions start after it.
  38 static bool PushOverLine(nsACString::const_iterator& aStart,
  39                          const nsACString::const_iterator& aEnd) {
  40   if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
  41     ++aStart;  // advance to after CRLF
  42     return true;
  43   }
  44
  45   return false;
  46 }
  47
  48 /**
  49  * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
  50  * This does not respect any encoding specified per entry, using UTF-8
  51  * throughout. This is as the Fetch spec states in the consume body algorithm.
  52  * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
  53  * unlike Necko we do not have to deal with receiving incomplete chunks of data.
  54  *
  55  * This parser will fail the entire parse on any invalid entry, so it will
  56  * never return a partially filled FormData.
  57  * The content-disposition header is used to figure out the name and filename
  58  * entries. The inclusion of the filename parameter decides if the entry is
  59  * inserted into the FormData as a string or a File.
  60  *
  61  * File blobs are copies of the underlying data string since we cannot adopt
  62  * char* chunks embedded within the larger body without significant effort.
  63  * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
  64  * friends to figure out if Fetch ends up copying big blobs to see if this is
  65  * worth optimizing.
  66  */
  67 class MOZ_STACK_CLASS FormDataParser {
  68  private:
  69   RefPtr<FormData> mFormData;
  70   nsCString mMimeType;
  71   nsCString mData;
  72
  73   // Entry state, reset in START_PART.
  74   nsCString mName;
  75   nsCString mFilename;
  76   nsCString mContentType;
  77
  78   enum {
  79     START_PART,
  80     PARSE_HEADER,
  81     PARSE_BODY,
  82   } mState;
  83
  84   nsIGlobalObject* mParentObject;
  85
  86   // Reads over a boundary and sets start to the position after the end of the
  87   // boundary. Returns false if no boundary is found immediately.
  88   bool PushOverBoundary(const nsACString& aBoundaryString,
  89                         nsACString::const_iterator& aStart,
  90                         nsACString::const_iterator& aEnd) {
  91     // We copy the end iterator to keep the original pointing to the real end
  92     // of the string.
  93     nsACString::const_iterator end(aEnd);
  94     const char* beginning = aStart.get();
  95     if (FindInReadable(aBoundaryString, aStart, end)) {
  96       // We either should find the body immediately, or after 2 chars with the
  97       // 2 chars being '-', everything else is failure.
  98       if ((aStart.get() - beginning) == 0) {
  99         aStart.advance(aBoundaryString.Length());
 100         return true;
 101       }
 102
 103       if ((aStart.get() - beginning) == 2) {
 104         if (*(--aStart) == '-' && *(--aStart) == '-') {
 105           aStart.advance(aBoundaryString.Length() + 2);
 106           return true;
 107         }
 108       }
 109     }
 110
 111     return false;
 112   }
 113
 114   bool ParseHeader(nsACString::const_iterator& aStart,
 115                    nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
 116     nsAutoCString headerName, headerValue;
 117     if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
 118                                   aWasEmptyHeader)) {
 119       return false;
 120     }
 121     if (*aWasEmptyHeader) {
 122       return true;
 123     }
 124
 125     if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
 126       bool seenFormData = false;
 127       for (const nsACString& token :
 128            nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
 129         if (token.IsEmpty()) {
 130           continue;
 131         }
 132
 133         if (token.EqualsLiteral("form-data")) {
 134           seenFormData = true;
 135           continue;
 136         }
 137
 138         if (seenFormData && StringBeginsWith(token, "name="_ns)) {
 139           mName = StringTail(token, token.Length() - 5);
 140           mName.Trim(" \"");
 141           continue;
 142         }
 143
 144         if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
 145           mFilename = StringTail(token, token.Length() - 9);
 146           mFilename.Trim(" \"");
 147           continue;
 148         }
 149       }
 150
 151       if (mName.IsVoid()) {
 152         // Could not parse a valid entry name.
 153         return false;
 154       }
 155     } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
 156       mContentType = headerValue;
 157     }
 158
 159     return true;
 160   }
 161
 162   // The end of a body is marked by a CRLF followed by the boundary. So the
 163   // CRLF is part of the boundary and not the body, but any prior CRLFs are
 164   // part of the body. This will position the iterator at the beginning of the
 165   // boundary (after the CRLF).
 166   bool ParseBody(const nsACString& aBoundaryString,
 167                  nsACString::const_iterator& aStart,
 168                  nsACString::const_iterator& aEnd) {
 169     const char* beginning = aStart.get();
 170
 171     // Find the boundary marking the end of the body.
 172     nsACString::const_iterator end(aEnd);
 173     if (!FindInReadable(aBoundaryString, aStart, end)) {
 174       return false;
 175     }
 176
 177     // We found a boundary, strip the just prior CRLF, and consider
 178     // everything else the body section.
 179     if (aStart.get() - beginning < 2) {
 180       // Only the first entry can have a boundary right at the beginning. Even
 181       // an empty body will have a CRLF before the boundary. So this is
 182       // a failure.
 183       return false;
 184     }
 185
 186     // Check that there is a CRLF right before the boundary.
 187     aStart.advance(-2);
 188
 189     // Skip optional hyphens.
 190     if (*aStart == '-' && *(aStart.get() + 1) == '-') {
 191       if (aStart.get() - beginning < 2) {
 192         return false;
 193       }
 194
 195       aStart.advance(-2);
 196     }
 197
 198     if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
 199       return false;
 200     }
 201
 202     nsAutoCString body(beginning, aStart.get() - beginning);
 203
 204     // Restore iterator to after the \r\n as we promised.
 205     // We do not need to handle the extra hyphens case since our boundary
 206     // parser in PushOverBoundary()
 207     aStart.advance(2);
 208
 209     if (!mFormData) {
 210       mFormData = new FormData();
 211     }
 212
 213     NS_ConvertUTF8toUTF16 name(mName);
 214
 215     if (mFilename.IsVoid()) {
 216       ErrorResult rv;
 217       mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
 218       MOZ_ASSERT(!rv.Failed());
 219     } else {
 220       // Unfortunately we've to copy the data first since all our strings are
 221       // going to free it. We also need fallible alloc, so we can't just use
 222       // ToNewCString().
 223       char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
 224       nsCString::const_iterator bodyIter, bodyEnd;
 225       body.BeginReading(bodyIter);
 226       body.EndReading(bodyEnd);
 227       char* p = copy;
 228       while (bodyIter != bodyEnd) {
 229         *p++ = *bodyIter++;
 230       }
 231       p = nullptr;
 232
 233       RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
 234           mParentObject, reinterpret_cast<void*>(copy), body.Length(),
 235           NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
 236           /* aLastModifiedDate */ 0);
 237       if (NS_WARN_IF(!file)) {
 238         return false;
 239       }
 240
 241       Optional<nsAString> dummy;
 242       ErrorResult rv;
 243       mFormData->Append(name, *file, dummy, rv);
 244       if (NS_WARN_IF(rv.Failed())) {
 245         rv.SuppressException();
 246         return false;
 247       }
 248     }
 249
 250     return true;
 251   }
 252
 253  public:
 254   FormDataParser(const nsACString& aMimeType, const nsACString& aData,
 255                  nsIGlobalObject* aParent)
 256       : mMimeType(aMimeType),
 257         mData(aData),
 258         mState(START_PART),
 259         mParentObject(aParent) {}
 260
 261   bool Parse() {
 262     if (mData.IsEmpty()) {
 263       return false;
 264     }
 265
 266     // Determine boundary from mimetype.
 267     const char* boundaryId = nullptr;
 268     boundaryId = strstr(mMimeType.BeginWriting(), "boundary");
 269     if (!boundaryId) {
 270       return false;
 271     }
 272
 273     boundaryId = strchr(boundaryId, '=');
 274     if (!boundaryId) {
 275       return false;
 276     }
 277
 278     // Skip over '='.
 279     boundaryId++;
 280
 281     char* attrib = (char*)strchr(boundaryId, ';');
 282     if (attrib) *attrib = '\0';
 283
 284     nsAutoCString boundaryString(boundaryId);
 285     if (attrib) *attrib = ';';
 286
 287     boundaryString.Trim(" \"");
 288
 289     if (boundaryString.Length() == 0) {
 290       return false;
 291     }
 292
 293     nsACString::const_iterator start, end;
 294     mData.BeginReading(start);
 295     // This should ALWAYS point to the end of data.
 296     // Helpers make copies.
 297     mData.EndReading(end);
 298
 299     while (start != end) {
 300       switch (mState) {
 301         case START_PART:
 302           mName.SetIsVoid(true);
 303           mFilename.SetIsVoid(true);
 304           mContentType = "text/plain"_ns;
 305
 306           // MUST start with boundary.
 307           if (!PushOverBoundary(boundaryString, start, end)) {
 308             return false;
 309           }
 310
 311           if (start != end && *start == '-') {
 312             // End of data.
 313             if (!mFormData) {
 314               mFormData = new FormData();
 315             }
 316             return true;
 317           }
 318
 319           if (!PushOverLine(start, end)) {
 320             return false;
 321           }
 322           mState = PARSE_HEADER;
 323           break;
 324
 325         case PARSE_HEADER:
 326           bool emptyHeader;
 327           if (!ParseHeader(start, end, &emptyHeader)) {
 328             return false;
 329           }
 330
 331           if (emptyHeader && !PushOverLine(start, end)) {
 332             return false;
 333           }
 334
 335           mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
 336           break;
 337
 338         case PARSE_BODY:
 339           if (mName.IsVoid()) {
 340             NS_WARNING(
 341                 "No content-disposition header with a valid name was "
 342                 "found. Failing at body parse.");
 343             return false;
 344           }
 345
 346           if (!ParseBody(boundaryString, start, end)) {
 347             return false;
 348           }
 349
 350           mState = START_PART;
 351           break;
 352
 353         default:
 354           MOZ_CRASH("Invalid case");
 355       }
 356     }
 357
 358     MOZ_ASSERT_UNREACHABLE("Should never reach here.");
 359     return false;
 360   }
 361
 362   already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
 363 };
 364 }  // namespace
 365
 366 // static
 367 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
 368                                   JS::MutableHandle<JSObject*> aValue,
 369                                   uint32_t aInputLength, uint8_t* aInput,
 370                                   ErrorResult& aRv) {
 371   JS::Rooted<JSObject*> arrayBuffer(aCx);
 372   arrayBuffer = JS::NewArrayBufferWithContents(aCx, aInputLength,
 373                                                reinterpret_cast<void*>(aInput));
 374   if (!arrayBuffer) {
 375     JS_ClearPendingException(aCx);
 376     aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
 377     return;
 378   }
 379   aValue.set(arrayBuffer);
 380 }
 381
 382 // static
 383 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
 384                                              const nsString& aMimeType,
 385                                              uint32_t aInputLength,
 386                                              uint8_t* aInput,
 387                                              ErrorResult& aRv) {
 388   RefPtr<Blob> blob = Blob::CreateMemoryBlob(
 389       aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
 390
 391   if (!blob) {
 392     aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
 393     return nullptr;
 394   }
 395   return blob.forget();
 396 }
 397
 398 // static
 399 already_AddRefed<FormData> BodyUtil::ConsumeFormData(nsIGlobalObject* aParent,
 400                                                      const nsCString& aMimeType,
 401                                                      const nsCString& aStr,
 402                                                      ErrorResult& aRv) {
 403   constexpr auto formDataMimeType = "multipart/form-data"_ns;
 404
 405   // Allow semicolon separated boundary/encoding suffix like
 406   // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
 407   bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
 408
 409   if (isValidFormDataMimeType &&
 410       aMimeType.Length() > formDataMimeType.Length()) {
 411     isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
 412   }
 413
 414   if (isValidFormDataMimeType) {
 415     FormDataParser parser(aMimeType, aStr, aParent);
 416     if (!parser.Parse()) {
 417       aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
 418       return nullptr;
 419     }
 420
 421     RefPtr<FormData> fd = parser.GetFormData();
 422     MOZ_ASSERT(fd);
 423     return fd.forget();
 424   }
 425
 426   constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
 427   bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
 428
 429   if (isValidUrlEncodedMimeType &&
 430       aMimeType.Length() > urlDataMimeType.Length()) {
 431     isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
 432   }
 433
 434   if (isValidUrlEncodedMimeType) {
 435     RefPtr<FormData> fd = new FormData(aParent);
 436     DebugOnly<bool> status = URLParams::Parse(
 437         aStr, [&fd](const nsAString& aName, const nsAString& aValue) {
 438           ErrorResult rv;
 439           fd->Append(aName, aValue, rv);
 440           MOZ_ASSERT(!rv.Failed());
 441           return true;
 442         });
 443     MOZ_ASSERT(status);
 444
 445     return fd.forget();
 446   }
 447
 448   aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
 449   return nullptr;
 450 }
 451
 452 // static
 453 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
 454                                nsString& aText) {
 455   nsresult rv =
 456       UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
 457   if (NS_FAILED(rv)) {
 458     return rv;
 459   }
 460   return NS_OK;
 461 }
 462
 463 // static
 464 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
 465                            const nsString& aStr, ErrorResult& aRv) {
 466   aRv.MightThrowJSException();
 467
 468   JS::Rooted<JS::Value> json(aCx);
 469   if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
 470     if (!JS_IsExceptionPending(aCx)) {
 471       aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
 472       return;
 473     }
 474
 475     JS::Rooted<JS::Value> exn(aCx);
 476     DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
 477     MOZ_ASSERT(gotException);
 478
 479     JS_ClearPendingException(aCx);
 480     aRv.ThrowJSException(aCx, exn);
 481     return;
 482   }
 483
 484   aValue.set(json);
 485 }
 486
 487 }  // namespace mozilla::dom