// Source: chromium-blink-merge.git (Merge Chromium + Blink git repositories)
// File:   net/base/mime_util.cc
// Blob:   0d511201fd0cdfdda126ad480bc0157e5385e33f
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <algorithm>
6 #include <iterator>
7 #include <map>
8 #include <string>
10 #include "base/containers/hash_tables.h"
11 #include "base/lazy_instance.h"
12 #include "base/logging.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "build/build_config.h"
19 #include "net/base/mime_util.h"
20 #include "net/base/platform_mime_util.h"
21 #include "net/http/http_util.h"
23 using std::string;
25 namespace net {
27 // Singleton utility class for mime types.
28 class MimeUtil : public PlatformMimeUtil {
29 public:
30 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
31 std::string* mime_type) const;
33 bool GetMimeTypeFromFile(const base::FilePath& file_path,
34 std::string* mime_type) const;
36 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
37 std::string* mime_type) const;
39 bool MatchesMimeType(const std::string &mime_type_pattern,
40 const std::string &mime_type) const;
42 bool ParseMimeTypeWithoutParameter(const std::string& type_string,
43 std::string* top_level_type,
44 std::string* subtype) const;
46 bool IsValidTopLevelMimeType(const std::string& type_string) const;
48 private:
49 friend struct base::DefaultLazyInstanceTraits<MimeUtil>;
51 MimeUtil();
53 bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
54 bool include_platform_types,
55 std::string* mime_type) const;
56 }; // class MimeUtil
58 // This variable is Leaky because we need to access it from WorkerPool threads.
59 static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
60 LAZY_INSTANCE_INITIALIZER;
62 static const MimeInfo primary_mappings[] = {
63 { "text/html", "html,htm,shtml,shtm" },
64 { "text/css", "css" },
65 { "text/xml", "xml" },
66 { "image/gif", "gif" },
67 { "image/jpeg", "jpeg,jpg" },
68 { "image/webp", "webp" },
69 { "image/png", "png" },
70 { "video/mp4", "mp4,m4v" },
71 { "audio/x-m4a", "m4a" },
72 { "audio/mp3", "mp3" },
73 { "video/ogg", "ogv,ogm" },
74 { "audio/ogg", "ogg,oga,opus" },
75 { "video/webm", "webm" },
76 { "audio/webm", "webm" },
77 { "audio/wav", "wav" },
78 { "audio/flac", "flac" },
79 { "application/xhtml+xml", "xhtml,xht,xhtm" },
80 { "application/x-chrome-extension", "crx" },
81 { "multipart/related", "mhtml,mht" }
84 static const MimeInfo secondary_mappings[] = {
85 { "application/octet-stream", "exe,com,bin" },
86 { "application/gzip", "gz" },
87 { "application/pdf", "pdf" },
88 { "application/postscript", "ps,eps,ai" },
89 { "application/javascript", "js" },
90 { "application/font-woff", "woff" },
91 { "image/bmp", "bmp" },
92 { "image/x-icon", "ico" },
93 { "image/vnd.microsoft.icon", "ico" },
94 { "image/jpeg", "jfif,pjpeg,pjp" },
95 { "image/tiff", "tiff,tif" },
96 { "image/x-xbitmap", "xbm" },
97 { "image/svg+xml", "svg,svgz" },
98 { "image/x-png", "png"},
99 { "message/rfc822", "eml" },
100 { "text/plain", "txt,text" },
101 { "text/html", "ehtml" },
102 { "application/rss+xml", "rss" },
103 { "application/rdf+xml", "rdf" },
104 { "text/xml", "xsl,xbl,xslt" },
105 { "application/vnd.mozilla.xul+xml", "xul" },
106 { "application/x-shockwave-flash", "swf,swl" },
107 { "application/pkcs7-mime", "p7m,p7c,p7z" },
108 { "application/pkcs7-signature", "p7s" },
109 { "application/x-mpegurl", "m3u8" },
112 const char* FindMimeType(const MimeInfo* mappings,
113 size_t mappings_len,
114 const std::string& ext) {
115 for (size_t i = 0; i < mappings_len; ++i) {
116 const char* extensions = mappings[i].extensions;
117 for (;;) {
118 size_t end_pos = strcspn(extensions, ",");
119 // The length check is required to prevent the StringPiece below from
120 // including uninitialized memory if ext is longer than extensions.
121 if (end_pos == ext.size() &&
122 base::EqualsCaseInsensitiveASCII(
123 base::StringPiece(extensions, ext.size()), ext))
124 return mappings[i].mime_type;
125 extensions += end_pos;
126 if (!*extensions)
127 break;
128 extensions += 1; // skip over comma
131 return NULL;
134 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
135 string* result) const {
136 return GetMimeTypeFromExtensionHelper(ext, true, result);
139 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
140 const base::FilePath::StringType& ext,
141 string* result) const {
142 return GetMimeTypeFromExtensionHelper(ext, false, result);
145 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
146 string* result) const {
147 base::FilePath::StringType file_name_str = file_path.Extension();
148 if (file_name_str.empty())
149 return false;
150 return GetMimeTypeFromExtension(file_name_str.substr(1), result);
153 bool MimeUtil::GetMimeTypeFromExtensionHelper(
154 const base::FilePath::StringType& ext,
155 bool include_platform_types,
156 string* result) const {
157 // Avoids crash when unable to handle a long file path. See crbug.com/48733.
158 const unsigned kMaxFilePathSize = 65536;
159 if (ext.length() > kMaxFilePathSize)
160 return false;
162 // Reject a string which contains null character.
163 base::FilePath::StringType::size_type nul_pos =
164 ext.find(FILE_PATH_LITERAL('\0'));
165 if (nul_pos != base::FilePath::StringType::npos)
166 return false;
168 // We implement the same algorithm as Mozilla for mapping a file extension to
169 // a mime type. That is, we first check a hard-coded list (that cannot be
170 // overridden), and then if not found there, we defer to the system registry.
171 // Finally, we scan a secondary hard-coded list to catch types that we can
172 // deduce but that we also want to allow the OS to override.
174 base::FilePath path_ext(ext);
175 const string ext_narrow_str = path_ext.AsUTF8Unsafe();
176 const char* mime_type = FindMimeType(
177 primary_mappings, arraysize(primary_mappings), ext_narrow_str);
178 if (mime_type) {
179 *result = mime_type;
180 return true;
183 if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
184 return true;
186 mime_type = FindMimeType(secondary_mappings, arraysize(secondary_mappings),
187 ext_narrow_str);
188 if (mime_type) {
189 *result = mime_type;
190 return true;
193 return false;
196 MimeUtil::MimeUtil() {
199 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
200 // must be matched by a parameter in the |mime_type|. If there are no
201 // parameters in the pattern, the match is a success.
203 // According rfc2045 keys of parameters are case-insensitive, while values may
204 // or may not be case-sensitive, but they are usually case-sensitive. So, this
205 // function matches values in *case-sensitive* manner, however note that this
206 // may produce some false negatives.
207 bool MatchesMimeTypeParameters(const std::string& mime_type_pattern,
208 const std::string& mime_type) {
209 typedef std::map<std::string, std::string> StringPairMap;
211 const std::string::size_type semicolon = mime_type_pattern.find(';');
212 const std::string::size_type test_semicolon = mime_type.find(';');
213 if (semicolon != std::string::npos) {
214 if (test_semicolon == std::string::npos)
215 return false;
217 base::StringPairs pattern_parameters;
218 base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
219 '=', ';', &pattern_parameters);
220 base::StringPairs test_parameters;
221 base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
222 '=', ';', &test_parameters);
224 // Put the parameters to maps with the keys converted to lower case.
225 StringPairMap pattern_parameter_map;
226 for (const auto& pair : pattern_parameters) {
227 pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
230 StringPairMap test_parameter_map;
231 for (const auto& pair : test_parameters) {
232 test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
235 if (pattern_parameter_map.size() > test_parameter_map.size())
236 return false;
238 for (const auto& parameter_pair : pattern_parameter_map) {
239 const auto& test_parameter_pair_it =
240 test_parameter_map.find(parameter_pair.first);
241 if (test_parameter_pair_it == test_parameter_map.end())
242 return false;
243 if (parameter_pair.second != test_parameter_pair_it->second)
244 return false;
248 return true;
251 // This comparison handles absolute maching and also basic
252 // wildcards. The plugin mime types could be:
253 // application/x-foo
254 // application/*
255 // application/*+xml
256 // *
257 // Also tests mime parameters -- all parameters in the pattern must be present
258 // in the tested type for a match to succeed.
259 bool MimeUtil::MatchesMimeType(const std::string& mime_type_pattern,
260 const std::string& mime_type) const {
261 if (mime_type_pattern.empty())
262 return false;
264 std::string::size_type semicolon = mime_type_pattern.find(';');
265 const std::string base_pattern(mime_type_pattern.substr(0, semicolon));
266 semicolon = mime_type.find(';');
267 const std::string base_type(mime_type.substr(0, semicolon));
269 if (base_pattern == "*" || base_pattern == "*/*")
270 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
272 const std::string::size_type star = base_pattern.find('*');
273 if (star == std::string::npos) {
274 if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
275 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
276 else
277 return false;
280 // Test length to prevent overlap between |left| and |right|.
281 if (base_type.length() < base_pattern.length() - 1)
282 return false;
284 base::StringPiece base_pattern_piece(base_pattern);
285 base::StringPiece left(base_pattern_piece.substr(0, star));
286 base::StringPiece right(base_pattern_piece.substr(star + 1));
288 if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
289 return false;
291 if (!right.empty() &&
292 !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
293 return false;
295 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
// Top-level media types accepted by IsValidTopLevelMimeType(), all lower
// case. See http://www.iana.org/assignments/media-types/media-types.xhtml
static const char* const legal_top_level_types[] = {
  "application",
  "audio",
  "example",
  "image",
  "message",
  "model",
  "multipart",
  "text",
  "video",
};
311 bool MimeUtil::ParseMimeTypeWithoutParameter(
312 const std::string& type_string,
313 std::string* top_level_type,
314 std::string* subtype) const {
315 std::vector<std::string> components = base::SplitString(
316 type_string, "/", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
317 if (components.size() != 2 ||
318 !HttpUtil::IsToken(components[0]) ||
319 !HttpUtil::IsToken(components[1]))
320 return false;
322 if (top_level_type)
323 *top_level_type = components[0];
324 if (subtype)
325 *subtype = components[1];
326 return true;
329 bool MimeUtil::IsValidTopLevelMimeType(const std::string& type_string) const {
330 std::string lower_type = base::ToLowerASCII(type_string);
331 for (size_t i = 0; i < arraysize(legal_top_level_types); ++i) {
332 if (lower_type.compare(legal_top_level_types[i]) == 0)
333 return true;
336 return type_string.size() > 2 &&
337 base::StartsWith(type_string, "x-",
338 base::CompareCase::INSENSITIVE_ASCII);
//----------------------------------------------------------------------------
// Wrappers for the singleton
//----------------------------------------------------------------------------
345 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
346 std::string* mime_type) {
347 return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
350 bool GetMimeTypeFromFile(const base::FilePath& file_path,
351 std::string* mime_type) {
352 return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
355 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
356 std::string* mime_type) {
357 return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
360 bool GetPreferredExtensionForMimeType(const std::string& mime_type,
361 base::FilePath::StringType* extension) {
362 return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
363 extension);
366 bool MatchesMimeType(const std::string& mime_type_pattern,
367 const std::string& mime_type) {
368 return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
371 bool ParseMimeTypeWithoutParameter(const std::string& type_string,
372 std::string* top_level_type,
373 std::string* subtype) {
374 return g_mime_util.Get().ParseMimeTypeWithoutParameter(
375 type_string, top_level_type, subtype);
378 bool IsValidTopLevelMimeType(const std::string& type_string) {
379 return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
382 namespace {
// Image MIME types queried against the platform when expanding "image/*".
// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static const char* const kStandardImageTypes[] = {
  "image/bmp",
  "image/cis-cod",
  "image/gif",
  "image/ief",
  "image/jpeg",
  "image/webp",
  "image/pict",
  "image/pipeg",
  "image/png",
  "image/svg+xml",
  "image/tiff",
  "image/vnd.microsoft.icon",
  "image/x-cmu-raster",
  "image/x-cmx",
  "image/x-icon",
  "image/x-portable-anymap",
  "image/x-portable-bitmap",
  "image/x-portable-graymap",
  "image/x-portable-pixmap",
  "image/x-rgb",
  "image/x-xbitmap",
  "image/x-xpixmap",
  "image/x-xwindowdump"
};
// Audio MIME types queried against the platform when expanding "audio/*".
static const char* const kStandardAudioTypes[] = {
  "audio/aac",
  "audio/aiff",
  "audio/amr",
  "audio/basic",
  "audio/flac",
  "audio/midi",
  "audio/mp3",
  "audio/mp4",
  "audio/mpeg",
  "audio/mpeg3",
  "audio/ogg",
  "audio/vorbis",
  "audio/wav",
  "audio/webm",
  "audio/x-m4a",
  "audio/x-ms-wma",
  "audio/vnd.rn-realaudio",
  "audio/vnd.wave"
};
// Video MIME types queried against the platform when expanding "video/*".
static const char* const kStandardVideoTypes[] = {
  "video/avi",
  "video/divx",
  "video/flc",
  "video/mp4",
  "video/mpeg",
  "video/ogg",
  "video/quicktime",
  "video/sd-video",
  "video/webm",
  "video/x-dv",
  "video/x-m4v",
  "video/x-mpeg",
  "video/x-ms-asf",
  "video/x-ms-wmv"
};
// One row of kStandardTypes: a "type/" prefix paired with the list of
// concrete MIME types to query for it.
struct StandardType {
  // Prefix including the trailing slash (e.g. "image/"), or null for the
  // catch-all row.
  const char* const leading_mime_type;
  // Array of |standard_types_len| MIME type strings; may be null when the
  // length is 0.
  const char* const* standard_types;
  size_t standard_types_len;
};
453 static const StandardType kStandardTypes[] = {
454 { "image/", kStandardImageTypes, arraysize(kStandardImageTypes) },
455 { "audio/", kStandardAudioTypes, arraysize(kStandardAudioTypes) },
456 { "video/", kStandardVideoTypes, arraysize(kStandardVideoTypes) },
457 { NULL, NULL, 0 }
460 void GetExtensionsFromHardCodedMappings(
461 const MimeInfo* mappings,
462 size_t mappings_len,
463 const std::string& leading_mime_type,
464 base::hash_set<base::FilePath::StringType>* extensions) {
465 for (size_t i = 0; i < mappings_len; ++i) {
466 if (base::StartsWith(mappings[i].mime_type, leading_mime_type,
467 base::CompareCase::INSENSITIVE_ASCII)) {
468 for (const base::StringPiece& this_extension : base::SplitStringPiece(
469 mappings[i].extensions, ",", base::TRIM_WHITESPACE,
470 base::SPLIT_WANT_ALL)) {
471 #if defined(OS_WIN)
472 extensions->insert(base::UTF8ToUTF16(this_extension));
473 #else
474 extensions->insert(this_extension.as_string());
475 #endif
481 void GetExtensionsHelper(
482 const char* const* standard_types,
483 size_t standard_types_len,
484 const std::string& leading_mime_type,
485 base::hash_set<base::FilePath::StringType>* extensions) {
486 for (size_t i = 0; i < standard_types_len; ++i) {
487 g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_types[i],
488 extensions);
491 // Also look up the extensions from hard-coded mappings in case that some
492 // supported extensions are not registered in the system registry, like ogg.
493 GetExtensionsFromHardCodedMappings(primary_mappings,
494 arraysize(primary_mappings),
495 leading_mime_type,
496 extensions);
498 GetExtensionsFromHardCodedMappings(secondary_mappings,
499 arraysize(secondary_mappings),
500 leading_mime_type,
501 extensions);
504 // Note that the elements in the source set will be appended to the target
505 // vector.
506 template<class T>
507 void HashSetToVector(base::hash_set<T>* source, std::vector<T>* target) {
508 size_t old_target_size = target->size();
509 target->resize(old_target_size + source->size());
510 size_t i = 0;
511 for (typename base::hash_set<T>::iterator iter = source->begin();
512 iter != source->end(); ++iter, ++i)
513 (*target)[old_target_size + i] = *iter;
516 } // namespace
518 void GetExtensionsForMimeType(
519 const std::string& unsafe_mime_type,
520 std::vector<base::FilePath::StringType>* extensions) {
521 if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
522 return;
524 const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
525 base::hash_set<base::FilePath::StringType> unique_extensions;
527 if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
528 std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
530 // Find the matching StandardType from within kStandardTypes, or fall
531 // through to the last (default) StandardType.
532 const StandardType* type = NULL;
533 for (size_t i = 0; i < arraysize(kStandardTypes); ++i) {
534 type = &(kStandardTypes[i]);
535 if (type->leading_mime_type &&
536 leading_mime_type == type->leading_mime_type)
537 break;
539 DCHECK(type);
540 GetExtensionsHelper(type->standard_types,
541 type->standard_types_len,
542 leading_mime_type,
543 &unique_extensions);
544 } else {
545 g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
546 &unique_extensions);
548 // Also look up the extensions from hard-coded mappings in case that some
549 // supported extensions are not registered in the system registry, like ogg.
550 GetExtensionsFromHardCodedMappings(primary_mappings,
551 arraysize(primary_mappings),
552 mime_type,
553 &unique_extensions);
555 GetExtensionsFromHardCodedMappings(secondary_mappings,
556 arraysize(secondary_mappings),
557 mime_type,
558 &unique_extensions);
561 HashSetToVector(&unique_extensions, extensions);
// Appends one multipart/form-data part to |post_data|: the "--boundary" line,
// a Content-Disposition header naming |value_name|, an optional Content-Type
// header (omitted when |content_type| is empty), a blank line, and |value|.
// Every line is terminated with CRLF. |value_name| is inserted verbatim, with
// no quoting or escaping. |post_data| must not be null.
void AddMultipartValueForUpload(const std::string& value_name,
                                const std::string& value,
                                const std::string& mime_boundary,
                                const std::string& content_type,
                                std::string* post_data) {
  DCHECK(post_data);
  // First line is the boundary.
  post_data->append("--" + mime_boundary + "\r\n");
  // Next line is the Content-disposition.
  post_data->append("Content-Disposition: form-data; name=\"" +
                    value_name + "\"\r\n");
  if (!content_type.empty()) {
    // If Content-type is specified, the next line is that.
    post_data->append("Content-Type: " + content_type + "\r\n");
  }
  // Leave an empty line and append the value.
  post_data->append("\r\n" + value + "\r\n");
}
// Appends the closing delimiter "--<mime_boundary>--" followed by CRLF to
// |post_data|. |post_data| must not be null.
void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
                                         std::string* post_data) {
  DCHECK(post_data);
  post_data->append("--" + mime_boundary + "--\r\n");
}
589 } // namespace net