1 """Guess the MIME type of a file.
3 This module defines two useful functions:
5 guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
7 guess_extension(type, strict=True) -- guess the extension for a given MIME type.
9 It also contains the following, for tuning the behavior:
13 knownfiles -- list of files to parse
14 inited -- flag set when init() has been called
15 suffix_map -- dictionary mapping suffixes to suffixes
16 encodings_map -- dictionary mapping suffixes to encodings
17 types_map -- dictionary mapping suffixes to types
21 init([files]) -- parse a list of files, default knownfiles
22 read_mime_types(file) -- parse one file, return a dictionary or None
30 "guess_type","guess_extension","guess_all_extensions",
31 "add_type","read_mime_types","init"
36 "/etc/httpd/mime.types", # Mac OS X
37 "/etc/httpd/conf/mime.types", # Apache
38 "/etc/apache/mime.types", # Apache 1
39 "/etc/apache2/mime.types", # Apache 2
40 "/usr/local/etc/httpd/conf/mime.types",
41 "/usr/local/lib/netscape/mime.types",
42 "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
43 "/usr/local/etc/mime.types", # Apache 1.3
51 """MIME-types datastore.
53 This datastore can handle information from mime.types-style files
54 and supports basic determination of MIME type from a filename or
55 URL, and can guess a reasonable extension given a MIME type.
58 def __init__(self
, filenames
=(), strict
=True):
61 self
.encodings_map
= encodings_map
.copy()
62 self
.suffix_map
= suffix_map
.copy()
63 self
.types_map
= ({}, {}) # dict for (non-strict, strict)
64 self
.types_map_inv
= ({}, {})
65 for (ext
, type) in types_map
.items():
66 self
.add_type(type, ext
, True)
67 for (ext
, type) in common_types
.items():
68 self
.add_type(type, ext
, False)
69 for name
in filenames
:
70 self
.read(name
, strict
)
72 def add_type(self
, type, ext
, strict
=True):
73 """Add a mapping between a type and an extension.
75 When the extension is already known, the new
76 type will replace the old one. When the type
77 is already known the extension will be added
78 to the list of known extensions.
80 If strict is true, information will be added to
81 list of standard types, else to the list of non-standard
84 self
.types_map
[strict
][ext
] = type
85 exts
= self
.types_map_inv
[strict
].setdefault(type, [])
89 def guess_type(self
, url
, strict
=True):
90 """Guess the type of a file based on its URL.
92 Return value is a tuple (type, encoding) where type is None if
93 the type can't be guessed (no or unknown suffix) or a string
94 of the form type/subtype, usable for a MIME Content-type
95 header; and encoding is None for no encoding or the name of
96 the program used to encode (e.g. compress or gzip). The
97 mappings are table driven. Encoding suffixes are case
98 sensitive; type suffixes are first tried case sensitive, then
101 The suffixes .tgz, .taz and .tz (case sensitive!) are all
102 mapped to '.tar.gz'. (This is table-driven too, using the
103 dictionary suffix_map.)
105 Optional `strict' argument when False adds a bunch of commonly found,
106 but non-standard types.
108 scheme
, url
= urllib
.parse
.splittype(url
)
110 # syntax of data URLs:
111 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
112 # mediatype := [ type "/" subtype ] *( ";" parameter )
114 # parameter := attribute "=" value
115 # type/subtype defaults to "text/plain"
116 comma
= url
.find(',')
120 semi
= url
.find(';', 0, comma
)
125 if '=' in type or '/' not in type:
127 return type, None # never compressed, so encoding is None
128 base
, ext
= posixpath
.splitext(url
)
129 while ext
in self
.suffix_map
:
130 base
, ext
= posixpath
.splitext(base
+ self
.suffix_map
[ext
])
131 if ext
in self
.encodings_map
:
132 encoding
= self
.encodings_map
[ext
]
133 base
, ext
= posixpath
.splitext(base
)
136 types_map
= self
.types_map
[True]
138 return types_map
[ext
], encoding
139 elif ext
.lower() in types_map
:
140 return types_map
[ext
.lower()], encoding
142 return None, encoding
143 types_map
= self
.types_map
[False]
145 return types_map
[ext
], encoding
146 elif ext
.lower() in types_map
:
147 return types_map
[ext
.lower()], encoding
149 return None, encoding
151 def guess_all_extensions(self
, type, strict
=True):
152 """Guess the extensions for a file based on its MIME type.
154 Return value is a list of strings giving the possible filename
155 extensions, including the leading dot ('.'). The extension is not
156 guaranteed to have been associated with any particular data stream,
157 but would be mapped to the MIME type `type' by guess_type().
159 Optional `strict' argument when false adds a bunch of commonly found,
160 but non-standard types.
163 extensions
= self
.types_map_inv
[True].get(type, [])
165 for ext
in self
.types_map_inv
[False].get(type, []):
166 if ext
not in extensions
:
167 extensions
.append(ext
)
170 def guess_extension(self
, type, strict
=True):
171 """Guess the extension for a file based on its MIME type.
173 Return value is a string giving a filename extension,
174 including the leading dot ('.'). The extension is not
175 guaranteed to have been associated with any particular data
176 stream, but would be mapped to the MIME type `type' by
177 guess_type(). If no extension can be guessed for `type', None
180 Optional `strict' argument when false adds a bunch of commonly found,
181 but non-standard types.
183 extensions
= self
.guess_all_extensions(type, strict
)
188 def read(self
, filename
, strict
=True):
190 Read a single mime.types-format file, specified by pathname.
192 If strict is true, information will be added to
193 list of standard types, else to the list of non-standard
197 self
.readfp(fp
, strict
)
200 def readfp(self
, fp
, strict
=True):
202 Read a single mime.types-format file.
204 If strict is true, information will be added to
205 list of standard types, else to the list of non-standard
213 for i
in range(len(words
)):
214 if words
[i
][0] == '#':
219 type, suffixes
= words
[0], words
[1:]
220 for suff
in suffixes
:
221 self
.add_type(type, '.' + suff
, strict
)
def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # The shared database is created on first use.
    if _db is None:
        init()
    return _db.guess_type(url, strict)
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  The result of the shared database's
    guess_all_extensions() is returned unchanged.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # The shared database is created on first use.
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)
def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # The shared database is created on first use.
    if _db is None:
        init()
    return _db.guess_extension(type, strict)
def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    When the extension is already known, the new
    type will replace the old one. When the type
    is already known the extension will be added
    to the list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    # The shared database is created on first use.
    if _db is None:
        init()
    return _db.add_type(type, ext, strict)
def init(files=None):
    """Build the shared database and publish its tables at module level.

    files -- iterable of mime.types-format pathnames to parse; defaults
             to knownfiles.  Entries that are not existing files are
             skipped.

    On return the module globals encodings_map, suffix_map, types_map and
    common_types refer to the tables of the new database, and the
    database itself is stored in _db.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True  # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for file in files:
        if os.path.isfile(file):
            # Close each file as soon as it has been parsed.
            with open(file) as fp:
                db.readfp(fp)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db
def read_mime_types(file):
    """Parse one mime.types-format file into a fresh database.

    Returns the standard (strict) suffix-to-type dictionary of a new
    MimeTypes instance after *file* has been read into it, or None if
    *file* cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    # The with-block closes the file once parsing is done or fails.
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
def _default_mime_types():
    """Install the built-in tables as module globals.

    Sets suffix_map, encodings_map, types_map (standard types) and
    common_types (non-standard types).
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the suffix pair each one stands for.
    suffix_map = {
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
    }

    # Suffixes that name an encoding program rather than a type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at https://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    # Each suffix appears once.  '.cdf' and '.xls' used to be listed twice;
    # only the later value ever took effect, and that value is kept here.
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/x-javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        '.xls': 'application/vnd.ms-excel',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=False is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
    }


_default_mime_types()
if __name__ == '__main__':
    import getopt
    import sys

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text, then *msg* if given, and exit with *code*."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True       # cleared by --lenient
    extension = False   # set by --extension
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = False
        elif opt in ('-e', '--extension'):
            extension = True

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print('type:', guess, 'encoding:', encoding)