mygpo/data/mimetype.py

   1 from collections import defaultdict
   2 from itertools import ifilter as filter
   3 import mimetypes
   4
   5 from django.utils.translation import ugettext_lazy as _
   6
   7
   8 # If 20% of the episodes of a podcast are of a given type,
   9 # then the podcast is considered to be of that type, too
  10 TYPE_THRESHOLD=.2
  11
  12
  13 CONTENT_TYPES = (_('image'), _('audio'), _('video'))
  14
  15 def get_podcast_types(episodes):
  16     """Returns the types of a podcast
  17
  18     A podcast is considered to be of a given types if the ratio of episodes that are of that type equals TYPE_THRESHOLD
  19     """
  20     has_mimetype = lambda e: e.mimetypes
  21     episodes = filter(has_mimetype, episodes)
  22     types = defaultdict()
  23     for e in episodes:
  24         for mimetype in e.mimetypes:
  25             t = get_type(mimetype)
  26             if not t:
  27                 continue
  28             types[t] = types.get(t, 0) + 1
  29
  30     max_episodes = sum(types.itervalues())
  31     l = list(types.iteritems())
  32     l.sort(key=lambda x: x[1], reverse=True)
  33
  34     return [x[0] for x in
  35         filter(lambda x: max_episodes / float(x[1]) >= TYPE_THRESHOLD, l)]
  36
  37
  38 def get_type(mimetype):
  39     """Returns the simplified type for the given mimetype
  40
  41     All "wanted" mimetypes are mapped to one of audio/video/image
  42     Everything else returns None
  43     """
  44     if not mimetype:
  45         return None
  46
  47     if '/' in mimetype:
  48         category, type = mimetype.split('/', 1)
  49         if category in ('audio', 'video', 'image'):
  50             return category
  51         elif type == 'ogg':
  52             return 'audio'
  53         elif type == 'x-youtube':
  54             return 'video'
  55         elif type == 'x-vimeo':
  56             return 'video'
  57     return None
  58
  59
  60 def get_mimetype(mimetype, url):
  61     """Returns the mimetype; if None is given it tries to guess it"""
  62
  63     if not mimetype:
  64         mimetype, _encoding = mimetypes.guess_type(url)
  65
  66     return mimetype