metadata.py

   1 #!/usr/bin/env python
   2
   3 import os
   4 import subprocess
   5 import sys
   6 from datetime import datetime
   7 from xml.dom import minidom
   8 try:
   9     import plistlib
  10 except:
  11     pass
  12
  13 import mutagen
  14 from lrucache import LRUCache
  15
  16 import config
  17
# Copyright notice that from_container() and from_details() remove from
# program descriptions.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Caches of already-parsed metadata dicts, keyed by the file's full path:
# tivo_cache is used by from_tivo(), mp4_cache by from_moov() and
# dvrms_cache by from_dvrms().
# NOTE(review): the 50 is presumably the maximum number of entries kept --
# confirm against the lrucache module.
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
  24
  25 def tag_data(element, tag):
  26     for name in tag.split('/'):
  27         new_element = element.getElementsByTagName(name)
  28         if not new_element:
  29             return ''
  30         element = new_element[0]
  31     if not element.firstChild:
  32         return ''
  33     return element.firstChild.data
  34
  35 def _vtag_data(element, tag):
  36     for name in tag.split('/'):
  37         new_element = element.getElementsByTagName(name)
  38         if not new_element:
  39             return []
  40         element = new_element[0]
  41     elements = element.getElementsByTagName('element')
  42     return [x.firstChild.data for x in elements if x.firstChild]
  43
  44 def _tag_value(element, tag):
  45     item = element.getElementsByTagName(tag)
  46     if item:
  47         value = item[0].attributes['value'].value
  48         name = item[0].firstChild.data
  49         return name[0] + value[0]
  50
  51 def from_moov(full_path):
  52     if full_path in mp4_cache:
  53         return mp4_cache[full_path]
  54
  55     metadata = {}
  56     len_desc = 0
  57
  58     try:
  59         mp4meta = mutagen.File(full_path)
  60         assert(mp4meta)
  61     except:
  62         mp4_cache[full_path] = {}
  63         return {}
  64
  65     # The following 1-to-1 correspondence of atoms to pyTivo
  66     # variables is TV-biased
  67     keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
  68             'tvsh': 'seriesTitle'}
  69
  70     # Possible TV values: TV-Y7 TV-Y TV-G TV-PG TV-14 TV-MA Unrated
  71     # Possible MPAA values: G PG PG-13 R NC-17 Unrated
  72     ratings = {'TV-Y7': 'x1', 'TV-Y': 'x2', 'TV-G': 'x3',
  73                'TV-PG': 'x4', 'TV-14': 'x5', 'TV-MA': 'x6',
  74                'Unrated': 'x7', 'G': 'G1', 'PG': 'P2',
  75                'PG-13': 'P3', 'R': 'R4', 'NC-17': 'N6'}
  76
  77     for key, value in mp4meta.items():
  78         if type(value) == list:
  79             value = value[0]
  80         if key == 'stik':
  81             metadata['isEpisode'] = ['false', 'true'][value == 'TV Show']
  82         elif key in keys:
  83             metadata[keys[key]] = value
  84         # These keys begin with the copyright symbol \xA9
  85         elif key == '\xa9day':
  86             if len(value) == 4:
  87                 value += '-01-01T16:00:00Z'
  88             metadata['originalAirDate'] = value
  89             #metadata['time'] = value
  90         elif key in ['\xa9gen', 'gnre']:
  91             for k in ('vProgramGenre', 'vSeriesGenre'):
  92                 if k in metadata:
  93                     metadata[k].append(value)
  94                 else:
  95                     metadata[k] = [value]
  96         elif key == '\xa9nam':
  97             if 'tvsh' in mp4meta:
  98                 metadata['episodeTitle'] = value
  99             else:
 100                 metadata['title'] = value
 101
 102         # Description in desc, cmt, and/or ldes tags. Keep the longest.
 103         elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
 104             metadata['description'] = value
 105             len_desc = len(value)
 106
 107         # A common custom "reverse DNS format" tag
 108         elif (key == '----:com.apple.iTunes:iTunEXTC' and
 109               ('us-tv' in value or 'mpaa' in value)):
 110             rating = value.split("|")[1]
 111             if rating in ratings:
 112                 if 'us-tv' in value:
 113                     metadata['tvRating'] = ratings[rating]
 114                 elif 'mpaa' in value:
 115                     metadata['mpaaRating'] = ratings[rating]
 116
 117         # Actors, directors, producers, AND screenwriters may be in a long
 118         # embedded XML plist, with key '----' and rDNS 'iTunMOVI'. Ughh!
 119
 120     mp4_cache[full_path] = metadata
 121     return metadata
 122
 123 def from_dvrms(full_path):
 124     if full_path in dvrms_cache:
 125         return dvrms_cache[full_path]
 126
 127     metadata = {}
 128
 129     try:
 130         meta = mutagen.File(full_path)
 131         assert(meta)
 132     except:
 133         dvrms_cache[full_path] = {}
 134         return {}
 135
 136     keys = {'title': ['Title'],
 137             'description': ['Description', 'WM/SubTitleDescription'],
 138             'episodeTitle': ['WM/SubTitle'],
 139             'callsign': ['WM/MediaStationCallSign'],
 140             'displayMajorNumber': ['WM/MediaOriginalChannel'],
 141             'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
 142             'rating': ['WM/ParentalRating'],
 143             'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}
 144
 145     ratings = {'TV-Y7': 'x1', 'TV-Y': 'x2', 'TV-G': 'x3', 'TV-PG': 'x4',
 146                'TV-14': 'x5', 'TV-MA': 'x6', 'TV-NR': 'x7'}
 147
 148     for tagname in keys:
 149         for tag in keys[tagname]:
 150             try:
 151                 if tag in meta:
 152                     value = str(meta[tag][0])
 153                     if value:
 154                         metadata[tagname] = value
 155             except:
 156                 pass
 157
 158     if 'episodeTitle' in metadata and 'title' in metadata:
 159         metadata['seriesTitle'] = metadata['title']
 160     if 'genre' in metadata:
 161         value = metadata['genre'].split(',')
 162         metadata['vProgramGenre'] = value
 163         metadata['vSeriesGenre'] = value
 164         del metadata['genre']
 165     if 'credits' in metadata:
 166         value = [x.split('/') for x in metadata['credits'].split(';')]
 167         metadata['vActor'] = value[0] + value[3]
 168         metadata['vDirector'] = value[1]
 169         del metadata['credits']
 170     if 'rating' in metadata:
 171         rating = metadata['rating']
 172         if rating in ratings:
 173             metadata['tvRating'] = ratings[rating]
 174         del metadata['rating']
 175
 176     dvrms_cache[full_path] = metadata
 177     return metadata
 178
 179 def from_eyetv(full_path):
 180     ratings = {'TVY7': 'x1', 'TVY': 'x2', 'TVG': 'x3', 'TVPG': 'x4',
 181                'TV14': 'x5', 'TVMA': 'x6', 'TVNR': 'x7',
 182                'G': 'G1', 'PG': 'P2', 'PG-13': 'P3',
 183                'R': 'R4', 'NC-17': 'N6'}
 184     keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
 185             'DESCRIPTION': 'description', 'YEAR': 'movieYear',
 186             'EPISODENUM': 'episodeNumber'}
 187     metadata = {}
 188     path, name = os.path.split(full_path)
 189     eyetvp = [x for x in os.listdir(path) if x.endswith('.eyetvp')][0]
 190     eyetvp = os.path.join(path, eyetvp)
 191     eyetv = plistlib.readPlist(eyetvp)
 192     if 'epg info' in eyetv:
 193         info = eyetv['epg info']
 194         for key in keys:
 195             if info[key]:
 196                 metadata[keys[key]] = info[key]
 197         if info['SUBTITLE']:
 198             metadata['seriesTitle'] = info['TITLE']
 199         if info['ACTORS']:
 200             metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
 201         if info['DIRECTOR']:
 202             metadata['vDirector'] = [info['DIRECTOR']]
 203         if info['TV_RATING']:
 204             metadata['tvRating'] = ratings[info['TV_RATING']]
 205         if info['STAR_RATING']:
 206             metadata['starRating'] = 'x%d' % (len(info['STAR_RATING']) * 2 - 1)
 207         mpaa = info['MPAA_RATING']
 208         if mpaa and mpaa != 'NR':
 209             metadata['mpaaRating'] = ratings[mpaa]
 210         # movieYear must be set for the mpaa/star ratings to work
 211         if (('mpaaRating' in metadata or 'starRating' in metadata) and
 212             'movieYear' not in metadata):
 213             metadata['movieYear'] = eyetv['info']['start'].year
 214     return metadata
 215
 216 def from_text(full_path):
 217     metadata = {}
 218     path, name = os.path.split(full_path)
 219     title, ext = os.path.splitext(name)
 220     for metafile in [os.path.join(path, title) + '.properties',
 221                      os.path.join(path, 'default.txt'), full_path + '.txt',
 222                      os.path.join(path, '.meta', 'default.txt'),
 223                      os.path.join(path, '.meta', name) + '.txt']:
 224         if os.path.exists(metafile):
 225             sep = ':='[metafile.endswith('.properties')]
 226             for line in file(metafile, 'U'):
 227                 if line.strip().startswith('#') or not sep in line:
 228                     continue
 229                 key, value = [x.strip() for x in line.split(sep, 1)]
 230                 if not key or not value:
 231                     continue
 232                 if key.startswith('v'):
 233                     if key in metadata:
 234                         metadata[key].append(value)
 235                     else:
 236                         metadata[key] = [value]
 237                 else:
 238                     metadata[key] = value
 239     return metadata
 240
 241 def basic(full_path):
 242     base_path, name = os.path.split(full_path)
 243     title, ext = os.path.splitext(name)
 244     mtime = os.stat(full_path).st_mtime
 245     if (mtime < 0):
 246         mtime = 0
 247     originalAirDate = datetime.fromtimestamp(mtime)
 248
 249     metadata = {'title': title,
 250                 'originalAirDate': originalAirDate.isoformat()}
 251     ext = ext.lower()
 252     if ext in ['.mp4', '.m4v', '.mov']:
 253         metadata.update(from_moov(full_path))
 254     elif ext in ['.dvr-ms', '.asf', '.wmv']:
 255         metadata.update(from_dvrms(full_path))
 256     elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
 257         metadata.update(from_eyetv(full_path))
 258     metadata.update(from_text(full_path))
 259
 260     return metadata
 261
 262 def from_container(xmldoc):
 263     metadata = {}
 264
 265     keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
 266             'description': 'Description', 'seriesId': 'SeriesId',
 267             'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
 268             'displayMajorNumber': 'SourceChannel', 'callsign': 'SourceStation'}
 269
 270     details = xmldoc.getElementsByTagName('Details')[0]
 271
 272     for key in keys:
 273         data = tag_data(details, keys[key])
 274         if data:
 275             if key == 'description':
 276                 data = data.replace(TRIBUNE_CR, '')
 277             elif key == 'tvRating':
 278                 data = 'x' + data
 279             elif key == 'displayMajorNumber':
 280                 if '-' in data:
 281                     data, metadata['displayMinorNumber'] = data.split('-')
 282             metadata[key] = data
 283
 284     return metadata
 285
 286 def from_details(xmldoc):
 287     metadata = {}
 288
 289     showing = xmldoc.getElementsByTagName('showing')[0]
 290     program = showing.getElementsByTagName('program')[0]
 291
 292     items = {'description': 'program/description',
 293              'title': 'program/title',
 294              'episodeTitle': 'program/episodeTitle',
 295              'episodeNumber': 'program/episodeNumber',
 296              'seriesId': 'program/series/uniqueId',
 297              'seriesTitle': 'program/series/seriesTitle',
 298              'originalAirDate': 'program/originalAirDate',
 299              'isEpisode': 'program/isEpisode',
 300              'movieYear': 'program/movieYear',
 301              'partCount': 'partCount',
 302              'partIndex': 'partIndex',
 303              'time': 'time'}
 304
 305     for item in items:
 306         data = tag_data(showing, items[item])
 307         if data:
 308             if item == 'description':
 309                 data = data.replace(TRIBUNE_CR, '')
 310             metadata[item] = data
 311
 312     vItems = ['vActor', 'vChoreographer', 'vDirector',
 313               'vExecProducer', 'vProgramGenre', 'vGuestStar',
 314               'vHost', 'vProducer', 'vWriter']
 315
 316     for item in vItems:
 317         data = _vtag_data(program, item)
 318         if data:
 319             metadata[item] = data
 320
 321     sb = showing.getElementsByTagName('showingBits')
 322     if sb:
 323         metadata['showingBits'] = sb[0].attributes['value'].value
 324
 325     for tag in ['starRating', 'mpaaRating', 'colorCode']:
 326         value = _tag_value(program, tag)
 327         if value:
 328             metadata[tag] = value
 329
 330     rating = _tag_value(showing, 'tvRating')
 331     if rating:
 332         metadata['tvRating'] = 'x' + rating[1]
 333
 334     return metadata
 335
 336 def from_tivo(full_path):
 337     if full_path in tivo_cache:
 338         return tivo_cache[full_path]
 339
 340     tdcat_path = config.get_bin('tdcat')
 341     tivo_mak = config.get_server('tivo_mak')
 342     if tdcat_path and tivo_mak:
 343         tcmd = [tdcat_path, '-m', tivo_mak, '-2', full_path]
 344         tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
 345         xmldoc = minidom.parse(tdcat.stdout)
 346         metadata = from_details(xmldoc)
 347         tivo_cache[full_path] = metadata
 348     else:
 349         metadata = {}
 350
 351     return metadata
 352
 353 if __name__ == '__main__':
 354     if len(sys.argv) > 1:
 355         metadata = {}
 356         ext = os.path.splitext(sys.argv[1])[1].lower()
 357         if ext == '.tivo':
 358             config.init([])
 359             metadata.update(from_tivo(sys.argv[1]))
 360         elif ext in ['.mp4', '.m4v', '.mov']:
 361             metadata.update(from_moov(sys.argv[1]))
 362         elif ext in ['.dvr-ms', '.asf', '.wmv']:
 363             metadata.update(from_dvrms(sys.argv[1]))
 364         for key in metadata:
 365             value = metadata[key]
 366             if type(value) == list:
 367                 for item in value:
 368                     print '%s: %s' % (key, item.encode('utf8'))
 369             else:
 370                 print '%s: %s' % (key, value.encode('utf8'))