metadata.py

   1 #!/usr/bin/env python
   2
   3 import os
   4 import subprocess
   5 import sys
   6 from datetime import datetime
   7 from xml.dom import minidom
   8 try:
   9     import plistlib
  10 except:
  11     pass
  12
  13 import mutagen
  14 from lrucache import LRUCache
  15
  16 import config
  17 import plugins.video.transcode
  18
# Copyright notice that from_container() and from_details() remove from
# description text.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# TV rating label -> numeric code.  Several spellings of each label are
# accepted (with and without the 'TV-' prefix and hyphen, plus 'X<n>'
# shorthands).  Lookups elsewhere in this file are done on upper-cased
# text, except from_mscore() and from_nfo() which look up the raw value.
TV_RATINGS = {'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4, 'TV-14': 5,
              'TV-MA': 6, 'TV-NR': 7, 'TVY7': 1, 'TVY': 2, 'TVG': 3,
              'TVPG': 4, 'TV14': 5, 'TVMA': 6, 'TVNR': 7, 'Y7': 1,
              'Y': 2, 'G': 3, 'PG': 4, '14': 5, 'MA': 6, 'NR': 7,
              'UNRATED': 7, 'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5,
              'X6': 6, 'X7': 7}

# MPAA rating label -> numeric code (note there is no code 7).
MPAA_RATINGS = {'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
                'NC-17': 6, 'NC17': 6, 'NR': 8, 'UNRATED': 8, 'G1': 1,
                'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8}

# Star rating label (numeric text or a run of asterisks) -> numeric code.
STAR_RATINGS = {'1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6,
                '4': 7, '*': 1, '**': 3, '***': 5, '****': 7}

# Reverse tables: metadata key -> {numeric code -> display label}.
# Used by get_mpaa(), get_tv(), get_stars() and dump().
HUMAN = {'mpaaRating': {1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X',
                        6: 'NC-17', 8: 'NR'},
         'tvRating': {1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14',
                      6: 'MA', 7: 'NR'},
         'starRating': {1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3',
                        6: '3.5', 7: '4'}}

# UTF-8 byte order mark; from_text() strips it from the start of lines.
BOM = '\xef\xbb\xbf'

# Per-source result caches keyed by path.
# NOTE(review): 50 is presumably the LRUCache capacity -- confirm against
# the lrucache module.
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
nfo_cache = LRUCache(50)

# True when running on Windows; from_tivo() re-encodes the file name
# passed to tdcat in that case.
mswindows = (sys.platform == "win32")
  51
  52 def get_mpaa(rating):
  53     return HUMAN['mpaaRating'].get(rating, 'NR')
  54
  55 def get_tv(rating):
  56     return HUMAN['tvRating'].get(rating, 'NR')
  57
  58 def get_stars(rating):
  59     return HUMAN['starRating'].get(rating, '')
  60
  61 def tag_data(element, tag):
  62     for name in tag.split('/'):
  63         new_element = element.getElementsByTagName(name)
  64         if not new_element:
  65             return ''
  66         element = new_element[0]
  67     if not element.firstChild:
  68         return ''
  69     return element.firstChild.data
  70
  71 def _vtag_data(element, tag):
  72     for name in tag.split('/'):
  73         new_element = element.getElementsByTagName(name)
  74         if not new_element:
  75             return []
  76         element = new_element[0]
  77     elements = element.getElementsByTagName('element')
  78     return [x.firstChild.data for x in elements if x.firstChild]
  79
  80 def _vtag_data_alternate(element, tag):
  81     elements = [element]
  82     for name in tag.split('/'):
  83         new_elements = []
  84         for elmt in elements:
  85             new_elements += elmt.getElementsByTagName(name)
  86         elements = new_elements
  87     return [x.firstChild.data for x in elements if x.firstChild]
  88
  89 def _tag_value(element, tag):
  90     item = element.getElementsByTagName(tag)
  91     if item:
  92         value = item[0].attributes['value'].value
  93         return int(value[0])
  94
  95 def from_moov(full_path):
  96     if full_path in mp4_cache:
  97         return mp4_cache[full_path]
  98
  99     metadata = {}
 100     len_desc = 0
 101
 102     try:
 103         mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
 104         assert(mp4meta)
 105     except:
 106         mp4_cache[full_path] = {}
 107         return {}
 108
 109     # The following 1-to-1 correspondence of atoms to pyTivo
 110     # variables is TV-biased
 111     keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
 112             'tvsh': 'seriesTitle'}
 113
 114     for key, value in mp4meta.items():
 115         if type(value) == list:
 116             value = value[0]
 117         if key == 'stik':
 118             metadata['isEpisode'] = ['false', 'true'][value == 'TV Show']
 119         elif key in keys:
 120             metadata[keys[key]] = value
 121         # These keys begin with the copyright symbol \xA9
 122         elif key == '\xa9day':
 123             if len(value) == 4:
 124                 value += '-01-01T16:00:00Z'
 125             metadata['originalAirDate'] = value
 126             #metadata['time'] = value
 127         elif key in ['\xa9gen', 'gnre']:
 128             for k in ('vProgramGenre', 'vSeriesGenre'):
 129                 if k in metadata:
 130                     metadata[k].append(value)
 131                 else:
 132                     metadata[k] = [value]
 133         elif key == '\xa9nam':
 134             if 'tvsh' in mp4meta:
 135                 metadata['episodeTitle'] = value
 136             else:
 137                 metadata['title'] = value
 138
 139         # Description in desc, cmt, and/or ldes tags. Keep the longest.
 140         elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
 141             metadata['description'] = value
 142             len_desc = len(value)
 143
 144         # A common custom "reverse DNS format" tag
 145         elif (key == '----:com.apple.iTunes:iTunEXTC' and
 146               ('us-tv' in value or 'mpaa' in value)):
 147             rating = value.split("|")[1].upper()
 148             if rating in TV_RATINGS and 'us-tv' in value:
 149                 metadata['tvRating'] = TV_RATINGS[rating]
 150             elif rating in MPAA_RATINGS and 'mpaa' in value:
 151                 metadata['mpaaRating'] = MPAA_RATINGS[rating]
 152
 153         # Actors, directors, producers, AND screenwriters may be in a long
 154         # embedded XML plist.
 155         elif (key == '----:com.apple.iTunes:iTunMOVI' and
 156               'plistlib' in sys.modules):
 157             items = {'cast': 'vActor', 'directors': 'vDirector',
 158                      'producers': 'vProducer', 'screenwriters': 'vWriter'}
 159             data = plistlib.readPlistFromString(value)
 160             for item in items:
 161                 if item in data:
 162                     metadata[items[item]] = [x['name'] for x in data[item]]
 163
 164     mp4_cache[full_path] = metadata
 165     return metadata
 166
 167 def from_mscore(rawmeta):
 168     metadata = {}
 169     keys = {'title': ['Title'],
 170             'description': ['Description', 'WM/SubTitleDescription'],
 171             'episodeTitle': ['WM/SubTitle'],
 172             'callsign': ['WM/MediaStationCallSign'],
 173             'displayMajorNumber': ['WM/MediaOriginalChannel'],
 174             'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
 175             'rating': ['WM/ParentalRating'],
 176             'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}
 177
 178     for tagname in keys:
 179         for tag in keys[tagname]:
 180             try:
 181                 if tag in rawmeta:
 182                     value = rawmeta[tag][0]
 183                     if type(value) not in (str, unicode):
 184                         value = str(value)
 185                     if value:
 186                         metadata[tagname] = value
 187             except:
 188                 pass
 189
 190     if 'episodeTitle' in metadata and 'title' in metadata:
 191         metadata['seriesTitle'] = metadata['title']
 192     if 'genre' in metadata:
 193         value = metadata['genre'].split(',')
 194         metadata['vProgramGenre'] = value
 195         metadata['vSeriesGenre'] = value
 196         del metadata['genre']
 197     if 'credits' in metadata:
 198         value = [x.split('/') for x in metadata['credits'].split(';')]
 199         if len(value) > 3:
 200             metadata['vActor'] = [x for x in (value[0] + value[3]) if x]
 201             metadata['vDirector'] = [x for x in value[1] if x]
 202         del metadata['credits']
 203     if 'rating' in metadata:
 204         rating = metadata['rating']
 205         if rating in TV_RATINGS:
 206             metadata['tvRating'] = TV_RATINGS[rating]
 207         del metadata['rating']
 208
 209     return metadata
 210
 211 def from_dvrms(full_path):
 212     if full_path in dvrms_cache:
 213         return dvrms_cache[full_path]
 214
 215     try:
 216         rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
 217         assert(rawmeta)
 218     except:
 219         dvrms_cache[full_path] = {}
 220         return {}
 221
 222     metadata = from_mscore(rawmeta)
 223     dvrms_cache[full_path] = metadata
 224     return metadata
 225
 226 def from_eyetv(full_path):
 227     keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
 228             'DESCRIPTION': 'description', 'YEAR': 'movieYear',
 229             'EPISODENUM': 'episodeNumber'}
 230     metadata = {}
 231     path = os.path.dirname(unicode(full_path, 'utf-8'))
 232     eyetvp = [x for x in os.listdir(path) if x.endswith('.eyetvp')][0]
 233     eyetvp = os.path.join(path, eyetvp)
 234     eyetv = plistlib.readPlist(eyetvp)
 235     if 'epg info' in eyetv:
 236         info = eyetv['epg info']
 237         for key in keys:
 238             if info[key]:
 239                 metadata[keys[key]] = info[key]
 240         if info['SUBTITLE']:
 241             metadata['seriesTitle'] = info['TITLE']
 242         if info['ACTORS']:
 243             metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
 244         if info['DIRECTOR']:
 245             metadata['vDirector'] = [info['DIRECTOR']]
 246
 247         for ptag, etag, ratings in [('tvRating', 'TV_RATING', TV_RATINGS),
 248                               ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
 249                               ('starRating', 'STAR_RATING', STAR_RATINGS)]:
 250             x = info[etag].upper()
 251             if x and x in ratings:
 252                 metadata[ptag] = ratings[x]
 253
 254         # movieYear must be set for the mpaa/star ratings to work
 255         if (('mpaaRating' in metadata or 'starRating' in metadata) and
 256             'movieYear' not in metadata):
 257             metadata['movieYear'] = eyetv['info']['start'].year
 258     return metadata
 259
 260 def from_text(full_path):
 261     metadata = {}
 262     full_path = unicode(full_path, 'utf-8')
 263     path, name = os.path.split(full_path)
 264     title, ext = os.path.splitext(name)
 265
 266     search_paths = []
 267     ptmp = full_path
 268     while ptmp:
 269         parent = os.path.dirname(ptmp)
 270         if ptmp != parent:
 271             ptmp = parent
 272         else:
 273             break
 274         search_paths.append(os.path.join(ptmp, 'default.txt'))
 275
 276     search_paths.append(os.path.join(path, title) + '.properties')
 277     search_paths.reverse()
 278
 279     search_paths += [full_path + '.txt',
 280                      os.path.join(path, '.meta', 'default.txt'),
 281                      os.path.join(path, '.meta', name) + '.txt']
 282
 283     for metafile in search_paths:
 284         if os.path.exists(metafile):
 285             sep = ':='[metafile.endswith('.properties')]
 286             for line in file(metafile, 'U'):
 287                 if line.startswith(BOM):
 288                     line = line[3:]
 289                 if line.strip().startswith('#') or not sep in line:
 290                     continue
 291                 key, value = [x.strip() for x in line.split(sep, 1)]
 292                 if not key or not value:
 293                     continue
 294                 if key.startswith('v'):
 295                     if key in metadata:
 296                         metadata[key].append(value)
 297                     else:
 298                         metadata[key] = [value]
 299                 else:
 300                     metadata[key] = value
 301
 302     for rating, ratings in [('tvRating', TV_RATINGS),
 303                             ('mpaaRating', MPAA_RATINGS),
 304                             ('starRating', STAR_RATINGS)]:
 305         x = metadata.get(rating, '').upper()
 306         if x in ratings:
 307             metadata[rating] = ratings[x]
 308
 309     return metadata
 310
 311 def basic(full_path):
 312     base_path, name = os.path.split(full_path)
 313     title, ext = os.path.splitext(name)
 314     mtime = os.stat(unicode(full_path, 'utf-8')).st_mtime
 315     if (mtime < 0):
 316         mtime = 0
 317     originalAirDate = datetime.utcfromtimestamp(mtime)
 318
 319     metadata = {'title': title,
 320                 'originalAirDate': originalAirDate.isoformat()}
 321     ext = ext.lower()
 322     if ext in ['.mp4', '.m4v', '.mov']:
 323         metadata.update(from_moov(full_path))
 324     elif ext in ['.dvr-ms', '.asf', '.wmv']:
 325         metadata.update(from_dvrms(full_path))
 326     elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
 327         metadata.update(from_eyetv(full_path))
 328     metadata.update(from_nfo(full_path))
 329     metadata.update(from_text(full_path))
 330
 331     return metadata
 332
 333 def from_container(xmldoc):
 334     metadata = {}
 335
 336     keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
 337             'description': 'Description', 'seriesId': 'SeriesId',
 338             'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
 339             'displayMajorNumber': 'SourceChannel', 'callsign': 'SourceStation',
 340             'showingBits': 'ShowingBits', 'mpaaRating': 'MpaaRating'}
 341
 342     details = xmldoc.getElementsByTagName('Details')[0]
 343
 344     for key in keys:
 345         data = tag_data(details, keys[key])
 346         if data:
 347             if key == 'description':
 348                 data = data.replace(TRIBUNE_CR, '')
 349             elif key == 'tvRating':
 350                 data = int(data)
 351             elif key == 'displayMajorNumber':
 352                 if '-' in data:
 353                     data, metadata['displayMinorNumber'] = data.split('-')
 354             metadata[key] = data
 355
 356     return metadata
 357
 358 def from_details(xml):
 359     metadata = {}
 360
 361     xmldoc = minidom.parse(xml)
 362     showing = xmldoc.getElementsByTagName('showing')[0]
 363     program = showing.getElementsByTagName('program')[0]
 364
 365     items = {'description': 'program/description',
 366              'title': 'program/title',
 367              'episodeTitle': 'program/episodeTitle',
 368              'episodeNumber': 'program/episodeNumber',
 369              'seriesId': 'program/series/uniqueId',
 370              'seriesTitle': 'program/series/seriesTitle',
 371              'originalAirDate': 'program/originalAirDate',
 372              'isEpisode': 'program/isEpisode',
 373              'movieYear': 'program/movieYear',
 374              'partCount': 'partCount',
 375              'partIndex': 'partIndex',
 376              'time': 'time'}
 377
 378     for item in items:
 379         data = tag_data(showing, items[item])
 380         if data:
 381             if item == 'description':
 382                 data = data.replace(TRIBUNE_CR, '')
 383             metadata[item] = data
 384
 385     vItems = ['vActor', 'vChoreographer', 'vDirector',
 386               'vExecProducer', 'vProgramGenre', 'vGuestStar',
 387               'vHost', 'vProducer', 'vWriter']
 388
 389     for item in vItems:
 390         data = _vtag_data(program, item)
 391         if data:
 392             metadata[item] = data
 393
 394     sb = showing.getElementsByTagName('showingBits')
 395     if sb:
 396         metadata['showingBits'] = sb[0].attributes['value'].value
 397
 398     #for tag in ['starRating', 'mpaaRating', 'colorCode']:
 399     for tag in ['starRating', 'mpaaRating']:
 400         value = _tag_value(program, tag)
 401         if value:
 402             metadata[tag] = value
 403
 404     rating = _tag_value(showing, 'tvRating')
 405     if rating:
 406         metadata['tvRating'] = rating
 407
 408     return metadata
 409
 410 def _nfo_vitems(source, metadata):
 411
 412     vItems = {'vGenre': 'genre',
 413               'vWriter': 'credits',
 414               'vDirector': 'director',
 415               'vActor': 'actor/name'}
 416
 417     for key in vItems:
 418         data = _vtag_data_alternate(source, vItems[key])
 419         if data:
 420             metadata.setdefault(key, [])
 421             for dat in data:
 422                 if not dat in metadata[key]:
 423                     metadata[key].append(dat)
 424
 425     if 'vGenre' in metadata:
 426         metadata['vSeriesGenre'] = metadata['vProgramGenre'] = metadata['vGenre']
 427
 428     return metadata
 429
 430 def _from_tvshow_nfo(tvshow_nfo_path):
 431     if tvshow_nfo_path in nfo_cache:
 432         return nfo_cache[tvshow_nfo_path]
 433
 434     items = {'description': 'plot',
 435              'title': 'title',
 436              'seriesTitle': 'showtitle',
 437              'starRating': 'rating',
 438              'tvRating': 'mpaa'}
 439
 440     nfo_cache[tvshow_nfo_path] = metadata = {}
 441
 442     try:
 443         xmldoc = minidom.parse(file(tvshow_nfo_path, 'U'))
 444     except:
 445         return metadata
 446
 447     tvshow = xmldoc.getElementsByTagName('tvshow')
 448     if tvshow:
 449         tvshow = tvshow[0]
 450     else:
 451         return metadata
 452
 453     for item in items:
 454         data = tag_data(tvshow, items[item])
 455         if data:
 456             metadata[item] = data
 457
 458     metadata = _nfo_vitems(tvshow, metadata)
 459
 460     nfo_cache[tvshow_nfo_path] = metadata
 461     return metadata
 462
 463 def _from_episode_nfo(nfo_path, xmldoc):
 464     metadata = {}
 465
 466     items = {'description': 'plot',
 467              'episodeTitle': 'title',
 468              'seriesTitle': 'showtitle',
 469              'originalAirDate': 'aired',
 470              'starRating': 'rating',
 471              'tvRating': 'mpaa'}
 472
 473     # find tvshow.nfo
 474     path = nfo_path
 475     while True:
 476         basepath = os.path.dirname(path)
 477         if path == basepath:
 478             break
 479         path = basepath
 480         tv_nfo = os.path.join(path, 'tvshow.nfo')
 481         if os.path.exists(tv_nfo):
 482             metadata.update(_from_tvshow_nfo(tv_nfo))
 483             break
 484
 485     episode = xmldoc.getElementsByTagName('episodedetails')
 486     if episode:
 487         episode = episode[0]
 488     else:
 489         return metadata
 490
 491     metadata['isEpisode'] = 'true'
 492     for item in items:
 493         data = tag_data(episode, items[item])
 494         if data:
 495             metadata[item] = data
 496
 497     season = tag_data(episode, 'displayseason')
 498     if not season or season == "-1":
 499         season = tag_data(episode, 'season')
 500     if not season:
 501         season = 1
 502
 503     ep_num = tag_data(episode, 'displayepisode')
 504     if not ep_num or ep_num == "-1":
 505         ep_num = tag_data(episode, 'episode')
 506     if ep_num and ep_num != "-1":
 507         metadata['episodeNumber'] = "%d%02d" % (int(season), int(ep_num))
 508
 509     if 'originalAirDate' in metadata:
 510         metadata['originalAirDate'] += 'T00:00:00Z'
 511
 512     metadata = _nfo_vitems(episode, metadata)
 513
 514     return metadata
 515
 516 def _from_movie_nfo(xmldoc):
 517     metadata = {}
 518
 519     movie = xmldoc.getElementsByTagName('movie')
 520     if movie:
 521         movie = movie[0]
 522     else:
 523         return metadata
 524
 525     items = {'description': 'plot',
 526              'title': 'title',
 527              'movieYear': 'year',
 528              'starRating': 'rating',
 529              'mpaaRating': 'mpaa'}
 530
 531     metadata['isEpisode'] = 'false'
 532
 533     for item in items:
 534         data = tag_data(movie, items[item])
 535         if data:
 536             metadata[item] = data
 537
 538     metadata['movieYear'] = "%04d" % int(metadata.get('movieYear', 0))
 539
 540     metadata = _nfo_vitems(movie, metadata)
 541     return metadata
 542
 543 def from_nfo(full_path):
 544     if full_path in nfo_cache:
 545         return nfo_cache[full_path]
 546
 547     metadata = nfo_cache[full_path] = {}
 548
 549     nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
 550     if not os.path.exists(nfo_path):
 551         return metadata
 552
 553     try:
 554         xmldoc = minidom.parse(file(nfo_path, 'U'))
 555     except:
 556         return metadata
 557
 558     if xmldoc.getElementsByTagName('episodedetails'):
 559         # it's an episode
 560         metadata.update(_from_episode_nfo(nfo_path, xmldoc))
 561     elif xmldoc.getElementsByTagName('movie'):
 562         # it's a movie
 563         metadata.update(_from_movie_nfo(xmldoc))
 564
 565     # common nfo cleanup
 566     if 'starRating' in metadata:
 567         # .NFO 0-10 -> TiVo 1-7
 568         rating = int(float(metadata['starRating']) * 6 / 10 + 1.5)
 569         metadata['starRating'] = rating
 570
 571     for key, mapping in [('mpaaRating', MPAA_RATINGS),
 572                          ('tvRating', TV_RATINGS)]:
 573         if key in metadata:
 574             rating = mapping.get(metadata[key], None)
 575             if rating:
 576                 metadata[key] = str(rating)
 577             else:
 578                 del metadata[key]
 579
 580     nfo_cache[full_path] = metadata
 581     return metadata
 582
 583 def from_tivo(full_path):
 584     if full_path in tivo_cache:
 585         return tivo_cache[full_path]
 586
 587     tdcat_path = config.get_bin('tdcat')
 588     tivo_mak = config.get_server('tivo_mak')
 589     try:
 590         assert(tdcat_path and tivo_mak)
 591         fname = unicode(full_path, 'utf-8')
 592         if mswindows:
 593             fname = fname.encode('iso8859-1')
 594         tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
 595         tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
 596         metadata = from_details(tdcat.stdout)
 597         tivo_cache[full_path] = metadata
 598     except:
 599         metadata = {}
 600
 601     return metadata
 602
 603 def force_utf8(text):
 604     if type(text) == str:
 605         try:
 606             text = text.decode('utf8')
 607         except:
 608             if sys.platform == 'darwin':
 609                 text = text.decode('macroman')
 610             else:
 611                 text = text.decode('iso8859-1')
 612     return text.encode('utf-8')
 613
 614 def dump(output, metadata):
 615     for key in metadata:
 616         value = metadata[key]
 617         if type(value) == list:
 618             for item in value:
 619                 output.write('%s: %s\n' % (key, item.encode('utf-8')))
 620         else:
 621             if key in HUMAN and value in HUMAN[key]:
 622                 output.write('%s: %s\n' % (key, HUMAN[key][value]))
 623             else:
 624                 output.write('%s: %s\n' % (key, value.encode('utf-8')))
 625
 626 if __name__ == '__main__':
 627     if len(sys.argv) > 1:
 628         metadata = {}
 629         fname = force_utf8(sys.argv[1])
 630         ext = os.path.splitext(fname)[1].lower()
 631         if ext == '.tivo':
 632             config.init([])
 633             metadata.update(from_tivo(fname))
 634         elif ext in ['.mp4', '.m4v', '.mov']:
 635             metadata.update(from_moov(fname))
 636         elif ext in ['.dvr-ms', '.asf', '.wmv']:
 637             metadata.update(from_dvrms(fname))
 638         elif ext == '.wtv':
 639             vInfo = plugins.video.transcode.video_info(fname)
 640             metadata.update(from_mscore(vInfo['rawmeta']))
 641         dump(sys.stdout, metadata)