Shorter notation for CSS colors.
[pyTivo/wmcbrine.git] / metadata.py
blob3723c8bd031c5da9818f7d9a5086a0f7c3e7a947
1 #!/usr/bin/env python
3 import logging
4 import os
5 import subprocess
6 import sys
7 from datetime import datetime
8 from xml.dom import minidom
9 from xml.parsers import expat
10 try:
11 import plistlib
12 except:
13 pass
15 import mutagen
16 from lrucache import LRUCache
18 import config
19 import plugins.video.transcode
# Boilerplate text that gets removed from program descriptions
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Rating label -> numeric code.  Each code can be spelled several ways.
TV_RATINGS = {
    'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4, 'TV-14': 5, 'TV-MA': 6,
    'TV-NR': 7,
    'TVY7': 1, 'TVY': 2, 'TVG': 3, 'TVPG': 4, 'TV14': 5, 'TVMA': 6,
    'TVNR': 7,
    'Y7': 1, 'Y': 2, 'G': 3, 'PG': 4, '14': 5, 'MA': 6, 'NR': 7,
    'UNRATED': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

MPAA_RATINGS = {
    'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
    'NC-17': 6, 'NC17': 6, 'NR': 8, 'UNRATED': 8,
    'G1': 1, 'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8,
}

STAR_RATINGS = {
    '1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6, '4': 7,
    '*': 1, '**': 3, '***': 5, '****': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Numeric code -> label to show, grouped by metadata field name
HUMAN = {
    'mpaaRating': {1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X',
                   6: 'NC-17', 8: 'NR'},
    'tvRating': {1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14',
                 6: 'MA', 7: 'NR'},
    'starRating': {1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3',
                   6: '3.5', 7: '4'},
}

# The three bytes of a UTF-8 byte order mark
BOM = '\xef\xbb\xbf'
# Caches of already-parsed metadata, one per source type; the reader
# functions below index them by file path.
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
nfo_cache = LRUCache(50)

# True when sys.platform reports Windows
mswindows = sys.platform == "win32"
def get_mpaa(rating):
    """Return the label for a numeric MPAA rating code.

    Codes missing from HUMAN['mpaaRating'] yield 'NR'.
    """
    labels = HUMAN['mpaaRating']
    if rating in labels:
        return labels[rating]
    return 'NR'
def get_tv(rating):
    """Return the label for a numeric TV rating code.

    Codes missing from HUMAN['tvRating'] yield 'NR'.
    """
    labels = HUMAN['tvRating']
    if rating in labels:
        return labels[rating]
    return 'NR'
def get_stars(rating):
    """Return the label for a numeric star rating code.

    Codes missing from HUMAN['starRating'] yield an empty string.
    """
    labels = HUMAN['starRating']
    if rating in labels:
        return labels[rating]
    return ''
def tag_data(element, tag):
    """Return the text of the node found by following a child path.

    element -- DOM node to start from
    tag     -- '/'-separated node names; each step picks the first
               *direct* child with that name

    Returns the data of the final node's first child, or '' when a step
    of the path is missing or the final node has no children.
    """
    node = element
    for step in tag.split('/'):
        for child in node.childNodes:
            if child.nodeName == step:
                node = child
                break
        else:
            # No direct child carries this name
            return ''
    first = node.firstChild
    if first:
        return first.data
    return ''
def _vtag_data(element, tag):
    """Collect the text of the <element> nodes found under a tag path.

    Each '/'-separated step of `tag` descends into the first matching
    descendant (getElementsByTagName).  Returns a list with the data of
    every non-empty <element> below the final node, or [] when any step
    of the path is missing.
    """
    node = element
    for step in tag.split('/'):
        matches = node.getElementsByTagName(step)
        if not matches:
            return []
        node = matches[0]

    values = []
    for item in node.getElementsByTagName('element'):
        if item.firstChild:
            values.append(item.firstChild.data)
    return values
def _vtag_data_alternate(element, tag):
    """Collect the text of every node matching a tag path.

    Unlike _vtag_data, each '/'-separated step keeps *all* matching
    descendants rather than just the first, and the text is taken from
    the final nodes themselves.  Nodes without children are skipped.
    """
    nodes = [element]
    for step in tag.split('/'):
        matches = []
        for node in nodes:
            matches.extend(node.getElementsByTagName(step))
        nodes = matches

    values = []
    for node in nodes:
        if node.firstChild:
            values.append(node.firstChild.data)
    return values
def _tag_value(element, tag):
    """Return the leading digit of a tag's 'value' attribute as an int.

    Looks at the first descendant named `tag`.  Only the first character
    of the attribute is converted.  Returns None when no such tag exists.
    """
    matches = element.getElementsByTagName(tag)
    if not matches:
        return None
    raw = matches[0].attributes['value'].value
    return int(raw[0])
def from_moov(full_path):
    """Build pyTivo metadata from the tags of an MP4-family file.

    full_path -- path to the file as a UTF-8 encoded byte string

    Returns a dict of metadata.  The result is stored in mp4_cache under
    full_path; a file that cannot be read, or that carries no tags, is
    cached as an empty dict.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    metadata = {}
    len_desc = 0

    # Checked explicitly rather than with assert: asserts are removed
    # under "python -O", which would let an empty result through.
    try:
        mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # Nothing to read from an empty tag
                continue
            value = value[0]

        if key == 'stik':
            if value == 'TV Show':
                metadata['isEpisode'] = 'true'
            else:
                metadata['isEpisode'] = 'false'
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            if len(value) == 4:
                # A bare year is expanded to a full timestamp
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                metadata.setdefault(k, []).append(value)
        elif key == '\xa9nam':
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            fields = value.split('|')
            # The rating is the second '|'-separated field; ignore
            # values that do not have one instead of raising IndexError.
            if len(fields) > 1:
                rating = fields[1].upper()
                if rating in TV_RATINGS and 'us-tv' in value:
                    metadata['tvRating'] = TV_RATINGS[rating]
                elif rating in MPAA_RATINGS and 'mpaa' in value:
                    metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            try:
                data = plistlib.readPlistFromString(value)
            except Exception:
                # Unparseable plist: the credits are simply left out
                pass
            else:
                for item in items:
                    if item in data:
                        metadata[items[item]] = [x['name'] for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
def from_mscore(rawmeta):
    """Translate Microsoft-style tags (Title, WM/...) to pyTivo metadata.

    rawmeta -- mapping of tag name to a sequence of values; only the
               first value of each tag is used

    Returns a dict of metadata.  Tags that are absent, empty or cannot
    be read are skipped.
    """
    metadata = {}
    keys = {'title': ['Title'],
            'description': ['Description', 'WM/SubTitleDescription'],
            'episodeTitle': ['WM/SubTitle'],
            'callsign': ['WM/MediaStationCallSign'],
            'displayMajorNumber': ['WM/MediaOriginalChannel'],
            'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
            'rating': ['WM/ParentalRating'],
            'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}

    for tagname in keys:
        # When several source tags feed one field, a later non-empty tag
        # replaces an earlier one
        for tag in keys[tagname]:
            try:
                if tag in rawmeta:
                    value = rawmeta[tag][0]
                    if type(value) not in (str, unicode):
                        value = str(value)
                    if value:
                        metadata[tagname] = value
            except Exception:
                # Best effort: an unreadable tag is left out.  Catching
                # Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit through.
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']
    if 'genre' in metadata:
        value = metadata['genre'].split(',')
        metadata['vProgramGenre'] = value
        metadata['vSeriesGenre'] = value
        del metadata['genre']
    if 'credits' in metadata:
        # ';' separates the credit groups, '/' the names within a group
        value = [x.split('/') for x in metadata['credits'].split(';')]
        if len(value) > 3:
            metadata['vActor'] = [x for x in (value[0] + value[3]) if x]
            metadata['vDirector'] = [x for x in value[1] if x]
        del metadata['credits']
    if 'rating' in metadata:
        # TV_RATINGS keys are upper case; normalise like the other readers
        rating = metadata['rating'].upper()
        if rating in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[rating]
        del metadata['rating']

    return metadata
def from_dvrms(full_path):
    """Read metadata from a DVR-MS/ASF/WMV file via mutagen.

    full_path -- path to the file as a UTF-8 encoded byte string

    Returns the dict produced by from_mscore().  The result is stored in
    dvrms_cache under full_path; a file that cannot be read, or that
    carries no tags, is cached as an empty dict.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    # Checked explicitly rather than with assert: asserts are removed
    # under "python -O", which would let an empty result through.
    try:
        rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        rawmeta = None
    if not rawmeta:
        dvrms_cache[full_path] = {}
        return {}

    metadata = from_mscore(rawmeta)
    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Read metadata from the .eyetvp plist that sits next to a file.

    full_path -- path to the media file as a UTF-8 encoded byte string

    Returns a dict of metadata taken from the plist's 'epg info'
    section.  The dict is empty when the directory holds no .eyetvp
    file, the plist cannot be parsed, or it has no 'epg info'.  EPG keys
    that are absent are treated like empty ones.
    """
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    metadata = {}
    path = os.path.dirname(unicode(full_path, 'utf-8'))

    plists = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    if not plists:
        # Previously an IndexError; there is simply nothing to read
        return metadata
    eyetvp = os.path.join(path, plists[0])

    try:
        eyetv = plistlib.readPlist(eyetvp)
    except Exception:
        return metadata
    if 'epg info' not in eyetv:
        return metadata
    info = eyetv['epg info']

    for key in keys:
        if info.get(key):
            metadata[keys[key]] = info[key]
    # A subtitle marks an episode, so the title doubles as series title
    if info.get('SUBTITLE') and 'TITLE' in info:
        metadata['seriesTitle'] = info['TITLE']
    if info.get('ACTORS'):
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info.get('DIRECTOR'):
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in [('tvRating', 'TV_RATING', TV_RATINGS),
                                ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                                ('starRating', 'STAR_RATING', STAR_RATINGS)]:
        x = (info.get(etag) or '').upper()
        if x and x in ratings:
            metadata[ptag] = ratings[x]

    # movieYear must be set for the mpaa/star ratings to work
    if (('mpaaRating' in metadata or 'starRating' in metadata) and
        'movieYear' not in metadata):
        try:
            metadata['movieYear'] = eyetv['info']['start'].year
        except (KeyError, AttributeError):
            # No usable recording start time to fall back on
            pass
    return metadata
def from_text(full_path):
    """Read metadata from the text files associated with a media file.

    full_path -- path to the media file as a UTF-8 encoded byte string

    Files are read in this order, later ones overriding earlier ones for
    plain keys:
      <title>.properties, default.txt from the topmost directory down to
      the file's own directory, <file>.txt, .meta/default.txt and
      .meta/<file>.txt.
    Lines are "key : value" ("key = value" in .properties files); lines
    starting with '#' are ignored.  Keys starting with 'v' accumulate
    into lists.  Rating fields are converted to their numeric codes.

    Returns a dict of metadata; files that cannot be opened are skipped.
    """
    metadata = {}
    full_path = unicode(full_path, 'utf-8')
    path, name = os.path.split(full_path)
    title = os.path.splitext(name)[0]

    # Collect default.txt from every ancestor directory, nearest first
    search_paths = []
    ptmp = full_path
    while ptmp:
        parent = os.path.dirname(ptmp)
        if ptmp != parent:
            ptmp = parent
        else:
            break
        search_paths.append(os.path.join(ptmp, 'default.txt'))

    search_paths.append(os.path.join(path, title) + '.properties')
    # Reversed so the most specific default.txt is applied last
    search_paths.reverse()

    search_paths += [full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']

    for metafile in search_paths:
        # Opening directly (instead of testing os.path.exists first)
        # avoids a race and skips unreadable files
        try:
            handle = open(metafile, 'U')
        except IOError:
            continue
        sep = ':='[metafile.endswith('.properties')]
        try:
            for line in handle:
                if line.startswith(BOM):
                    line = line[3:]
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    metadata.setdefault(key, []).append(value)
                else:
                    metadata[key] = value
        finally:
            # The original never closed the file
            handle.close()

    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]
        else:
            # Accept a numeric code written directly; anything else is
            # left untouched
            try:
                metadata[rating] = int(x)
            except ValueError:
                pass

    return metadata
def basic(full_path):
    """Assemble metadata for a file from every available source.

    Starts with the file name (minus extension) as title and the file's
    modification time as originalAirDate, then layers on container tags
    chosen by extension, the .nfo file and finally the text files; each
    later source overrides the earlier ones.

    full_path -- path to the media file as a UTF-8 encoded byte string
    """
    folder, filename = os.path.split(full_path)
    stem, suffix = os.path.splitext(filename)

    # Negative timestamps are treated as the epoch
    stamp = max(os.stat(unicode(full_path, 'utf-8')).st_mtime, 0)
    aired = datetime.utcfromtimestamp(stamp)

    metadata = {'title': stem,
                'originalAirDate': aired.isoformat()}

    suffix = suffix.lower()
    if suffix in ('.mp4', '.m4v', '.mov'):
        container = from_moov(full_path)
    elif suffix in ('.dvr-ms', '.asf', '.wmv'):
        container = from_dvrms(full_path)
    elif 'plistlib' in sys.modules and folder.endswith('.eyetv'):
        container = from_eyetv(full_path)
    else:
        container = None
    if container is not None:
        metadata.update(container)

    metadata.update(from_nfo(full_path))
    metadata.update(from_text(full_path))

    return metadata
def from_container(xmldoc):
    """Extract metadata from the first <Details> node of a document.

    xmldoc -- parsed DOM document

    Returns a dict of metadata.  The description has TRIBUNE_CR removed,
    tvRating is converted to int, and a SourceChannel of the form
    "major-minor" is split into displayMajorNumber/displayMinorNumber.
    """
    metadata = {}

    keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
            'description': 'Description', 'programId': 'ProgramId',
            'seriesId': 'SeriesId', 'episodeNumber': 'EpisodeNumber',
            'tvRating': 'TvRating', 'displayMajorNumber': 'SourceChannel',
            'callsign': 'SourceStation', 'showingBits': 'ShowingBits',
            'mpaaRating': 'MpaaRating'}

    details = xmldoc.getElementsByTagName('Details')[0]

    for key in keys:
        data = tag_data(details, keys[key])
        if data:
            if key == 'description':
                data = data.replace(TRIBUNE_CR, '')
            elif key == 'tvRating':
                data = int(data)
            elif key == 'displayMajorNumber':
                if '-' in data:
                    # Split on the first hyphen only, so a value with
                    # several hyphens cannot break the 2-way unpacking
                    data, metadata['displayMinorNumber'] = data.split('-', 1)
            metadata[key] = data

    return metadata
def from_details(xml):
    """Parse a TiVo details XML document into pyTivo metadata.

    xml -- file name or file-like object accepted by minidom.parse

    Reads the first <showing> element and the first <program> inside
    it.  Returns a dict of metadata; empty fields are omitted.
    """
    xmldoc = minidom.parse(xml)
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    metadata = {}

    # Plain text fields, addressed by child path relative to <showing>
    text_fields = (('description', 'program/description'),
                   ('title', 'program/title'),
                   ('episodeTitle', 'program/episodeTitle'),
                   ('episodeNumber', 'program/episodeNumber'),
                   ('programId', 'program/uniqueId'),
                   ('seriesId', 'program/series/uniqueId'),
                   ('seriesTitle', 'program/series/seriesTitle'),
                   ('originalAirDate', 'program/originalAirDate'),
                   ('isEpisode', 'program/isEpisode'),
                   ('movieYear', 'program/movieYear'),
                   ('partCount', 'partCount'),
                   ('partIndex', 'partIndex'),
                   ('time', 'time'))
    for field, path in text_fields:
        text = tag_data(showing, path)
        if not text:
            continue
        if field == 'description':
            text = text.replace(TRIBUNE_CR, '')
        metadata[field] = text

    # List-valued fields, each a tag holding <element> children
    for field in ('vActor', 'vChoreographer', 'vDirector',
                  'vExecProducer', 'vProgramGenre', 'vGuestStar',
                  'vHost', 'vProducer', 'vWriter'):
        entries = _vtag_data(program, field)
        if entries:
            metadata[field] = entries

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    # Numeric ratings: star/mpaa live under <program>, tv under <showing>
    for field, node in (('starRating', program),
                        ('mpaaRating', program),
                        ('tvRating', showing)):
        code = _tag_value(node, field)
        if code:
            metadata[field] = code

    return metadata
def _nfo_vitems(source, metadata):
    """Merge the list-valued .nfo fields of `source` into `metadata`.

    Values already present in a list are not added again.  When any
    genre is known, vSeriesGenre and vProgramGenre are both pointed at
    the vGenre list.  `metadata` is modified in place and returned.
    """
    tag_paths = {'vGenre': 'genre',
                 'vWriter': 'credits',
                 'vDirector': 'director',
                 'vActor': 'actor/name'}

    for field, path in tag_paths.items():
        found = _vtag_data_alternate(source, path)
        if not found:
            continue
        known = metadata.setdefault(field, [])
        for entry in found:
            if entry not in known:
                known.append(entry)

    if 'vGenre' in metadata:
        genres = metadata['vGenre']
        metadata['vSeriesGenre'] = genres
        metadata['vProgramGenre'] = genres

    return metadata
def _parse_nfo(nfo_path, nfo_data=None):
    """Parse the XML part of an .nfo file.

    nfo files can contain XML or a URL to seed the XBMC metadata
    scrapers.  It's also possible to have both (a URL after the XML
    metadata).  pyTivo only parses the XML metadata, but we'll try to
    strip the URL from mixed XML/URL files.

    nfo_path -- path of the .nfo file; only read when nfo_data is None
    nfo_data -- optional list of already-stripped lines to parse instead
                of reading the file (the list is modified in place)

    Returns the parsed DOM document, or None when the XML can't be
    parsed.
    """
    if nfo_data is None:
        nfo_file = open(nfo_path, 'rU')
        try:
            nfo_data = [line.strip() for line in nfo_file]
        finally:
            # The original left the file open
            nfo_file.close()

    while True:
        try:
            return minidom.parseString(os.linesep.join(nfo_data))
        except expat.ExpatError:
            # sys.exc_info() works on every Python version, unlike the
            # "except X, err" / "except X as err" spellings
            err = sys.exc_info()[1]

        if expat.ErrorString(err.code) != expat.errors.XML_ERROR_INVALID_TOKEN:
            return None

        # might be a URL outside the xml: drop trailing blank lines
        # that follow the offending line
        while len(nfo_data) > err.lineno and len(nfo_data[-1]) == 0:
            nfo_data.pop()
        if len(nfo_data) != err.lineno:
            # The error is not on the last non-blank line; give up
            return None
        # last non-blank line contains the error: remove it and retry
        nfo_data.pop()
def _from_tvshow_nfo(tvshow_nfo_path):
    """Read series-level metadata from a tvshow.nfo file.

    Returns a dict of metadata, stored in nfo_cache under the file's
    path.  The dict is empty when the file has no parseable XML or no
    <tvshow> element.
    """
    if tvshow_nfo_path in nfo_cache:
        return nfo_cache[tvshow_nfo_path]

    # Cache an empty result first so the early exits below leave one
    metadata = {}
    nfo_cache[tvshow_nfo_path] = metadata

    xmldoc = _parse_nfo(tvshow_nfo_path)
    if not xmldoc:
        return metadata

    shows = xmldoc.getElementsByTagName('tvshow')
    if not shows:
        return metadata
    tvshow = shows[0]

    for field, tag in (('description', 'plot'),
                       ('title', 'title'),
                       ('seriesTitle', 'showtitle'),
                       ('starRating', 'rating'),
                       ('tvRating', 'mpaa')):
        text = tag_data(tvshow, tag)
        if text:
            metadata[field] = text

    metadata = _nfo_vitems(tvshow, metadata)

    nfo_cache[tvshow_nfo_path] = metadata
    return metadata
def _from_episode_nfo(nfo_path, xmldoc):
    """Build metadata for a TV episode from its .nfo document.

    nfo_path -- path of the episode's .nfo file; its ancestor
                directories are searched for a tvshow.nfo
    xmldoc   -- parsed DOM document of the episode .nfo

    Series-level values from the nearest tvshow.nfo come first and are
    then overridden/extended by the <episodedetails> element.  Returns
    a dict of metadata.
    """
    metadata = {}

    items = {'description': 'plot',
             'episodeTitle': 'title',
             'seriesTitle': 'showtitle',
             'originalAirDate': 'aired',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # find tvshow.nfo
    path = nfo_path
    while True:
        basepath = os.path.dirname(path)
        if path == basepath:
            break
        path = basepath
        tv_nfo = os.path.join(path, 'tvshow.nfo')
        if os.path.exists(tv_nfo):
            # Copy the lists: _nfo_vitems() below appends to them in
            # place, and the dict returned here is the cached one that
            # every other episode of the series shares.
            for key, value in _from_tvshow_nfo(tv_nfo).items():
                if isinstance(value, list):
                    value = value[:]
                metadata[key] = value
            break

    episode = xmldoc.getElementsByTagName('episodedetails')
    if episode:
        episode = episode[0]
    else:
        return metadata

    metadata['isEpisode'] = 'true'
    for item in items:
        data = tag_data(episode, items[item])
        if data:
            metadata[item] = data

    # Prefer the display season/episode unless absent or "-1"
    season = tag_data(episode, 'displayseason')
    if not season or season == "-1":
        season = tag_data(episode, 'season')
    if not season:
        season = 1

    ep_num = tag_data(episode, 'displayepisode')
    if not ep_num or ep_num == "-1":
        ep_num = tag_data(episode, 'episode')
    if ep_num and ep_num != "-1":
        try:
            metadata['episodeNumber'] = "%d%02d" % (int(season), int(ep_num))
        except ValueError:
            # Non-numeric season/episode text: leave the number out
            pass

    if 'originalAirDate' in metadata:
        metadata['originalAirDate'] += 'T00:00:00Z'

    metadata = _nfo_vitems(episode, metadata)

    return metadata
def _from_movie_nfo(xmldoc):
    """Build metadata for a movie from its .nfo document.

    Reads the first <movie> element.  Returns a dict of metadata, empty
    when the document has no <movie>.  movieYear is always set, as a
    zero-padded 4-digit string ('0000' when the .nfo gives no year).
    """
    movies = xmldoc.getElementsByTagName('movie')
    if not movies:
        return {}
    movie = movies[0]

    metadata = {'isEpisode': 'false'}

    for field, tag in (('description', 'plot'),
                       ('title', 'title'),
                       ('movieYear', 'year'),
                       ('starRating', 'rating'),
                       ('mpaaRating', 'mpaa')):
        text = tag_data(movie, tag)
        if text:
            metadata[field] = text

    year = int(metadata.get('movieYear', 0))
    metadata['movieYear'] = "%04d" % year

    return _nfo_vitems(movie, metadata)
def from_nfo(full_path):
    """Read metadata from the .nfo file that accompanies a media file.

    full_path -- path to the media file; its extension is replaced by
                 '.nfo' to find the metadata file

    Returns a dict of metadata, stored in nfo_cache under full_path.
    The dict is empty when there is no .nfo file or it has no parseable
    XML.  Ratings are converted to TiVo's numeric codes; ratings that
    cannot be converted are dropped.
    """
    if full_path in nfo_cache:
        return nfo_cache[full_path]

    metadata = nfo_cache[full_path] = {}

    nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
    if not os.path.exists(nfo_path):
        return metadata

    xmldoc = _parse_nfo(nfo_path)
    if not xmldoc:
        return metadata

    if xmldoc.getElementsByTagName('episodedetails'):
        # it's an episode
        metadata.update(_from_episode_nfo(nfo_path, xmldoc))
    elif xmldoc.getElementsByTagName('movie'):
        # it's a movie
        metadata.update(_from_movie_nfo(xmldoc))

    # common nfo cleanup
    if 'starRating' in metadata:
        # .NFO 0-10 -> TiVo 1-7
        try:
            rating = int(float(metadata['starRating']) * 6 / 10 + 1.5)
        except (ValueError, OverflowError):
            # Not a usable number (e.g. "N/A" or "7,5"): drop it rather
            # than abort and leave a half-processed dict in the cache
            del metadata['starRating']
        else:
            # Keep out-of-range input inside the 1-7 scale
            metadata['starRating'] = min(max(rating, 1), 7)

    for key, mapping in [('mpaaRating', MPAA_RATINGS),
                         ('tvRating', TV_RATINGS)]:
        if key in metadata:
            rating = mapping.get(metadata[key], None)
            if rating:
                metadata[key] = str(rating)
            else:
                del metadata[key]

    nfo_cache[full_path] = metadata
    return metadata
def from_tivo(full_path):
    """Read metadata from a .TiVo file by running the tdcat tool on it.

    full_path -- path to the file as a UTF-8 encoded byte string

    Returns the dict produced by from_details() on tdcat's output, and
    stores it in tivo_cache under full_path.  Returns an empty dict (not
    cached) when tdcat or the tivo_mak setting is not configured, or
    when anything goes wrong while running tdcat or parsing its output.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    # Explicit test rather than assert, which is removed under -O
    if not (tdcat_path and tivo_mak):
        return {}

    try:
        fname = unicode(full_path, 'utf-8')
        if mswindows:
            fname = fname.encode('iso8859-1')
        tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
        tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
        try:
            metadata = from_details(tdcat.stdout)
        finally:
            # Close the pipe before waiting so a child that is still
            # writing cannot block us, then reap it so it does not
            # linger as a zombie.
            tdcat.stdout.close()
            tdcat.wait()
        tivo_cache[full_path] = metadata
    except Exception:
        metadata = {}

    return metadata
def force_utf8(text):
    """Return `text` as a UTF-8 encoded byte string.

    Unicode text is simply encoded.  A byte string is first decoded as
    UTF-8; if that fails it is decoded as MacRoman on Mac OS X
    (sys.platform 'darwin') or ISO 8859-1 elsewhere, then re-encoded.
    """
    unicode_type = type(u'')
    if not isinstance(text, unicode_type):
        try:
            text = text.decode('utf8')
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to the platform's legacy
            # single-byte encoding, which accepts any byte sequence
            if sys.platform == 'darwin':
                text = text.decode('macroman')
            else:
                text = text.decode('iso8859-1')
    return text.encode('utf-8')
def dump(output, metadata):
    """Write metadata to `output` as "key: value" lines.

    output   -- object with a write() method
    metadata -- dict of metadata; list values produce one line per item

    Rating codes found in HUMAN are written as their labels.  Unicode
    values are encoded as UTF-8; byte strings are written unchanged and
    other values (e.g. ints) via str().
    """
    def as_bytes(value):
        # Only unicode needs encoding.  Calling .encode() on everything
        # failed for ints (AttributeError) and for non-ASCII byte
        # strings (implicit ASCII decode).
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return str(value)

    for key in metadata:
        value = metadata[key]
        if isinstance(value, list):
            for item in value:
                output.write('%s: %s\n' % (key, as_bytes(item)))
        elif key in HUMAN and value in HUMAN[key]:
            output.write('%s: %s\n' % (key, HUMAN[key][value]))
        else:
            output.write('%s: %s\n' % (key, as_bytes(value)))
# Command-line use: print the embedded metadata of the file named by
# the first argument.  Unrecognised extensions print nothing.
if __name__ == '__main__':
    if sys.argv[1:]:
        metadata = {}
        config.init([])
        logging.basicConfig()
        fname = force_utf8(sys.argv[1])
        ext = os.path.splitext(fname)[1].lower()
        if ext == '.tivo':
            metadata.update(from_tivo(fname))
        elif ext in ('.mp4', '.m4v', '.mov'):
            metadata.update(from_moov(fname))
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            metadata.update(from_dvrms(fname))
        elif ext == '.wtv':
            # WTV tags come from the transcode plugin's video info
            vInfo = plugins.video.transcode.video_info(fname)
            metadata.update(from_mscore(vInfo['rawmeta']))
        dump(sys.stdout, metadata)