Allow for pure numeric mpaaRating, as apparently generated by kmttg, as
[pyTivo/wmcbrine.git] / metadata.py
blob1aa96731d473132341763baf9eb106d257b12eb4
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 import sys
6 from datetime import datetime
7 from xml.dom import minidom
8 from xml.parsers import expat
9 try:
10 import plistlib
11 except:
12 pass
14 import mutagen
15 from lrucache import LRUCache
17 import config
18 import plugins.video.transcode
# Copyright notice that is removed from program descriptions
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Accepted spellings of a TV rating -> numeric rating code
TV_RATINGS = {
    'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4, 'TV-14': 5, 'TV-MA': 6,
    'TV-NR': 7,
    'TVY7': 1, 'TVY': 2, 'TVG': 3, 'TVPG': 4, 'TV14': 5, 'TVMA': 6,
    'TVNR': 7,
    'Y7': 1, 'Y': 2, 'G': 3, 'PG': 4, '14': 5, 'MA': 6, 'NR': 7,
    'UNRATED': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Accepted spellings of an MPAA rating -> numeric rating code
MPAA_RATINGS = {
    'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
    'NC-17': 6, 'NC17': 6, 'NR': 8, 'UNRATED': 8,
    'G1': 1, 'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8,
}

# Accepted spellings of a star rating -> numeric rating code
STAR_RATINGS = {
    '1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6, '4': 7,
    '*': 1, '**': 3, '***': 5, '****': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Reverse tables: numeric rating code -> human-readable text, per tag
HUMAN = {
    'mpaaRating': {1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X',
                   6: 'NC-17', 8: 'NR'},
    'tvRating': {1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14',
                 6: 'MA', 7: 'NR'},
    'starRating': {1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3',
                   6: '3.5', 7: '4'},
}

# UTF-8 byte order mark, stripped from the start of metadata text lines
BOM = '\xef\xbb\xbf'
# Per-source caches of already-parsed metadata, keyed by the path handed
# to the matching from_*() reader.
# NOTE(review): 50 is presumably the maximum number of cached entries --
# confirm against the lrucache module.
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
nfo_cache = LRUCache(50)

# True when running on Windows; from_tivo() uses it to pick the file
# name encoding passed to tdcat.
mswindows = (sys.platform == "win32")
def get_mpaa(rating):
    """Return the display text for a numeric MPAA rating code.

    Codes that are not listed in HUMAN['mpaaRating'] yield 'NR'.
    """
    names = HUMAN['mpaaRating']
    if rating in names:
        return names[rating]
    return 'NR'
def get_tv(rating):
    """Return the display text for a numeric TV rating code.

    Codes that are not listed in HUMAN['tvRating'] yield 'NR'.
    """
    names = HUMAN['tvRating']
    if rating in names:
        return names[rating]
    return 'NR'
def get_stars(rating):
    """Return the display text for a numeric star rating code.

    Codes that are not listed in HUMAN['starRating'] yield ''.
    """
    names = HUMAN['starRating']
    if rating in names:
        return names[rating]
    return ''
def tag_data(element, tag):
    """Return the data of the first child of the node found at `tag`.

    `tag` is a '/'-separated path; each component is matched against the
    direct children of the current node and the first match is followed.
    Returns '' when any component is missing or the final node has no
    children.
    """
    node = element
    for name in tag.split('/'):
        for child in node.childNodes:
            if child.nodeName == name:
                node = child
                break
        else:
            # no direct child carries this name
            return ''
    first = node.firstChild
    if first:
        return first.data
    return ''
def _vtag_data(element, tag):
    """Collect the text of every <element> node found under `tag`.

    `tag` is a '/'-separated path; for each component the first matching
    descendant (via getElementsByTagName) is followed.  Returns [] when a
    component is missing; <element> nodes without children are skipped.
    """
    node = element
    for name in tag.split('/'):
        matches = node.getElementsByTagName(name)
        if not matches:
            return []
        node = matches[0]

    values = []
    for entry in node.getElementsByTagName('element'):
        if entry.firstChild:
            values.append(entry.firstChild.data)
    return values
def _vtag_data_alternate(element, tag):
    """Collect the text of every node reachable through the path `tag`.

    Unlike _vtag_data, all matches of each '/'-separated component are
    followed (not only the first), and the text is taken from the final
    nodes themselves.  Nodes without children are skipped.
    """
    current = [element]
    for name in tag.split('/'):
        found = []
        for node in current:
            found.extend(node.getElementsByTagName(name))
        current = found
    return [node.firstChild.data for node in current if node.firstChild]
def _tag_value(element, tag):
    """Return the leading digit of the 'value' attribute of `tag` as an int.

    The first descendant of `element` named `tag` is inspected.  Returns
    None when there is no such descendant, when it has no 'value'
    attribute, or when the attribute does not start with a digit, so a
    single malformed tag no longer aborts the caller with an exception.
    """
    matches = element.getElementsByTagName(tag)
    if not matches:
        return None
    # getAttribute() yields '' for a missing attribute instead of raising
    value = matches[0].getAttribute('value')
    try:
        return int(value[:1])
    except ValueError:
        return None
def from_moov(full_path):
    """Return metadata read from the tags mutagen finds in `full_path`.

    `full_path` is a UTF-8 encoded byte string.  Results are cached in
    mp4_cache.  An empty dict is returned (and cached) when mutagen
    cannot open the file or finds no tags.  Malformed individual tags
    are skipped instead of aborting the whole read.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    metadata = {}
    len_desc = 0

    # Explicit check rather than `assert`, which is stripped under -O
    try:
        mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # nothing to read from an empty tag
                continue
            value = value[0]

        if key == 'stik':
            if value == 'TV Show':
                metadata['isEpisode'] = 'true'
            else:
                metadata['isEpisode'] = 'false'
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            if len(value) == 4:
                # year only: pad to a full timestamp
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                metadata.setdefault(k, []).append(value)
        elif key == '\xa9nam':
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            parts = value.split('|')
            if len(parts) < 2:
                # no rating field present
                continue
            rating = parts[1].upper()
            if rating in TV_RATINGS and 'us-tv' in value:
                metadata['tvRating'] = TV_RATINGS[rating]
            elif rating in MPAA_RATINGS and 'mpaa' in value:
                metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            try:
                data = plistlib.readPlistFromString(value)
            except Exception:
                # unreadable plist: keep whatever else was found
                continue
            for item in items:
                if item in data:
                    metadata[items[item]] = [x['name'] for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
def from_mscore(rawmeta):
    """Translate Microsoft-style tags in `rawmeta` into pyTivo metadata.

    `rawmeta` maps tag names to sequences; the first entry of each known
    tag is used.  Genre, credits and rating are then expanded into the
    vProgramGenre/vSeriesGenre, vActor/vDirector and tvRating entries.
    """
    wanted = {'title': ['Title'],
              'description': ['Description', 'WM/SubTitleDescription'],
              'episodeTitle': ['WM/SubTitle'],
              'callsign': ['WM/MediaStationCallSign'],
              'displayMajorNumber': ['WM/MediaOriginalChannel'],
              'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
              'rating': ['WM/ParentalRating'],
              'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}

    metadata = {}
    for tagname, tags in wanted.items():
        for tag in tags:
            # best effort: a tag that cannot be read is simply skipped
            try:
                if tag not in rawmeta:
                    continue
                value = rawmeta[tag][0]
                if type(value) not in (str, unicode):
                    value = str(value)
                if value:
                    metadata[tagname] = value
            except:
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']

    if 'genre' in metadata:
        genres = metadata.pop('genre').split(',')
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = genres

    if 'credits' in metadata:
        # ';' separates groups, '/' separates names within a group
        groups = [x.split('/') for x in metadata.pop('credits').split(';')]
        if len(groups) > 3:
            metadata['vActor'] = [x for x in (groups[0] + groups[3]) if x]
            metadata['vDirector'] = [x for x in groups[1] if x]

    if 'rating' in metadata:
        rating = metadata.pop('rating')
        if rating in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[rating]

    return metadata
def from_dvrms(full_path):
    """Return metadata for `full_path`, read via mutagen and from_mscore.

    `full_path` is a UTF-8 encoded byte string.  Results are cached in
    dvrms_cache.  An empty dict is returned (and cached) when mutagen
    cannot open the file or finds no tags.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    # Explicit check rather than `assert`, which is stripped under -O
    try:
        rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        rawmeta = None
    if not rawmeta:
        dvrms_cache[full_path] = {}
        return {}

    metadata = from_mscore(rawmeta)
    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Return metadata from the .eyetvp plist next to `full_path`.

    The first file ending in '.eyetvp' in the same directory is parsed
    with plistlib; its 'epg info' dictionary supplies the values.
    Returns {} when the plist has no 'epg info' entry.
    """
    simple_keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
                   'DESCRIPTION': 'description', 'YEAR': 'movieYear',
                   'EPISODENUM': 'episodeNumber'}
    rating_tables = [('tvRating', 'TV_RATING', TV_RATINGS),
                     ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                     ('starRating', 'STAR_RATING', STAR_RATINGS)]

    metadata = {}
    path = os.path.dirname(unicode(full_path, 'utf-8'))
    candidates = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    eyetv = plistlib.readPlist(os.path.join(path, candidates[0]))
    if 'epg info' not in eyetv:
        return metadata

    info = eyetv['epg info']
    for source, target in simple_keys.items():
        if info[source]:
            metadata[target] = info[source]
    if info['SUBTITLE']:
        metadata['seriesTitle'] = info['TITLE']
    if info['ACTORS']:
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info['DIRECTOR']:
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in rating_tables:
        x = info[etag].upper()
        if x and x in ratings:
            metadata[ptag] = ratings[x]

    # movieYear must be set for the mpaa/star ratings to work
    has_movie_rating = 'mpaaRating' in metadata or 'starRating' in metadata
    if has_movie_rating and 'movieYear' not in metadata:
        metadata['movieYear'] = eyetv['info']['start'].year
    return metadata
def from_text(full_path):
    """Return metadata gathered from the text files that apply to a video.

    `full_path` is a UTF-8 encoded byte string.  Files are read in this
    order, later values replacing earlier ones (keys starting with 'v'
    accumulate into lists instead):

      1. <dir>/<title>.properties     ('=' separated)
      2. default.txt in every ancestor directory, outermost first
      3. <full_path>.txt
      4. <dir>/.meta/default.txt
      5. <dir>/.meta/<name>.txt

    Lines starting with '#' and lines without the separator are ignored.
    Rating values are finally converted to numeric codes, either through
    the rating tables or, for purely numeric values, with int().
    """
    metadata = {}
    full_path = unicode(full_path, 'utf-8')
    path, name = os.path.split(full_path)
    title = os.path.splitext(name)[0]

    # default.txt of each ancestor directory, nearest first (reversed below)
    search_paths = []
    ptmp = full_path
    while ptmp:
        parent = os.path.dirname(ptmp)
        if ptmp == parent:
            break
        ptmp = parent
        search_paths.append(os.path.join(ptmp, 'default.txt'))

    search_paths.append(os.path.join(path, title) + '.properties')
    search_paths.reverse()

    search_paths += [full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']

    for metafile in search_paths:
        if not os.path.exists(metafile):
            continue
        if metafile.endswith('.properties'):
            sep = '='
        else:
            sep = ':'
        handle = open(metafile, 'U')
        try:
            for line in handle:
                if line.startswith(BOM):
                    line = line[3:]
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    metadata.setdefault(key, []).append(value)
                else:
                    metadata[key] = value
        finally:
            # close deterministically instead of relying on garbage collection
            handle.close()

    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]
        else:
            # allow a pure numeric rating; leave anything else untouched
            try:
                metadata[rating] = int(x)
            except ValueError:
                pass

    return metadata
def basic(full_path):
    """Return the merged metadata for the video file at `full_path`.

    Starts with the file name (minus extension) as title and the file
    modification time as originalAirDate, then layers on, in order, the
    container-specific reader chosen by extension, from_nfo() and
    from_text(); later sources override earlier ones.
    """
    base_path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)
    ext = ext.lower()

    # negative timestamps are clamped to the epoch
    mtime = os.stat(unicode(full_path, 'utf-8')).st_mtime
    if mtime < 0:
        mtime = 0
    air_date = datetime.utcfromtimestamp(mtime)

    metadata = {'title': title, 'originalAirDate': air_date.isoformat()}

    if ext in ('.mp4', '.m4v', '.mov'):
        container = from_moov(full_path)
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        container = from_dvrms(full_path)
    elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
        container = from_eyetv(full_path)
    else:
        container = {}

    for extra in (container, from_nfo(full_path), from_text(full_path)):
        metadata.update(extra)

    return metadata
def from_container(xmldoc):
    """Return metadata taken from the first <Details> node of `xmldoc`.

    The Tribune copyright notice is removed from the description,
    tvRating is converted to int, and a SourceChannel of the form
    'major-minor' is split into displayMajorNumber/displayMinorNumber.
    """
    fields = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
              'description': 'Description', 'programId': 'ProgramId',
              'seriesId': 'SeriesId', 'episodeNumber': 'EpisodeNumber',
              'tvRating': 'TvRating', 'displayMajorNumber': 'SourceChannel',
              'callsign': 'SourceStation', 'showingBits': 'ShowingBits',
              'mpaaRating': 'MpaaRating'}

    details = xmldoc.getElementsByTagName('Details')[0]

    metadata = {}
    for key, tag in fields.items():
        data = tag_data(details, tag)
        if not data:
            continue
        if key == 'description':
            data = data.replace(TRIBUNE_CR, '')
        elif key == 'tvRating':
            data = int(data)
        elif key == 'displayMajorNumber' and '-' in data:
            data, metadata['displayMinorNumber'] = data.split('-')
        metadata[key] = data

    return metadata
def from_details(xml):
    """Return metadata parsed from a details XML document.

    `xml` is anything minidom.parse() accepts.  Values are read from the
    first <showing> node and the first <program> node inside it.
    """
    xmldoc = minidom.parse(xml)
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    metadata = {}

    # single-valued entries, addressed relative to <showing>
    paths = {'description': 'program/description',
             'title': 'program/title',
             'episodeTitle': 'program/episodeTitle',
             'episodeNumber': 'program/episodeNumber',
             'programId': 'program/uniqueId',
             'seriesId': 'program/series/uniqueId',
             'seriesTitle': 'program/series/seriesTitle',
             'originalAirDate': 'program/originalAirDate',
             'isEpisode': 'program/isEpisode',
             'movieYear': 'program/movieYear',
             'partCount': 'partCount',
             'partIndex': 'partIndex',
             'time': 'time'}
    for item, path in paths.items():
        data = tag_data(showing, path)
        if not data:
            continue
        if item == 'description':
            data = data.replace(TRIBUNE_CR, '')
        metadata[item] = data

    # multi-valued entries, stored as lists
    for item in ('vActor', 'vChoreographer', 'vDirector',
                 'vExecProducer', 'vProgramGenre', 'vGuestStar',
                 'vHost', 'vProducer', 'vWriter'):
        data = _vtag_data(program, item)
        if data:
            metadata[item] = data

    sb = showing.getElementsByTagName('showingBits')
    if sb:
        metadata['showingBits'] = sb[0].attributes['value'].value

    # numeric ratings: star/mpaa live under <program>, tv under <showing>
    for tag, node in (('starRating', program),
                      ('mpaaRating', program),
                      ('tvRating', showing)):
        value = _tag_value(node, tag)
        if value:
            metadata[tag] = value

    return metadata
def _nfo_vitems(source, metadata):
    """Merge the multi-valued .nfo entries of `source` into `metadata`.

    Values are appended to any list already stored under the key,
    skipping duplicates.  When genres are present, vSeriesGenre and
    vProgramGenre are set to the same list object as vGenre.  `metadata`
    is modified in place and also returned.
    """
    tag_paths = {'vGenre': 'genre',
                 'vWriter': 'credits',
                 'vDirector': 'director',
                 'vActor': 'actor/name'}

    for key, path in tag_paths.items():
        values = _vtag_data_alternate(source, path)
        if not values:
            continue
        existing = metadata.setdefault(key, [])
        for val in values:
            if val not in existing:
                existing.append(val)

    if 'vGenre' in metadata:
        genres = metadata['vGenre']
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = genres

    return metadata
def _parse_nfo(nfo_path, nfo_data=None):
    """Parse the XML part of an .nfo file and return a minidom document.

    nfo files can contain XML or a URL to seed the XBMC metadata
    scrapers.  It's also possible to have both (a URL after the XML
    metadata).  pyTivo only parses the XML metadata, but we'll try to
    strip the URL from mixed XML/URL files.

    `nfo_data` is the list of stripped lines to parse; when None the
    lines are read from `nfo_path`.  The list is modified in place while
    trailing lines are being dropped.  Returns None when the XML can't
    be parsed.
    """
    if nfo_data is None:
        nfo_file = open(nfo_path, 'rU')
        try:
            nfo_data = [line.strip() for line in nfo_file]
        finally:
            # close deterministically instead of relying on garbage collection
            nfo_file.close()

    try:
        return minidom.parseString(os.linesep.join(nfo_data))
    except expat.ExpatError:
        # sys.exc_info() works the same on every Python version
        err = sys.exc_info()[1]

    if expat.ErrorString(err.code) != expat.errors.XML_ERROR_INVALID_TOKEN:
        return None

    # might be a URL outside the xml: drop blank lines after the error line
    while len(nfo_data) > err.lineno:
        if len(nfo_data[-1]) == 0:
            nfo_data.pop()
        else:
            break

    if len(nfo_data) == err.lineno:
        # last non-blank line contains the error; retry without it
        nfo_data.pop()
        return _parse_nfo(nfo_path, nfo_data)

    return None
def _from_tvshow_nfo(tvshow_nfo_path):
    """Return the series-level metadata stored in a tvshow.nfo file.

    Results are cached in nfo_cache under the file's path.  An empty
    dict is cached and returned when the file can't be parsed or holds
    no <tvshow> node.
    """
    if tvshow_nfo_path in nfo_cache:
        return nfo_cache[tvshow_nfo_path]

    fields = {'description': 'plot',
              'title': 'title',
              'seriesTitle': 'showtitle',
              'starRating': 'rating',
              'tvRating': 'mpaa'}

    # cache the (still empty) result first so failures are remembered too
    metadata = {}
    nfo_cache[tvshow_nfo_path] = metadata

    xmldoc = _parse_nfo(tvshow_nfo_path)
    if not xmldoc:
        return metadata

    shows = xmldoc.getElementsByTagName('tvshow')
    if not shows:
        return metadata
    tvshow = shows[0]

    for item, tag in fields.items():
        data = tag_data(tvshow, tag)
        if data:
            metadata[item] = data

    metadata = _nfo_vitems(tvshow, metadata)

    nfo_cache[tvshow_nfo_path] = metadata
    return metadata
def _from_episode_nfo(nfo_path, xmldoc):
    """Return metadata for an episode .nfo document.

    The nearest tvshow.nfo found in a parent directory of `nfo_path`
    supplies series-level defaults; values from the <episodedetails>
    node of `xmldoc` then override or extend them.  When there is no
    <episodedetails> node, only the series-level values are returned.
    """
    metadata = {}

    items = {'description': 'plot',
             'episodeTitle': 'title',
             'seriesTitle': 'showtitle',
             'originalAirDate': 'aired',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # find tvshow.nfo
    path = nfo_path
    while True:
        basepath = os.path.dirname(path)
        if path == basepath:
            break
        path = basepath
        tv_nfo = os.path.join(path, 'tvshow.nfo')
        if os.path.exists(tv_nfo):
            # Copy list values: _nfo_vitems() below appends to them in
            # place, which would otherwise leak this episode's values
            # into the cached tvshow.nfo metadata.
            inherited = _from_tvshow_nfo(tv_nfo)
            for key in inherited:
                value = inherited[key]
                if isinstance(value, list):
                    value = list(value)
                metadata[key] = value
            break

    episode = xmldoc.getElementsByTagName('episodedetails')
    if episode:
        episode = episode[0]
    else:
        return metadata

    metadata['isEpisode'] = 'true'
    for item in items:
        data = tag_data(episode, items[item])
        if data:
            metadata[item] = data

    # prefer the display season/episode unless it is missing or "-1"
    season = tag_data(episode, 'displayseason')
    if not season or season == "-1":
        season = tag_data(episode, 'season')
    if not season:
        season = 1

    ep_num = tag_data(episode, 'displayepisode')
    if not ep_num or ep_num == "-1":
        ep_num = tag_data(episode, 'episode')
    if ep_num and ep_num != "-1":
        metadata['episodeNumber'] = "%d%02d" % (int(season), int(ep_num))

    if 'originalAirDate' in metadata:
        metadata['originalAirDate'] += 'T00:00:00Z'

    metadata = _nfo_vitems(episode, metadata)

    return metadata
def _from_movie_nfo(xmldoc):
    """Return metadata for a movie .nfo document.

    Values come from the first <movie> node; {} is returned when there
    is none.  movieYear is always set, formatted as four digits ('0000'
    when the file gives no year).
    """
    metadata = {}

    movies = xmldoc.getElementsByTagName('movie')
    if not movies:
        return metadata
    movie = movies[0]

    fields = {'description': 'plot',
              'title': 'title',
              'movieYear': 'year',
              'starRating': 'rating',
              'mpaaRating': 'mpaa'}

    metadata['isEpisode'] = 'false'

    for item, tag in fields.items():
        data = tag_data(movie, tag)
        if data:
            metadata[item] = data

    year = int(metadata.get('movieYear', 0))
    metadata['movieYear'] = "%04d" % year

    return _nfo_vitems(movie, metadata)
def from_nfo(full_path):
    """Return metadata from the .nfo file that accompanies `full_path`.

    The .nfo path is `full_path` with its extension replaced by '.nfo'.
    Results are cached in nfo_cache; an empty dict is cached when the
    file is missing or can't be parsed.  The star rating is rescaled to
    an integer code, and mpaa/tv ratings are replaced by their numeric
    code as a string, or removed when unrecognised.
    """
    if full_path in nfo_cache:
        return nfo_cache[full_path]

    # cache the (still empty) result first so failures are remembered too
    metadata = {}
    nfo_cache[full_path] = metadata

    nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
    xmldoc = None
    if os.path.exists(nfo_path):
        xmldoc = _parse_nfo(nfo_path)
    if not xmldoc:
        return metadata

    if xmldoc.getElementsByTagName('episodedetails'):
        # it's an episode
        metadata.update(_from_episode_nfo(nfo_path, xmldoc))
    elif xmldoc.getElementsByTagName('movie'):
        # it's a movie
        metadata.update(_from_movie_nfo(xmldoc))

    # common nfo cleanup
    if 'starRating' in metadata:
        # .NFO 0-10 -> TiVo 1-7
        scaled = float(metadata['starRating']) * 6 / 10
        metadata['starRating'] = int(scaled + 1.5)

    for key, mapping in (('mpaaRating', MPAA_RATINGS),
                         ('tvRating', TV_RATINGS)):
        if key not in metadata:
            continue
        rating = mapping.get(metadata[key], None)
        if rating:
            metadata[key] = str(rating)
        else:
            del metadata[key]

    nfo_cache[full_path] = metadata
    return metadata
def from_tivo(full_path):
    """Return metadata extracted from a .tivo file by running tdcat.

    `full_path` is a UTF-8 encoded byte string.  tdcat is run with the
    configured 'tivo_mak' and its output is parsed by from_details().
    Successful results are cached in tivo_cache.  Returns {} (uncached)
    when tdcat or tivo_mak is not configured, or when running or parsing
    fails.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    # Explicit check rather than `assert`, which is stripped under -O
    if not (tdcat_path and tivo_mak):
        return {}

    try:
        fname = unicode(full_path, 'utf-8')
        if mswindows:
            fname = fname.encode('iso8859-1')
        tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
        tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
        try:
            metadata = from_details(tdcat.stdout)
        finally:
            # close the pipe first so a still-writing child can't block,
            # then reap it so no zombie process is left behind
            tdcat.stdout.close()
            tdcat.wait()
        tivo_cache[full_path] = metadata
    except Exception:
        metadata = {}

    return metadata
def force_utf8(text):
    """Return `text` encoded as UTF-8.

    A plain `str` is first decoded as UTF-8; when that fails it is
    decoded as MacRoman on Mac OS X ('darwin') and as ISO 8859-1
    elsewhere.  Any other object is encoded directly with its own
    encode() method.
    """
    if type(text) == str:
        try:
            text = text.decode('utf8')
        except UnicodeError:
            # not valid UTF-8: fall back to the platform's legacy charset
            if sys.platform == 'darwin':
                text = text.decode('macroman')
            else:
                text = text.decode('iso8859-1')
    return text.encode('utf-8')
def dump(output, metadata):
    """Write `metadata` to `output` as 'key: value' lines.

    List values produce one line per item.  Rating codes listed in HUMAN
    are written in their human-readable form.  unicode values are
    encoded as UTF-8; byte strings are written unchanged and any other
    value (e.g. an int year or an unlisted numeric rating) is written
    via str(), so such values no longer raise.
    """
    def _as_text(value):
        # Only unicode needs encoding; calling .encode() on a non-ASCII
        # byte string or on a number would raise.
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return str(value)

    for key in metadata:
        value = metadata[key]
        if isinstance(value, list):
            for item in value:
                output.write('%s: %s\n' % (key, _as_text(item)))
        elif key in HUMAN and value in HUMAN[key]:
            output.write('%s: %s\n' % (key, HUMAN[key][value]))
        else:
            output.write('%s: %s\n' % (key, _as_text(value)))
# Command-line use: print the metadata of the file named by the first
# argument.  Only the container-specific reader for the extension runs.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        fname = force_utf8(sys.argv[1])
        ext = os.path.splitext(fname)[1].lower()

        if ext == '.tivo':
            config.init([])
            found = from_tivo(fname)
        elif ext in ('.mp4', '.m4v', '.mov'):
            found = from_moov(fname)
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            found = from_dvrms(fname)
        elif ext == '.wtv':
            vInfo = plugins.video.transcode.video_info(fname)
            found = from_mscore(vInfo['rawmeta'])
        else:
            # unknown extension: nothing to print
            found = {}

        metadata = {}
        metadata.update(found)
        dump(sys.stdout, metadata)