Merge branch 'master' of git://repo.or.cz/pyTivo/wmcbrine.git
[pyTivo/wmcbrine/lucasnz.git] / metadata.py
blob79e81144951bec5039f5b753c4cde0b7b9be4467
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 import sys
6 from datetime import datetime
7 from xml.dom import minidom
8 from xml.parsers import expat
9 try:
10 import plistlib
11 except:
12 pass
14 import mutagen
15 from lrucache import LRUCache
17 import config
18 import plugins.video.transcode
# Copyright notice that the parsers remove from program descriptions.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Accepted spellings of a TV rating -> numeric rating code.
TV_RATINGS = {
    'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4,
    'TV-14': 5, 'TV-MA': 6, 'TV-NR': 7,
    'TVY7': 1, 'TVY': 2, 'TVG': 3, 'TVPG': 4,
    'TV14': 5, 'TVMA': 6, 'TVNR': 7,
    'Y7': 1, 'Y': 2, 'G': 3, 'PG': 4,
    '14': 5, 'MA': 6, 'NR': 7,
    'UNRATED': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Accepted spellings of an MPAA rating -> numeric rating code.
MPAA_RATINGS = {
    'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
    'NC-17': 6, 'NC17': 6,
    'NR': 8, 'UNRATED': 8,
    'G1': 1, 'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8,
}

# Accepted spellings of a star rating -> numeric rating code.
STAR_RATINGS = {
    '1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6, '4': 7,
    '*': 1, '**': 3, '***': 5, '****': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Numeric rating code -> display string, one table per metadata key.
HUMAN = {
    'mpaaRating': {
        1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X', 6: 'NC-17', 8: 'NR',
    },
    'tvRating': {
        1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14', 6: 'MA', 7: 'NR',
    },
    'starRating': {
        1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3', 6: '3.5', 7: '4',
    },
}

# UTF-8 byte order mark; stripped from the start of metadata text lines.
BOM = '\xef\xbb\xbf'
# One cache of parsed metadata per source type, looked up by path in the
# from_*() functions below.  Each is built as LRUCache(50).
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
nfo_cache = LRUCache(50)

# True when sys.platform reports "win32".
mswindows = sys.platform == "win32"
def get_mpaa(rating):
    """Return the display string for a numeric MPAA rating code.

    Codes missing from HUMAN['mpaaRating'] yield 'NR'.
    """
    names = HUMAN['mpaaRating']
    return names.get(rating, 'NR')
def get_tv(rating):
    """Return the display string for a numeric TV rating code.

    Codes missing from HUMAN['tvRating'] yield 'NR'.
    """
    names = HUMAN['tvRating']
    return names.get(rating, 'NR')
def get_stars(rating):
    """Return the display string for a numeric star rating code.

    Codes missing from HUMAN['starRating'] yield an empty string.
    """
    names = HUMAN['starRating']
    return names.get(rating, '')
def tag_data(element, tag):
    """Return the data of the first child of the node reached via a path.

    ``tag`` is a '/'-separated sequence of tag names.  At each step the
    first matching descendant (getElementsByTagName) is taken.  Returns
    '' when any step finds nothing or the final node has no children.
    """
    node = element
    for step in tag.split('/'):
        found = node.getElementsByTagName(step)
        if not found:
            return ''
        node = found[0]

    first = node.firstChild
    if first:
        return first.data
    return ''
73 def _vtag_data(element, tag):
74 for name in tag.split('/'):
75 new_element = element.getElementsByTagName(name)
76 if not new_element:
77 return []
78 element = new_element[0]
79 elements = element.getElementsByTagName('element')
80 return [x.firstChild.data for x in elements if x.firstChild]
82 def _vtag_data_alternate(element, tag):
83 elements = [element]
84 for name in tag.split('/'):
85 new_elements = []
86 for elmt in elements:
87 new_elements += elmt.getElementsByTagName(name)
88 elements = new_elements
89 return [x.firstChild.data for x in elements if x.firstChild]
91 def _tag_value(element, tag):
92 item = element.getElementsByTagName(tag)
93 if item:
94 value = item[0].attributes['value'].value
95 return int(value[0])
def from_moov(full_path):
    """Return metadata read from the tags of an MP4/M4V/MOV file.

    Results are cached in mp4_cache by ``full_path``.  When mutagen
    cannot open the file, or yields nothing, an empty dict is cached and
    returned.

    ``full_path`` is a UTF-8 encoded byte string.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    # An explicit check replaces the old assert, which is stripped under
    # "python -O" and would let a None result reach .items() below.
    try:
        mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    metadata = {}
    len_desc = 0

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # Nothing to read from an empty tag list.
                continue
            value = value[0]

        if key == 'stik':
            if value == 'TV Show':
                metadata['isEpisode'] = 'true'
            else:
                metadata['isEpisode'] = 'false'
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            if len(value) == 4:
                # A bare year is expanded to a full timestamp.
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                metadata.setdefault(k, []).append(value)
        elif key == '\xa9nam':
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            fields = value.split("|")
            # Without a second '|'-separated field there is no rating
            # to read; skip instead of raising IndexError.
            if len(fields) > 1:
                rating = fields[1].upper()
                if rating in TV_RATINGS and 'us-tv' in value:
                    metadata['tvRating'] = TV_RATINGS[rating]
                elif rating in MPAA_RATINGS and 'mpaa' in value:
                    metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            data = plistlib.readPlistFromString(value)
            for item in items:
                if item in data:
                    metadata[items[item]] = [x['name'] for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
try:
    _MSCORE_TEXT_TYPES = (str, unicode)
except NameError:
    # No separate unicode type on this interpreter.
    _MSCORE_TEXT_TYPES = (str,)


def from_mscore(rawmeta):
    """Convert Windows Media style tags into pyTivo metadata.

    ``rawmeta`` maps tag names to sequences of values; the first value of
    each known tag is used.  Non-string values are converted with str(),
    and empty values are ignored.  Returns a dict that may contain title,
    description, episodeTitle, seriesTitle, callsign, displayMajorNumber,
    originalAirDate, tvRating, vProgramGenre, vSeriesGenre, vActor and
    vDirector.
    """
    metadata = {}
    keys = {'title': ['Title'],
            'description': ['Description', 'WM/SubTitleDescription'],
            'episodeTitle': ['WM/SubTitle'],
            'callsign': ['WM/MediaStationCallSign'],
            'displayMajorNumber': ['WM/MediaOriginalChannel'],
            'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
            'rating': ['WM/ParentalRating'],
            'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}

    for tagname in keys:
        for tag in keys[tagname]:
            # Best effort: one unreadable tag must not lose the rest.
            # Only Exception is swallowed, so KeyboardInterrupt and
            # SystemExit still propagate.
            try:
                if tag in rawmeta:
                    value = rawmeta[tag][0]
                    if not isinstance(value, _MSCORE_TEXT_TYPES):
                        value = str(value)
                    if value:
                        metadata[tagname] = value
            except Exception:
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']

    # 'genre', 'credits' and 'rating' are intermediate keys: each is
    # translated below and then removed from the result.
    if 'genre' in metadata:
        value = metadata['genre'].split(',')
        metadata['vProgramGenre'] = value
        metadata['vSeriesGenre'] = value
        del metadata['genre']

    if 'credits' in metadata:
        # ';' separates groups, '/' separates names within a group.
        value = [x.split('/') for x in metadata['credits'].split(';')]
        if len(value) > 3:
            # Groups 0 and 3 are both reported as actors, group 1 as
            # directors; empty names are dropped.
            metadata['vActor'] = [x for x in (value[0] + value[3]) if x]
            metadata['vDirector'] = [x for x in value[1] if x]
        del metadata['credits']

    if 'rating' in metadata:
        rating = metadata['rating']
        if rating in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[rating]
        del metadata['rating']

    return metadata
def from_dvrms(full_path):
    """Return metadata read from a file via mutagen and from_mscore().

    Results are cached in dvrms_cache by ``full_path``.  When mutagen
    cannot open the file, or yields nothing, an empty dict is cached and
    returned.

    ``full_path`` is a UTF-8 encoded byte string.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    # An explicit check replaces the old assert, which is stripped under
    # "python -O" and would let an empty result reach from_mscore().
    try:
        rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        rawmeta = None
    if not rawmeta:
        dvrms_cache[full_path] = {}
        return {}

    metadata = from_mscore(rawmeta)
    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Return metadata from the .eyetvp plist next to an EyeTV recording.

    The first '*.eyetvp' file in the recording's directory is read with
    plistlib, and its 'epg info' section (if any) is translated.  Raises
    IndexError when the directory holds no '.eyetvp' file.

    ``full_path`` is a UTF-8 encoded byte string.
    """
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    rating_sources = [('tvRating', 'TV_RATING', TV_RATINGS),
                      ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                      ('starRating', 'STAR_RATING', STAR_RATINGS)]

    metadata = {}
    path = os.path.dirname(unicode(full_path, 'utf-8'))
    candidates = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    eyetvp = os.path.join(path, candidates[0])
    eyetv = plistlib.readPlist(eyetvp)

    if 'epg info' not in eyetv:
        return metadata
    info = eyetv['epg info']

    for key in keys:
        if info[key]:
            metadata[keys[key]] = info[key]
    if info['SUBTITLE']:
        metadata['seriesTitle'] = info['TITLE']
    if info['ACTORS']:
        actors = info['ACTORS'].split(',')
        metadata['vActor'] = [actor.strip() for actor in actors]
    if info['DIRECTOR']:
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in rating_sources:
        spelled = info[etag].upper()
        if spelled and spelled in ratings:
            metadata[ptag] = ratings[spelled]

    # movieYear must be set for the mpaa/star ratings to work
    has_movie_rating = 'mpaaRating' in metadata or 'starRating' in metadata
    if has_movie_rating and 'movieYear' not in metadata:
        metadata['movieYear'] = eyetv['info']['start'].year

    return metadata
def from_text(full_path):
    """Return metadata collected from text files associated with a video.

    Files are read in this order, later values replacing earlier ones:

      1. <dir>/<name without extension>.properties
      2. default.txt in every ancestor directory, outermost first
      3. <full_path>.txt
      4. <dir>/.meta/default.txt
      5. <dir>/.meta/<name>.txt

    Lines have the form "key : value" ("key = value" in .properties
    files).  Lines starting with '#', lines without the separator, and
    lines with an empty key or value are ignored.  Keys starting with
    'v' accumulate into lists (across all files); other keys hold the
    last value seen.  Recognised rating spellings are converted to their
    numeric codes.

    ``full_path`` is a UTF-8 encoded byte string.
    """
    metadata = {}
    full_path = unicode(full_path, 'utf-8')
    path, name = os.path.split(full_path)
    title = os.path.splitext(name)[0]

    # Walk up to the root, collecting default.txt candidates nearest
    # first; the list is reversed below so the nearest one is read last.
    search_paths = []
    ptmp = full_path
    while ptmp:
        parent = os.path.dirname(ptmp)
        if ptmp == parent:
            break
        ptmp = parent
        search_paths.append(os.path.join(ptmp, 'default.txt'))

    search_paths.append(os.path.join(path, title) + '.properties')
    search_paths.reverse()

    search_paths += [full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']

    for metafile in search_paths:
        if not os.path.exists(metafile):
            continue

        if metafile.endswith('.properties'):
            sep = '='
        else:
            sep = ':'

        # Close the file explicitly rather than relying on garbage
        # collection to release the handle.
        handle = open(metafile, 'U')
        try:
            for line in handle:
                if line.startswith(BOM):
                    line = line[3:]
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    metadata.setdefault(key, []).append(value)
                else:
                    metadata[key] = value
        finally:
            handle.close()

    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]

    return metadata
def basic(full_path):
    """Return the merged metadata for a video file.

    Starts from the file name (without extension) as the title and the
    file's modification time, as a UTC ISO timestamp, as originalAirDate.
    Container tags, .nfo data and text metadata are then layered on top,
    in that order, each replacing keys set by the previous one.

    ``full_path`` is a UTF-8 encoded byte string.
    """
    base_path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)

    mtime = os.stat(unicode(full_path, 'utf-8')).st_mtime
    if mtime < 0:
        # Negative timestamps are clamped to the epoch.
        mtime = 0
    air_date = datetime.utcfromtimestamp(mtime).isoformat()

    metadata = {'title': title, 'originalAirDate': air_date}

    ext = ext.lower()
    if ext in ('.mp4', '.m4v', '.mov'):
        embedded = from_moov(full_path)
        metadata.update(embedded)
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        embedded = from_dvrms(full_path)
        metadata.update(embedded)
    elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
        embedded = from_eyetv(full_path)
        metadata.update(embedded)

    metadata.update(from_nfo(full_path))
    metadata.update(from_text(full_path))
    return metadata
def from_container(xmldoc):
    """Return metadata read from the <Details> node of a parsed document.

    ``xmldoc`` is a DOM document; its first <Details> element is used
    (IndexError if there is none).  Empty fields are left out.  The
    description has the Tribune copyright notice removed, tvRating is
    converted to int, and a "major-minor" SourceChannel is split into
    displayMajorNumber and displayMinorNumber.
    """
    metadata = {}

    keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
            'description': 'Description', 'seriesId': 'SeriesId',
            'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
            'displayMajorNumber': 'SourceChannel',
            'callsign': 'SourceStation',
            'showingBits': 'ShowingBits', 'mpaaRating': 'MpaaRating'}

    details = xmldoc.getElementsByTagName('Details')[0]

    for key in keys:
        data = tag_data(details, keys[key])
        if not data:
            continue
        if key == 'description':
            data = data.replace(TRIBUNE_CR, '')
        elif key == 'tvRating':
            data = int(data)
        elif key == 'displayMajorNumber' and '-' in data:
            # Split on the first '-' only, so a value with more than one
            # dash cannot break the two-way unpacking.
            data, metadata['displayMinorNumber'] = data.split('-', 1)
        metadata[key] = data

    return metadata
def from_details(xml):
    """Return metadata parsed from a showing/program details document.

    ``xml`` is anything minidom.parse() accepts (a file name or a file
    object).  The first <showing> element and its first <program> child
    are read; IndexError is raised if either is missing.
    """
    xmldoc = minidom.parse(xml)
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    # Single-valued fields: metadata key -> tag path below <showing>.
    items = {'description': 'program/description',
             'title': 'program/title',
             'episodeTitle': 'program/episodeTitle',
             'episodeNumber': 'program/episodeNumber',
             'seriesId': 'program/series/uniqueId',
             'seriesTitle': 'program/series/seriesTitle',
             'originalAirDate': 'program/originalAirDate',
             'isEpisode': 'program/isEpisode',
             'movieYear': 'program/movieYear',
             'partCount': 'partCount',
             'partIndex': 'partIndex',
             'time': 'time'}

    # List-valued fields, each stored as <element> children.
    vItems = ('vActor', 'vChoreographer', 'vDirector',
              'vExecProducer', 'vProgramGenre', 'vGuestStar',
              'vHost', 'vProducer', 'vWriter')

    metadata = {}

    for item, tag_path in items.items():
        data = tag_data(showing, tag_path)
        if not data:
            continue
        if item == 'description':
            data = data.replace(TRIBUNE_CR, '')
        metadata[item] = data

    for item in vItems:
        data = _vtag_data(program, item)
        if data:
            metadata[item] = data

    sb = showing.getElementsByTagName('showingBits')
    if sb:
        metadata['showingBits'] = sb[0].attributes['value'].value

    for tag in ('starRating', 'mpaaRating'):
        value = _tag_value(program, tag)
        if value:
            metadata[tag] = value

    tv_rating = _tag_value(showing, 'tvRating')
    if tv_rating:
        metadata['tvRating'] = tv_rating

    return metadata
def _nfo_vitems(source, metadata):
    """Merge the list-valued .nfo fields of ``source`` into ``metadata``.

    Values already present in a list are not added again.  When any
    genre is known, vSeriesGenre and vProgramGenre are set to the same
    list object as vGenre.  ``metadata`` is modified in place and also
    returned.
    """
    vItems = {'vGenre': 'genre',
              'vWriter': 'credits',
              'vDirector': 'director',
              'vActor': 'actor/name'}

    for key, tag_path in vItems.items():
        found = _vtag_data_alternate(source, tag_path)
        if not found:
            continue
        bucket = metadata.setdefault(key, [])
        for entry in found:
            if entry not in bucket:
                bucket.append(entry)

    if 'vGenre' in metadata:
        genres = metadata['vGenre']
        metadata['vSeriesGenre'] = genres
        metadata['vProgramGenre'] = genres

    return metadata
432 def _parse_nfo(nfo_path, nfo_data=None):
433 # nfo files can contain XML or a URL to seed the XBMC metadata scrapers
434 # It's also possible to have both (a URL after the XML metadata)
435 # pyTivo only parses the XML metadata, but we'll try to stip the URL
436 # from mixed XML/URL files. Returns `None` when XML can't be parsed.
437 if nfo_data is None:
438 nfo_data = [line.strip() for line in file(nfo_path, 'rU')]
439 xmldoc = None
440 try:
441 xmldoc = minidom.parseString(os.linesep.join(nfo_data))
442 except expat.ExpatError, err:
443 if expat.ErrorString(err.code) == expat.errors.XML_ERROR_INVALID_TOKEN:
444 # might be a URL outside the xml
445 while len(nfo_data) > err.lineno:
446 if len(nfo_data[-1]) == 0:
447 nfo_data.pop()
448 else:
449 break
450 if len(nfo_data) == err.lineno:
451 # last non-blank line contains the error
452 nfo_data.pop()
453 return _parse_nfo(nfo_path, nfo_data)
454 return xmldoc
def _from_tvshow_nfo(tvshow_nfo_path):
    """Return series-level metadata from a tvshow.nfo file.

    Results are cached in nfo_cache by path.  An empty dict is cached
    and returned when the file cannot be parsed or has no <tvshow>
    element.
    """
    if tvshow_nfo_path in nfo_cache:
        return nfo_cache[tvshow_nfo_path]

    # Cache an empty result first, so it is in place for every early
    # return below.
    metadata = {}
    nfo_cache[tvshow_nfo_path] = metadata

    xmldoc = _parse_nfo(tvshow_nfo_path)
    if not xmldoc:
        return metadata

    shows = xmldoc.getElementsByTagName('tvshow')
    if not shows:
        return metadata
    tvshow = shows[0]

    items = {'description': 'plot',
             'title': 'title',
             'seriesTitle': 'showtitle',
             'starRating': 'rating',
             'tvRating': 'mpaa'}
    for item, tag in items.items():
        data = tag_data(tvshow, tag)
        if data:
            metadata[item] = data

    metadata = _nfo_vitems(tvshow, metadata)

    nfo_cache[tvshow_nfo_path] = metadata
    return metadata
def _from_episode_nfo(nfo_path, xmldoc):
    """Return metadata for an episode .nfo, layered over its tvshow.nfo.

    The nearest tvshow.nfo found in a directory above ``nfo_path``
    supplies series-level defaults; fields from the <episodedetails>
    element of ``xmldoc`` then replace or extend them.  If ``xmldoc`` has
    no <episodedetails> element, only the series-level data is returned.

    episodeNumber is built as "<season><2-digit episode>"; season
    defaults to 1.  int() raises ValueError if either value is not
    numeric.
    """
    metadata = {}

    items = {'description': 'plot',
             'episodeTitle': 'title',
             'seriesTitle': 'showtitle',
             'originalAirDate': 'aired',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # find tvshow.nfo
    path = nfo_path
    while True:
        basepath = os.path.dirname(path)
        if path == basepath:
            break
        path = basepath
        tv_nfo = os.path.join(path, 'tvshow.nfo')
        if os.path.exists(tv_nfo):
            # Copy list values: the dict returned here lives in
            # nfo_cache, and _nfo_vitems() below appends to the lists
            # in place.  Sharing them would leak this episode's cast
            # and genres into the cached series data and from there
            # into every later episode.
            for key, value in _from_tvshow_nfo(tv_nfo).items():
                if isinstance(value, list):
                    value = list(value)
                metadata[key] = value
            break

    episode = xmldoc.getElementsByTagName('episodedetails')
    if not episode:
        return metadata
    episode = episode[0]

    metadata['isEpisode'] = 'true'
    for item in items:
        data = tag_data(episode, items[item])
        if data:
            metadata[item] = data

    # The display* values win unless they are absent or "-1".
    season = tag_data(episode, 'displayseason')
    if not season or season == "-1":
        season = tag_data(episode, 'season')
    if not season:
        season = 1

    ep_num = tag_data(episode, 'displayepisode')
    if not ep_num or ep_num == "-1":
        ep_num = tag_data(episode, 'episode')
    if ep_num and ep_num != "-1":
        metadata['episodeNumber'] = "%d%02d" % (int(season), int(ep_num))

    if 'originalAirDate' in metadata:
        metadata['originalAirDate'] += 'T00:00:00Z'

    metadata = _nfo_vitems(episode, metadata)

    return metadata
def _from_movie_nfo(xmldoc):
    """Return metadata from the first <movie> element of ``xmldoc``.

    Returns an empty dict when there is no <movie> element.  movieYear
    is always set, formatted as four digits ("0000" when the file gives
    no year); int() raises ValueError for a non-numeric year.
    """
    metadata = {}

    movies = xmldoc.getElementsByTagName('movie')
    if not movies:
        return metadata
    movie = movies[0]

    items = {'description': 'plot',
             'title': 'title',
             'movieYear': 'year',
             'starRating': 'rating',
             'mpaaRating': 'mpaa'}

    metadata['isEpisode'] = 'false'

    for item, tag in items.items():
        data = tag_data(movie, tag)
        if data:
            metadata[item] = data

    year = int(metadata.get('movieYear', 0))
    metadata['movieYear'] = "%04d" % year

    return _nfo_vitems(movie, metadata)
def from_nfo(full_path):
    """Return metadata from the .nfo file that accompanies a video.

    The .nfo path is ``full_path`` with its extension replaced by
    '.nfo'.  Results are cached in nfo_cache by ``full_path``; an empty
    dict is cached when the file is missing or cannot be parsed.
    """
    if full_path in nfo_cache:
        return nfo_cache[full_path]

    # Cache an empty result first, so it is in place for every early
    # return below.
    metadata = {}
    nfo_cache[full_path] = metadata

    nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
    if not os.path.exists(nfo_path):
        return metadata

    xmldoc = _parse_nfo(nfo_path)
    if not xmldoc:
        return metadata

    if xmldoc.getElementsByTagName('episodedetails'):
        # it's an episode
        metadata.update(_from_episode_nfo(nfo_path, xmldoc))
    elif xmldoc.getElementsByTagName('movie'):
        # it's a movie
        metadata.update(_from_movie_nfo(xmldoc))

    # common nfo cleanup
    if 'starRating' in metadata:
        # .NFO 0-10 -> TiVo 1-7
        stars = float(metadata['starRating'])
        metadata['starRating'] = int(stars * 6 / 10 + 1.5)

    for key, mapping in (('mpaaRating', MPAA_RATINGS),
                         ('tvRating', TV_RATINGS)):
        if key not in metadata:
            continue
        code = mapping.get(metadata[key], None)
        if code:
            metadata[key] = str(code)
        else:
            # Unrecognised rating spellings are dropped.
            del metadata[key]

    nfo_cache[full_path] = metadata
    return metadata
def from_tivo(full_path):
    """Return metadata extracted from a .TiVo file with tdcat.

    Runs the configured tdcat binary with the configured media access
    key and parses its output with from_details().  Successful results
    are cached in tivo_cache.  Returns an empty dict (not cached) when
    tdcat or the key is not configured, or when running tdcat or parsing
    its output fails.

    ``full_path`` is a UTF-8 encoded byte string.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    # An explicit check replaces the old assert, which is stripped
    # under "python -O".
    if not (tdcat_path and tivo_mak):
        return {}

    try:
        fname = unicode(full_path, 'utf-8')
        if mswindows:
            fname = fname.encode('iso8859-1')
        tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
        tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
        try:
            metadata = from_details(tdcat.stdout)
        finally:
            # Release the pipe and reap the child, so neither a file
            # descriptor nor a finished process is left behind.
            tdcat.stdout.close()
            tdcat.wait()
        tivo_cache[full_path] = metadata
    except Exception:
        # Best effort: any failure yields no metadata.
        metadata = {}

    return metadata
try:
    _BYTE_STRING = bytes
except NameError:
    # Interpreters without a "bytes" name: str is the byte string type.
    _BYTE_STRING = str


def force_utf8(text):
    """Return ``text`` as a UTF-8 encoded byte string.

    A byte string is first decoded as UTF-8; if that fails it is decoded
    as MacRoman on darwin and ISO-8859-1 elsewhere.  Text that is not a
    byte string is encoded directly.
    """
    if isinstance(text, _BYTE_STRING):
        try:
            text = text.decode('utf8')
        except UnicodeError:
            # Not valid UTF-8: fall back to the platform's legacy
            # single-byte encoding.
            if sys.platform == 'darwin':
                text = text.decode('macroman')
            else:
                text = text.decode('iso8859-1')
    return text.encode('utf-8')
def _dump_text(value):
    """Return ``value`` in a form that can be %-formatted into a line."""
    if isinstance(value, str):
        # Native strings are written as they are.  Re-encoding a byte
        # string would first decode it as ASCII and fail on any
        # non-ASCII data.
        return value
    try:
        return value.encode('utf-8')
    except AttributeError:
        # Numbers and other objects without .encode().
        return str(value)


def dump(output, metadata):
    """Write ``metadata`` to ``output`` as "key: value" lines.

    List values produce one line per item.  Values found in the HUMAN
    table for their key are written as the corresponding display string;
    everything else is written as UTF-8 text.

    ``output`` is any object with a write() method.
    """
    for key in metadata:
        value = metadata[key]
        if isinstance(value, list):
            for item in value:
                output.write('%s: %s\n' % (key, _dump_text(item)))
        elif key in HUMAN and value in HUMAN[key]:
            output.write('%s: %s\n' % (key, HUMAN[key][value]))
        else:
            output.write('%s: %s\n' % (key, _dump_text(value)))
# Command-line use: print the embedded metadata of the file named by the
# first argument.  Nothing is printed when no argument is given.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        fname = force_utf8(sys.argv[1])
        ext = os.path.splitext(fname)[1].lower()

        metadata = {}
        if ext == '.tivo':
            # config is initialised before from_tivo(), which reads the
            # tdcat path and media access key from it.
            config.init([])
            metadata.update(from_tivo(fname))
        elif ext in ('.mp4', '.m4v', '.mov'):
            metadata.update(from_moov(fname))
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            metadata.update(from_dvrms(fname))
        elif ext == '.wtv':
            vInfo = plugins.video.transcode.video_info(fname)
            rawmeta = vInfo['rawmeta']
            metadata.update(from_mscore(rawmeta))

        dump(sys.stdout, metadata)