Don't need GetPlugin() here.
[pyTivo/wmcbrine.git] / metadata.py
blob a9a8c5d929c967c7ef74f7b34d79086b84800628
1 #!/usr/bin/env python
3 import logging
4 import os
5 import subprocess
6 import sys
7 from datetime import datetime
8 from xml.dom import minidom
9 from xml.parsers import expat
10 try:
11 import plistlib
12 except:
13 pass
15 import mutagen
16 from lrucache import LRUCache
18 import config
19 import plugins.video.transcode
# Copyright boilerplate that gets stripped from program descriptions
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Rating label -> numeric code. Each table accepts several spellings of
# the same rating; all keys are upper-case.
TV_RATINGS = {
    # "TV-" prefixed
    'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4, 'TV-14': 5, 'TV-MA': 6,
    'TV-NR': 7,
    # "TV" prefixed, no dash
    'TVY7': 1, 'TVY': 2, 'TVG': 3, 'TVPG': 4, 'TV14': 5, 'TVMA': 6,
    'TVNR': 7,
    # bare labels
    'Y7': 1, 'Y': 2, 'G': 3, 'PG': 4, '14': 5, 'MA': 6, 'NR': 7,
    'UNRATED': 7,
    # "X<n>" codes
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

MPAA_RATINGS = {
    'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
    'NC-17': 6, 'NC17': 6, 'NR': 8, 'UNRATED': 8,
    # letter + digit codes
    'G1': 1, 'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8,
}

STAR_RATINGS = {
    # number of stars, in half-star steps
    '1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6, '4': 7,
    # asterisks
    '*': 1, '**': 3, '***': 5, '****': 7,
    # "X<n>" codes
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Numeric code -> label for display, keyed by metadata field name
HUMAN = {
    'mpaaRating': {1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X',
                   6: 'NC-17', 8: 'NR'},
    'tvRating': {1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14',
                 6: 'MA', 7: 'NR'},
    'starRating': {1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3',
                   6: '3.5', 7: '4'},
}

# The UTF-8 byte order mark (EF BB BF)
BOM = '\xef\xbb\xbf'

# Size units, in bytes
KB = 1024
MB = 1024 ** 2
GB = 1024 ** 3
# One LRU cache per metadata source, keyed by file path
# (50 entries each)
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
nfo_cache = LRUCache(50)

# True when running on Windows
mswindows = sys.platform == "win32"
def get_mpaa(rating):
    """Return the display label for a numeric MPAA rating ('NR' if unknown)."""
    labels = HUMAN['mpaaRating']
    if rating in labels:
        return labels[rating]
    return 'NR'
def get_tv(rating):
    """Return the display label for a numeric TV rating ('NR' if unknown)."""
    labels = HUMAN['tvRating']
    if rating in labels:
        return labels[rating]
    return 'NR'
def get_stars(rating):
    """Return the display label for a numeric star rating ('' if unknown)."""
    labels = HUMAN['starRating']
    if rating in labels:
        return labels[rating]
    return ''
def human_size(raw):
    """Format a byte count as a short human-readable string.

    The largest unit that *raw* strictly exceeds is used, with two
    decimals; anything up to and including 1 KB is shown in whole bytes.
    """
    raw = float(raw)
    for unit, label in ((GB, 'GB'), (MB, 'MB'), (KB, 'KB')):
        if raw > unit:
            return '%.2f %s' % (raw / unit, label)
    return '%d Bytes' % raw
def tag_data(element, tag):
    """Return the text found by following the path *tag* from *element*.

    *tag* is a '/'-separated path.  Each component is matched against the
    nodeName of the direct children of the current node, and the first
    match is followed.  Returns '' when a component is missing, when the
    final node has no children, or when its first child carries no
    character data (for example when it is another element).
    """
    for name in tag.split('/'):
        for child in element.childNodes:
            if child.nodeName == name:
                element = child
                break
        else:
            # No direct child with this name
            return ''

    first = element.firstChild
    if first is None:
        return ''
    # Element children have no .data attribute; report "no text" instead
    # of raising AttributeError.
    return getattr(first, 'data', '')
94 def _vtag_data(element, tag):
95 for name in tag.split('/'):
96 new_element = element.getElementsByTagName(name)
97 if not new_element:
98 return []
99 element = new_element[0]
100 elements = element.getElementsByTagName('element')
101 return [x.firstChild.data for x in elements if x.firstChild]
103 def _vtag_data_alternate(element, tag):
104 elements = [element]
105 for name in tag.split('/'):
106 new_elements = []
107 for elmt in elements:
108 new_elements += elmt.getElementsByTagName(name)
109 elements = new_elements
110 return [x.firstChild.data for x in elements if x.firstChild]
112 def _tag_value(element, tag):
113 item = element.getElementsByTagName(tag)
114 if item:
115 value = item[0].attributes['value'].value
116 return int(value[0])
def from_moov(full_path):
    """Return metadata read from the tags of an MP4-family file.

    *full_path* is a UTF-8 encoded byte string.  Results (including the
    empty dict for files mutagen cannot read) are kept in mp4_cache.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    metadata = {}
    len_desc = 0

    # An explicit check rather than assert: asserts vanish under -O, which
    # would let a None result reach the loop below.
    try:
        mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # Nothing to read from an empty tag
                continue
            value = value[0]
        if key == 'stik':
            metadata['isEpisode'] = ['false', 'true'][value == 'TV Show']
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            if len(value) == 4:
                # Year only: pad it out to a full timestamp
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                if k in metadata:
                    metadata[k].append(value)
                else:
                    metadata[k] = [value]
        elif key == '\xa9nam':
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            fields = value.split('|')
            # The rating is the second '|'-separated field; ignore
            # values that do not have one.
            if len(fields) > 1:
                rating = fields[1].upper()
                if rating in TV_RATINGS and 'us-tv' in value:
                    metadata['tvRating'] = TV_RATINGS[rating]
                elif rating in MPAA_RATINGS and 'mpaa' in value:
                    metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            try:
                data = plistlib.readPlistFromString(value)
            except Exception:
                # Unparseable plist: skip the credits, keep the rest
                pass
            else:
                for item in items:
                    if item in data:
                        metadata[items[item]] = [x['name']
                                                 for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
def from_mscore(rawmeta):
    """Translate Microsoft-style ('WM/...') tags into pyTivo metadata.

    *rawmeta* maps tag names to sequences of values; only the first value
    of each tag is used.  Lookups are best-effort: a tag that cannot be
    read is skipped.
    """
    # pyTivo field -> source tags.  When several tags are present, the
    # last one listed wins.
    sources = (
        ('title', ('Title',)),
        ('description', ('Description', 'WM/SubTitleDescription')),
        ('episodeTitle', ('WM/SubTitle',)),
        ('callsign', ('WM/MediaStationCallSign',)),
        ('displayMajorNumber', ('WM/MediaOriginalChannel',)),
        ('originalAirDate', ('WM/MediaOriginalBroadcastDateTime',)),
        ('rating', ('WM/ParentalRating',)),
        ('credits', ('WM/MediaCredits',)),
        ('genre', ('WM/Genre',)),
    )

    metadata = {}
    for field, tags in sources:
        for tag in tags:
            try:
                if tag not in rawmeta:
                    continue
                first = rawmeta[tag][0]
                if type(first) not in (str, unicode):
                    first = str(first)
                if first:
                    metadata[field] = first
            except:
                pass

    # A title accompanied by an episode title is the series title
    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']

    # 'genre', 'credits' and 'rating' are intermediate values: each is
    # converted to its pyTivo form and then removed.
    if 'genre' in metadata:
        genres = metadata.pop('genre').split(',')
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = genres

    if 'credits' in metadata:
        # ';' separates the groups, '/' the names within a group
        groups = [x.split('/') for x in metadata.pop('credits').split(';')]
        if len(groups) > 3:
            metadata['vActor'] = [x for x in (groups[0] + groups[3]) if x]
            metadata['vDirector'] = [x for x in groups[1] if x]

    if 'rating' in metadata:
        label = metadata.pop('rating')
        if label in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[label]

    return metadata
def from_dvrms(full_path):
    """Return metadata read from a DVR-MS/ASF/WMV file.

    *full_path* is a UTF-8 encoded byte string.  Results (including the
    empty dict for files mutagen cannot read) are kept in dvrms_cache.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    # An explicit check rather than assert, which is stripped under -O
    try:
        rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        rawmeta = None
    if not rawmeta:
        dvrms_cache[full_path] = {}
        return {}

    metadata = from_mscore(rawmeta)
    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Return metadata from the .eyetvp plist stored next to *full_path*.

    *full_path* is a UTF-8 encoded byte string.  An empty dict is returned
    when the directory has no .eyetvp file, when the plist cannot be read,
    or when it has no 'epg info' section.  Missing EPG entries are treated
    as empty rather than raising KeyError.
    """
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    metadata = {}
    path = os.path.dirname(unicode(full_path, 'utf-8'))

    candidates = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    if not candidates:
        return metadata
    eyetvp = os.path.join(path, candidates[0])

    try:
        eyetv = plistlib.readPlist(eyetvp)
    except Exception:
        return metadata
    if 'epg info' not in eyetv:
        return metadata

    info = eyetv['epg info']
    for key in keys:
        if info.get(key):
            metadata[keys[key]] = info[key]
    if info.get('SUBTITLE') and 'TITLE' in info:
        metadata['seriesTitle'] = info['TITLE']
    if info.get('ACTORS'):
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info.get('DIRECTOR'):
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in [('tvRating', 'TV_RATING', TV_RATINGS),
                                ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                                ('starRating', 'STAR_RATING', STAR_RATINGS)]:
        x = (info.get(etag) or '').upper()
        if x and x in ratings:
            metadata[ptag] = ratings[x]

    # movieYear must be set for the mpaa/star ratings to work
    if (('mpaaRating' in metadata or 'starRating' in metadata) and
            'movieYear' not in metadata):
        try:
            metadata['movieYear'] = eyetv['info']['start'].year
        except (KeyError, TypeError, AttributeError):
            # No usable recording start time to fall back on
            pass
    return metadata
def from_text(full_path):
    """Return metadata read from the text files associated with a file.

    *full_path* is a UTF-8 encoded byte string.  The files consulted, in
    reading order, are: <title>.properties, default.txt in every ancestor
    directory (outermost first), <file>.txt, .meta/default.txt and
    .meta/<file>.txt.  Later files override earlier ones for ordinary
    keys; keys starting with 'v' accumulate into lists.

    Lines are "key: value" ("key=value" in .properties files); lines
    starting with '#' and lines without a separator are ignored.
    """
    metadata = {}
    full_path = unicode(full_path, 'utf-8')
    path, name = os.path.split(full_path)
    title = os.path.splitext(name)[0]

    # Walk up to the root, collecting a default.txt for each directory
    search_paths = []
    ptmp = full_path
    while ptmp:
        parent = os.path.dirname(ptmp)
        if ptmp != parent:
            ptmp = parent
        else:
            break
        search_paths.append(os.path.join(ptmp, 'default.txt'))

    search_paths.append(os.path.join(path, title) + '.properties')
    search_paths.reverse()

    search_paths += [full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']

    for metafile in search_paths:
        if not os.path.exists(metafile):
            continue
        sep = ':='[metafile.endswith('.properties')]
        try:
            handle = open(metafile, 'U')
        except IOError:
            # Vanished or unreadable since the exists() check
            continue
        try:
            for line in handle:
                if line.startswith(BOM):
                    line = line[3:]
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    if key in metadata:
                        metadata[key].append(value)
                    else:
                        metadata[key] = [value]
                else:
                    metadata[key] = value
        finally:
            handle.close()

    # Ratings may be given as a label or directly as a number
    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]
        else:
            try:
                metadata[rating] = int(x)
            except ValueError:
                # Neither a known label nor a number: leave as is
                pass

    return metadata
def basic(full_path):
    """Return the combined metadata for an ordinary media file.

    Starts from the file name and modification time, then layers on the
    container tags (chosen by extension), any .nfo file and finally the
    text metadata files; each later source overrides the earlier ones.
    """
    base_path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)
    ext = ext.lower()

    # Negative modification times are treated as the epoch
    mtime = max(os.stat(unicode(full_path, 'utf-8')).st_mtime, 0)
    air_date = datetime.utcfromtimestamp(mtime).isoformat()

    metadata = {'title': title, 'originalAirDate': air_date}

    if ext in ('.mp4', '.m4v', '.mov'):
        embedded = from_moov(full_path)
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        embedded = from_dvrms(full_path)
    elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
        embedded = from_eyetv(full_path)
    else:
        embedded = {}

    for extra in (embedded, from_nfo(full_path), from_text(full_path)):
        metadata.update(extra)
    return metadata
def from_container(xmldoc):
    """Return metadata read from the first <Details> node of *xmldoc*."""
    fields = (('title', 'Title'),
              ('episodeTitle', 'EpisodeTitle'),
              ('description', 'Description'),
              ('programId', 'ProgramId'),
              ('seriesId', 'SeriesId'),
              ('episodeNumber', 'EpisodeNumber'),
              ('tvRating', 'TvRating'),
              ('displayMajorNumber', 'SourceChannel'),
              ('callsign', 'SourceStation'),
              ('showingBits', 'ShowingBits'),
              ('mpaaRating', 'MpaaRating'))

    details = xmldoc.getElementsByTagName('Details')[0]

    metadata = {}
    for field, tag in fields:
        text = tag_data(details, tag)
        if not text:
            continue
        if field == 'description':
            text = text.replace(TRIBUNE_CR, '')
        elif field == 'tvRating':
            text = int(text)
        elif field == 'displayMajorNumber' and '-' in text:
            # "major-minor" channel numbers are split into two fields
            major, minor = text.split('-')
            metadata['displayMinorNumber'] = minor
            text = major
        metadata[field] = text

    return metadata
def from_details(xml):
    """Return metadata parsed from a details XML document.

    *xml* is anything minidom.parse() accepts (a file name or file
    object).  The document must contain a <showing> element with a
    <program> element inside it.
    """
    xmldoc = minidom.parse(xml)
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    # Single-valued fields and their paths below <showing>
    simple = (('description', 'program/description'),
              ('title', 'program/title'),
              ('episodeTitle', 'program/episodeTitle'),
              ('episodeNumber', 'program/episodeNumber'),
              ('programId', 'program/uniqueId'),
              ('seriesId', 'program/series/uniqueId'),
              ('seriesTitle', 'program/series/seriesTitle'),
              ('originalAirDate', 'program/originalAirDate'),
              ('isEpisode', 'program/isEpisode'),
              ('movieYear', 'program/movieYear'),
              ('partCount', 'partCount'),
              ('partIndex', 'partIndex'),
              ('time', 'time'))

    metadata = {}
    for field, path in simple:
        text = tag_data(showing, path)
        if not text:
            continue
        if field == 'description':
            text = text.replace(TRIBUNE_CR, '')
        metadata[field] = text

    # Multi-valued fields, each a list of <element> nodes under <program>
    for field in ('vActor', 'vChoreographer', 'vDirector',
                  'vExecProducer', 'vProgramGenre', 'vGuestStar',
                  'vHost', 'vProducer', 'vWriter'):
        values = _vtag_data(program, field)
        if values:
            metadata[field] = values

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    # Ratings are carried in a 'value' attribute
    for field in ('starRating', 'mpaaRating'):
        code = _tag_value(program, field)
        if code:
            metadata[field] = code

    tv_code = _tag_value(showing, 'tvRating')
    if tv_code:
        metadata['tvRating'] = tv_code

    return metadata
def _nfo_vitems(source, metadata):
    """Merge the multi-valued .nfo fields of *source* into *metadata*.

    New values are appended to any list already present, skipping
    duplicates.  *metadata* is modified in place and also returned.
    """
    paths = {'vGenre': 'genre',
             'vWriter': 'credits',
             'vDirector': 'director',
             'vActor': 'actor/name'}

    for field, path in paths.items():
        found = _vtag_data_alternate(source, path)
        if not found:
            continue
        bucket = metadata.setdefault(field, [])
        for entry in found:
            if entry not in bucket:
                bucket.append(entry)

    if 'vGenre' in metadata:
        # Both genre fields share the vGenre list
        genres = metadata['vGenre']
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = genres

    return metadata
468 def _parse_nfo(nfo_path, nfo_data=None):
469 # nfo files can contain XML or a URL to seed the XBMC metadata scrapers
470 # It's also possible to have both (a URL after the XML metadata)
471 # pyTivo only parses the XML metadata, but we'll try to stip the URL
472 # from mixed XML/URL files. Returns `None` when XML can't be parsed.
473 if nfo_data is None:
474 nfo_data = [line.strip() for line in file(nfo_path, 'rU')]
475 xmldoc = None
476 try:
477 xmldoc = minidom.parseString(os.linesep.join(nfo_data))
478 except expat.ExpatError, err:
479 if expat.ErrorString(err.code) == expat.errors.XML_ERROR_INVALID_TOKEN:
480 # might be a URL outside the xml
481 while len(nfo_data) > err.lineno:
482 if len(nfo_data[-1]) == 0:
483 nfo_data.pop()
484 else:
485 break
486 if len(nfo_data) == err.lineno:
487 # last non-blank line contains the error
488 nfo_data.pop()
489 return _parse_nfo(nfo_path, nfo_data)
490 return xmldoc
def _from_tvshow_nfo(tvshow_nfo_path):
    """Return the series-level metadata of a tvshow.nfo file.

    Results are kept in nfo_cache; a file that can't be parsed, or that
    has no <tvshow> element, is cached as an empty dict.
    """
    if tvshow_nfo_path in nfo_cache:
        return nfo_cache[tvshow_nfo_path]

    # Cache the (so far empty) result up front so failures are remembered
    metadata = {}
    nfo_cache[tvshow_nfo_path] = metadata

    xmldoc = _parse_nfo(tvshow_nfo_path)
    if not xmldoc:
        return metadata

    shows = xmldoc.getElementsByTagName('tvshow')
    if not shows:
        return metadata
    tvshow = shows[0]

    fields = (('description', 'plot'),
              ('title', 'title'),
              ('seriesTitle', 'showtitle'),
              ('starRating', 'rating'),
              ('tvRating', 'mpaa'))
    for field, tag in fields:
        text = tag_data(tvshow, tag)
        if text:
            metadata[field] = text

    metadata = _nfo_vitems(tvshow, metadata)

    nfo_cache[tvshow_nfo_path] = metadata
    return metadata
def _from_episode_nfo(nfo_path, xmldoc):
    """Return metadata for an episode .nfo document.

    Series-level values come from the nearest tvshow.nfo found in the
    directories above *nfo_path*; values from the <episodedetails> element
    of *xmldoc* then override or extend them.
    """
    metadata = {}

    items = {'description': 'plot',
             'episodeTitle': 'title',
             'seriesTitle': 'showtitle',
             'originalAirDate': 'aired',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # find tvshow.nfo
    path = nfo_path
    while True:
        basepath = os.path.dirname(path)
        if path == basepath:
            break
        path = basepath
        tv_nfo = os.path.join(path, 'tvshow.nfo')
        if os.path.exists(tv_nfo):
            # Copy the lists: _from_tvshow_nfo() returns its cached dict,
            # and _nfo_vitems() below appends in place.  Sharing them
            # would leak this episode's cast and genres into the cache
            # and from there into every later episode.
            for key, value in _from_tvshow_nfo(tv_nfo).items():
                if isinstance(value, list):
                    value = list(value)
                metadata[key] = value
            break

    episode = xmldoc.getElementsByTagName('episodedetails')
    if episode:
        episode = episode[0]
    else:
        return metadata

    metadata['isEpisode'] = 'true'
    for item in items:
        data = tag_data(episode, items[item])
        if data:
            metadata[item] = data

    # Prefer the display numbers; "-1" means "not set"
    season = tag_data(episode, 'displayseason')
    if not season or season == "-1":
        season = tag_data(episode, 'season')
    if not season:
        season = 1

    ep_num = tag_data(episode, 'displayepisode')
    if not ep_num or ep_num == "-1":
        ep_num = tag_data(episode, 'episode')
    if ep_num and ep_num != "-1":
        try:
            metadata['episodeNumber'] = "%d%02d" % (int(season), int(ep_num))
        except ValueError:
            # Non-numeric season/episode: leave episodeNumber unset
            pass

    if 'originalAirDate' in metadata:
        metadata['originalAirDate'] += 'T00:00:00Z'

    metadata = _nfo_vitems(episode, metadata)

    return metadata
def _from_movie_nfo(xmldoc):
    """Return metadata from the first <movie> element of *xmldoc*.

    Returns an empty dict when there is no <movie> element.  movieYear is
    always set, as four digits ("0000" when the file gives no year).
    """
    movies = xmldoc.getElementsByTagName('movie')
    if not movies:
        return {}
    movie = movies[0]

    metadata = {'isEpisode': 'false'}

    fields = (('description', 'plot'),
              ('title', 'title'),
              ('movieYear', 'year'),
              ('starRating', 'rating'),
              ('mpaaRating', 'mpaa'))
    for field, tag in fields:
        text = tag_data(movie, tag)
        if text:
            metadata[field] = text

    year = int(metadata.get('movieYear', 0))
    metadata['movieYear'] = "%04d" % year

    return _nfo_vitems(movie, metadata)
def from_nfo(full_path):
    """Return metadata from the .nfo file that accompanies *full_path*.

    The .nfo file has the same name as the media file with the extension
    replaced.  Results are kept in nfo_cache; a missing or unparseable
    file is cached as an empty dict.
    """
    if full_path in nfo_cache:
        return nfo_cache[full_path]

    metadata = nfo_cache[full_path] = {}

    nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
    if not os.path.exists(nfo_path):
        return metadata

    xmldoc = _parse_nfo(nfo_path)
    if not xmldoc:
        return metadata

    if xmldoc.getElementsByTagName('episodedetails'):
        # it's an episode
        metadata.update(_from_episode_nfo(nfo_path, xmldoc))
    elif xmldoc.getElementsByTagName('movie'):
        # it's a movie
        metadata.update(_from_movie_nfo(xmldoc))

    # common nfo cleanup
    if 'starRating' in metadata:
        # .NFO 0-10 -> TiVo 1-7
        try:
            rating = int(float(metadata['starRating']) * 6 / 10 + 1.5)
        except (ValueError, OverflowError):
            # Not a usable number: drop it rather than fail (which would
            # also leave a half-processed dict in the cache)
            del metadata['starRating']
        else:
            metadata['starRating'] = rating

    for key, mapping in [('mpaaRating', MPAA_RATINGS),
                         ('tvRating', TV_RATINGS)]:
        if key in metadata:
            rating = mapping.get(metadata[key], None)
            if rating:
                metadata[key] = str(rating)
            else:
                # Unrecognized label
                del metadata[key]

    nfo_cache[full_path] = metadata
    return metadata
def from_tivo(full_path):
    """Return metadata extracted from a .TiVo file by running tdcat.

    *full_path* is a UTF-8 encoded byte string.  Requires both the tdcat
    binary and the 'tivo_mak' setting; without them, or when extraction
    fails, an empty dict is returned and nothing is cached.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    if not (tdcat_path and tivo_mak):
        return {}

    try:
        fname = unicode(full_path, 'utf-8')
        if mswindows:
            fname = fname.encode('iso8859-1')
        tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
        tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
        try:
            metadata = from_details(tdcat.stdout)
        finally:
            # Release the pipe and reap the child, even when parsing
            # fails, so no zombie process or open descriptor is left.
            tdcat.stdout.close()
            tdcat.wait()
        tivo_cache[full_path] = metadata
    except Exception:
        metadata = {}

    return metadata
def force_utf8(text):
    """Return *text* as a UTF-8 encoded byte string.

    Byte strings are decoded as UTF-8 when possible; otherwise they are
    assumed to be MacRoman on macOS and ISO 8859-1 elsewhere.  Unicode
    strings are simply encoded.
    """
    # 'bytes' is the same type as 'str' on Python 2.6+
    if isinstance(text, bytes):
        try:
            text = text.decode('utf8')
        except UnicodeDecodeError:
            if sys.platform == 'darwin':
                text = text.decode('macroman')
            else:
                text = text.decode('iso8859-1')
    return text.encode('utf-8')
def _dump_text(value):
    """Return *value* as a byte string for dump()."""
    if isinstance(value, unicode):
        return value.encode('utf-8')
    if isinstance(value, str):
        # Already bytes; re-encoding would fail on non-ASCII content
        return value
    # Numbers and anything else (e.g. rating codes without a label)
    return str(value)


def dump(output, metadata):
    """Write *metadata* to *output* as "key: value" lines.

    List values produce one line per item.  Values that have a display
    label in HUMAN (the rating codes) are written as that label.
    """
    for key in metadata:
        value = metadata[key]
        if isinstance(value, list):
            for item in value:
                output.write('%s: %s\n' % (key, _dump_text(item)))
        elif key in HUMAN and value in HUMAN[key]:
            output.write('%s: %s\n' % (key, HUMAN[key][value]))
        else:
            output.write('%s: %s\n' % (key, _dump_text(value)))
if __name__ == '__main__':
    # Command-line use: print the embedded metadata of the file named by
    # the first argument.  Nothing happens without an argument.
    if len(sys.argv) > 1:
        config.init([])
        logging.basicConfig()

        fname = force_utf8(sys.argv[1])
        ext = os.path.splitext(fname)[1].lower()

        metadata = {}
        if ext == '.tivo':
            metadata.update(from_tivo(fname))
        elif ext in ('.mp4', '.m4v', '.mov'):
            metadata.update(from_moov(fname))
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            metadata.update(from_dvrms(fname))
        elif ext == '.wtv':
            vInfo = plugins.video.transcode.video_info(fname)
            metadata.update(from_mscore(vInfo['rawmeta']))

        dump(sys.stdout, metadata)
702 dump(sys.stdout, metadata)