Return empty metadata on failed parsing of XBMC .nfo files instead of
[pyTivo/wmcbrine/lucasnz.git] / metadata.py
bloba93eb38aa0f562a3d9bf5d3c742138e03922282f
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 import sys
6 from datetime import datetime
7 from xml.dom import minidom
8 try:
9 import plistlib
10 except:
11 pass
13 import mutagen
14 from lrucache import LRUCache
16 import config
17 import plugins.video.transcode
# Copyright notice that gets stripped out of program descriptions.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Accepted spellings of a TV rating -> numeric rating code.
TV_RATINGS = {
    'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4, 'TV-14': 5, 'TV-MA': 6,
    'TV-NR': 7,
    'TVY7': 1, 'TVY': 2, 'TVG': 3, 'TVPG': 4, 'TV14': 5, 'TVMA': 6,
    'TVNR': 7,
    'Y7': 1, 'Y': 2, 'G': 3, 'PG': 4, '14': 5, 'MA': 6, 'NR': 7,
    'UNRATED': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Accepted spellings of an MPAA rating -> numeric rating code.
MPAA_RATINGS = {
    'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
    'NC-17': 6, 'NC17': 6, 'NR': 8, 'UNRATED': 8,
    'G1': 1, 'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8,
}

# Star ratings, written either as a number or as a run of asterisks,
# -> numeric rating code.
STAR_RATINGS = {
    '1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6, '4': 7,
    '*': 1, '**': 3, '***': 5, '****': 7,
}

# Reverse lookup: metadata key -> {numeric rating code: display text}.
HUMAN = {
    'mpaaRating': {1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X',
                   6: 'NC-17', 8: 'NR'},
    'tvRating': {1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14', 6: 'MA',
                 7: 'NR'},
    'starRating': {1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3', 6: '3.5',
                   7: '4'},
}

# UTF-8 byte order mark, as it appears at the start of some text files.
BOM = '\xef\xbb\xbf'
# One 50-entry LRU cache of parsed metadata per source type, keyed by path.
tivo_cache, mp4_cache, dvrms_cache, nfo_cache = (
    LRUCache(50) for _unused in range(4))

# True when running on Windows.
mswindows = sys.platform == "win32"
def get_mpaa(rating):
    """Return the display text for a numeric MPAA rating code.

    Codes that are not listed in HUMAN['mpaaRating'] yield 'NR'.
    """
    names = HUMAN['mpaaRating']
    if rating in names:
        return names[rating]
    return 'NR'
def get_tv(rating):
    """Return the display text for a numeric TV rating code.

    Codes that are not listed in HUMAN['tvRating'] yield 'NR'.
    """
    names = HUMAN['tvRating']
    if rating in names:
        return names[rating]
    return 'NR'
def get_stars(rating):
    """Return the display text for a numeric star rating code.

    Codes that are not listed in HUMAN['starRating'] yield ''.
    """
    names = HUMAN['starRating']
    if rating in names:
        return names[rating]
    return ''
def tag_data(element, tag):
    """Return the data of the first child node found along a tag path.

    tag is a '/'-separated list of tag names.  Each step descends into
    the first descendant of the current node with that name.  Returns ''
    when a step matches nothing or the final element has no children.
    """
    node = element
    for part in tag.split('/'):
        matches = node.getElementsByTagName(part)
        if not matches:
            return ''
        node = matches[0]

    first = node.firstChild
    if first:
        return first.data
    return ''
def _vtag_data(element, tag):
    """Return the data of every <element> node under a tag path.

    The '/'-separated path is followed through the first match at each
    step; [] is returned if a step matches nothing.  <element> nodes
    without children are skipped.
    """
    node = element
    for part in tag.split('/'):
        matches = node.getElementsByTagName(part)
        if not matches:
            return []
        node = matches[0]

    values = []
    for entry in node.getElementsByTagName('element'):
        if entry.firstChild:
            values.append(entry.firstChild.data)
    return values
def _vtag_data_alternate(element, tag):
    """Return the data of every node reachable along a tag path.

    Unlike _vtag_data, every match is followed at each step of the
    '/'-separated path, not just the first one.  Matching nodes without
    children are skipped.
    """
    current = [element]
    for part in tag.split('/'):
        found = []
        for node in current:
            found.extend(node.getElementsByTagName(part))
        current = found

    values = []
    for node in current:
        if node.firstChild:
            values.append(node.firstChild.data)
    return values
def _tag_value(element, tag):
    """Return the first character of a tag's 'value' attribute as an int.

    Only the first descendant named tag is examined.  Returns None when
    there is no such descendant.
    """
    matches = element.getElementsByTagName(tag)
    if not matches:
        return None
    text = matches[0].attributes['value'].value
    return int(text[0])
def from_moov(full_path):
    """Read metadata from the tags of an MP4/MOV style file via mutagen.

    full_path is decoded as UTF-8 before being handed to mutagen.
    Results are memoized in mp4_cache, including the empty result for
    files that cannot be read.

    Returns a dict of metadata keys; {} when mutagen raises or returns
    nothing usable.  Malformed individual tags are skipped rather than
    aborting the whole read.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    try:
        mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        mp4meta = None
    # Explicit check rather than assert, which is stripped under -O.
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    metadata = {}
    len_desc = 0

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # An empty tag list carries no data.
                continue
            value = value[0]
        if key == 'stik':
            metadata['isEpisode'] = ['false', 'true'][value == 'TV Show']
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            if len(value) == 4:
                # A bare year is expanded to a full timestamp.
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                if k in metadata:
                    metadata[k].append(value)
                else:
                    metadata[k] = [value]
        elif key == '\xa9nam':
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            fields = value.split("|")
            if len(fields) < 2:
                # No rating field after the first separator.
                continue
            rating = fields[1].upper()
            if rating in TV_RATINGS and 'us-tv' in value:
                metadata['tvRating'] = TV_RATINGS[rating]
            elif rating in MPAA_RATINGS and 'mpaa' in value:
                metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            try:
                data = plistlib.readPlistFromString(value)
            except Exception:
                # A broken plist only costs the credits, not the rest.
                continue
            for item in items:
                if item in data:
                    metadata[items[item]] = [x['name'] for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
def from_mscore(rawmeta):
    """Translate Microsoft-style (WM/...) tags into metadata keys.

    rawmeta maps tag names to sequences; the first entry of each is used
    and converted with str() unless it is already a str or unicode.
    Tags that fail to read are silently ignored.  Returns a new dict.
    """
    sources = {'title': ['Title'],
               'description': ['Description', 'WM/SubTitleDescription'],
               'episodeTitle': ['WM/SubTitle'],
               'callsign': ['WM/MediaStationCallSign'],
               'displayMajorNumber': ['WM/MediaOriginalChannel'],
               'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
               'rating': ['WM/ParentalRating'],
               'credits': ['WM/MediaCredits'],
               'genre': ['WM/Genre']}

    metadata = {}
    for field, tags in sources.items():
        # A later tag in the list overrides an earlier one when both exist.
        for tag in tags:
            try:
                if tag not in rawmeta:
                    continue
                raw = rawmeta[tag][0]
                if type(raw) not in (str, unicode):
                    raw = str(raw)
                if raw:
                    metadata[field] = raw
            except:
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']

    if 'genre' in metadata:
        # Both genre keys share the same list object.
        genres = metadata.pop('genre').split(',')
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = genres

    if 'credits' in metadata:
        # ';' separates credit groups, '/' separates names within a group.
        groups = [part.split('/')
                  for part in metadata.pop('credits').split(';')]
        if len(groups) > 3:
            metadata['vActor'] = [n for n in (groups[0] + groups[3]) if n]
            metadata['vDirector'] = [n for n in groups[1] if n]

    if 'rating' in metadata:
        rating = metadata.pop('rating')
        if rating in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[rating]

    return metadata
def from_dvrms(full_path):
    """Read metadata from a file mutagen can open, via from_mscore().

    Results are memoized in dvrms_cache.  If mutagen raises or returns
    nothing usable, {} is cached and a fresh {} is returned.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    try:
        rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
        assert(rawmeta)
    except:
        dvrms_cache[full_path] = {}
        return {}
    else:
        result = from_mscore(rawmeta)
        dvrms_cache[full_path] = result
        return result
def from_eyetv(full_path):
    """Read metadata from the .eyetvp plist that sits next to a recording.

    The first file ending in '.eyetvp' in the directory of full_path is
    parsed with plistlib and its 'epg info' dict is translated into
    metadata keys.

    Returns {} when there is no .eyetvp file or no 'epg info' entry.
    Fields missing from 'epg info' are skipped.  Errors from listing the
    directory or parsing the plist still propagate.
    """
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    metadata = {}
    path = os.path.dirname(unicode(full_path, 'utf-8'))
    candidates = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    if not candidates:
        return metadata
    eyetv = plistlib.readPlist(os.path.join(path, candidates[0]))
    info = eyetv.get('epg info')
    if not info:
        return metadata

    for key in keys:
        if info.get(key):
            metadata[keys[key]] = info[key]
    if info.get('SUBTITLE'):
        # A subtitle means TITLE is really the series name.
        metadata['seriesTitle'] = info.get('TITLE', '')
    if info.get('ACTORS'):
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info.get('DIRECTOR'):
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in [('tvRating', 'TV_RATING', TV_RATINGS),
                                ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                                ('starRating', 'STAR_RATING', STAR_RATINGS)]:
        x = (info.get(etag) or '').upper()
        if x and x in ratings:
            metadata[ptag] = ratings[x]

    # movieYear must be set for the mpaa/star ratings to work
    if (('mpaaRating' in metadata or 'starRating' in metadata) and
            'movieYear' not in metadata):
        try:
            metadata['movieYear'] = eyetv['info']['start'].year
        except (KeyError, AttributeError, TypeError):
            # No usable start time in the plist; leave movieYear unset.
            pass
    return metadata
def from_text(full_path):
    """Collect metadata from the text files associated with a media file.

    Files are read in this order, later values overriding earlier ones
    (keys starting with 'v' accumulate into lists instead):

      1. <dir>/<title>.properties   ('=' separated)
      2. default.txt in every ancestor directory, outermost first
      3. <full_path>.txt
      4. <dir>/.meta/default.txt
      5. <dir>/.meta/<name>.txt

    Lines starting with '#' or lacking the separator are ignored.  After
    reading, textual tvRating/mpaaRating/starRating values found in the
    rating tables are replaced by their numeric codes.
    """
    metadata = {}
    full_path = unicode(full_path, 'utf-8')
    path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)

    # Walk up to the filesystem root, innermost directory first.
    search_paths = []
    ptmp = full_path
    while ptmp:
        parent = os.path.dirname(ptmp)
        if ptmp != parent:
            ptmp = parent
        else:
            break
        search_paths.append(os.path.join(ptmp, 'default.txt'))

    search_paths.append(os.path.join(path, title) + '.properties')
    search_paths.reverse()

    search_paths += [full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']

    for metafile in search_paths:
        if not os.path.exists(metafile):
            continue
        # ':' for .txt files, '=' for .properties files.
        sep = ':='[metafile.endswith('.properties')]
        handle = open(metafile, 'U')
        try:
            for line in handle:
                if line.startswith(BOM):
                    line = line[3:]
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    if key in metadata:
                        metadata[key].append(value)
                    else:
                        metadata[key] = [value]
                else:
                    metadata[key] = value
        finally:
            # Close explicitly instead of relying on garbage collection.
            handle.close()

    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]

    return metadata
def basic(full_path):
    """Build the metadata dict for a media file from all known sources.

    Starts with the file name (minus extension) as the title and the
    file's modification time as originalAirDate, then overlays, in order:
    container metadata chosen by extension or .eyetv directory, the
    matching .nfo file, and finally the text metadata files.
    """
    base_path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)
    ext = ext.lower()

    # Negative modification times are clamped to the epoch.
    mtime = max(os.stat(unicode(full_path, 'utf-8')).st_mtime, 0)
    air_date = datetime.utcfromtimestamp(mtime)

    metadata = {'title': title}
    metadata['originalAirDate'] = air_date.isoformat()

    if ext in ('.mp4', '.m4v', '.mov'):
        metadata.update(from_moov(full_path))
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        metadata.update(from_dvrms(full_path))
    elif base_path.endswith('.eyetv') and 'plistlib' in sys.modules:
        metadata.update(from_eyetv(full_path))

    for source in (from_nfo, from_text):
        metadata.update(source(full_path))

    return metadata
def from_container(xmldoc):
    """Extract metadata from the first <Details> element of an XML doc.

    xmldoc is a DOM node supporting getElementsByTagName().  Empty or
    missing fields are omitted.  The Tribune copyright notice is removed
    from the description, TvRating is converted to int, and a
    SourceChannel of the form 'major-minor' is split into
    displayMajorNumber and displayMinorNumber.

    Raises IndexError if there is no <Details> element and ValueError if
    TvRating is not an integer.
    """
    keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
            'description': 'Description', 'seriesId': 'SeriesId',
            'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
            'displayMajorNumber': 'SourceChannel',
            'callsign': 'SourceStation',
            'showingBits': 'ShowingBits', 'mpaaRating': 'MpaaRating'}

    details = xmldoc.getElementsByTagName('Details')[0]

    metadata = {}
    for key in keys:
        data = tag_data(details, keys[key])
        if not data:
            continue
        if key == 'description':
            data = data.replace(TRIBUNE_CR, '')
        elif key == 'tvRating':
            data = int(data)
        elif key == 'displayMajorNumber' and '-' in data:
            # Split on the first dash only, so a value with several
            # dashes no longer fails to unpack.
            data, metadata['displayMinorNumber'] = data.split('-', 1)
        metadata[key] = data

    return metadata
def from_details(xml):
    """Parse a TiVo details XML document into a metadata dict.

    xml is handed straight to minidom.parse().  Data is taken from the
    first <showing> element and the first <program> inside it; an
    IndexError is raised if either is missing.  Empty fields are omitted.
    """
    xmldoc = minidom.parse(xml)
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    metadata = {}

    # Single-valued fields: (metadata key, tag path below <showing>).
    scalar_fields = (('description', 'program/description'),
                     ('title', 'program/title'),
                     ('episodeTitle', 'program/episodeTitle'),
                     ('episodeNumber', 'program/episodeNumber'),
                     ('seriesId', 'program/series/uniqueId'),
                     ('seriesTitle', 'program/series/seriesTitle'),
                     ('originalAirDate', 'program/originalAirDate'),
                     ('isEpisode', 'program/isEpisode'),
                     ('movieYear', 'program/movieYear'),
                     ('partCount', 'partCount'),
                     ('partIndex', 'partIndex'),
                     ('time', 'time'))
    for field, tag_path in scalar_fields:
        text = tag_data(showing, tag_path)
        if not text:
            continue
        if field == 'description':
            text = text.replace(TRIBUNE_CR, '')
        metadata[field] = text

    # Multi-valued fields, each a list of <element> children.
    for field in ('vActor', 'vChoreographer', 'vDirector',
                  'vExecProducer', 'vProgramGenre', 'vGuestStar',
                  'vHost', 'vProducer', 'vWriter'):
        values = _vtag_data(program, field)
        if values:
            metadata[field] = values

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    # Ratings are read from the 'value' attribute of their tags.
    for node, tag in ((program, 'starRating'),
                      (program, 'mpaaRating'),
                      (showing, 'tvRating')):
        code = _tag_value(node, tag)
        if code:
            metadata[tag] = code

    return metadata
def _nfo_vitems(source, metadata):
    """Merge the multi-valued .nfo fields of source into metadata.

    Genres, writers, directors and actor names found under source are
    added to the vGenre/vWriter/vDirector/vActor lists, skipping values
    already present.  vSeriesGenre and vProgramGenre are set to the same
    list as vGenre.

    metadata is updated in place and returned.  Lists already stored in
    metadata are replaced by extended copies rather than appended to,
    because they may be shared with another (cached) metadata dict.
    """
    vItems = {'vGenre': 'genre',
              'vWriter': 'credits',
              'vDirector': 'director',
              'vActor': 'actor/name'}

    for key in vItems:
        data = _vtag_data_alternate(source, vItems[key])
        if not data:
            continue
        # Copy first: never mutate a list we did not create.
        merged = list(metadata.get(key, []))
        for dat in data:
            if dat not in merged:
                merged.append(dat)
        metadata[key] = merged

    if 'vGenre' in metadata:
        metadata['vSeriesGenre'] = metadata['vProgramGenre'] = metadata['vGenre']

    return metadata
def _from_tvshow_nfo(tvshow_nfo_path):
    """Read series-level metadata from a tvshow.nfo file.

    Results are memoized in nfo_cache under the file's path.  Returns {}
    (also cached) when the file cannot be opened or parsed, or has no
    <tvshow> element.
    """
    if tvshow_nfo_path in nfo_cache:
        return nfo_cache[tvshow_nfo_path]

    items = {'description': 'plot',
             'title': 'title',
             'seriesTitle': 'showtitle',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # Cache the empty result up front so every early return is memoized.
    nfo_cache[tvshow_nfo_path] = metadata = {}

    try:
        nfo = open(tvshow_nfo_path, 'U')
        try:
            xmldoc = minidom.parse(nfo)
        finally:
            # Close explicitly instead of relying on garbage collection.
            nfo.close()
    except Exception:
        return metadata

    tvshow = xmldoc.getElementsByTagName('tvshow')
    if not tvshow:
        return metadata
    tvshow = tvshow[0]

    for item in items:
        data = tag_data(tvshow, items[item])
        if data:
            metadata[item] = data

    metadata = _nfo_vitems(tvshow, metadata)

    nfo_cache[tvshow_nfo_path] = metadata
    return metadata
def _from_episode_nfo(nfo_path, xmldoc):
    """Build metadata for an episode from its parsed .nfo document.

    Series-level data is taken from the nearest tvshow.nfo found in the
    .nfo file's directory or one of its ancestors, then overlaid with
    the fields of the first <episodedetails> element in xmldoc.

    episodeNumber is the season followed by the two-digit episode.  The
    season defaults to 1, and the key is omitted when the episode is
    missing, "-1", or either number is not an integer.
    """
    metadata = {}

    items = {'description': 'plot',
             'episodeTitle': 'title',
             'seriesTitle': 'showtitle',
             'originalAirDate': 'aired',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # find tvshow.nfo
    path = nfo_path
    while True:
        basepath = os.path.dirname(path)
        if path == basepath:
            # Reached the filesystem root without finding one.
            break
        path = basepath
        tv_nfo = os.path.join(path, 'tvshow.nfo')
        if os.path.exists(tv_nfo):
            metadata.update(_from_tvshow_nfo(tv_nfo))
            break

    episode = xmldoc.getElementsByTagName('episodedetails')
    if not episode:
        return metadata
    episode = episode[0]

    metadata['isEpisode'] = 'true'
    for item in items:
        data = tag_data(episode, items[item])
        if data:
            metadata[item] = data

    # Prefer the display* numbers; "-1" means they are not set.
    season = tag_data(episode, 'displayseason')
    if not season or season == "-1":
        season = tag_data(episode, 'season')
    if not season:
        season = 1

    ep_num = tag_data(episode, 'displayepisode')
    if not ep_num or ep_num == "-1":
        ep_num = tag_data(episode, 'episode')
    if ep_num and ep_num != "-1":
        try:
            metadata['episodeNumber'] = "%d%02d" % (int(season), int(ep_num))
        except ValueError:
            # Non-numeric season/episode text: leave the number out
            # instead of failing the whole metadata lookup.
            pass

    if 'originalAirDate' in metadata:
        metadata['originalAirDate'] += 'T00:00:00Z'

    metadata = _nfo_vitems(episode, metadata)

    return metadata
def _from_movie_nfo(xmldoc):
    """Build metadata for a movie from its parsed .nfo document.

    Reads the first <movie> element of xmldoc; returns {} if there is
    none.  movieYear is always set to a four-digit string, falling back
    to '0000' when the year is missing or not an integer.
    """
    metadata = {}

    movie = xmldoc.getElementsByTagName('movie')
    if not movie:
        return metadata
    movie = movie[0]

    items = {'description': 'plot',
             'title': 'title',
             'movieYear': 'year',
             'starRating': 'rating',
             'mpaaRating': 'mpaa'}

    metadata['isEpisode'] = 'false'

    for item in items:
        data = tag_data(movie, items[item])
        if data:
            metadata[item] = data

    try:
        year = int(metadata.get('movieYear', 0))
    except ValueError:
        # Treat an unparsable year like a missing one.
        year = 0
    metadata['movieYear'] = "%04d" % year

    metadata = _nfo_vitems(movie, metadata)
    return metadata
def from_nfo(full_path):
    """Read metadata from the .nfo file that accompanies a media file.

    The .nfo path is full_path with its extension replaced by '.nfo'.
    Results are memoized in nfo_cache.  Returns {} (also cached) when the
    file is missing or cannot be opened or parsed.

    After parsing, starRating is rescaled to an int and
    mpaaRating/tvRating are replaced by their numeric codes as strings.
    Ratings that cannot be converted are dropped.
    """
    if full_path in nfo_cache:
        return nfo_cache[full_path]

    # Cache the empty result up front so every early return is memoized.
    metadata = nfo_cache[full_path] = {}

    nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
    if not os.path.exists(nfo_path):
        return metadata

    try:
        nfo = open(nfo_path, 'U')
        try:
            xmldoc = minidom.parse(nfo)
        finally:
            # Close explicitly instead of relying on garbage collection.
            nfo.close()
    except Exception:
        return metadata

    if xmldoc.getElementsByTagName('episodedetails'):
        # it's an episode
        metadata.update(_from_episode_nfo(nfo_path, xmldoc))
    elif xmldoc.getElementsByTagName('movie'):
        # it's a movie
        metadata.update(_from_movie_nfo(xmldoc))

    # common nfo cleanup
    if 'starRating' in metadata:
        # .NFO 0-10 -> TiVo 1-7
        try:
            rating = int(float(metadata['starRating']) * 6 / 10 + 1.5)
        except ValueError:
            # Not a number: drop it, as unknown mpaa/tv ratings are below.
            del metadata['starRating']
        else:
            metadata['starRating'] = rating

    for key, mapping in [('mpaaRating', MPAA_RATINGS),
                         ('tvRating', TV_RATINGS)]:
        if key in metadata:
            rating = mapping.get(metadata[key], None)
            if rating:
                metadata[key] = str(rating)
            else:
                del metadata[key]

    nfo_cache[full_path] = metadata
    return metadata
def from_tivo(full_path):
    """Read metadata from a .tivo file by running tdcat on it.

    The tdcat binary and the media access key come from config.  The
    output of tdcat is parsed by from_details() and successful results
    are memoized in tivo_cache.

    Returns {} (not cached) when tdcat or the key is not configured, or
    when running tdcat or parsing its output fails.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    # Explicit check rather than assert, which is stripped under -O.
    if not (tdcat_path and tivo_mak):
        return {}

    try:
        fname = unicode(full_path, 'utf-8')
        if mswindows:
            fname = fname.encode('iso8859-1')
        tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
        tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
        try:
            metadata = from_details(tdcat.stdout)
        finally:
            # Release the pipe and reap the child so it does not linger.
            tdcat.stdout.close()
            tdcat.wait()
        tivo_cache[full_path] = metadata
    except Exception:
        metadata = {}

    return metadata
def force_utf8(text):
    """Return text encoded as UTF-8.

    A str is first decoded as UTF-8; if that fails it is decoded as
    MacRoman on darwin and as ISO 8859-1 elsewhere.  Any other object is
    encoded directly through its own encode() method.
    """
    if type(text) == str:
        fallback = 'macroman' if sys.platform == 'darwin' else 'iso8859-1'
        try:
            text = text.decode('utf8')
        except:
            text = text.decode(fallback)
    return text.encode('utf-8')
def _dump_text(value):
    """Return value as a byte string suitable for writing by dump()."""
    if isinstance(value, unicode):
        return value.encode('utf-8')
    # Byte strings pass through unchanged; ints and others are stringified.
    return str(value)


def dump(output, metadata):
    """Write metadata to output as 'key: value' lines.

    output needs a write() method.  List values produce one line per
    item.  Rating codes listed in HUMAN are written as display text.
    Unicode values are written as UTF-8; non-string values such as
    numeric years or unknown rating codes are written via str().
    """
    for key in metadata:
        value = metadata[key]
        if isinstance(value, list):
            for item in value:
                output.write('%s: %s\n' % (key, _dump_text(item)))
        elif key in HUMAN and value in HUMAN[key]:
            output.write('%s: %s\n' % (key, HUMAN[key][value]))
        else:
            output.write('%s: %s\n' % (key, _dump_text(value)))
# Command-line use: print the metadata found for the file named in argv[1].
if __name__ == '__main__' and len(sys.argv) > 1:
    fname = force_utf8(sys.argv[1])
    ext = os.path.splitext(fname)[1].lower()

    metadata = {}
    if ext == '.tivo':
        # config must be initialised before from_tivo() consults it.
        config.init([])
        metadata.update(from_tivo(fname))
    elif ext in ('.mp4', '.m4v', '.mov'):
        metadata.update(from_moov(fname))
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        metadata.update(from_dvrms(fname))
    elif ext == '.wtv':
        vInfo = plugins.video.transcode.video_info(fname)
        metadata.update(from_mscore(vInfo['rawmeta']))

    dump(sys.stdout, metadata)