Merge remote-tracking branch 'upstream/master'
[pyTivo/wmcbrine/lucasnz.git] / metadata.py
blobaaf72112ad7dfe450312eefef09173ab18a2e821
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 import sys
6 from datetime import datetime
7 from xml.dom import minidom
8 try:
9 import plistlib
10 except:
11 pass
13 import mutagen
14 from lrucache import LRUCache
16 import config
17 import plugins.video.transcode
# Boilerplate removed from descriptions by from_container/from_details.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Rating text (several spellings per rating) -> numeric TV rating code.
TV_RATINGS = {
    'TV-Y7': 1, 'TV-Y': 2, 'TV-G': 3, 'TV-PG': 4,
    'TV-14': 5, 'TV-MA': 6, 'TV-NR': 7,
    'TVY7': 1, 'TVY': 2, 'TVG': 3, 'TVPG': 4,
    'TV14': 5, 'TVMA': 6, 'TVNR': 7,
    'Y7': 1, 'Y': 2, 'G': 3, 'PG': 4, '14': 5, 'MA': 6, 'NR': 7,
    'UNRATED': 7,
    'X1': 1, 'X2': 2, 'X3': 3, 'X4': 4, 'X5': 5, 'X6': 6, 'X7': 7,
}

# Rating text -> numeric MPAA rating code.
MPAA_RATINGS = {
    'G': 1, 'PG': 2, 'PG-13': 3, 'PG13': 3, 'R': 4, 'X': 5,
    'NC-17': 6, 'NC17': 6, 'NR': 8, 'UNRATED': 8,
    'G1': 1, 'P2': 2, 'P3': 3, 'R4': 4, 'X5': 5, 'N6': 6, 'N8': 8,
}

# Star count (numeric text or asterisks) -> numeric star rating code.
STAR_RATINGS = {
    '1': 1, '1.5': 2, '2': 3, '2.5': 4, '3': 5, '3.5': 6, '4': 7,
    '*': 1, '**': 3, '***': 5, '****': 7,
}

# Reverse tables: metadata key -> {numeric code: display text}.
HUMAN = {
    'mpaaRating': {
        1: 'G', 2: 'PG', 3: 'PG-13', 4: 'R', 5: 'X', 6: 'NC-17', 8: 'NR',
    },
    'tvRating': {
        1: 'Y7', 2: 'Y', 3: 'G', 4: 'PG', 5: '14', 6: 'MA', 7: 'NR',
    },
    'starRating': {
        1: '1', 2: '1.5', 3: '2', 4: '2.5', 5: '3', 6: '3.5', 7: '4',
    },
}

# UTF-8 byte order mark, stripped from metadata text lines by from_text.
BOM = '\xef\xbb\xbf'
# One cache per metadata source; the from_* readers in this module look
# entries up by file path.
# NOTE(review): 50 is presumably the LRUCache capacity -- confirm against
# the lrucache module.
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
dvrms_cache = LRUCache(50)
nfo_cache = LRUCache(50)

# True when sys.platform reports win32.
mswindows = (sys.platform == "win32")
def get_mpaa(rating):
    """Return the display text for a numeric MPAA rating code.

    Codes that are not in HUMAN['mpaaRating'] yield 'NR'.
    """
    mpaa_names = HUMAN['mpaaRating']
    return mpaa_names.get(rating, 'NR')
def get_tv(rating):
    """Return the display text for a numeric TV rating code.

    Codes that are not in HUMAN['tvRating'] yield 'NR'.
    """
    tv_names = HUMAN['tvRating']
    return tv_names.get(rating, 'NR')
def get_stars(rating):
    """Return the display text for a numeric star rating code.

    Codes that are not in HUMAN['starRating'] yield an empty string.
    """
    star_names = HUMAN['starRating']
    return star_names.get(rating, '')
def convert_rating_scale(rating, scale):
    """Convert a rating on a 0..scale range to a star rating code.

    A rating of 0 maps to code 1 and a rating equal to `scale` maps to the
    highest code in HUMAN['starRating']; fractions are truncated.
    `rating` and `scale` may be numbers or numeric strings.
    """
    top_step = len(HUMAN['starRating']) - 1
    scaled = float(rating) * top_step / float(scale)
    return int(scaled + 1)
def tag_data(element, tag):
    """Return the text of the first node found by following `tag`.

    `tag` is a '/'-separated list of tag names; at each step the first
    matching descendant is taken.  An empty string is returned when any
    step has no match or the final node has no children.
    """
    node = element
    for part in tag.split('/'):
        matches = node.getElementsByTagName(part)
        if not matches:
            return ''
        node = matches[0]
    first = node.firstChild
    if not first:
        return ''
    return first.data
76 def _vtag_data(element, tag):
77 for name in tag.split('/'):
78 new_element = element.getElementsByTagName(name)
79 if not new_element:
80 return []
81 element = new_element[0]
82 elements = element.getElementsByTagName('element')
83 return [x.firstChild.data for x in elements if x.firstChild]
85 def _vtag_data_alternate(element, tag):
86 elements = [element]
87 for name in tag.split('/'):
88 new_elements = []
89 for elmt in elements:
90 new_elements += elmt.getElementsByTagName(name)
91 elements = new_elements
92 return [x.firstChild.data for x in elements if x.firstChild]
94 def _tag_value(element, tag):
95 item = element.getElementsByTagName(tag)
96 if item:
97 value = item[0].attributes['value'].value
98 return int(value[0])
def from_moov(full_path):
    """Build a pyTivo metadata dict from the tags of an MP4/MOV file.

    The file is opened with mutagen.  Results, including the empty dict
    produced when the file cannot be read or has no tags, are stored in
    mp4_cache under `full_path`.

    full_path -- UTF-8 encoded byte-string path of the media file.
    Returns a dict of metadata; 'v'-prefixed keys hold lists.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    metadata = {}
    len_desc = 0

    try:
        mp4meta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        mp4meta = None
    # Explicit check rather than assert, so it still runs under "python -O".
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # An empty tag has nothing to contribute.
                continue
            value = value[0]

        if key == 'stik':
            if value == 'TV Show':
                metadata['isEpisode'] = 'true'
            else:
                metadata['isEpisode'] = 'false'
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            # A bare four-character year is padded to a full timestamp.
            if len(value) == 4:
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                metadata.setdefault(k, []).append(value)
        elif key == '\xa9nam':
            # With a show name present, the name tag is the episode title.
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            fields = value.split('|')
            # The rating is the second '|'-separated field; a value
            # without a separator carries no usable rating.
            if len(fields) > 1:
                rating = fields[1].upper()
                if rating in TV_RATINGS and 'us-tv' in value:
                    metadata['tvRating'] = TV_RATINGS[rating]
                elif rating in MPAA_RATINGS and 'mpaa' in value:
                    metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            try:
                data = plistlib.readPlistFromString(value)
            except Exception:
                # A malformed plist only costs the credits, not the
                # rest of the metadata.
                data = {}
            for item in items:
                if item in data:
                    metadata[items[item]] = [x['name'] for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
def from_mscore(rawmeta):
    """Translate Microsoft-style (ASF/DVR-MS/WTV) tags to pyTivo metadata.

    rawmeta -- mapping of tag name to a sequence of values; only the
               first value of each tag is used.
    Returns a dict of metadata.  The intermediate 'genre', 'credits' and
    'rating' entries are converted to their pyTivo forms and removed.
    """
    metadata = {}
    # pyTivo key -> candidate source tags; a later tag with a value
    # overrides an earlier one.
    keys = {'title': ['Title'],
            'description': ['Description', 'WM/SubTitleDescription'],
            'episodeTitle': ['WM/SubTitle'],
            'callsign': ['WM/MediaStationCallSign'],
            'displayMajorNumber': ['WM/MediaOriginalChannel'],
            'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
            'rating': ['WM/ParentalRating'],
            'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}

    for tagname in keys:
        for tag in keys[tagname]:
            try:
                if tag in rawmeta:
                    value = rawmeta[tag][0]
                    if not isinstance(value, (str, unicode)):
                        value = str(value)
                    if value:
                        metadata[tagname] = value
            except Exception:
                # Best effort: an unreadable tag is skipped.  Unlike a
                # bare except, this lets KeyboardInterrupt/SystemExit
                # propagate.
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']

    if 'genre' in metadata:
        value = metadata.pop('genre').split(',')
        metadata['vProgramGenre'] = value
        metadata['vSeriesGenre'] = value

    if 'credits' in metadata:
        # ';' separates groups and '/' separates names within a group.
        value = [x.split('/') for x in metadata.pop('credits').split(';')]
        if len(value) > 3:
            # Groups 0 and 3 are merged into vActor; group 1 is vDirector.
            metadata['vActor'] = [x for x in (value[0] + value[3]) if x]
            metadata['vDirector'] = [x for x in value[1] if x]

    if 'rating' in metadata:
        rating = metadata.pop('rating')
        if rating in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[rating]

    return metadata
def from_dvrms(full_path):
    """Return metadata for a DVR-MS/ASF/WMV file, read through mutagen.

    Results, including the empty dict produced when the file cannot be
    read or has no tags, are stored in dvrms_cache under `full_path`.

    full_path -- UTF-8 encoded byte-string path of the media file.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    try:
        rawmeta = mutagen.File(unicode(full_path, 'utf-8'))
    except Exception:
        rawmeta = None
    # Explicit check rather than assert, so it still runs under "python -O".
    if not rawmeta:
        dvrms_cache[full_path] = {}
        return {}

    metadata = from_mscore(rawmeta)
    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Read metadata from the .eyetvp plist stored beside a recording.

    The first file ending in '.eyetvp' in the recording's directory is
    parsed with plistlib and its 'epg info' section is translated.

    full_path -- UTF-8 encoded byte-string path of the media file.
    Returns a dict of metadata; it is empty when there is no .eyetvp
    file or the plist has no 'epg info' section.
    """
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    metadata = {}

    path = os.path.dirname(unicode(full_path, 'utf-8'))
    plists = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    if not plists:
        # No plist beside the recording: nothing to read.
        return metadata

    eyetv = plistlib.readPlist(os.path.join(path, plists[0]))
    info = eyetv.get('epg info')
    if not info:
        return metadata

    # Missing EPG fields are treated like empty ones.
    for key in keys:
        if info.get(key):
            metadata[keys[key]] = info[key]
    if info.get('SUBTITLE'):
        metadata['seriesTitle'] = info.get('TITLE')
    if info.get('ACTORS'):
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info.get('DIRECTOR'):
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in [('tvRating', 'TV_RATING', TV_RATINGS),
                                ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                                ('starRating', 'STAR_RATING', STAR_RATINGS)]:
        x = (info.get(etag) or '').upper()
        if x and x in ratings:
            metadata[ptag] = ratings[x]

    # movieYear must be set for the mpaa/star ratings to work
    if (('mpaaRating' in metadata or 'starRating' in metadata) and
            'movieYear' not in metadata):
        start = eyetv.get('info', {}).get('start')
        if start is not None:
            metadata['movieYear'] = start.year

    return metadata
def from_text(full_path):
    """Collect metadata from the text files that accompany a media file.

    Files are read in this order, later scalar values replacing earlier
    ones and 'v'-prefixed keys accumulating into lists:

      1. <dir>/<title>.properties   ('=' separates key and value)
      2. default.txt in every ancestor directory, outermost first
      3. <full_path>.txt
      4. <dir>/.meta/default.txt
      5. <dir>/.meta/<name>.txt

    All files except .properties use ':' as the separator.  Lines whose
    first non-blank character is '#', lines without the separator, and
    lines with an empty key or value are ignored.

    full_path -- UTF-8 encoded byte-string path of the media file.
    Returns a dict of metadata; recognised rating text is replaced by
    its numeric code.
    """
    metadata = {}
    full_path = unicode(full_path, 'utf-8')
    path, name = os.path.split(full_path)
    title = os.path.splitext(name)[0]

    # Walk up the directory tree, collecting a default.txt candidate at
    # every level until dirname() stops changing or becomes empty.
    search_paths = []
    ptmp = full_path
    while ptmp:
        parent = os.path.dirname(ptmp)
        if ptmp != parent:
            ptmp = parent
        else:
            break
        search_paths.append(os.path.join(ptmp, 'default.txt'))

    search_paths.append(os.path.join(path, title) + '.properties')
    search_paths.reverse()

    search_paths += [full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']

    for metafile in search_paths:
        if not os.path.exists(metafile):
            continue
        # Index by the boolean: ':' normally, '=' for .properties files.
        sep = ':='[metafile.endswith('.properties')]
        # The with-block closes the handle even if a line fails to parse.
        with open(metafile, 'U') as handle:
            for line in handle:
                if line.startswith(BOM):
                    line = line[3:]
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    metadata.setdefault(key, []).append(value)
                else:
                    metadata[key] = value

    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]

    return metadata
def basic(full_path):
    """Return metadata for a media file from every applicable source.

    Starts with the file name (minus extension) as the title and the
    file's modification time as originalAirDate, then layers on, in
    order: container tags chosen by extension (or EyeTV data for files
    in a '.eyetv' directory), .nfo data, and text-file data.  Later
    sources override earlier ones.

    full_path -- UTF-8 encoded byte-string path of the media file.
    """
    folder, filename = os.path.split(full_path)
    stem, suffix = os.path.splitext(filename)

    modified = os.stat(unicode(full_path, 'utf-8')).st_mtime
    if modified < 0:
        # Negative timestamps are clamped to the epoch.
        modified = 0
    stamp = datetime.utcfromtimestamp(modified).isoformat()

    metadata = {'title': stem, 'originalAirDate': stamp}

    suffix = suffix.lower()
    if suffix in ('.mp4', '.m4v', '.mov'):
        container = from_moov(full_path)
    elif suffix in ('.dvr-ms', '.asf', '.wmv'):
        container = from_dvrms(full_path)
    elif 'plistlib' in sys.modules and folder.endswith('.eyetv'):
        container = from_eyetv(full_path)
    else:
        container = {}

    metadata.update(container)
    metadata.update(from_nfo(full_path))
    metadata.update(from_text(full_path))
    return metadata
def from_container(xmldoc):
    """Extract metadata from the first <Details> element of `xmldoc`.

    xmldoc -- a parsed DOM document (anything offering
              getElementsByTagName).
    Returns a dict holding only the fields that had text.  tvRating is
    converted to int, and a SourceChannel of the form 'major-minor' is
    split into displayMajorNumber and displayMinorNumber.
    """
    # (pyTivo key, tag name under <Details>)
    fields = (('title', 'Title'),
              ('episodeTitle', 'EpisodeTitle'),
              ('description', 'Description'),
              ('seriesId', 'SeriesId'),
              ('episodeNumber', 'EpisodeNumber'),
              ('tvRating', 'TvRating'),
              ('displayMajorNumber', 'SourceChannel'),
              ('callsign', 'SourceStation'),
              ('showingBits', 'ShowingBits'),
              ('mpaaRating', 'MpaaRating'))

    metadata = {}
    details = xmldoc.getElementsByTagName('Details')[0]

    for key, tagname in fields:
        data = tag_data(details, tagname)
        if not data:
            continue
        if key == 'description':
            data = data.replace(TRIBUNE_CR, '')
        elif key == 'tvRating':
            data = int(data)
        elif key == 'displayMajorNumber' and '-' in data:
            data, metadata['displayMinorNumber'] = data.split('-')
        metadata[key] = data

    return metadata
def from_details(xml):
    """Parse a TiVo details XML document into a metadata dict.

    xml -- a file name or file-like object accepted by minidom.parse.
    Returns a dict built from the first <showing> element and its first
    <program> child; fields without data are omitted.
    """
    xmldoc = minidom.parse(xml)
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    metadata = {}

    # (pyTivo key, tag path below <showing>)
    scalar_paths = (('description', 'program/description'),
                    ('title', 'program/title'),
                    ('episodeTitle', 'program/episodeTitle'),
                    ('episodeNumber', 'program/episodeNumber'),
                    ('seriesId', 'program/series/uniqueId'),
                    ('seriesTitle', 'program/series/seriesTitle'),
                    ('originalAirDate', 'program/originalAirDate'),
                    ('isEpisode', 'program/isEpisode'),
                    ('movieYear', 'program/movieYear'),
                    ('partCount', 'partCount'),
                    ('partIndex', 'partIndex'),
                    ('time', 'time'))
    for key, path in scalar_paths:
        text = tag_data(showing, path)
        if not text:
            continue
        if key == 'description':
            text = text.replace(TRIBUNE_CR, '')
        metadata[key] = text

    # List-valued fields, each stored as <element> children.
    for key in ('vActor', 'vChoreographer', 'vDirector',
                'vExecProducer', 'vProgramGenre', 'vGuestStar',
                'vHost', 'vProducer', 'vWriter'):
        values = _vtag_data(program, key)
        if values:
            metadata[key] = values

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    # Numeric ratings come from the 'value' attribute of their tags.
    for key in ('starRating', 'mpaaRating'):
        code = _tag_value(program, key)
        if code:
            metadata[key] = code

    tv_code = _tag_value(showing, 'tvRating')
    if tv_code:
        metadata['tvRating'] = tv_code

    return metadata
def _nfo_vitems(source, metadata):
    """Merge list-valued NFO fields from `source` into `metadata`.

    Genres, credits, directors and actor names found under `source` are
    appended (without duplicates) to the matching 'v' lists.  When a
    vGenre list exists, vProgramGenre and vSeriesGenre are set to that
    same list object.  `metadata` is modified in place and returned.
    """
    # (pyTivo key, tag path in the NFO document)
    sources = (('vGenre', 'genre'),
               ('vWriter', 'credits'),
               ('vDirector', 'director'),
               ('vActor', 'actor/name'))

    for key, path in sources:
        found = _vtag_data_alternate(source, path)
        if not found:
            continue
        bucket = metadata.setdefault(key, [])
        for entry in found:
            if entry not in bucket:
                bucket.append(entry)

    if 'vGenre' in metadata:
        genres = metadata['vGenre']
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = genres

    return metadata
def _from_tvshow_nfo(tvshow_nfo_path):
    """Return series-level metadata from a tvshow.nfo file.

    Results are kept in nfo_cache under the file's path.  An empty dict
    is cached before parsing, so it is also what a later call finds if
    parsing raised.  Returns an empty dict when the document has no
    <tvshow> element.
    """
    if tvshow_nfo_path in nfo_cache:
        return nfo_cache[tvshow_nfo_path]

    items = {'description': 'plot',
             'title': 'title',
             'seriesTitle': 'showtitle',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    nfo_cache[tvshow_nfo_path] = metadata = {}

    # The with-block closes the handle even when parsing fails.
    with open(tvshow_nfo_path, 'rU') as handle:
        xmldoc = minidom.parse(handle)

    tvshow = xmldoc.getElementsByTagName('tvshow')
    if not tvshow:
        return metadata
    tvshow = tvshow[0]

    for item in items:
        data = tag_data(tvshow, items[item])
        if data:
            metadata[item] = data

    metadata = _nfo_vitems(tvshow, metadata)

    nfo_cache[tvshow_nfo_path] = metadata
    return metadata
def _from_episode_nfo(nfo_path, xmldoc=None):
    """Return metadata for a TV episode described by an NFO file.

    Series-level values from the nearest tvshow.nfo in an ancestor
    directory are loaded first, then overridden by the episode's own
    values.

    nfo_path -- path of the episode's .nfo file.
    xmldoc   -- already-parsed document for nfo_path; parsed here when
                omitted.
    Returns a dict of metadata; without an <episodedetails> element it
    contains only the tvshow.nfo values (if any).
    """
    metadata = {}

    items = {'description': 'plot',
             'episodeTitle': 'title',
             'seriesTitle': 'showtitle',
             'originalAirDate': 'aired',
             'starRating': 'rating',
             'tvRating': 'mpaa'}

    # find tvshow.nfo
    path = nfo_path
    while True:
        basepath = os.path.dirname(path)
        if path == basepath:
            break
        path = basepath
        tv_nfo = os.path.join(path, 'tvshow.nfo')
        if os.path.exists(tv_nfo):
            metadata.update(_from_tvshow_nfo(tv_nfo))
            break

    if xmldoc is None:
        # The with-block closes the handle even when parsing fails.
        with open(nfo_path, 'rU') as handle:
            xmldoc = minidom.parse(handle)

    episode = xmldoc.getElementsByTagName('episodedetails')
    if not episode:
        return metadata
    episode = episode[0]

    metadata['isEpisode'] = 'true'
    for item in items:
        data = tag_data(episode, items[item])
        if data:
            metadata[item] = data

    # Prefer the display* numbers; "-1" there means "not set".
    season = tag_data(episode, 'displayseason')
    if not season or season == "-1":
        season = tag_data(episode, 'season')
    if not season:
        season = 1

    ep_num = tag_data(episode, 'displayepisode')
    if not ep_num or ep_num == "-1":
        ep_num = tag_data(episode, 'episode')
    if ep_num and ep_num != "-1":
        try:
            metadata['episodeNumber'] = "%d%02d" % (int(season),
                                                    int(ep_num))
        except ValueError:
            # Non-numeric season/episode text: leave episodeNumber as
            # it was rather than abort the whole lookup.
            pass

    if 'originalAirDate' in metadata:
        metadata['originalAirDate'] += 'T00:00:00Z'

    metadata = _nfo_vitems(episode, metadata)

    return metadata
def _from_movie_nfo(nfo_path, xmldoc=None):
    """Return metadata for a movie described by an NFO file.

    nfo_path -- path of the movie's .nfo file.
    xmldoc   -- already-parsed document for nfo_path; parsed here when
                omitted.
    Returns a dict of metadata, empty when the document has no <movie>
    element.  movieYear is always present as a four-digit string and is
    '0000' when the year is missing or not a number.
    """
    metadata = {}

    if xmldoc is None:
        # The with-block closes the handle even when parsing fails.
        with open(nfo_path, 'rU') as handle:
            xmldoc = minidom.parse(handle)

    movie = xmldoc.getElementsByTagName('movie')
    if not movie:
        return metadata
    movie = movie[0]

    items = {'description': 'plot',
             'title': 'title',
             'movieYear': 'year',
             'starRating': 'rating',
             'mpaaRating': 'mpaa'}

    metadata['isEpisode'] = 'false'

    for item in items:
        data = tag_data(movie, items[item])
        if data:
            metadata[item] = data

    try:
        year = int(metadata.get('movieYear', 0))
    except ValueError:
        # Unparsable year text is treated like a missing year.
        year = 0
    metadata['movieYear'] = "%04d" % year

    metadata = _nfo_vitems(movie, metadata)
    return metadata
def from_nfo(full_path):
    """Return metadata from the .nfo file that sits beside a media file.

    The NFO path is `full_path` with its extension replaced by '.nfo'.
    The document is treated as an episode when it contains
    <episodedetails>, otherwise as a movie when it contains <movie>.
    Results are kept in nfo_cache under `full_path`; an empty dict is
    cached up front, so that is what a later call finds when the file is
    missing or parsing raised.

    Ratings are normalised afterwards: starRating is rescaled from a
    0-10 range, and mpaaRating/tvRating text is replaced by its numeric
    code (as a string).  Ratings that cannot be converted are dropped.
    """
    if full_path in nfo_cache:
        return nfo_cache[full_path]

    metadata = nfo_cache[full_path] = {}

    nfo_path = "%s.nfo" % os.path.splitext(full_path)[0]
    if not os.path.exists(nfo_path):
        return metadata

    # The with-block closes the handle even when parsing fails.
    with open(nfo_path, 'rU') as handle:
        xmldoc = minidom.parse(handle)

    if len(xmldoc.getElementsByTagName('episodedetails')) > 0:
        # it's an episode
        metadata.update(_from_episode_nfo(nfo_path, xmldoc))
    elif len(xmldoc.getElementsByTagName('movie')) > 0:
        # it's a movie
        metadata.update(_from_movie_nfo(nfo_path, xmldoc))

    # common nfo cleanup
    if 'starRating' in metadata:
        try:
            stars = convert_rating_scale(metadata['starRating'], 10)
        except ValueError:
            # Non-numeric rating text: drop it, as is done for
            # unrecognised mpaa/tv ratings below.
            del metadata['starRating']
        else:
            metadata['starRating'] = str(stars)

    for key, mapping in [('mpaaRating', MPAA_RATINGS),
                         ('tvRating', TV_RATINGS)]:
        if key in metadata:
            rating = mapping.get(metadata[key], None)
            if rating:
                metadata[key] = str(rating)
            else:
                del metadata[key]

    nfo_cache[full_path] = metadata
    return metadata
def from_tivo(full_path):
    """Return metadata for a .TiVo file by running the tdcat tool on it.

    tdcat's standard output is parsed with from_details.  Successful
    results are stored in tivo_cache under `full_path`.  An empty dict
    is returned (and not cached) when tdcat or the tivo_mak setting is
    not configured, or when running tdcat or parsing its output fails.

    full_path -- UTF-8 encoded byte-string path of the .TiVo file.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    if not (tdcat_path and tivo_mak):
        return {}

    try:
        fname = unicode(full_path, 'utf-8')
        if mswindows:
            fname = fname.encode('iso8859-1')
        tcmd = [tdcat_path, '-m', tivo_mak, '-2', fname]
        tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
        try:
            metadata = from_details(tdcat.stdout)
        finally:
            # Close the pipe and reap the child so that neither a file
            # descriptor nor a zombie process is left behind.
            tdcat.stdout.close()
            tdcat.wait()
        tivo_cache[full_path] = metadata
    except Exception:
        metadata = {}

    return metadata
def force_utf8(text):
    """Return `text` as a UTF-8 encoded byte string.

    Byte strings are first decoded as UTF-8; if that fails they are
    decoded as MacRoman on darwin and as ISO 8859-1 elsewhere.  Text
    (unicode) input is simply encoded.
    """
    if isinstance(text, bytes):
        try:
            text = text.decode('utf8')
        except UnicodeDecodeError:
            if sys.platform == 'darwin':
                text = text.decode('macroman')
            else:
                text = text.decode('iso8859-1')
    return text.encode('utf-8')
def _dump_text(value):
    """Return `value` in a form that can be written by dump()."""
    if isinstance(value, str):
        # Already a native string; write it unchanged.
        return value
    try:
        return value.encode('utf-8')
    except AttributeError:
        # Numbers and other non-text values (e.g. an int rating or year).
        return str(value)


def dump(output, metadata):
    """Write `metadata` to `output` as 'key: value' lines.

    List values produce one line per item.  Scalar values whose key and
    value appear in HUMAN are written as their display text.

    output   -- object with a write() method.
    metadata -- dict as produced by the from_* functions.
    """
    for key in metadata:
        value = metadata[key]
        if isinstance(value, list):
            for item in value:
                output.write('%s: %s\n' % (key, _dump_text(item)))
        elif key in HUMAN and value in HUMAN[key]:
            output.write('%s: %s\n' % (key, HUMAN[key][value]))
        else:
            output.write('%s: %s\n' % (key, _dump_text(value)))
if __name__ == '__main__':
    # Command-line use: print the metadata found for the file named by
    # the first argument.  Only container-level readers are consulted,
    # chosen by file extension; other extensions print nothing.
    if len(sys.argv) > 1:
        fname = force_utf8(sys.argv[1])
        ext = os.path.splitext(fname)[1].lower()

        metadata = {}
        if ext == '.tivo':
            # from_tivo reads its settings through config.
            config.init([])
            metadata.update(from_tivo(fname))
        elif ext in ('.mp4', '.m4v', '.mov'):
            metadata.update(from_moov(fname))
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            metadata.update(from_dvrms(fname))
        elif ext == '.wtv':
            vInfo = plugins.video.transcode.video_info(fname)
            metadata.update(from_mscore(vInfo['rawmeta']))

        dump(sys.stdout, metadata)