mimetypes.guess_type() returns a tuple, and it might be (None, None).
[pyTivo/TheBayer.git] / metadata.py
blob4fdd77cba55067b4fc438ca82c96f17a10be6052
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 import sys
6 from datetime import datetime
7 from xml.dom import minidom
8 try:
9 import plistlib
10 except:
11 pass
13 import mutagen
14 from lrucache import LRUCache
16 import config
# Boilerplate text that is stripped out of program descriptions.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Upper-case TV rating spellings -> the 'xN' code stored in metadata.
TV_RATINGS = {
    # Fully hyphenated spellings
    'TV-Y7': 'x1', 'TV-Y': 'x2', 'TV-G': 'x3', 'TV-PG': 'x4',
    'TV-14': 'x5', 'TV-MA': 'x6', 'TV-NR': 'x7',
    # Same spellings without the hyphen
    'TVY7': 'x1', 'TVY': 'x2', 'TVG': 'x3', 'TVPG': 'x4',
    'TV14': 'x5', 'TVMA': 'x6', 'TVNR': 'x7',
    # Bare suffixes
    'Y7': 'x1', 'Y': 'x2', 'G': 'x3', 'PG': 'x4',
    '14': 'x5', 'MA': 'x6', 'NR': 'x7',
    'UNRATED': 'x7',
}

# Upper-case MPAA rating spellings -> the two-character code stored in
# metadata.
MPAA_RATINGS = {
    'G': 'G1',
    'PG': 'P2',
    'PG-13': 'P3', 'PG13': 'P3',
    'R': 'R4',
    'X': 'X5',
    'NC-17': 'N6', 'NC17': 'N6',
    'NR': 'N8', 'UNRATED': 'N8',
}

# Star ratings, written either as a number or as a run of asterisks,
# -> the 'xN' code stored in metadata.
STAR_RATINGS = {
    '1': 'x1', '1.5': 'x2', '2': 'x3', '2.5': 'x4',
    '3': 'x5', '3.5': 'x6', '4': 'x7',
    '*': 'x1', '**': 'x3', '***': 'x5', '****': 'x7',
}

# Stored code -> display string, grouped by the metadata key the code
# is stored under.
HUMAN = {
    'mpaaRating': {
        'G1': 'G', 'P2': 'PG', 'P3': 'PG-13', 'R4': 'R',
        'X5': 'X', 'N6': 'NC-17', 'N8': 'Unrated',
    },
    'tvRating': {
        'x1': 'TV-Y7', 'x2': 'TV-Y', 'x3': 'TV-G',
        'x4': 'TV-PG', 'x5': 'TV-14', 'x6': 'TV-MA',
        'x7': 'Unrated',
    },
    'starRating': {
        'x1': '1', 'x2': '1.5', 'x3': '2', 'x4': '2.5',
        'x5': '3', 'x6': '3.5', 'x7': '4',
    },
}
# Per-format result caches, keyed by full file path.  Each is an
# LRUCache constructed with the argument 50.
tivo_cache = LRUCache(50)    # filled by from_tivo()
mp4_cache = LRUCache(50)     # filled by from_moov()
dvrms_cache = LRUCache(50)   # filled by from_dvrms()
def tag_data(element, tag):
    """Return the text held by the element reached by walking *tag*.

    *tag* is a '/'-separated list of tag names.  Each name is looked up
    with getElementsByTagName() on the element matched so far and the
    first match is followed.

    Returns '' when any name has no match, when the final element has no
    children, or when its first child carries no character data (for
    example because it is itself an element).
    """
    for name in tag.split('/'):
        matches = element.getElementsByTagName(name)
        if not matches:
            return ''
        element = matches[0]
    # firstChild is None for an empty element, and an element node has no
    # 'data' attribute; both cases fall back to the empty string.
    return getattr(element.firstChild, 'data', '')
58 def _vtag_data(element, tag):
59 for name in tag.split('/'):
60 new_element = element.getElementsByTagName(name)
61 if not new_element:
62 return []
63 element = new_element[0]
64 elements = element.getElementsByTagName('element')
65 return [x.firstChild.data for x in elements if x.firstChild]
67 def _tag_value(element, tag):
68 item = element.getElementsByTagName(tag)
69 if item:
70 value = item[0].attributes['value'].value
71 name = item[0].firstChild.data
72 return name[0] + value[0]
def from_moov(full_path):
    """Return a metadata dict built from the tags mutagen reads from
    *full_path*.

    Results are cached in mp4_cache by path.  If mutagen cannot open the
    file, or returns nothing usable, an empty dict is cached and
    returned.  Individual malformed tags are skipped rather than
    aborting the whole read.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    metadata = {}
    len_desc = 0

    # Best effort: treat any failure as "no metadata".  An explicit
    # truth test is used rather than assert so that running with -O
    # does not let a None result through.
    try:
        mp4meta = mutagen.File(full_path)
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    for key, value in mp4meta.items():
        if type(value) == list:
            if not value:
                # Nothing to take the first item of.
                continue
            value = value[0]
        if key == 'stik':
            metadata['isEpisode'] = ['false', 'true'][value == 'TV Show']
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            # A bare four-character year is expanded to a full timestamp.
            if len(value) == 4:
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                if k in metadata:
                    metadata[k].append(value)
                else:
                    metadata[k] = [value]
        elif key == '\xa9nam':
            # With a show name present, the name tag is the episode title.
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            fields = value.split("|")
            # The rating is the second '|'-separated field; ignore the
            # tag if that field is absent.
            if len(fields) > 1:
                rating = fields[1].upper()
                if rating in TV_RATINGS and 'us-tv' in value:
                    metadata['tvRating'] = TV_RATINGS[rating]
                elif rating in MPAA_RATINGS and 'mpaa' in value:
                    metadata['mpaaRating'] = MPAA_RATINGS[rating]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist.
        elif (key == '----:com.apple.iTunes:iTunMOVI' and
              'plistlib' in sys.modules):
            items = {'cast': 'vActor', 'directors': 'vDirector',
                     'producers': 'vProducer', 'screenwriters': 'vWriter'}
            try:
                data = plistlib.readPlistFromString(value)
            except Exception:
                # An unparseable plist should not discard the other tags.
                continue
            for item in items:
                if item in data:
                    metadata[items[item]] = [x['name'] for x in data[item]]

    mp4_cache[full_path] = metadata
    return metadata
def from_dvrms(full_path):
    """Return a metadata dict built from the tags mutagen reads from
    *full_path*, using the 'Title', 'Description' and 'WM/...' tag names.

    Results are cached in dvrms_cache by path.  If mutagen cannot open
    the file, or returns nothing usable, an empty dict is cached and
    returned.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    metadata = {}

    # Best effort: treat any failure as "no metadata".  An explicit
    # truth test is used rather than assert so that running with -O
    # does not let a None result through.
    try:
        meta = mutagen.File(full_path)
    except Exception:
        meta = None
    if not meta:
        dvrms_cache[full_path] = {}
        return {}

    # metadata key -> candidate source tags; when several are present
    # and non-empty, the last one listed wins.
    keys = {'title': ['Title'],
            'description': ['Description', 'WM/SubTitleDescription'],
            'episodeTitle': ['WM/SubTitle'],
            'callsign': ['WM/MediaStationCallSign'],
            'displayMajorNumber': ['WM/MediaOriginalChannel'],
            'originalAirDate': ['WM/MediaOriginalBroadcastDateTime'],
            'rating': ['WM/ParentalRating'],
            'credits': ['WM/MediaCredits'], 'genre': ['WM/Genre']}

    for tagname in keys:
        for tag in keys[tagname]:
            try:
                if tag in meta:
                    value = str(meta[tag][0])
                    if value:
                        metadata[tagname] = value
            except Exception:
                # Skip a tag that cannot be read or converted; the
                # remaining tags are still useful.
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']
    if 'genre' in metadata:
        value = metadata['genre'].split(',')
        metadata['vProgramGenre'] = value
        # Separate list object, so changing one entry cannot silently
        # change the other.
        metadata['vSeriesGenre'] = list(value)
        del metadata['genre']
    if 'credits' in metadata:
        # ';' separates groups, '/' separates names within a group.
        value = [x.split('/') for x in metadata['credits'].split(';')]
        if len(value) > 3:
            metadata['vActor'] = [x for x in (value[0] + value[3]) if x]
            metadata['vDirector'] = [x for x in value[1] if x]
        del metadata['credits']
    if 'rating' in metadata:
        rating = metadata['rating']
        if rating in TV_RATINGS:
            metadata['tvRating'] = TV_RATINGS[rating]
        del metadata['rating']

    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Return metadata from the .eyetvp plist beside *full_path*.

    The first file ending in '.eyetvp' in the same directory is parsed
    with plistlib and its 'epg info' dict is mapped onto pyTivo keys.

    Returns an empty dict when the directory holds no .eyetvp file or
    the plist has no 'epg info'.  Fields missing from 'epg info' are
    skipped.
    """
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    metadata = {}
    path, name = os.path.split(full_path)

    candidates = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    if not candidates:
        return metadata
    eyetv = plistlib.readPlist(os.path.join(path, candidates[0]))

    info = eyetv.get('epg info')
    if not info:
        return metadata

    for key in keys:
        if info.get(key):
            metadata[keys[key]] = info[key]
    # A subtitle means TITLE is the series title as well.
    if info.get('SUBTITLE') and 'TITLE' in info:
        metadata['seriesTitle'] = info['TITLE']
    if info.get('ACTORS'):
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info.get('DIRECTOR'):
        metadata['vDirector'] = [info['DIRECTOR']]

    for ptag, etag, ratings in [('tvRating', 'TV_RATING', TV_RATINGS),
                                ('mpaaRating', 'MPAA_RATING', MPAA_RATINGS),
                                ('starRating', 'STAR_RATING', STAR_RATINGS)]:
        # A missing or empty rating is treated as ''.
        x = (info.get(etag) or '').upper()
        if x and x in ratings:
            metadata[ptag] = ratings[x]

    # movieYear must be set for the mpaa/star ratings to work
    if (('mpaaRating' in metadata or 'starRating' in metadata) and
            'movieYear' not in metadata):
        try:
            metadata['movieYear'] = eyetv['info']['start'].year
        except (KeyError, AttributeError):
            # No usable start date recorded; leave movieYear unset.
            pass
    return metadata
def from_text(full_path):
    """Return metadata read from the text sidecar files of *full_path*.

    Five candidate files are read, in this order, if they exist:
    '<title>.properties', 'default.txt', '<name>.txt',
    '.meta/default.txt' and '.meta/<name>.txt', all relative to the
    directory of *full_path*.  A later file overrides a plain key set by
    an earlier one.

    Each usable line is 'key<sep>value', where <sep> is '=' for
    .properties files and ':' otherwise.  Lines starting with '#', lines
    without the separator, and lines with an empty key or value are
    ignored.  Keys starting with 'v' accumulate their values in a list.
    Rating values found in the rating tables are replaced by their
    stored codes.
    """
    metadata = {}
    path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)

    for metafile in [os.path.join(path, title) + '.properties',
                     os.path.join(path, 'default.txt'), full_path + '.txt',
                     os.path.join(path, '.meta', 'default.txt'),
                     os.path.join(path, '.meta', name) + '.txt']:
        if not os.path.exists(metafile):
            continue
        if metafile.endswith('.properties'):
            sep = '='
        else:
            sep = ':'
        handle = open(metafile, 'U')
        # Close the file explicitly instead of leaving it to the
        # garbage collector.
        try:
            for line in handle:
                if line.strip().startswith('#') or sep not in line:
                    continue
                key, value = [x.strip() for x in line.split(sep, 1)]
                if not key or not value:
                    continue
                if key.startswith('v'):
                    if key in metadata:
                        metadata[key].append(value)
                    else:
                        metadata[key] = [value]
                else:
                    metadata[key] = value
        finally:
            handle.close()

    for rating, ratings in [('tvRating', TV_RATINGS),
                            ('mpaaRating', MPAA_RATINGS),
                            ('starRating', STAR_RATINGS)]:
        x = metadata.get(rating, '').upper()
        if x in ratings:
            metadata[rating] = ratings[x]

    return metadata
def basic(full_path):
    """Return the metadata dict for the file at *full_path*.

    Starts from the file name (minus extension) as 'title' and the
    file's modification time as 'originalAirDate', then layers on
    whatever the format-specific reader for the extension returns, and
    finally whatever from_text() returns.
    """
    base_path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)
    ext = ext.lower()

    # Negative modification times are clamped to zero.
    mtime = os.stat(full_path).st_mtime
    if mtime < 0:
        mtime = 0
    air_date = datetime.fromtimestamp(mtime).isoformat()

    metadata = {'title': title, 'originalAirDate': air_date}

    if ext in ('.mp4', '.m4v', '.mov'):
        embedded = from_moov(full_path)
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        embedded = from_dvrms(full_path)
    elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
        embedded = from_eyetv(full_path)
    else:
        embedded = None
    if embedded is not None:
        metadata.update(embedded)

    # Text files are applied last, so they override everything else.
    metadata.update(from_text(full_path))
    return metadata
def from_container(xmldoc):
    """Return metadata taken from the first <Details> element of
    *xmldoc*.

    Only tags that are present and non-empty produce an entry.  The
    Tribune copyright text is removed from the description, 'x' is
    prefixed to the TV rating, and a 'major-minor' source channel is
    split into 'displayMajorNumber' and 'displayMinorNumber'.

    Raises IndexError if the document has no <Details> element.
    """
    metadata = {}

    keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
            'description': 'Description', 'seriesId': 'SeriesId',
            'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
            'displayMajorNumber': 'SourceChannel', 'callsign': 'SourceStation'}

    details = xmldoc.getElementsByTagName('Details')[0]

    for key in keys:
        data = tag_data(details, keys[key])
        if data:
            if key == 'description':
                data = data.replace(TRIBUNE_CR, '')
            elif key == 'tvRating':
                data = 'x' + data
            elif key == 'displayMajorNumber':
                if '-' in data:
                    # Split on the first '-' only, so a value with more
                    # than one '-' cannot break the two-way unpacking.
                    data, metadata['displayMinorNumber'] = data.split('-', 1)
            metadata[key] = data

    return metadata
def from_details(xmldoc):
    """Return metadata taken from the first <showing> element of
    *xmldoc* and the first <program> element inside it.

    Only tags that are present and non-empty produce an entry.

    Raises IndexError if either element is missing.
    """
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]
    metadata = {}

    # Single-valued fields: metadata key -> tag path below <showing>.
    scalar_paths = {'description': 'program/description',
                    'title': 'program/title',
                    'episodeTitle': 'program/episodeTitle',
                    'episodeNumber': 'program/episodeNumber',
                    'seriesId': 'program/series/uniqueId',
                    'seriesTitle': 'program/series/seriesTitle',
                    'originalAirDate': 'program/originalAirDate',
                    'isEpisode': 'program/isEpisode',
                    'movieYear': 'program/movieYear',
                    'partCount': 'partCount',
                    'partIndex': 'partIndex',
                    'time': 'time'}
    for field in scalar_paths:
        text = tag_data(showing, scalar_paths[field])
        if not text:
            continue
        if field == 'description':
            text = text.replace(TRIBUNE_CR, '')
        metadata[field] = text

    # List-valued fields, read from <element> children below <program>.
    for field in ('vActor', 'vChoreographer', 'vDirector',
                  'vExecProducer', 'vProgramGenre', 'vGuestStar',
                  'vHost', 'vProducer', 'vWriter'):
        entries = _vtag_data(program, field)
        if entries:
            metadata[field] = entries

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    # Two-character codes built by _tag_value().
    for field in ('starRating', 'mpaaRating', 'colorCode'):
        code = _tag_value(program, field)
        if code:
            metadata[field] = code

    # For the TV rating only the second character of the code is kept,
    # behind an 'x' prefix.
    tv_code = _tag_value(showing, 'tvRating')
    if tv_code:
        metadata['tvRating'] = 'x' + tv_code[1]

    return metadata
def from_tivo(full_path):
    """Return metadata for *full_path* by parsing the output of tdcat.

    Runs the configured 'tdcat' binary with the configured 'tivo_mak'
    value, parses its standard output as XML and passes the document to
    from_details().  Results are cached in tivo_cache by path.

    Returns an empty dict, which is not cached, when either the binary
    or the 'tivo_mak' setting is unavailable.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    if not (tdcat_path and tivo_mak):
        return {}

    tcmd = [tdcat_path, '-m', tivo_mak, '-2', full_path]
    tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
    try:
        xmldoc = minidom.parse(tdcat.stdout)
    finally:
        # Close the pipe and reap the child even if parsing fails, so
        # no file descriptor or zombie process is left behind.
        tdcat.stdout.close()
        tdcat.wait()

    metadata = from_details(xmldoc)
    tivo_cache[full_path] = metadata
    return metadata
# Command-line use: print the metadata found in the file named by the
# first argument, one 'key: value' line per value.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        metadata = {}
        ext = os.path.splitext(sys.argv[1])[1].lower()
        if ext == '.tivo':
            config.init([])
            metadata.update(from_tivo(sys.argv[1]))
        elif ext in ('.mp4', '.m4v', '.mov'):
            metadata.update(from_moov(sys.argv[1]))
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            metadata.update(from_dvrms(sys.argv[1]))

        # Each print below is given a single parenthesised expression,
        # so it behaves the same as the bare print statement.
        for key in metadata:
            value = metadata[key]
            if type(value) == list:
                # List-valued keys get one line per item.
                for item in value:
                    print('%s: %s' % (key, item.encode('utf8')))
            elif key in HUMAN and value in HUMAN[key]:
                # Show rating codes in their display form.
                print('%s: %s' % (key, HUMAN[key][value]))
            else:
                print('%s: %s' % (key, value.encode('utf8')))