Preliminary support for metadata from DVR-MS/WMV/ASF, via mutagen. Much
[pyTivo/TheBayer.git] / metadata.py
blob971a0ca732c5152bc75323465ef69a02a4cea5eb
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 import sys
6 from datetime import datetime
7 from xml.dom import minidom
8 try:
9 import plistlib
10 except:
11 pass
13 import mutagen
14 from lrucache import LRUCache
16 import config
# Copyright notice that gets removed from program descriptions before they
# are stored in the metadata dictionaries.
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# One cache per source format, each keyed by the full path of the file the
# metadata was read from. The constructor argument is presumably the
# maximum number of entries kept -- verify against lrucache.LRUCache.
_CACHE_SIZE = 50
tivo_cache = LRUCache(_CACHE_SIZE)
mp4_cache = LRUCache(_CACHE_SIZE)
dvrms_cache = LRUCache(_CACHE_SIZE)
def tag_data(element, tag):
    """Return the data of the first child of the node reached via ``tag``.

    ``tag`` is a '/'-separated sequence of tag names. At every step the
    first descendant with that tag name is selected. An empty string is
    returned when a step matches nothing or when the final node has no
    children.
    """
    node = element
    for step in tag.split('/'):
        found = node.getElementsByTagName(step)
        if found:
            node = found[0]
        else:
            return ''

    child = node.firstChild
    if child:
        return child.data
    return ''
35 def _vtag_data(element, tag):
36 for name in tag.split('/'):
37 new_element = element.getElementsByTagName(name)
38 if not new_element:
39 return []
40 element = new_element[0]
41 elements = element.getElementsByTagName('element')
42 return [x.firstChild.data for x in elements if x.firstChild]
44 def _tag_value(element, tag):
45 item = element.getElementsByTagName(tag)
46 if item:
47 value = item[0].attributes['value'].value
48 name = item[0].firstChild.data
49 return name[0] + value[0]
def from_moov(full_path):
    """Read pyTivo metadata from the tags of an MP4/M4V/MOV file via mutagen.

    Results are memoised in ``mp4_cache`` under ``full_path``. An empty
    dict is cached and returned when mutagen raises or yields nothing
    usable.

    Returns a dict of pyTivo metadata names to values; the genre entries
    ('vProgramGenre', 'vSeriesGenre') are lists.
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    metadata = {}
    len_desc = 0

    # Use an explicit test rather than an assert (asserts vanish under
    # "python -O"), and do not trap KeyboardInterrupt/SystemExit the way a
    # bare "except:" does.
    try:
        mp4meta = mutagen.File(full_path)
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    keys = {'tvnn': 'callsign', 'tven': 'episodeNumber',
            'tvsh': 'seriesTitle'}

    # Possible TV values: TV-Y7 TV-Y TV-G TV-PG TV-14 TV-MA Unrated
    # Possible MPAA values: G PG PG-13 R NC-17 Unrated
    ratings = {'TV-Y7': 'x1', 'TV-Y': 'x2', 'TV-G': 'x3',
               'TV-PG': 'x4', 'TV-14': 'x5', 'TV-MA': 'x6',
               'Unrated': 'x7', 'G': 'G1', 'PG': 'P2',
               'PG-13': 'P3', 'R': 'R4', 'NC-17': 'N6'}

    for key, value in mp4meta.items():
        # List values are reduced to their first entry; an empty list
        # carries nothing to record.
        if isinstance(value, list):
            if not value:
                continue
            value = value[0]

        if key == 'stik':
            metadata['isEpisode'] = ['false', 'true'][value == 'TV Show']
        elif key in keys:
            metadata[keys[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            # A bare four-character year is expanded to a full timestamp.
            if len(value) == 4:
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                metadata.setdefault(k, []).append(value)
        elif key == '\xa9nam':
            # With a series title present, the name is the episode title.
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            # The rating is the second '|'-separated field; values without
            # one are ignored instead of raising IndexError.
            fields = value.split('|')
            if len(fields) > 1 and fields[1] in ratings:
                if 'us-tv' in value:
                    metadata['tvRating'] = ratings[fields[1]]
                elif 'mpaa' in value:
                    metadata['mpaaRating'] = ratings[fields[1]]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist, with key '----' and rDNS 'iTunMOVI'. Ughh!

    mp4_cache[full_path] = metadata
    return metadata
def from_dvrms(full_path):
    """Read pyTivo metadata from a DVR-MS/ASF/WMV file via mutagen.

    Results are memoised in ``dvrms_cache`` under ``full_path``. An empty
    dict is cached and returned when mutagen raises or yields nothing
    usable.

    Returns a dict of pyTivo metadata names to strings, plus the list
    entries 'vProgramGenre' and 'vSeriesGenre' when a genre tag exists.
    """
    if full_path in dvrms_cache:
        return dvrms_cache[full_path]

    metadata = {}

    # Use an explicit test rather than an assert (asserts vanish under
    # "python -O"), and do not trap KeyboardInterrupt/SystemExit the way a
    # bare "except:" does.
    try:
        meta = mutagen.File(full_path)
    except Exception:
        meta = None
    if not meta:
        dvrms_cache[full_path] = {}
        return {}

    # pyTivo name -> candidate source tags; a later non-empty candidate
    # overrides an earlier one.
    keys = {'title': ['Title'],
            'description': ['Description', 'WM/SubTitleDescription'],
            'episodeTitle': ['WM/SubTitle'],
            'callsign': ['WM/MediaStationCallSign'],
            'displayMajorNumber': ['WM/MediaOriginalChannel'],
            'genre': ['WM/Genre']}

    for tagname in keys:
        for tag in keys[tagname]:
            # Deliberately best-effort: a tag that cannot be read or
            # converted is skipped so the remaining tags still load.
            # NOTE(review): str() presumably fails on non-ASCII text under
            # Python 2, silently dropping that tag -- confirm.
            try:
                if tag in meta:
                    value = str(meta[tag][0])
                    if value:
                        metadata[tagname] = value
            except Exception:
                pass

    if 'episodeTitle' in metadata and 'title' in metadata:
        metadata['seriesTitle'] = metadata['title']
    if 'genre' in metadata:
        # Comma-separated genres: trim surrounding blanks and drop empty
        # entries. Each key gets its own list so that changing one does
        # not change the other.
        genres = [g.strip() for g in metadata['genre'].split(',')]
        genres = [g for g in genres if g]
        metadata['vProgramGenre'] = genres
        metadata['vSeriesGenre'] = list(genres)
        del metadata['genre']

    dvrms_cache[full_path] = metadata
    return metadata
def from_eyetv(full_path):
    """Read pyTivo metadata from the .eyetvp plist next to ``full_path``.

    The first '*.eyetvp' file in the directory of ``full_path`` is parsed
    with plistlib and its 'epg info' dictionary is mapped to pyTivo names.

    Returns a dict, which is empty when there is no .eyetvp file or it
    has no 'epg info' entry. Missing EPG keys and unknown rating codes
    are skipped rather than raising.
    """
    ratings = {'TVY7': 'x1', 'TVY': 'x2', 'TVG': 'x3',
               'TVPG': 'x4', 'TV14': 'x5', 'TVMA': 'x6',
               'G': 'G1', 'PG': 'P2', 'PG-13': 'P3',
               'R': 'R4', 'NC-17': 'N6'}
    keys = {'TITLE': 'title', 'SUBTITLE': 'episodeTitle',
            'DESCRIPTION': 'description', 'YEAR': 'movieYear',
            'EPISODENUM': 'episodeNumber'}
    metadata = {}

    path = os.path.dirname(full_path)
    packages = [x for x in os.listdir(path) if x.endswith('.eyetvp')]
    if not packages:
        # No EyeTV plist beside the recording: nothing to report.
        return metadata

    eyetv = plistlib.readPlist(os.path.join(path, packages[0]))
    if 'epg info' not in eyetv:
        return metadata
    info = eyetv['epg info']

    for key in keys:
        if info.get(key):
            metadata[keys[key]] = info[key]
    if info.get('SUBTITLE'):
        # With an episode subtitle present, the title names the series.
        metadata['seriesTitle'] = info.get('TITLE', '')
    if info.get('ACTORS'):
        metadata['vActor'] = [x.strip() for x in info['ACTORS'].split(',')]
    if info.get('DIRECTOR'):
        metadata['vDirector'] = [info['DIRECTOR']]

    tv_rating = info.get('TV_RATING')
    if tv_rating and tv_rating in ratings:
        metadata['tvRating'] = ratings[tv_rating]
    if info.get('STAR_RATING'):
        # The code is derived from the length of the rating string.
        metadata['starRating'] = 'x%d' % (len(info['STAR_RATING']) * 2 - 1)
    mpaa = info.get('MPAA_RATING')
    if mpaa and mpaa != 'NR' and mpaa in ratings:
        metadata['mpaaRating'] = ratings[mpaa]

    # movieYear must be set for the mpaa/star ratings to work
    if (('mpaaRating' in metadata or 'starRating' in metadata) and
            'movieYear' not in metadata):
        # assumes eyetv['info']['start'] is a datetime-like object with a
        # .year attribute -- TODO confirm against real .eyetvp files
        metadata['movieYear'] = eyetv['info']['start'].year
    return metadata
def from_text(full_path):
    """Collect metadata from the sidecar text files that accompany a video.

    The following files are read in order when they exist, with later
    files overriding scalar values set by earlier ones:

      1. the video's base name + '.properties' ('=' separated)
      2. 'default.txt' in the same directory
      3. the full file name + '.txt'
      4. '.meta/default.txt'
      5. '.meta/' + the full file name + '.txt'

    The .txt files use ':' as the separator. Lines whose first
    non-blank character is '#', lines without the separator, and lines
    with an empty key or value are ignored. Keys starting with 'v'
    accumulate into lists; all other keys hold a single string.

    Returns the resulting dict (empty when no sidecar file exists).
    """
    metadata = {}
    path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)
    candidates = [os.path.join(path, title) + '.properties',
                  os.path.join(path, 'default.txt'),
                  full_path + '.txt',
                  os.path.join(path, '.meta', 'default.txt'),
                  os.path.join(path, '.meta', name) + '.txt']

    for metafile in candidates:
        if not os.path.exists(metafile):
            continue
        # ':' for plain text files, '=' for .properties files.
        sep = ':='[metafile.endswith('.properties')]

        # Close the handle deterministically instead of leaving it to the
        # garbage collector. splitlines() accepts \n, \r\n and \r line
        # endings, matching the universal-newline mode used before.
        handle = open(metafile)
        try:
            lines = handle.read().splitlines()
        finally:
            handle.close()

        for line in lines:
            if line.strip().startswith('#') or sep not in line:
                continue
            key, value = [x.strip() for x in line.split(sep, 1)]
            if not key or not value:
                continue
            if key.startswith('v'):
                metadata.setdefault(key, []).append(value)
            else:
                metadata[key] = value
    return metadata
def basic(full_path):
    """Assemble the metadata dict for a video file.

    Starts from the file name (without extension) as the title and the
    file's modification time as the original air date, then overlays
    format-specific metadata chosen by extension or by an enclosing
    '.eyetv' directory, and finally the sidecar text files read by
    from_text().
    """
    base_path, name = os.path.split(full_path)
    title, ext = os.path.splitext(name)

    # Negative modification times are clamped to zero before conversion.
    mtime = max(os.stat(full_path).st_mtime, 0)

    metadata = {
        'title': title,
        'originalAirDate': datetime.fromtimestamp(mtime).isoformat(),
    }

    ext = ext.lower()
    if ext in ('.mp4', '.m4v', '.mov'):
        extra = from_moov(full_path)
    elif ext in ('.dvr-ms', '.asf', '.wmv'):
        extra = from_dvrms(full_path)
    elif 'plistlib' in sys.modules and base_path.endswith('.eyetv'):
        # EyeTV support is only attempted when plistlib is loaded.
        extra = from_eyetv(full_path)
    else:
        extra = {}

    metadata.update(extra)
    # Sidecar text files have the last word.
    metadata.update(from_text(full_path))
    return metadata
def from_container(xmldoc):
    """Extract pyTivo metadata from the first 'Details' node of ``xmldoc``.

    Tags with no data are left out. The Tribune copyright notice is
    removed from the description, the TV rating is prefixed with 'x', and
    a source channel containing '-' is split into 'displayMajorNumber'
    and 'displayMinorNumber'.
    """
    # pyTivo name -> tag name under the Details node
    keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
            'description': 'Description', 'seriesId': 'SeriesId',
            'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
            'displayMajorNumber': 'SourceChannel', 'callsign': 'SourceStation'}

    details = xmldoc.getElementsByTagName('Details')[0]

    metadata = {}
    for key, tag in keys.items():
        data = tag_data(details, tag)
        if not data:
            continue
        if key == 'description':
            data = data.replace(TRIBUNE_CR, '')
        elif key == 'tvRating':
            data = 'x' + data
        elif key == 'displayMajorNumber' and '-' in data:
            data, metadata['displayMinorNumber'] = data.split('-')
        metadata[key] = data

    return metadata
def from_details(xmldoc):
    """Extract pyTivo metadata from the first 'showing' node of ``xmldoc``.

    Collects scalar fields by tag path, the list-valued 'v*' fields of
    the program node, the 'showingBits' value attribute, and the rating
    and colour codes produced by _tag_value(). Fields with no data are
    left out of the returned dict.
    """
    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    # pyTivo name -> tag path relative to the showing node
    items = {'description': 'program/description',
             'title': 'program/title',
             'episodeTitle': 'program/episodeTitle',
             'episodeNumber': 'program/episodeNumber',
             'seriesId': 'program/series/uniqueId',
             'seriesTitle': 'program/series/seriesTitle',
             'originalAirDate': 'program/originalAirDate',
             'isEpisode': 'program/isEpisode',
             'movieYear': 'program/movieYear',
             'partCount': 'partCount',
             'partIndex': 'partIndex',
             'time': 'time'}

    metadata = {}
    for item, tag_path in items.items():
        data = tag_data(showing, tag_path)
        if not data:
            continue
        if item == 'description':
            data = data.replace(TRIBUNE_CR, '')
        metadata[item] = data

    # List-valued fields found under the program node.
    for item in ('vActor', 'vChoreographer', 'vDirector',
                 'vExecProducer', 'vProgramGenre', 'vGuestStar',
                 'vHost', 'vProducer', 'vWriter'):
        entries = _vtag_data(program, item)
        if entries:
            metadata[item] = entries

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    for tag in ('starRating', 'mpaaRating', 'colorCode'):
        code = _tag_value(program, tag)
        if code:
            metadata[tag] = code

    # Only the second character of the code is kept, prefixed with 'x'.
    rating = _tag_value(showing, 'tvRating')
    if rating:
        metadata['tvRating'] = 'x' + rating[1]

    return metadata
def from_tivo(full_path):
    """Read metadata from a .TiVo file by running the external tdcat tool.

    Results are memoised in ``tivo_cache`` under ``full_path``. An empty
    dict is returned, and not cached, when either the tdcat binary or the
    'tivo_mak' server setting is not configured.

    Exceptions raised while parsing tdcat's output propagate to the
    caller; the child process is cleaned up first.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    if not (tdcat_path and tivo_mak):
        return {}

    tcmd = [tdcat_path, '-m', tivo_mak, '-2', full_path]
    tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
    try:
        xmldoc = minidom.parse(tdcat.stdout)
    finally:
        # Close the pipe and reap the child whether or not parsing
        # succeeded, so no zombie process or open descriptor is left.
        tdcat.stdout.close()
        tdcat.wait()

    metadata = from_details(xmldoc)
    tivo_cache[full_path] = metadata
    return metadata
# Command-line use: print the metadata found for the file named by the
# first argument, one "key: value" line per value.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        target = sys.argv[1]
        ext = os.path.splitext(target)[1].lower()

        metadata = {}
        if ext == '.tivo':
            config.init([])
            metadata.update(from_tivo(target))
        elif ext in ('.mp4', '.m4v', '.mov'):
            metadata.update(from_moov(target))
        elif ext in ('.dvr-ms', '.asf', '.wmv'):
            metadata.update(from_dvrms(target))

        for key in metadata:
            value = metadata[key]
            # Scalars are wrapped so one loop prints both shapes.
            if type(value) != list:
                value = [value]
            for item in value:
                # A single parenthesised argument prints the same text
                # whether print is a statement or a function.
                print('%s: %s' % (key, item.encode('utf8')))