Some commonality in video_info().
[pyTivo/TheBayer.git] / metadata.py
blob7e7d27d3f9511ba1260a45bfa44cf9ec061e8efe
1 #!/usr/bin/env python
3 import os
4 import subprocess
5 from datetime import datetime
6 from xml.dom import minidom
8 import mutagen
9 from lrucache import LRUCache
11 import config
# Something to strip: this copyright notice is removed from 'description'
# text by from_container() and from_details().
TRIBUNE_CR = ' Copyright Tribune Media Services, Inc.'

# Caches of already-built metadata dicts, keyed by the file's full path:
# tivo_cache is filled by from_tivo(), mp4_cache by from_moov().
# NOTE(review): 50 is presumably the maximum number of cached entries --
# confirm against lrucache.LRUCache.
tivo_cache = LRUCache(50)
mp4_cache = LRUCache(50)
def tag_data(element, tag):
    """Return the text found by walking a '/'-separated tag path.

    Every path component is looked up with getElementsByTagName() on the
    node reached so far, and the first match is followed.  An empty string
    is returned when a component cannot be found or when the final node
    has no children; otherwise the ``data`` of its first child is returned.
    """
    node = element
    for part in tag.split('/'):
        matches = node.getElementsByTagName(part)
        if not matches:
            return ''
        node = matches[0]

    child = node.firstChild
    if not child:
        return ''
    return child.data
29 def _vtag_data(element, tag):
30 for name in tag.split('/'):
31 new_element = element.getElementsByTagName(name)
32 if not new_element:
33 return []
34 element = new_element[0]
35 elements = element.getElementsByTagName('element')
36 return [x.firstChild.data for x in elements if x.firstChild]
38 def _tag_value(element, tag):
39 item = element.getElementsByTagName(tag)
40 if item:
41 value = item[0].attributes['value'].value
42 name = item[0].firstChild.data
43 return name[0] + value[0]
def from_moov(full_path):
    """Build a pyTivo metadata dict from the tags mutagen reads in a file.

    Results are memoised in mp4_cache under full_path.  If mutagen raises,
    or returns nothing usable, an empty dict is cached and returned.

    Tag handling:
      * 'stik'                 -> 'isEpisode' ('true' only for 'TV Show')
      * 'tvnn', 'tven', 'tvsh' -> 'callsign', 'episodeNumber', 'seriesTitle'
      * '\\xa9day'              -> 'originalAirDate' (a bare 4-character
                                  year is padded to a full timestamp)
      * '\\xa9gen', 'gnre'      -> appended to 'vProgramGenre' and
                                  'vSeriesGenre'
      * '\\xa9nam'              -> 'episodeTitle' if the file also has
                                  'tvsh', otherwise 'title'
      * 'desc', '\\xa9cmt', 'ldes' -> 'description' (the longest one wins)
      * iTunEXTC               -> 'tvRating' or 'mpaaRating'
    """
    if full_path in mp4_cache:
        return mp4_cache[full_path]

    # The following 1-to-1 correspondence of atoms to pyTivo
    # variables is TV-biased
    atom_names = {'tvnn': 'callsign', 'tven': 'episodeNumber',
                  'tvsh': 'seriesTitle'}

    # Possible TV values: TV-Y7 TV-Y TV-G TV-PG TV-14 TV-MA Unrated
    # Possible MPAA values: G PG PG-13 R NC-17 Unrated
    ratings = {'TV-Y7': 'x1', 'TV-Y': 'x2', 'TV-G': 'x3',
               'TV-PG': 'x4', 'TV-14': 'x5', 'TV-MA': 'x6',
               'Unrated': 'x7', 'G': 'G1', 'PG': 'P2',
               'PG-13': 'P3', 'R': 'R4', 'NC-17': 'N6'}

    # Reading is best-effort, but only ordinary errors are swallowed so
    # that KeyboardInterrupt/SystemExit still propagate.
    try:
        mp4meta = mutagen.File(full_path)
    except Exception:
        mp4meta = None
    if not mp4meta:
        mp4_cache[full_path] = {}
        return {}

    metadata = {}
    len_desc = 0

    for key, value in mp4meta.items():
        if isinstance(value, list):
            if not value:
                # Nothing to take a first item from.
                continue
            value = value[0]

        if key == 'stik':
            if value == 'TV Show':
                metadata['isEpisode'] = 'true'
            else:
                metadata['isEpisode'] = 'false'
        elif key in atom_names:
            metadata[atom_names[key]] = value
        # These keys begin with the copyright symbol \xA9
        elif key == '\xa9day':
            if len(value) == 4:
                value += '-01-01T16:00:00Z'
            metadata['originalAirDate'] = value
        elif key in ['\xa9gen', 'gnre']:
            for k in ('vProgramGenre', 'vSeriesGenre'):
                metadata.setdefault(k, []).append(value)
        elif key == '\xa9nam':
            if 'tvsh' in mp4meta:
                metadata['episodeTitle'] = value
            else:
                metadata['title'] = value

        # Description in desc, cmt, and/or ldes tags. Keep the longest.
        elif key in ['desc', '\xa9cmt', 'ldes'] and len(value) > len_desc:
            metadata['description'] = value
            len_desc = len(value)

        # A common custom "reverse DNS format" tag
        elif (key == '----:com.apple.iTunes:iTunEXTC' and
              ('us-tv' in value or 'mpaa' in value)):
            # The rating is the second '|'-separated field; a value
            # without any '|' simply carries no rating.
            fields = value.split('|')
            if len(fields) > 1 and fields[1] in ratings:
                if 'us-tv' in value:
                    metadata['tvRating'] = ratings[fields[1]]
                elif 'mpaa' in value:
                    metadata['mpaaRating'] = ratings[fields[1]]

        # Actors, directors, producers, AND screenwriters may be in a long
        # embedded XML plist, with key '----' and rDNS 'iTunMOVI'. Ughh!

    mp4_cache[full_path] = metadata
    return metadata
def from_text(full_path):
    """Collect metadata from the plain-text sidecar files of a video.

    Three files are read, in this order, when they exist:
    ``default.txt`` in the video's directory, ``<full_path>.txt``, and
    ``.meta/<name>.txt`` in the video's directory.  Later files override
    earlier ones for ordinary keys.

    Each useful line has the form ``key: value``; lines starting with '#'
    and lines without a ':' are ignored.  Keys starting with 'v' are
    multi-valued: every occurrence is appended to a list.  All other keys
    hold the last value seen.

    Returns the resulting dict (empty if no file was found).
    """
    metadata = {}
    path, name = os.path.split(full_path)
    candidates = [os.path.join(path, 'default.txt'),
                  full_path + '.txt',
                  os.path.join(path, '.meta', name) + '.txt']

    for metafile in candidates:
        if not os.path.exists(metafile):
            continue
        # try/finally (rather than "with") guarantees the handle is closed
        # while staying valid on old Python 2 interpreters.
        handle = open(metafile)
        try:
            for line in handle:
                if line.strip().startswith('#') or ':' not in line:
                    continue
                key, value = [x.strip() for x in line.split(':', 1)]
                if key.startswith('v'):
                    metadata.setdefault(key, []).append(value)
                else:
                    metadata[key] = value
        finally:
            handle.close()

    return metadata
def basic(full_path):
    """Return baseline metadata for a file, refined by any richer source.

    The starting point is the file name without its extension as 'title'
    and the file's modification time (ISO format) as 'originalAirDate'.
    For .mp4/.m4v/.mov files the result of from_moov() is merged on top,
    and for every file the result of from_text() is merged last.
    """
    file_name = os.path.split(full_path)[1]
    title, ext = os.path.splitext(file_name)

    # Negative modification times are clamped to the epoch.
    mtime = max(os.stat(full_path).st_mtime, 0)

    metadata = {}
    metadata['title'] = title
    metadata['originalAirDate'] = datetime.fromtimestamp(mtime).isoformat()

    if ext.lower() in ('.mp4', '.m4v', '.mov'):
        metadata.update(from_moov(full_path))
    metadata.update(from_text(full_path))

    return metadata
def from_container(xmldoc):
    """Extract metadata from the first <Details> element of xmldoc.

    Each known child tag with non-empty text is copied into the result
    under its pyTivo key, with these adjustments:
      * 'description' has TRIBUNE_CR removed;
      * 'tvRating' is prefixed with 'x';
      * 'displayMajorNumber' (from SourceChannel) is split at the first
        '-' into 'displayMajorNumber' and 'displayMinorNumber'.

    Raises IndexError if xmldoc contains no <Details> element.
    """
    metadata = {}

    keys = {'title': 'Title', 'episodeTitle': 'EpisodeTitle',
            'description': 'Description', 'seriesId': 'SeriesId',
            'episodeNumber': 'EpisodeNumber', 'tvRating': 'TvRating',
            'displayMajorNumber': 'SourceChannel', 'callsign': 'SourceStation'}

    details = xmldoc.getElementsByTagName('Details')[0]

    for key in keys:
        data = tag_data(details, keys[key])
        if not data:
            continue
        if key == 'description':
            data = data.replace(TRIBUNE_CR, '')
        elif key == 'tvRating':
            data = 'x' + data
        elif key == 'displayMajorNumber':
            if '-' in data:
                # Split once only, so a channel string containing more
                # than one '-' cannot break the two-name unpacking.
                data, metadata['displayMinorNumber'] = data.split('-', 1)
        metadata[key] = data

    return metadata
def from_details(xmldoc):
    """Extract metadata from the first <showing> element of xmldoc.

    Collected, in order: single-valued text fields (via tag_data), the
    multi-valued 'v*' lists of the <program> element (via _vtag_data),
    the 'value' attribute of <showingBits>, the star/MPAA/colour codes of
    the program (via _tag_value), and finally the TV rating, stored as
    'x' plus the second character of its code.  Fields that come back
    empty are left out of the result.
    """
    metadata = {}

    showing = xmldoc.getElementsByTagName('showing')[0]
    program = showing.getElementsByTagName('program')[0]

    # Single-valued fields, addressed by path relative to <showing>.
    text_paths = {'description': 'program/description',
                  'title': 'program/title',
                  'episodeTitle': 'program/episodeTitle',
                  'episodeNumber': 'program/episodeNumber',
                  'seriesId': 'program/series/uniqueId',
                  'seriesTitle': 'program/series/seriesTitle',
                  'originalAirDate': 'program/originalAirDate',
                  'isEpisode': 'program/isEpisode',
                  'movieYear': 'program/movieYear',
                  'partCount': 'partCount',
                  'partIndex': 'partIndex'}

    for field, path in text_paths.items():
        text = tag_data(showing, path)
        if not text:
            continue
        if field == 'description':
            text = text.replace(TRIBUNE_CR, '')
        metadata[field] = text

    # Multi-valued fields, read from <program>.
    list_fields = ('vActor', 'vChoreographer', 'vDirector',
                   'vExecProducer', 'vProgramGenre', 'vGuestStar',
                   'vHost', 'vProducer', 'vWriter')

    for field in list_fields:
        entries = _vtag_data(program, field)
        if entries:
            metadata[field] = entries

    bits = showing.getElementsByTagName('showingBits')
    if bits:
        metadata['showingBits'] = bits[0].attributes['value'].value

    for field in ('starRating', 'mpaaRating', 'colorCode'):
        code = _tag_value(program, field)
        if code:
            metadata[field] = code

    tv_code = _tag_value(showing, 'tvRating')
    if tv_code:
        metadata['tvRating'] = 'x' + tv_code[1]

    return metadata
def from_tivo(full_path):
    """Return metadata for a .tivo file by parsing the XML tdcat emits.

    Results are memoised in tivo_cache under full_path.  When either the
    tdcat binary or the 'tivo_mak' server setting is not configured, an
    empty dict is returned and nothing is cached.

    Errors raised while parsing tdcat's output propagate to the caller;
    the child process is reaped and its pipe closed in every case.
    """
    if full_path in tivo_cache:
        return tivo_cache[full_path]

    tdcat_path = config.get_bin('tdcat')
    tivo_mak = config.get_server('tivo_mak')
    if not (tdcat_path and tivo_mak):
        return {}

    # NOTE(review): '-2' presumably selects the metadata chunk tdcat
    # should dump -- confirm against the tdcat documentation.
    tcmd = [tdcat_path, '-m', tivo_mak, '-2', full_path]
    tdcat = subprocess.Popen(tcmd, stdout=subprocess.PIPE)
    try:
        xmldoc = minidom.parse(tdcat.stdout)
    finally:
        # Close our end of the pipe and wait for the child so it is not
        # left behind as a zombie with an open file descriptor.
        tdcat.stdout.close()
        tdcat.wait()

    metadata = from_details(xmldoc)
    tivo_cache[full_path] = metadata
    return metadata
if __name__ == '__main__':
    import sys

    # Command-line use: print "key: value" lines for the file named by
    # the first argument.  Only .tivo and .mp4/.m4v/.mov files are read;
    # list values are printed one item per line.
    if len(sys.argv) > 1:
        target = sys.argv[1]
        metadata = {}
        ext = os.path.splitext(target)[1].lower()
        if ext == '.tivo':
            config.init([])
            metadata.update(from_tivo(target))
        elif ext in ['.mp4', '.m4v', '.mov']:
            metadata.update(from_moov(target))

        for key, value in metadata.items():
            if type(value) == list:
                entries = value
            else:
                entries = [value]
            for item in entries:
                print('%s: %s' % (key, item.encode('utf8')))