Moved into a sub-dir, so that a svn checkout has the same structure as
[rox-lib/lack.git] / ROX-Lib2 / python / rox / mime.py
blob0a84cb4a26a6a2bd906fd1ef4897a3fed23e709d
"""This module provides access to the shared MIME database.

types is a dictionary of the MIME types looked up so far, indexed by
(media, subtype) tuples, e.g. types[('application', 'x-python')]. Use
lookup() to fetch (or create) the entry for a type.

Applications can install information about MIME types by storing an
XML file as <MIME>/packages/<application>.xml and running the
update-mime-database command, which is provided by the freedesktop.org
shared mime database package.

See http://www.freedesktop.org/standards/shared-mime-info-spec/ for
information about the format of these files."""
14 import os
15 import stat
16 import fnmatch
18 import rox
19 import rox.choices
20 from rox import i18n, _, basedir, xattr
22 from xml.dom import minidom, XML_NAMESPACE
# XML namespace of the elements read from the shared MIME-info XML files
FREE_NS = 'http://www.freedesktop.org/standards/shared-mime-info'

# Maps (media, subtype) to MIMEtype objects; entries are added by lookup()
types = {}

# Icon sizes when requesting MIME type icon
ICON_SIZE_HUGE = 96
ICON_SIZE_LARGE = 52
ICON_SIZE_SMALL = 18
ICON_SIZE_UNSCALED = None

# Lookup tables; they stay None until _cache_database() fills them in
exts = None      # Maps extensions to types
globs = None     # List of (glob, type) pairs
literals = None  # Maps literal names to types
magic = None     # MagicDB holding the content-matching rules
def _get_node_data(node):
    """Return the text held by an XML node.

    The nodeValue of every direct child is concatenated in document
    order and surrounding whitespace is stripped from the result."""
    pieces = []
    for child in node.childNodes:
        pieces.append(child.nodeValue)
    return ''.join(pieces).strip()
def lookup(media, subtype = None):
    """Get the MIMEtype object for this type, creating a new one if needed.

    May be called either as lookup('text', 'plain') or, with the
    subtype omitted, as lookup('text/plain'); the combined form is
    split at the first '/'."""
    if subtype is None and '/' in media:
        media, subtype = media.split('/', 1)

    key = (media, subtype)
    known = types.get(key)
    if known is None:
        # First request for this type: build it and remember it.
        known = types[key] = MIMEtype(media, subtype)
    return known
class MIMEtype:
    """Type holding data about a MIME type.

    Attributes:
    media    - the part of the type name before the '/', e.g. 'text'
    subtype  - the part after the '/', e.g. 'plain'
    _comment - None until get_comment() is first called, afterwards a
               (goodness, text) tuple (see _load())"""

    def __init__(self, media, subtype):
        "Don't use this constructor directly; use mime.lookup() instead."
        assert media and '/' not in media
        assert subtype and '/' not in subtype
        # lookup() keeps exactly one object per type
        assert (media, subtype) not in types

        self.media = media
        self.subtype = subtype
        self._comment = None

    def _load(self):
        """Loads comment for current language. Use get_comment() instead.

        Every <media>/<subtype>.xml file found on the data search path
        is scanned for <comment> elements. A comment scores 2 if its
        xml:lang attribute (default 'en') is in i18n.langs, otherwise 1.
        A comment replaces the stored one only if it scores higher, and
        the search stops at the first comment scoring 2.

        self._comment must already be a (goodness, text) tuple when this
        is called; get_comment() takes care of that."""
        resource = os.path.join('mime', self.media, self.subtype + '.xml')
        for path in basedir.load_data_paths(resource):
            doc = minidom.parse(path)
            if doc is None:
                continue
            for comment in doc.documentElement.getElementsByTagNameNS(FREE_NS, 'comment'):
                lang = comment.getAttributeNS(XML_NAMESPACE, 'lang') or 'en'
                goodness = 1 + (lang in i18n.langs)
                if goodness > self._comment[0]:
                    self._comment = (goodness, _get_node_data(comment))
                if goodness == 2:
                    return

    def get_comment(self):
        """Returns comment for current language, loading it if needed.

        If no comment is found the type name itself ('media/subtype')
        is returned."""
        # Should we ever reload?
        if self._comment is None:
            # Goodness 0, so any comment found by _load() replaces it
            self._comment = (0, str(self))
            self._load()
        return self._comment[1]

    def __str__(self):
        return self.media + '/' + self.subtype

    def __repr__(self):
        return '[%s: %s]' % (self, self._comment or '(comment not loaded)')

    def get_icon(self, size=None):
        """Return a GdkPixbuf with the icon for this type. If size
        is None then the image is returned at its natural size,
        otherwise the image is scaled to that width with the height
        at the correct aspect ratio. The constants
        ICON_SIZE_{HUGE,LARGE,SMALL} match the sizes used by the
        filer.

        Whatever saving.image_for_type() returns is passed straight
        back when it is false or when no size is given."""
        # I suppose it would make more sense to move the code
        # from saving to here...
        # Imported here rather than at module level; spelled as an
        # explicit package import instead of relying on Python 2's
        # implicit relative import of a sibling module.
        from rox import saving
        base = saving.image_for_type(self.media + '/' + self.subtype)
        if not base or not size:
            return base

        # New width is 'size', so the height scales by size/width.
        # (This used to compute width*size/height, which inverted the
        # aspect ratio of any icon that is not square.)
        h = int(base.get_height() * float(size) / base.get_width())
        return base.scale_simple(size, h, rox.g.gdk.INTERP_BILINEAR)
class MagicRule:
    """One rule line read from a shared MIME 'magic' file.

    The line layout, as consumed by the constructor, is
        [nest] '>' start '=' len-hi len-lo value ['&' mask]
        ['~' word] ['+' range] '\\n'
    where nest, start, word and range are decimal numbers, len-hi and
    len-lo are two raw characters giving the length of value (high
    byte first) and mask, if present, is as long as value.

    Attributes:
    nest     - nesting depth (0 when omitted)
    start    - first offset in the data at which value may occur
    lenvalue - length of value (and of mask)
    value    - the characters to look for
    mask     - None, or characters ANDed with the data before comparing
    word     - word size from the file (default 1); parsed but not used
               by the matching code in this class
    range    - number of consecutive start offsets to try (default 1)
    next     - nested rule that must also match, or None
    prev     - rule this one is nested under, or None"""

    def __init__(self, f):
        """Parse one rule from the open file f, consuming exactly one line.

        Raises ValueError if the line is malformed or the file ends
        before the rule is complete."""
        self.next = None
        self.prev = None

        ind = self._read_until(f, '>')[0]
        if not ind:
            self.nest = 0
        else:
            self.nest = int(ind)

        self.start = int(self._read_until(f, '=')[0])

        hb = f.read(1)
        lb = f.read(1)
        if not hb or not lb:
            raise ValueError('Malformed MIME magic line')
        self.lenvalue = ord(lb) + (ord(hb) << 8)

        self.value = f.read(self.lenvalue)
        if len(self.value) != self.lenvalue:
            raise ValueError('Malformed MIME magic line')

        c = f.read(1)
        if c == '&':
            self.mask = f.read(self.lenvalue)
            if len(self.mask) != self.lenvalue:
                raise ValueError('Malformed MIME magic line')
            c = f.read(1)
        else:
            self.mask = None

        if c == '~':
            w, c = self._read_until(f, '+\n')
            self.word = int(w)
        else:
            self.word = 1

        if c == '+':
            r, c = self._read_until(f, '\n')
            self.range = int(r)
        else:
            self.range = 1

        if c != '\n':
            # This used to raise a bare string, which is not a valid
            # exception (TypeError from Python 2.6 onwards).
            raise ValueError('Malformed MIME magic line')

    def _read_until(self, f, stops):
        """Read from f up to and including the first character in stops.

        Returns (text, stop) where text is everything read before the
        stop character. Raises ValueError at end of file, so that a
        truncated file cannot make the parser loop forever."""
        text = ''
        while True:
            c = f.read(1)
            if not c:
                raise ValueError('Malformed MIME magic line')
            if c in stops:
                return text, c
            text += c

    def getLength(self):
        """Return how much data must be read to be able to test this rule."""
        return self.start + self.lenvalue + self.range

    def appendRule(self, rule):
        """Attach rule below this one, or below the nearest ancestor of
        lower nesting depth. A rule that fits nowhere is dropped.

        NOTE(review): each rule has a single 'next' slot, so a second
        rule at the same depth replaces the first instead of acting as
        an alternative to it - confirm whether that is intended."""
        if self.nest < rule.nest:
            self.next = rule
            rule.prev = self
        elif self.prev:
            self.prev.appendRule(rule)

    def match(self, buffer):
        """Return a true value if this rule and its chain of nested
        rules all match buffer, otherwise False."""
        if self.match0(buffer):
            if self.next:
                return self.next.match(buffer)
            return True
        return False

    def match0(self, buffer):
        """Test this rule alone against buffer.

        value is compared at each of the 'range' offsets starting at
        'start'. Returns False as soon as buffer is too short for the
        offset being tried."""
        l = len(buffer)
        for o in range(self.range):
            s = self.start + o
            e = s + self.lenvalue
            if l < e:
                return False
            if self.mask:
                # AND each data character with the matching mask character
                test = ''.join([chr(ord(b) & ord(m))
                                for b, m in zip(buffer[s:e], self.mask)])
            else:
                test = buffer[s:e]

            if test == self.value:
                return True
        return False

    def __repr__(self):
        return '<MagicRule %d>%d=[%d]%s&%s~%d+%d>' % (self.nest,
                                                       self.start,
                                                       self.lenvalue,
                                                       repr(self.value),
                                                       repr(self.mask),
                                                       self.word,
                                                       self.range)
class MagicType:
    """The magic rules belonging to one MIME type.

    Attributes:
    mtype     - the value returned by match() on success
    top_rules - the rules at nesting depth 0 (plus any nested rule that
                arrives before a first rule has been read)
    last_rule - the rule most recently read by getLine()"""

    def __init__(self, mtype):
        self.mtype = mtype
        self.top_rules = []
        self.last_rule = None

    def getLine(self, f):
        """Read one rule from the open file f, file it under the right
        parent and return it."""
        parsed = MagicRule(f)

        if not (parsed.nest and self.last_rule):
            self.top_rules.append(parsed)
        else:
            # Nested rule: let the previous rule find where it belongs
            self.last_rule.appendRule(parsed)

        self.last_rule = parsed
        return parsed

    def match(self, buffer):
        """Return mtype if any top-level rule matches buffer, else None."""
        for candidate in self.top_rules:
            if not candidate.match(buffer):
                continue
            return self.mtype
        return None

    def __repr__(self):
        return '<MagicType %s>' % (self.mtype,)
class MagicDB:
    """All the content-matching rules loaded from 'magic' files.

    Attributes:
    types  - maps priority to a list of MagicType objects
    maxlen - the largest getLength() of any rule loaded so far, i.e.
             how much of a file match() reads"""

    def __init__(self):
        self.types = {}  # Indexed by priority, each entry is a list of type rules
        self.maxlen = 0

    def _peek(self, f):
        """Return the next character of f ('' at end of file) without
        consuming it."""
        pos = f.tell()
        c = f.read(1)
        f.seek(pos)
        return c

    def mergeFile(self, fname):
        """Add the rules in the magic file fname to the database.

        The file must begin with the line 'MIME-Magic\\0' and then
        consist of '[priority:type]' section headings, each followed
        by its rule lines.

        Raises ValueError if the header or a section heading is
        malformed; errors from opening the file, from lookup() and from
        parsing individual rules are not caught here."""
        f = open(fname, 'r')
        try:
            line = f.readline()
            if line != 'MIME-Magic\0\n':
                # These used to raise bare strings, which are not valid
                # exceptions (TypeError from Python 2.6 onwards).
                raise ValueError('Not a MIME magic file')

            while True:
                shead = f.readline()
                if not shead:
                    break
                if shead[0] != '[' or shead[-2:] != ']\n':
                    raise ValueError('Malformed section heading')
                pri, tname = shead[1:-2].split(':')
                pri = int(pri)
                mtype = lookup(tname)

                ents = self.types.setdefault(pri, [])
                magictype = MagicType(mtype)

                # Rule lines carry raw data, so the section only ends
                # when the next line starts with '[' (or at EOF).
                c = self._peek(f)
                while c and c != '[':
                    rule = magictype.getLine(f)
                    if rule and rule.getLength() > self.maxlen:
                        self.maxlen = rule.getLength()
                    c = self._peek(f)

                ents.append(magictype)
                if not c:
                    break
        finally:
            f.close()

    def match(self, path, max_pri=100, min_pri=0):
        """Return the type whose rules match the start of the file at
        path, or None.

        Priorities are tried from highest to lowest; only those with
        min_pri <= priority <= max_pri are considered. None is also
        returned if the file cannot be opened or read. Exceptions
        raised by the rules themselves are no longer swallowed."""
        try:
            f = open(path, 'r')
            try:
                buf = f.read(self.maxlen)
            finally:
                f.close()
        except EnvironmentError:
            # Unreadable or missing file: nothing to match against
            return None

        for pri in sorted(self.types, reverse=True):
            if pri > max_pri:
                continue
            if pri < min_pri:
                break
            for magictype in self.types[pri]:
                m = magictype.match(buf)
                if m:
                    return m

        return None

    def __repr__(self):
        return '<MagicDB %s>' % self.types
# Some well-known types, looked up once so they can be returned and
# compared directly
text = lookup('text/plain')

inode_block = lookup('inode/blockdevice')
inode_char = lookup('inode/chardevice')
inode_dir = lookup('inode/directory')
inode_fifo = lookup('inode/fifo')
inode_socket = lookup('inode/socket')
inode_symlink = lookup('inode/symlink')
inode_door = lookup('inode/door')

app_exe = lookup('application/executable')

# False whenever the lookup tables need (re)building by _cache_database()
_cache_uptodate = False
def _cache_database():
    """(Re)build the exts, globs, literals and magic lookup tables.

    Every 'mime/globs' and 'mime/magic' file on the data search path
    is read. Afterwards globs is sorted with the longest patterns
    first.

    The up-to-date flag is set before loading starts, so if loading
    raises, the partly filled tables stay in use until something clears
    the flag again."""
    global exts, globs, literals, magic, _cache_uptodate

    _cache_uptodate = True

    exts = {}       # Maps extensions to types
    globs = []      # List of (glob, type) pairs
    literals = {}   # Maps literal names to types
    magic = MagicDB()

    def _import_glob_file(path):
        """Loads name matching information from a MIME directory.

        Each useful line has the form 'type:pattern'. Comment lines,
        blank lines and lines without a ':' are skipped."""
        f = open(path)
        try:
            for line in f:
                if line.startswith('#'):
                    continue
                # Strip only the line ending; slicing off the last
                # character ate real data on an unterminated final line.
                line = line.rstrip('\r\n')
                if ':' not in line:
                    continue

                type_name, pattern = line.split(':', 1)
                mtype = lookup(type_name)

                if pattern.startswith('*.'):
                    rest = pattern[2:]
                    if not ('*' in rest or '[' in rest or '?' in rest):
                        # Plain '*.ext' pattern: cheap dictionary lookup
                        exts[rest] = mtype
                        continue
                if '*' in pattern or '[' in pattern or '?' in pattern:
                    globs.append((pattern, mtype))
                else:
                    literals[pattern] = mtype
        finally:
            f.close()

    for path in basedir.load_data_paths(os.path.join('mime', 'globs')):
        _import_glob_file(path)
    for path in basedir.load_data_paths(os.path.join('mime', 'magic')):
        magic.mergeFile(path)

    # Sort globs by length, longest first (stable, so patterns of equal
    # length keep the order in which they were read)
    globs.sort(key=lambda pair: len(pair[0]), reverse=True)
def get_type_by_name(path):
    """Returns type of file by its name, or None if not known.

    Only the leaf name of path is used. Tried in order, each step with
    the name as given and then lower-cased:
    1. an exact match in literals;
    2. the text after each '.' in turn, longest first, in exts;
    3. each glob pattern, in the order stored in globs."""
    if not _cache_uptodate:
        _cache_database()

    leaf = os.path.basename(path)
    candidates = (leaf, leaf.lower())

    for name in candidates:
        if name in literals:
            return literals[name]

    for name in candidates:
        pieces = name.split('.')
        # 'a.tar.gz' is tried as 'tar.gz' and then as 'gz'
        for first in range(1, len(pieces)):
            ext = '.'.join(pieces[first:])
            if ext in exts:
                return exts[ext]

    for pattern, mime_type in globs:
        for name in candidates:
            if fnmatch.fnmatch(name, pattern):
                return mime_type
    return None
def get_type_by_contents(path, max_pri=100, min_pri=0):
    """Returns type of file by its contents, or None if not known.

    max_pri and min_pri are handed to the magic database's match() and
    bound the rule priorities it will consider."""
    if not _cache_uptodate:
        _cache_database()

    found = magic.match(path, max_pri, min_pri)
    return found
def get_type(path, follow=1, name_pri=100):
    """Returns type of file indicated by path.
    path     - pathname to check (need not exist)
    follow   - when reading file, follow symbolic links
    name_pri - Priority to do name matches.  100=override magic

    If path cannot be examined the type is guessed from the name
    alone, falling back to text/plain. Otherwise a MIME type stored in
    the file's extended attributes wins. For regular files the order
    is then: magic rules of priority >= name_pri, the name, magic
    rules of priority <= name_pri, and finally application/executable
    or text/plain depending on whether any execute bit is set.
    Other kinds of file get the matching inode/* type."""
    if not _cache_uptodate:
        _cache_database()

    try:
        if follow:
            st = os.stat(path)
        else:
            st = os.lstat(path)
    except (OSError, ValueError):
        # Missing or otherwise unusable path: only the name is left
        t = get_type_by_name(path)
        return t or text

    try:
        if xattr.present(path):
            name = xattr.get(path, xattr.USER_MIME_TYPE)
            if name and '/' in name:
                media, subtype = name.split('/')
                return lookup(media, subtype)
    except Exception:
        # Best effort: an unreadable or malformed attribute (including
        # a value with more than one '/') is ignored.
        pass

    if stat.S_ISREG(st.st_mode):
        t = get_type_by_contents(path, min_pri=name_pri)
        if not t:
            t = get_type_by_name(path)
        if not t:
            t = get_type_by_contents(path, max_pri=name_pri)
        if t is None:
            # Same bits as the old octal literal 0111, spelled in a way
            # that is valid in every Python version
            any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
            if stat.S_IMODE(st.st_mode) & any_exec:
                return app_exe
            else:
                return text
        return t
    elif stat.S_ISDIR(st.st_mode):
        return inode_dir
    elif stat.S_ISCHR(st.st_mode):
        return inode_char
    elif stat.S_ISBLK(st.st_mode):
        return inode_block
    elif stat.S_ISFIFO(st.st_mode):
        return inode_fifo
    elif stat.S_ISLNK(st.st_mode):
        return inode_symlink
    elif stat.S_ISSOCK(st.st_mode):
        return inode_socket
    # None of the kinds above
    return inode_door
def install_mime_info(application, package_file = None):
    """Copy 'package_file' as ~/.local/share/mime/packages/<application>.xml.
    If package_file is None, install <app_dir>/<application>.xml.
    If already installed, does nothing. May overwrite an existing
    file with the same name (if the contents are different).

    After copying, update-mime-database is run on the MIME directory;
    if it returns a non-zero status the copy is removed again. Errors
    while installing are shown with rox.report_exception() and are not
    raised. An unreadable package_file does raise."""
    global _cache_uptodate

    application += '.xml'
    if not package_file:
        package_file = os.path.join(rox.app_dir, application)

    source = open(package_file)
    try:
        new_data = source.read()
    finally:
        source.close()

    # See if the file is already installed

    package_dir = os.path.join('mime', 'packages')
    resource = os.path.join(package_dir, application)
    for x in basedir.load_data_paths(resource):
        try:
            installed = open(x)
            try:
                old_data = installed.read()
            finally:
                installed.close()
        except EnvironmentError:
            # Can't read this copy; check the others
            continue
        if old_data == new_data:
            return  # Already installed

    # The lookup tables must be rebuilt on next use
    _cache_uptodate = False

    # Not already installed; add a new copy
    try:
        # Create the directory structure...
        new_file = os.path.join(basedir.save_data_path(package_dir), application)

        # Write the file...
        # (closed explicitly so the data is on disk before the database
        # update below reads it)
        out = open(new_file, 'w')
        try:
            out.write(new_data)
        finally:
            out.close()

        # Update the database...
        if os.path.isdir('/uri/0install/zero-install.sourceforge.net'):
            command = '/uri/0install/zero-install.sourceforge.net/bin/update-mime-database'
        else:
            command = 'update-mime-database'
        if os.spawnlp(os.P_WAIT, command, command, basedir.save_data_path('mime')):
            os.unlink(new_file)
            raise Exception(_("The '%s' command returned an error code!\n" \
                      "Make sure you have the freedesktop.org shared MIME package:\n" \
                      "http://www.freedesktop.org/standards/shared-mime-info.html") % command)
    except Exception:
        rox.report_exception()
def get_type_handler(mime_type, handler_type = 'MIME-types'):
    """Lookup the ROX-defined run action for a given mime type.
    mime_type is an object returned by lookup().
    handler_type is a config directory leaf (e.g.'MIME-types').

    The handler named '<media>_<subtype>' is preferred; without one the
    result of looking up the handler named '<media>' is returned."""
    site = 'rox.sourceforge.net'
    media = mime_type.media

    specific = basedir.load_first_config(site, handler_type,
                                         media + '_' + mime_type.subtype)
    if specific:
        return specific

    # Fall back to the base handler if no subtype handler exists
    return basedir.load_first_config(site, handler_type, media)
def _test(name):
    """Print results for name.  Test routine.

    Prints the name, the type found by get_type() with name_pri=80,
    and that type's comment, separated by spaces."""
    found = get_type(name, name_pri=80)
    print('%s %s %s' % (name, found, found.get_comment()))
if __name__ == '__main__':
    # Run as a script: report the type of each file named on the
    # command line, or of 'file.txt' when none is given.
    import sys
    for f in (sys.argv[1:] or ['file.txt']):
        _test(f)
    #print globs