From 6a04a1ed3c4199543707d9373e5283662905b64e Mon Sep 17 00:00:00 2001 From: Thomas Leonard Date: Sat, 8 Mar 2003 14:47:41 +0000 Subject: [PATCH] Switched to expat parser for major speed boost. git-svn-id: https://rox.svn.sourceforge.net/svnroot/rox/trunk/MIME-Editor@2536 66de3db3-b00d-0410-b41b-f4738ad19bea --- type.py | 312 ++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 174 insertions(+), 138 deletions(-) diff --git a/type.py b/type.py index e3f8eab..0b9c14a 100644 --- a/type.py +++ b/type.py @@ -1,17 +1,8 @@ import os import rox from rox import g -from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE -try: - # The 4Suite parser is much faster, if present - assert 0 - from Ft.Xml.cDomlette import nonvalParse - from Ft.Xml.InputSource import InputSourceFactory - def parse(file): - isrc = InputSourceFactory() - return nonvalParse(isrc.fromUri(file)) -except: - parse = minidom.parse +from xml.parsers import expat +from xml.dom import XML_NAMESPACE types = {} @@ -20,20 +11,6 @@ user_override = os.path.join(home_mime, 'packages', 'Override.xml') FREE_NS='http://www.freedesktop.org/standards/shared-mime-info' -class Field: - def __init__(self, node): - self.node = node - - def get(self): - return "<%s>" % self.node.localName - -field_handlers = {} - -def data(node): - """Return all the text directly inside this DOM Node.""" - return ''.join([text.nodeValue for text in node.childNodes - if text.nodeType == Node.TEXT_NODE]) - def get_type(name): if name not in types: types[name] = MIME_Type(name) @@ -43,45 +20,55 @@ class MIME_Type: def __init__(self, name): assert name not in types self.media, self.subtype = name.split('/') - self.sources = [] + + self.comments = [] + self.globs = [] + self.magic = [] + self.xml = [] + self.others = [] - def add_from_node(self, file, node): - self.sources.append(Source(file, node)) + def add_from_node(self, file): + self.sources.append(Source(file)) + + def add_comment(self, lang, comment): + self.comments.append((lang, comment)) + def add_xml(self, uri, name): + self.xml.append((uri, name)) + + def add_magic(self, prio, root): + self.magic.append((prio, root)) + + def add_other(self, element): + self.others.append(element) + + def add_glob(self, pattern): + self.globs.append(pattern) + def get_comment(self): - for src in self.sources: - comment = src.get_comment() - if comment: + best = None + for lang, comment in self.comments: + if not lang: return comment - return self.get_name() + best = comment + return best or self.get_name() def get_name(self): return self.media + '/' + self.subtype - - def add_user(self): - if self.sources and self.sources[-1].file == user_override: - return - self.sources.append(Source(user_override, None)) - - def get_comments(self): return self.merge('get_comments') - def get_globs(self): return self.merge('get_globs') - def get_magic(self): return self.merge('get_magic') - def get_xml(self): return self.merge('get_xml') - def get_others(self): return self.merge('get_others') - - def merge(self, fn): - out = [] - for s in self.sources: - out += getattr(s, fn)() - return out + def get_comments(self): return map(Comment, self.comments) + def get_globs(self): return map(Glob, self.globs) + def get_magic(self): return map(Magic, self.magic) + def get_xml(self): return map(XML, self.xml) + def get_others(self): return map(Others, self.others) + class Field: - def __init__(self, node, src): - self.node = node - self.src = src + "MIME_Type.get_* functions return a list of these." + def __init__(self, item): + self.item = item def __str__(self): - return "<%s>" % self.node.localName + return "<%s>" % self.item def add_subtree(self, model, iter): return @@ -91,127 +78,162 @@ class Field: class Comment(Field): def __str__(self): - lang = self.node.getAttributeNS(XML_NAMESPACE, 'lang') + lang, data = self.item if lang: lang = '[' + lang + '] ' else: lang = '(default) ' - return lang + data(self.node) + return lang + data class Glob(Field): def __str__(self): - return "Match '%s'" % self.node.getAttributeNS(None, 'pattern') + return "Match '%s'" % self.item class Magic(Field): def __str__(self): - prio = self.node.getAttributeNS(None, 'priority') or '50' + prio, match = self.item return "Match with priority %s" % prio def add_subtree(self, model, parent): - def build(node, parent): - for c in node.childNodes: - if c.nodeType != Node.ELEMENT_NODE: continue - text = '%s at %s = %s' % (c.getAttributeNS(None, 'type'), - c.getAttributeNS(None, 'offset'), - c.getAttributeNS(None, 'value')) - mask = c.getAttributeNS(None, 'mask') - if mask: - text += ' masked with ' + mask + def build(match, parent): + for m in match.matches: + text = '%s at %s = %s' % (m.type, m.offset, m.value) + if m.mask: + text += ' masked with ' + m.mask iter = model.append(parent) model.set(iter, 0, text) - build(c, iter) - build(self.node, parent) + build(m, iter) + build(self.item[1], parent) def __cmp__(self, b): ret = cmp(str(self), str(b)) if ret: return ret - return cmp(self.node, b.node) + return cmp(self.item[1], b.item[1]) class XML(Field): def __str__(self): - ns = self.node.getAttributeNS(None, 'namespaceURI') - name = self.node.getAttributeNS(None, 'localName') - return "<%s> with namespace '%s'" % (name, ns) + return "<%s> with namespace '%s'" % (self.item[1], self.item[0]) class Others(Field): pass -class Source: - def __init__(self, file, node): - self.file = file - if not node: - newdoc = minidom.Document() - node = newdoc.createElementNS(FREE_NS, 'mime-type') - - self.comments = [] - self.globs = [] - self.magic = [] - self.xml = [] - self.others = [] +class FieldParser: + def __init__(self, type, attrs): + self.type = type - for c in node.childNodes: - if c.nodeType != Node.ELEMENT_NODE: continue - if c.namespaceURI == FREE_NS: - if c.localName == 'comment': - self.comments.append(c) - elif c.localName == 'glob': - self.globs.append(c) - elif c.localName == 'magic': - self.magic.append(c) - elif c.localName == 'root-XML': - self.xml.append(c) - else: - self.others.append(c) - else: - self.others.append(c) + def start(self, element, attrs): pass + def data(self, data): pass + def end(self): pass - def get_comment(self): - for field in self.comments: - return data(field) +class CommentParser(FieldParser): + def __init__(self, type, attrs): + FieldParser.__init__(self, type, attrs) + self.lang = attrs.get(XML_NAMESPACE + ' lang', None) + self.comment = '' - def make(self, const, list): - return [const(node, self) for node in list] + def data(self, data): + self.comment += data - def get_comments(self): return self.make(Comment, self.comments) - def get_globs(self): return self.make(Glob, self.globs) - def get_magic(self): return self.make(Magic, self.magic) - def get_xml(self): return self.make(XML, self.xml) - def get_others(self): return self.make(Others, self.others) - -def init(): - for mime_dir in ['/usr/share/mime', '/usr/local/share/mime', home_mime]: - packages_dir = os.path.join(mime_dir, 'packages') - if not os.path.isdir(packages_dir): - continue - for package in os.listdir(packages_dir): - if package == 'Override.xml' and mime_dir is home_mime: continue - scan_file(os.path.join(packages_dir, package)) - scan_file(user_override) + def end(self): + self.type.add_comment(self.lang, self.comment) + +class Match: + def __init__(self, parent): + self.parent = parent + self.matches = [] + + def __cmp__(self, b): + def child_cmp(): + for x, y in zip(self.matches, b.matches): + c = cmp(x, y) + if c: return c + return 0 + + return cmp(self.type, b.type) or cmp(self.offset, b.offset) or \ + cmp(self.value, b.value) or cmp(self.mask, b.mask) or \ + child_cmp() - for t in types.values(): - t.add_user() +class MagicParser(FieldParser): + def __init__(self, type, attrs): + FieldParser.__init__(self, type, attrs) + self.prio = attrs.get('priority', 50) + self.match = Match(None) + + def start(self, element, attrs): + new = Match(self.match) + new.offset = attrs.get('offset', '?') + new.type = attrs.get('type', '?') + new.value = attrs.get('value', '?') + new.mask = attrs.get('mask', None) + self.match.matches.append(new) + + def end(self): + if self.match.parent: + self.match = self.match.parent + else: + self.type.add_magic(self.prio, self.match) + +class Scanner: + def __init__(self): + self.level = 0 + self.type = None + self.handler = None + + def parse(self, path): + parser = expat.ParserCreate(namespace_separator = ' ') + parser.StartElementHandler = self.start + parser.EndElementHandler = self.end + parser.CharacterDataHandler = self.data + parser.ParseFile(file(path)) + + def start(self, element, attrs): + self.level += 1 + if self.level == 1: + assert element == FREE_NS + ' mime-info' + elif self.level == 2: + assert element == FREE_NS + ' mime-type' + self.type = get_type(attrs['type']) + elif self.level == 3: + if element == FREE_NS + ' comment': + self.handler = CommentParser(self.type, attrs) + elif element == FREE_NS + ' glob': + self.type.add_glob(attrs['pattern']) + elif element == FREE_NS + ' magic': + self.handler = MagicParser(self.type, attrs) + elif element == FREE_NS + ' root-XML': + self.type.add_xml(attrs['namespaceURI'], attrs['localName']) + else: + self.type.add_other(element) + else: + assert self.handler + self.handler.start(element, attrs) + + def end(self, element): + if self.handler: + self.handler.end() + self.level -=1 + if self.level == 1: + self.type = None + elif self.level == 2: + self.handler = None + + def data(self, data): + if self.handler: + self.handler.data(data) -def scan_file(file): - if not file.endswith('.xml'): return - if not os.path.exists(file): +def scan_file(path): + if not path.endswith('.xml'): return + if not os.path.exists(path): return + scanner = Scanner() try: - doc = parse(file) - root = doc.documentElement - for node in root.childNodes: - if node.nodeType != Node.ELEMENT_NODE: - continue - type_name = node.getAttributeNS(None, 'type') - if type_name: - type = get_type(type_name) - type.add_from_node(file, node) - else: - print "Skipping", node + scanner.parse(path) except: rox.report_exception() def get_override(): + from xml.dom import minidom if os.path.exists(user_override): - doc = parse(user_override) + doc = minidom.parse(user_override) else: doc = minidom.Document() node = doc.createElementNS(FREE_NS, 'mime-info') @@ -242,8 +264,6 @@ def write_override(doc): os.waitpid(child, 0) import __main__ - global types - types = {} __main__.box.update() def add_type(name): @@ -258,6 +278,7 @@ def add_type(name): write_override(doc) def delete_type(name): + from xml.dom import Node doc = get_override() removed = False for c in doc.documentElement.childNodes: @@ -274,3 +295,18 @@ def delete_type(name): g.STOCK_DELETE): return write_override(doc) + +def init(): + "(Re)read the database." + global types + types = {} + for mime_dir in ['/usr/share/mime', '/usr/local/share/mime', home_mime]: + packages_dir = os.path.join(mime_dir, 'packages') + if not os.path.isdir(packages_dir): + continue + packages = os.listdir(packages_dir) + packages.sort() + for package in packages: + if package == 'Override.xml' and mime_dir is home_mime: continue + scan_file(os.path.join(packages_dir, package)) + scan_file(user_override) -- 2.11.4.GIT