straw/opml.py

   1 """ OPML.py
   2
   3 """
   4
   5 __copyright__ = "Copyright (c) 2002-2005 Free Software Foundation, Inc."
   6 __author__  = "Juri Pakaste <juri@iki.fi>"
   7 __license__ = """
   8 Straw is free software; you can redistribute it and/or modify it under the
   9 terms of the GNU General Public License as published by the Free Software
  10 Foundation; either version 2 of the License, or (at your option) any later
  11 version.
  12
  13 Straw is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  15 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License along with
  18 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  19 Place - Suite 330, Boston, MA 02111-1307, USA. """
  20
  21 from Fetcher import FetchTask
  22 from JobManager import Job, TaskThread, JobHandler
  23 from model import Category, Feed
  24 from xml.sax import saxutils, make_parser, SAXParseException
  25 from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
  26 from xml.sax.saxutils import XMLGenerator
  27 from xml.sax.xmlreader import AttributesImpl
  28 import Fetcher
  29 import gnomevfs
  30 import gobject
  31 import straw.JobManager as JobManager
  32 import sys
  33 import time
  34 import xml.sax._exceptions
  35 import xml.sax.handler
  36
  37 class OpmlImportJobHandler(JobHandler):
  38     job_id = "opml-import"
  39
  40     __gsignals__ = {
  41         "opml-imported" : (gobject.SIGNAL_RUN_LAST, gobject.TYPE_NONE, (gobject.TYPE_PYOBJECT,))
  42     }
  43
  44     def __init__(self, id, job):
  45         JobHandler.__init__(self, id, job)
  46
  47     def _on_url_fetched(self, handler, task_result):
  48         self.task_queue.put((task_result.task.user_data, task_result.result))
  49
  50     def _run(self):
  51         fetch_task = Fetcher.create_task(url = self.job.url, user_data = None)
  52         fetch_result = fetch_task.fetch()
  53
  54         if not fetch_result.error:
  55             opml = read(fetch_result.content)
  56             tree = self._build_tree(opml.roots(), parent = self.job.category)
  57             self._notify("opml-imported", tree)
  58
  59     def _build_tree(self, outlines, parent = None):
  60         save_list = []
  61         i = 0
  62
  63         for outline in outlines:
  64             if not outline.has_key("type"):
  65                 # Some feeds exported from Liferea don't have "type" attribute.
  66                 outline["type"] = "rss"
  67
  68             if outline["type"] == "folder" or len(outline.children) > 0:
  69                 category = Category()
  70                 category.norder = i
  71                 category.name = outline["text"]
  72                 category.parent = parent
  73
  74                 save_list.append(category)
  75
  76                 if not outline.children:
  77                     continue
  78
  79                 save_list.extend(self._build_tree(outline.children, category))
  80             else:
  81                 feed = Feed()
  82                 feed.norder = i
  83
  84                 if outline.has_key("title"):
  85                     feed.title = outline["title"]
  86                 elif outline.has_key("text"):
  87                     feed.title = outline["text"]
  88                 else:
  89                     feed.title = "[unknown title]"
  90
  91                 feed.parent = parent
  92                 feed.location = ""
  93
  94                 if outline.has_key("xmlUrl"):
  95                     feed.location = outline["xmlUrl"]
  96                 elif outline.has_key("url"):
  97                     feed.location = outline["url"]
  98
  99                 if outline.has_key("htmlUrl"):
 100                     feed.link = outline["htmlUrl"]
 101                 elif outline.has_key("url"):
 102                     feed.link = outline["url"]
 103                 else:
 104                     feed.link = ""
 105
 106                 save_list.append(feed)
 107
 108             i += 1
 109
 110         return save_list
 111
 112 class OpmlImportJob(Job):
 113     def __init__(self, url, category, observers):
 114         Job.__init__(self, "opml-import")
 115
 116         self.observers = observers
 117         self.url = url
 118         self.category = category
 119
# Register the OPML import handler class with the job manager at import time.
# NOTE(review): presumably jobs are matched to handlers through job_id
# ("opml-import") - confirm against JobManager.
JobManager.register_handler(OpmlImportJobHandler)
 121
 122 def import_opml(url, category, observers):
 123     job = OpmlImportJob(url, category, observers)
 124     JobManager.start(job)
 125
 126 class OPML(dict):
 127     def __init__(self):
 128         self.outlines = []
 129
 130     def output(self, stream = sys.stdout):
 131         xg = XMLGenerator(stream, encoding='utf-8')
 132         def elemWithContent(name, content):
 133             xg.startElement(name, AttributesImpl({}))
 134             if content is not None:
 135                 xg.characters(content)
 136             xg.endElement(name)
 137             xg.characters("\n")
 138         xg.startElement("opml", AttributesImpl({'version': '1.1'}))
 139         xg.startElement("head", AttributesImpl({}))
 140         for key in ('title', 'dateCreated', 'dateModified', 'ownerName',
 141                     'ownerEmail', 'expansionState', 'vertScrollState',
 142                     'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
 143             if self.has_key(key) and self[key] != "":
 144                 elemWithContent(key, self[key])
 145         xg.endElement("head")
 146         xg.startElement("body", AttributesImpl({}))
 147         for o in self.outlines:
 148             o.output(xg)
 149         xg.endElement("body")
 150         xg.endElement("opml")
 151
 152 class Outline(dict):
 153     __slots__ = ('_children')
 154
 155     def __init__(self):
 156         self._children = []
 157
 158     def add_child(self, outline):
 159         self._children.append(outline)
 160
 161     def get_children_iter(self):
 162         return self.OIterator(self)
 163
 164     children = property(get_children_iter, None, None, "")
 165
 166     def output(self, xg):
 167         xg.startElement("outline", AttributesImpl(self))
 168         for c in self.children:
 169             c.output(xg)
 170         xg.endElement("outline")
 171         xg.characters("\n")
 172
 173     class OIterator:
 174         def __init__(self, o):
 175             self._o = o
 176             self._index = -1
 177
 178         def __iter__(self):
 179             return self
 180
 181         def __len__(self):
 182             return len(self._o._children)
 183
 184         def next(self):
 185             self._index += 1
 186             if self._index < len(self._o._children):
 187                 return self._o._children[self._index]
 188             else:
 189                 raise StopIteration
 190
 191 class OutlineList(object):
 192     def __init__(self):
 193         self._roots = []
 194         self._stack = []
 195
 196     def add_outline(self, outline):
 197         if len(self._stack):
 198             self._stack[-1].add_child(outline)
 199         else:
 200             self._roots.append(outline)
 201         self._stack.append(outline)
 202
 203     def close_outline(self):
 204         if len(self._stack):
 205             del self._stack[-1]
 206
 207     def roots(self):
 208         return self._roots
 209
 210 class OPMLHandler(xml.sax.handler.ContentHandler):
 211     def __init__(self):
 212         self._outlines = OutlineList()
 213         self._opml = None
 214         self._content = ""
 215
 216     def startElement(self, name, attrs):
 217         if self._opml is None:
 218             if name != 'opml':
 219                 raise ValueError, "This doesn't look like OPML"
 220             self._opml = OPML()
 221         if name == 'outline':
 222             o = Outline()
 223             o.update(attrs)
 224             self._outlines.add_outline(o)
 225         self._content = ""
 226
 227     def endElement(self, name):
 228         if name == 'outline':
 229             self._outlines.close_outline()
 230             return
 231         if name == 'opml':
 232             self._opml.outlines = self._outlines.roots()
 233             return
 234         for key in ('title', 'dateCreated', 'dateModified', 'ownerName',
 235                     'ownerEmail', 'expansionState', 'vertScrollState',
 236                     'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
 237             if name == key:
 238                 self._opml[key] = self._content
 239                 return
 240
 241     def characters(self, ch):
 242         self._content += ch
 243
 244     def get_opml(self):
 245         return self._opml
 246
 247     def get_outlines(self):
 248         return self._outlines
 249
 250 def parse(stream):
 251     """parser = make_parser()
 252     parser.setFeature(feature_namespaces, 0)
 253     handler = OPMLHandler()
 254     parser.setContentHandler(handler)"""
 255     handler = OPMLHandler()
 256     xml.sax.parseString(stream, handler)
 257     print handler.get_outlines()
 258     return handler.get_outlines()
 259
 260 def export(root, filename):
 261     opml = OPML()
 262     opml['title'] = "Exported from Straw"
 263
 264     def _export(node, opml):
 265         o = Outline()
 266
 267         if node.type == "F":
 268             o['text'] = node.title.encode('utf-8')
 269             o['description'] = node.title.encode('utf-8')
 270             o['htmlUrl'] = node.link
 271             o['language'] = 'unknown'
 272             o['title'] = node.title.encode('utf-8')
 273             o['type'] = 'rss'
 274             o['version'] = 'RSS'
 275             o['xmlUrl'] = node.location
 276         elif node.type == "C":
 277             o['text'] = node.name.encode('utf-8')
 278             o['description'] = node.name.encode('utf-8')
 279             o['type'] = 'folder'
 280
 281         for child_node in node.children:
 282             o.add_child(_export(child_node, opml))
 283
 284         return o
 285
 286     opml.outlines.append(_export(root, opml))
 287
 288     f = gnomevfs.create(filename, gnomevfs.OPEN_WRITE, 0)
 289     f.write('<!DOCTYPE opml PUBLIC "-//Userland//DTD OPML XML V1.0//EN" ' + \
 290         '"http://static.userland.com/gems/radiodiscuss/opmlDtd.txt">')
 291     f.write('<?xml version="1.0"?>\n')
 292     opml.output(f)
 293     f.close()
 294
 295 class BlogListEntry(object):
 296     __slots__ = ('text', 'url')
 297
 298 def _find_entries(outline):
 299     entries = []
 300     for c in outline.children:
 301         entries += _find_entries(c)
 302     type = outline.get('type', '')
 303     text = outline.get('text', '')
 304     e = None
 305     if type == 'link':
 306         url = outline.get('url', '')
 307         if url != '':
 308             e = BlogListEntry()
 309             e.text = text
 310             e.url = url
 311     else:
 312         xmlurl = outline.get('xmlUrl', '')
 313         e = BlogListEntry()
 314         e.text = text
 315         if text == '':
 316             title = outline.get('title', '')
 317             if title == '':
 318                 e = None
 319             e.text = title
 320         if e != None:
 321             if xmlurl != '':
 322                 # there's something in xmlurl. There's a good chance that's
 323                 # our feed's URL
 324                 e.url = xmlurl
 325             else:
 326                 htmlurl = outline.get('htmlUrl', '')
 327                 if htmlurl != '':
 328                     # there's something in htmlurl, and xmlurl is empty. This
 329                     # might be our feed's URL.
 330                     e.url = htmlurl
 331                 else:
 332                     # nothing else to try.
 333                     e = None
 334     if e is not None:
 335         entries[0:0] = [e]
 336     return entries
 337
 338 def find_entries(outlines):
 339     entries = []
 340     for o in outlines:
 341         entries += _find_entries(o)
 342     return entries
 343
 344 def read(stream):
 345     o = parse(stream)
 346     return o
 347     entries = find_entries(o.outlines)
 348     ret = list()
 349     edict = dict()
 350     # avoid duplicates.
 351     for e in entries:
 352         ek = (e.text, e.url)
 353         edict[ek] = edict.get(ek, 0) + 1
 354         if edict[ek] < 2:
 355             ret.append(e)
 356     return ret