Let's play it safe when restoring state in select_node.
[straw.git] / straw / opml.py
bloba5d80816c87fc0fffd92008f7dc28ee7a7d5751c
1 """ OPML.py
3 """
5 __copyright__ = "Copyright (c) 2002-2005 Free Software Foundation, Inc."
6 __author__ = "Juri Pakaste <juri@iki.fi>"
7 __license__ = """
8 Straw is free software; you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation; either version 2 of the License, or (at your option) any later
11 version.
13 Straw is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along with
18 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 Place - Suite 330, Boston, MA 02111-1307, USA. """
21 from Fetcher import FetchTask
22 from JobManager import Job, TaskThread, JobHandler
23 from model import Category, Feed
24 from xml.sax import saxutils, make_parser, SAXParseException
25 from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
26 from xml.sax.saxutils import XMLGenerator
27 from xml.sax.xmlreader import AttributesImpl
28 import Fetcher
29 import gnomevfs
30 import gobject
31 import straw.JobManager as JobManager
32 import sys
33 import time
34 import xml.sax._exceptions
35 import xml.sax.handler
class OpmlImportJobHandler(JobHandler):
    """Job handler that fetches an OPML document and builds model nodes.

    The handler fetches ``job.url``, parses the content with ``read`` and
    emits the "opml-imported" signal carrying a flat list of ``Category``
    and ``Feed`` objects in which every parent precedes its children.
    """

    job_id = "opml-import"

    __gsignals__ = {
        "opml-imported": (gobject.SIGNAL_RUN_LAST, gobject.TYPE_NONE,
                          (gobject.TYPE_PYOBJECT,)),
    }

    def __init__(self, id, job):
        JobHandler.__init__(self, id, job)

    def _on_url_fetched(self, handler, task_result):
        """Queue the (user_data, result) pair of a finished fetch task."""
        self.task_queue.put((task_result.task.user_data, task_result.result))

    def _run(self):
        """Fetch the job's URL and emit "opml-imported" with the built tree.

        A fetch that reports an error is silently ignored: nothing is parsed
        and no signal is emitted.
        """
        fetch_task = Fetcher.create_task(url=self.job.url, user_data=None)
        fetch_result = fetch_task.fetch()

        if fetch_result.error:
            return

        opml = read(fetch_result.content)
        tree = self._build_tree(opml.roots(), parent=self.job.category)
        self._notify("opml-imported", tree)

    def _build_tree(self, outlines, parent=None):
        """Convert outlines into a flat list of Category/Feed objects.

        outlines -- iterable of outline dicts exposing a ``children``
                    attribute.
        parent   -- object assigned as ``parent`` of every node created at
                    this level.

        Returns the created nodes, depth first, each category immediately
        followed by its descendants.  ``norder`` is the position of the
        outline among its siblings.
        """
        save_list = []

        # enumerate() keeps norder unique per sibling.  A hand-maintained
        # counter was previously skipped for empty folders, which gave the
        # following sibling the same norder.
        for position, outline in enumerate(outlines):
            # Some feeds exported from Liferea don't have "type" attribute.
            outline.setdefault("type", "rss")
            children = outline.children

            if outline["type"] == "folder" or len(children) > 0:
                category = Category()
                category.norder = position
                # Fall back to "title" so a folder lacking "text" does not
                # abort the whole import with a KeyError.
                category.name = outline.get(
                    "text", outline.get("title", "[unknown title]"))
                category.parent = parent
                save_list.append(category)

                if len(children) > 0:
                    save_list.extend(self._build_tree(children, category))
            else:
                feed = Feed()
                feed.norder = position
                feed.title = outline.get(
                    "title", outline.get("text", "[unknown title]"))
                feed.parent = parent
                # "url" serves as a fallback for both the feed location and
                # the site link.
                feed.location = outline.get("xmlUrl", outline.get("url", ""))
                feed.link = outline.get("htmlUrl", outline.get("url", ""))
                save_list.append(feed)

        return save_list
class OpmlImportJob(Job):
    """Description of one "opml-import" job.

    url       -- location of the OPML document to fetch.
    category  -- passed by the handler as the parent of the imported nodes.
    observers -- stored on the job; not read anywhere in this module.
    """

    def __init__(self, url, category, observers):
        Job.__init__(self, "opml-import")
        self.observers, self.url, self.category = observers, url, category
# Make the job manager aware of the handler class for "opml-import" jobs.
JobManager.register_handler(OpmlImportJobHandler)


def import_opml(url, category, observers):
    """Create an OpmlImportJob for the given arguments and start it."""
    JobManager.start(OpmlImportJob(url, category, observers))
class OPML(dict):
    """An OPML document: head fields as dict items plus root outlines.

    Head values are stored under their element name (``opml['title']``);
    ``outlines`` is the list of top-level outline objects written to the
    body.
    """

    # Head elements emitted by output(), in this order.  'windowBottom' is
    # the element name defined by OPML; the historical misspelling
    # 'windowBotton' is still honoured for documents built with that key.
    _HEAD_KEYS = ('title', 'dateCreated', 'dateModified', 'ownerName',
                  'ownerEmail', 'expansionState', 'vertScrollState',
                  'windowTop', 'windowBottom', 'windowBotton',
                  'windowRight', 'windowLeft')

    def __init__(self):
        dict.__init__(self)
        self.outlines = []

    def output(self, stream=sys.stdout):
        """Serialize the document as XML to ``stream``.

        Only the elements are written: no XML declaration or DOCTYPE is
        emitted here.  Head fields that are missing or equal to "" are
        skipped; a field set to None yields an empty element.
        """
        xg = XMLGenerator(stream, encoding='utf-8')

        def write_element(name, content):
            # One head element per line.
            xg.startElement(name, AttributesImpl({}))
            if content is not None:
                xg.characters(content)
            xg.endElement(name)
            xg.characters("\n")

        xg.startElement("opml", AttributesImpl({'version': '1.1'}))

        xg.startElement("head", AttributesImpl({}))
        for key in self._HEAD_KEYS:
            if key in self and self[key] != "":
                write_element(key, self[key])
        xg.endElement("head")

        xg.startElement("body", AttributesImpl({}))
        for outline in self.outlines:
            outline.output(xg)
        xg.endElement("body")

        xg.endElement("opml")
class Outline(dict):
    """One <outline> element: attributes as dict items plus child outlines."""

    # Must be a tuple; a bare string only works by accident.
    __slots__ = ('_children',)

    def __init__(self):
        dict.__init__(self)
        self._children = []

    def add_child(self, outline):
        """Append ``outline`` as the last child."""
        self._children.append(outline)

    def get_children_iter(self):
        """Return a fresh single-pass iterator over the children."""
        return self.OIterator(self)

    children = property(
        get_children_iter, None, None,
        "Fresh iterator over the child outlines; it also supports len().")

    def output(self, xg):
        """Write this outline and its descendants to the XML generator."""
        xg.startElement("outline", AttributesImpl(self))
        for child in self.children:
            child.output(xg)
        xg.endElement("outline")
        xg.characters("\n")

    class OIterator(object):
        """Single-pass iterator over an outline's children that knows len().

        ``len()`` always reports the total number of children, regardless of
        how far the iteration has advanced.
        """

        def __init__(self, o):
            self._o = o
            self._index = -1

        def __iter__(self):
            return self

        def __len__(self):
            return len(self._o._children)

        def next(self):
            self._index += 1
            if self._index < len(self._o._children):
                return self._o._children[self._index]
            raise StopIteration

        # Python 3 spelling of the iterator protocol.
        __next__ = next
class OutlineList(object):
    """Collects outlines into a forest while a document is being parsed.

    ``_stack`` holds the chain of currently open outlines; ``_roots`` the
    outlines that were opened while no other outline was open.
    """

    def __init__(self):
        self._stack = []
        self._roots = []

    def add_outline(self, outline):
        """Open ``outline`` below the innermost open outline, if any."""
        if self._stack:
            self._stack[-1].add_child(outline)
        else:
            self._roots.append(outline)
        self._stack.append(outline)

    def close_outline(self):
        """Close the innermost open outline; do nothing when none is open."""
        if self._stack:
            self._stack.pop()

    def roots(self):
        """Return the list of top-level outlines."""
        return self._roots
class OPMLHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that builds an OPML object and its outline tree."""

    # Head elements whose text content is stored on the OPML object.
    # 'windowBottom' is the element name defined by OPML; the historical
    # misspelling 'windowBotton' is kept so existing behaviour is unchanged.
    _HEAD_KEYS = ('title', 'dateCreated', 'dateModified', 'ownerName',
                  'ownerEmail', 'expansionState', 'vertScrollState',
                  'windowTop', 'windowBottom', 'windowBotton',
                  'windowRight', 'windowLeft')

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self._outlines = OutlineList()
        self._opml = None
        self._content = ""

    def startElement(self, name, attrs):
        """Open an element.

        Raises ValueError when the first element seen is not <opml>.
        """
        if self._opml is None:
            if name != 'opml':
                raise ValueError("This doesn't look like OPML")
            self._opml = OPML()
        if name == 'outline':
            outline = Outline()
            outline.update(attrs)
            self._outlines.add_outline(outline)
        # Character data is collected per element, so start afresh.
        self._content = ""

    def endElement(self, name):
        """Close an element, storing head values and finished outlines."""
        if name == 'outline':
            self._outlines.close_outline()
        elif name == 'opml':
            self._opml.outlines = self._outlines.roots()
        elif name in self._HEAD_KEYS:
            self._opml[name] = self._content

    def characters(self, ch):
        """Accumulate character data of the current element."""
        self._content += ch

    def get_opml(self):
        """Return the OPML object, or None if no element was seen yet."""
        return self._opml

    def get_outlines(self):
        """Return the OutlineList holding the parsed outline tree."""
        return self._outlines
def parse(stream):
    """Parse an OPML document and return its outlines.

    stream -- despite the name, the document's content held in memory; it
              is passed to xml.sax.parseString.

    Returns the OutlineList collected by OPMLHandler.  ValueError is raised
    by the handler when the root element is not <opml>; errors raised by
    the SAX parser propagate unchanged.
    """
    handler = OPMLHandler()
    xml.sax.parseString(stream, handler)
    return handler.get_outlines()
def export(root, filename):
    """Write the tree below ``root`` to ``filename`` as an OPML document.

    root     -- node with a ``type`` attribute: "F" nodes (feeds) provide
                ``title``, ``link`` and ``location``; "C" nodes (categories)
                provide ``name``.  Child nodes are read from ``children``.
    filename -- target passed to gnomevfs.create.
    """
    opml = OPML()
    opml['title'] = "Exported from Straw"

    def _export(node):
        # Build the outline for one node, then recurse into its children.
        o = Outline()

        if node.type == "F":
            title = node.title.encode('utf-8')
            o['text'] = title
            o['description'] = title
            o['htmlUrl'] = node.link
            o['language'] = 'unknown'
            o['title'] = title
            o['type'] = 'rss'
            o['version'] = 'RSS'
            o['xmlUrl'] = node.location
        elif node.type == "C":
            name = node.name.encode('utf-8')
            o['text'] = name
            o['description'] = name
            o['type'] = 'folder'

        # Nodes without a ``children`` attribute are treated as childless.
        for child_node in getattr(node, 'children', ()):
            o.add_child(_export(child_node))

        return o

    opml.outlines.append(_export(root))

    f = gnomevfs.create(filename, gnomevfs.OPEN_WRITE, 0)
    try:
        # The XML declaration has to be the very first thing in the
        # document, so it goes before the DOCTYPE.
        f.write('<?xml version="1.0"?>\n')
        f.write('<!DOCTYPE opml PUBLIC "-//Userland//DTD OPML XML V1.0//EN" '
                '"http://static.userland.com/gems/radiodiscuss/opmlDtd.txt">\n')
        opml.output(f)
    finally:
        # Release the handle even when writing fails.
        f.close()
class BlogListEntry(object):
    """Plain record pairing a display text with a URL."""

    __slots__ = ('text', 'url')
298 def _find_entries(outline):
299 entries = []
300 for c in outline.children:
301 entries += _find_entries(c)
302 type = outline.get('type', '')
303 text = outline.get('text', '')
304 e = None
305 if type == 'link':
306 url = outline.get('url', '')
307 if url != '':
308 e = BlogListEntry()
309 e.text = text
310 e.url = url
311 else:
312 xmlurl = outline.get('xmlUrl', '')
313 e = BlogListEntry()
314 e.text = text
315 if text == '':
316 title = outline.get('title', '')
317 if title == '':
318 e = None
319 e.text = title
320 if e != None:
321 if xmlurl != '':
322 # there's something in xmlurl. There's a good chance that's
323 # our feed's URL
324 e.url = xmlurl
325 else:
326 htmlurl = outline.get('htmlUrl', '')
327 if htmlurl != '':
328 # there's something in htmlurl, and xmlurl is empty. This
329 # might be our feed's URL.
330 e.url = htmlurl
331 else:
332 # nothing else to try.
333 e = None
334 if e is not None:
335 entries[0:0] = [e]
336 return entries
def find_entries(outlines):
    """Collect the entries of every outline in ``outlines``, in order."""
    found = []
    for outline in outlines:
        found.extend(_find_entries(outline))
    return found
def read(stream):
    """Parse OPML content and return the result of ``parse``.

    stream -- the document content, handed to ``parse`` unchanged.

    The returned object is what the import job handler calls ``roots()`` on.
    An earlier implementation that flattened the outlines into de-duplicated
    BlogListEntry objects was already unreachable behind the return
    statement and has been removed.
    """
    return parse(stream)