# Module metadata: copyright holder and original author.
__copyright__ = "Copyright (c) 2002-2005 Free Software Foundation, Inc."
__author__ = "Juri Pakaste <juri@iki.fi>"
8 Straw is free software; you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation; either version 2 of the License, or (at your option) any later
13 Straw is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along with
18 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 Place - Suite 330, Boston, MA 02111-1307, USA. """
21 from Fetcher
import FetchTask
22 from straw
.JobManager
import Job
, TaskThread
, ThreadPoolJobHandler
23 from straw
.model
import Category
, Feed
24 from threading
import Lock
25 from xml
.sax
import saxutils
, make_parser
, SAXParseException
26 from xml
.sax
.handler
import feature_namespaces
, feature_namespace_prefixes
27 from xml
.sax
.saxutils
import XMLGenerator
28 from xml
.sax
.xmlreader
import AttributesImpl
31 import straw
.JobManager
as JobManager
34 import xml
.sax
._exceptions
35 import xml
.sax
.handler
39 class OPMLParseJobHandler(ThreadPoolJobHandler
):
42 def __init__(self
, job
):
43 ThreadPoolJobHandler
.__init
__(self
, job
)
46 self
.task_class
= OPMLParseTaskThread
def _on_url_fetched(self, handler, task_result):
    """Queue a finished fetch so it can be picked up for parsing.

    Puts the pair ``(task_result.task.user_data, task_result.result)`` on
    ``self.task_queue``.

    handler -- supplied by the caller of this callback; not used here.
    task_result -- object exposing ``task.user_data`` and ``result``.
    """
    queued_item = (task_result.task.user_data, task_result.result)
    self.task_queue.put(queued_item)
52 category
= self
.job
.data
[1]
53 url
= self
.job
.data
[0]
55 fetch_tasks
= [FetchTask(url
= url
, user_data
= category
)]
56 observers
= [{ "task-done": [ self
._on
_url
_fetched
]}]
57 Fetcher
.fetch(fetch_tasks
, observers
= observers
)
59 class OPMLParseTaskThread(TaskThread
):
def __init__(self, handler):
    """Set up the thread by delegating to ``TaskThread.__init__``.

    handler -- forwarded unchanged to the base-class initialiser.
    """
    TaskThread.__init__(self, handler)
63 def _process(self
, task
):
65 tree
= self
._build
_tree
(opml
.roots(), parent
= task
[0])
68 def _build_tree(self
, outlines
, parent
= None):
72 for outline
in outlines
:
73 if not outline
.has_key("type"):
74 # Some feeds exported from Liferea don't have "type" attribute.
75 outline
["type"] = "rss"
77 if outline
["type"] == "folder" or len(outline
.children
) > 0:
80 category
.name
= outline
["text"]
81 category
.parent
= parent
83 save_list
.append(category
)
85 if not outline
.children
:
88 save_list
.extend(self
._build
_tree
(outline
.children
, category
))
93 if outline
.has_key("title"):
94 feed
.title
= outline
["title"]
95 elif outline
.has_key("text"):
96 feed
.title
= outline
["text"]
98 feed
.title
= "[unknown title]"
103 if outline
.has_key("xmlUrl"):
104 feed
.location
= outline
["xmlUrl"]
105 elif outline
.has_key("url"):
106 feed
.location
= outline
["url"]
108 if outline
.has_key("htmlUrl"):
109 feed
.link
= outline
["htmlUrl"]
110 elif outline
.has_key("url"):
111 feed
.link
= outline
["url"]
115 save_list
.append(feed
)
# Make the OPML parse handler class known to the job manager.
JobManager.register_handler(OPMLParseJobHandler)
127 def output(self
, stream
= sys
.stdout
):
128 xg
= XMLGenerator(stream
, encoding
='utf-8')
129 def elemWithContent(name
, content
):
130 xg
.startElement(name
, AttributesImpl({}))
131 if content
is not None:
132 xg
.characters(content
)
135 xg
.startElement("opml", AttributesImpl({'version': '1.1'}))
136 xg
.startElement("head", AttributesImpl({}))
137 for key
in ('title', 'dateCreated', 'dateModified', 'ownerName',
138 'ownerEmail', 'expansionState', 'vertScrollState',
139 'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
140 if self
.has_key(key
) and self
[key
] != "":
141 elemWithContent(key
, self
[key
])
142 xg
.endElement("head")
143 xg
.startElement("body", AttributesImpl({}))
144 for o
in self
.outlines
:
146 xg
.endElement("body")
147 xg
.endElement("opml")
# One-element tuple: a bare ('_children') is only a parenthesised string.
# Python accepts a lone string as a single slot name, so the declared slot
# is the same as before; the trailing comma just makes the intent explicit.
__slots__ = ('_children',)
def add_child(self, outline):
    """Append *outline* to the end of this object's ``_children`` list.

    outline -- the child to attach; stored as given, without copying.
    """
    self._children.append(outline)
def get_children_iter(self):
    """Return ``self.OIterator(self)``, an iterator object built over this outline."""
    return self.OIterator(self)

# Read-only ``children`` attribute backed by get_children_iter; no setter or
# deleter is supplied, and the property doc is deliberately the empty string.
children = property(get_children_iter, None, None, "")
163 def output(self
, xg
):
164 xg
.startElement("outline", AttributesImpl(self
))
165 for c
in self
.children
:
167 xg
.endElement("outline")
171 def __init__(self
, o
):
179 return len(self
._o
._children
)
183 if self
._index
< len(self
._o
._children
):
184 return self
._o
._children
[self
._index
]
188 class OutlineList(object):
193 def add_outline(self
, outline
):
195 self
._stack
[-1].add_child(outline
)
197 self
._roots
.append(outline
)
198 self
._stack
.append(outline
)
200 def close_outline(self
):
207 class OPMLHandler(xml
.sax
.handler
.ContentHandler
):
209 self
._outlines
= OutlineList()
213 def startElement(self
, name
, attrs
):
214 if self
._opml
is None:
216 raise ValueError, "This doesn't look like OPML"
218 if name
== 'outline':
221 self
._outlines
.add_outline(o
)
224 def endElement(self
, name
):
225 if name
== 'outline':
226 self
._outlines
.close_outline()
229 self
._opml
.outlines
= self
._outlines
.roots()
231 for key
in ('title', 'dateCreated', 'dateModified', 'ownerName',
232 'ownerEmail', 'expansionState', 'vertScrollState',
233 'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
235 self
._opml
[key
] = self
._content
238 def characters(self
, ch
):
def get_outlines(self):
    """Return the object stored in ``self._outlines`` (no copy is made)."""
    return self._outlines
248 """parser = make_parser()
249 parser.setFeature(feature_namespaces, 0)
250 handler = OPMLHandler()
251 parser.setContentHandler(handler)"""
252 handler
= OPMLHandler()
253 xml
.sax
.parseString(stream
, handler
)
254 print handler
.get_outlines()
255 return handler
.get_outlines()
257 def export(root
, filename
):
259 opml
['title'] = "Exported from Straw"
261 def _export(node
, opml
):
265 o
['text'] = node
.title
.encode('utf-8')
266 o
['description'] = node
.title
.encode('utf-8')
267 o
['htmlUrl'] = node
.link
268 o
['language'] = 'unknown'
269 o
['title'] = node
.title
.encode('utf-8')
272 o
['xmlUrl'] = node
.location
273 elif node
.type == "C":
274 o
['text'] = node
.name
.encode('utf-8')
275 o
['description'] = node
.name
.encode('utf-8')
278 for child_node
in node
.children
:
279 o
.add_child(_export(child_node
, opml
))
283 opml
.outlines
.append(_export(root
, opml
))
285 f
= gnomevfs
.create(filename
, gnomevfs
.OPEN_WRITE
, 0)
286 f
.write('<!DOCTYPE opml PUBLIC "-//Userland//DTD OPML XML V1.0//EN" ' + \
287 '"http://static.userland.com/gems/radiodiscuss/opmlDtd.txt">')
288 f
.write('<?xml version="1.0"?>\n')
class BlogListEntry(object):
    """Plain record with exactly two attributes, ``text`` and ``url``.

    ``__slots__`` restricts instances to those two names; neither is given
    a default, so each must be assigned before it is read.
    """
    __slots__ = ('text', 'url')
295 def _find_entries(outline
):
297 for c
in outline
.children
:
298 entries
+= _find_entries(c
)
299 type = outline
.get('type', '')
300 text
= outline
.get('text', '')
303 url
= outline
.get('url', '')
309 xmlurl
= outline
.get('xmlUrl', '')
313 title
= outline
.get('title', '')
319 # there's something in xmlurl. There's a good chance that's
323 htmlurl
= outline
.get('htmlUrl', '')
325 # there's something in htmlurl, and xmlurl is empty. This
326 # might be our feed's URL.
329 # nothing else to try.
335 def find_entries(outlines
):
338 entries
+= _find_entries(o
)
344 entries
= find_entries(o
.outlines
)
350 edict
[ek
] = edict
.get(ek
, 0) + 1