5 __copyright__
= "Copyright (c) 2002-2005 Free Software Foundation, Inc."
6 __author__
= "Juri Pakaste <juri@iki.fi>"
8 Straw is free software; you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation; either version 2 of the License, or (at your option) any later
13 Straw is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along with
18 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 Place - Suite 330, Boston, MA 02111-1307, USA. """
22 from straw
.JobManager
import Job
, TaskThread
, TaskInfo
, ThreadPoolJobHandler
23 from straw
.model
import Category
, Feed
24 from threading
import Lock
25 from xml
.sax
import saxutils
, make_parser
, SAXParseException
26 from xml
.sax
.handler
import feature_namespaces
, feature_namespace_prefixes
27 from xml
.sax
.saxutils
import XMLGenerator
28 from xml
.sax
.xmlreader
import AttributesImpl
30 import straw
.JobManager
as JobManager
33 import xml
.sax
._exceptions
34 import xml
.sax
.handler
38 class OPMLParseJobHandler(ThreadPoolJobHandler
):
41 def __init__(self
, job
):
42 ThreadPoolJobHandler
.__init
__(self
, job
)
45 self
.task_class
= OPMLParseTaskThread
48 ti
= TaskInfo(0, { "file_path": self
.job
.data
[0], "category": self
.job
.data
[1] })
49 self
.task_queue
.put(ti
)
51 def _prepare_result(self
):
52 task_result
= self
.result_queue
.get()
53 tree
= self
._build
_tree
(task_result
.result
.roots())
54 return (tree
, task_result
.task_info
.data
["category"])
56 def _build_tree(self
, outlines
, parent
= None):
60 for outline
in outlines
:
61 if not outline
.has_key("type"):
62 # Some feeds exported from Liferea don't have "type" attribute.
63 outline
["type"] = "rss"
65 if outline
["type"] == "folder" or len(outline
.children
) > 0:
68 category
.name
= outline
["text"]
69 category
.parent
= parent
71 save_list
.append(category
)
73 if not outline
.children
:
76 save_list
.extend(self
._build
_tree
(outline
.children
, category
))
81 if outline
.has_key("title"):
82 feed
.title
= outline
["title"]
83 elif outline
.has_key("text"):
84 feed
.title
= outline
["text"]
86 feed
.title
= "[unknown title]"
91 if outline
.has_key("xmlUrl"):
92 feed
.location
= outline
["xmlUrl"]
93 elif outline
.has_key("url"):
94 feed
.location
= outline
["url"]
96 if outline
.has_key("htmlUrl"):
97 feed
.link
= outline
["htmlUrl"]
98 elif outline
.has_key("url"):
99 feed
.link
= outline
["url"]
103 save_list
.append(feed
)
109 class OPMLParseTaskThread(TaskThread
):
def __init__(self, handler, task_queue, result_queue):
    """Initialise the thread by delegating to ``TaskThread.__init__``.

    ``handler``, ``task_queue`` and ``result_queue`` are passed through
    unchanged, in that order.
    """
    init_args = (handler, task_queue, result_queue)
    TaskThread.__init__(self, *init_args)
113 def _process(self
, task
):
117 fstream
= open(task
.data
["file_path"])
119 except Exception, inst
:
124 JobManager
.register_handler(OPMLParseJobHandler
)
130 def output(self
, stream
= sys
.stdout
):
131 xg
= XMLGenerator(stream
, encoding
='utf-8')
132 def elemWithContent(name
, content
):
133 xg
.startElement(name
, AttributesImpl({}))
134 if content
is not None:
135 xg
.characters(content
)
138 xg
.startElement("opml", AttributesImpl({'version': '1.1'}))
139 xg
.startElement("head", AttributesImpl({}))
140 for key
in ('title', 'dateCreated', 'dateModified', 'ownerName',
141 'ownerEmail', 'expansionState', 'vertScrollState',
142 'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
143 if self
.has_key(key
) and self
[key
] != "":
144 elemWithContent(key
, self
[key
])
145 xg
.endElement("head")
146 xg
.startElement("body", AttributesImpl({}))
147 for o
in self
.outlines
:
149 xg
.endElement("body")
150 xg
.endElement("opml")
153 __slots__
= ('_children')
def add_child(self, outline):
    """Append ``outline`` to the end of this outline's ``_children``."""
    kids = self._children
    kids.append(outline)
def get_children_iter(self):
    """Return a new ``self.OIterator`` constructed around this outline.

    This is the getter behind the ``children`` property.
    """
    iterator_cls = self.OIterator
    return iterator_cls(self)
164 children
= property(get_children_iter
, None, None, "")
166 def output(self
, xg
):
167 xg
.startElement("outline", AttributesImpl(self
))
168 for c
in self
.children
:
170 xg
.endElement("outline")
174 def __init__(self
, o
):
182 return len(self
._o
._children
)
186 if self
._index
< len(self
._o
._children
):
187 return self
._o
._children
[self
._index
]
191 class OutlineList(object):
196 def add_outline(self
, outline
):
198 self
._stack
[-1].add_child(outline
)
200 self
._roots
.append(outline
)
201 self
._stack
.append(outline
)
203 def close_outline(self
):
210 class OPMLHandler(xml
.sax
.handler
.ContentHandler
):
212 self
._outlines
= OutlineList()
216 def startElement(self
, name
, attrs
):
217 if self
._opml
is None:
219 raise ValueError, "This doesn't look like OPML"
221 if name
== 'outline':
224 self
._outlines
.add_outline(o
)
227 def endElement(self
, name
):
228 if name
== 'outline':
229 self
._outlines
.close_outline()
232 self
._opml
.outlines
= self
._outlines
.roots()
234 for key
in ('title', 'dateCreated', 'dateModified', 'ownerName',
235 'ownerEmail', 'expansionState', 'vertScrollState',
236 'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
238 self
._opml
[key
] = self
._content
241 def characters(self
, ch
):
def get_outlines(self):
    """Return the ``OutlineList`` this handler adds outlines to."""
    collected = self._outlines
    return collected
251 parser
= make_parser()
252 parser
.setFeature(feature_namespaces
, 0)
253 handler
= OPMLHandler()
254 parser
.setContentHandler(handler
)
257 return handler
.get_outlines()
259 def export(title
, list, fname
):
261 opml
['title'] = title
264 o
['text'] = feed
.title
.encode('utf-8')
265 o
['description'] = feed
.channel_description
.encode('utf-8')
266 o
['htmlUrl'] = feed
.channel_link
267 o
['language'] = 'unknown'
268 o
['title'] = feed
.channel_title
.encode('utf-8')
271 o
['xmlUrl'] = feed
.access_info
[0]
272 opml
.outlines
.append(o
)
273 f
= gnomevfs
.create(fname
, gnomevfs
.OPEN_WRITE
, 0)
274 f
.write('<?xml version="1.0"?>\n')
class BlogListEntry(object):
    """Plain record with two attributes, ``text`` and ``url``.

    ``__slots__`` restricts instances to exactly those two attributes;
    neither is set until a caller assigns it.
    """

    __slots__ = (
        'text',
        'url',
    )
281 def _find_entries(outline
):
283 for c
in outline
.children
:
284 entries
+= _find_entries(c
)
285 type = outline
.get('type', '')
286 text
= outline
.get('text', '')
289 url
= outline
.get('url', '')
295 xmlurl
= outline
.get('xmlUrl', '')
299 title
= outline
.get('title', '')
305 # there's something in xmlurl. There's a good chance that's
309 htmlurl
= outline
.get('htmlUrl', '')
311 # there's something in htmlurl, and xmlurl is empty. This
312 # might be our feed's URL.
315 # nothing else to try.
321 def find_entries(outlines
):
324 entries
+= _find_entries(o
)
333 entries
= find_entries(o
.outlines
)
339 edict
[ek
] = edict
.get(ek
, 0) + 1