5 __copyright__
= "Copyright (c) 2002-2005 Free Software Foundation, Inc."
6 __author__
= "Juri Pakaste <juri@iki.fi>"
8 Straw is free software; you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation; either version 2 of the License, or (at your option) any later
13 Straw is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15 A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License along with
18 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19 Place - Suite 330, Boston, MA 02111-1307, USA. """
22 from straw
.JobManager
import Job
, TaskThread
, TaskInfo
, ThreadPoolJobHandler
23 from straw
.model
import Category
, Feed
24 from threading
import Lock
25 from xml
.sax
import saxutils
, make_parser
, SAXParseException
26 from xml
.sax
.handler
import feature_namespaces
, feature_namespace_prefixes
27 from xml
.sax
.saxutils
import XMLGenerator
28 from xml
.sax
.xmlreader
import AttributesImpl
30 import straw
.JobManager
as JobManager
33 import xml
.sax
._exceptions
34 import xml
.sax
.handler
38 class OPMLParseJobHandler(ThreadPoolJobHandler
):
41 def __init__(self
, job
):
42 ThreadPoolJobHandler
.__init
__(self
, job
)
45 self
.task_class
= OPMLParseTaskThread
48 ti
= TaskInfo(0, { "file_path": self
.job
.data
[0], "category": self
.job
.data
[1] })
49 self
.task_queue
.put(ti
)
def _prepare_result(self):
    """Turn the next finished task into a ``(tree, category)`` pair.

    One task result is taken from ``self.result_queue``.  The root
    outlines of its parse result are passed to ``_build_tree`` with the
    task's "category" entry as the parent, and whatever ``_build_tree``
    returns is paired with that same "category" entry.
    """
    finished = self.result_queue.get()
    task_data = finished.task_info.data
    target_category = task_data["category"]
    root_outlines = finished.result.roots()
    tree = self._build_tree(root_outlines, parent=target_category)
    # The category is looked up again for the return value, exactly as
    # the tuple was assembled before.
    return (tree, task_data["category"])
57 def _build_tree(self
, outlines
, parent
= None):
61 for outline
in outlines
:
62 if not outline
.has_key("type"):
63 # Some feeds exported from Liferea don't have "type" attribute.
64 outline
["type"] = "rss"
66 if outline
["type"] == "folder" or len(outline
.children
) > 0:
69 category
.name
= outline
["text"]
70 category
.parent
= parent
72 save_list
.append(category
)
74 if not outline
.children
:
77 save_list
.extend(self
._build
_tree
(outline
.children
, category
))
82 if outline
.has_key("title"):
83 feed
.title
= outline
["title"]
84 elif outline
.has_key("text"):
85 feed
.title
= outline
["text"]
87 feed
.title
= "[unknown title]"
92 if outline
.has_key("xmlUrl"):
93 feed
.location
= outline
["xmlUrl"]
94 elif outline
.has_key("url"):
95 feed
.location
= outline
["url"]
97 if outline
.has_key("htmlUrl"):
98 feed
.link
= outline
["htmlUrl"]
99 elif outline
.has_key("url"):
100 feed
.link
= outline
["url"]
104 save_list
.append(feed
)
110 class OPMLParseTaskThread(TaskThread
):
def __init__(self, handler, task_queue, result_queue):
    """Initialise the thread by delegating to ``TaskThread.__init__``.

    ``handler``, ``task_queue`` and ``result_queue`` are forwarded
    unchanged; nothing else is set up here.
    """
    TaskThread.__init__(self, handler, task_queue, result_queue)
114 def _process(self
, task
):
118 fstream
= open(task
.data
["file_path"])
120 except Exception, inst
:
# Hand the OPMLParseJobHandler class to JobManager.register_handler.
# NOTE(review): presumably this is what lets OPML parse jobs be routed to
# the handler -- confirm against JobManager.
JobManager.register_handler(OPMLParseJobHandler)
131 def output(self
, stream
= sys
.stdout
):
132 xg
= XMLGenerator(stream
, encoding
='utf-8')
133 def elemWithContent(name
, content
):
134 xg
.startElement(name
, AttributesImpl({}))
135 if content
is not None:
136 xg
.characters(content
)
139 xg
.startElement("opml", AttributesImpl({'version': '1.1'}))
140 xg
.startElement("head", AttributesImpl({}))
141 for key
in ('title', 'dateCreated', 'dateModified', 'ownerName',
142 'ownerEmail', 'expansionState', 'vertScrollState',
143 'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
144 if self
.has_key(key
) and self
[key
] != "":
145 elemWithContent(key
, self
[key
])
146 xg
.endElement("head")
147 xg
.startElement("body", AttributesImpl({}))
148 for o
in self
.outlines
:
150 xg
.endElement("body")
151 xg
.endElement("opml")
# Single slot holding the child outlines.  The trailing comma matters:
# ('_children') is only a parenthesised string, which happens to work
# because a bare string is accepted as one slot name, but it stops
# being correct as soon as a second name is added.
__slots__ = ('_children',)
def add_child(self, outline):
    """Attach ``outline`` to this node by appending it to ``_children``."""
    children = self._children
    children.append(outline)
def get_children_iter(self):
    """Return a fresh ``self.OIterator`` built around this outline."""
    iterator_type = self.OIterator
    return iterator_type(self)

# Read-only access to the children: getter only, no setter or deleter,
# and an explicitly empty property doc string.
children = property(fget=get_children_iter, fset=None, fdel=None, doc="")
167 def output(self
, xg
):
168 xg
.startElement("outline", AttributesImpl(self
))
169 for c
in self
.children
:
171 xg
.endElement("outline")
175 def __init__(self
, o
):
183 return len(self
._o
._children
)
187 if self
._index
< len(self
._o
._children
):
188 return self
._o
._children
[self
._index
]
192 class OutlineList(object):
197 def add_outline(self
, outline
):
199 self
._stack
[-1].add_child(outline
)
201 self
._roots
.append(outline
)
202 self
._stack
.append(outline
)
204 def close_outline(self
):
211 class OPMLHandler(xml
.sax
.handler
.ContentHandler
):
213 self
._outlines
= OutlineList()
217 def startElement(self
, name
, attrs
):
218 if self
._opml
is None:
220 raise ValueError, "This doesn't look like OPML"
222 if name
== 'outline':
225 self
._outlines
.add_outline(o
)
228 def endElement(self
, name
):
229 if name
== 'outline':
230 self
._outlines
.close_outline()
233 self
._opml
.outlines
= self
._outlines
.roots()
235 for key
in ('title', 'dateCreated', 'dateModified', 'ownerName',
236 'ownerEmail', 'expansionState', 'vertScrollState',
237 'windowTop', 'windowBotton', 'windowRight', 'windowLeft'):
239 self
._opml
[key
] = self
._content
242 def characters(self
, ch
):
def get_outlines(self):
    """Return the ``_outlines`` object held by this handler."""
    collected = self._outlines
    return collected
252 parser
= make_parser()
253 parser
.setFeature(feature_namespaces
, 0)
254 handler
= OPMLHandler()
255 parser
.setContentHandler(handler
)
258 return handler
.get_outlines()
260 def export(root
, filename
):
262 opml
['title'] = "Exported from Straw"
264 def _export(node
, opml
):
268 o
['text'] = node
.title
.encode('utf-8')
269 o
['description'] = node
.title
.encode('utf-8')
270 o
['htmlUrl'] = node
.link
271 o
['language'] = 'unknown'
272 o
['title'] = node
.title
.encode('utf-8')
275 o
['xmlUrl'] = node
.location
276 elif node
.type == "C":
277 o
['text'] = node
.name
.encode('utf-8')
278 o
['description'] = node
.name
.encode('utf-8')
281 for child_node
in node
.children
:
282 o
.add_child(_export(child_node
, opml
))
286 opml
.outlines
.append(_export(root
, opml
))
288 f
= gnomevfs
.create(filename
, gnomevfs
.OPEN_WRITE
, 0)
289 f
.write('<?xml version="1.0"?>\n')
class BlogListEntry(object):
    """Record restricted to exactly two attributes: ``text`` and ``url``."""

    __slots__ = ("text", "url")
296 def _find_entries(outline
):
298 for c
in outline
.children
:
299 entries
+= _find_entries(c
)
300 type = outline
.get('type', '')
301 text
= outline
.get('text', '')
304 url
= outline
.get('url', '')
310 xmlurl
= outline
.get('xmlUrl', '')
314 title
= outline
.get('title', '')
320 # there's something in xmlurl. There's a good chance that's
324 htmlurl
= outline
.get('htmlUrl', '')
326 # there's something in htmlurl, and xmlurl is empty. This
327 # might be our feed's URL.
330 # nothing else to try.
336 def find_entries(outlines
):
339 entries
+= _find_entries(o
)
348 entries
= find_entries(o
.outlines
)
354 edict
[ek
] = edict
.get(ek
, 0) + 1