From 2bc79858ae8bd01d28f6a7b13c6125084c475b9f Mon Sep 17 00:00:00 2001 From: DrFrasierCrane Date: Sun, 2 Dec 2007 16:34:05 +0100 Subject: [PATCH] Work on feed discovery dialog. --- data/subscribe.glade | 6 ++--- straw/FeedDiscovery.py | 2 +- straw/SummaryParser.py | 7 +++--- straw/feedfinder.py | 64 +++++++++++++++++++++++++++----------------------- straw/helpers.py | 1 + straw/subscribe.py | 44 +++++++++++++++++++++++++++++++--- 6 files changed, 85 insertions(+), 39 deletions(-) diff --git a/data/subscribe.glade b/data/subscribe.glade index cda0a46..8911566 100644 --- a/data/subscribe.glade +++ b/data/subscribe.glade @@ -3,8 +3,8 @@ - 350 - 400 + 400 + 350 True GDK_POINTER_MOTION_MASK | GDK_POINTER_MOTION_HINT_MASK | GDK_BUTTON_PRESS_MASK | GDK_BUTTON_RELEASE_MASK 5 @@ -303,7 +303,7 @@ - + True GDK_POINTER_MOTION_MASK | GDK_POINTER_MOTION_HINT_MASK | GDK_BUTTON_PRESS_MASK | GDK_BUTTON_RELEASE_MASK diff --git a/straw/FeedDiscovery.py b/straw/FeedDiscovery.py index cff4ad5..92a95c5 100644 --- a/straw/FeedDiscovery.py +++ b/straw/FeedDiscovery.py @@ -55,7 +55,7 @@ class FeedDiscoveryTaskThread(TaskThread): def _process(self, task): url = task.data["url"] data = feedfinder.feeds(url, True) - feeds = [SummaryParser.parse(content) for url, content in data] + feeds = [SummaryParser.parse(content, location = url) for url, content in data] return feeds JobManager.register_handler(FeedDiscoveryJobHandler) diff --git a/straw/SummaryParser.py b/straw/SummaryParser.py index d0cdffa..6789884 100644 --- a/straw/SummaryParser.py +++ b/straw/SummaryParser.py @@ -62,7 +62,7 @@ class TitleImgParser(HTMLParser.HTMLParser): if tag == 'img': for name, value in attrs: if name == 'src': - url = helpers.complete_url(value, self._feed.location) + url = helpers.complete_url(value, self._feed.link) self._image_urls.append(url) return @@ -135,18 +135,19 @@ def feedparser_parse(data): def parse_channel_info(parsed, parsed_content, encoding): parsed.title = _to_unicode(parsed_content.feed.get('title', ''), encoding) parsed.description = _to_unicode(parsed_content.feed.get('description', ''), encoding) - parsed.location = _to_unicode(parsed_content.feed.get('link', ''), encoding) + parsed.link = _to_unicode(parsed_content.feed.get('link', ''), encoding) parsed.copyright = _to_unicode(parsed_content.feed.get('copyright', ''), encoding) parsed.last_build_date = parsed_content.feed.get('modified') parsed.creator = _to_unicode(parsed_content.feed.get('creator', ''), encoding) return parsed -def parse(content, feed = None): +def parse(content, feed = None, location = None): parsed_content, encoding = feedparser_parse(content) if feed == None: from model import Feed feed = Feed() + feed.location = location feed = parse_channel_info(feed, parsed_content, encoding) for entry in parsed_content.entries: diff --git a/straw/feedfinder.py b/straw/feedfinder.py index 06d49de..98f7716 100644 --- a/straw/feedfinder.py +++ b/straw/feedfinder.py @@ -184,6 +184,7 @@ class LinkParser(BaseParser): 'application/atom+xml', 'application/x.atom+xml', 'application/x-atom+xml') + def do_link(self, attrs): attrsD = dict(self.normalize_attrs(attrs)) if not attrsD.has_key('rel'): return @@ -227,7 +228,7 @@ def getLocalLinks(links, baseuri): return [l for l in links if l.lower().startswith(baseuri)] def isFeedLink(link): - return link[-4:].lower() in ('.rss', '.rdf', '.xml', '.atom') + return link.endswith("feed/") or link[-4:].lower() in ('.rss', '.rdf', '.xml', '.atom') def isXMLRelatedLink(link): link = link.lower() @@ -249,6 +250,7 @@ def isFeed(uri): protocol = urlparse.urlparse(uri) if protocol[0] not in ('http', 'https'): return 0 data = _gatekeeper.get(uri) + print "isFeed -- %s" % uri return (couldBeFeedData(data), uri, data) def sortFeeds(feed1Info, feed2Info): @@ -269,7 +271,9 @@ def getFeedsFromSyndic8(uri): def feeds(uri, all=False, querySyndic8=False, _recurs=None): if _recurs is None: _recurs = [uri] + fulluri = makeFullURI(uri) + try: data = _gatekeeper.get(fulluri, check=False) except: @@ -278,41 +282,32 @@ def feeds(uri, all=False, querySyndic8=False, _recurs=None): # is this already a feed? if couldBeFeedData(data): return (fulluri, data) + newuri = tryBrokenRedirect(data) + if newuri and newuri not in _recurs: _recurs.append(newuri) return feeds(newuri, all=all, querySyndic8=querySyndic8, _recurs=_recurs) + # nope, it's a page, try LINK tags first _debuglog('looking for LINK tags') data = fix_quirks(data) + + links = set() + try: - outfeeds = getLinks(data, fulluri) - except: - outfeeds = [] - _debuglog('found %s feeds through LINK tags' % len(outfeeds)) - outfeeds = process(outfeeds) - if all or not outfeeds: - # no LINK tags, look for regular links that point to feeds - _debuglog('no LINK tags, looking at A tags') - try: - links = getALinks(data, fulluri) - except: - links = [] - locallinks = getLocalLinks(links, fulluri) - # look for obvious feed links on the same server - outfeeds.extend(process(filter(isFeedLink, locallinks))) - if all or not outfeeds: - # look harder for feed links on the same server - outfeeds.extend(process(filter(isXMLRelatedLink, locallinks))) - if all or not outfeeds: - # look for obvious feed links on another server - outfeeds.extend(process(filter(isFeedLink, links))) - if all or not outfeeds: - # look harder for feed links on another server - outfeeds.extend(process(filter(isXMLRelatedLink, links))) - if all or not outfeeds: - _debuglog('no A tags, guessing') - suffixes = [ # filenames used by popular software: + linktag_links = set(getLinks(data, fulluri)) + except Exception, e: + linktag_links = set() + _debuglog("Exception in getLinks: %s" % e) + + _debuglog('found %s feeds through LINK tags' % len(links)) + #outfeeds = process(outfeeds) + + links = links.union(getALinks(data, fulluri)) + links = links.union(getLocalLinks(links, fulluri)) + + suffixes = [ # filenames used by popular software: 'atom.xml', # blogger, TypePad 'index.atom', # MT, apparently 'index.rdf', # MT @@ -320,7 +315,18 @@ def feeds(uri, all=False, querySyndic8=False, _recurs=None): 'index.xml', # MT 'index.rss' # Slash ] - outfeeds.extend(process([urlparse.urljoin(fulluri, x) for x in suffixes])) + + #links = links.union([urlparse.urljoin(fulluri, x) for x in suffixes]) + + links -= linktag_links + + return process(linktag_links | set([url for url in links if isFeedLink(url) or isXMLRelatedLink(url)])) + + if all or not outfeeds: + _debuglog('no A tags, guessing') + + #link_list = list(set( + link_list)) + #outfeeds.extend(process(link_list)) if (all or not outfeeds) and querySyndic8: # still no luck, search Syndic8 for feeds (requires xmlrpclib) _debuglog('still no luck, searching Syndic8') diff --git a/straw/helpers.py b/straw/helpers.py index 935a592..a1cfe9b 100644 --- a/straw/helpers.py +++ b/straw/helpers.py @@ -236,6 +236,7 @@ def convert_entities(text): return ctext def complete_url(url, feed_location): + print "---- COMPLETING %s WITH %s" % (url, feed_location) url = urllib.quote(url, safe=string.punctuation) if urlparse.urlparse(url)[0] == '': return urlparse.urljoin(feed_location, url) diff --git a/straw/subscribe.py b/straw/subscribe.py index 4874bb1..659b876 100644 --- a/straw/subscribe.py +++ b/straw/subscribe.py @@ -22,8 +22,10 @@ Place - Suite 330, Boston, MA 02111-1307, USA. """ from gtk.glade import XML from straw import helpers import Config +import FeedManager import MVP import SummaryParser +import TreeViewManager import error import gettext import gobject @@ -34,6 +36,8 @@ import straw import time import urllib import urlparse +import xml + pygtk.require('2.0') STATE_INTRO = 1 @@ -50,11 +54,28 @@ class SubscribeView(MVP.GladeView): self._location_entry = self._widget.get_widget('feed_location_entry') self._username_entry = self._widget.get_widget('username_entry') self._password_entry = self._widget.get_widget('password_entry') + self._category_cb = self._widget.get_widget('category_cb') self._error_text = self._widget.get_widget('error_text') self._error_box = self._widget.get_widget('error_box') - self._result_tree = self._widget.get_widget('result_tree') + def _populate_tree(self, parent_id, path = ""): + if not self.nodes.has_key(parent_id): + return + + for node in self.nodes[parent_id]: + if node.type == "C": + #current_parent = self._create_row(node, parent_iter, path) + if path != "": + path = path + " > " + node.title + else: + path = node.title + + self.category_store.append([path]) + + if self.nodes.has_key(node.obj_id): + self._populate_tree(node.obj_id, path) + def _on_forward(self): location = self._location_entry.get_text() if not location: @@ -113,12 +134,25 @@ class SubscribeView(MVP.GladeView): #renderer.connect('toggled', self._sticky_toggled) renderer = gtk.CellRendererText() - column = gtk.TreeViewColumn(_('_Title'), renderer, text = 1) + column = gtk.TreeViewColumn(_('_Title'), renderer, markup = 1) self._result_tree.append_column(column) self.store = gtk.ListStore(bool, str) self._result_tree.set_model(self.store) + + self.setup_combobox() + + def setup_combobox(self): + self.category_store = gtk.ListStore(str) + cell = gtk.CellRendererText() + self._category_cb.pack_start(cell, True) + self._category_cb.add_attribute(cell, 'text', 0) + + self.categories, self.feeds = FeedManager.get_model() + self.nodes = TreeViewManager.get_nodes() + + self._category_cb.set_model(self.category_store) def display_error(self, text): self._error_box.set_property('visible',True) @@ -153,7 +187,8 @@ class SubscribeView(MVP.GladeView): gtk.gdk.threads_enter() for feed in feeds: - self.store.append([False, feed.title]) + label = xml.sax.saxutils.escape(feed.title) + '\n' + xml.sax.saxutils.escape(feed.location) + '' + self.store.append([False, label]) self._progress_bar.props.visible = False self._button2.set_sensitive(True) @@ -161,6 +196,9 @@ class SubscribeView(MVP.GladeView): self.state = STATE_FINISH self.set_state(gtk.STOCK_CLOSE, gtk.STOCK_GO_BACK, gtk.STOCK_APPLY) self._notebook.set_current_page(1) + + self._populate_tree(1) + self._category_cb.set_active(0) gtk.gdk.threads_leave() -- 2.11.4.GIT