App Engine Python SDK version 1.7.4 (2)
[gae.git] / python / lib / django_1_4 / django / utils / feedgenerator.py
blob592d7872000ab45bb652c81cf55ac0e4b68de743
1 """
2 Syndication feed generation library -- used for generating RSS, etc.
4 Sample usage:
6 >>> from django.utils import feedgenerator
7 >>> feed = feedgenerator.Rss201rev2Feed(
8 ... title=u"Poynter E-Media Tidbits",
9 ... link=u"http://www.poynter.org/column.asp?id=31",
10 ... description=u"A group Weblog by the sharpest minds in online media/journalism/publishing.",
11 ... language=u"en",
12 ... )
13 >>> feed.add_item(
14 ... title="Hello",
15 ... link=u"http://www.holovaty.com/test/",
16 ... description="Testing."
17 ... )
18 >>> fp = open('test.rss', 'w')
19 >>> feed.write(fp, 'utf-8')
20 >>> fp.close()
22 For definitions of the different versions of RSS, see:
23 http://diveintomark.org/archives/2004/02/04/incompatible-rss
24 """
26 import datetime
27 import urlparse
28 from django.utils.xmlutils import SimplerXMLGenerator
29 from django.utils.encoding import force_unicode, iri_to_uri
30 from django.utils import datetime_safe
31 from django.utils.timezone import is_aware
def rfc2822_date(date):
    """
    Formats a datetime as an RFC 2822 date-time string, e.g.
    "Fri, 14 Nov 2008 13:37:00 +0100".

    Aware datetimes get a numeric "+HHMM" / "-HHMM" zone built from their
    tzinfo's UTC offset; naive datetimes get the zone "-0000".
    """
    # We can't use strftime() because it produces locale-dependent results, so
    # we have to map english month and day names manually
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',)
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    dow = days[date.weekday()]
    month = months[date.month - 1]
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))
    if not is_aware(date):
        return time_str + '-0000'
    offset = date.tzinfo.utcoffset(date)
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude, not the signed value: divmod() floors towards
    # negative infinity, so divmod(-210, 60) is (-4, 30) and an offset of
    # -03:30 would otherwise be rendered as "-0430".
    sign = '-' if timezone < 0 else '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + '%s%02d%02d' % (sign, hour, minute)
def rfc3339_date(date):
    """
    Formats a datetime as an RFC 3339 timestamp, e.g.
    "2008-11-14T13:37:00+01:00".

    Aware datetimes get a numeric "+HH:MM" / "-HH:MM" suffix built from their
    tzinfo's UTC offset; naive datetimes are suffixed with "Z".
    """
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    if not is_aware(date):
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
    offset = date.tzinfo.utcoffset(date)
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude, not the signed value: divmod() floors towards
    # negative infinity, so divmod(-210, 60) is (-4, 30) and an offset of
    # -03:30 would otherwise be rendered as "-04:30".
    sign = '-' if timezone < 0 else '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + '%s%02d:%02d' % (sign, hour, minute)
def get_tag_uri(url, date):
    """
    Builds a TagURI from the host name, path and fragment of ``url``. When
    ``date`` is not None its year-month-day is inserted after the host name.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id
    """
    parts = urlparse.urlparse(url)
    if date is None:
        date_part = ''
    else:
        day = datetime_safe.new_datetime(date).strftime('%Y-%m-%d')
        date_part = ',%s' % day
    return u'tag:%s%s:%s/%s' % (parts.hostname, date_part, parts.path, parts.fragment)
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"
    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in self.feed and starts with an empty
        item list. Text values go through force_unicode(), URL values through
        iri_to_uri(). Any extra keyword arguments are merged into self.feed
        unchanged.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [force_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            # The feed id falls back to the feed link when no guid is given.
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
        author_name=None, author_link=None, pubdate=None, comments=None,
        unique_id=None, enclosure=None, categories=(), item_copyright=None,
        ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.

        Any extra keyword arguments are stored on the item unchanged.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [to_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Returns the number of items added to the feed so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        from StringIO import StringIO
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time.
        """
        updates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
        if updates:
            # Only the maximum is needed, so there is no reason to sort.
            return max(updates)
        return datetime.datetime.now()
class Enclosure(object):
    "Represents an RSS enclosure"
    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects"
        self.length = length
        self.mime_type = mime_type
        # The URL is stored after being passed through iri_to_uri().
        self.url = iri_to_uri(url)
class RssFeed(SyndicationFeed):
    "Common RSS output logic. The version string comes from self._version."
    mime_type = 'application/rss+xml; charset=utf-8'

    def write(self, outfile, encoding):
        "Writes the <rss> document, with its channel and items, to outfile."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes placed on the <rss> element."
        attrs = {u"version": self._version}
        attrs[u"xmlns:atom"] = u"http://www.w3.org/2005/Atom"
        return attrs

    def write_items(self, handler):
        "Writes one <item> element per feed item."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        "Writes the channel-level elements; optional ones are skipped when None."
        feed = self.feed
        add = handler.addQuickElement
        add(u"title", feed['title'])
        add(u"link", feed['link'])
        add(u"description", feed['description'])
        if feed['feed_url'] is not None:
            add(u"atom:link", None,
                {u"rel": u"self", u"href": feed['feed_url']})
        if feed['language'] is not None:
            add(u"language", feed['language'])
        for category in feed['categories']:
            add(u"category", category)
        if feed['feed_copyright'] is not None:
            add(u"copyright", feed['feed_copyright'])
        build_date = rfc2822_date(self.latest_post_date())
        add(u"lastBuildDate", build_date.decode('utf-8'))
        if feed['ttl'] is not None:
            add(u"ttl", feed['ttl'])

    def endChannelElement(self, handler):
        "Closes the <channel> element."
        handler.endElement(u"channel")
class RssUserland091Feed(RssFeed):
    "RSS 0.91 feed: items carry only title, link and an optional description."
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Writes the title, link and (when not None) description of an item."
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        description = item['description']
        if description is not None:
            add(u"description", description)
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        "Writes the child elements of one RSS 2.0 <item>."
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        if item['description'] is not None:
            add(u"description", item['description'])

        # Author information: <author> needs an e-mail address, so a name
        # without an e-mail is written as <dc:creator> instead.
        name = item["author_name"]
        email = item["author_email"]
        if email:
            if name:
                add(u"author", "%s (%s)" % (email, name))
            else:
                add(u"author", email)
        elif name:
            add(u"dc:creator", name, {u"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        if item['pubdate'] is not None:
            pubdate = rfc2822_date(item['pubdate'])
            add(u"pubDate", pubdate.decode('utf-8'))
        # Simple optional elements, written in this fixed order when set.
        for tag, key in ((u"comments", 'comments'),
                         (u"guid", 'unique_id'),
                         (u"ttl", 'ttl')):
            if item[key] is not None:
                add(tag, item[key])

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            add(u"enclosure", '',
                {u"url": enclosure.url, u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            add(u"category", category)
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml; charset=utf-8'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Writes the <feed> document, with its entries, to outfile."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        "Returns the <feed> attributes: the namespace plus xml:lang when set."
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        "Writes the feed-level elements; optional ones are skipped when None."
        feed = self.feed
        add = handler.addQuickElement
        add(u"title", feed['title'])
        add(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            add(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        add(u"id", feed['id'])
        updated = rfc3339_date(self.latest_post_date())
        add(u"updated", updated.decode('utf-8'))
        # The <author> element is only written when a name is available.
        if feed['author_name'] is not None:
            handler.startElement(u"author", {})
            add(u"name", feed['author_name'])
            if feed['author_email'] is not None:
                add(u"email", feed['author_email'])
            if feed['author_link'] is not None:
                add(u"uri", feed['author_link'])
            handler.endElement(u"author")
        if feed['subtitle'] is not None:
            add(u"subtitle", feed['subtitle'])
        for category in feed['categories']:
            add(u"category", "", {u"term": category})
        if feed['feed_copyright'] is not None:
            add(u"rights", feed['feed_copyright'])

    def write_items(self, handler):
        "Writes one <entry> element per feed item."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        "Writes the child elements of one Atom <entry>."
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        if item['pubdate'] is not None:
            updated = rfc3339_date(item['pubdate'])
            add(u"updated", updated.decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            add(u"name", item['author_name'])
            if item['author_email'] is not None:
                add(u"email", item['author_email'])
            if item['author_link'] is not None:
                add(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: fall back to a tag URI built from the link and pubdate.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        add(u"id", unique_id)

        # Summary.
        if item['description'] is not None:
            add(u"summary", item['description'], {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            add(u"link", '',
                {u"rel": u"enclosure",
                 u"href": enclosure.url,
                 u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            add(u"category", u"", {u"term": category})

        # Rights.
        if item['item_copyright'] is not None:
            add(u"rights", item['item_copyright'])
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# It is a plain alias: DefaultFeed and Rss201rev2Feed are the same class object.
DefaultFeed = Rss201rev2Feed