2 Syndication feed generation library -- used for generating RSS, etc.
6 >>> from django.utils import feedgenerator
7 >>> feed = feedgenerator.Rss201rev2Feed(
8 ... title=u"Poynter E-Media Tidbits",
9 ... link=u"http://www.poynter.org/column.asp?id=31",
10 ... description=u"A group Weblog by the sharpest minds in online media/journalism/publishing.",
15 ... link=u"http://www.holovaty.com/test/",
16 ... description="Testing."
18 >>> fp = open('test.rss', 'w')
19 >>> feed.write(fp, 'utf-8')
22 For definitions of the different versions of RSS, see:
23 http://diveintomark.org/archives/2004/02/04/incompatible-rss
28 from django
.utils
.xmlutils
import SimplerXMLGenerator
29 from django
.utils
.encoding
import force_unicode
, iri_to_uri
30 from django
.utils
import datetime_safe
31 from django
.utils
.timezone
import is_aware
def rfc2822_date(date):
    """
    Formats ``date`` as an RFC 2822 date-time string (the format RSS uses).

    Values that ``is_aware()`` reports as timezone-aware end with their
    numeric UTC offset (``+HHMM`` / ``-HHMM``). All other values end with
    ``-0000``, which RFC 2822 reserves for "no information about the local
    time zone".
    """
    # strftime() produces locale-dependent month and day names, so the English
    # names are looked up by hand.
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',)
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    dow = days[date.weekday()]
    month = months[date.month - 1]
    # The names are substituted first; the doubled %% survive that pass and
    # become the directives strftime() then fills in.
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))
    if not is_aware(date):
        # Without this guard date.tzinfo may be None and the offset lookup
        # below would raise AttributeError.
        return time_str + '-0000'
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    offset = date.tzinfo.utcoffset(date)
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude rather than the signed value: divmod() floors, so
    # divmod(-330, 60) == (-6, 30) and -05:30 would be written as -0630.
    sign = '-' if timezone < 0 else '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d%02d" % (sign, hour, minute)
def rfc3339_date(date):
    """
    Formats ``date`` as an RFC 3339 timestamp (the format Atom uses).

    Values that ``is_aware()`` reports as timezone-aware end with their
    numeric UTC offset (``+HH:MM`` / ``-HH:MM``). All other values are
    written with a trailing ``Z``, i.e. they are presented as if they were
    UTC.
    """
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    if not is_aware(date):
        # Without this guard date.tzinfo may be None and the offset lookup
        # below would raise AttributeError.
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
    offset = date.tzinfo.utcoffset(date)
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude rather than the signed value: divmod() floors, so
    # divmod(-210, 60) == (-4, 30) and -03:30 would be written as -04:30.
    sign = '-' if timezone < 0 else '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d:%02d" % (sign, hour, minute)
def get_tag_uri(url, date):
    """
    Creates a TagURI.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    The result has the form ``tag:<host>[,<YYYY-MM-DD>]:<path>/<fragment>``.
    ``date`` may be None, in which case the date component is left out
    instead of failing; Atom1Feed passes an item's pubdate here, and that is
    optional.
    """
    bits = urlparse.urlparse(url)
    d = ''
    if date is not None:
        # new_datetime() keeps strftime() usable for dates older than 1900.
        d = ',%s' % datetime_safe.new_datetime(date).strftime('%Y-%m-%d')
    return u'tag:%s%s:%s/%s' % (bits.hostname, d, bits.path, bits.fragment)
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"
    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in ``self.feed`` and starts with an
        empty ``self.items`` list.

        Text values go through force_unicode(), links through iri_to_uri().
        Any extra keyword arguments are copied into ``self.feed`` unchanged,
        so subclasses can read them from there.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        # categories defaults to None, so only convert when there is
        # something to iterate over.
        if categories:
            categories = [force_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode. None must stay None: the writers test
            # "is not None" to decide whether to emit a <ttl> element.
            ttl = force_unicode(ttl)
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=(), item_copyright=None,
            ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.

        Any extra keyword arguments are stored on the item unchanged.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [to_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode; None is kept so writers can skip <ttl>.
            ttl = force_unicode(ttl)
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Returns the number of items added to the feed so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        The base implementation adds none.
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        by the write() implementations of subclasses. The base
        implementation adds nothing.
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        The base implementation adds none.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        The base implementation adds nothing.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        from StringIO import StringIO
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time.
        """
        updates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
        if updates:
            return max(updates)
        return datetime.datetime.now()
class Enclosure(object):
    "Holds the url, length and mime type of one RSS enclosure"

    def __init__(self, url, length, mime_type):
        "Every argument should be a Python Unicode object; url is passed through iri_to_uri()"
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
class RssFeed(SyndicationFeed):
    """
    Common RSS serialisation: an <rss> root wrapping a single <channel>.

    rss_attributes() reads ``self._version``, which this class does not set
    itself (presumably concrete subclasses provide it -- confirm), and the
    per-item elements come from add_item_elements().
    """
    mime_type = 'application/rss+xml; charset=utf-8'

    def write(self, outfile, encoding):
        "Writes the complete RSS document to outfile in the given encoding."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes for the <rss> element: version and Atom namespace."
        attrs = {u"version": self._version}
        attrs[u"xmlns:atom"] = u"http://www.w3.org/2005/Atom"
        return attrs

    def write_items(self, handler):
        "Writes one <item> element for every entry in self.items."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        "Writes the channel-level elements; optional ones are skipped when None."
        feed = self.feed
        emit = handler.addQuickElement

        # Elements that are always written, in this order.
        for tag, key in ((u"title", 'title'),
                         (u"link", 'link'),
                         (u"description", 'description')):
            emit(tag, feed[key])

        feed_url = feed['feed_url']
        if feed_url is not None:
            emit(u"atom:link", None, {u"rel": u"self", u"href": feed_url})

        language = feed['language']
        if language is not None:
            emit(u"language", language)

        for category in feed['categories']:
            emit(u"category", category)

        rights = feed['feed_copyright']
        if rights is not None:
            emit(u"copyright", rights)

        build_date = rfc2822_date(self.latest_post_date())
        emit(u"lastBuildDate", build_date.decode('utf-8'))

        ttl = feed['ttl']
        if ttl is not None:
            emit(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element opened by write()."
        handler.endElement(u"channel")
class RssUserland091Feed(RssFeed):
    """
    RSS 0.91 flavour: each item carries only a title, a link and, when one
    was given, a description.
    """
    # Read by RssFeed.rss_attributes() for the <rss version="..."> attribute;
    # without it write() fails with AttributeError.
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Writes the title, link and optional description of one item."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        if item['description'] is not None:
            handler.addQuickElement(u"description", item['description'])
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    # Read by RssFeed.rss_attributes() for the <rss version="..."> attribute;
    # without it write() fails with AttributeError.
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        """
        Writes the elements of one item. Title and link are always written;
        every other element is skipped when its value is missing.
        """
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        if item['description'] is not None:
            handler.addQuickElement(u"description", item['description'])

        # Author information. <author> is only used when an e-mail address is
        # available; a name on its own goes into <dc:creator> instead.
        if item["author_name"] and item["author_email"]:
            handler.addQuickElement(u"author", "%s (%s)" % \
                (item['author_email'], item['author_name']))
        elif item["author_email"]:
            handler.addQuickElement(u"author", item["author_email"])
        elif item["author_name"]:
            handler.addQuickElement(u"dc:creator", item["author_name"], {u"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        if item['pubdate'] is not None:
            handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('utf-8'))
        if item['comments'] is not None:
            handler.addQuickElement(u"comments", item['comments'])
        if item['unique_id'] is not None:
            handler.addQuickElement(u"guid", item['unique_id'])
        if item['ttl'] is not None:
            handler.addQuickElement(u"ttl", item['ttl'])

        # Enclosure.
        if item['enclosure'] is not None:
            handler.addQuickElement(u"enclosure", '',
                {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
                    u"type": item['enclosure'].mime_type})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", cat)
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml; charset=utf-8'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Writes the complete Atom document to outfile in the given encoding."
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u'feed', self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        handler.endElement(u"feed")

    def root_attributes(self):
        "Returns the <feed> attributes: the Atom namespace plus xml:lang when a language is set."
        if self.feed['language'] is not None:
            return {u"xmlns": self.ns, u"xml:lang": self.feed['language']}
        else:
            return {u"xmlns": self.ns}

    def add_root_elements(self, handler):
        "Writes the feed-level elements; optional ones are skipped when None."
        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']})
        if self.feed['feed_url'] is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": self.feed['feed_url']})
        handler.addQuickElement(u"id", self.feed['id'])
        handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))
        # The <author> element is only written when there is a name; e-mail
        # and link are nested inside it.
        if self.feed['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", self.feed['author_name'])
            if self.feed['author_email'] is not None:
                handler.addQuickElement(u"email", self.feed['author_email'])
            if self.feed['author_link'] is not None:
                handler.addQuickElement(u"uri", self.feed['author_link'])
            handler.endElement(u"author")
        if self.feed['subtitle'] is not None:
            handler.addQuickElement(u"subtitle", self.feed['subtitle'])
        for cat in self.feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": cat})
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement(u"rights", self.feed['feed_copyright'])

    def write_items(self, handler):
        "Writes one <entry> element for every entry in self.items."
        for item in self.items:
            handler.startElement(u"entry", self.item_attributes(item))
            self.add_item_elements(handler, item)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        """
        Writes the elements of one entry. Title, link and id are always
        written; every other element is skipped when its value is None.
        """
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID. An explicitly supplied id wins; a tag URI is generated
        # from the link and pubdate only as a fallback.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"html"})

        # Enclosure.
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"enclosure",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# Changing the default feed format only requires rebinding this one name.
DefaultFeed = Rss201rev2Feed