pkka-ordersbot.conf: change url
[pkka-orders.git] / get-last-order
blob5fd7ac89bf0138c21911001f62d8dcb9e29271de
1 #!/usr/bin/python
2 """Get last order for PKKA from tumblr using his API"""
4 import xml.dom
5 import xml.dom.minidom
6 import urllib2
7 import re
9 url='http://orders.ecpsu.ru/api/read'
# Matches any tag except anchor tags.  The negative lookahead rejects
# "<a>", "<a ...>" and "</a>"; tags that merely start with the letter "a"
# (e.g. "<abbr>") are still matched and therefore removed.  Compiled once
# at import time instead of on every call.
_NON_ANCHOR_TAG_RE = re.compile(r'<(?!/?a(?=>|\s.*>))/?.*?>')


def remove_html_tags(data):
    """Remove all HTML tags except <a> and </a>.

    Only the tags themselves are dropped; the text between them is kept.
    Matching is case-sensitive, and because "." does not match a newline,
    a tag that is split across several lines is left untouched.

    data -- string containing HTML markup.

    Returns the string with every non-anchor tag removed.
    """
    return _NON_ANCHOR_TAG_RE.sub('', data)
def sanitize_urls(data):
    """Rewrite each <a href="http...">text</a> as "text: url".

    Only anchors whose href starts with "http" and whose content is plain
    text (no nested tags) are rewritten; everything else is returned
    unchanged.

    data -- string that may contain <a> elements.

    Returns the string with the matching anchors replaced.
    """
    # [^>] keeps both attribute runs inside a single opening tag.  With a
    # bare "." the match could start at an earlier anchor that has no
    # http href and run on to a later one, swallowing the text between.
    patt = r'<a\s[^>]*?href="(http[^"]+)"(?:\s[^>]*)?>([^<]+)</a>'
    repl = r'\2: \1'
    return re.sub(patt, repl, data)
# Fetch the feed and parse it; the HTTP response is closed even when
# parsing fails.
response = urllib2.urlopen(url)
try:
    doc = xml.dom.minidom.parse(response)
finally:
    response.close()

posts = doc.getElementsByTagName('post')
if not posts:
    # Exit with a readable message instead of an IndexError traceback.
    raise SystemExit('no <post> elements found at ' + url)

# The first <post> in the document is used as the last order
# (presumably the feed lists newest posts first -- confirm against the API).
last_post = posts[0]

bodies = last_post.getElementsByTagName('regular-body')
if not bodies or bodies[0].firstChild is None:
    raise SystemExit('last post has no <regular-body> text')
dirty_body = bodies[0].firstChild.data

# The raw body is printed first, then the cleaned-up version.
# NOTE(review): the raw dump looks like debugging output -- confirm no
# consumer depends on it before removing.
print(dirty_body)
body_with_a = remove_html_tags(dirty_body)
body = sanitize_urls(body_with_a)
print(body)