bug 313956: expand installer .exe contents to make complete mar. r=ted.
[gecko.git] / xpcom / analysis / deki.py
bloba7ffefb054f7da02a8c61441a056c824515fde39
1 """ deki.py - Access the wiki pages on a MindTouch Deki server via the API.
3 Here's what this code can do:
5 wiki = deki.Deki("http://developer.mozilla.org/@api/deki/", username, password)
6 page = wiki.get_page("Sheep")
7 print page.title
8 print page.doc.toxml()
10 page.title = "Bananas"
11 page.save()
13 There are also some additional methods:
14 wiki.create_page(path, content, title=, overwrite=)
15 wiki.move_page(old, new)
16 wiki.get_subpages(page)
18 This module does not try to mimic the MindTouch "Plug" API. It's meant to be
19 higher-level than that.
20 """
22 import sys
23 import urllib2, cookielib, httplib
24 import xml.dom.minidom as dom
25 from urllib import quote as _urllib_quote
26 from urllib import urlencode as _urlencode
27 import urlparse
28 from datetime import datetime
29 import re
31 __all__ = ['Deki']
34 # === Utils
def _check(fact):
    """Raise AssertionError('check failed') unless `fact` is truthy.

    This is an ordinary function call rather than an `assert` statement,
    so it still runs when Python is started with -O.
    """
    if fact:
        return
    raise AssertionError('check failed')
def _urlquote(s, *args):
    """Percent-encode the text `s` after encoding it as UTF-8.

    Any extra positional arguments are forwarded unchanged to
    urllib.quote (in practice, its `safe` argument).
    """
    utf8_bytes = s.encode('utf-8')
    return _urllib_quote(utf8_bytes, *args)
def _make_url(*dirs, **params):
    """Build a relative URL from path segments and query parameters.

    dirs   - path segments, joined with '/'. They must already be
             url-encoded.
    params - query parameters. They must NOT be url-encoded; urlencode()
             is applied to them here.

    Returns the joined path, followed by '?' and the encoded query string
    when any params were given.
    """
    path = '/'.join(dirs)
    if not params:
        return path
    return path + '?' + _urlencode(params)
class PutRequest(urllib2.Request):
    """A urllib2.Request that is always sent with the HTTP PUT method."""

    def get_method(self):
        # The base class only ever chooses between GET and POST, so the
        # method has to be overridden to issue a PUT.
        return 'PUT'
54 # === Dream framework client code
56 # This handler causes python to "always be logged in" when it's talking to the
57 # server. If you're just accessing public pages, it generates more requests
58 # than are strictly needed, but this is the behavior you want for a bot.
60 # The users/authenticate request is sent twice: once without any basic auth and
61 # once with. Dumb. Feel free to fix.
class _LoginHandler(urllib2.HTTPCookieProcessor):
    """Cookie processor that logs in whenever a request has no cookie.

    server - the client object this handler belongs to. It must provide a
             `base` attribute (the API base URL) and a `login()` method.
    """

    def __init__(self, server):
        jar = cookielib.CookieJar(cookielib.DefaultCookiePolicy(rfc2965=True))
        urllib2.HTTPCookieProcessor.__init__(self, jar)
        self.server = server

    def http_request(self, req):
        """Attach cookies to `req`, logging in first if there are none.

        Returns the request. After a login the request must carry a
        Cookie header, otherwise _check() raises AssertionError.
        """
        attach_cookies = urllib2.HTTPCookieProcessor.http_request
        req = attach_cookies(self, req)
        if 'Cookie' in req.unredirected_hdrs:
            return req

        client = self.server
        if req.get_full_url() == client.base + 'users/authenticate':
            # This *is* the login request; logging in again here would
            # recurse forever.
            return req

        client.login()
        # Retry - the cookie jar should now hold the session cookie.
        req = attach_cookies(self, req)
        _check('Cookie' in req.unredirected_hdrs)
        return req
class DreamClient:
    """Small HTTP client for a Dream-framework (MindTouch Deki) REST API.

    All URLs passed to the methods below are relative to `base`.
    """

    def __init__(self, base, user, password):
        """
        base - The base URI of the Deki API, with trailing slash.
               Typically, 'http://wiki.example.org/@api/deki/'.
        user, password - Your Deki login information.
        """
        self.base = base
        passwords = urllib2.HTTPPasswordMgrWithDefaultRealm()
        passwords.add_password(None, self.base, user, password)
        # Basic auth supplies the credentials; the login handler makes
        # sure a session cookie exists before other requests go out.
        self._opener = urllib2.build_opener(
            urllib2.HTTPBasicAuthHandler(passwords),
            _LoginHandler(self))

    def login(self):
        """Request users/authenticate and discard the response."""
        self._opener.open(self.base + 'users/authenticate').close()

    def open(self, url):
        """Open `url` (relative to base) and return the response object.

        The caller is responsible for closing the response.
        """
        full_url = self.base + url
        return self._opener.open(full_url)

    def _handleResponse(self, req):
        """Helper method shared between post() and put().

        Sends `req`. If the response's Content-Type contains '/xml' or
        '+xml', returns the parsed DOM Document; otherwise reads the body,
        discards it and returns None. The response is always closed.
        """
        resp = self._opener.open(req)
        try:
            content_type = resp.headers.get('Content-Type', '(none)')
            is_xml = '/xml' in content_type or '+xml' in content_type
            if not is_xml:
                resp.read()
                return None
            return dom.parse(resp)
        finally:
            resp.close()

    def post(self, url, data, type):
        """POST `data` with Content-Type `type` to `url` (relative to base).

        Returns what _handleResponse() returns: a DOM Document or None.
        """
        headers = {'Content-Type': type}
        request = urllib2.Request(self.base + url, data, headers)
        return self._handleResponse(request)

    def put(self, url, data, type):
        """PUT `data` with Content-Type `type` to `url` (relative to base).

        Returns what _handleResponse() returns: a DOM Document or None.
        """
        headers = {'Content-Type': type}
        request = PutRequest(self.base + url, data, headers)
        return self._handleResponse(request)

    def get_xml(self, url):
        """GET `url` (relative to base) and return it parsed as a DOM Document."""
        resp = self.open(url)
        try:
            return dom.parse(resp)
        finally:
            resp.close()
137 # === DOM
def _text_of(node):
    """Return the text contained in `node` as a single unicode string.

    A text node yields its value; an element yields the concatenated text
    of all its descendants. Every other node type (comments, the document
    node, ...) yields the empty string.
    """
    kind = node.nodeType
    if kind == node.TEXT_NODE:
        return node.nodeValue
    if kind != node.ELEMENT_NODE:
        return u''
    pieces = [_text_of(child) for child in node.childNodes]
    return u''.join(pieces)
def _the_element_by_name(doc, tagName):
    """Return the single element named `tagName` anywhere under `doc`.

    Raises ValueError if there is no such element or more than one.
    """
    matches = doc.getElementsByTagName(tagName)
    count = len(matches)
    if count == 1:
        return matches[0]
    raise ValueError("Expected exactly one <%s> tag, got %d." % (tagName, count))
def _first_element(node):
    """Return the first child of `node` that is an element, or None.

    Non-element children (text, whitespace, comments) are skipped.
    """
    child = node.firstChild
    while child is not None:
        if child.nodeType == child.ELEMENT_NODE:
            return child
        # Advance along the children. Stepping from `node` itself would
        # walk the parent's siblings instead.
        child = child.nextSibling
    return None
def _find_elements(node, path):
    """Yield the elements reached from `node` by following `path`.

    `path` is a '/'-separated list of element names. Each step selects
    the direct child elements with that name, so 'a/b' yields every <b>
    that is a child of an <a> that is a child of `node`.
    """
    if u'/' not in path:
        # Last step: match direct children by name.
        for child in node.childNodes:
            if child.nodeType == node.ELEMENT_NODE and child.nodeName == path:
                yield child
        return

    head, tail = path.split(u'/', 1)
    for match in _find_elements(node, head):
        for found in _find_elements(match, tail):
            yield found
173 # === Deki
def _format_page_id(id):
    """Convert a page id into the path segment the Deki API expects.

    id must be one of:
      an int    - formatted as its decimal digits
      Deki.HOME - formatted as 'home'
      a string  - the page path, formatted as '=' followed by the
                  double-url-encoded path

    Raises TypeError for any other kind of value. Previously such values
    fell through and returned None, which callers then formatted into
    URLs such as 'pages/None/info'.
    """
    if isinstance(id, int):
        return str(id)
    if id is Deki.HOME:
        return 'home'
    if isinstance(id, basestring):
        # Double-encoded, per the Deki API reference. The inner call
        # passes '' as the safe set so that '/' is encoded as well.
        return '=' + _urlquote(_urlquote(id, ''))
    raise TypeError("page id must be an int, a string or Deki.HOME, not %r"
                    % (id,))
class Deki(DreamClient):
    """Client for the pages of a MindTouch Deki wiki.

    Construct it like a DreamClient: Deki(base, user, password).
    """

    # Sentinel page id that refers to the wiki's home page.
    HOME = object()

    def get_page(self, page_id):
        """ Get the content of a page from the wiki.

        The page_id argument must be one of:
          an int - The page id (an arbitrary number assigned by Deki)
          a str - The page name (not the title, the full path that shows
                  up in the URL)
          Deki.HOME - Refers to the main page of the wiki.

        Returns a Page object.
        """
        page = Page(self)
        page._load(page_id)
        return page

    def create_page(self, path, content, title=None, overwrite=False):
        """ Create a new wiki page.

        Parameters:
          path - str - The page id.
          content - str - The XML content to put in the new page.
            The document element must be a <body>.
          title - str - The page title.  Keyword argument only.
            Defaults to the last path-segment of path.
          overwrite - bool - Whether to overwrite an existing page. If false,
            and the page already exists, the method will throw an error.
        """
        if title is None:
            title = path.rsplit('/', 1)[-1]
        doc = dom.parseString(content)
        _check(doc.documentElement.tagName == 'body')
        Page(self)._create(path, title, doc, overwrite)

    def attach_file(self, page, name, data, mimetype, description=None):
        """Create or update a file attachment.

        Parameters:
          page - str - the page ID this file is related to
          name - str - the name of the file
          data - str - the file data
          mimetype - str - the MIME type of the file
          description - str - a description of the file

        Raises AssertionError if the root element of the server's reply
        is not <file>.
        """
        query = {}
        if description is not None:
            query['description'] = description

        segments = ('pages', _format_page_id(page),
                    'files', _format_page_id(name))
        reply = self.put(_make_url(*segments, **query), data, mimetype)
        _check(reply.documentElement.nodeName == u'file')

    def get_subpages(self, page_id):
        """ Return the ids of all subpages of the given page.

        This is a generator; the request is only sent once iteration
        starts.
        """
        url = _make_url("pages", _format_page_id(page_id), "files,subpages")
        doc = self.get_xml(url)
        for path_elt in _find_elements(doc, u'page/subpages/page.subpage/path'):
            yield _text_of(path_elt)

    def move_page(self, page_id, new_title, redirects=True):
        """ Move an existing page to a new location.

        A page cannot be moved to a destination that already exists, is a
        descendant, or has a protected title (ex.  Special:xxx, User:,
        Template:).

        When a page is moved, subpages under the specified page are also moved.
        For each moved page, the system automatically creates an alias page
        that redirects from the old to the new destination.

        `redirects` is sent to the server as the query parameter
        redirects=1 (true) or redirects=0 (false).
        """
        if redirects:
            redirect_flag = "1"
        else:
            redirect_flag = "0"
        url = _make_url("pages", _format_page_id(page_id), "move",
                        to=new_title,
                        redirects=redirect_flag)
        self.post(url, "", "text/plain")
class Page:
    """ A Deki wiki page.

    To obtain a page, call wiki.get_page(id).
    Attributes:
      title : unicode - The page title.
      doc : Document - The content of the page as a DOM Document.
        The root element of this document is a <body>.
      path : unicode - The path.  Use this to detect redirects, as otherwise
        page.save() will overwrite the redirect with a copy of the content!
      deki : Deki - The Deki object from which the page was loaded.
      page_id : str/int/Deki.HOME - The page id used to load the page.
      load_time : datetime - The time the page was loaded,
        according to the clock on the client machine.
    Methods:
      save() - Save the modified document back to the server.
        Only the page.title and the contents of page.doc are saved.
    """

    def __init__(self, deki):
        self.deki = deki

    def _create(self, path, title, doc, overwrite):
        """Fill in this page from the given values and save it to the server.

        save() sends load_time as `edittime` together with abort=modified.
        A far-future time is used when overwriting and a far-past time
        otherwise -- presumably so the server never (respectively always)
        treats an existing page as modified since then; confirm against
        the Deki API reference.
        """
        self.title = title
        self.doc = doc
        self.page_id = path
        if overwrite:
            self.load_time = datetime(2500, 1, 1)
        else:
            self.load_time = datetime(1900, 1, 1)
        self.path = path
        self.save()

    def _load(self, page_id):
        """ page_id - See Deki.get_page() for the accepted forms. """
        load_time = datetime.utcnow()

        # Getting the title is a whole separate query!
        url = 'pages/%s/info' % _format_page_id(page_id)
        doc = self.deki.get_xml(url)
        title = _text_of(_the_element_by_name(doc, 'title'))
        path = _text_of(_the_element_by_name(doc, 'path'))

        # If you prefer to sling regexes, you can request format=raw instead.
        # The result is an XML document with one big fat text node in the body.
        url = _make_url('pages', _format_page_id(page_id), 'contents',
                        format='xhtml', mode='edit')
        doc = self.deki.get_xml(url)

        content = doc.documentElement
        _check(content.tagName == u'content')
        body = _first_element(content)
        _check(body is not None)
        _check(body.tagName == u'body')

        # Re-root the document so that <body> becomes its document element.
        doc.removeChild(content)
        doc.appendChild(body)

        self.page_id = page_id
        self.load_time = load_time
        self.title = title
        self.path = path
        self.doc = doc

    def save(self):
        """Post the title and the contents of self.doc back to the server.

        The request carries edittime=<load_time> and abort=modified.
        Raises AssertionError unless the reply is an <edit> element whose
        status attribute is 'success'.
        """
        # _make_url() url-encodes the parameter values itself, so they
        # must be passed unquoted; quoting them here as well would
        # double-encode them (a title 'A B' would be sent as 'A%2520B').
        query = {'edittime': self.load_time.strftime('%Y%m%d%H%M%S'),
                 'abort': 'modified'}

        if self.title is not None:
            # Encoded to UTF-8 bytes so that urlencode() can handle
            # non-ASCII titles.
            query['title'] = self.title.encode('utf-8')

        url = _make_url('pages', _format_page_id(self.page_id), 'contents',
                        **query)

        # Only the children of <body> are sent, not the <body> tag itself.
        body = self.doc.documentElement
        bodyInnerXML = ''.join(n.toxml('utf-8') for n in body.childNodes)

        reply = self.deki.post(url, bodyInnerXML, 'text/plain; charset=utf-8')
        _check(reply.documentElement.nodeName == u'edit')
        _check(reply.documentElement.getAttribute(u'status') == u'success')