Add some auth basic stuff to _urlopen
[fuse-mediawiki.git] / fuse-mediawiki.py
blob9ba10034a1f4fcd881867bb3a0cc0dfa8e330b36
1 #!/usr/bin/env python
2 ###
3 # fuse-mediawiki - FUSE filesystem for editing MediaWiki websites
4 # Copyright (C) 2008 Ian Weller <ianweller@gmail.com>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 ###
21 """fuse-mediawiki provides a FUSE filesystem for MediaWiki websites."""
23 import fuse
24 from fuse import Fuse
25 import os
26 import sys
27 import stat
28 import errno
29 import time
30 import urllib
31 import urllib2
32 import cookielib
33 import re
34 from StringIO import StringIO
35 from getpass import getpass
36 import base64
38 __version__ = "(git)"
class FuseMediaWiki(Fuse):
    """FUSE filesystem that exposes MediaWiki pages as editable files.

    The whole filesystem lives in memory:

    * ``self.files`` maps a path to its contents (directories map to ``0``).
    * ``self.attrs`` maps a path to its ``fuse.Stat`` instance.
    * ``self.wikitokens`` maps a wiki page title to the hidden form fields
      scraped from its edit form, which are needed to submit an edit.

    A path of the form ``/content/<Title>.wiki`` is created on demand by
    downloading the edit form of ``<Title>``; writing to such a file posts
    the new text back to the wiki.
    """

    # variables used for auth
    AUTH_ANON = 0
    AUTH_USER = 1
    AUTH_HTTP = 2

    # Directory under which wiki pages appear, and the suffix they carry.
    _WIKI_PREFIX = '/content/'
    _WIKI_SUFFIX = '.wiki'

    # Regular expressions used to parse the edit page.
    # _re_textarea scrapes the page text out of the edit textarea. The
    # pattern is deliberately non-greedy and confined to a single tag so it
    # cannot run across several textareas.
    _re_textarea = re.compile(
        '<textarea[^>]*name="wpTextbox1"[^>]*>(.*?)</textarea>', re.S)
    # _re_starttime is the retrieval date of the data
    # TODO Make the editor realize that the file has been changed before they
    # write to it
    _re_starttime = re.compile(
        '<input.*value="([0-9]{14})".*name="wpStarttime" />')
    # Last edit date
    _re_edittime = re.compile(
        '<input.*value="([0-9]{14})".*name="wpEdittime" />')
    _re_edittoken = re.compile('<input.*value="(.*)".*name="wpEditToken" />')
    _re_autosum = re.compile(
        '<input name="wpAutoSummary".*value="([0-9a-f]{32})" />')

    # Header prepended to every downloaded page so the user has a place to
    # type an edit summary.
    _editsummary = (
        "<!-- FUSEMW: Comments starting with FUSEMW: (like this one) are "
        "ignored and not saved within the page -->\n"
        "<!-- FUSEMW: It is a good idea to provide an edit summary, place it "
        "below -->\n"
        "<!-- FUSEMW:EDITSUMM: -->\n"
    )
    # Regexp to get edit summary out of a saved page
    _re_editsumm = re.compile('<!-- FUSEMW:EDITSUMM:(.*?)-->')
    # Regexp to get rid of all FUSEMW: comments before saving. It must be
    # non-greedy: with re.S a greedy ".*" would swallow everything between
    # the first FUSEMW comment and the last "-->" anywhere in the page.
    _re_fmwcomm = re.compile('<!-- ?FUSEMW:.*?-->', re.S)

    def __init__(self, version, usage):
        """Initialise FUSE, the in-memory tree and the HTTP cookie jar.

        ``version`` and ``usage`` are passed straight to ``Fuse.__init__``.
        """
        # init fuse
        fuse.fuse_python_api = (0, 2)
        Fuse.__init__(self, version=version, usage=usage)
        # Per-instance state; keeping these dicts on the class would share
        # them between every instance.
        self.attrs = {}
        self.files = {}
        self.wikitokens = {}
        # Connection and authentication state, filled in by _setrooturl()
        # and _setupauth().
        self.rooturl = None
        self.authtype = self.AUTH_ANON
        self.username = None
        self.password = None
        # init default file attributes
        for path in ('/', '/image', '/cat', '/content'):
            self._setdirattr(path)
        # init file system and default directories
        for path in ('/image', '/cat', '/content'):
            self.files[path] = 0
        # init cookie jar; the opener is installed globally so that every
        # urllib2.urlopen() call shares the login cookies
        self.cookiejar = cookielib.LWPCookieJar()
        urllib2.install_opener(urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookiejar)))

    def _iswikipage(self, path):
        """Return True if ``path`` is ``/content/<non-empty title>.wiki``."""
        return (path.startswith(self._WIKI_PREFIX)
                and path.endswith(self._WIKI_SUFFIX)
                and len(path) > len(self._WIKI_PREFIX + self._WIKI_SUFFIX))

    def _wikititle(self, path):
        """Return the wiki page title encoded in a wiki-page path."""
        return path[len(self._WIKI_PREFIX):-len(self._WIKI_SUFFIX)]

    def _file2fd(self, path):
        """Return a StringIO holding a snapshot of the file at ``path``.

        An unknown path yields an empty StringIO.
        """
        try:
            return StringIO(self.files[path])
        except KeyError:
            return StringIO()

    def _unescape(self, text):
        """Undo the HTML escaping applied to the textarea contents."""
        # '&amp;' must be decoded last, otherwise '&amp;lt;' would be
        # decoded twice and come out as '<'.
        for entity, char in (('&lt;', '<'), ('&gt;', '>'), ('&quot;', '"'),
                             ('&amp;', '&')):
            text = text.replace(entity, char)
        return text

    def _newstat(self, mode, size, nlink):
        """Build a fuse.Stat owned by the current user and stamped now."""
        now = time.time()
        st = fuse.Stat()
        st.st_mode = mode
        st.st_uid = int(os.getuid())
        st.st_gid = int(os.getgid())
        st.st_size = size
        st.st_atime = now
        st.st_mtime = now
        st.st_ctime = now
        st.st_nlink = nlink
        return st

    def _setdirattr(self, path):
        """Set default attributes for a directory at the specified path"""
        # drwxr-xr-x, 4.0 K
        self.attrs[path] = self._newstat(stat.S_IFDIR | 0o755, 4096, 2)

    def _setregattr(self, path):
        """Set default attributes for a regular file at the specified path"""
        # -rw-r--r--, empty
        self.attrs[path] = self._newstat(stat.S_IFREG | 0o644, 0, 1)

    def _log(self, message):
        """Log a message. Currently it just uses print, and therefore logs only
        if the -f or -d options are sent from the command line."""
        # it's simple now, but in case we want to change it later... here we go
        print(message)
        return

    def _setrooturl(self, url):
        """Remember the URL that every wiki request is sent to."""
        self.rooturl = url
        return

    def _urlfetch(self, getvars, postvars=None, headers=None):
        """Send a request to the wiki and return ``(response, request)``.

        getvars  -- dict encoded into the query string.
        postvars -- dict encoded as the POST body; ``None`` sends a GET.
        headers  -- optional dict of extra request headers.

        If the server answers 401 and credentials are known, the request is
        retried once with an HTTP basic ``Authorization`` header.

        Raises ``urllib2.HTTPError`` for any other HTTP error, for a 401
        when no credentials are available, and when the retry fails too.
        """
        url = self.rooturl + '?' + urllib.urlencode(getvars)
        post = None
        if postvars is not None:
            post = urllib.urlencode(postvars)
        request = urllib2.Request(url, post)
        for name, value in (headers or {}).items():
            request.add_header(name, value)
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as err:
            if (err.code != 401 or self.username is None
                    or self.password is None):
                raise
            # b64encode, unlike encodestring, never inserts line breaks,
            # so long credentials cannot corrupt the header.
            auth = base64.b64encode('%s:%s' % (self.username,
                                               self.password))
            request.add_header('Authorization', 'Basic %s' % auth)
            response = urllib2.urlopen(request)
        return (response, request)

    def _setupauth(self, username, usebasicauth):
        """Choose the authentication mode and log in when a user is given.

        With no username the wiki is accessed anonymously; otherwise
        ``usebasicauth`` selects HTTP basic auth over the wiki login form,
        and the password is requested interactively.
        """
        # if there is no username, anonymous auth
        if username is None:
            self.authtype = self.AUTH_ANON
            self._log("Anonymously accessing wiki")
            return
        # there is a user name. what kind of auth?
        self.username = username
        if usebasicauth:
            self.authtype = self.AUTH_HTTP
        else:
            self.authtype = self.AUTH_USER
        self._asklogin()
        return

    def _asklogin(self):
        """Prompt for the password on the terminal, then log in."""
        self.password = getpass(self.username + "'s password: ")
        return self._login()

    def _login(self):
        """Attempt to log in to the wiki; return True on success.

        * AUTH_HTTP: request Special:Userlogin. ``_urlfetch`` answers a
          401 challenge with the stored credentials; any HTTP error that
          still comes back is treated as a failed login.
        * AUTH_USER: POST the username and password to the
          Special:Userlogin form and look for our user name in the
          returned page.
        * Anything else (anonymous): nothing to do.

        A failed login prints "Login failed" and exits the process with
        status 1, which stops the mount.
        """
        print("Logging in...")
        if self.authtype == self.AUTH_HTTP:
            try:
                self._urlfetch({'title': 'Special:Userlogin'}, {})
            except urllib2.HTTPError:
                # bad login
                print("Login failed")
                sys.exit(1)
            self._log("Logged in successfully as %s" % self.username)
            return True
        if self.authtype == self.AUTH_USER:
            getvars = {'title': 'Special:Userlogin', 'action': 'submitlogin',
                       'type': 'login'}
            postvars = {'wpName': self.username, 'wpPassword': self.password,
                        'wpLoginattempt': 'Log in', 'wpRemember': '1'}
            response = self._urlfetch(getvars, postvars)[0]
            data = response.read()
            # The user name is escaped so regex metacharacters in it are
            # matched literally.
            pattern = 'var wgUserName = "%s";' % re.escape(self.username)
            if re.search(pattern, data, re.I):
                # login OK
                self._log("Logged in successfully as %s" % self.username)
                return True
            # bad login
            print("Login failed")
            sys.exit(1)
        # assume anonymous or something else that doesn't require login
        return True

    def getattr(self, path):
        """Return the fuse.Stat for ``path`` or a negative errno.

        An unknown wiki-page path is downloaded on the spot via mknod().
        """
        if path != '/':
            self._log('*** getattr ' + path)
        if path in self.attrs:
            # file found
            return self.attrs[path]
        if not self._iswikipage(path):
            # file not found
            return -errno.ENOENT
        # file technically found... we need to go make it.
        err = self.mknod(path, stat.S_IFREG | 0o644, 0)
        if err is not None:
            return err
        return self.attrs[path]

    def getdir(self, path):
        """Return ``(name, 0)`` tuples for the direct children of ``path``,
        followed by ``.`` and ``..``."""
        self._log('*** getdir ' + path)
        # Compare against "<dir>/" so that the remainder of a child's path
        # is its bare name; without the slash every child of a
        # subdirectory would start with "/" and be filtered out.
        prefix = path.rstrip('/') + '/'
        flist = []
        for key in self.files:
            if not key.startswith(prefix):
                continue
            name = key[len(prefix):]
            if name and '/' not in name:
                flist.append((name, 0))
        flist.append(('.', 0))
        flist.append(('..', 0))
        return flist

    def readdir(self, path, offset):
        """List a directory by delegating to readdir_compat_0_1."""
        self._log('*** readdir ' + str([path, offset]))
        return self.readdir_compat_0_1(path, offset)

    def mythread(self):
        """Not implemented."""
        self._log('*** mythread')
        return -errno.ENOSYS

    def chmod(self, path, mode):
        """Store ``mode`` as the st_mode of ``path``."""
        self._log('*** chmod ' + str([path, oct(mode)]))
        if path not in self.attrs:
            return -errno.ENOENT
        self.attrs[path].st_mode = mode
        return

    def chown(self, path, uid, gid):
        """Not implemented."""
        self._log('*** chown ' + str([path, uid, gid]))
        return -errno.ENOSYS

    def fsync(self, path, isFsyncFile, fd=None):
        """Nothing to flush: all data lives in memory."""
        self._log('*** fsync ' + str([path, isFsyncFile, fd]))
        return

    def link(self, targetPath, linkPath):
        """Not implemented."""
        self._log('*** link ' + str([targetPath, linkPath]))
        return -errno.ENOSYS

    def mkdir(self, path, mode):
        """Create a directory.

        Refused with EPERM below /image and with ENOSYS below /cat.
        """
        self._log('*** mkdir ' + str([path, oct(mode)]))
        if path.startswith('/image/'):
            return -errno.EPERM
        if path.startswith('/cat/'):
            return -errno.ENOSYS
        self.files[path] = 0
        self._setdirattr(path)
        self.attrs[path].st_mode = stat.S_IFDIR | mode
        return

    def mknod(self, path, mode, dev):
        """Create a file; ``dev`` is ignored.

        For a wiki-page path the page text is downloaded and becomes the
        initial contents. Returns None on success, or -errno.EIO (leaving
        no file behind) when the page could not be fetched or parsed.
        """
        self._log('*** mknod ' + str([path, oct(mode), dev]))
        self.files[path] = ""
        self._setregattr(path)
        self.attrs[path].st_mode = mode
        if self._iswikipage(path):
            # this is a wiki page, get page contents
            err = self._fetchwikipage(path)
            if err is not None:
                # do not leave a half-created file in the tree
                del self.files[path]
                del self.attrs[path]
                return err
        return

    def _fetchwikipage(self, path):
        """Download the edit form for ``path`` and store text and tokens.

        Returns None on success or -errno.EIO on failure.
        """
        wikititle = self._wikititle(path)
        try:
            response = self._urlfetch({'title': wikititle,
                                       'action': 'edit'})[0]
            data = response.read()
        except urllib2.URLError as err:
            self._log('*** fetch of %s failed: %s' % (wikititle, err))
            return -errno.EIO
        textarea = self._re_textarea.search(data)
        starttime = self._re_starttime.search(data)
        edittime = self._re_edittime.search(data)
        edittoken = self._re_edittoken.search(data)
        autosum = self._re_autosum.search(data)
        if None in (textarea, starttime, edittime, edittoken, autosum):
            # not an edit form we understand (e.g. a protected page)
            self._log('*** no usable edit form for ' + wikititle)
            return -errno.EIO
        text = self._unescape(textarea.group(1))
        # drop the single trailing newline of the textarea, if present
        if text.endswith('\n'):
            text = text[:-1]
        if text == "":
            text = "<!-- FUSEMW: You are creating a new page -->"
        text = self._editsummary + text + "\n"
        self.files[path] = text
        self.attrs[path].st_size = len(text)
        self.wikitokens[wikititle] = {'start': starttime.group(1),
                                      'edit': edittime.group(1),
                                      'token': edittoken.group(1),
                                      'auto': autosum.group(1)}
        return None

    def open(self, path, flags):
        """Return a StringIO snapshot of the file to act as its handle."""
        self._log('*** open ' + str([path, flags]))
        return self._file2fd(path)

    def read(self, path, length, offset, fd=None):
        """Return up to ``length`` bytes starting at ``offset``."""
        self._log('*** read' + str([path, length, offset, fd]))
        if fd is not None:
            # we'll just read from the provided StringIO
            fd.seek(offset)
            return fd.read(length)
        # this should never really happen
        try:
            contents = self.files[path]
        except KeyError:
            return -errno.ENOENT
        return contents[offset:offset + length]

    def readlink(self, path):
        """Not implemented."""
        self._log('*** readlink ' + path)
        return -errno.ENOSYS

    def release(self, path, flags, fd=None):
        """Close the StringIO handle handed out by open(), if any."""
        self._log('*** release ' + str([path, flags, fd]))
        if fd is not None:
            fd.close()
        return

    def rename(self, oldPath, newPath):
        """Move contents and attributes from ``oldPath`` to ``newPath``."""
        self._log('*** rename ' + str([oldPath, newPath]))
        if oldPath not in self.files or oldPath not in self.attrs:
            return -errno.ENOENT
        self.files[newPath] = self.files.pop(oldPath)
        self.attrs[newPath] = self.attrs.pop(oldPath)
        return

    def rmdir(self, path):
        """Not implemented."""
        self._log('*** rmdir ' + path)
        # -errno.ENOTEMPTY will be useful
        return -errno.ENOSYS

    def statfs(self):
        """Return a default-initialised fuse.StatVfs."""
        #self._log('*** statfs')
        return fuse.StatVfs()

    def symlink(self, targetPath, linkPath):
        """Not implemented."""
        self._log('*** symlink ' + str([targetPath, linkPath]))
        return -errno.ENOSYS

    def truncate(self, path, size):
        """Cut the file at ``path`` down to at most ``size`` bytes."""
        self._log('*** truncate ' + str([path, size]))
        if path not in self.files or path not in self.attrs:
            return -errno.ENOENT
        self.files[path] = self.files[path][0:size]
        self.attrs[path].st_size = len(self.files[path])
        return None

    def unlink(self, path):
        """Remove the file at ``path`` from the in-memory tree."""
        self._log('*** unlink ' + path)
        if path not in self.files:
            return -errno.ENOENT
        self.attrs.pop(path, None)
        del self.files[path]
        return

    def utime(self, path, times):
        """Set access and modification time from ``(atime, mtime)``."""
        self._log('*** utime ' + str([path, times]))
        if path not in self.attrs:
            return -errno.ENOENT
        self.attrs[path].st_atime = times[0]
        self.attrs[path].st_mtime = times[1]
        return

    def write(self, path, buf, offset, fd=None):
        """Write ``buf`` at ``offset``; return the number of bytes written.

        For a wiki page the complete file is submitted to the wiki after
        the buffer has been stored. Returns a negative errno on failure.
        """
        self._log('*** write' + str([path, len(buf), offset, fd]))
        if path not in self.files or path not in self.attrs:
            return -errno.ENOENT
        if fd is not None:
            fd.seek(offset)
            fd.write(buf)
        old = self.files[path]
        if len(old) < offset:
            # writing past the end: fill the gap with NUL bytes
            old += '\0' * (offset - len(old))
        # overwrite in place; keeping old[offset:] would insert the buffer
        # and duplicate the tail of the file
        self.files[path] = old[:offset] + buf + old[offset + len(buf):]
        self.attrs[path].st_size = len(self.files[path])
        if self._iswikipage(path):
            # this is a wiki page, save page contents
            # NOTE: this runs on every write call, so a file written in
            # several chunks is submitted once per chunk.
            err = self._submitwikipage(path)
            if err is not None:
                return err
        return len(buf)

    def _submitwikipage(self, path):
        """POST the current contents of ``path`` to the wiki.

        Returns None on success or -errno.EIO on failure.
        """
        wikititle = self._wikititle(path)
        tokens = self.wikitokens.get(wikititle)
        if tokens is None:
            # the page was never downloaded, so there is nothing to
            # authorise the edit with
            self._log('*** no edit tokens for ' + wikititle)
            return -errno.EIO
        contents = self.files[path]
        # the user may have deleted the summary placeholder
        found = self._re_editsumm.search(contents)
        editsumm = ''
        if found is not None:
            editsumm = found.group(1).strip()
        data = self._re_fmwcomm.sub('', contents).strip()
        getvars = {'title': wikititle, 'action': 'submit'}
        postvars = {'wpSection': '',
                    'wpStarttime': tokens['start'],
                    'wpEdittime': tokens['edit'],
                    'wpScrolltop': '0',
                    'wpTextbox1': data,
                    'wpSummary': editsumm,
                    'wpSave': 'Save page',
                    'wpEditToken': tokens['token'],
                    'wpAutoSummary': tokens['auto']}
        try:
            response = self._urlfetch(getvars, postvars)[0]
            # drain the reply; its contents are not inspected
            response.read()
        except urllib2.URLError as err:
            self._log('*** save of %s failed: %s' % (wikititle, err))
            return -errno.EIO
        return None
def main():
    """Parse the command line, set up authentication and mount the wiki.

    Expects exactly one positional argument (the wiki's root URL) besides
    the mount point; otherwise an error is written to stderr and the
    process exits with status 1.
    """
    # NOTE(review): the terminator of this literal was missing from the
    # reviewed copy; confirm nothing else (e.g. Fuse.fusage) was appended.
    usage = """Usage: python %prog [OPTIONS] ROOT_URL MOUNTPOINT
fuse-mediawiki is a FUSE filesystem for editing MediaWiki websites.
"""
    # setup FUSE
    fs = FuseMediaWiki(version="%prog " + __version__, usage=usage)
    # setup option parser
    # -t, --timeout: how long in seconds before the filesystem deems a wiki
    #                page as stale and downloads a new one on the next read
    fs.parser.add_option("-t", "--timeout", dest="timeout", default=60,
                         type="int",
                         help="timeout, in seconds [default: %default]")
    # -u, --username: set the username. password is asked for on mount
    fs.parser.add_option("-u", "--username", dest="username",
                         help="username for login into wiki")
    # --http-basic: used for those weird wikis that use HTTP basic
    #               authentication instead of the default login form
    fs.parser.add_option("--http-basic", action="store_true",
                         dest="usebasicauth",
                         help="use HTTP basic authentication for login",
                         default=False)
    # allow for FUSE mount options
    fs.parser.mountopt = True
    # get arguments
    (options, args) = fs.parser.parse_args()
    # if we have zero or two or more arguments before the MOUNTPOINT, fail.
    if len(args) != 1:
        # diagnostics belong on stderr, not stdout
        sys.stderr.write("Must specify exactly 1 root URL and 1 mount point, "
                         "in that order\n")
        sys.exit(1)
    # set the root URL
    fs._setrooturl(args[0])
    # tell the object what our authentication method is
    fs._setupauth(options.username, options.usebasicauth)
    # hand the command line to fuse-python; presumably values=fs stores the
    # parsed options as attributes of fs -- TODO confirm against the
    # fuse-python documentation
    fs.parse(values=fs, errex=1)
    # go into main loop
    fs.main()
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()