fuse-mediawiki.py

   1 #!/usr/bin/env python
   2 ###
   3 # fuse-mediawiki - FUSE filesystem for editing MediaWiki websites
   4 # Copyright (C) 2008  Ian Weller <ianweller@gmail.com>
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License along
  17 # with this program; if not, write to the Free Software Foundation, Inc.,
  18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19 ###
  20
  21 """fuse-mediawiki provides a FUSE filesystem for MediaWiki websites."""
  22
  23 import fuse
  24 from fuse import Fuse
  25 import os
  26 import sys
  27 import stat
  28 import errno
  29 import time
  30 import urllib
  31 import urllib2
  32 import cookielib
  33 import re
  34 from StringIO import StringIO
  35 from getpass import getpass
  36 import base64
  37
# Version string; main() appends it to "%prog " and hands the result to the
# FuseMediaWiki constructor. "(git)" presumably marks an unreleased checkout.
__version__ = "(git)"
  39
  40
  41 class FuseMediaWiki(Fuse):
  42     """Class to provide FUSE filesystem."""
  43
    # variables used for auth (stored in self.authtype by _setupauth)
    AUTH_ANON = 0  # no username given: the wiki is accessed anonymously
    AUTH_USER = 1  # log in through the Special:Userlogin form
    AUTH_HTTP = 2  # log in with HTTP basic authentication

    # The filesystem used is simply a dict of keys (filenames) and values.
    # NOTE: attrs, files and wikitokens are class-level dicts, so they are
    # shared by every instance of this class.
    # For attrs, the value of a key is a specific instance of fuse.Stat().
    attrs = {}
    # For files, the value of a key is the contents of that file.
    # Directories are stored with the placeholder value 0.
    files = {}
    # Each edit page retrieval creates certain tokens we need to submit the new
    # page properly -- the key is the name of the wiki page with no extensions
    wikitokens = {}
  57
  58     # Regular expressions used to parse the edit page
  59     # _re_textarea scrapes the data of the page out of the textarea
  60     _re_textarea = re.compile("""<textarea.*name="wpTextbox1".*>(.*)"""+\
  61                               """</textarea>""", re.S)
  62     # _re_starttime is the retrieval date of the data
  63     # TODO Make the editor realize that the file has been changed before they
  64     # write to it
  65     _re_starttime = re.compile("""<input.*value="([0-9]{14})".*"""+\
  66                                """name="wpStarttime" />""")
  67     # Last edit date
  68     _re_edittime = re.compile("""<input.*value="([0-9]{14})".*"""+\
  69                               """name="wpEdittime" />""")
  70     _re_edittoken = re.compile("""<input.*value="(.*)".*name"""+\
  71                                """="wpEditToken" />""")
  72     _re_autosum = re.compile("""<input name="wpAutoSummary".*value"""+\
  73                              """="([0-9a-f]{32})" />""")
  74
  75     # Used to allow for edit summaries
  76     _editsummary = """<!-- FUSEMW: Comments starting with FUSEMW: (like this \
  77 one) are ignored and not saved within the page -->
  78 <!-- FUSEMW: It is a good idea to provide an edit summary, place it below \
  79 -->
  80 <!-- FUSEMW:EDITSUMM:  -->
  81 """
  82     # Regexp to get edit summary out of a saved page
  83     _re_editsumm = re.compile("""<!-- FUSEMW:EDITSUMM:(.*)-->""")
  84     # Regexp to get rid of all FUSEMW: comments before saving
  85     _re_fmwcomm = re.compile("""<!-- ?FUSEMW:.*-->""", re.S)
  86
  87     def __init__(self, version, usage):
  88         # init fuse
  89         fuse.fuse_python_api = (0, 2)
  90         Fuse.__init__(self, version=version, usage=usage)
  91         # init default file attributes
  92         for path in ['/', '/image', '/cat', '/content']:
  93             self._setdirattr(path)
  94         # init file system and default directories
  95         self.files['/image'] = 0
  96         self.files['/cat'] = 0
  97         self.files['/content'] = 0
  98         # init cookie jar
  99         self.cookiejar = cookielib.LWPCookieJar()
 100         urllib2.install_opener(urllib2.build_opener(
 101             urllib2.HTTPCookieProcessor(self.cookiejar)))
 102
 103     def _file2fd(self, path):
 104         """Create a file descriptor out of a file within this filesystem"""
 105         try:
 106             return StringIO(self.files[path])
 107         except KeyError:
 108             return StringIO()
 109
 110     def _unescape(self, text):
 111         """Unescape text in the textarea on retrieval"""
 112         text = text.replace('&lt;', '<')
 113         text = text.replace('&gt;', '>')
 114         text = text.replace('&amp;', '&')
 115         return text
 116
 117     def _setdirattr(self, path):
 118         """Set default attributes for a directory at the specified path"""
 119         self.attrs[path] = fuse.Stat()
 120         self.attrs[path].st_mode = stat.S_IFDIR | 0755 # drwxr-xr-x
 121         self.attrs[path].st_uid = int(os.getuid())
 122         self.attrs[path].st_gid = int(os.getgid())
 123         self.attrs[path].st_size = 4096 # 4.0 K
 124         self.attrs[path].st_atime = time.time()
 125         self.attrs[path].st_mtime = time.time()
 126         self.attrs[path].st_ctime = time.time()
 127         self.attrs[path].st_nlink = 2
 128
 129     def _setregattr(self, path):
 130         """Set default attributes for a regular file at the specified path"""
 131         self.attrs[path] = fuse.Stat()
 132         self.attrs[path].st_mode = stat.S_IFREG | 0644 # -rw-r--r--
 133         self.attrs[path].st_uid = int(os.getuid())
 134         self.attrs[path].st_gid = int(os.getgid())
 135         self.attrs[path].st_size = 0
 136         self.attrs[path].st_atime = time.time()
 137         self.attrs[path].st_mtime = time.time()
 138         self.attrs[path].st_ctime = time.time()
 139         self.attrs[path].st_nlink = 1
 140
 141     def _log(self, message):
 142         """Log a message. Currently it just uses print, and therefore logs only
 143         if the -f or -d options are sent from the command line."""
 144         # it's simple now, but in case we want to change it later... here we go
 145         print message
 146         return
 147
 148     def _setrooturl(self, url):
 149         self.rooturl = url
 150         return
 151
 152     def _urlfetch(self, getvars, postvars=None, headers={}):
 153         get = urllib.urlencode(getvars)
 154         url = self.rooturl + '?' + get
 155         if postvars != None:
 156             post = urllib.urlencode(postvars)
 157         else:
 158             post = None
 159         request = urllib2.Request(url, post)
 160         for header in headers:
 161             request.add_header(header, headers[header])
 162         try:
 163             response = urllib2.urlopen(request)
 164         except urllib2.HTTPError, args:
 165             # we assume this works. if not, let me know.
 166             if args.code == 401:
 167                 auth = base64.encodestring('%s:%s' % (self.username,
 168                                                       self.password))[:-1]
 169                 request.add_header('Authorization', 'Basic %s' % auth)
 170                 try:
 171                     response = urllib2.urlopen(request)
 172                 except urllib2.HTTPError, args:
 173                     # bad login
 174                     print "Login failed"
 175         return (response, request)
 176
 177     def _setupauth(self, username, usebasicauth):
 178         # if there is no username, anonymous auth
 179         if username == None:
 180             self.authtype = self.AUTH_ANON
 181             self._log("Anonymously accessing wiki")
 182         else:
 183             # there is a user name. what kind of auth?
 184             self.username = username
 185             if usebasicauth:
 186                 self.authtype = self.AUTH_HTTP
 187             else:
 188                 self.authtype = self.AUTH_USER
 189             self._asklogin()
 190         return
 191
 192     def _asklogin(self):
 193         self.password = getpass(self.username + "'s password: ")
 194         return self._login()
 195
 196     def _login(self):
 197         """This will attempt to login to the wiki. It does it through this
 198         procedure:
 199             1. If --auth-basic was passed at startup, attempt to login at
 200                Special:Userlogin by sending the username and password.
 201             2. If cookies are received, we assume that we are logged in.
 202                However, if at any point during editing we are asked for our
 203                username and password again (401 Unauthorized), we'll decide
 204                that we need to do that for every wiki page.
 205             3. If no cookies are received, we assume that we have to send the
 206                username and password on every request.
 207             4. If --auth-basic was not passed at startup, attempt to login at
 208                Special:Userlogin by POSTing the username and password.
 209             5. If we are led to believe that the username/password combination
 210                is incorrect (i.e., additional 401 Unauthorized errors, or the
 211                wiki actually telling us), stop mounting."""
 212         print "Logging in..."
 213         if self.authtype == self.AUTH_HTTP:
 214             try:
 215                 (response, request) = self._urlfetch({'title':
 216                                                       'Special:Userlogin'}, {})
 217             except urllib2.HTTPError, args:
 218                 if args.code == 401:
 219                     auth = base64.encodestring('%s:%s' % (self.username,
 220                                                           self.password))[:-1]
 221                     try:
 222                         (response,
 223                          request) = self._urlfetch({'title':
 224                                                     'Special:Userlogin'}, {},
 225                                                    {'Authorization': 'Basic %s'
 226                                                     % auth})
 227                     except urllib2.HTTPError, args:
 228                         # bad login
 229                         print "Login failed"
 230         elif self.authtype == self.AUTH_USER:
 231             getvars = {'title': 'Special:Userlogin', 'action': 'submitlogin',
 232                        'type': 'login'}
 233             postvars = {'wpName': self.username, 'wpPassword': self.password,
 234                         'wpLoginattempt': 'Log in', 'wpRemember': '1'}
 235             response = self._urlfetch(getvars, postvars)[0]
 236             data = response.read()
 237             if re.search('var wgUserName = "%s";' % self.username, data,
 238                          re.I):
 239                 # login OK
 240                 self._log("Logged in successfully as %s" % self.username)
 241                 return True
 242             else:
 243                 # bad login
 244                 print "Login failed"
 245                 sys.exit()
 246         else:
 247             # assume anonymous or something else that doesn't require login
 248             return True
 249
 250     def getattr(self, path):
 251         if path != '/':
 252             self._log('*** getattr '+path)
 253         if path in self.attrs:
 254             # file found
 255             return self.attrs[path]
 256         elif path[-5:] == ".wiki" and path[:8] == "/content":
 257             # file technically found... we need to go make it.
 258             self.mknod(path, 0100644, 0)
 259             return self.attrs[path]
 260         else:
 261             # file not found
 262             return -errno.ENOENT
 263
 264     def getdir(self, path):
 265         self._log('*** getdir '+path)
 266         keys = self.files.keys()
 267         flist = []
 268         for key in keys:
 269             l = len(path)
 270             if key[0:l] == path:
 271                 if key[l:] != '':
 272                     if '/' not in key[l:]:
 273                         flist.append((key[l:], 0))
 274         flist.append(('.', 0))
 275         flist.append(('..', 0))
 276         return flist
 277
 278     def readdir(self, path, offset):
 279         self._log('*** readdir '+str([path, offset]))
 280         return self.readdir_compat_0_1(path, offset)
 281
 282     def mythread(self):
 283         self._log('*** mythread')
 284         return -errno.ENOSYS
 285
 286     def chmod(self, path, mode):
 287         self._log('*** chmod '+str([path, oct(mode)]))
 288         self.attrs[path].st_mode = mode
 289         return
 290
 291     def chown(self, path, uid, gid):
 292         self._log('*** chown '+str([path, uid, gid]))
 293         return -errno.ENOSYS
 294
 295     def fsync(self, path, isFsyncFile, fd=None):
 296         self._log('*** fsync '+str([path, isFsyncFile, fd]))
 297         return
 298
 299     def link(self, targetPath, linkPath):
 300         self._log('*** link '+str([targetPath, linkPath]))
 301         return -errno.ENOSYS
 302
 303     def mkdir(self, path, mode):
 304         """Create a directory."""
 305         self._log('*** mkdir '+str([path, oct(mode)]))
 306         if re.match('^/image/.*$', path):
 307             return -errno.EPERM
 308         if re.match('^/cat/.*$', path):
 309             return -errno.ENOSYS
 310         self.files[path] = 0
 311         self._setdirattr(path)
 312         self.attrs[path].st_mode = stat.S_IFDIR | mode
 313         return
 314
 315     def mknod(self, path, mode, dev):
 316         """Create a file. Not sure what the dev argument is, but it doesn't
 317         seem useful."""
 318         self._log('*** mknod '+str([path, oct(mode), dev]))
 319         self.files[path] = ""
 320         self._setregattr(path)
 321         self.attrs[path].st_mode = mode
 322         if path[-5:] == '.wiki' and path[:8] == "/content":
 323             # this is a wiki page, get page contents
 324             wikititle = path[9:-5]
 325             getvars = {'title': wikititle, 'action': 'edit'}
 326             (response, request) = self._urlfetch(getvars)
 327             data = response.read()
 328             text = self._unescape(self._re_textarea.search(data).group(1))[:-1]
 329             if text == "":
 330                 text = "<!-- FUSEMW: You are creating a new page -->"
 331             text = self._editsummary + text + "\n"
 332             self.files[path] = text
 333             self.attrs[path].st_size = len(text)
 334             starttime = self._re_starttime.search(data).group(1)
 335             edittime = self._re_edittime.search(data).group(1)
 336             edittoken = self._re_edittoken.search(data).group(1)
 337             autosum = self._re_autosum.search(data).group(1)
 338             self.wikitokens[wikititle] = {'start': starttime, 'edit': edittime,
 339                                           'token': edittoken, 'auto': autosum}
 340         return
 341
 342     def open(self, path, flags):
 343         self._log('*** open '+str([path, flags]))
 344         return self._file2fd(path)
 345
 346     def read(self, path, length, offset, fd=None):
 347         self._log('*** read'+str([path, length, offset, fd]))
 348         if fd != None:
 349             # we'll just read from the provided StringIO
 350             fd.seek(offset)
 351             return fd.read(length)
 352         else:
 353             # this should never really happen
 354             try:
 355                 cwd = self.files[path]
 356             except KeyError:
 357                 return -errno.ENOENT
 358             return cwd[offset:length]
 359
 360     def readlink(self, path):
 361         self._log('*** readlink '+path)
 362         return -errno.ENOSYS
 363
 364     def release(self, path, flags, fd=None):
 365         self._log('*** release '+str([path, flags, fd]))
 366         if fd != None:
 367             fd.close()
 368             return
 369         else:
 370             # i don't think we really care
 371             return
 372
 373     def rename(self, oldPath, newPath):
 374         self._log('*** rename '+str([oldPath, newPath]))
 375         self.files[newPath] = self.files[oldPath]
 376         del self.files[oldPath]
 377         self.attrs[newPath] = self.attrs[oldPath]
 378         del self.attrs[oldPath]
 379         return
 380
 381     def rmdir(self, path):
 382         self._log('*** rmdir '+path)
 383         # -errno.ENOTEMPTY will be useful
 384         return -errno.ENOSYS
 385
 386     def statfs(self):
 387         #self._log('*** statfs')
 388         vfs = fuse.StatVfs()
 389         return vfs
 390
 391     def symlink(self, targetPath, linkPath):
 392         self._log('*** symlink '+str([targetPath, linkPath]))
 393         return -errno.ENOSYS
 394
 395     def truncate(self, path, size):
 396         self._log('*** truncate '+str([path, size]))
 397         self.files[path] = self.files[path][0:size]
 398         self.attrs[path].st_size = len(self.files[path])
 399         return None
 400
 401     def unlink(self, path):
 402         self._log('*** unlink '+path)
 403         if path not in self.files:
 404             return -errno.ENOENT
 405         del self.attrs[path]
 406         del self.files[path]
 407         return
 408
 409     def utime(self, path, times):
 410         self._log('*** utime '+str([path, times]))
 411         self.attrs[path].st_atime = times[0]
 412         self.attrs[path].st_mtime = times[1]
 413         return
 414
 415     def write(self, path, buf, offset, fd=None):
 416         self._log('*** write'+str([path, len(buf), offset, fd]))
 417         if fd != None:
 418             fd.seek(offset)
 419             fd.write(buf)
 420         x = self.files[path]
 421         self.files[path] = x[:offset] + buf + x[offset:]
 422         self.attrs[path].st_size = len(self.files[path])
 423         if path[-5:] == '.wiki' and path[:8] == "/content":
 424             # this is a wiki page, save page contents
 425             editsumm = self._re_editsumm.search(self.files[path]).group(1)
 426             editsumm = editsumm.strip()
 427             data = self._re_fmwcomm.sub('', self.files[path]).strip()
 428             wikititle = path[9:-5]
 429             getvars = {'title': wikititle, 'action': 'submit'}
 430             postvars = {'wpSection': '', 'wpStarttime':
 431                         self.wikitokens[wikititle]['start'], 'wpEdittime':
 432                         self.wikitokens[wikititle]['edit'], 'wpScrolltop': '0',
 433                         'wpTextbox1': data, 'wpSummary': editsumm,
 434                         'wpSave': 'Save page', 'wpEditToken':
 435                         self.wikitokens[wikititle]['token'], 'wpAutoSummary':
 436                         self.wikitokens[wikititle]['auto']}
 437             response = self._urlfetch(getvars, postvars)[0]
 438             respdata = response.read()
 439         return len(buf)
 440
 441
 442 def main():
 443     usage = """Usage: python %prog [OPTIONS] ROOT_URL MOUNTPOINT
 444 fuse-mediawiki is a FUSE filesystem for editing MediaWiki websites.
 445 """
 446     # setup FUSE
 447     fs = FuseMediaWiki(version="%prog "+__version__, usage=usage)
 448     # setup option parser
 449     # -t, --timeout: how long in seconds before the filesystem deems a wiki
 450     # page as stale and downloads a new one on the next read
 451     fs.parser.add_option("-t", "--timeout", dest="timeout", default=60,
 452                          type="int",
 453                          help="timeout, in seconds [default: %default]")
 454     # -u, --username: set the username. password is asked for on mount
 455     fs.parser.add_option("-u", "--username", dest="username",
 456                          help="username for login into wiki")
 457     # --http-basic: used for those weird wikis that use HTTP basic
 458     # authentication instead of the default login form
 459     fs.parser.add_option("--http-basic", action="store_true",
 460                          dest="usebasicauth",
 461                          help="use HTTP basic authentication for login",
 462                          default=False)
 463     # allow for FUSE mount options
 464     fs.parser.mountopt = True
 465     # get arguments
 466     (options, args) = fs.parser.parse_args()
 467     # if we have zero or two or more arguments before the MOUNTPOINT, fail.
 468     if len(args) != 1:
 469         print "Must specify exactly 1 root URL and 1 mount point, " + \
 470                 "in that order"
 471         sys.exit(1)
 472     # set the root URL
 473     fs._setrooturl(args[0])
 474     # tell the object what are authentication method is
 475     fs._setupauth(options.username, options.usebasicauth)
 476     # this does something, I'm not really sure what at the moment
 477     fs.parse(values=fs, errex=1)
 478     # go into main loop
 479     fs.main()
 480
 481
# Call main() only when this file is executed as a script.
if __name__ == '__main__':
    main()