3 # fuse-mediawiki - FUSE filesystem for editing MediaWiki websites
4 # Copyright (C) 2008 Ian Weller <ianweller@gmail.com>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """fuse-mediawiki provides a FUSE filesystem for MediaWiki websites."""
34 from StringIO
import StringIO
35 from getpass
import getpass
41 class FuseMediaWiki(Fuse
):
42 """Class to provide FUSE filesystem."""
44 # variables used for auth
49 # The filesystem used is simply a dict of keys (filenames) and values.
50 # For attrs, the value of a key is a specific instance of fuse.Stat().
52 # For files, the value of a key is the contents of that file.
54 # Each edit page retrieval creates certain tokens we need to submit the new
55 # page properly -- the key is the name of the wiki page with no extensions
58 # Regular expressions used to parse the edit page
59 # _re_textarea scrapes the data of the page out of the textarea
60 _re_textarea
= re
.compile("""<textarea.*name="wpTextbox1".*>(.*)"""+\
61 """</textarea>""", re
.S
)
62 # _re_starttime is the retrieval date of the data
63 # TODO Make the editor realize that the file has been changed before they
65 _re_starttime
= re
.compile("""<input.*value="([0-9]{14})".*"""+\
66 """name="wpStarttime" />""")
68 _re_edittime
= re
.compile("""<input.*value="([0-9]{14})".*"""+\
69 """name="wpEdittime" />""")
70 _re_edittoken
= re
.compile("""<input.*value="(.*)".*name"""+\
71 """="wpEditToken" />""")
72 _re_autosum
= re
.compile("""<input name="wpAutoSummary".*value"""+\
73 """="([0-9a-f]{32})" />""")
75 # Used to allow for edit summaries
76 _editsummary
= """<!-- FUSEMW: Comments starting with FUSEMW: (like this \
77 one) are ignored and not saved within the page -->
78 <!-- FUSEMW: It is a good idea to provide an edit summary, place it below \
80 <!-- FUSEMW:EDITSUMM: -->
82 # Regexp to get edit summary out of a saved page
83 _re_editsumm
= re
.compile("""<!-- FUSEMW:EDITSUMM:(.*)-->""")
84 # Regexp to get rid of all FUSEMW: comments before saving
85 _re_fmwcomm
= re
.compile("""<!-- ?FUSEMW:.*-->""", re
.S
)
87 def __init__(self
, version
, usage
):
89 fuse
.fuse_python_api
= (0, 2)
90 Fuse
.__init
__(self
, version
=version
, usage
=usage
)
91 # init default file attributes
92 for path
in ['/', '/image', '/cat', '/content']:
93 self
._setdirattr
(path
)
94 # init file system and default directories
95 self
.files
['/image'] = 0
96 self
.files
['/cat'] = 0
97 self
.files
['/content'] = 0
99 self
.cookiejar
= cookielib
.LWPCookieJar()
100 urllib2
.install_opener(urllib2
.build_opener(
101 urllib2
.HTTPCookieProcessor(self
.cookiejar
)))
103 def _file2fd(self
, path
):
104 """Create a file descriptor out of a file within this filesystem"""
106 return StringIO(self
.files
[path
])
110 def _unescape(self
, text
):
111 """Unescape text in the textarea on retrieval"""
112 text
= text
.replace('<', '<')
113 text
= text
.replace('>', '>')
114 text
= text
.replace('&', '&')
def _setdirattr(self, path):
    """Store a fresh fuse.Stat() with directory defaults under ``path``."""
    # drwxr-xr-x, spelled with the stat constants instead of an octal literal
    permissions = (stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP |
                   stat.S_IROTH | stat.S_IXOTH)
    entry = fuse.Stat()
    self.attrs[path] = entry
    entry.st_mode = stat.S_IFDIR | permissions
    entry.st_uid = int(os.getuid())
    entry.st_gid = int(os.getgid())
    entry.st_size = 4096  # 4.0 K
    # each timestamp gets its own clock reading
    entry.st_atime = time.time()
    entry.st_mtime = time.time()
    entry.st_ctime = time.time()
    entry.st_nlink = 2
def _setregattr(self, path):
    """Store a fresh fuse.Stat() with regular-file defaults under ``path``."""
    # -rw-r--r--, spelled with the stat constants instead of an octal literal
    permissions = (stat.S_IRUSR | stat.S_IWUSR |
                   stat.S_IRGRP | stat.S_IROTH)
    entry = fuse.Stat()
    self.attrs[path] = entry
    entry.st_mode = stat.S_IFREG | permissions
    entry.st_uid = int(os.getuid())
    entry.st_gid = int(os.getgid())
    entry.st_size = 0
    # each timestamp gets its own clock reading
    entry.st_atime = time.time()
    entry.st_mtime = time.time()
    entry.st_ctime = time.time()
    entry.st_nlink = 1
141 def _log(self
, message
):
142 """Log a message. Currently it just uses print, and therefore logs only
143 if the -f or -d options are sent from the command line."""
144 # it's simple now, but in case we want to change it later... here we go
148 def _setrooturl(self
, url
):
152 def _urlfetch(self
, getvars
, postvars
=None, headers
={}):
153 get
= urllib
.urlencode(getvars
)
154 url
= self
.rooturl
+ '?' + get
156 post
= urllib
.urlencode(postvars
)
159 request
= urllib2
.Request(url
, post
)
160 for header
in headers
:
161 request
.add_header(header
, headers
[header
])
163 response
= urllib2
.urlopen(request
)
164 except urllib2
.HTTPError
, args
:
165 # we assume this works. if not, let me know.
167 auth
= base64
.encodestring('%s:%s' % (self
.username
,
169 request
.add_header('Authorization', 'Basic %s' % auth
)
171 response
= urllib2
.urlopen(request
)
172 except urllib2
.HTTPError
, args
:
175 return (response
, request
)
177 def _setupauth(self
, username
, usebasicauth
):
178 # if there is no username, anonymous auth
180 self
.authtype
= self
.AUTH_ANON
181 self
._log
("Anonymously accessing wiki")
183 # there is a user name. what kind of auth?
184 self
.username
= username
186 self
.authtype
= self
.AUTH_HTTP
188 self
.authtype
= self
.AUTH_USER
193 self
.password
= getpass(self
.username
+ "'s password: ")
197 """This will attempt to login to the wiki. It does it through this
199 1. If --auth-basic was passed at startup, attempt to login at
200 Special:Userlogin by sending the username and password.
201 2. If cookies are received, we assume that we are logged in.
202 However, if at any point during editing we are asked for our
203 username and password again (401 Unauthorized), we'll decide
204 that we need to do that for every wiki page.
205 3. If no cookies are received, we assume that we have to send the
206 username and password on every request.
207 4. If --auth-basic was not passed at startup, attempt to login at
208 Special:Userlogin by POSTing the username and password.
209 5. If we are led to believe that the username/password combination
210 is incorrect (i.e., additional 401 Unauthorized errors, or the
211 wiki actually telling us), stop mounting."""
212 print "Logging in..."
213 if self
.authtype
== self
.AUTH_HTTP
:
215 (response
, request
) = self
._urlfetch
({'title':
216 'Special:Userlogin'}, {})
217 except urllib2
.HTTPError
, args
:
219 auth
= base64
.encodestring('%s:%s' % (self
.username
,
223 request
) = self
._urlfetch
({'title':
224 'Special:Userlogin'}, {},
225 {'Authorization': 'Basic %s'
227 except urllib2
.HTTPError
, args
:
230 elif self
.authtype
== self
.AUTH_USER
:
231 getvars
= {'title': 'Special:Userlogin', 'action': 'submitlogin',
233 postvars
= {'wpName': self
.username
, 'wpPassword': self
.password
,
234 'wpLoginattempt': 'Log in', 'wpRemember': '1'}
235 response
= self
._urlfetch
(getvars
, postvars
)[0]
236 data
= response
.read()
237 if re
.search('var wgUserName = "%s";' % self
.username
, data
,
240 self
._log
("Logged in successfully as %s" % self
.username
)
247 # assume anonymous or something else that doesn't require login
250 def getattr(self
, path
):
252 self
._log
('*** getattr '+path
)
253 if path
in self
.attrs
:
255 return self
.attrs
[path
]
256 elif path
[-5:] == ".wiki" and path
[:8] == "/content":
257 # file technically found... we need to go make it.
258 self
.mknod(path
, 0100644, 0)
259 return self
.attrs
[path
]
264 def getdir(self
, path
):
265 self
._log
('*** getdir '+path
)
266 keys
= self
.files
.keys()
272 if '/' not in key
[l
:]:
273 flist
.append((key
[l
:], 0))
274 flist
.append(('.', 0))
275 flist
.append(('..', 0))
def readdir(self, path, offset):
    """Log the request, then hand it to ``readdir_compat_0_1`` unchanged."""
    self._log('*** readdir %s' % [path, offset])
    listing = self.readdir_compat_0_1(path, offset)
    return listing
283 self
._log
('*** mythread')
def chmod(self, path, mode):
    """Log the request and store ``mode`` as the st_mode of ``path``."""
    self._log('*** chmod %s' % [path, oct(mode)])
    setattr(self.attrs[path], 'st_mode', mode)
def chown(self, path, uid, gid):
    """Log the chown request; the code shown here changes no ownership."""
    self._log('*** chown %s' % [path, uid, gid])
def fsync(self, path, isFsyncFile, fd=None):
    """Log the fsync request; nothing else is done in the code shown."""
    self._log('*** fsync %s' % [path, isFsyncFile, fd])
def link(self, targetPath, linkPath):
    """Log the hard-link request; no link is created in the code shown."""
    self._log('*** link %s' % [targetPath, linkPath])
303 def mkdir(self
, path
, mode
):
304 """Create a directory."""
305 self
._log
('*** mkdir '+str([path
, oct(mode
)]))
306 if re
.match('^/image/.*$', path
):
308 if re
.match('^/cat/.*$', path
):
311 self
._setdirattr
(path
)
312 self
.attrs
[path
].st_mode
= stat
.S_IFDIR | mode
315 def mknod(self
, path
, mode
, dev
):
316 """Create a file. Not sure what the dev argument is, but it doesn't
318 self
._log
('*** mknod '+str([path
, oct(mode
), dev
]))
319 self
.files
[path
] = ""
320 self
._setregattr
(path
)
321 self
.attrs
[path
].st_mode
= mode
322 if path
[-5:] == '.wiki' and path
[:8] == "/content":
323 # this is a wiki page, get page contents
324 wikititle
= path
[9:-5]
325 getvars
= {'title': wikititle
, 'action': 'edit'}
326 (response
, request
) = self
._urlfetch
(getvars
)
327 data
= response
.read()
328 text
= self
._unescape
(self
._re
_textarea
.search(data
).group(1))[:-1]
330 text
= "<!-- FUSEMW: You are creating a new page -->"
331 text
= self
._editsummary
+ text
+ "\n"
332 self
.files
[path
] = text
333 self
.attrs
[path
].st_size
= len(text
)
334 starttime
= self
._re
_starttime
.search(data
).group(1)
335 edittime
= self
._re
_edittime
.search(data
).group(1)
336 edittoken
= self
._re
_edittoken
.search(data
).group(1)
337 autosum
= self
._re
_autosum
.search(data
).group(1)
338 self
.wikitokens
[wikititle
] = {'start': starttime
, 'edit': edittime
,
339 'token': edittoken
, 'auto': autosum
}
def open(self, path, flags):
    """Log the request and return what ``_file2fd`` builds for ``path``."""
    self._log('*** open %s' % [path, flags])
    handle = self._file2fd(path)
    return handle
346 def read(self
, path
, length
, offset
, fd
=None):
347 self
._log
('*** read'+str([path
, length
, offset
, fd
]))
349 # we'll just read from the provided StringIO
351 return fd
.read(length
)
353 # this should never really happen
355 cwd
= self
.files
[path
]
358 return cwd
[offset
:length
]
def readlink(self, path):
    """Log the readlink request; nothing else is done in the code shown."""
    self._log(''.join(('*** readlink ', path)))
364 def release(self
, path
, flags
, fd
=None):
365 self
._log
('*** release '+str([path
, flags
, fd
]))
370 # i don't think we really care
def rename(self, oldPath, newPath):
    """Move the entry at ``oldPath`` to ``newPath``.

    Both the stored value in ``self.files`` and the attributes in
    ``self.attrs`` are moved; an existing entry at ``newPath`` is replaced.
    Renaming a path onto itself leaves the entry untouched.

    Raises KeyError if ``oldPath`` is missing from ``self.files`` or from
    ``self.attrs``; nothing is modified in that case.
    """
    self._log('*** rename '+str([oldPath, newPath]))
    # Look both entries up before changing anything, so a source that is
    # missing from one dict cannot leave the two dicts out of step.
    contents = self.files[oldPath]
    attributes = self.attrs[oldPath]
    if oldPath == newPath:
        # Copy-then-delete on identical keys would delete the entry.
        return
    self.files[newPath] = contents
    del self.files[oldPath]
    self.attrs[newPath] = attributes
    del self.attrs[oldPath]
def rmdir(self, path):
    """Log the rmdir request; no removal happens in the code shown."""
    self._log(''.join(('*** rmdir ', path)))
    # -errno.ENOTEMPTY will be useful
387 #self._log('*** statfs')
def symlink(self, targetPath, linkPath):
    """Log the symlink request; no link is created in the code shown."""
    self._log('*** symlink %s' % [targetPath, linkPath])
def truncate(self, path, size):
    """Set the length of the file at ``path`` to exactly ``size``.

    Contents beyond ``size`` are discarded. A file shorter than ``size`` is
    extended with NUL characters, which is how truncate is specified to
    grow a file. ``st_size`` in ``self.attrs`` is updated to match.
    """
    self._log('*** truncate '+str([path, size]))
    contents = self.files[path][0:size]
    # Slicing alone can only shrink; pad so a request to grow is honoured.
    shortfall = size - len(contents)
    if shortfall > 0:
        contents += '\0' * shortfall
    self.files[path] = contents
    self.attrs[path].st_size = len(contents)
401 def unlink(self
, path
):
402 self
._log
('*** unlink '+path
)
403 if path
not in self
.files
:
def utime(self, path, times):
    """Log the request and copy ``times[0]``/``times[1]`` into the entry's
    st_atime/st_mtime."""
    self._log('*** utime %s' % [path, times])
    for field, index in (('st_atime', 0), ('st_mtime', 1)):
        setattr(self.attrs[path], field, times[index])
415 def write(self
, path
, buf
, offset
, fd
=None):
416 self
._log
('*** write'+str([path
, len(buf
), offset
, fd
]))
421 self
.files
[path
] = x
[:offset
] + buf
+ x
[offset
:]
422 self
.attrs
[path
].st_size
= len(self
.files
[path
])
423 if path
[-5:] == '.wiki' and path
[:8] == "/content":
424 # this is a wiki page, save page contents
425 editsumm
= self
._re
_editsumm
.search(self
.files
[path
]).group(1)
426 editsumm
= editsumm
.strip()
427 data
= self
._re
_fmwcomm
.sub('', self
.files
[path
]).strip()
428 wikititle
= path
[9:-5]
429 getvars
= {'title': wikititle
, 'action': 'submit'}
430 postvars
= {'wpSection': '', 'wpStarttime':
431 self
.wikitokens
[wikititle
]['start'], 'wpEdittime':
432 self
.wikitokens
[wikititle
]['edit'], 'wpScrolltop': '0',
433 'wpTextbox1': data
, 'wpSummary': editsumm
,
434 'wpSave': 'Save page', 'wpEditToken':
435 self
.wikitokens
[wikititle
]['token'], 'wpAutoSummary':
436 self
.wikitokens
[wikititle
]['auto']}
437 response
= self
._urlfetch
(getvars
, postvars
)[0]
438 respdata
= response
.read()
443 usage
= """Usage: python %prog [OPTIONS] ROOT_URL MOUNTPOINT
444 fuse-mediawiki is a FUSE filesystem for editing MediaWiki websites.
447 fs
= FuseMediaWiki(version
="%prog "+__version__
, usage
=usage
)
448 # setup option parser
449 # -t, --timeout: how long in seconds before the filesystem deems a wiki
450 # page as stale and downloads a new one on the next read
451 fs
.parser
.add_option("-t", "--timeout", dest
="timeout", default
=60,
453 help="timeout, in seconds [default: %default]")
454 # -u, --username: set the username. password is asked for on mount
455 fs
.parser
.add_option("-u", "--username", dest
="username",
456 help="username for login into wiki")
457 # --http-basic: used for those weird wikis that use HTTP basic
458 # authentication instead of the default login form
459 fs
.parser
.add_option("--http-basic", action
="store_true",
461 help="use HTTP basic authentication for login",
463 # allow for FUSE mount options
464 fs
.parser
.mountopt
= True
466 (options
, args
) = fs
.parser
.parse_args()
467 # if we have zero or two or more arguments before the MOUNTPOINT, fail.
469 print "Must specify exactly 1 root URL and 1 mount point, " + \
473 fs
._setrooturl
(args
[0])
474 # tell the object what our authentication method is
475 fs
._setupauth
(options
.username
, options
.usebasicauth
)
476 # parse the command line (FUSE mount options included); option values are
476 # stored as attributes of fs, and errex=1 exits with status 1 on a bad option
477 fs
.parse(values
=fs
, errex
=1)
483 if __name__
== '__main__':