App Engine Python SDK version 1.7.4 (2)
[gae.git] / python / lib / django_1_4 / extras / csrf_migration_helper.py
blob94b5a20802e16581eccc120b523d1ea2e9107108
1 #!/usr/bin/env python
3 # This script aims to help developers locate forms and view code that needs to
4 # use the new CSRF protection in Django 1.2. It tries to find all the code that
5 # may need the steps described in the CSRF documentation. It does not modify
6 # any code directly, it merely attempts to locate it. Developers should be
7 # aware of its limitations, described below.
9 # For each template that contains at least one POST form, the following info is printed:
11 # <Absolute path to template>
12 # AKA: <Aliases (relative to template directory/directories that contain it)>
13 # POST forms: <Number of POST forms>
14 # With token: <Number of POST forms with the CSRF token already added>
15 # Without token:
16 # <File name and line number of form without token>
18 # Searching for:
19 # <Template names that need to be searched for in view code
20 # (includes templates that 'include' current template)>
22 # Found:
23 # <File name and line number of any view code found>
25 # The format used allows this script to be used in Emacs grep mode:
26 # M-x grep
27 # Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs
30 # Limitations
31 # ===========
33 # - All templates must be stored on disk in '.html' or '.htm' files.
34 # (extensions configurable below)
36 # - All Python code must be stored on disk in '.py' files. (extensions
37 # configurable below)
39 # - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/'
40 # directory in apps specified in INSTALLED_APPS. Non-file based template
41 # loaders are out of the picture, because there is no way to ask them to
42 # return all templates.
44 # - It's impossible to programmatically determine which forms should and should
45 # not have the token added. The developer must decide when to do this,
46 # ensuring that the token is only added to internally targeted forms.
48 # - It's impossible to programmatically work out when a template is used. The
49 # attempts to trace back to view functions are guesses, and could easily fail
50 # in the following ways:
52 # * If the 'include' template tag is used with a variable
53 # i.e. {% include tname %} where tname is a variable containing the actual
54 # template name, rather than {% include "my_template.html" %}.
56 # * If the template name has been built up by view code instead of as a simple
57 # string. For example, generic views and the admin both do this. (These
58 # apps are both contrib and both use RequestContext already, as it happens).
#   * If the 'ssi' tag (or any template tag other than 'include') is used to
#     include the template in another template.
63 # - All templates belonging to apps referenced in INSTALLED_APPS will be
64 # searched, which may include third party apps or Django contrib. In some
65 # cases, this will be a good thing, because even if the templates of these
66 # apps have been fixed by someone else, your own view code may reference the
67 # same template and may need to be updated.
69 # You may, however, wish to comment out some entries in INSTALLED_APPS or
70 # TEMPLATE_DIRS before running this script.
72 # Improvements to this script are welcome!
74 # Configuration
75 # =============
# File name suffixes that mark a file as a template when the template
# directories are walked.
TEMPLATE_EXTENSIONS = [
    ".html",
    ".htm",
]

# File name suffixes that mark a file as Python source when the supplied code
# paths are walked.
PYTHON_SOURCE_EXTENSIONS = [
    ".py",
]

# Encoding used to decode template files read from disk.
TEMPLATE_ENCODING = "UTF-8"

# Encoding used to decode Python source files read from disk.
PYTHON_ENCODING = "UTF-8"
90 # Method
91 # ======
93 # Find templates:
94 # - template dirs
95 # - installed apps
97 # Search for POST forms
98 # - Work out what the name of the template is, as it would appear in an
99 # 'include' or get_template() call. This can be done by comparing template
100 # filename to all template dirs. Some templates can have more than one
101 # 'name' e.g. if a directory and one of its child directories are both in
102 # TEMPLATE_DIRS. This is actually a common hack used for
103 # overriding-and-extending admin templates.
105 # For each POST form,
106 # - see if it already contains '{% csrf_token %}' immediately after <form>
107 # - work back to the view function(s):
108 # - First, see if the form is included in any other templates, then
109 # recursively compile a list of affected templates.
110 # - Find any code function that references that template. This is just a
111 # brute force text search that can easily return false positives
112 # and fail to find real instances.
115 import os
116 import sys
117 import re
118 from optparse import OptionParser
# Help text passed to OptionParser(usage=...) and shown when the script is run
# without arguments.
USAGE = """
This tool helps to locate forms that need CSRF tokens added and the
corresponding view code. This processing is NOT fool proof, and you should read
the help contained in the script itself. Also, this script may need configuring
(by editing the script) before use.

Usage:

python csrf_migration_helper.py [--settings=path.to.your.settings] /path/to/python/code [more paths...]

Paths can be specified as relative paths.

With no arguments, this help is printed.
"""
# Opening <form ...> tag whose method attribute is POST (any letter case,
# quoted with ', " or not at all).
_POST_FORM_RE = \
    re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE)
# Closing </form> tag. Case-insensitive so that it agrees with _POST_FORM_RE;
# otherwise an upper-case </FORM> would never end the form being scanned.
_FORM_CLOSE_RE = re.compile(r'</form\s*>', re.IGNORECASE)
# Start of a {% csrf_token %} tag. Any amount of whitespace (including none)
# is accepted after '{%'; a raw string avoids the invalid '\{' escape.
_TOKEN_RE = re.compile(r'\{%\s*csrf_token\b')
def get_template_dirs():
    """
    Returns a set of all directories that contain project templates.

    settings.TEMPLATE_DIRS is included when a filesystem template loader is
    listed in settings.TEMPLATE_LOADERS; the app template directories are
    included when an app_directories loader is listed.
    """
    from django.conf import settings

    filesystem_loaders = (
        'django.template.loaders.filesystem.load_template_source',
        'django.template.loaders.filesystem.Loader',
    )
    app_directories_loaders = (
        'django.template.loaders.app_directories.load_template_source',
        'django.template.loaders.app_directories.Loader',
    )

    found = set()
    if any(name in settings.TEMPLATE_LOADERS for name in filesystem_loaders):
        found.update([unicode(d) for d in settings.TEMPLATE_DIRS])

    if any(name in settings.TEMPLATE_LOADERS for name in app_directories_loaders):
        # Imported only when needed, exactly as the loader check dictates.
        from django.template.loaders.app_directories import app_template_dirs
        found.update(app_template_dirs)
    return found
def make_template_info(filename, root_dirs):
    """
    Creates a Template object for a filename, calculating the possible
    relative_filenames from the supplied filename and root template
    directories.

    A root directory contributes a relative name only when it is a genuine
    path prefix of filename, i.e. it is followed by a path separator. A plain
    string prefix such as '/a/templ' for '/a/templates/x.html' is ignored, and
    roots written with a trailing separator yield the same relative name as
    roots written without one.
    """
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    relative_filenames = []
    for d in root_dirs:
        # Normalise 'templates/' to 'templates' so the slicing below is
        # independent of how the directory was spelled in the settings.
        root = d.rstrip(''.join(separators))
        remainder = filename[len(root):]
        if filename.startswith(root) and remainder[:1] in separators:
            relative_filenames.append(remainder[1:])
    return Template(filename, relative_filenames)
class Template(object):
    """
    A template file on disk, known by its absolute file name and by the
    name(s) it has relative to the template directories that contain it.

    Instances compare and hash by absolute file name. related_templates()
    reads an 'all_templates' attribute (a collection of Template objects)
    that must have been assigned to the instance beforehand.
    """

    def __init__(self, absolute_filename, relative_filenames):
        self.absolute_filename, self.relative_filenames = absolute_filename, relative_filenames

    @property
    def content(self):
        """
        The decoded text of the template file, read on first access and
        cached on the instance afterwards.

        Raises UnicodeDecodeError, with the file name appended to the reason,
        if the file cannot be decoded using TEMPLATE_ENCODING.
        """
        try:
            return self._content
        except AttributeError:
            pass

        fd = open(self.absolute_filename)
        try:
            try:
                content = fd.read().decode(TEMPLATE_ENCODING)
            except UnicodeDecodeError:
                # sys.exc_info() instead of 'except X, e' / 'except X as e'
                # keeps this block valid on old and new interpreters alike.
                e = sys.exc_info()[1]
                message = '%s in %s' % (
                    e.args[4], self.absolute_filename.encode('UTF-8', 'ignore'))
                raise UnicodeDecodeError(*(e.args[:4] + (message,)))
        finally:
            # Close the file even when decoding fails.
            fd.close()
        self._content = content
        return content

    def post_form_info(self):
        """
        Get information about any POST forms in the template.
        Returns [(linenumber, csrf_token added)], ordered by line number.
        """
        forms = {}
        # Line number of the POST form currently being scanned, 0 if none.
        form_line = 0
        for ln, line in enumerate(self.content.split("\n")):
            if not form_line and _POST_FORM_RE.search(line):
                # record the form with no CSRF token yet
                form_line = ln + 1
                forms[form_line] = False
            if form_line and _TOKEN_RE.search(line):
                # found the CSRF token
                forms[form_line] = True
                form_line = 0
            if form_line and _FORM_CLOSE_RE.search(line):
                # no token found by form closing tag
                form_line = 0

        # Sorted so that callers report forms in a stable, top-down order.
        return sorted(forms.items())

    def includes_template(self, t):
        """
        Returns true if this template includes template 't' (via {% include %})
        under any of the relative names of 't'.
        """
        for r in t.relative_filenames:
            if re.search(r'\{%\s*include\s+(\'|")' + re.escape(r) + r'(\1)\s*%\}', self.content):
                return True
        return False

    def related_templates(self):
        """
        Returns all templates that include this one, directly or indirectly
        (starting with this one), as a set. The result is cached.

        The search is iterative and keeps track of visited templates, so
        templates that include each other (or themselves) do not cause
        unbounded recursion.
        """
        try:
            return self._related_templates
        except AttributeError:
            pass

        related = set([self])
        pending = [self]
        while pending:
            current = pending.pop()
            for t in current.all_templates:
                if t not in related and t.includes_template(current):
                    related.add(t)
                    pending.append(t)

        self._related_templates = related
        return related

    def __repr__(self):
        return repr(self.absolute_filename)

    def __eq__(self, other):
        # Defer to the other operand instead of failing on foreign types.
        if not isinstance(other, Template):
            return NotImplemented
        return self.absolute_filename == other.absolute_filename

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.absolute_filename)
def get_templates(dirs):
    """
    Walks every directory in dirs and returns, as a set of Template objects,
    all files whose name ends with one of TEMPLATE_EXTENSIONS.
    """
    found = set()
    for root in dirs:
        for dirpath, _dirnames, filenames in os.walk(root):
            for name in filenames:
                if not any(name.endswith(ext) for ext in TEMPLATE_EXTENSIONS):
                    continue
                template = make_template_info(os.path.join(dirpath, name), dirs)
                # Every template shares the one result set so that it can
                # search the others.
                template.all_templates = found
                found.add(template)
    return found
def get_python_code(paths):
    """
    Returns all Python code found under the given directories, as a list of
    tuples, each one being:
     (filename, list of lines)

    Lines are decoded using PYTHON_ENCODING. Raises Exception if one of the
    paths is not a directory.
    """
    retval = []
    for p in paths:
        if not os.path.isdir(p):
            raise Exception("'%s' is not a directory." % p)
        for (dirpath, dirnames, filenames) in os.walk(p):
            for f in filenames:
                if not any(f.endswith(e) for e in PYTHON_SOURCE_EXTENSIONS):
                    continue
                fn = os.path.join(dirpath, f)
                fd = open(fn)
                try:
                    content = [l.decode(PYTHON_ENCODING) for l in fd.readlines()]
                finally:
                    # Release the handle even if a line fails to decode.
                    fd.close()
                retval.append((fn, content))
    return retval
def search_python_list(python_code, template_names):
    """
    Looks up every name in template_names in the given Python code.
    Returns a sorted list of unique tuples, each one being:
     (filename, line number)
    """
    matches = set()
    for name in template_names:
        matches.update(search_python(python_code, name))
    return sorted(matches)
def search_python(python_code, template_name):
    """
    Finds the lines of Python code that contain template_name as a single- or
    double-quoted string.
    Returns a list of tuples, each one being:
     (filename, line number)
    """
    quoted_forms = (u'"%s"', u"'%s'")
    hits = []
    for filename, lines in python_code:
        for index, text in enumerate(lines):
            if any((form % template_name) in text for form in quoted_forms):
                # Line numbers are reported 1-based.
                hits.append((filename, index + 1))
    return hits
def main(pythonpaths):
    """
    Prints a report for every template that contains at least one POST form:
    its file name and aliases, how many POST forms it has with and without
    the CSRF token, the template names searched for in the Python code under
    pythonpaths, and the file/line locations where those names were found.
    """
    all_templates = get_templates(get_template_dirs())
    python_code = get_python_code(pythonpaths)
    for template in all_templates:
        # Logic
        form_matches = template.post_form_info()
        num_post_forms = len(form_matches)
        if num_post_forms == 0:
            continue
        missing_token_lines = [line_no for (line_no, has_token) in form_matches
                               if not has_token]
        to_search = []
        for related in template.related_templates():
            to_search.extend(related.relative_filenames)
        found = search_python_list(python_code, to_search)

        # Display (one print per output line; single-argument print(...)
        # behaves the same as the print statement):
        print(template.absolute_filename)
        for alias in template.relative_filenames:
            print(u" AKA %s" % alias)
        print(u" POST forms: %s" % num_post_forms)
        print(u" With token: %s" % (num_post_forms - len(missing_token_lines)))
        if missing_token_lines:
            print(u" Without token:")
            for line_no in missing_token_lines:
                print("%s:%d:" % (template.absolute_filename, line_no))
        print("")
        print(u" Searching for:")
        for name in to_search:
            print(u" " + name)
        print("")
        print(u" Found:")
        if not found:
            print(" Nothing")
        else:
            for filename, line_no in found:
                print("%s:%d:" % (filename, line_no))

        print("")
        print("----")
# Command line parser; the positional arguments are the Python source paths.
parser = OptionParser(usage=USAGE)
parser.add_option("", "--settings", action="store", dest="settings", help="Dotted path to settings file")

if __name__ == '__main__':
    options, args = parser.parse_args()
    if not args:
        # No source paths given: show the help and stop.
        parser.print_help()
        sys.exit(1)

    settings = getattr(options, 'settings', None)
    if settings is not None:
        # An explicit --settings overrides whatever the environment holds.
        os.environ["DJANGO_SETTINGS_MODULE"] = settings
    elif os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
        print("You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter")
        sys.exit(1)

    main(args)