1.9.30 sync.
[gae.git] / python / google / appengine / tools / handler.py
blob478dccbf8df52cc6e4651fbf66410b489f8537d5
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Code for representing and manipulating handlers in app.yaml.
19 App.yaml requires developers to list handlers - specifications for how URL
20 requests in their app are handled. This module contains classes for
21 representing handlers, as well as functions for creating handlers from
22 specifications in appengine-web.xml and web.xml, manipulating and matching
23 paths, and ordering them correctly so that the yaml file preserves the semantics
24 of the user-specified xml.
26 In this module:
27 Handler: Ancestor class that provides pattern matching and other utilities.
28 SimpleHandler: A representation of path handling specified in XML - a path
29 along with properties detailing how it is handled.
30 OverlappedHandler: A representation of combinations of paths specified by
31 users in Xml. OverlappedHandlers combine properties, such as security
32 settings, from the SimpleHandlers that they are made up of.
33 GetOrderedIntersection: Returns an ordered list of Handlers that can be
34 written directly to Yaml.
35 """
37 import fnmatch
38 import itertools
39 import re
42 class Handler(object):
43 """Ancestor class for Handler manipulation. Patterns are globs.
45 (http://en.wikipedia.org/wiki/Glob_(programming)).
46 """
48 ALL_PROPERTIES = [
49 'expiration',
50 'http_headers',
51 'required_role',
52 'transport_guarantee',
53 'type',
54 'welcome'
57 def __init__(self, pattern):
58 self.pattern = pattern
60 def _GetPattern(self):
61 return self._pattern
63 def _SetPattern(self, the_pattern):
64 self._pattern = the_pattern
65 self._regex = re.compile(re.escape(the_pattern).replace('\\*', '.*') + '$')
66 self.is_literal = '*' not in the_pattern
68 pattern = property(_GetPattern, _SetPattern)
70 @property
71 def regex(self):
72 return self._regex
74 def Regexify(self):
75 """Returns a regex-looking string to write to Yaml."""
77 return self.pattern.replace('.', '\\.').replace('*', '.*')
79 def MatchesString(self, pattern_str):
80 """Returns true if input path string is matched by glob pattern."""
81 return self._regex.match(pattern_str) is not None
83 def MatchesAll(self, other_glob):
84 """Returns True if self matches everything other_glob matches."""
103 return self.MatchesString(other_glob.pattern)
105 def HasMoreSpecificPatternThan(self, other_handler):
106 """Returns True if self is more specific than other_handler.
108 Priority in determining specificity is first determined by literal-ness,
109 second by length. This is according to the Java servlet spec for
110 mapping URL paths.
112 Args:
113 other_handler: another handler to compare against.
115 Returns:
116 True if self.pattern is a literal and other_handler.pattern is not,
117 False if vice versa, and otherwise True if self.pattern is longer.
120 if self.is_literal != other_handler.is_literal:
121 return self.is_literal
123 return len(self.pattern) > len(other_handler.pattern)
125 def __eq__(self, other_handler):
126 return (isinstance(other_handler, Handler) and
127 self.__dict__ == other_handler.__dict__)
129 def IsFullyHandledBy(self, other_handler):
130 """Returns True if self specifies something unique.
132 For example, If we have a Handler with pattern "foo*bar"
133 which has properties {'type': 'static'}, and other_handler
134 has pattern "foo*" with the same properties, then
135 other_handler does everything that self does.
137 Args:
138 other_handler: other handler to be matched against
139 Returns:
140 Boolean value of whether other_handler fully handles self.
142 return (other_handler.MatchesAll(self) and
143 self._PropertiesMatch(other_handler))
145 def _PropertiesMatch(self, other):
146 """Returns True if other.properties is superset of self.properties."""
147 for prop in Handler.ALL_PROPERTIES:
148 if self.GetProperty(prop) not in (None, other.GetProperty(prop)):
149 return False
150 return True
153 def _MakeHandlerList(*pattern_strings):
154 return [SimpleHandler(a) for a in pattern_strings]
157 class SimpleHandler(Handler):
158 """Subclass of Handler which includes user-defined settings with urls.
160 SimpleHandlers should be treated as immutable.
163 def __init__(self, pattern, properties=None):
164 super(SimpleHandler, self).__init__(pattern)
165 if properties:
166 self.properties = properties
167 else:
168 self.properties = {}
170 def __hash__(self):
171 return hash((self.pattern, tuple(sorted(self.properties.items()))))
173 def GetProperty(self, prop, default=None):
174 return self.properties.get(prop, default)
176 def CreateOverlappedHandler(self):
177 """Creates a Combined Handler with self's pattern and self as a child."""
178 return OverlappedHandler(self.pattern, matchers=[self])
181 class OverlappedHandler(Handler):
182 """Subclass of Handler which allows for the combination of SimpleHandlers.
184 An intuitive way to think about globbed patterns is as sets - for example, the
185 pattern "admin/*" is the set of all paths that are in the admin/ directory,
186 and the pattern "*.txt" is the set of all paths to text files. An
187 OverlappedHandler is designed to describe the intersection of the sets
188 of paths - ie the set of paths that is matched by EVERY one its
189 handler patterns.
191 In the Xml files, paths are specified in different places - in servlet
192 patterns, in static files includes, in security constraint specifications,
193 etc. There is often some overlap between the paths that are specified by
194 these patterns. Since App.Yaml does not have separate places to specify how
195 different paths are handled, but rather just lists paths along with a bunch
196 of ways that the paths is handled, we need to make sure that these properties
197 for various paths are being combined at some point in the translation process.
199 Thus an OverlappedHandler holds a list of "matchers" - ie. handlers with more
200 general patterns, to choose properties from. OverlappedHandlers do not
201 explicitly specify properties but rather choose the value from the most
202 specific "matcher" with the value of that property specified. The matchers
203 are SimpleHandlers.
205 Attributes:
206 pattern: inherited from Handler.
207 matchers: A list of SimpleHandlers. Matchers are handlers which happen to
208 match OverlappedHandler's pattern and whose properties are thus necessary
209 to track in order to make sure that OverlappedHandler's pattern is handled
210 correctly.
213 def __init__(self, pattern, matchers=()):
214 super(OverlappedHandler, self).__init__(pattern)
215 self.matchers = []
216 for sub_handler in matchers:
217 self.AddMatchingHandler(sub_handler)
219 def GetProperty(self, prop, default=None):
220 """Returns the property value of matcher with most specific pattern."""
222 largest_handler = None
223 prop_value = default
224 for sub_handler in self.matchers:
225 if sub_handler.GetProperty(prop) is not None:
226 if (not largest_handler or
227 sub_handler.HasMoreSpecificPatternThan(largest_handler)):
228 largest_handler = sub_handler
229 prop_value = sub_handler.GetProperty(prop)
230 return prop_value
232 def __eq__(self, other_handler):
233 return (isinstance(other_handler, OverlappedHandler) and
234 self.pattern == other_handler.pattern and
235 set(self.matchers) == set(other_handler.matchers))
237 def AddMatchingHandler(self, matcher):
238 """Flattens the handler if it is overlapped and adds to matchers."""
239 if isinstance(matcher, SimpleHandler):
240 self.matchers.append(matcher)
241 else:
242 self.matchers.extend(matcher.matchers)
245 def GetOrderedIntersection(handler_list):
246 """Implements algorithm for combining and reordering Handlers.
248 GetOrderedIntersection performs the heavy lifting of converting a randomly
249 ordered list of Handlers (globbed patterns, each with various potentially
250 conflicting properties attached), into an ordered list of handlers with
251 property values resolved.
253 The purpose of this process is to convert the Web.xml URL mapping scheme to
254 that of app.yaml. In Web.xml the most specific path, according to
255 literal-ness and length, is chosen. In app.yaml the first listed matching
256 path is chosen. Thus to preserve user preferences through the translation
257 process we order the patterns from specific to general.
259 For example, if three handlers are given as input (in any order):
261 "/admin/*" (security=admin)
262 "/*.png" (type=static)
263 "*" (type=dynamic, security=required)
265 we want to get this ordered list as output:
266 1. "/admin/*.png" (security=admin, type=static)
267 2. "/admin/*" (security=admin, type=dynamic)
268 3. "/*.png" (security=required, type=static)
269 4. "*" (type=dynamic, security=required).
271 so that the properties of any given path are those of the longest matching
272 path. The SimpleHandler and OverlappedHandler classes provide the logic for
273 attaching globbed patterns to the right properties and resolving potential
274 property value conflicts.
276 Args:
277 handler_list: List of SimpleHandlers in arbitrary order.
278 Returns:
279 An ordered list of OverlappedHandlers and SimpleHandlers. See the above
280 example for what this would look like.
282 results = _Intersect(handler_list)
287 results = sorted(results, key=lambda h: h.pattern)
288 _ReorderHandlers(results)
289 _GivePropertiesFromGeneralToSpecific(results)
290 return _RemoveRedundantHandlers(results)
293 def _RemoveRedundantHandlers(handler_list):
294 """Removes duplicated or unnecessary handlers from the list.
296 If a handler's pattern and functionality are fully matched by another handler
297 with a more general pattern, we do not need the first handler. At the same
298 time, we remove duplicates.
300 Args:
301 handler_list: list of ordered handlers with possibly redundant entries.
302 Returns:
303 new list which contains entries of handler_list, except redundant ones.
306 no_duplicates = []
307 patterns_found_so_far = set()
308 for i in xrange(len(handler_list)):
309 current_handler = handler_list[i]
310 matched_by_later = False
311 for j in xrange(i + 1, len(handler_list)):
312 if current_handler.IsFullyHandledBy(handler_list[j]):
313 matched_by_later = True
314 break
316 if (not matched_by_later and
317 current_handler.pattern not in patterns_found_so_far):
318 no_duplicates.append(current_handler)
319 patterns_found_so_far.add(current_handler.pattern)
321 return no_duplicates
324 def _ReorderHandlers(handler_list):
325 """Reorders handlers from specific to general for writing to yaml file.
327 This is a topological sort - ie. it makes sure that elements related to
328 each other are ordered correctly. In this case, we want to make sure that
329 any Handler with a pattern that matches all of the Handlers of another pattern
330 occurs later. Thus, we want to make sure that "foo*" occurs after "foo*bar",
331 but it does not matter how it is ordered relative to "*baz", since they have
332 an empty intersection of patterns.
334 The problem with using Python's built-in sorted is that it relies on the
335 class's less-than operator. We want an ordering such that
336 (handler1 < handler2) iff (not handler1.MatchesAll(handler2))
337 Then, since "foo*" and "*baz" do not contain each other, foo* < *baz == True
338 and *baz < foo* == True. This is a problem because Python's sorted does not
339 explicitly compare every pair of elements, but operates under the assumption
340 that if a < b and b < c, then a < c. Therefore, if we have
341 a = "foo*bar", b = "*baz", c = "foo*"
342 then a < b == True and b < c == True, so a < c is assumed to be True. This
343 often leads to wrong orderings.
345 Therefore, this function performs a topological sort
346 (http://en.wikipedia.org/wiki/Topological_sorting), reordering only those
347 patterns where one matches all of the other.
349 This is an in-place sort.
351 Args:
352 handler_list: Unordered list of handlers.
354 for i, j in itertools.combinations(xrange(len(handler_list)), 2):
355 if handler_list[i].MatchesAll(handler_list[j]):
356 handler_list[i], handler_list[j] = handler_list[j], handler_list[i]
359 def _GivePropertiesFromGeneralToSpecific(handler_list):
360 """Makes sure that handlers have all properties of more general ones.
362 Ex. Since "*" matches everything "admin/*" matches, we want everything
363 matched by "admin/*" to have all the properties specified to "*".
364 Therefore we give properties from the "*" handler to the "admin/*" handler.
365 If the "*" handler is a SimpleHandler, it carries its own properties, so it
366 becomes a child of the "admin/*" handler. Otherwise, its properties are
367 define by its children, so its children are copied to the "admin/*"
368 handler.
370 This is an in-place mutation of the list.
372 Args:
373 handler_list: List of ordered Handlers.
375 for i, j in itertools.combinations(xrange(len(handler_list)), 2):
376 if handler_list[j].MatchesAll(handler_list[i]):
377 if isinstance(handler_list[i], SimpleHandler):
378 handler_list[i] = handler_list[i].CreateOverlappedHandler()
379 handler_list[i].AddMatchingHandler(handler_list[j])
382 def _Intersect(handler_list):
383 """Returns an unordered list of all possible intersections of handlers."""
384 if not handler_list:
385 return set()
387 handlers = set([handler_list[0]])
392 for input_handler in handler_list[1:]:
393 new_handlers = set()
394 for g in handlers:
395 new_handlers |= _IntersectTwoHandlers(input_handler, g)
396 handlers = new_handlers
397 return list(handlers)
400 def _IntersectTwoHandlers(first_handler, second_handler):
401 """Returns intersections of first_handler and second_handler patterns."""
402 shared_prefix = _SharedPrefix(first_handler.pattern, second_handler.pattern)
404 if shared_prefix:
405 return _HandleCommonPrefix(first_handler, second_handler, shared_prefix)
408 shared_suffix = _SharedSuffix(first_handler.pattern, second_handler.pattern)
409 if shared_suffix:
410 return _HandleCommonSuffix(first_handler, second_handler, shared_suffix)
412 handler_set = set()
413 handler_set |= _HandleWildcardCases(first_handler, second_handler)
416 handler_set |= _HandleWildcardCases(second_handler, first_handler)
418 handler_set |= set([first_handler, second_handler])
420 return handler_set
423 def _HandleWildcardCases(first_handler, second_handler):
424 """Handle cases with trailing and leading wildcards.
426 This function finds the set of intersections of two handlers where one has a
427 leading wildcard (eg. *foo) in its pattern and at least one has a trailing
428 wildcard (eg. baz*) in its pattern. The arguments are not symmetric.
430 Args:
431 first_handler: A SimpleHandler instance
432 second_handler: A SimpleHandler instance
433 Returns:
434 A set of intersection patterns of the two Handlers. Example: If the
435 pattern of first_handler is abc* and that of the second is *xyz, we return
436 the intersection of the patterns, abc*xyz. Find more examples in the inline
437 comments.
439 merged_handlers = set()
441 if len(first_handler.pattern) <= 1 or len(second_handler.pattern) <= 1:
442 return merged_handlers
444 if (first_handler.pattern[-1], second_handler.pattern[0]) != ('*', '*'):
445 return merged_handlers
449 first_no_star = first_handler.pattern[:-1]
450 merged_handlers.add(SimpleHandler(first_no_star + second_handler.pattern))
451 if second_handler.MatchesString(first_no_star):
454 merged_handlers.add(SimpleHandler(first_no_star))
455 return merged_handlers
458 def _HandleCommonPrefix(first_handler, second_handler, common_prefix):
459 """Strips common literal prefix from handlers and intersects the substrings.
461 Ex. "abc" and "a*c" become "a | bc" and "a | *c". We find the set of
462 intersections of "bc" and "*c" and prepend "a" onto each member of that set.
463 By common literal prefix, we mean a prefix of the two patterns that contains
464 no wildcard characters; any string matched by either of the patterns must
465 begin with that prefix.
467 Args:
468 first_handler: A SimpleHandler.
469 second_handler: A SimpleHandler.
470 common_prefix: The shared literal prefix of the patterns of the two
471 handlers.
472 Returns:
473 The set of intersections of first_handler and second_handler. This is done
474 by stripping the common prefix to create new SimpleHandlers which we call
475 _IntersectTwoHandlers on, and then prepend the prefix to each member of that
476 set.
478 stripped_first_handler = SimpleHandler(
479 first_handler.pattern[len(common_prefix):], first_handler.properties)
480 stripped_second_handler = SimpleHandler(
481 second_handler.pattern[len(common_prefix):], second_handler.properties)
482 stripped_handlers = _IntersectTwoHandlers(stripped_first_handler,
483 stripped_second_handler)
484 handlers = set()
485 for stripped_handler in stripped_handlers:
486 handlers.add(SimpleHandler(common_prefix + stripped_handler.pattern,
487 stripped_handler.properties))
488 return handlers
491 def _HandleCommonSuffix(first_handler, second_handler, common_suffix):
492 """Strips matching suffix from handlers and intersects the substrings."""
494 stripped_first_handler = SimpleHandler(
495 first_handler.pattern[:-len(common_suffix)], first_handler.properties)
496 stripped_second_handler = SimpleHandler(
497 second_handler.pattern[:-len(common_suffix)], second_handler.properties)
498 stripped_handlers = _IntersectTwoHandlers(
499 stripped_first_handler, stripped_second_handler)
500 handlers = set()
501 for stripped_handler in stripped_handlers:
502 handlers.add(SimpleHandler(stripped_handler.pattern + common_suffix,
503 stripped_handler.properties))
504 return handlers
507 def _SharedPrefix(pattern1, pattern2):
508 """Returns the shared prefix of two patterns.
510 Args:
511 pattern1: A handler's pattern string
512 pattern2: A handler's pattern string
513 Returns:
514 The shared prefix of the two patterns, up to the index of the first
515 wildcard of either pattern. For example, the shared prefix of "a*bd" and
516 "ac*d" is a. The shared prefix of "*x" and "y" is the empty string. The
517 shared prefix of "john" and "johnny" is the empty string, and the shared
518 prefix of "bc*" and "c*" is also the empty string.
520 first_star1 = (pattern1 + '*').find('*')
521 first_star2 = (pattern2 + '*').find('*')
522 if (first_star1, first_star2) != (len(pattern1), len(pattern2)):
523 min_star = min(first_star1, first_star2)
524 if min_star and pattern1[:min_star] == pattern2[:min_star]:
525 return pattern1[:min_star]
526 return ''
529 def _SharedSuffix(pattern1, pattern2):
530 """Returns the shared suffix of two patterns."""
531 return _SharedPrefix(pattern1[::-1], pattern2[::-1])[::-1]