App Engine Python SDK version 1.8.1
[gae.git] / python / lib / protorpc / protorpc / protourlencode.py
blob00aba24c9fd481a0128533d343877426d2e6c2c6
1 #!/usr/bin/env python
3 # Copyright 2010 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
18 """URL encoding support for messages types.
20 Protocol support for URL encoded form parameters.
22 Nested Fields:
23 Nested fields are represented by dot separated names. For example, consider
24 the following messages:
26 class WebPage(Message):
28 title = StringField(1)
29 tags = StringField(2, repeated=True)
31 class WebSite(Message):
33 name = StringField(1)
34 home = MessageField(WebPage, 2)
35 pages = MessageField(WebPage, 3, repeated=True)
37 And consider the object:
39 page = WebPage()
40 page.title = 'Welcome to NewSite 2010'
42 site = WebSite()
43 site.name = 'NewSite 2010'
44 site.home = page
46 The URL encoded representation of this constellation of objects is.
48 name=NewSite+2010&home.title=Welcome+to+NewSite+2010
50 An object that exists but does not have any state can be represented with
51 a reference to its name alone with no value assigned to it. For example:
53 page = WebSite()
54 page.name = 'My Empty Site'
55 page.home = WebPage()
57 is represented as:
59 name=My+Empty+Site&home=
61 This represents a site with an empty uninitialized home page.
63 Repeated Fields:
64 Repeated fields are represented by the name of and the index of each value
65 separated by a dash. For example, consider the following message:
67 home = Page()
68 home.title = 'Home'
70 news = Page()
71 news.title = 'News'
72 news.tags = ['news', 'articles']
74 instance = WebSite()
75 instance.name = 'Super fun site'
76 instance.pages = [home, news, preferences]
78 An instance of this message can be represented as:
80 name=Super+fun+site&pages-0.title=Home&pages-1.title=News&...
81 pages-1.tags-0=news&pages-1.tags-1=articles
83 Helper classes:
85 URLEncodedRequestBuilder: Used for encapsulating the logic used for building
86 a request message from a URL encoded RPC.
87 """
89 __author__ = 'rafek@google.com (Rafe Kaplan)'
91 import cgi
92 import re
93 import urllib
95 from . import message_types
96 from . import messages
97 from . import util
# Public names exported by this module.
__all__ = ['CONTENT_TYPE',
           'URLEncodedRequestBuilder',
           'encode_message',
           'decode_message',
          ]

# MIME type of the URL encoded form content handled by this module.
CONTENT_TYPE = 'application/x-www-form-urlencoded'

# Matches a single dot-separated component of a parameter name.  Group 1 is
# the field name; optional group 2 is the numeric index that follows a dash
# (for example 'pages-3' yields ('pages', '3')).
_FIELD_NAME_REGEX = re.compile(r'^([a-zA-Z_][a-zA-Z_0-9]*)(?:-([0-9]+))?$')
class URLEncodedRequestBuilder(object):
  """Helper that encapsulates the logic used for building URL encoded messages.

  This helper is used to map query parameters from a URL encoded RPC to a
  message instance.
  """

  @util.positional(2)
  def __init__(self, message, prefix=''):
    """Constructor.

    Args:
      message: Message instance to build from parameters.
      prefix: Prefix expected at the start of valid parameters.
    """
    self.__parameter_prefix = prefix

    # The empty tuple indicates the root message, which has no path.
    # __messages is a full cache that makes it very easy to look up message
    # instances by their paths.  See make_path for details about what a path
    # is.
    self.__messages = {(): message}

    # This is a cache that stores paths which have been checked for
    # correctness.  Correctness means that an index is present for repeated
    # fields on the path and absent for non-repeated fields.  The cache is
    # also used to check that indexes are added in the right order so that
    # discontiguous ranges of indexes are ignored.
    self.__checked_indexes = set([()])

  def make_path(self, parameter_name):
    """Parse a parameter name and build a full path to a message value.

    The path of a message is a tuple of 2-tuples describing the names and
    indexes within repeated fields from the root message (the message being
    constructed by the builder) to an arbitrarily nested message within it.

    Each 2-tuple node of a path (name, index) is:
      name: The name of the field that refers to the message instance.
      index: The index within a repeated field that refers to the message
        instance, None if not a repeated field.

    For example, consider:

      class VeryInner(messages.Message):
        ...

      class Inner(messages.Message):

        very_inner = messages.MessageField(VeryInner, 1, repeated=True)

      class Outer(messages.Message):

        inner = messages.MessageField(Inner, 1)

    If this builder is building an instance of Outer, that instance is
    referred to in the URL encoded parameters without a path.  Therefore
    its path is ().

    The child 'inner' is referred to by its path (('inner', None)).

    The first child of repeated field 'very_inner' on the Inner instance
    is referred to by (('inner', None), ('very_inner', 0)).

    Examples:
      # Correct reference to model where nation is a Message, district is
      # repeated Message and county is any not repeated field type.
      >>> make_path('nation.district-2.county')
      (('nation', None), ('district', 2), ('county', None))

      # Field is not part of model.
      >>> make_path('nation.made_up_field')
      None

      # nation field is not repeated and index provided.
      >>> make_path('nation-1')
      None

      # district field is repeated and no index provided.
      >>> make_path('nation.district')
      None

    Args:
      parameter_name: Name of query parameter as passed in from the request.
        In order to make a path, this parameter_name must point to a valid
        field within the message structure.  Nodes of the path that refer to
        repeated fields must be indexed with a number, non repeated nodes must
        not have an index.

    Returns:
      Parsed version of the parameter_name as a tuple of tuples:
        attribute: Name of attribute associated with path.
        index: Non-negative integer index when it is a repeated field, else
          None.
      Will return None if the parameter_name does not have the right prefix,
      does not point to a field within the message structure, does not have
      an index if it is a repeated field or has an index but is not a repeated
      field.
    """
    if not parameter_name.startswith(self.__parameter_prefix):
      return None
    parameter_name = parameter_name[len(self.__parameter_prefix):]

    path = []
    message_type = type(self.__messages[()])  # Get root message.

    for item in parameter_name.split('.'):
      # This will catch sub_message.real_message_field.not_real_field
      if not message_type:
        return None

      item_match = _FIELD_NAME_REGEX.match(item)
      if not item_match:
        return None
      attribute = item_match.group(1)
      index = item_match.group(2)
      # The index group is either absent (None) or a non-empty digit string.
      if index is not None:
        index = int(index)

      try:
        field = message_type.field_by_name(attribute)
      except KeyError:
        return None

      # Repeated fields require an index; non-repeated fields forbid one.
      if field.repeated != (index is not None):
        return None

      if isinstance(field, messages.MessageField):
        message_type = field.message_type
      else:
        # Only message fields can have further nested components.
        message_type = None

      # Path is valid so far.  Append node and continue.
      path.append((attribute, index))

    return tuple(path)

  def __check_index(self, parent_path, name, index):
    """Check correct index use and value relative to a given path.

    Check that for a given path the index is present for repeated fields
    and that it is in range for the existing list that it will be inserted
    in to or appended to.

    Args:
      parent_path: Path to check against name and index.
      name: Name of field to check for existence.
      index: Index to check.  If field is repeated, should be a number within
        range of the length of the field, or point to the next item for
        appending.

    Returns:
      True if the index is None or 0, or refers to an existing element or the
      tail of the list currently stored on the parent; else False.
    """
    # Don't worry about non-repeated fields.
    # It's also ok if index is 0 because that means next insert will append.
    if not index:
      return True

    parent = self.__messages.get(parent_path, None)
    value_list = getattr(parent, name, None)
    # If the list does not exist then the index should be 0.  Since it is
    # not, path is not valid.
    if not value_list:
      return False

    # The index must either point to an element of the list or to the tail.
    return len(value_list) >= index

  def __check_indexes(self, path):
    """Check that all indexes are valid and in the right order.

    This method must iterate over the path and check that all references
    to indexes point to an existing message or to the end of the list, meaning
    the next value should be appended to the repeated field.

    Args:
      path: Path to check indexes for.  Tuple of 2-tuples (name, index).  See
        make_path for more information.

    Returns:
      True if all the indexes of the path are within range, else False.
    """
    if path in self.__checked_indexes:
      return True

    # Start with the root message.
    parent_path = ()

    for name, index in path:
      next_path = parent_path + ((name, index),)
      # First look in the checked indexes cache.
      if next_path not in self.__checked_indexes:
        if not self.__check_index(parent_path, name, index):
          return False
        self.__checked_indexes.add(next_path)

      parent_path = next_path

    return True

  def __get_or_create_path(self, path):
    """Get a message from the messages cache or create it and add it.

    This method will also create any parent messages based on the path.

    When a new instance of a given message is created, it is stored in
    __messages by its path.

    Args:
      path: Path of message to get.  Path must be valid, in other words
        __check_indexes(path) returns true.  Tuple of 2-tuples (name, index).
        See make_path for more information.

    Returns:
      Message instance referred to by the path.  For the empty path this is
      the root message.
    """
    message = self.__messages.get(path, None)
    if message is not None:
      return message

    parent_path = ()
    parent = self.__messages[()]  # Get the root object

    for name, index in path:
      field = parent.field_by_name(name)
      next_path = parent_path + ((name, index),)
      next_message = self.__messages.get(next_path, None)
      if next_message is None:
        # First reference to this node: instantiate it, cache it and attach
        # it to its parent.
        next_message = field.message_type()
        self.__messages[next_path] = next_message
        if not field.repeated:
          setattr(parent, field.name, next_message)
        else:
          list_value = getattr(parent, field.name, None)
          if list_value is None:
            setattr(parent, field.name, [next_message])
          else:
            list_value.append(next_message)

      parent_path = next_path
      parent = next_message

    return parent

  def add_parameter(self, parameter, values):
    """Add a single parameter.

    Adds a single parameter and its value to the request message.

    Args:
      parameter: Query string parameter to map to request.
      values: List of values to assign to request message.

    Returns:
      True if parameter was valid and added to the message, else False.

    Raises:
      DecodeError if the parameter refers to a valid field, and the values
      parameter does not have one and only one value.  Non-valid query
      parameters may have multiple values and should not cause an error.
      DecodeError is also raised when a date-time value cannot be parsed or
      when the field type rejects the value with a TypeError.
    """
    path = self.make_path(parameter)

    if not path:
      return False

    # Must check that all indexes of all items in the path are correct before
    # instantiating any of them.  For example, consider:
    #
    #   class Repeated(object):
    #     ...
    #
    #   class Inner(object):
    #
    #     repeated = messages.MessageField(Repeated, 1, repeated=True)
    #
    #   class Outer(object):
    #
    #     inner = messages.MessageField(Inner, 1)
    #
    #   instance = Outer()
    #   builder = URLEncodedRequestBuilder(instance)
    #   builder.add_parameter('inner.repeated')
    #
    #   assert not hasattr(instance, 'inner')
    #
    # The check is done relative to the instance of Outer passed in to the
    # constructor of the builder.  This instance is not referred to at all
    # because all names are assumed to be relative to it.
    #
    # The 'repeated' part of the path is not correct because it is missing an
    # index.  Because it is missing an index, it should not create an instance
    # of Repeated.  In this case add_parameter will return False and have no
    # side effects.
    #
    # A correct path that would cause a new Inner instance to be inserted at
    # instance.inner and a new Repeated instance to be appended to the
    # instance.inner.repeated list would be 'inner.repeated-0'.
    if not self.__check_indexes(path):
      return False

    # Ok to build objects.
    parent_path = path[:-1]
    parent = self.__get_or_create_path(parent_path)
    name, index = path[-1]
    field = parent.field_by_name(name)

    if len(values) != 1:
      raise messages.DecodeError(
          'Found repeated values for field %s.' % field.name)

    value = values[0]

    if isinstance(field, messages.IntegerField):
      # NOTE(review): int() raises ValueError for non-numeric text and that
      # error is not converted to DecodeError here, unlike the date-time
      # branch below.  Left unchanged so callers see the same exception type.
      converted_value = int(value)
    # NOTE(review): the date-time check precedes the generic MessageField
    # check; presumably DateTimeField derives from MessageField -- confirm
    # before reordering these branches.
    elif isinstance(field, message_types.DateTimeField):
      try:
        converted_value = util.decode_datetime(value)
      except ValueError as e:
        raise messages.DecodeError(e)
    elif isinstance(field, messages.MessageField):
      # Just make sure it's instantiated.  Assignment to field or
      # appending to list is done in __get_or_create_path.
      self.__get_or_create_path(path)
      return True
    elif isinstance(field, messages.StringField):
      converted_value = value.decode('utf-8')
    elif isinstance(field, messages.BooleanField):
      # Only the case-insensitive text 'true' maps to True.
      converted_value = value.lower() == 'true'
    else:
      try:
        converted_value = field.type(value)
      except TypeError:
        raise messages.DecodeError('Invalid enum value "%s"' % value)

    if field.repeated:
      value_list = getattr(parent, field.name, None)
      if value_list is None:
        setattr(parent, field.name, [converted_value])
      elif index == len(value_list):
        value_list.append(converted_value)
      else:
        # Index should never be above len(value_list) because it was
        # verified during the index check above.
        value_list[index] = converted_value
    else:
      setattr(parent, field.name, converted_value)

    return True
@util.positional(1)
def encode_message(message, prefix=''):
  """Serialize a Message instance as a URL-encoded string.

  Args:
    message: Message instance to serialize.
    prefix: String placed in front of the names of all encoded fields.

  Returns:
    The message rendered in URL encoded form.

  Raises:
    messages.ValidationError if message is not initialized.
  """
  message.check_initialized()

  pairs = []

  def collect(node, name_prefix):
    """Append the (name, value) pairs of one message to the shared list.

    Args:
      node: Message whose assigned fields are being encoded.
      name_prefix: String placed in front of each field name of node.

    Returns:
      True if node had at least one assigned field, else False, meaning
      the message carried no values.
    """
    emitted = False
    ordered_fields = sorted(node.all_fields(), key=lambda f: f.number)
    for field in ordered_fields:
      assigned = node.get_assigned_value(field.name)
      if assigned is None:
        continue

      # At least one field is set, so the caller must not emit the
      # empty-message placeholder.
      emitted = True

      # Treat single values as one-element sequences.
      elements = assigned if field.repeated else [assigned]

      for position, element in enumerate(elements):
        # Repeated fields carry their position in the name.
        if field.repeated:
          full_name = '%s%s-%s' % (name_prefix, field.name, position)
        else:
          full_name = name_prefix + field.name

        if isinstance(field, message_types.DateTimeField):
          # Date-time values are written in their ISO format.
          encoded = element.isoformat()
        elif isinstance(field, messages.MessageField):
          # Nested messages contribute their own parameters; only an
          # empty one needs a placeholder with a blank value.
          if collect(element, full_name + '.'):
            continue
          encoded = ''
        elif isinstance(field, messages.BooleanField):
          encoded = 'true' if element else 'false'
        else:
          if isinstance(element, unicode):
            element = element.encode('utf-8')
          encoded = str(element)

        pairs.append((full_name, encoded))

    return emitted

  collect(message, prefix)

  # Carry over values that were not recognized when the message was decoded.
  for key in message.all_unrecognized_fields():
    values, _ = message.get_unrecognized_field_info(key)
    if not isinstance(values, (list, tuple)):
      values = (values,)
    pairs.extend((key, value) for value in values)

  return urllib.urlencode(pairs)
def decode_message(message_type, encoded_message, **kwargs):
  """Decode urlencoded content to message.

  Parameters are fed to the builder ordered by field name and then by
  numeric index, so that element 2 of a repeated field is always added
  before element 10.  A plain string sort would place 'tags-10' ahead of
  'tags-2'; the builder would then reject index 10 as out of range and the
  value would end up among the unrecognized fields.

  Args:
    message_type: Message class; it is called with no arguments to create
      the instance that the URL encoded content is merged into.
    encoded_message: URL encoded message.
    **kwargs: Passed through to URLEncodedRequestBuilder.  `prefix` is the
      prefix expected at the start of valid parameter names.

  Returns:
    Decoded instance of message_type.  Parameters that could not be added
    to the message are stored on it as unrecognized fields.
  """
  def index_aware_key(argument):
    """Build a sort key that compares repeated-field indexes numerically."""
    components = []
    for component in argument.split('.'):
      head, dash, tail = component.rpartition('-')
      index = -1
      if dash and tail.isdigit():
        try:
          index = int(tail)
        except ValueError:
          # Digit-like characters that int() cannot parse; treat the whole
          # component as a plain name.
          index = -1
      if index < 0:
        head = component
      # Every node is (str, int), so keys always compare like with like.
      components.append((head, index))
    # The raw name breaks ties to keep the ordering deterministic.
    return components, argument

  message = message_type()
  builder = URLEncodedRequestBuilder(message, **kwargs)
  arguments = cgi.parse_qs(encoded_message, keep_blank_values=True)
  for argument in sorted(arguments, key=index_aware_key):
    values = arguments[argument]
    added = builder.add_parameter(argument, values)
    # Save off any unknown values, so they're still accessible.
    if not added:
      message.set_unrecognized_field(argument, values, messages.Variant.STRING)
  message.check_initialized()
  return message