Update Google App Engine to 1.2.2 in thirdparty folder.
[Melange.git] / thirdparty / google_appengine / google / appengine / ext / gql / __init__.py
blob4ead04e20005eb518554a70576beddeb12e72d23
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
18 """GQL -- the SQL-like interface to the datastore.
20 Defines the GQL-based query class, which is a query mechanism
21 for the datastore which provides an alternative model for interacting with
22 data stored.
23 """
29 import calendar
30 import datetime
31 import logging
32 import re
33 import time
35 from google.appengine.api import datastore
36 from google.appengine.api import datastore_errors
37 from google.appengine.api import datastore_types
38 from google.appengine.api import users
# Alias of datastore.MultiQuery; GQL.Bind() returns one of these when a single
# GQL query expands into several datastore queries.
MultiQuery = datastore.MultiQuery

# Level handed to logging.log() for this module's trace output; it sits one
# step below logging.DEBUG.
LOG_LEVEL = logging.DEBUG - 1

# Naive datetime for POSIX timestamp 0 (1970-01-01 00:00:00 UTC).
# NOTE(review): not referenced in the portion of the file reviewed here.
_EPOCH = datetime.datetime.utcfromtimestamp(0)
def Execute(query_string, *args, **keyword_args):
  """Parse, bind and run a GQL query in a single call.

  The query string is compiled into an unbound proto-query (a GQL instance),
  which is then bound to the supplied arguments and executed.

  Args:
    query_string: properly formatted GQL query string.
    args: positional arguments used to bind numeric references (:1, :2, ...)
      in the query.
    keyword_args: dictionary-based arguments (for named parameters). The
      special key '_app' is removed and passed to the GQL constructor instead
      of being used as a bind parameter.

  Returns:
    the result of running the query with *args.
  """
  app_id = keyword_args.pop('_app', None)
  bound_query = GQL(query_string, _app=app_id).Bind(args, keyword_args)
  return bound_query.Run()
67 class GQL(object):
68 """A GQL interface to the datastore.
70 GQL is a SQL-like language which supports more object-like semantics
71 in a language that is familiar to SQL users. The language supported by
72 GQL will change over time, but will start off with fairly simple
73 semantics.
75 - reserved words are case insensitive
76 - names are case sensitive
78 The syntax for SELECT is fairly straightforward:
80 SELECT [* | __key__ ] FROM <entity>
81 [WHERE <condition> [AND <condition> ...]]
82 [ORDER BY <property> [ASC | DESC] [, <property> [ASC | DESC] ...]]
83 [LIMIT [<offset>,]<count>]
84 [OFFSET <offset>]
85 [HINT (ORDER_FIRST | FILTER_FIRST | ANCESTOR_FIRST)]
87 <condition> := <property> {< | <= | > | >= | = | != | IN} <value>
88 <condition> := <property> {< | <= | > | >= | = | != | IN} CAST(<value>)
89 <condition> := <property> IN (<value>, ...)
90 <condition> := ANCESTOR IS <entity or key>
92 Currently the parser is LL(1) because of the simplicity of the grammar
93 (as it is largely predictive with one token lookahead).
95 The class is implemented using some basic regular expression tokenization
96 to pull out reserved tokens and then the recursive descent parser will act
97 as a builder for the pre-compiled query. This pre-compiled query is then
98 bound to arguments before executing the query.
100 Initially, three parameter passing mechanisms are supported when calling
101 Execute():
103 - Positional parameters
104 Execute('SELECT * FROM Story WHERE Author = :1 AND Date > :2')
105 - Named parameters
106 Execute('SELECT * FROM Story WHERE Author = :author AND Date > :date')
107 - Literals (numbers, and strings)
108 Execute('SELECT * FROM Story WHERE Author = \'James\'')
110 Users are also given the option of doing type conversions to other datastore
111 types (e.g. db.Email, db.GeoPt). The language provides a conversion function
112 which allows the caller to express conversions of both literals and
113 parameters. The current conversion operators are:
114 - GEOPT(float, float)
115 - USER(str)
116 - KEY(kind, id/name[, kind, id/name...])
117 - DATETIME(year, month, day, hour, minute, second)
118 - DATETIME('YYYY-MM-DD HH:MM:SS')
119 - DATE(year, month, day)
120 - DATE('YYYY-MM-DD')
121 - TIME(hour, minute, second)
122 - TIME('HH:MM:SS')
124 We will properly serialize and quote all values.
126 It should also be noted that there are some caveats to the queries that can
127 be expressed in the syntax. The parser will attempt to make these clear as
128 much as possible, but some of the caveats include:
129 - There is no OR operation. In most cases, you should prefer to use IN to
130 express the idea of wanting data matching one of a set of values.
131 - You cannot express inequality operators on multiple different properties
132 - You can only have one != operator per query (related to the previous
133 rule).
134 - The IN and != operators must be used carefully because they can
135 dramatically raise the amount of work done by the datastore. As such,
136 there is a limit on the number of elements you can use in IN statements.
137 This limit is set fairly low. Currently, a max of 30 datastore queries is
138 allowed in a given GQL query. != translates into 2x the number of
139 datastore queries, and IN multiplies by the number of elements in the
140 clause (so having two IN clauses, one with 5 elements, the other with 6
141 will cause 30 queries to occur).
142 - Literals can take the form of basic types or as type-cast literals. On
143 the other hand, literals within lists can currently only take the form of
144 simple types (strings, integers, floats).
147 SELECT * will return an iterable set of entities; SELECT __key__ will return
148 an iterable set of Keys.
151 TOKENIZE_REGEX = re.compile(r"""
152 (?:'[^'\n\r]*')+|
153 <=|>=|!=|=|<|>|
154 :\w+|
157 -?\d+(?:\.\d+)?|
158 \w+|
159 \(|\)|
161 """, re.VERBOSE | re.IGNORECASE)
# Upper bound on the number of datastore queries one GQL query may expand to
# through IN / != filters; enforced in __AddMultiQuery.
MAX_ALLOWABLE_QUERIES = datastore.MAX_ALLOWABLE_QUERIES

# Sentinel used in place of a property name as the filter key of an
# "ANCESTOR IS" clause.
__ANCESTOR = -1
def __init__(self, query_string, _app=None, _auth_domain=None):
  """Compile query_string into an unbound, pre-compiled query.

  The text is tokenized and parsed right away; values for references such as
  :1 or :name are supplied later through Bind().

  Args:
    query_string: properly formatted GQL query string.
    _app: forwarded as _app to datastore.Query and to Key.from_path when the
      query is bound.
    _auth_domain: forwarded as _auth_domain to users.User by the USER() cast.

  Raises:
    datastore_errors.BadQueryError: if the query is not parsable.
  """
  # Binding context.
  self.__app = _app
  self.__auth_domain = _auth_domain

  # Parse results; -1 means "not specified" for offset and limit.
  self._entity = ''
  self.__filters = {}
  self.__has_ancestor = False
  self.__orderings = []
  self.__offset = -1
  self.__limit = -1
  self.__hint = ''

  # Token stream and cursor consumed by the recursive descent parser.
  self.__symbols = self.TOKENIZE_REGEX.findall(query_string)
  self.__next_symbol = 0

  parsed_ok = self.__Select()
  if not parsed_ok:
    raise datastore_errors.BadQueryError('Unable to parse query')
def Bind(self, args, keyword_args):
  """Bind the compiled query to concrete argument values.

  Assumes that the input args are first positional, then a dictionary.
  So, if the query contains references to :1, :2 and :name, it is assumed
  that arguments are passed as (:1, :2, dict) where dict contains a mapping
  [name] -> value.

  Args:
    args: the arguments to bind to the object's unbound references.
    keyword_args: dictionary-based arguments (for named parameters).

  Raises:
    datastore_errors.BadArgumentError: when arguments are left unbound
      (missing from the inputs arguments), when positional arguments are
      supplied but never referenced, or when arguments do not match the
      expected type.

  Returns:
    The bound datastore.Query object. This may take the form of a MultiQuery
    object if the GQL query will require multiple backend queries to satisfy.
  """
  positional_indexes = frozenset(xrange(len(args)))
  consumed = set()

  # IN and != filters fan out into several datastore queries; everything else
  # is applied identically to each of them.
  multi_filters = self.EnumerateQueries(consumed, args, keyword_args)
  query_count = len(multi_filters) or 1

  queries = [datastore.Query(self._entity, _app=self.__app,
                             keys_only=self._keys_only)
             for _ in xrange(query_count)]

  logging.log(LOG_LEVEL,
              'Binding with %i positional args %s and %i keywords %s',
              len(args), args, len(keyword_args), keyword_args)
  for (identifier, condition), value_list in self.__filters.iteritems():
    for operator, params in value_list:
      value = self.__Operate(args, keyword_args, consumed, operator, params)
      if self.__IsMultiQuery(condition):
        # Already accounted for by EnumerateQueries().
        continue
      for query in queries:
        self.__AddFilterToQuery(identifier, condition, value, query)

  leftover = positional_indexes - consumed
  if leftover:
    # Report one-based positions, matching the :N syntax.
    raise datastore_errors.BadArgumentError(
        'Unused positional arguments %s' % [index + 1 for index in leftover])

  if multi_filters:
    logging.log(LOG_LEVEL, 'Multiple Queries Bound: %s', multi_filters)
    for query, extra_filters in zip(queries, multi_filters):
      query.update(extra_filters)

  if self.__orderings:
    for query in queries:
      query.Order(*self.__orderings)

  if query_count > 1:
    return MultiQuery(queries, self.__orderings)
  return queries[0]
def EnumerateQueries(self, used_args, args, keyword_args):
  """Build the list of filter maps needed for the multi-query filters.

  "IN" and "!=" filters cannot be answered by one datastore query, so every
  combination of their expansions is enumerated here.

  Args:
    used_args: set of used positional parameters (output only variable used in
      reporting for unused positional args)
    args: positional arguments referenced by the proto-query in self. This
      assumes the input is a tuple (and can also be called with a varargs
      param).
    keyword_args: dict of keyword arguments referenced by the proto-query in
      self.

  Returns:
    A list of maps [(identifier, condition) -> value] of all queries needed
    to satisfy the GQL query with the given input arguments.
  """
  expanded = []
  for filter_key, value_list in self.__filters.iteritems():
    identifier, condition = filter_key
    for operator, params in value_list:
      bound_value = self.__Operate(args, keyword_args, used_args, operator,
                                   params)
      self.__AddMultiQuery(identifier, condition, bound_value, expanded)
  return expanded
def __CastError(self, operator, values, error_message):
  """Report a failed type cast; never returns.

  Args:
    operator: name of the cast operation that failed (upper-cased in the
      message).
    values: value list passed to the cast operator
    error_message: string to emit as part of the 'Cast Error' string.

  Raises:
    BadQueryError and passes on an error message from the caller. Will raise
    BadQueryError on all calls.
  """
  message = ('Type Cast Error: unable to cast %r with operation %s (%s)' %
             (values, operator.upper(), error_message))
  raise datastore_errors.BadQueryError(message)
312 def __CastNop(self, values):
313 """Return values[0] if it exists -- default for most where clauses."""
314 if len(values) != 1:
315 self.__CastError(values, 'nop', 'requires one and only one value')
316 else:
317 return values[0]
319 def __CastList(self, values):
320 """Return the full list of values -- only useful for IN clause."""
321 if values:
322 return values
323 else:
324 return None
def __CastKey(self, values):
  """Build a Key from a (kind, id/name, ...) path or one encoded string.

  Args:
    values: either an even-length list of path elements, or a list holding a
      single encoded key string.

  Returns:
    The datastore_types.Key built from values.

  Raises:
    BadQueryError (via __CastError) for any other shape of input.
  """
  value_count = len(values)
  if value_count % 2 == 0:
    return datastore_types.Key.from_path(_app=self.__app, *values)
  if value_count == 1 and isinstance(values[0], basestring):
    return datastore_types.Key(values[0])
  self.__CastError('KEY', values,
                   'requires an even number of operands '
                   'or a single encoded string')
def __CastGeoPt(self, values):
  """Build a GeoPt from exactly two input parameters.

  Args:
    values: list of the two values passed positionally to GeoPt().

  Returns:
    The resulting datastore_types.GeoPt.

  Raises:
    BadQueryError (via __CastError) unless exactly two values are given.
  """
  if len(values) == 2:
    return datastore_types.GeoPt(*values)
  self.__CastError('GEOPT', values, 'requires 2 input parameters')
343 def __CastUser(self, values):
344 """Cast to User() class using the email address in values[0]."""
345 if len(values) != 1:
346 self.__CastError(values, 'user', 'requires one and only one value')
347 else:
348 return users.User(email=values[0], _auth_domain=self.__auth_domain)
350 def __EncodeIfNeeded(self, value):
351 """Simple helper function to create an str from possibly unicode strings.
352 Args:
353 value: input string (should pass as an instance of str or unicode).
355 if isinstance(value, unicode):
356 return value.encode('utf8')
357 else:
358 return value
360 def __CastDate(self, values):
361 """Cast DATE values (year/month/day) from input (to datetime.datetime).
363 Casts DATE input values formulated as ISO string or time tuple inputs.
365 Args:
366 values: either a single string with ISO time representation or 3
367 integer valued date tuple (year, month, day).
369 Returns:
370 datetime.datetime value parsed from the input values.
373 if len(values) == 1:
374 value = self.__EncodeIfNeeded(values[0])
375 if isinstance(value, str):
376 try:
377 time_tuple = time.strptime(value, '%Y-%m-%d')[0:6]
378 except ValueError, err:
379 self.__CastError('DATE', values, err)
380 else:
381 self.__CastError('DATE', values, 'Single input value not a string')
382 elif len(values) == 3:
383 time_tuple = (values[0], values[1], values[2], 0, 0, 0)
384 else:
385 self.__CastError('DATE', values,
386 'function takes 1 string or 3 integer values')
388 try:
389 return datetime.datetime(*time_tuple)
390 except ValueError, err:
391 self.__CastError('DATE', values, err)
393 def __CastTime(self, values):
394 """Cast TIME values (hour/min/sec) from input (to datetime.datetime).
396 Casts TIME input values formulated as ISO string or time tuple inputs.
398 Args:
399 values: either a single string with ISO time representation or 1-4
400 integer valued time tuple (hour), (hour, minute),
401 (hour, minute, second), (hour, minute, second, microsec).
403 Returns:
404 datetime.datetime value parsed from the input values.
406 if len(values) == 1:
407 value = self.__EncodeIfNeeded(values[0])
408 if isinstance(value, str):
409 try:
410 time_tuple = time.strptime(value, '%H:%M:%S')
411 except ValueError, err:
412 self.__CastError('TIME', values, err)
413 time_tuple = (1970, 1, 1) + time_tuple[3:]
414 time_tuple = time_tuple[0:6]
415 elif isinstance(value, int):
416 time_tuple = (1970, 1, 1, value)
417 else:
418 self.__CastError('TIME', values,
419 'Single input value not a string or integer hour')
420 elif len(values) <= 4:
421 time_tuple = (1970, 1, 1) + tuple(values)
422 else:
423 self.__CastError('TIME', values, err)
425 try:
426 return datetime.datetime(*time_tuple)
427 except ValueError, err:
428 self.__CastError('TIME', values, err)
430 def __CastDatetime(self, values):
431 """Cast DATETIME values (string or tuple) from input (to datetime.datetime).
433 Casts DATETIME input values formulated as ISO string or datetime tuple
434 inputs.
436 Args:
437 values: either a single string with ISO representation or 3-7
438 integer valued time tuple (year, month, day, ...).
440 Returns:
441 datetime.datetime value parsed from the input values.
443 if len(values) == 1:
444 value = self.__EncodeIfNeeded(values[0])
445 if isinstance(value, str):
446 try:
447 time_tuple = time.strptime(str(value), '%Y-%m-%d %H:%M:%S')[0:6]
448 except ValueError, err:
449 self.__CastError('DATETIME', values, err)
450 else:
451 self.__CastError('DATETIME', values, 'Single input value not a string')
452 else:
453 time_tuple = values
455 try:
456 return datetime.datetime(*time_tuple)
457 except ValueError, err:
458 self.__CastError('DATETIME', values, err)
def __Operate(self, args, keyword_args, used_args, operator, params):
  """Resolve params to values and run them through the named cast operator.

  Args:
    args,keyword_args: arguments passed in for binding purposes (used in
      binding positional and keyword based arguments).
    used_args: set of numeric arguments accessed in this call.
      values are ints representing used zero-based positional arguments.
      used as an output parameter with new used arguments added to the set.
    operator: string representing the operator to use 'nop' just returns
      the first value from params.
    params: parameter list to operate on (positional references, named
      references, or literals).

  Returns:
    A value which can be used as part of a GQL filter description (either a
    list of datastore types -- for use with IN, or a single datastore type --
    for use with other filters). None when params is empty.

  Raises:
    BadQueryError (via __Error) if operator is not a known cast operator.
  """
  if not params:
    return None

  bound_values = []
  for param in params:
    if isinstance(param, Literal):
      bound = param.Get()
    else:
      bound = self.__GetParam(param, args, keyword_args)
      if isinstance(param, int):
        # References are one-based; used_args tracks zero-based positions.
        used_args.add(param - 1)
      logging.log(LOG_LEVEL, 'found param for bind: %s value: %s',
                  param, bound)
    bound_values.append(bound)

  logging.log(LOG_LEVEL, '%s Operating on values: %s',
              operator, repr(bound_values))

  if operator not in self.__cast_operators:
    self.__Error('Operation %s is invalid' % operator)

  # The table stores plain functions, so self has to be passed explicitly.
  return self.__cast_operators[operator](self, bound_values)
505 def __IsMultiQuery(self, condition):
506 """Return whether or not this condition could require multiple queries."""
507 return condition.lower() in ('in', '!=')
509 def __GetParam(self, reference, args, keyword_args):
510 """Get the specified parameter from the input arguments.
512 Args:
513 reference: id for a filter reference in the filter list (string or
514 number)
515 args: positional args passed in by the user (tuple of arguments, indexed
516 numerically by "reference")
517 keyword_args: dict of keyword based arguments (strings in "reference")
519 Returns:
520 The specified param from the input list.
522 Raises:
523 BadArgumentError if the referenced argument doesn't exist.
525 num_args = len(args)
526 if isinstance(reference, int):
527 if reference <= num_args:
528 return args[reference - 1]
529 else:
530 raise datastore_errors.BadArgumentError(
531 'Missing argument for bind, requires argument #%i, '
532 'but only has %i args.' % (reference, num_args))
533 elif isinstance(reference, basestring):
534 if reference in keyword_args:
535 return keyword_args[reference]
536 else:
537 raise datastore_errors.BadArgumentError(
538 'Missing named arguments for bind, requires argument %s' %
539 reference)
540 else:
541 assert False, 'Unknown reference %s' % reference
def __AddMultiQuery(self, identifier, condition, value, enumerated_queries):
  """Expand the enumerated filter maps to cover one IN or != condition.

  Conditions other than IN and != leave enumerated_queries untouched.

  Args:
    identifier: property being filtered by this condition
    condition: filter condition (e.g. !=,in)
    value: value being bound
    enumerated_queries: in/out list of already bound queries -> expanded list
      with the full enumeration required to satisfy the condition query

  Raises:
    BadQueryError if the query is keys-only and the condition is IN or !=.
    BadArgumentError if the filter is invalid (namely non-list with IN) or
    the expansion would exceed MAX_ALLOWABLE_QUERIES.
  """
  lowered = condition.lower()
  if lowered in ('!=', 'in') and self._keys_only:
    raise datastore_errors.BadQueryError(
        'Keys only queries do not support IN or != filters.')

  def CloneQueries(queries, n):
    """Grow queries in place so that each filter map appears n times.

    Copies are sorted so that duplicates of the same map end up adjacent.

    Args:
      queries: list of all filters to clone
      n: number of copies to make

    Returns:
      Number of iterations needed to fill the structure
    """
    if queries:
      original_size = len(queries)
      copies = []
      for _ in xrange(n - 1):
        copies.extend(filter_map.copy() for filter_map in queries)
      queries.extend(copies)
      queries.sort()
      return original_size

    # Nothing enumerated yet: start with n empty filter maps.
    for _ in xrange(n):
      queries.append({})
    return 1

  if condition == '!=':
    if len(enumerated_queries) * 2 > self.MAX_ALLOWABLE_QUERIES:
      raise datastore_errors.BadArgumentError(
          'Cannot satisfy query -- too many IN/!= values.')

    # x != v is answered as the pair of queries (x < v) and (x > v).
    pair_count = CloneQueries(enumerated_queries, 2)
    less_than = '%s <' % identifier
    greater_than = '%s >' % identifier
    for pair_index in xrange(pair_count):
      base = 2 * pair_index
      enumerated_queries[base][less_than] = value
      enumerated_queries[base + 1][greater_than] = value
  elif lowered == 'in':
    if not isinstance(value, list):
      raise datastore_errors.BadArgumentError('List expected for "IN" filter')

    in_list_size = len(value)
    if len(enumerated_queries) * in_list_size > self.MAX_ALLOWABLE_QUERIES:
      raise datastore_errors.BadArgumentError(
          'Cannot satisfy query -- too many IN/!= values.')

    # x IN (a, b, ...) is answered as one equality query per list element.
    clone_count = CloneQueries(enumerated_queries, in_list_size)
    equality = '%s =' % identifier
    for clone_num in xrange(clone_count):
      first_slot = in_list_size * clone_num
      for value_num, list_val in enumerate(value):
        enumerated_queries[first_slot + value_num][equality] = list_val
def __AddFilterToQuery(self, identifier, condition, value, query):
  """Apply one bound filter (or the ancestor restriction) to query.

  Args:
    identifier: name of the property (or self.__ANCESTOR for ancestors)
    condition: test condition
    value: test value passed from the caller
    query: query to add the filter to
  """
  if identifier == self.__ANCESTOR:
    logging.log(LOG_LEVEL, 'Setting ancestor query for ancestor %s', value)
    query.Ancestor(value)
    return

  filter_condition = '%s %s' % (identifier, condition)
  logging.log(LOG_LEVEL, 'Setting filter on "%s" with value "%s"',
              filter_condition, value.__class__)
  datastore._AddOrAppend(query, filter_condition, value)
def Run(self, *args, **keyword_args):
  """Bind the query to the given arguments and execute it.

  Similar to datastore.Query.Run.
  Assumes that limit == -1 or > 0

  Args:
    args: arguments used to bind to references in the compiled query object.
    keyword_args: dictionary-based arguments (for named parameters).

  Returns:
    A list of results if a query count limit was passed.
    A result iterator if no limit was given.
  """
  bound = self.Bind(args, keyword_args)

  # -1 means no OFFSET was parsed.
  offset = self.__offset
  if offset == -1:
    offset = 0

  if self.__limit != -1:
    return bound.Get(self.__limit, offset)

  # No limit: hand back the iterator after skipping the first offset results.
  results = bound.Run()
  try:
    for _ in xrange(offset):
      results.next()
  except StopIteration:
    pass
  return results
663 def filters(self):
664 """Return the compiled list of filters."""
665 return self.__filters
667 def hint(self):
668 """Return the datastore hint."""
669 return self.__hint
671 def limit(self):
672 """Return numerical result count limit."""
673 return self.__limit
675 def orderings(self):
676 """Return the result ordering list."""
677 return self.__orderings
# Iterating over a GQL object runs it with no bind arguments.
__iter__ = Run

# Patterns applied to single tokens by __AcceptRegex. Each has one capture
# group, which is the value __AcceptRegex hands back.
__result_type_regex = re.compile(r'(\*|__key__)')
__quoted_string_regex = re.compile(r'((?:\'[^\'\n\r]*\')+)')
__ordinal_regex = re.compile(r':(\d+)$')
__named_regex = re.compile(r':(\w+)$')
__identifier_regex = re.compile(r'(\w+)$')
__conditions_regex = re.compile(r'(<=|>=|!=|=|<|>|is|in)$', re.IGNORECASE)
__number_regex = re.compile(r'(\d+)$')
__cast_regex = re.compile(
    r'(geopt|user|key|date|time|datetime)$', re.IGNORECASE)

# Cast operator name -> implementation. The entries are plain functions, so
# __Operate passes self explicitly when calling them. 'list' and 'nop' are
# internal operators with no keyword in the query language.
__cast_operators = {
    'geopt': __CastGeoPt,
    'user': __CastUser,
    'key': __CastKey,
    'datetime': __CastDatetime,
    'date': __CastDate,
    'time': __CastTime,
    'list': __CastList,
    'nop': __CastNop,
}
def __Error(self, error_message):
  """Raise a parse error that names the current position; never returns.

  Args:
    error_message: string to emit as part of the 'Parse Error' string.

  Raises:
    BadQueryError and passes on an error message from the caller. Will raise
    BadQueryError on all calls to __Error()
  """
  position = self.__next_symbol
  if position < len(self.__symbols):
    raise datastore_errors.BadQueryError(
        'Parse Error: %s at symbol %s' %
        (error_message, self.__symbols[position]))
  raise datastore_errors.BadQueryError(
      'Parse Error: %s at end of string' % error_message)
def __Accept(self, symbol_string):
  """Consume the next symbol if it equals symbol_string.

  The comparison upper-cases the symbol, so symbol_string is expected in
  upper case.

  Args:
    symbol_string: the symbol to look for.

  Returns:
    True if the symbol matched and was consumed, False otherwise (including
    when no symbols are left).
  """
  if self.__next_symbol >= len(self.__symbols):
    return False

  current = self.__symbols[self.__next_symbol].upper()
  logging.log(LOG_LEVEL, '\t%s', self.__symbols)
  logging.log(LOG_LEVEL, '\tExpect: %s Got: %s', symbol_string, current)
  if current != symbol_string:
    return False

  self.__next_symbol += 1
  return True
def __Expect(self, symbol_string):
  """Consume symbol_string as the next symbol, or fail the parse.

  Args:
    symbol_string: next symbol expected by the caller

  Raises:
    BadQueryError if the next symbol doesn't match the parameter passed in.
  """
  accepted = self.__Accept(symbol_string)
  if not accepted:
    self.__Error('Unexpected Symbol: %s' % symbol_string)
def __AcceptRegex(self, regex):
  """Advance and return the symbol if the next symbol matches the regex.

  Args:
    regex: the compiled regular expression to attempt acceptance on.

  Returns:
    The first group in the expression to allow for convenient access
    to simple matches. If the expression defines no group, the whole
    matched text is returned instead.
    None if no match is found or no symbols are left.
  """
  if self.__next_symbol >= len(self.__symbols):
    return None

  match_symbol = self.__symbols[self.__next_symbol]
  logging.log(LOG_LEVEL, '\taccept %s on symbol %s', regex, match_symbol)
  match = regex.match(match_symbol)
  if not match:
    return None

  self.__next_symbol += 1
  if match.groups():
    matched_string = match.group(1)
  else:
    # Without this fallback a group-less pattern would leave matched_string
    # unbound after the symbol had already been consumed.
    matched_string = match.group(0)

  logging.log(LOG_LEVEL, '\taccepted %s', matched_string)
  return matched_string
def __AcceptTerminal(self):
  """Succeed only when every symbol has been consumed.

  Returns:
    True

  Raises:
    BadQueryError if there are unconsumed symbols in the query.
  """
  symbols_left = len(self.__symbols) - self.__next_symbol
  if symbols_left > 0:
    self.__Error('Expected no additional symbols')
  return True
def __Select(self):
  """Consume the SELECT clause and everything that follows it.

  Requires the SELECT keyword, then optionally accepts '*' or '__key__' and
  records whether this is a keys-only query before moving on to FROM.

  Returns:
    True if parsing completed okay.
  """
  self.__Expect('SELECT')
  selected = self.__AcceptRegex(self.__result_type_regex)
  self._keys_only = selected == '__key__'
  return self.__From()
def __From(self):
  """Consume the FROM clause.

  Expects FROM followed by a single entity name, stores the name and then
  continues with the WHERE clause.

  Returns:
    True if parsing completed okay.
  """
  self.__Expect('FROM')
  entity_name = self.__AcceptRegex(self.__identifier_regex)
  if not entity_name:
    self.__Error('Identifier Expected')
    return False

  self._entity = entity_name
  return self.__Where()
def __Where(self):
  """Consume the WHERE clause, if there is one.

  The filter list recurses on each AND; without a WHERE keyword parsing
  continues directly with ORDER BY.

  Returns:
    True if parsing the WHERE clause completed correctly, as well as all
    subsequent clauses
  """
  if not self.__Accept('WHERE'):
    return self.__OrderBy()
  return self.__FilterList()
def __FilterList(self):
  """Consume the filter list (remainder of the WHERE clause).

  Parses one "<identifier> <condition> <value>" filter, where the value is
  tried as a reference, then a literal, then a type cast or list. Recurses
  on AND and otherwise continues with ORDER BY.

  Returns:
    True if this filter and all subsequent clauses parsed okay.
  """
  identifier = self.__AcceptRegex(self.__identifier_regex)
  if not identifier:
    self.__Error('Invalid WHERE Identifier')
    return False

  condition = self.__AcceptRegex(self.__conditions_regex)
  if not condition:
    self.__Error('Invalid WHERE Condition')
    return False
  self.__CheckFilterSyntax(identifier, condition)

  added = self.__AddSimpleFilter(identifier, condition, self.__Reference())
  if not added:
    added = self.__AddSimpleFilter(identifier, condition, self.__Literal())
  if not added:
    type_cast = self.__TypeCast()
    if (not type_cast or
        not self.__AddProcessedParameterFilter(identifier, condition,
                                               *type_cast)):
      self.__Error('Invalid WHERE condition')

  if self.__Accept('AND'):
    return self.__FilterList()
  return self.__OrderBy()
def __GetValueList(self):
  """Read a comma separated parameter list from the tokens.

  Each element must be a positional reference, a named reference or a
  literal.

  Returns:
    A list of values parsed from the input, with values taking the form of
    strings (unbound, named reference), integers (unbound, positional
    reference), or Literal() (bound value usable directly as part of a filter
    with no additional information).

  Raises:
    BadQueryError (via __Error) if an element is neither a reference nor a
    literal.
  """
  params = []
  while True:
    item = self.__Reference()
    if not item:
      item = self.__Literal()
    if not item:
      self.__Error('Parameter list requires literal or reference parameter')
    params.append(item)

    if not self.__Accept(','):
      break

  return params
def __CheckFilterSyntax(self, identifier, condition):
  """Check that filter conditions are valid and throw errors if not.

  Enforces that ANCESTOR is only combined with IS, that IS is only used with
  ANCESTOR, and that at most one ANCESTOR IS clause appears.

  Args:
    identifier: identifier being used in comparison
    condition: string form of the comparison operator used in the filter

  Raises:
    BadQueryError (via __Error) if any of the rules above is violated.
  """
  is_ancestor = identifier.lower() == 'ancestor'
  is_condition = condition.lower() == 'is'

  if is_ancestor:
    if not is_condition:
      self.__Error('"IS" expected to follow "ANCESTOR"')
    if self.__has_ancestor:
      self.__Error('Only one "ANCESTOR IS" clause allowed')
  elif is_condition:
    self.__Error('"IS" can only be used when comparing against "ANCESTOR"')
def __AddProcessedParameterFilter(self, identifier, condition,
                                  operator, parameters):
  """Record a filter whose parameters are post-processed at bind time.

  Args:
    identifier: property being compared.
    condition: comparison operation being used with the property (e.g. !=).
    operator: operation to perform on the parameters before adding the filter.
    parameters: list of bound parameters passed to 'operator' before creating
      the filter. When using the parameters as a pass-through, pass 'nop'
      into the operator field and the first value will be used unprocessed).

  Returns:
    True if the filter was okay to add. False if parameters is None or its
    first element is None.

  Raises:
    BadQueryError (via __Error) if a list of values is used with a condition
    other than IN.
  """
  if parameters is None or parameters[0] is None:
    return False

  logging.log(LOG_LEVEL, 'Adding Filter %s %s %s',
              identifier, condition, repr(parameters))

  if identifier.lower() == 'ancestor':
    # Ancestor clauses are stored under the sentinel key, not a property name.
    self.__has_ancestor = True
    filter_rule = (self.__ANCESTOR, 'is')
    assert condition.lower() == 'is'
  else:
    filter_rule = (identifier, condition)

  if operator == 'list' and condition.lower() != 'in':
    self.__Error('Only IN can process a list of values')

  self.__filters.setdefault(filter_rule, []).append((operator, parameters))
  return True
def __AddSimpleFilter(self, identifier, condition, parameter):
  """Record a filter whose single parameter needs no post-processing.

  Args:
    identifier: identifier being used in comparison
    condition: string form of the comparison operator used in the filter
    parameter: ID of the reference being made or a value of type Literal

  Returns:
    True if the filter could be added.
    False otherwise.
  """
  single_parameter = [parameter]
  return self.__AddProcessedParameterFilter(identifier, condition,
                                            'nop', single_parameter)
def __Reference(self):
  """Consume one parameter reference and return it.

  Handles a positional reference (:1) or a named reference (:email), never a
  list of them.

  Returns:
    The name of the reference (integer for positional parameters or string
    for named parameters) to a bind-time parameter. None if the next symbol
    is not a reference.
  """
  logging.log(LOG_LEVEL, 'Try Reference')
  ordinal = self.__AcceptRegex(self.__ordinal_regex)
  if ordinal:
    return int(ordinal)

  named = self.__AcceptRegex(self.__named_regex)
  return named or None
def __Literal(self):
  """Parse a literal from the token list.

  Tries, in order: integer, float, quoted string, TRUE/FALSE. The symbol is
  only consumed when one of them matches.

  Returns:
    The parsed literal from the input string wrapped in Literal() (an
    integer, floating point, string or boolean value), or None if the next
    symbol is not a literal or no symbols are left.
  """
  logging.log(LOG_LEVEL, 'Try Literal')
  literal = None

  # Guard the direct indexing below: at end of input there is nothing to
  # convert, and the caller should get None (and report a parse error)
  # rather than an IndexError.
  if self.__next_symbol < len(self.__symbols):
    symbol = self.__symbols[self.__next_symbol]
    try:
      literal = int(symbol)
    except ValueError:
      pass
    else:
      self.__next_symbol += 1

    if literal is None:
      try:
        literal = float(symbol)
      except ValueError:
        pass
      else:
        self.__next_symbol += 1

  if literal is None:
    literal = self.__AcceptRegex(self.__quoted_string_regex)
    if literal:
      # Strip the surrounding quotes and collapse doubled quotes.
      literal = literal[1:-1].replace("''", "'")

  if literal is None:
    if self.__Accept('TRUE'):
      literal = True
    elif self.__Accept('FALSE'):
      literal = False

  if literal is not None:
    return Literal(literal)
  return None
def __TypeCast(self):
  """Consume a type-cast expression if one starts at the current token.

  Casts are written like function calls on their parameters. A bare
  parenthesised value list, with no cast name in front, is reported as
  the 'list' operator.

  Returns:
    A tuple (cast operator, params): the lower-cased operator name (or
    'list') and the parameters parsed from between the parentheses.

    None - if there is no TypeCast function.
  """
  logging.log(LOG_LEVEL, 'Try Type Cast')

  cast_name = self.__AcceptRegex(self.__cast_regex)
  if cast_name:
    # A named cast must be followed by its opening parenthesis.
    cast_name = cast_name.lower()
    self.__Expect('(')
  elif self.__Accept('('):
    cast_name = 'list'
  else:
    return None

  arguments = self.__GetValueList()
  self.__Expect(')')

  logging.log(LOG_LEVEL, 'Got casting operator %s with params %s',
              cast_name, repr(arguments))
  return (cast_name, arguments)
def __OrderBy(self):
  """Consume the ORDER BY clause, or move on to LIMIT if it is absent.

  Returns:
    The result of __OrderList when the clause is present, otherwise the
    result of __Limit.
  """
  if not self.__Accept('ORDER'):
    return self.__Limit()

  # ORDER must be followed by BY.
  self.__Expect('BY')
  return self.__OrderList()
def __OrderList(self):
  """Consume the comma-separated properties and sort orders of ORDER BY.

  Each entry is an identifier optionally followed by ASC or DESC; with no
  keyword the ordering is ascending. Every entry is appended to the
  orderings list as an (identifier, direction) tuple.

  Returns:
    The result of __Limit, which parses the remaining clauses.
  """
  while True:
    prop_name = self.__AcceptRegex(self.__identifier_regex)
    if not prop_name:
      self.__Error('Invalid ORDER BY Property')
    elif self.__Accept('DESC'):
      self.__orderings.append((prop_name, datastore.Query.DESCENDING))
    else:
      # ASC is optional: consume it when present, ascending either way.
      self.__Accept('ASC')
      self.__orderings.append((prop_name, datastore.Query.ASCENDING))

    logging.log(LOG_LEVEL, self.__orderings)
    if not self.__Accept(','):
      return self.__Limit()
def __Limit(self):
  """Consume the LIMIT clause.

  Two forms are accepted:
    LIMIT <count>
    LIMIT <offset>, <count>

  A count below 1, a negative offset, or a missing / non-numeric number
  is reported through __Error. In particular, a missing count after the
  comma is reported as a non-number limit rather than failing inside
  int().

  Returns:
    The result of __Offset, which parses the remaining clauses.
  """
  if self.__Accept('LIMIT'):
    maybe_limit = self.__AcceptRegex(self.__number_regex)

    # A comma after the first number means that number was the offset and
    # the real limit follows.
    if maybe_limit and self.__Accept(','):
      self.__offset = int(maybe_limit)
      if self.__offset < 0:
        self.__Error('Bad offset in LIMIT Value')
      else:
        logging.log(LOG_LEVEL, 'Set offset to %i', self.__offset)
        maybe_limit = self.__AcceptRegex(self.__number_regex)

    # Checked after the optional offset so that "LIMIT 5," with nothing
    # numeric behind the comma is handled here as well.
    if maybe_limit:
      self.__limit = int(maybe_limit)
      if self.__limit < 1:
        self.__Error('Bad Limit in LIMIT Value')
      else:
        logging.log(LOG_LEVEL, 'Set limit to %i', self.__limit)
    else:
      self.__Error('Non-number limit in LIMIT clause')

  return self.__Offset()
def __Offset(self):
  """Consume the OFFSET clause, then continue with the HINT clause.

  Returns:
    The result of __Hint, which parses the remaining clauses.
  """
  if not self.__Accept('OFFSET'):
    return self.__Hint()

  # Any value other than -1 means the LIMIT clause already set an offset.
  if self.__offset != -1:
    self.__Error('Offset already defined in LIMIT clause')

  offset_text = self.__AcceptRegex(self.__number_regex)
  if not offset_text:
    self.__Error('Non-number offset in OFFSET clause')
  else:
    self.__offset = int(offset_text)
    if self.__offset >= 0:
      logging.log(LOG_LEVEL, 'Set offset to %i', self.__offset)
    else:
      self.__Error('Bad offset in OFFSET clause')

  return self.__Hint()
def __Hint(self):
  """Consume the HINT clause.

  Requires one of three options (mirroring the rest of the datastore):
    HINT ORDER_FIRST
    HINT FILTER_FIRST
    HINT ANCESTOR_FIRST

  Returns:
    False after reporting an unknown hint; otherwise the result of
    __AcceptTerminal, i.e. whether the rest of the query parsed okay.
  """
  if self.__Accept('HINT'):
    # The names are tried in this fixed order; the first accepted one is
    # stored verbatim as the hint.
    for hint_name in ('ORDER_FIRST', 'FILTER_FIRST', 'ANCESTOR_FIRST'):
      if self.__Accept(hint_name):
        self.__hint = hint_name
        break
    else:
      self.__Error('Unknown HINT')
      return False

  return self.__AcceptTerminal()
class Literal(object):
  """Wrapper that marks a value as a literal rather than an unbound param.

  This is a simple wrapper class around basic types and datastore types.
  """

  def __init__(self, value):
    """Store the wrapped value.

    Args:
      value: the literal value to wrap; it is stored as given.
    """
    self.__value = value

  def Get(self):
    """Return the value of the literal."""
    wrapped = self.__value
    return wrapped

  def __repr__(self):
    """Return 'Literal(...)' containing the repr of the wrapped value."""
    inner = repr(self.__value)
    return 'Literal(%s)' % (inner,)