App Engine Python SDK version 1.8.9
[gae.git] / python / google / appengine / datastore / datastore_index.py
blob43ae0ff9380dfaa7a2ee9d4c06e43ae635ae2a08
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """Primitives for dealing with datastore indexes.
23 Example index.yaml file:
24 ------------------------
26 indexes:
28 - kind: Cat
29 ancestor: no
30 properties:
31 - name: name
32 - name: age
33 direction: desc
35 - kind: Cat
36 properties:
37 - name: name
38 direction: ascending
39 - name: whiskers
40 direction: descending
42 - kind: Store
43 ancestor: yes
44 properties:
45 - name: business
46 direction: asc
47 - name: owner
48 direction: asc
49 """
59 import itertools
61 from google.appengine.api import appinfo
62 from google.appengine.api import datastore_types
63 from google.appengine.api import validation
64 from google.appengine.api import yaml_errors
65 from google.appengine.api import yaml_object
66 from google.appengine.datastore import datastore_pb
67 from google.appengine.datastore import entity_pb
70 class Property(validation.Validated):
71 """Representation for an individual property of an index.
73 This class must be kept in sync with
74 java/com/google/apphosting/utils/config/IndexYamlReader.java.
76 Attributes:
77 name: Name of attribute to sort by.
78 direction: Direction of sort.
79 """
81 ATTRIBUTES = {
82 'name': validation.Type(str, convert=False),
83 'direction': validation.Options(('asc', ('ascending',)),
84 ('desc', ('descending',)),
85 default='asc'),
89 class Index(validation.Validated):
90 """Individual index definition.
92 Order of the properties determines a given indexes sort priority.
94 Attributes:
95 kind: Datastore kind that index belongs to.
96 ancestors: Include ancestors in index.
97 properties: Properties to sort on.
98 """
100 ATTRIBUTES = {
101 'kind': validation.Type(str, convert=False),
102 'ancestor': validation.Type(bool, convert=False, default=False),
103 'properties': validation.Optional(validation.Repeated(Property)),
107 class IndexDefinitions(validation.Validated):
108 """Top level for index definition file.
110 Attributes:
111 indexes: List of Index definitions.
114 ATTRIBUTES = {
115 appinfo.APPLICATION: validation.Optional(appinfo.APPLICATION_RE_STRING),
116 'indexes': validation.Optional(validation.Repeated(Index)),
120 def ParseIndexDefinitions(document, open_fn=None):
121 """Parse an individual index definitions document from string or stream.
123 Args:
124 document: Yaml document as a string or file-like stream.
125 open_fn: Function for opening files. Unused.
127 Raises:
128 EmptyConfigurationFile when the configuration file is empty.
129 MultipleConfigurationFile when the configuration file contains more than
130 one document.
132 Returns:
133 Single parsed yaml file if one is defined, else None.
135 try:
136 return yaml_object.BuildSingleObject(IndexDefinitions, document)
137 except yaml_errors.EmptyConfigurationFile:
138 return None
141 def ParseMultipleIndexDefinitions(document):
142 """Parse multiple index definitions documents from a string or stream.
144 Args:
145 document: Yaml document as a string or file-like stream.
147 Returns:
148 A list of datstore_index.IndexDefinitions objects, one for each document.
150 return yaml_object.BuildObjects(IndexDefinitions, document)
153 def IndexDefinitionsToKeys(indexes):
154 """Convert IndexDefinitions to set of keys.
156 Args:
157 indexes: A datastore_index.IndexDefinitions instance, or None.
159 Returns:
160 A set of keys constructed from the argument, each key being a
161 tuple of the form (kind, ancestor, properties) where properties is
162 a tuple of (name, direction) pairs, direction being ASCENDING or
163 DESCENDING (the enums).
165 keyset = set()
166 if indexes is not None:
167 if indexes.indexes:
168 for index in indexes.indexes:
169 keyset.add(IndexToKey(index))
170 return keyset
173 def IndexToKey(index):
174 """Convert Index to key.
176 Args:
177 index: A datastore_index.Index instance (not None!).
179 Returns:
180 A tuple of the form (kind, ancestor, properties) where properties
181 is a tuple of (name, direction) pairs, direction being ASCENDING
182 or DESCENDING (the enums).
184 props = []
185 if index.properties is not None:
186 for prop in index.properties:
187 if prop.direction == 'asc':
188 direction = ASCENDING
189 else:
190 direction = DESCENDING
191 props.append((prop.name, direction))
192 return index.kind, index.ancestor, tuple(props)
199 ASCENDING = datastore_pb.Query_Order.ASCENDING
200 DESCENDING = datastore_pb.Query_Order.DESCENDING
203 EQUALITY_OPERATORS = set((datastore_pb.Query_Filter.EQUAL,
205 INEQUALITY_OPERATORS = set((datastore_pb.Query_Filter.LESS_THAN,
206 datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL,
207 datastore_pb.Query_Filter.GREATER_THAN,
208 datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL,
210 EXISTS_OPERATORS = set((datastore_pb.Query_Filter.EXISTS,
214 _DIRECTION_MAP = {
215 'asc': entity_pb.Index_Property.ASCENDING,
216 'ascending': entity_pb.Index_Property.ASCENDING,
217 'desc': entity_pb.Index_Property.DESCENDING,
218 'descending': entity_pb.Index_Property.DESCENDING,
221 def Normalize(filters, orders, exists):
222 """ Normalizes filter and order query components.
224 The resulting components have the same effect as the given components if used
225 in a query.
227 Args:
228 filters: the filters set on the query
229 orders: the orders set on the query
230 exists: the names of properties that require an exists filter if
231 not already specified
233 Returns:
234 (filter, orders) the reduced set of filters and orders
238 eq_properties = set()
239 inequality_properties = set()
242 for f in filters:
243 if f.op() == datastore_pb.Query_Filter.IN and f.property_size() == 1:
244 f.set_op(datastore_pb.Query_Filter.EQUAL)
245 if f.op() in EQUALITY_OPERATORS:
246 eq_properties.add(f.property(0).name())
247 elif f.op() in INEQUALITY_OPERATORS:
248 inequality_properties.add(f.property(0).name())
250 eq_properties -= inequality_properties
253 remove_set = eq_properties.copy()
254 new_orders = []
255 for o in orders:
256 if o.property() not in remove_set:
257 remove_set.add(o.property())
258 new_orders.append(o)
259 orders = new_orders
261 remove_set.update(inequality_properties)
264 new_filters = []
265 for f in filters:
266 if f.op() not in EXISTS_OPERATORS:
267 new_filters.append(f)
268 continue
269 name = f.property(0).name()
270 if name not in remove_set:
271 remove_set.add(name)
272 new_filters.append(f)
275 for prop in exists:
276 if prop not in remove_set:
277 remove_set.add(prop)
278 new_filter = datastore_pb.Query_Filter()
279 new_filter.set_op(datastore_pb.Query_Filter.EXISTS)
280 new_prop = new_filter.add_property()
281 new_prop.set_name(prop)
282 new_prop.set_multiple(False)
283 new_prop.mutable_value()
284 new_filters.append(new_filter)
286 filters = new_filters
291 if datastore_types.KEY_SPECIAL_PROPERTY in eq_properties:
292 orders = []
296 new_orders = []
297 for o in orders:
298 if o.property() == datastore_types.KEY_SPECIAL_PROPERTY:
299 new_orders.append(o)
300 break
301 new_orders.append(o)
302 orders = new_orders
304 return (filters, orders)
307 def RemoveNativelySupportedComponents(filters, orders, exists):
308 """ Removes query components that are natively supported by the datastore.
310 The resulting filters and orders should not be used in an actual query.
312 Args:
313 filters: the filters set on the query
314 orders: the orders set on the query
315 exists: the names of properties that require an exists filter if
316 not already specified
318 Returns:
319 (filters, orders) the reduced set of filters and orders
321 (filters, orders) = Normalize(filters, orders, exists)
323 for f in filters:
324 if f.op() in EXISTS_OPERATORS:
328 return (filters, orders)
332 has_key_desc_order = False
333 if orders and orders[-1].property() == datastore_types.KEY_SPECIAL_PROPERTY:
334 if orders[-1].direction() == ASCENDING:
335 orders = orders[:-1]
336 else:
337 has_key_desc_order = True
344 if not has_key_desc_order:
345 for f in filters:
346 if (f.op() in INEQUALITY_OPERATORS and
347 f.property(0).name() != datastore_types.KEY_SPECIAL_PROPERTY):
348 break
349 else:
350 filters = [f for f in filters
351 if f.property(0).name() != datastore_types.KEY_SPECIAL_PROPERTY]
353 return (filters, orders)
356 def CompositeIndexForQuery(query):
357 """Return the composite index needed for a query.
359 A query is translated into a tuple, as follows:
361 - The first item is the kind string, or None if we're not filtering
362 on kind (see below).
364 - The second item is a bool giving whether the query specifies an
365 ancestor.
367 - After that come (property, ASCENDING) pairs for those Filter
368 entries whose operator is EQUAL or IN. Since the order of these
369 doesn't matter, they are sorted by property name to normalize them
370 in order to avoid duplicates.
372 - After that comes at most one (property, ASCENDING) pair for a
373 Filter entry whose operator is on of the four inequalities. There
374 can be at most one of these.
376 - After that come all the (property, direction) pairs for the Order
377 entries, in the order given in the query. Exceptions:
378 (a) if there is a Filter entry with an inequality operator that matches
379 the first Order entry, the first order pair is omitted (or,
380 equivalently, in this case the inequality pair is omitted).
381 (b) if an Order entry corresponds to an equality filter, it is ignored
382 (since there will only ever be one value returned).
383 (c) if there is an equality filter on __key__ all orders are dropped
384 (since there will be at most one result returned).
385 (d) if there is an order on __key__ all further orders are dropped (since
386 keys are unique).
387 (e) orders on __key__ ASCENDING are dropped (since this is supported
388 natively by the datastore).
390 - Finally, if there are Filter entries whose operator is EXISTS, and
391 whose property names are not already listed, they are added, with
392 the direction set to ASCENDING.
394 This algorithm should consume all Filter and Order entries.
396 Additional notes:
398 - The low-level implementation allows queries that don't specify a
399 kind; but the Python API doesn't support this yet.
401 - If there's an inequality filter and one or more sort orders, the
402 first sort order *must* match the inequality filter.
404 - The following indexes are always built in and should be suppressed:
405 - query on kind only;
406 - query on kind and one filter *or* one order;
407 - query on ancestor only, without kind (not exposed in Python yet);
408 - query on kind and equality filters only, no order (with or without
409 ancestor).
411 - While the protocol buffer allows a Filter to contain multiple
412 properties, we don't use this. It is only needed for the IN operator
413 but this is (currently) handled on the client side, so in practice
414 each Filter is expected to have exactly one property.
416 Args:
417 query: A datastore_pb.Query instance.
419 Returns:
420 A tuple of the form (required, kind, ancestor, properties).
421 required: boolean, whether the index is required;
422 kind: the kind or None;
423 ancestor: True if this is an ancestor query;
424 properties: A tuple consisting of:
425 - the prefix, represented by a set of property names
426 - the postfix, represented by a tuple consisting of any number of:
427 - Sets of property names: Indicates these properties can appear in any
428 order with any direction.
429 - Tuples of (property name, direction) tuples. Indicating the properties
430 must appear in the exact order with the given direction. direction can
431 be None if direction does not matter.
433 required = True
436 kind = query.kind()
437 ancestor = query.has_ancestor()
438 filters = query.filter_list()
439 orders = query.order_list()
443 for filter in filters:
444 assert filter.op() != datastore_pb.Query_Filter.IN, 'Filter.op()==IN'
445 nprops = len(filter.property_list())
446 assert nprops == 1, 'Filter has %s properties, expected 1' % nprops
448 if not kind:
451 required = False
453 exists = list(query.property_name_list())
454 exists.extend(query.group_by_property_name_list())
456 filters, orders = RemoveNativelySupportedComponents(filters, orders, exists)
459 eq_filters = [f for f in filters if f.op() in EQUALITY_OPERATORS]
460 ineq_filters = [f for f in filters if f.op() in INEQUALITY_OPERATORS]
461 exists_filters = [f for f in filters if f.op() in EXISTS_OPERATORS]
462 assert (len(eq_filters) + len(ineq_filters) +
463 len(exists_filters)) == len(filters), 'Not all filters used'
465 if (kind and not ineq_filters and not exists_filters and
466 not orders):
470 names = set(f.property(0).name() for f in eq_filters)
471 if not names.intersection(datastore_types._SPECIAL_PROPERTIES):
472 required = False
476 ineq_property = None
477 if ineq_filters:
478 for filter in ineq_filters:
479 if (filter.property(0).name() ==
480 datastore_types._UNAPPLIED_LOG_TIMESTAMP_SPECIAL_PROPERTY):
481 continue
482 if not ineq_property:
483 ineq_property = filter.property(0).name()
484 else:
485 assert filter.property(0).name() == ineq_property
489 group_by_props = set(query.group_by_property_name_list())
492 prefix = frozenset(f.property(0).name() for f in eq_filters)
494 postfix_ordered = [(order.property(), order.direction()) for order in orders]
497 postfix_group_by = frozenset(f.property(0).name() for f in exists_filters
498 if f.property(0).name() in group_by_props)
500 postfix_unordered = frozenset(f.property(0).name() for f in exists_filters
501 if f.property(0).name() not in group_by_props)
504 if ineq_property:
505 if orders:
508 assert ineq_property == orders[0].property()
509 else:
510 postfix_ordered.append((ineq_property, None))
512 property_count = (len(prefix) + len(postfix_ordered) + len(postfix_group_by)
513 + len(postfix_unordered))
514 if kind and not ancestor and property_count <= 1:
517 required = False
520 if postfix_ordered:
521 prop, dir = postfix_ordered[0]
522 if prop == datastore_types.KEY_SPECIAL_PROPERTY and dir is DESCENDING:
523 required = True
526 props = prefix, (tuple(postfix_ordered), postfix_group_by, postfix_unordered)
527 return required, kind, ancestor, props
530 def GetRecommendedIndexProperties(properties):
531 """Converts the properties returned by datastore_index.CompositeIndexForQuery
532 into a recommended list of index properties and directions.
534 All unordered components are sorted and assigned an ASCENDING direction. All
535 ordered components with out a direction are assigned an ASCEDNING direction.
537 Args:
538 properties: See datastore_index.CompositeIndexForQuery
540 Returns:
541 A tuple of (name, direction) tuples where:
542 name: a property name
543 direction: datastore_pb.Query_Order.ASCENDING or ...DESCENDING
546 prefix, postfix = properties
547 result = []
548 for sub_list in itertools.chain((prefix,), postfix):
549 if isinstance(sub_list, (frozenset, set)):
551 for prop in sorted(sub_list):
552 result.append((prop, ASCENDING))
553 else:
556 for prop, dir in sub_list:
557 result.append((prop, dir if dir is not None else ASCENDING))
559 return tuple(result)
562 def _MatchPostfix(postfix_props, index_props):
563 """Matches a postfix constraint with an existing index.
565 postfix_props constraints are specified through a list of:
566 - sets of string: any order any direction;
567 - list of tuples(string, direction): the given order, and, if specified, the
568 given direction.
570 For example:
571 [set('A', 'B'), [('C', None), ('D', ASC)]]
572 matches:
573 [('F', ASC), ('B', ASC), ('A', DESC), ('C', DESC), ('D', ASC)]
574 with a return value of [('F', ASC)], but does not match:
575 [('F', ASC), ('A', DESC), ('C', DESC), ('D', ASC)]
576 [('B', ASC), ('F', ASC), ('A', DESC), ('C', DESC), ('D', ASC)]
577 [('F', ASC), ('B', ASC), ('A', DESC), ('C', DESC), ('D', DESC)]
579 Args:
580 postfix_props: A tuple of sets and lists, as output by
581 CompositeIndexForQuery. They should define the requirements for the
582 postfix of the index.
583 index_props: A list of tuples (property_name, property_direction), that
584 define the index to try and match.
586 Returns:
587 The list of tuples that define the prefix properties in the given index.
588 None if the constraints could not be satisfied.
592 index_props_rev = reversed(index_props)
593 for property_group in reversed(postfix_props):
594 index_group_iter = itertools.islice(index_props_rev, len(property_group))
595 if isinstance(property_group, (frozenset, set)):
597 index_group = set(prop for prop, _ in index_group_iter)
598 if index_group != property_group:
599 return None
600 else:
602 index_group = list(index_group_iter)
603 if len(index_group) != len(property_group):
604 return None
605 for (index_prop, index_dir), (prop, direction) in itertools.izip(
606 index_group, reversed(property_group)):
607 if index_prop != prop or (direction and index_dir != direction):
608 return None
609 remaining = list(index_props_rev)
610 remaining.reverse()
611 return remaining
614 def MinimalCompositeIndexForQuery(query, index_defs):
615 """Computes the minimal composite index for this query.
617 Unlike datastore_index.CompositeIndexForQuery, this function takes into
618 account indexes that already exist in the system.
620 Args:
621 query: the datastore_pb.Query to compute suggestions for
622 index_defs: a list of datastore_index.Index objects that already exist.
624 Returns:
625 None if no index is needed, otherwise the minimal index in the form
626 (is_most_efficient, kind, ancestor, properties). Where is_most_efficient is a
627 boolean denoting if the suggested index is the most efficient (i.e. the one
628 returned by datastore_index.CompositeIndexForQuery). kind and ancestor
629 are the same variables returned by datastore_index.CompositeIndexForQuery.
630 properties is a tuple consisting of the prefix and postfix properties
631 returend by datastore_index.CompositeIndexForQuery.
634 required, kind, ancestor, (prefix, postfix) = CompositeIndexForQuery(query)
636 if not required:
637 return None
640 remaining_dict = {}
642 for definition in index_defs:
643 if (kind != definition.kind or
645 (not ancestor and definition.ancestor)):
646 continue
648 _, _, index_props = IndexToKey(definition)
650 index_prefix = _MatchPostfix(postfix, index_props)
652 if index_prefix is None:
654 continue
656 remaining_index_props = set([prop for prop, _ in index_prefix])
658 if remaining_index_props - prefix:
660 continue
663 index_postfix = tuple(index_props[len(index_prefix):])
664 remaining = remaining_dict.get(index_postfix)
665 if remaining is None:
666 remaining = prefix.copy(), ancestor
669 props_remaining, ancestor_remaining = remaining
670 props_remaining -= remaining_index_props
671 if definition.ancestor:
672 ancestor_remaining = False
674 if not (props_remaining or ancestor_remaining):
675 return None
677 if (props_remaining, ancestor_remaining) == remaining:
678 continue
681 remaining_dict[index_postfix] = (props_remaining, ancestor_remaining)
683 if not remaining_dict:
684 return (True, kind, ancestor, (prefix, postfix))
686 def calc_cost(minimal_props, minimal_ancestor):
687 result = len(minimal_props)
688 if minimal_ancestor:
689 result += 2
690 return result
693 minimal_postfix, remaining = remaining_dict.popitem()
694 minimal_props, minimal_ancestor = remaining
695 minimal_cost = calc_cost(minimal_props, minimal_ancestor)
696 for index_postfix, (props_remaining, ancestor_remaining) in (
697 remaining_dict.iteritems()):
698 cost = calc_cost(props_remaining, ancestor_remaining)
699 if cost < minimal_cost:
700 minimal_cost = cost
701 minimal_postfix = index_postfix
702 minimal_props = props_remaining
703 minimal_ancestor = ancestor_remaining
706 props = frozenset(minimal_props), (minimal_postfix, frozenset(), frozenset())
707 return False, kind, minimal_ancestor, props
710 def IndexYamlForQuery(kind, ancestor, props):
711 """Return the composite index definition YAML needed for a query.
713 Given a query, the arguments for this method can be computed with:
714 _, kind, ancestor, props = datastore_index.CompositeIndexForQuery(query)
715 props = datastore_index.GetRecommendedIndexProperties(props)
717 Args:
718 kind: the kind or None
719 ancestor: True if this is an ancestor query, False otherwise
720 props: tuples of the form (name, direction) where:
721 name - a property name;
722 direction - datastore_pb.Query_Order.ASCENDING or ...DESCENDING;
724 Returns:
725 A string with the YAML for the composite index needed by the query.
727 yaml = []
728 yaml.append('- kind: %s' % kind)
729 if ancestor:
730 yaml.append(' ancestor: yes')
731 if props:
732 yaml.append(' properties:')
733 for name, direction in props:
734 yaml.append(' - name: %s' % name)
735 if direction == DESCENDING:
736 yaml.append(' direction: desc')
737 return '\n'.join(yaml)
740 def IndexXmlForQuery(kind, ancestor, props):
741 """Return the composite index definition XML needed for a query.
743 Given a query, the arguments for this method can be computed with:
744 _, kind, ancestor, props = datastore_index.CompositeIndexForQuery(query)
745 props = datastore_index.GetRecommendedIndexProperties(props)
747 Args:
748 kind: the kind or None
749 ancestor: True if this is an ancestor query, False otherwise
750 props: tuples of the form (name, direction) where:
751 name - a property name;
752 direction - datastore_pb.Query_Order.ASCENDING or ...DESCENDING;
754 Returns:
755 A string with the XML for the composite index needed by the query.
757 xml = []
758 xml.append('<datastore-index kind="%s" ancestor="%s">'
759 % (kind, 'true' if ancestor else 'false'))
760 for name, direction in props:
761 xml.append(' <property name="%s" direction="%s" />'
762 % (name, 'asc' if direction == ASCENDING else 'desc'))
763 xml.append('</datastore-index>')
764 return '\n'.join(xml)
767 def IndexDefinitionToProto(app_id, index_definition):
768 """Transform individual Index definition to protocol buffer.
770 Args:
771 app_id: Application id for new protocol buffer CompositeIndex.
772 index_definition: datastore_index.Index object to transform.
774 Returns:
775 New entity_pb.CompositeIndex with default values set and index
776 information filled in.
778 proto = entity_pb.CompositeIndex()
780 proto.set_app_id(app_id)
781 proto.set_id(0)
782 proto.set_state(entity_pb.CompositeIndex.WRITE_ONLY)
784 definition_proto = proto.mutable_definition()
785 definition_proto.set_entity_type(index_definition.kind)
786 definition_proto.set_ancestor(index_definition.ancestor)
788 if index_definition.properties is not None:
789 for prop in index_definition.properties:
790 prop_proto = definition_proto.add_property()
791 prop_proto.set_name(prop.name)
792 prop_proto.set_direction(_DIRECTION_MAP[prop.direction])
794 return proto
797 def IndexDefinitionsToProtos(app_id, index_definitions):
798 """Transform multiple index definitions to composite index records
800 Args:
801 app_id: Application id for new protocol buffer CompositeIndex.
802 index_definition: A list of datastore_index.Index objects to transform.
804 Returns:
805 A list of tranformed entity_pb.Compositeindex entities with default values
806 set and index information filled in.
808 return [IndexDefinitionToProto(app_id, index)
809 for index in index_definitions]
812 def ProtoToIndexDefinition(proto):
813 """Transform individual index protocol buffer to index definition.
815 Args:
816 proto: An instance of entity_pb.CompositeIndex to transform.
818 Returns:
819 A new instance of datastore_index.Index.
821 properties = []
822 proto_index = proto.definition()
823 for prop_proto in proto_index.property_list():
824 prop_definition = Property(name=prop_proto.name())
825 if prop_proto.direction() == entity_pb.Index_Property.DESCENDING:
826 prop_definition.direction = 'descending'
827 properties.append(prop_definition)
829 index = Index(kind=proto_index.entity_type(), properties=properties)
830 if proto_index.ancestor():
831 index.ancestor = True
832 return index
835 def ProtosToIndexDefinitions(protos):
836 """Transform multiple index protocol buffers to index definitions.
838 Args:
839 A list of entity_pb.Index records.
841 return [ProtoToIndexDefinition(definition) for definition in protos]