3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """Primitives for dealing with datastore indexes.
23 Example index.yaml file:
24 ------------------------
71 from google
.appengine
.datastore
import entity_pb
73 from google
.appengine
.api
import appinfo
74 from google
.appengine
.api
import datastore_types
75 from google
.appengine
.api
import validation
76 from google
.appengine
.api
import yaml_errors
77 from google
.appengine
.api
import yaml_object
78 from google
.appengine
.datastore
import datastore_pb
81 class Property(validation
.Validated
):
82 """Representation for an individual property of an index.
84 This class must be kept in sync with
85 java/com/google/apphosting/utils/config/IndexYamlReader.java.
88 name: Name of attribute to sort by.
89 direction: Direction of sort.
90 mode: How the property is indexed. Either 'geospatial'
91 or None (unspecified).
95 'name': validation
.Type(str, convert
=False),
96 'direction': validation
.Options(('asc', ('ascending',)),
97 ('desc', ('descending',)),
99 'mode': validation
.Optional(['geospatial']),
102 def __init__(self
, **attributes
):
104 object.__setattr
__(self
, '_is_set', set([]))
105 super(Property
, self
).__init
__(**attributes
)
107 def __setattr__(self
, key
, value
):
108 self
._is
_set
.add(key
)
109 super(Property
, self
).__setattr
__(key
, value
)
111 def IsAscending(self
):
112 return self
.direction
!= 'desc'
114 def CheckInitialized(self
):
115 if 'direction' in self
._is
_set
and self
.mode
== 'geospatial':
116 raise validation
.ValidationError('Direction on a geospatial-mode '
117 'property is not allowed.')
120 def _PropertyPresenter(dumper
, prop
):
121 """A PyYaml presenter for Property.
123 It differs from the default by not outputting 'mode: null' and direction when
124 mode is specified. This is done in order to ensure backwards compatibility.
127 dumper: the Dumper object provided by PyYaml.
128 prop: the Property object to serialize.
131 A PyYaml object mapping.
135 prop_copy
= copy
.copy(prop
)
137 del prop_copy
._is
_set
140 if prop
.mode
is not None:
141 del prop_copy
.direction
145 return dumper
.represent_object(prop_copy
)
147 yaml
.add_representer(Property
, _PropertyPresenter
)
150 class Index(validation
.Validated
):
151 """Individual index definition.
153 Order of the properties determines a given indexes sort priority.
156 kind: Datastore kind that index belongs to.
157 ancestors: Include ancestors in index.
158 properties: Properties to sort on.
162 'kind': validation
.Type(str, convert
=False),
163 'ancestor': validation
.Type(bool, convert
=False, default
=False),
164 'properties': validation
.Optional(validation
.Repeated(Property
)),
168 class IndexDefinitions(validation
.Validated
):
169 """Top level for index definition file.
172 indexes: List of Index definitions.
176 appinfo
.APPLICATION
: validation
.Optional(appinfo
.APPLICATION_RE_STRING
),
177 'indexes': validation
.Optional(validation
.Repeated(Index
)),
181 def ParseIndexDefinitions(document
, open_fn
=None):
182 """Parse an individual index definitions document from string or stream.
185 document: Yaml document as a string or file-like stream.
186 open_fn: Function for opening files. Unused.
189 EmptyConfigurationFile when the configuration file is empty.
190 MultipleConfigurationFile when the configuration file contains more than
194 Single parsed yaml file if one is defined, else None.
197 return yaml_object
.BuildSingleObject(IndexDefinitions
, document
)
198 except yaml_errors
.EmptyConfigurationFile
:
202 def ParseMultipleIndexDefinitions(document
):
203 """Parse multiple index definitions documents from a string or stream.
206 document: Yaml document as a string or file-like stream.
209 A list of datstore_index.IndexDefinitions objects, one for each document.
211 return yaml_object
.BuildObjects(IndexDefinitions
, document
)
214 def IndexDefinitionsToKeys(indexes
):
215 """Convert IndexDefinitions to set of keys.
218 indexes: A datastore_index.IndexDefinitions instance, or None.
221 A set of keys constructed from the argument, each key being a
222 tuple of the form (kind, ancestor, properties) where properties is
223 a tuple of (name, direction) pairs, direction being ASCENDING or
224 DESCENDING (the enums).
227 if indexes
is not None:
229 for index
in indexes
.indexes
:
230 keyset
.add(IndexToKey(index
))
234 def IndexToKey(index
):
235 """Convert Index to key.
238 index: A datastore_index.Index instance (not None!).
241 A tuple of the form (kind, ancestor, properties) where properties
242 is a tuple of (name, direction) pairs, direction being ASCENDING
243 or DESCENDING (the enums).
248 if index
.properties
is not None:
249 for prop
in index
.properties
:
250 if prop
.IsAscending():
251 direction
= ASCENDING
253 direction
= DESCENDING
254 props
.append((prop
.name
, direction
))
255 return index
.kind
, index
.ancestor
, tuple(props
)
262 ASCENDING
= datastore_pb
.Query_Order
.ASCENDING
263 DESCENDING
= datastore_pb
.Query_Order
.DESCENDING
266 EQUALITY_OPERATORS
= set((datastore_pb
.Query_Filter
.EQUAL
,
268 INEQUALITY_OPERATORS
= set((datastore_pb
.Query_Filter
.LESS_THAN
,
269 datastore_pb
.Query_Filter
.LESS_THAN_OR_EQUAL
,
270 datastore_pb
.Query_Filter
.GREATER_THAN
,
271 datastore_pb
.Query_Filter
.GREATER_THAN_OR_EQUAL
,
273 EXISTS_OPERATORS
= set((datastore_pb
.Query_Filter
.EXISTS
,
277 def Normalize(filters
, orders
, exists
):
278 """ Normalizes filter and order query components.
280 The resulting components have the same effect as the given components if used
284 filters: the filters set on the query
285 orders: the orders set on the query
286 exists: the names of properties that require an exists filter if
287 not already specified
290 (filter, orders) the reduced set of filters and orders
294 eq_properties
= set()
295 inequality_properties
= set()
299 if f
.op() == datastore_pb
.Query_Filter
.IN
and f
.property_size() == 1:
300 f
.set_op(datastore_pb
.Query_Filter
.EQUAL
)
301 if f
.op() in EQUALITY_OPERATORS
:
302 eq_properties
.add(f
.property(0).name())
303 elif f
.op() in INEQUALITY_OPERATORS
:
304 inequality_properties
.add(f
.property(0).name())
306 eq_properties
-= inequality_properties
309 remove_set
= eq_properties
.copy()
312 if o
.property() not in remove_set
:
313 remove_set
.add(o
.property())
317 remove_set
.update(inequality_properties
)
322 if f
.op() not in EXISTS_OPERATORS
:
323 new_filters
.append(f
)
325 name
= f
.property(0).name()
326 if name
not in remove_set
:
328 new_filters
.append(f
)
332 if prop
not in remove_set
:
334 new_filter
= datastore_pb
.Query_Filter()
335 new_filter
.set_op(datastore_pb
.Query_Filter
.EXISTS
)
336 new_prop
= new_filter
.add_property()
337 new_prop
.set_name(prop
)
338 new_prop
.set_multiple(False)
339 new_prop
.mutable_value()
340 new_filters
.append(new_filter
)
342 filters
= new_filters
347 if datastore_types
.KEY_SPECIAL_PROPERTY
in eq_properties
:
354 if o
.property() == datastore_types
.KEY_SPECIAL_PROPERTY
:
360 return (filters
, orders
)
363 def RemoveNativelySupportedComponents(filters
, orders
, exists
):
364 """ Removes query components that are natively supported by the datastore.
366 The resulting filters and orders should not be used in an actual query.
369 filters: the filters set on the query
370 orders: the orders set on the query
371 exists: the names of properties that require an exists filter if
372 not already specified
375 (filters, orders) the reduced set of filters and orders
377 (filters
, orders
) = Normalize(filters
, orders
, exists
)
380 if f
.op() in EXISTS_OPERATORS
:
384 return (filters
, orders
)
388 has_key_desc_order
= False
389 if orders
and orders
[-1].property() == datastore_types
.KEY_SPECIAL_PROPERTY
:
390 if orders
[-1].direction() == ASCENDING
:
393 has_key_desc_order
= True
400 if not has_key_desc_order
:
402 if (f
.op() in INEQUALITY_OPERATORS
and
403 f
.property(0).name() != datastore_types
.KEY_SPECIAL_PROPERTY
):
406 filters
= [f
for f
in filters
407 if f
.property(0).name() != datastore_types
.KEY_SPECIAL_PROPERTY
]
409 return (filters
, orders
)
412 def CompositeIndexForQuery(query
):
413 """Return the composite index needed for a query.
415 A query is translated into a tuple, as follows:
417 - The first item is the kind string, or None if we're not filtering
420 - The second item is a bool giving whether the query specifies an
423 - After that come (property, ASCENDING) pairs for those Filter
424 entries whose operator is EQUAL or IN. Since the order of these
425 doesn't matter, they are sorted by property name to normalize them
426 in order to avoid duplicates.
428 - After that comes at most one (property, ASCENDING) pair for a
429 Filter entry whose operator is on of the four inequalities. There
430 can be at most one of these.
432 - After that come all the (property, direction) pairs for the Order
433 entries, in the order given in the query. Exceptions:
434 (a) if there is a Filter entry with an inequality operator that matches
435 the first Order entry, the first order pair is omitted (or,
436 equivalently, in this case the inequality pair is omitted).
437 (b) if an Order entry corresponds to an equality filter, it is ignored
438 (since there will only ever be one value returned).
439 (c) if there is an equality filter on __key__ all orders are dropped
440 (since there will be at most one result returned).
441 (d) if there is an order on __key__ all further orders are dropped (since
443 (e) orders on __key__ ASCENDING are dropped (since this is supported
444 natively by the datastore).
446 - Finally, if there are Filter entries whose operator is EXISTS, and
447 whose property names are not already listed, they are added, with
448 the direction set to ASCENDING.
450 This algorithm should consume all Filter and Order entries.
454 - The low-level implementation allows queries that don't specify a
455 kind; but the Python API doesn't support this yet.
457 - If there's an inequality filter and one or more sort orders, the
458 first sort order *must* match the inequality filter.
460 - The following indexes are always built in and should be suppressed:
461 - query on kind only;
462 - query on kind and one filter *or* one order;
463 - query on ancestor only, without kind (not exposed in Python yet);
464 - query on kind and equality filters only, no order (with or without
467 - While the protocol buffer allows a Filter to contain multiple
468 properties, we don't use this. It is only needed for the IN operator
469 but this is (currently) handled on the client side, so in practice
470 each Filter is expected to have exactly one property.
473 query: A datastore_pb.Query instance.
476 A tuple of the form (required, kind, ancestor, properties).
477 required: boolean, whether the index is required;
478 kind: the kind or None;
479 ancestor: True if this is an ancestor query;
480 properties: A tuple consisting of:
481 - the prefix, represented by a set of property names
482 - the postfix, represented by a tuple consisting of any number of:
483 - Sets of property names: Indicates these properties can appear in any
484 order with any direction.
485 - Tuples of (property name, direction) tuples. Indicating the properties
486 must appear in the exact order with the given direction. direction can
487 be None if direction does not matter.
493 ancestor
= query
.has_ancestor()
494 filters
= query
.filter_list()
495 orders
= query
.order_list()
499 for filter in filters
:
500 assert filter.op() != datastore_pb
.Query_Filter
.IN
, 'Filter.op()==IN'
501 nprops
= len(filter.property_list())
502 assert nprops
== 1, 'Filter has %s properties, expected 1' % nprops
509 exists
= list(query
.property_name_list())
510 exists
.extend(query
.group_by_property_name_list())
512 filters
, orders
= RemoveNativelySupportedComponents(filters
, orders
, exists
)
515 eq_filters
= [f
for f
in filters
if f
.op() in EQUALITY_OPERATORS
]
516 ineq_filters
= [f
for f
in filters
if f
.op() in INEQUALITY_OPERATORS
]
517 exists_filters
= [f
for f
in filters
if f
.op() in EXISTS_OPERATORS
]
518 assert (len(eq_filters
) + len(ineq_filters
) +
519 len(exists_filters
)) == len(filters
), 'Not all filters used'
521 if (kind
and not ineq_filters
and not exists_filters
and
526 names
= set(f
.property(0).name() for f
in eq_filters
)
527 if not names
.intersection(datastore_types
._SPECIAL
_PROPERTIES
):
534 for filter in ineq_filters
:
535 if (filter.property(0).name() ==
536 datastore_types
._UNAPPLIED
_LOG
_TIMESTAMP
_SPECIAL
_PROPERTY
):
538 if not ineq_property
:
539 ineq_property
= filter.property(0).name()
541 assert filter.property(0).name() == ineq_property
545 group_by_props
= set(query
.group_by_property_name_list())
548 prefix
= frozenset(f
.property(0).name() for f
in eq_filters
)
550 postfix_ordered
= [(order
.property(), order
.direction()) for order
in orders
]
553 postfix_group_by
= frozenset(f
.property(0).name() for f
in exists_filters
554 if f
.property(0).name() in group_by_props
)
556 postfix_unordered
= frozenset(f
.property(0).name() for f
in exists_filters
557 if f
.property(0).name() not in group_by_props
)
564 assert ineq_property
== orders
[0].property()
566 postfix_ordered
.append((ineq_property
, None))
568 property_count
= (len(prefix
) + len(postfix_ordered
) + len(postfix_group_by
)
569 + len(postfix_unordered
))
570 if kind
and not ancestor
and property_count
<= 1:
577 prop
, dir = postfix_ordered
[0]
578 if prop
== datastore_types
.KEY_SPECIAL_PROPERTY
and dir is DESCENDING
:
582 props
= prefix
, (tuple(postfix_ordered
), postfix_group_by
, postfix_unordered
)
583 return required
, kind
, ancestor
, props
586 def GetRecommendedIndexProperties(properties
):
587 """Converts the properties returned by datastore_index.CompositeIndexForQuery
588 into a recommended list of index properties and directions.
590 All unordered components are sorted and assigned an ASCENDING direction. All
591 ordered components with out a direction are assigned an ASCEDNING direction.
594 properties: See datastore_index.CompositeIndexForQuery
597 A tuple of (name, direction) tuples where:
598 name: a property name
599 direction: datastore_pb.Query_Order.ASCENDING or ...DESCENDING
602 prefix
, postfix
= properties
604 for sub_list
in itertools
.chain((prefix
,), postfix
):
605 if isinstance(sub_list
, (frozenset, set)):
607 for prop
in sorted(sub_list
):
608 result
.append((prop
, ASCENDING
))
612 for prop
, dir in sub_list
:
613 result
.append((prop
, dir if dir is not None else ASCENDING
))
618 def _MatchPostfix(postfix_props
, index_props
):
619 """Matches a postfix constraint with an existing index.
621 postfix_props constraints are specified through a list of:
622 - sets of string: any order any direction;
623 - list of tuples(string, direction): the given order, and, if specified, the
627 [set('A', 'B'), [('C', None), ('D', ASC)]]
629 [('F', ASC), ('B', ASC), ('A', DESC), ('C', DESC), ('D', ASC)]
630 with a return value of [('F', ASC)], but does not match:
631 [('F', ASC), ('A', DESC), ('C', DESC), ('D', ASC)]
632 [('B', ASC), ('F', ASC), ('A', DESC), ('C', DESC), ('D', ASC)]
633 [('F', ASC), ('B', ASC), ('A', DESC), ('C', DESC), ('D', DESC)]
636 postfix_props: A tuple of sets and lists, as output by
637 CompositeIndexForQuery. They should define the requirements for the
638 postfix of the index.
639 index_props: A list of tuples (property_name, property_direction), that
640 define the index to try and match.
643 The list of tuples that define the prefix properties in the given index.
644 None if the constraints could not be satisfied.
648 index_props_rev
= reversed(index_props
)
649 for property_group
in reversed(postfix_props
):
650 index_group_iter
= itertools
.islice(index_props_rev
, len(property_group
))
651 if isinstance(property_group
, (frozenset, set)):
653 index_group
= set(prop
for prop
, _
in index_group_iter
)
654 if index_group
!= property_group
:
658 index_group
= list(index_group_iter
)
659 if len(index_group
) != len(property_group
):
661 for (index_prop
, index_dir
), (prop
, direction
) in itertools
.izip(
662 index_group
, reversed(property_group
)):
663 if index_prop
!= prop
or (direction
and index_dir
!= direction
):
665 remaining
= list(index_props_rev
)
670 def MinimalCompositeIndexForQuery(query
, index_defs
):
671 """Computes the minimal composite index for this query.
673 Unlike datastore_index.CompositeIndexForQuery, this function takes into
674 account indexes that already exist in the system.
677 query: the datastore_pb.Query to compute suggestions for
678 index_defs: a list of datastore_index.Index objects that already exist.
681 None if no index is needed, otherwise the minimal index in the form
682 (is_most_efficient, kind, ancestor, properties). Where is_most_efficient is a
683 boolean denoting if the suggested index is the most efficient (i.e. the one
684 returned by datastore_index.CompositeIndexForQuery). kind and ancestor
685 are the same variables returned by datastore_index.CompositeIndexForQuery.
686 properties is a tuple consisting of the prefix and postfix properties
687 returend by datastore_index.CompositeIndexForQuery.
690 required
, kind
, ancestor
, (prefix
, postfix
) = CompositeIndexForQuery(query
)
698 for definition
in index_defs
:
699 if (kind
!= definition
.kind
or
701 (not ancestor
and definition
.ancestor
)):
704 _
, _
, index_props
= IndexToKey(definition
)
706 index_prefix
= _MatchPostfix(postfix
, index_props
)
708 if index_prefix
is None:
712 remaining_index_props
= set([prop
for prop
, _
in index_prefix
])
714 if remaining_index_props
- prefix
:
719 index_postfix
= tuple(index_props
[len(index_prefix
):])
720 remaining
= remaining_dict
.get(index_postfix
)
721 if remaining
is None:
722 remaining
= prefix
.copy(), ancestor
725 props_remaining
, ancestor_remaining
= remaining
726 props_remaining
-= remaining_index_props
727 if definition
.ancestor
:
728 ancestor_remaining
= False
730 if not (props_remaining
or ancestor_remaining
):
733 if (props_remaining
, ancestor_remaining
) == remaining
:
737 remaining_dict
[index_postfix
] = (props_remaining
, ancestor_remaining
)
739 if not remaining_dict
:
740 return (True, kind
, ancestor
, (prefix
, postfix
))
742 def calc_cost(minimal_props
, minimal_ancestor
):
743 result
= len(minimal_props
)
749 minimal_postfix
, remaining
= remaining_dict
.popitem()
750 minimal_props
, minimal_ancestor
= remaining
751 minimal_cost
= calc_cost(minimal_props
, minimal_ancestor
)
752 for index_postfix
, (props_remaining
, ancestor_remaining
) in (
753 remaining_dict
.iteritems()):
754 cost
= calc_cost(props_remaining
, ancestor_remaining
)
755 if cost
< minimal_cost
:
757 minimal_postfix
= index_postfix
758 minimal_props
= props_remaining
759 minimal_ancestor
= ancestor_remaining
762 props
= frozenset(minimal_props
), (minimal_postfix
, frozenset(), frozenset())
763 return False, kind
, minimal_ancestor
, props
766 def IndexYamlForQuery(kind
, ancestor
, props
):
767 """Return the composite index definition YAML needed for a query.
769 Given a query, the arguments for this method can be computed with:
770 _, kind, ancestor, props = datastore_index.CompositeIndexForQuery(query)
771 props = datastore_index.GetRecommendedIndexProperties(props)
774 kind: the kind or None
775 ancestor: True if this is an ancestor query, False otherwise
776 props: tuples of the form (name, direction) where:
777 name - a property name;
778 direction - datastore_pb.Query_Order.ASCENDING or ...DESCENDING;
781 A string with the YAML for the composite index needed by the query.
786 serialized_yaml
.append('- kind: %s' % kind
)
788 serialized_yaml
.append(' ancestor: yes')
790 serialized_yaml
.append(' properties:')
791 for name
, direction
in props
:
792 serialized_yaml
.append(' - name: %s' % name
)
793 if direction
== DESCENDING
:
794 serialized_yaml
.append(' direction: desc')
795 return '\n'.join(serialized_yaml
)
798 def IndexXmlForQuery(kind
, ancestor
, props
):
799 """Return the composite index definition XML needed for a query.
801 Given a query, the arguments for this method can be computed with:
802 _, kind, ancestor, props = datastore_index.CompositeIndexForQuery(query)
803 props = datastore_index.GetRecommendedIndexProperties(props)
806 kind: the kind or None
807 ancestor: True if this is an ancestor query, False otherwise
808 props: tuples of the form (name, direction) where:
809 name - a property name;
810 direction - datastore_pb.Query_Order.ASCENDING or ...DESCENDING;
813 A string with the XML for the composite index needed by the query.
818 serialized_xml
.append('<datastore-index kind="%s" ancestor="%s">'
819 % (kind
, 'true' if ancestor
else 'false'))
820 for name
, direction
in props
:
821 serialized_xml
.append(' <property name="%s" direction="%s" />'
822 % (name
, 'asc' if direction
== ASCENDING
else 'desc'))
823 serialized_xml
.append('</datastore-index>')
824 return '\n'.join(serialized_xml
)
827 def IndexDefinitionToProto(app_id
, index_definition
):
828 """Transform individual Index definition to protocol buffer.
831 app_id: Application id for new protocol buffer CompositeIndex.
832 index_definition: datastore_index.Index object to transform.
835 New entity_pb.CompositeIndex with default values set and index
836 information filled in.
838 proto
= entity_pb
.CompositeIndex()
840 proto
.set_app_id(app_id
)
842 proto
.set_state(entity_pb
.CompositeIndex
.WRITE_ONLY
)
844 definition_proto
= proto
.mutable_definition()
845 definition_proto
.set_entity_type(index_definition
.kind
)
846 definition_proto
.set_ancestor(index_definition
.ancestor
)
848 if index_definition
.properties
is not None:
849 for prop
in index_definition
.properties
:
850 prop_proto
= definition_proto
.add_property()
851 prop_proto
.set_name(prop
.name
)
853 if prop
.mode
== 'geospatial':
854 prop_proto
.set_mode(entity_pb
.Index_Property
.GEOSPATIAL
)
855 elif prop
.IsAscending():
856 prop_proto
.set_direction(entity_pb
.Index_Property
.ASCENDING
)
858 prop_proto
.set_direction(entity_pb
.Index_Property
.DESCENDING
)
863 def IndexDefinitionsToProtos(app_id
, index_definitions
):
864 """Transform multiple index definitions to composite index records
867 app_id: Application id for new protocol buffer CompositeIndex.
868 index_definition: A list of datastore_index.Index objects to transform.
871 A list of tranformed entity_pb.Compositeindex entities with default values
872 set and index information filled in.
874 return [IndexDefinitionToProto(app_id
, index
)
875 for index
in index_definitions
]
878 def ProtoToIndexDefinition(proto
):
879 """Transform individual index protocol buffer to index definition.
882 proto: An instance of entity_pb.CompositeIndex to transform.
885 A new instance of datastore_index.Index.
888 proto_index
= proto
.definition()
889 for prop_proto
in proto_index
.property_list():
890 prop_definition
= Property(name
=prop_proto
.name())
892 if prop_proto
.mode() == entity_pb
.Index_Property
.GEOSPATIAL
:
893 prop_definition
.mode
= 'geospatial'
894 elif prop_proto
.direction() == entity_pb
.Index_Property
.DESCENDING
:
895 prop_definition
.direction
= 'desc'
896 elif prop_proto
.direction() == entity_pb
.Index_Property
.ASCENDING
:
897 prop_definition
.direction
= 'asc'
899 properties
.append(prop_definition
)
901 index
= Index(kind
=proto_index
.entity_type(), properties
=properties
)
902 if proto_index
.ancestor():
903 index
.ancestor
= True
907 def ProtosToIndexDefinitions(protos
):
908 """Transform multiple index protocol buffers to index definitions.
911 A list of entity_pb.Index records.
913 return [ProtoToIndexDefinition(definition
) for definition
in protos
]