1 // Copyright 2009 Google Inc. All Rights Reserved.
3 package com
.google
.appengine
.api
.datastore
;
5 import static com
.google
.common
.base
.Preconditions
.checkArgument
;
7 import com
.google
.appengine
.api
.datastore
.Query
.CompositeFilter
;
8 import com
.google
.appengine
.api
.datastore
.Query
.Filter
;
9 import com
.google
.appengine
.api
.datastore
.Query
.FilterPredicate
;
10 import com
.google
.common
.collect
.ImmutableSet
;
11 import com
.google
.common
.collect
.Lists
;
12 import com
.google
.common
.collect
.Sets
;
14 import java
.util
.Arrays
;
15 import java
.util
.Collection
;
16 import java
.util
.Collections
;
17 import java
.util
.List
;
21 * Helper class for query splitting.
23 * <p>This class creates a {@link MultiQueryBuilder} that will produce a
24 * sequence of lists of queries. Each list of queries in the sequence should
25 * have their results merged (this list could consist of a single query, which
26 * can just be executed normally). The results of each list should then be
27 * concatenated with the next list in the sequence.
29 * <p>This class guarantees that the result of merging the result in the manner
30 * described above will produce a valid result set.
32 * <p>The algorithm employed here is as efficient as possible. It has been
33 * optimized to favor concatenation over merging results in memory, as
34 * concatenating results allows for greater leveraging of limit, prefetch,
35 * and count parameters. This should also improve the performance slightly
36 * when compared to a query splitter algorithm that attempts to merge all result
37 * as the results for all queries are fetched synchronously. Even when we start
38 * to use the async query framework the only loss of speed would be the time
39 * saved by the first async prefetch for each set of queries. This potential
40 * loss is both small and greatly out weighed by the value of respecting limit,
41 * prefetch and count more accurately. It can also be eliminated by starting the
42 * next round of queries before the last is done.
44 * <p>There are also many situations where all queries can be done sequentially.
45 * In these cases we can also support sorts on keys_only queries.
47 * <p>This class does not preserve the order in which the filters appear in the
50 * <p>As the number of queries that need to be executed to generate the result
51 * set is mult_i(|component_i.filters|) (which grows very fast) we rely on
52 * {@link #MAX_PARALLEL_QUERIES} to limit the number of queries that need to be
56 final class QuerySplitHelper
{
57 private QuerySplitHelper() {}
59 private static final int MAX_PARALLEL_QUERIES
= 30;
60 private static final Collection
<QuerySplitter
> QUERY_SPLITTERS
=
61 Collections
.synchronizedCollection(Arrays
.<QuerySplitter
>asList(
62 new NotEqualQuerySplitter(),
63 new InQuerySplitter()));
66 * Splits the provided {@link Query} into a list of datastore supported
67 * sub-queries using the default set of {@link QuerySplitter}s.
69 * @return the resulting list of {@code MultiQueryBuilder}.
71 static List
<MultiQueryBuilder
> splitQuery(Query query
) {
72 return splitQuery(query
, QUERY_SPLITTERS
);
76 * Splits the provided {@link Query} into a list of datastore supported
79 * @param query the query to split
80 * @param splitters the splitters to use
81 * @return the resulting list of {@code MultiQueryBuilder}.
83 static List
<MultiQueryBuilder
> splitQuery(Query query
, Collection
<QuerySplitter
> splitters
) {
84 List
<MultiQueryBuilder
> result
;
85 if (query
.getFilter() == null) {
86 result
= Collections
.singletonList(splitQuery(query
.getFilterPredicates(), query
, splitters
));
88 Set
<Set
<FilterPredicate
>> dnf
= getDisjunctiveNormalForm(query
.getFilter());
89 result
= Lists
.newArrayListWithCapacity(dnf
.size());
90 for (Set
<FilterPredicate
> filters
: dnf
) {
91 result
.add(splitQuery(filters
, query
, splitters
));
95 int totalParallelQueries
= 0;
96 for (MultiQueryBuilder builder
: result
) {
97 totalParallelQueries
+= builder
.getParallelQuerySize();
100 checkArgument(totalParallelQueries
<= MAX_PARALLEL_QUERIES
,
101 "Splitting the provided query requires that too many subqueries are merged in memory.");
106 * Splits a single list of {@link FilterPredicate}s.
108 * @param filters the filters to split
109 * @param baseQuery the base query to consider
110 * @param splitters the splitters to use
111 * @return the resulting list of {@code MultiQueryBuilder}.
113 static MultiQueryBuilder
splitQuery(Collection
<FilterPredicate
> filters
, Query baseQuery
,
114 Collection
<QuerySplitter
> splitters
) {
115 List
<FilterPredicate
> remainingFilters
= Lists
.newLinkedList(filters
);
116 List
<QuerySplitComponent
> components
= Lists
.newArrayList();
117 for (QuerySplitter splitter
: splitters
) {
118 components
.addAll(splitter
.split(remainingFilters
, baseQuery
.getSortPredicates()));
121 return new MultiQueryBuilder(remainingFilters
, components
,
122 !baseQuery
.getSortPredicates().isEmpty());
126 * Returns the disjunctive normal form of the given filter.
128 * @return A set of sets of filters, where the outer set should be combined with OR and the inner
129 * sets should be combined with AND.
131 static Set
<Set
<FilterPredicate
>> getDisjunctiveNormalForm(Filter filter
) {
132 if (filter
instanceof CompositeFilter
) {
133 return getDisjunctiveNormalForm((CompositeFilter
) filter
);
134 } else if (filter
instanceof FilterPredicate
) {
135 return Collections
.<Set
<FilterPredicate
>>singleton(
136 Sets
.newLinkedHashSet(ImmutableSet
.of((FilterPredicate
) filter
)));
139 throw new IllegalArgumentException("Unknown expression type: " + filter
.getClass());
143 * @return the disjunctive normal form of the given composite filter
144 * @see #getDisjunctiveNormalForm(Filter)
146 static Set
<Set
<FilterPredicate
>> getDisjunctiveNormalForm(CompositeFilter filter
) {
147 switch (filter
.getOperator()) {
149 return getDisjunctiveNormalFormAnd(filter
.getSubFilters());
151 return getDisjunctiveNormalFormOr(filter
.getSubFilters());
153 throw new IllegalArgumentException("Unknown expression operator: " + filter
.getOperator());
157 * @return the disjunctive normal form of the given sub filters using the distributive law
158 * @see #getDisjunctiveNormalForm(Filter)
160 static Set
<Set
<FilterPredicate
>> getDisjunctiveNormalFormAnd(Collection
<Filter
> subFilter
) {
161 Set
<FilterPredicate
> predicates
= Sets
.newLinkedHashSetWithExpectedSize(subFilter
.size());
162 Set
<Set
<FilterPredicate
>> result
= null;
163 for (Filter subExp
: subFilter
) {
164 if (subExp
instanceof FilterPredicate
) {
165 predicates
.add((FilterPredicate
) subExp
);
166 } else if (subExp
instanceof CompositeFilter
) {
167 Set
<Set
<FilterPredicate
>> dnf
= getDisjunctiveNormalForm((CompositeFilter
) subExp
);
168 if (result
== null) {
171 Set
<Set
<FilterPredicate
>> combinedDnf
=
172 Sets
.newLinkedHashSetWithExpectedSize(dnf
.size() * result
.size());
173 for (Set
<FilterPredicate
> rhs
: result
) {
174 for (Set
<FilterPredicate
> lhs
: dnf
) {
175 Set
<FilterPredicate
> combined
= Sets
.newLinkedHashSetWithExpectedSize(
176 rhs
.size() + lhs
.size());
177 combined
.addAll(rhs
);
178 combined
.addAll(lhs
);
179 combinedDnf
.add(combined
);
182 result
= combinedDnf
;
185 throw new IllegalArgumentException("Unknown expression type: " + subExp
.getClass());
189 if (result
== null) {
190 return Collections
.singleton(predicates
);
193 if (!predicates
.isEmpty()) {
194 for (Set
<FilterPredicate
> clause
: result
) {
195 clause
.addAll(predicates
);
202 * @return the disjunctive normal form of the given sub filters by flattening them
203 * @see #getDisjunctiveNormalForm(Filter)
205 static Set
<Set
<FilterPredicate
>> getDisjunctiveNormalFormOr(Collection
<Filter
> subFilters
) {
206 Set
<Set
<FilterPredicate
>> result
= Sets
.newLinkedHashSetWithExpectedSize(subFilters
.size());
208 for (Filter subExp
: subFilters
) {
209 result
.addAll(getDisjunctiveNormalForm(subExp
));