Revision created by MOE tool push_codebase.
[gae.git] / java / src / main / com / google / appengine / api / datastore / QuerySplitHelper.java
blob2d9f8e56f16efc68566eef9e2bc000c2841a960e
1 // Copyright 2009 Google Inc. All Rights Reserved.
3 package com.google.appengine.api.datastore;
5 import static com.google.common.base.Preconditions.checkArgument;
7 import com.google.appengine.api.datastore.Query.CompositeFilter;
8 import com.google.appengine.api.datastore.Query.Filter;
9 import com.google.appengine.api.datastore.Query.FilterPredicate;
10 import com.google.common.collect.ImmutableSet;
11 import com.google.common.collect.Lists;
12 import com.google.common.collect.Sets;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.Collections;
17 import java.util.List;
18 import java.util.Set;
20 /**
21 * Helper class for query splitting.
23 * <p>This class creates a {@link MultiQueryBuilder} that will produce a
24 * sequence of lists of queries. Each list of queries in the sequence should
25 * have their results merged (this list could consist of a single query, which
26 * can just be executed normally). The results of each list should then be
27 * concatenated with the next list in the sequence.
29 * <p>This class guarantees that the result of merging the result in the manner
30 * described above will produce a valid result set.
32 * <p>The algorithm employed here is as efficient as possible. It has been
33 * optimized to favor concatenation over merging results in memory, as
34 * concatenating results allows for greater leveraging of limit, prefetch,
35 * and count parameters. This should also improve the performance slightly
36 * when compared to a query splitter algorithm that attempts to merge all result
37 * as the results for all queries are fetched synchronously. Even when we start
38 * to use the async query framework the only loss of speed would be the time
39 * saved by the first async prefetch for each set of queries. This potential
40 * loss is both small and greatly out weighed by the value of respecting limit,
41 * prefetch and count more accurately. It can also be eliminated by starting the
42 * next round of queries before the last is done.
44 * <p>There are also many situations where all queries can be done sequentially.
45 * In these cases we can also support sorts on keys_only queries.
47 * <p>This class does not preserve the order in which the filters appear in the
48 * query provided it.
50 * <p>As the number of queries that need to be executed to generate the result
51 * set is mult_i(|component_i.filters|) (which grows very fast) we rely on
52 * {@link #MAX_PARALLEL_QUERIES} to limit the number of queries that need to be
53 * run in parallel.
56 final class QuerySplitHelper {
57 private QuerySplitHelper() {}
59 private static final int MAX_PARALLEL_QUERIES = 30;
60 private static final Collection<QuerySplitter> QUERY_SPLITTERS =
61 Collections.synchronizedCollection(Arrays.<QuerySplitter>asList(
62 new NotEqualQuerySplitter(),
63 new InQuerySplitter()));
65 /**
66 * Splits the provided {@link Query} into a list of datastore supported
67 * sub-queries using the default set of {@link QuerySplitter}s.
69 * @return the resulting list of {@code MultiQueryBuilder}.
71 static List<MultiQueryBuilder> splitQuery(Query query) {
72 return splitQuery(query, QUERY_SPLITTERS);
75 /**
76 * Splits the provided {@link Query} into a list of datastore supported
77 * sub-queries.
79 * @param query the query to split
80 * @param splitters the splitters to use
81 * @return the resulting list of {@code MultiQueryBuilder}.
83 static List<MultiQueryBuilder> splitQuery(Query query, Collection<QuerySplitter> splitters) {
84 List<MultiQueryBuilder> result;
85 if (query.getFilter() == null) {
86 result = Collections.singletonList(splitQuery(query.getFilterPredicates(), query, splitters));
87 } else {
88 Set<Set<FilterPredicate>> dnf = getDisjunctiveNormalForm(query.getFilter());
89 result = Lists.newArrayListWithCapacity(dnf.size());
90 for (Set<FilterPredicate> filters : dnf) {
91 result.add(splitQuery(filters, query, splitters));
95 int totalParallelQueries = 0;
96 for (MultiQueryBuilder builder : result) {
97 totalParallelQueries += builder.getParallelQuerySize();
100 checkArgument(totalParallelQueries <= MAX_PARALLEL_QUERIES,
101 "Splitting the provided query requires that too many subqueries are merged in memory.");
102 return result;
106 * Splits a single list of {@link FilterPredicate}s.
108 * @param filters the filters to split
109 * @param baseQuery the base query to consider
110 * @param splitters the splitters to use
111 * @return the resulting list of {@code MultiQueryBuilder}.
113 static MultiQueryBuilder splitQuery(Collection<FilterPredicate> filters, Query baseQuery,
114 Collection<QuerySplitter> splitters) {
115 List<FilterPredicate> remainingFilters = Lists.newLinkedList(filters);
116 List<QuerySplitComponent> components = Lists.newArrayList();
117 for (QuerySplitter splitter : splitters) {
118 components.addAll(splitter.split(remainingFilters, baseQuery.getSortPredicates()));
121 return new MultiQueryBuilder(remainingFilters, components,
122 !baseQuery.getSortPredicates().isEmpty());
126 * Returns the disjunctive normal form of the given filter.
128 * @return A set of sets of filters, where the outer set should be combined with OR and the inner
129 * sets should be combined with AND.
131 static Set<Set<FilterPredicate>> getDisjunctiveNormalForm(Filter filter) {
132 if (filter instanceof CompositeFilter) {
133 return getDisjunctiveNormalForm((CompositeFilter) filter);
134 } else if (filter instanceof FilterPredicate) {
135 return Collections.<Set<FilterPredicate>>singleton(
136 Sets.newLinkedHashSet(ImmutableSet.of((FilterPredicate) filter)));
139 throw new IllegalArgumentException("Unknown expression type: " + filter.getClass());
143 * @return the disjunctive normal form of the given composite filter
144 * @see #getDisjunctiveNormalForm(Filter)
146 static Set<Set<FilterPredicate>> getDisjunctiveNormalForm(CompositeFilter filter) {
147 switch (filter.getOperator()) {
148 case AND:
149 return getDisjunctiveNormalFormAnd(filter.getSubFilters());
150 case OR:
151 return getDisjunctiveNormalFormOr(filter.getSubFilters());
153 throw new IllegalArgumentException("Unknown expression operator: " + filter.getOperator());
157 * @return the disjunctive normal form of the given sub filters using the distributive law
158 * @see #getDisjunctiveNormalForm(Filter)
160 static Set<Set<FilterPredicate>> getDisjunctiveNormalFormAnd(Collection<Filter> subFilter) {
161 Set<FilterPredicate> predicates = Sets.newLinkedHashSetWithExpectedSize(subFilter.size());
162 Set<Set<FilterPredicate>> result = null;
163 for (Filter subExp : subFilter) {
164 if (subExp instanceof FilterPredicate) {
165 predicates.add((FilterPredicate) subExp);
166 } else if (subExp instanceof CompositeFilter) {
167 Set<Set<FilterPredicate>> dnf = getDisjunctiveNormalForm((CompositeFilter) subExp);
168 if (result == null) {
169 result = dnf;
170 } else {
171 Set<Set<FilterPredicate>> combinedDnf =
172 Sets.newLinkedHashSetWithExpectedSize(dnf.size() * result.size());
173 for (Set<FilterPredicate> rhs : result) {
174 for (Set<FilterPredicate> lhs : dnf) {
175 Set<FilterPredicate> combined = Sets.newLinkedHashSetWithExpectedSize(
176 rhs.size() + lhs.size());
177 combined.addAll(rhs);
178 combined.addAll(lhs);
179 combinedDnf.add(combined);
182 result = combinedDnf;
184 } else {
185 throw new IllegalArgumentException("Unknown expression type: " + subExp.getClass());
189 if (result == null) {
190 return Collections.singleton(predicates);
193 if (!predicates.isEmpty()) {
194 for (Set<FilterPredicate> clause : result) {
195 clause.addAll(predicates);
198 return result;
202 * @return the disjunctive normal form of the given sub filters by flattening them
203 * @see #getDisjunctiveNormalForm(Filter)
205 static Set<Set<FilterPredicate>> getDisjunctiveNormalFormOr(Collection<Filter> subFilters) {
206 Set<Set<FilterPredicate>> result = Sets.newLinkedHashSetWithExpectedSize(subFilters.size());
208 for (Filter subExp : subFilters) {
209 result.addAll(getDisjunctiveNormalForm(subExp));
211 return result;