App Engine Python SDK version 1.9.12
[gae.git] / python / google / appengine / api / search / search.py
blob27dc8ab2a10afdaec3ec5006b7940ebd60dafc26
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
19 """A Python Search API used by app developers.
21 Contains methods used to interface with Search API.
22 Contains API classes that forward to apiproxy.
23 """
30 import datetime
31 import logging
32 import re
33 import string
34 import sys
35 import warnings
37 from google.appengine.datastore import document_pb
38 from google.appengine.api import apiproxy_stub_map
39 from google.appengine.api import datastore_types
40 from google.appengine.api import namespace_manager
41 from google.appengine.api.search import expression_parser
42 from google.appengine.api.search import query_parser
43 from google.appengine.api.search import search_service_pb
44 from google.appengine.api.search import search_util
45 from google.appengine.datastore import datastore_rpc
46 from google.appengine.runtime import apiproxy_errors
# Public API of this module: the names exported by a star-import.
__all__ = [
    'AtomField',
    'ConcurrentTransactionError',
    'Cursor',
    'DateField',
    'DeleteError',
    'DeleteResult',
    'Document',
    'DOCUMENT_ID_FIELD_NAME',
    'Error',
    'ExpressionError',
    'Field',
    'FieldExpression',
    'HtmlField',
    'GeoField',
    'GeoPoint',
    'get_indexes',
    'get_indexes_async',
    'GetResponse',
    'Index',
    'InternalError',
    'InvalidRequest',
    'LANGUAGE_FIELD_NAME',
    'MatchScorer',
    'MAXIMUM_DOCUMENT_ID_LENGTH',
    'MAXIMUM_DOCUMENTS_PER_PUT_REQUEST',
    'MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH',
    'MAXIMUM_EXPRESSION_LENGTH',
    'MAXIMUM_FIELD_ATOM_LENGTH',
    'MAXIMUM_FIELD_NAME_LENGTH',
    'MAXIMUM_FIELD_VALUE_LENGTH',
    'MAXIMUM_FIELDS_RETURNED_PER_SEARCH',
    'MAXIMUM_GET_INDEXES_OFFSET',
    'MAXIMUM_INDEX_NAME_LENGTH',
    'MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST',
    'MAXIMUM_NUMBER_FOUND_ACCURACY',
    'MAXIMUM_QUERY_LENGTH',
    'MAXIMUM_SEARCH_OFFSET',
    'MAXIMUM_SORTED_DOCUMENTS',
    'MAX_DATE',
    'MAX_NUMBER_VALUE',
    'MIN_DATE',
    'MIN_NUMBER_VALUE',
    'NumberField',
    'OperationResult',
    'PutError',
    'PutResult',
    'Query',
    'QueryError',
    'QueryOptions',
    'RANK_FIELD_NAME',
    'RescoringMatchScorer',
    'SCORE_FIELD_NAME',
    'ScoredDocument',
    'SearchResults',
    'SortExpression',
    'SortOptions',
    'TextField',
    'Timeout',
    'TIMESTAMP_FIELD_NAME',
    'TransientError',
    ]
# Limits applied by the validation helpers in this module. String limits are
# compared against the UTF-8 encoded length (see _ValidateString).
MAXIMUM_INDEX_NAME_LENGTH = 100  # index names, name prefixes, start names
MAXIMUM_FIELD_VALUE_LENGTH = 1024 * 1024  # text and html field values
MAXIMUM_FIELD_ATOM_LENGTH = 500  # atom field values
MAXIMUM_FIELD_NAME_LENGTH = 500  # field names
MAXIMUM_DOCUMENT_ID_LENGTH = 500  # document ids
MAXIMUM_DOCUMENTS_PER_PUT_REQUEST = 200
MAXIMUM_EXPRESSION_LENGTH = 5000  # expressions passed to _CheckExpression
MAXIMUM_QUERY_LENGTH = 2000
MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH = 1000
MAXIMUM_SEARCH_OFFSET = 1000

MAXIMUM_SORTED_DOCUMENTS = 10000  # upper bound used by _CheckSortLimit
MAXIMUM_NUMBER_FOUND_ACCURACY = 10000
MAXIMUM_FIELDS_RETURNED_PER_SEARCH = 100
MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST = 1000  # get_indexes `limit` bound
MAXIMUM_GET_INDEXES_OFFSET = 1000  # get_indexes `offset` bound


# Names of built-in fields; all start with an underscore, which user field
# names cannot (see _FIELD_NAME_PATTERN).
DOCUMENT_ID_FIELD_NAME = '_doc_id'

LANGUAGE_FIELD_NAME = '_lang'

RANK_FIELD_NAME = '_rank'

SCORE_FIELD_NAME = '_score'

TIMESTAMP_FIELD_NAME = '_timestamp'


# Accepts any two characters, or two characters, '_', two characters
# (e.g. 'en' or 'en_US'); used by _CheckLanguage.
_LANGUAGE_RE = re.compile('^(.{2}|.{2}_.{2})$')

_MAXIMUM_STRING_LENGTH = 500  # default max_len of _ValidateString
_MAXIMUM_CURSOR_LENGTH = 10000

# Printable ASCII characters minus all whitespace characters.
_VISIBLE_PRINTABLE_ASCII = frozenset(
    set(string.printable) - set(string.whitespace))
_FIELD_NAME_PATTERN = '^[A-Za-z][A-Za-z0-9_]*$'

# Largest and smallest naive datetimes representable by the datetime module.
MAX_DATE = datetime.datetime(
    datetime.MAXYEAR, 12, 31, 23, 59, 59, 999999, tzinfo=None)
MIN_DATE = datetime.datetime(
    datetime.MINYEAR, 1, 1, 0, 0, 0, 0, tzinfo=None)


# Inclusive range accepted for NumberField values.
MAX_NUMBER_VALUE = 2147483647
MIN_NUMBER_VALUE = -2147483647


# Protocol buffer field value types whose content is a plain string.
_PROTO_FIELDS_STRING_VALUE = frozenset([document_pb.FieldValue.TEXT,
                                        document_pb.FieldValue.HTML,
                                        document_pb.FieldValue.ATOM])
class Error(Exception):
  """Indicates a call on the search API has failed.

  Base class of every exception defined by this module.
  """


class InternalError(Error):
  """Indicates a call on the search API has failed on the internal backend."""


class TransientError(Error):
  """Indicates a call on the search API has failed, but retrying may succeed."""


class InvalidRequest(Error):
  """Indicates an invalid request was made on the search API by the client."""


class QueryError(Error):
  """An error occurred while parsing a query input string."""


class ExpressionError(Error):
  """An error occurred while parsing an expression input string."""


class Timeout(Error):
  """Indicates a call on the search API could not finish before its deadline."""


class ConcurrentTransactionError(Error):
  """Indicates a call on the search API failed due to concurrent updates."""
def _ConvertToUnicode(some_string):
  """Returns some_string as a unicode object.

  Args:
    some_string: None, a unicode object, or a UTF-8 encoded byte string.

  Returns:
    None or the unicode object unchanged; otherwise the value decoded as UTF-8.
  """
  if some_string is None or isinstance(some_string, unicode):
    # Nothing to decode: pass None and unicode objects straight through.
    return some_string
  return unicode(some_string, 'utf-8')
def _ConcatenateErrorMessages(prefix, status):
  """Builds an error message out of prefix and status.error_detail().

  Args:
    prefix: The leading part of the message.
    status: An object exposing an error_detail() method.

  Returns:
    prefix alone when there is no error detail, otherwise
    '<prefix>: <error detail>'.
  """
  if not status.error_detail():
    return prefix
  return prefix + ': ' + status.error_detail()
class _RpcOperationFuture(object):
  """Represents the future result of a search RPC sent to a backend."""

  def __init__(self, call, request, response, deadline, get_result_hook):
    """Initializer.

    Validates the deadline, creates a 'search' UserRPC and starts the call.

    Args:
      call: Method name to call, as a string
      request: The request object
      response: The response object
      deadline: Deadline for RPC call in seconds; if None use the default.
      get_result_hook: Required result hook. Must be a function that takes
        no arguments. Its return value is returned by get_result().
    """
    _ValidateDeadline(deadline)
    self._get_result_hook = get_result_hook
    self._rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline)
    self._rpc.make_call(call, request, response)

  def get_result(self):
    """Waits for the RPC and returns the value produced by the result hook.

    Returns:
      Whatever get_result_hook() returns.

    Raises:
      The result of _ToSearchError() applied to any
      apiproxy_errors.ApplicationError reported by the RPC.
    """
    self._rpc.wait()
    try:
      self._rpc.check_success()
    except apiproxy_errors.ApplicationError as e:
      raise _ToSearchError(e)
    return self._get_result_hook()
class _SimpleOperationFuture(object):
  """Adapts a late-binding function to a future.

  get_result() feeds the wrapped future's result through the function.
  """

  def __init__(self, future, function):
    """Initializer.

    Args:
      future: An object with a get_result() method.
      function: A one-argument callable applied to the future's result.
    """
    self._function = function
    self._future = future

  def get_result(self):
    """Returns function(future.get_result())."""
    inner_result = self._future.get_result()
    return self._function(inner_result)
class _WrappedValueFuture(object):
  """Adapts an immediately-known result to a future."""

  def __init__(self, result):
    """Initializer.

    Args:
      result: The value that get_result() will hand back.
    """
    self._result = result

  def get_result(self):
    """Returns the result given at construction time."""
    return self._result
class OperationResult(object):
  """Represents result of individual operation of a batch index or removal.

  This is an abstract class.
  """

  # Status codes an operation may report.
  OK = 'OK'
  INVALID_REQUEST = 'INVALID_REQUEST'
  TRANSIENT_ERROR = 'TRANSIENT_ERROR'
  INTERNAL_ERROR = 'INTERNAL_ERROR'
  TIMEOUT = 'TIMEOUT'
  CONCURRENT_TRANSACTION = 'CONCURRENT_TRANSACTION'

  _CODES = frozenset([OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR,
                      TIMEOUT, CONCURRENT_TRANSACTION])

  def __init__(self, code, message=None, id=None):
    """Initializer.

    Args:
      code: The error or success code of the operation.
      message: An error message associated with any error.
      id: The id of the object some operation was performed on.

    Raises:
      TypeError: If an unknown attribute is passed.
      ValueError: If an unknown code is passed.
    """
    # The message is converted before the code is checked, as in the original
    # statement order.
    self._message = _ConvertToUnicode(message)
    self._code = code
    if code not in self._CODES:
      raise ValueError('Unknown operation result code %r, must be one of %s'
                       % (self._code, self._CODES))
    self._id = _ConvertToUnicode(id)

  @property
  def code(self):
    """Returns the code indicating the status of the operation."""
    return self._code

  @property
  def message(self):
    """Returns any associated error message if the operation was in error."""
    return self._message

  @property
  def id(self):
    """Returns the Id of the object the operation was performed on."""
    return self._id

  def __repr__(self):
    attributes = [('code', self.code),
                  ('message', self.message),
                  ('id', self.id)]
    return _Repr(self, attributes)
# Maps SearchServiceError status codes from the protocol buffer onto the
# corresponding OperationResult code strings.
_ERROR_OPERATION_CODE_MAP = {
    search_service_pb.SearchServiceError.OK: OperationResult.OK,
    search_service_pb.SearchServiceError.INVALID_REQUEST:
    OperationResult.INVALID_REQUEST,
    search_service_pb.SearchServiceError.TRANSIENT_ERROR:
    OperationResult.TRANSIENT_ERROR,
    search_service_pb.SearchServiceError.INTERNAL_ERROR:
    OperationResult.INTERNAL_ERROR,
    search_service_pb.SearchServiceError.TIMEOUT:
    OperationResult.TIMEOUT,
    search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
    OperationResult.CONCURRENT_TRANSACTION,
    }
class PutResult(OperationResult):
  """The result of indexing a single object.

  Adds nothing to OperationResult beyond a distinct type.
  """


class DeleteResult(OperationResult):
  """The result of deleting a single document.

  Adds nothing to OperationResult beyond a distinct type.
  """
class PutError(Error):
  """Indicates some error occurred indexing one of the objects requested.

  The per-object outcomes are available through the `results` property.
  """

  def __init__(self, message, results):
    """Initializer.

    Args:
      message: A message detailing the cause of the failure to index some
        document.
      results: A list of PutResult corresponding to the list of objects
        requested to be indexed.
    """
    super(PutError, self).__init__(message)
    # Stored as given; no copy is made.
    self._results = results

  @property
  def results(self):
    """Returns PutResult list corresponding to objects indexed."""
    return self._results
class DeleteError(Error):
  """Indicates some error occurred deleting one of the objects requested.

  The per-document outcomes are available through the `results` property.
  """

  def __init__(self, message, results):
    """Initializer.

    Args:
      message: A message detailing the cause of the failure to delete some
        document.
      results: A list of DeleteResult corresponding to the list of Ids of
        objects requested to be deleted.
    """
    super(DeleteError, self).__init__(message)
    # Stored as given; no copy is made.
    self._results = results

  @property
  def results(self):
    """Returns DeleteResult list corresponding to Documents deleted."""
    return self._results
# Maps SearchServiceError status codes onto the exception class raised for
# them. OK is deliberately absent; codes missing from this map are handled by
# the callers (_ToSearchError returns the original error, _CheckStatus raises
# InternalError).
_ERROR_MAP = {
    search_service_pb.SearchServiceError.INVALID_REQUEST: InvalidRequest,
    search_service_pb.SearchServiceError.TRANSIENT_ERROR: TransientError,
    search_service_pb.SearchServiceError.INTERNAL_ERROR: InternalError,
    search_service_pb.SearchServiceError.TIMEOUT: Timeout,
    search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
    ConcurrentTransactionError,
    }
def _ToSearchError(error):
  """Translate an application error to a search Error, if possible.

  Args:
    error: An ApplicationError to translate.

  Returns:
    An Error built from error.error_detail if the application error code is
    listed in _ERROR_MAP, otherwise the given apiproxy_errors.ApplicationError
    itself.
  """
  error_class = _ERROR_MAP.get(error.application_error)
  if error_class is None:
    # Unknown code: hand the original error back untouched.
    return error
  return error_class(error.error_detail)
def _CheckInteger(value, name, zero_ok=True, upper_bound=None):
  """Checks whether value is an integer between the lower and upper bounds.

  Type and lower-bound checking is delegated to
  datastore_types.ValidateInteger (called with empty_ok=True).

  Args:
    value: The value to check.
    name: The name of the value, to use in error messages.
    zero_ok: True if zero is allowed.
    upper_bound: The upper (inclusive) bound of the value. Optional.

  Returns:
    The checked value.

  Raises:
    ValueError: If the value is not a int or long, or is out of range.
  """
  datastore_types.ValidateInteger(value, name, ValueError, empty_ok=True,
                                  zero_ok=zero_ok)
  too_large = upper_bound is not None and value > upper_bound
  if too_large:
    raise ValueError('%s, %d must be <= %d' % (name, value, upper_bound))
  return value
def _CheckEnum(value, name, values=None):
  """Checks whether value is a member of the set of values given.

  Args:
    value: The value to check.
    name: The name of the value, to use in error messages.
    values: The iterable of possible values. When omitted (None) there are no
      allowable values, so every value is rejected.

  Returns:
    The checked value.

  Raises:
    ValueError: If the value is not one of the allowable values.
  """
  # `value not in None` would raise TypeError; treat a missing collection as
  # empty so the documented ValueError is raised instead.
  if values is None or value not in values:
    raise ValueError('%s, %r must be in %s' % (name, value, values))
  return value
def _CheckNumber(value, name):
  """Checks whether value is a number.

  Args:
    value: The value to check.
    name: The name of the value, to use in error messages.

  Returns:
    The checked value.

  Raises:
    TypeError: If the value is not an int, long or float (None included).
  """
  if isinstance(value, (int, long, float)):
    return value
  raise TypeError('%s must be a int, long or float, got %s' %
                  (name, value.__class__.__name__))
def _CheckStatus(status):
  """Checks whether a RequestStatus has a value of OK.

  Args:
    status: The RequestStatus to check.

  Raises:
    Error: A subclass of Error if the value of status is not OK.
      The subclass of Error is chosen based on value of the status code.
    InternalError: If the status value is unknown.
  """
  code = status.code()
  if code == search_service_pb.SearchServiceError.OK:
    return
  # Codes missing from _ERROR_MAP are reported as InternalError.
  error_class = _ERROR_MAP.get(code, InternalError)
  raise error_class(status.error_detail())
def _ValidateString(value,
                    name='unused',
                    max_len=_MAXIMUM_STRING_LENGTH,
                    empty_ok=False,
                    type_exception=TypeError,
                    value_exception=ValueError):
  """Raises an exception if value is not a valid string or a subclass thereof.

  A string is valid if it is a basestring, is non-empty (unless empty_ok) and
  its UTF-8 encoding is at most max_len bytes long. The exception type can be
  specified with the exception arguments for type and value issues.

  Args:
    value: The value to validate.
    name: The name of this value; used in the exception message.
    max_len: The maximum allowed length, in bytes.
    empty_ok: Allow empty value (None or the empty string).
    type_exception: The type of exception to raise if not a basestring.
    value_exception: The type of exception to raise if invalid value.

  Returns:
    The checked string, or None when value is None and empty_ok is true.

  Raises:
    TypeError: If value is not a basestring or subclass.
    ValueError: If the value is None or empty while empty_ok is false, or is
      longer than max_len.
  """
  if value is None:
    if empty_ok:
      return
  elif not isinstance(value, basestring):
    raise type_exception('%s must be a basestring; got %s:' %
                         (name, value.__class__.__name__))
  if not (value or empty_ok):
    raise value_exception('%s must not be empty.' % name)

  encoded_length = len(value.encode('utf-8'))
  if encoded_length > max_len:
    raise value_exception('%s must be under %d bytes.' % (name, max_len))
  return value
def _ValidateVisiblePrintableAsciiNotReserved(value, name):
  """Checks if value is a visible printable ASCII string not starting with '!'.

  Whitespace characters are excluded. Printable visible ASCII
  strings starting with '!' are reserved for internal use.

  Args:
    value: The string to validate.
    name: The name of this string; used in the exception message.

  Returns:
    The checked string.

  Raises:
    ValueError: If the string is not visible printable ASCII, or starts with
      '!'.
  """
  # The character check runs first, so it wins when both rules are violated.
  if any(char not in _VISIBLE_PRINTABLE_ASCII for char in value):
    raise ValueError(
        '%r must be visible printable ASCII: %r'
        % (name, value))
  if value.startswith('!'):
    raise ValueError('%r must not start with "!": %r' % (name, value))
  return value
def _CheckIndexName(index_name):
  """Checks index_name is a string which is not too long, and returns it.

  Index names must be non-empty, at most MAXIMUM_INDEX_NAME_LENGTH bytes,
  visible printable ASCII and must not start with '!'.
  """
  _ValidateString(index_name, 'index name', max_len=MAXIMUM_INDEX_NAME_LENGTH)
  checked_name = _ValidateVisiblePrintableAsciiNotReserved(
      index_name, 'index_name')
  return checked_name
def _CheckFieldName(name):
  """Checks field name is not too long and matches field name pattern.

  Field name pattern: "[A-Za-z][A-Za-z0-9_]*".

  Args:
    name: The field name to check.

  Returns:
    The checked name.

  Raises:
    TypeError: If name is not a string.
    ValueError: If name is empty, longer than MAXIMUM_FIELD_NAME_LENGTH bytes,
      or does not match the pattern.
  """
  _ValidateString(name, 'name', MAXIMUM_FIELD_NAME_LENGTH)
  match = re.match(_FIELD_NAME_PATTERN, name)
  # '$' also matches just before a trailing newline, so a name such as
  # 'abc\n' would slip through; require the match to cover the whole name.
  if match is None or match.end() != len(name):
    raise ValueError('field name "%s" should match pattern: %s' %
                     (name, _FIELD_NAME_PATTERN))
  return name
def _CheckExpression(expression):
  """Checks the expression is a valid, parseable expression string.

  Args:
    expression: The expression string to check.

  Returns:
    The checked expression.

  Raises:
    TypeError: If expression is not a string.
    ValueError: If expression is empty or longer than
      MAXIMUM_EXPRESSION_LENGTH bytes.
    ExpressionError: If the expression parser rejects the expression.
  """
  expression = _ValidateString(expression, max_len=MAXIMUM_EXPRESSION_LENGTH)
  try:
    expression_parser.Parse(expression)
  except expression_parser.ExpressionException:
    # The parser's own exception is not used; callers only see ExpressionError.
    raise ExpressionError('Failed to parse expression "%s"' % expression)
  return expression
def _CheckFieldNames(names):
  """Checks each name in names is a valid field name.

  Args:
    names: An iterable of field names.

  Returns:
    The names argument itself, once every entry passed _CheckFieldName.
  """
  for field_name in names:
    _CheckFieldName(field_name)
  return names
def _GetList(a_list):
  """Utility function that converts None to the empty list.

  Args:
    a_list: None or an iterable.

  Returns:
    A new list holding the items of a_list; empty if a_list is None.
  """
  return [] if a_list is None else list(a_list)
def _ConvertToList(arg):
  """Converts arg to a list, empty if None, single element if not a list.

  Args:
    arg: None, a string, an iterable, or any other single value.

  Returns:
    [] for None, [arg] for a string or a non-iterable value, otherwise a list
    of the items arg yields.
  """
  if arg is None:
    return []
  if isinstance(arg, basestring):
    # Strings are iterable but must be kept whole.
    return [arg]
  try:
    return list(iter(arg))
  except TypeError:
    return [arg]
def _ConvertToUnicodeList(arg):
  """Converts arg to a list of unicode objects.

  arg is first normalised with _ConvertToList, then every element is passed
  through _ConvertToUnicode.
  """
  items = _ConvertToList(arg)
  return [_ConvertToUnicode(item) for item in items]
def _CheckDocumentId(doc_id):
  """Checks doc_id is a valid document identifier, and returns it.

  Document ids must be non-empty, at most MAXIMUM_DOCUMENT_ID_LENGTH bytes,
  visible printable ASCII and must not start with '!'.
  """
  _ValidateString(doc_id, 'doc_id', max_len=MAXIMUM_DOCUMENT_ID_LENGTH)
  _ValidateVisiblePrintableAsciiNotReserved(doc_id, 'doc_id')
  return doc_id
def _CheckText(value, name='value', empty_ok=True):
  """Checks the field text is a valid string.

  Args:
    value: The text to check.
    name: The name used for the value in error messages.
    empty_ok: Whether None or the empty string is acceptable.

  Returns:
    The checked text, at most MAXIMUM_FIELD_VALUE_LENGTH bytes long.
  """
  return _ValidateString(value, name, max_len=MAXIMUM_FIELD_VALUE_LENGTH,
                         empty_ok=empty_ok)
def _CheckHtml(html):
  """Checks the field html is a string of acceptable length.

  Only type and size (MAXIMUM_FIELD_VALUE_LENGTH bytes) are checked; None and
  the empty string are accepted.
  """
  return _ValidateString(html, name='html',
                         max_len=MAXIMUM_FIELD_VALUE_LENGTH, empty_ok=True)
def _CheckAtom(atom):
  """Checks the field atom is a valid string.

  The atom may be None or empty and must not exceed
  MAXIMUM_FIELD_ATOM_LENGTH bytes.
  """
  return _ValidateString(atom, name='atom',
                         max_len=MAXIMUM_FIELD_ATOM_LENGTH, empty_ok=True)
def _CheckDate(date):
  """Checks the date is in the correct range.

  Args:
    date: A datetime.datetime or datetime.date.

  Returns:
    The checked date.

  Raises:
    TypeError: If date is neither a datetime.datetime nor a datetime.date, or
      lies outside the MIN_DATE..MAX_DATE range.
  """
  # datetime.datetime is tested first because it subclasses datetime.date.
  if isinstance(date, datetime.datetime):
    earliest, latest = MIN_DATE, MAX_DATE
  elif isinstance(date, datetime.date):
    earliest, latest = MIN_DATE.date(), MAX_DATE.date()
  else:
    raise TypeError('date must be datetime.datetime or datetime.date')
  if date < earliest or date > latest:
    raise TypeError('date must be between %s and %s (got %s)' %
                    (MIN_DATE, MAX_DATE, date))
  return date
def _CheckLanguage(language):
  """Checks language is None or a string that matches _LANGUAGE_RE.

  Args:
    language: None or a language code such as 'en' or 'en_US'.

  Returns:
    The checked language.

  Raises:
    TypeError: If language is neither None nor a string.
    ValueError: If language does not match _LANGUAGE_RE.
  """
  if language is None:
    return None
  if not isinstance(language, basestring):
    raise TypeError('language must be a basestring, got %s' %
                    language.__class__.__name__)
  match = re.match(_LANGUAGE_RE, language)
  # '$' also matches just before a trailing newline, so 'en\n' would pass;
  # require the match to cover the whole string.
  if match is None or match.end() != len(language):
    raise ValueError('invalid language %s. Languages should be two letters.'
                     % language)
  return language
def _CheckDocument(document):
  """Check that the document is valid.

  This checks for all server-side requirements on Documents. Currently, that
  means ensuring that there are no repeated number or date fields.

  Args:
    document: The search.Document to check for validity.

  Raises:
    ValueError if the document is invalid in a way that would trigger an
    PutError from the server.
  """
  # Names are tracked per field type, so one name may appear once as a number
  # field and once as a date field.
  names_seen_by_type = ((NumberField, set()), (DateField, set()))
  for field in document.fields:
    for field_class, names_seen in names_seen_by_type:
      if not isinstance(field, field_class):
        continue
      if field.name in names_seen:
        raise ValueError(
            'Invalid document %s: field %s with type date or number may not '
            'be repeated.' % (document.doc_id, field.name))
      names_seen.add(field.name)
      break
def _CheckSortLimit(limit):
  """Checks the limit on number of docs to score or sort is not too large.

  Returns:
    The checked limit, which may not exceed MAXIMUM_SORTED_DOCUMENTS.
  """
  checked_limit = _CheckInteger(
      limit, 'limit', upper_bound=MAXIMUM_SORTED_DOCUMENTS)
  return checked_limit
def _Repr(class_instance, ordered_dictionary):
  """Generates an unambiguous representation for instance and ordered dict.

  Args:
    class_instance: The object being represented; only its class name is used.
    ordered_dictionary: A sequence of (key, value) pairs, in output order.

  Returns:
    A unicode string 'search.<ClassName>(key=value, ...)' which omits pairs
    whose value is None or an empty list.
  """
  rendered_pairs = []
  for key, value in ordered_dictionary:
    # `!=` (not `==`) is kept on purpose: some values only define __eq__.
    if value is not None and value != []:
      rendered_pairs.append('%s=%r' % (key, value))
  class_name = class_instance.__class__.__name__
  return u'search.%s(%s)' % (class_name, ', '.join(rendered_pairs))
def _ListIndexesResponsePbToGetResponse(response):
  """Returns a GetResponse constructed from get_indexes response pb.

  Every entry of response.index_metadata_list() is converted with
  _NewIndexFromPb, preserving order.
  """
  indexes = []
  for index_metadata in response.index_metadata_list():
    indexes.append(_NewIndexFromPb(index_metadata))
  return GetResponse(results=indexes)
@datastore_rpc._positional(7)
def get_indexes(namespace='', offset=None, limit=20,
                start_index_name=None, include_start_index=True,
                index_name_prefix=None, fetch_schema=False, deadline=None,
                **kwargs):
  """Returns a list of available indexes.

  Blocking counterpart of get_indexes_async(): issues the request and waits
  for its result.

  Args:
    namespace: The namespace of indexes to be returned. Pass None to use the
      current namespace; the default '' is passed through unchanged.
    offset: The offset of the first returned index.
    limit: The number of indexes to return.
    start_index_name: The name of the first index to be returned.
    include_start_index: Whether or not to return the start index.
    index_name_prefix: The prefix used to select returned indexes.
    fetch_schema: Whether to retrieve Schema for each Index or not.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.

  Returns:
    The GetResponse containing a list of available indexes.

  Raises:
    InternalError: If the request fails on internal servers.
    TypeError: If any of the parameters have invalid types, or an unknown
      attribute is passed.
    ValueError: If any of the parameters have invalid values (e.g., a
      negative deadline).
  """
  future = get_indexes_async(
      namespace, offset, limit, start_index_name, include_start_index,
      index_name_prefix, fetch_schema, deadline=deadline, **kwargs)
  return future.get_result()
@datastore_rpc._positional(7)
def get_indexes_async(namespace='', offset=None, limit=20,
                      start_index_name=None, include_start_index=True,
                      index_name_prefix=None, fetch_schema=False, deadline=None,
                      **kwargs):
  """Asynchronously returns a list of available indexes.

  Identical to get_indexes() except that it returns a future. Call
  get_result() on the return value to block on the call and get its result.

  The only extra keyword argument accepted is app_id; any other raises
  TypeError.
  """
  app_id = kwargs.pop('app_id', None)
  if kwargs:
    raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))

  list_request = search_service_pb.ListIndexesRequest()
  list_params = list_request.mutable_params()

  # Only an explicit None selects the namespace manager's current namespace.
  if namespace is None:
    namespace = namespace_manager.get_namespace()
    if namespace is None:
      namespace = u''
  namespace_manager.validate_namespace(namespace, exception=ValueError)
  list_params.set_namespace(namespace)

  if offset is not None:
    checked_offset = _CheckInteger(offset, 'offset', zero_ok=True,
                                   upper_bound=MAXIMUM_GET_INDEXES_OFFSET)
    list_params.set_offset(checked_offset)

  checked_limit = _CheckInteger(
      limit, 'limit', zero_ok=False,
      upper_bound=MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST)
  list_params.set_limit(checked_limit)

  if start_index_name is not None:
    checked_start = _ValidateString(start_index_name, 'start_index_name',
                                    MAXIMUM_INDEX_NAME_LENGTH,
                                    empty_ok=False)
    list_params.set_start_index_name(checked_start)
  if include_start_index is not None:
    list_params.set_include_start_index(bool(include_start_index))
  if index_name_prefix is not None:
    checked_prefix = _ValidateString(index_name_prefix, 'index_name_prefix',
                                     MAXIMUM_INDEX_NAME_LENGTH,
                                     empty_ok=False)
    list_params.set_index_name_prefix(checked_prefix)
  list_params.set_fetch_schema(fetch_schema)

  list_response = search_service_pb.ListIndexesResponse()
  if app_id:
    list_request.set_app_id(app_id)

  def build_result():
    # Runs once the RPC has completed: check its status, then convert.
    _CheckStatus(list_response.status())
    return _ListIndexesResponsePbToGetResponse(list_response)

  return _RpcOperationFuture(
      'ListIndexes', list_request, list_response, deadline, build_result)
class Field(object):
  """An abstract base class which represents a field of a document.

  This class should not be directly instantiated. Subclasses supply
  _CheckValue() and _CopyValueToProtocolBuffer().
  """

  # Names of the supported field types.
  TEXT = 'TEXT'
  HTML = 'HTML'
  ATOM = 'ATOM'
  DATE = 'DATE'
  NUMBER = 'NUMBER'
  GEO_POINT = 'GEO_POINT'

  _FIELD_TYPES = frozenset([TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT])

  def __init__(self, name, value, language=None):
    """Initializer.

    Args:
      name: The name of the field. Field names must have maximum length
        MAXIMUM_FIELD_NAME_LENGTH and match pattern "[A-Za-z][A-Za-z0-9_]*".
      value: The value of the field which can be a str, unicode or date.
      language: The ISO 693-1 two letter code of the language used in the value.
        See http://www.sil.org/iso639-3/codes.asp?order=639_1&letter=%25 for a
        list of valid codes. Correct specification of language code will assist
        in correct tokenization of the field. If None is given, then the
        language code of the document will be used.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    unicode_name = _ConvertToUnicode(name)
    self._name = _CheckFieldName(unicode_name)
    self._value = self._CheckValue(value)
    unicode_language = _ConvertToUnicode(language)
    self._language = _CheckLanguage(unicode_language)

  @property
  def name(self):
    """Returns the name of the field."""
    return self._name

  @property
  def language(self):
    """Returns the code of the language the content in value is written in."""
    return self._language

  @property
  def value(self):
    """Returns the value of the field."""
    return self._value

  def _CheckValue(self, value):
    """Checks the value is valid for the given type.

    Args:
      value: The value to check.

    Returns:
      The checked value.
    """
    raise NotImplementedError('_CheckValue is an abstract method')

  def __repr__(self):
    attributes = [('name', self.name),
                  ('language', self.language),
                  ('value', self.value)]
    return _Repr(self, attributes)

  def __eq__(self, other):
    # Equal only to instances of the same class (or a subclass of it).
    if not isinstance(other, type(self)):
      return False
    return self.__key() == other.__key()

  def __ne__(self, other):
    return not self == other

  def __key(self):
    """Returns the tuple that defines equality and hashing."""
    return (self.name, self.value, self.language)

  def __hash__(self):
    return hash(self.__key())

  def __str__(self):
    return repr(self)

  def _CopyStringValueToProtocolBuffer(self, field_value_pb):
    """Copies value to a string value in proto buf."""
    encoded_value = self.value.encode('utf-8')
    field_value_pb.set_string_value(encoded_value)
def _CopyFieldToProtocolBuffer(field, pb):
  """Copies field's contents to a document_pb.Field protocol buffer.

  Args:
    field: The Field to copy from.
    pb: The protocol buffer to fill in.

  Returns:
    The pb argument. The language is set only when the field has one, and
    the value only when it is not None.
  """
  pb.set_name(field.name.encode('utf-8'))
  value_pb = pb.mutable_value()
  language = field.language
  if language:
    value_pb.set_language(language.encode('utf-8'))
  if field.value is not None:
    field._CopyValueToProtocolBuffer(value_pb)
  return pb
class TextField(Field):
  """A Field that has text content.

  The following example shows a text field named signature with Polish content:
    TextField(name='signature', value='brzydka pogoda', language='pl')
  """

  def __init__(self, name, value=None, language=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A str or unicode object containing text.
      language: The code of the language the value is encoded in.

    Raises:
      TypeError: If value is not a string.
      ValueError: If value is longer than allowed.
    """
    # The value is converted to unicode before it is validated.
    unicode_value = _ConvertToUnicode(value)
    Field.__init__(self, name, unicode_value, language)

  def _CheckValue(self, value):
    """Validates the text with _CheckText and returns it."""
    return _CheckText(value)

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Stores the value in field_value_pb as a TEXT string value."""
    field_value_pb.set_type(document_pb.FieldValue.TEXT)
    self._CopyStringValueToProtocolBuffer(field_value_pb)
class HtmlField(Field):
  """A Field that has HTML content.

  The following example shows an html field named content:
    HtmlField(name='content', value='<html>herbata, kawa</html>', language='pl')
  """

  def __init__(self, name, value=None, language=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A str or unicode object containing the searchable content of the
        Field.
      language: The code of the language the value is encoded in.

    Raises:
      TypeError: If value is not a string.
      ValueError: If value is longer than allowed.
    """
    # The value is converted to unicode before it is validated.
    unicode_value = _ConvertToUnicode(value)
    Field.__init__(self, name, unicode_value, language)

  def _CheckValue(self, value):
    """Validates the html with _CheckHtml and returns it."""
    return _CheckHtml(value)

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Stores the value in field_value_pb as an HTML string value."""
    field_value_pb.set_type(document_pb.FieldValue.HTML)
    self._CopyStringValueToProtocolBuffer(field_value_pb)
class AtomField(Field):
  """A Field that has content to be treated as a single token for indexing.

  The following example shows an atom field named contributor:
    AtomField(name='contributor', value='foo@bar.com')
  """

  def __init__(self, name, value=None, language=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A str or unicode object to be treated as an indivisible text value.
      language: The code of the language the value is encoded in.

    Raises:
      TypeError: If value is not a string.
      ValueError: If value is longer than allowed.
    """
    # The value is converted to unicode before it is validated.
    unicode_value = _ConvertToUnicode(value)
    Field.__init__(self, name, unicode_value, language)

  def _CheckValue(self, value):
    """Validates the atom with _CheckAtom and returns it."""
    return _CheckAtom(value)

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Stores the value in field_value_pb as an ATOM string value."""
    field_value_pb.set_type(document_pb.FieldValue.ATOM)
    self._CopyStringValueToProtocolBuffer(field_value_pb)
class DateField(Field):
  """A Field that has a date or datetime value.

  The following example shows a date field named creation_date:
    DateField(name='creation_date', value=datetime.date(2011, 3, 11))
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A datetime.date or a datetime.datetime. Although the parameter
        defaults to None, _CheckDate rejects None with a TypeError.

    Raises:
      TypeError: If value is not a datetime.date or a datetime.datetime.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Validates the date with _CheckDate and returns it."""
    return _CheckDate(value)

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Stores the serialized date in field_value_pb as a DATE value."""
    field_value_pb.set_type(document_pb.FieldValue.DATE)
    serialized_date = search_util.SerializeDate(self.value)
    field_value_pb.set_string_value(serialized_date)
class NumberField(Field):
  """A Field that has a numeric value.

  The following example shows a number field named size:
    NumberField(name='size', value=10)
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A numeric value. Although the parameter defaults to None,
        _CheckNumber rejects None with a TypeError.

    Raises:
      TypeError: If value is not numeric.
      ValueError: If value is out of range.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Checks value is a number within the allowed range and returns it.

    Raises:
      TypeError: If value is not an int, long or float.
      ValueError: If value lies outside MIN_NUMBER_VALUE..MAX_NUMBER_VALUE.
    """
    value = _CheckNumber(value, 'field value')
    if value is not None and (value < MIN_NUMBER_VALUE or
                              value > MAX_NUMBER_VALUE):
      # The value is formatted with %s rather than %d: %d raises
      # OverflowError for infinite floats and drops the fractional part of
      # other floats, hiding why the value was rejected.
      raise ValueError('value, %s must be between %d and %d' %
                       (value, MIN_NUMBER_VALUE, MAX_NUMBER_VALUE))
    return value

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Stores str(value) in field_value_pb as a NUMBER value."""
    field_value_pb.set_type(document_pb.FieldValue.NUMBER)
    field_value_pb.set_string_value(str(self.value))
class GeoPoint(object):
  """Represents a point on the Earth's surface, in lat, long coordinates.

  Instances compare equal, and hash equally, when both coordinates match.
  """

  def __init__(self, latitude, longitude):
    """Initializer.

    Args:
      latitude: The angle between the equatorial plan and a line that passes
        through the GeoPoint, between -90 and 90 degrees.
      longitude: The angle east or west from a reference meridian to another
        meridian that passes through the GeoPoint, between -180 and 180 degrees.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    self._latitude = self._CheckLatitude(latitude)
    self._longitude = self._CheckLongitude(longitude)

  @property
  def latitude(self):
    """Returns the angle between equatorial plan and line thru the geo point."""
    return self._latitude

  @property
  def longitude(self):
    """Returns the angle from a reference meridian to another meridian."""
    return self._longitude

  def _CheckLatitude(self, value):
    """Checks value is a number in [-90, 90] and returns it."""
    _CheckNumber(value, 'latitude')
    if value < -90.0 or value > 90.0:
      raise ValueError('latitude must be between -90 and 90 degrees '
                       'inclusive, was %f' % value)
    return value

  def _CheckLongitude(self, value):
    """Checks value is a number in [-180, 180] and returns it."""
    _CheckNumber(value, 'longitude')
    if value < -180.0 or value > 180.0:
      raise ValueError('longitude must be between -180 and 180 degrees '
                       'inclusive, was %f' % value)
    return value

  def __eq__(self, other):
    # Comparing against a non-GeoPoint must not raise AttributeError; let
    # Python fall back to its default handling instead.
    if not isinstance(other, GeoPoint):
      return NotImplemented
    return (self.latitude == other.latitude and
            self.longitude == other.longitude)

  def __ne__(self, other):
    # Python 2 does not derive != from ==, so it is defined explicitly.
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    # Kept consistent with __eq__: equal points hash equally.
    return hash((self.latitude, self.longitude))

  def __repr__(self):
    return _Repr(self,
                 [('latitude', self.latitude),
                  ('longitude', self.longitude)])
def _CheckGeoPoint(geo_point):
  """Returns geo_point unchanged when it is a GeoPoint.

  Raises:
    TypeError: If geo_point is not a GeoPoint instance.
  """
  if isinstance(geo_point, GeoPoint):
    return geo_point
  raise TypeError('geo_point must be a GeoPoint, got %s' %
                  geo_point.__class__.__name__)
class GeoField(Field):
  """A Field whose value is a GeoPoint.

  The following example shows a geo field named place:

    GeoField(name='place', value=GeoPoint(latitude=-33.84, longitude=151.26))
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A GeoPoint value.

    Raises:
      TypeError: If value is not a GeoPoint.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Validates value with _CheckGeoPoint and returns its result."""
    checked = _CheckGeoPoint(value)
    return checked

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Tags field_value_pb as GEO and copies latitude/longitude into it."""
    field_value_pb.set_type(document_pb.FieldValue.GEO)
    target = field_value_pb.mutable_geo()
    target.set_lat(self.value.latitude)
    target.set_lng(self.value.longitude)
def _GetValue(value_pb):
  """Extracts the Python value held by a FieldValue protocol buffer.

  Args:
    value_pb: The FieldValue protocol buffer to read.

  Returns:
    The raw string for types in _PROTO_FIELDS_STRING_VALUE, a deserialized
    date for DATE, a float for NUMBER, a GeoPoint for GEO, or None when the
    corresponding value is not set on the protocol buffer.

  Raises:
    TypeError: If the protocol buffer has an unrecognised type.
  """
  kind = value_pb.type()

  if kind in _PROTO_FIELDS_STRING_VALUE:
    return value_pb.string_value() if value_pb.has_string_value() else None

  if kind == document_pb.FieldValue.DATE:
    if not value_pb.has_string_value():
      return None
    return search_util.DeserializeDate(value_pb.string_value())

  if kind == document_pb.FieldValue.NUMBER:
    if not value_pb.has_string_value():
      return None
    return float(value_pb.string_value())

  if kind == document_pb.FieldValue.GEO:
    if not value_pb.has_geo():
      return None
    point_pb = value_pb.geo()
    return GeoPoint(latitude=point_pb.lat(), longitude=point_pb.lng())

  raise TypeError('unknown FieldValue type %d' % value_pb.type())
# FieldValue types whose values are UTF-8 encoded strings.
_STRING_TYPES = set((
    document_pb.FieldValue.TEXT,
    document_pb.FieldValue.HTML,
    document_pb.FieldValue.ATOM,
))
1173 def _DecodeUTF8(pb_value):
1174 """Decodes a UTF-8 encoded string into unicode."""
1175 if pb_value is not None:
1176 return pb_value.decode('utf-8')
1177 return None
def _DecodeValue(pb_value, val_type):
  """Decodes pb_value from UTF-8 when val_type is a string type.

  Args:
    pb_value: The value read from a protocol buffer.
    val_type: The FieldValue type of pb_value.

  Returns:
    The decoded value when val_type is in _STRING_TYPES, otherwise pb_value
    unchanged.
  """
  if val_type not in _STRING_TYPES:
    return pb_value
  return _DecodeUTF8(pb_value)
def _NewFieldFromPb(pb):
  """Constructs a Field from a document_pb.Field protocol buffer.

  Args:
    pb: The document_pb.Field protocol buffer to convert.

  Returns:
    A TextField, HtmlField, AtomField, DateField, NumberField or GeoField,
    according to the type of the value held by pb.

  Raises:
    InvalidRequest: If the value type is not one of the types handled here.
  """
  name = _DecodeUTF8(pb.name())
  val_type = pb.value().type()
  value = _DecodeValue(_GetValue(pb.value()), val_type)
  lang = None
  if pb.value().has_language():
    lang = _DecodeUTF8(pb.value().language())
  if val_type == document_pb.FieldValue.TEXT:
    return TextField(name, value, lang)
  elif val_type == document_pb.FieldValue.HTML:
    return HtmlField(name, value, lang)
  elif val_type == document_pb.FieldValue.ATOM:
    return AtomField(name, value, lang)
  elif val_type == document_pb.FieldValue.DATE:
    return DateField(name, value)
  elif val_type == document_pb.FieldValue.NUMBER:
    return NumberField(name, value)
  elif val_type == document_pb.FieldValue.GEO:
    return GeoField(name, value)
  # An unknown type is an error: raise it rather than handing the exception
  # instance back to the caller as though it were a Field.
  raise InvalidRequest('Unknown field value type %d' % val_type)
class Document(object):
  """Represents a user generated document.

  The following example shows how to create a document consisting of a set
  of fields, some plain text and some in HTML.

  Document(doc_id='document_id',
           fields=[TextField(name='subject', value='going for dinner'),
                   HtmlField(name='body',
                             value='<html>I found a place.</html>'),
                   TextField(name='signature', value='brzydka pogoda',
                             language='pl')],
           language='en')
  """
  # Reference instant from which default ranks are measured.
  _FIRST_JAN_2011 = datetime.datetime(2011, 1, 1)

  def __init__(self, doc_id=None, fields=None, language='en', rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document which
        does not start with '!'. Whitespace is excluded from ids. If no id is
        provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      rank: The rank of this document used to specify the order in which
        documents are returned by search. Rank must be a non-negative integer.
        If not specified, the number of seconds since 1st Jan 2011 is used.
        Documents are returned in descending order of their rank, in absence
        of sorting or scoring options.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    unicode_id = _ConvertToUnicode(doc_id)
    if unicode_id is not None:
      _CheckDocumentId(unicode_id)
    self._doc_id = unicode_id
    self._fields = _GetList(fields)
    self._language = _CheckLanguage(_ConvertToUnicode(language))

    # Populated on first lookup by _BuildFieldMap.
    self._field_map = None

    if rank is None:
      effective_rank = self._GetDefaultRank()
    else:
      effective_rank = rank
    self._rank = self._CheckRank(effective_rank)

    _CheckDocument(self)

  @property
  def doc_id(self):
    """Returns the document identifier."""
    return self._doc_id

  @property
  def fields(self):
    """Returns a list of fields of the document."""
    return self._fields

  @property
  def language(self):
    """Returns the code of the language the document fields are written in."""
    return self._language

  @property
  def rank(self):
    """Returns the rank of this document."""
    return self._rank

  def field(self, field_name):
    """Returns the single field that has the provided field name.

    Args:
      field_name: The name of the field to return.

    Returns:
      The field with the given name.

    Raises:
      ValueError: There is not exactly one field with the given name.
    """
    matches = self[field_name]
    if len(matches) != 1:
      raise ValueError(
          'Must have exactly one field with name %s, but found %d.' %
          (field_name, len(matches)))
    return matches[0]

  def __getitem__(self, field_name):
    """Returns a list of all fields with the provided field name.

    Args:
      field_name: The name of the fields to return.

    Returns:
      All fields with the given name, or an empty list if no field with that
      name exists.
    """
    by_name = self._BuildFieldMap()
    return by_name.get(field_name, [])

  def __iter__(self):
    """Documents do not support iteration.

    This is provided to raise an explicit exception.
    """
    raise TypeError('Documents do not support iteration.')

  def _BuildFieldMap(self):
    """Builds the name -> list-of-fields map on first use and returns it."""
    if self._field_map is None:
      by_name = self._field_map = {}
      for item in self._fields:
        by_name.setdefault(item.name, []).append(item)
    return self._field_map

  def _CheckRank(self, rank):
    """Checks rank is an integer no greater than sys.maxint and returns it."""
    return _CheckInteger(rank, 'rank', upper_bound=sys.maxint)

  def _GetDefaultRank(self):
    """Returns a default rank as total seconds since 1st Jan 2011."""
    elapsed = datetime.datetime.now() - Document._FIRST_JAN_2011
    return elapsed.seconds + (elapsed.days * 24 * 3600)

  def __repr__(self):
    described = [('doc_id', self.doc_id), ('fields', self.fields),
                 ('language', self.language), ('rank', self.rank)]
    return _Repr(self, described)

  def __eq__(self, other):
    """Documents are equal when id, rank, language and fields all match."""
    if not isinstance(other, type(self)):
      return False
    return (self.doc_id == other.doc_id and
            self.rank == other.rank and
            self.language == other.language and
            self.fields == other.fields)

  def __ne__(self, other):
    return not self == other

  def __key(self):
    """Returns the value a document is hashed on: its identifier."""
    return self.doc_id

  def __hash__(self):
    return hash(self.__key())

  def __str__(self):
    return repr(self)
def _CopyDocumentToProtocolBuffer(document, pb):
  """Copies a Document into a document_pb.Document protocol buffer.

  Args:
    document: The Document to copy from.
    pb: The document_pb.Document protocol buffer to fill in.

  Returns:
    The pb that was passed in.
  """
  pb.set_storage(document_pb.Document.DISK)

  # The id and language are only written when they are non-empty.
  doc_id = document.doc_id
  if doc_id:
    pb.set_id(doc_id.encode('utf-8'))
  language = document.language
  if language:
    pb.set_language(language.encode('utf-8'))

  for source_field in document.fields:
    _CopyFieldToProtocolBuffer(source_field, pb.add_field())

  pb.set_order_id(document.rank)
  return pb
def _NewFieldsFromPb(field_list):
  """Returns a list of Fields built from a list of document_pb.Field."""
  converted = []
  for field_pb in field_list:
    converted.append(_NewFieldFromPb(field_pb))
  return converted
def _NewDocumentFromPb(doc_pb):
  """Constructs a Document from a document_pb.Document protocol buffer.

  Args:
    doc_pb: The document_pb.Document protocol buffer to convert.

  Returns:
    A Document carrying the id, fields, language (None when the protocol
    buffer has no language set) and order id of doc_pb.
  """
  if doc_pb.has_language():
    language = _DecodeUTF8(doc_pb.language())
  else:
    language = None
  return Document(doc_id=_DecodeUTF8(doc_pb.id()),
                  fields=_NewFieldsFromPb(doc_pb.field_list()),
                  language=language,
                  rank=doc_pb.order_id())
1392 def _QuoteString(argument):
1393 return '"' + argument.replace('"', '\\\"') + '"'
class FieldExpression(object):
  """Represents an expression that will be computed for each result returned.

  For example,
    FieldExpression(name='content_snippet',
                    expression='snippet("very important", content)')
  means a computed field 'content_snippet' will be returned with each search
  result, which contains HTML snippets of the 'content' field which match
  the query 'very important'.
  """

  MAXIMUM_EXPRESSION_LENGTH = 1000
  MAXIMUM_OPERATOR_LENGTH = 100

  def __init__(self, name, expression):
    """Initializer.

    Args:
      name: The name of the computed field for the expression.
      expression: The expression to evaluate and return in a field with
        given name in results. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    unicode_name = _ConvertToUnicode(name)
    self._name = _CheckFieldName(unicode_name)

    # None and non-string expressions are rejected before any parsing.
    if expression is None:
      raise ValueError('expression must be a FieldExpression, got None')
    if not isinstance(expression, basestring):
      raise TypeError('expression must be a FieldExpression, got %s' %
                      expression.__class__.__name__)

    unicode_expression = _ConvertToUnicode(expression)
    self._expression = _CheckExpression(unicode_expression)

  @property
  def name(self):
    """Returns name of the expression to return in search results."""
    return self._name

  @property
  def expression(self):
    """Returns a string containing an expression returned in search results."""
    return self._expression

  def __repr__(self):
    described = [('name', self.name), ('expression', self.expression)]
    return _Repr(self, described)
def _CopyFieldExpressionToProtocolBuffer(field_expression, pb):
  """Copies FieldExpression to a search_service_pb.FieldSpec_Expression.

  Both the name and the expression are written UTF-8 encoded.
  """
  encoded_name = field_expression.name.encode('utf-8')
  pb.set_name(encoded_name)
  encoded_expression = field_expression.expression.encode('utf-8')
  pb.set_expression(encoded_expression)
class SortOptions(object):
  """Represents a multi-dimensional sort of Documents.

  The following code shows how to sort documents based on product rating
  in descending order and then cheapest product within similarly rated
  products, sorting at most 1000 documents:

    SortOptions(expressions=[
        SortExpression(expression='rating',
            direction=SortExpression.DESCENDING, default_value=0),
        SortExpression(expression='price + tax',
            direction=SortExpression.ASCENDING, default_value=999999.99)],
        limit=1000)
  """

  def __init__(self, expressions=None, match_scorer=None, limit=1000):
    """Initializer.

    Args:
      expressions: An iterable of SortExpression representing a
        multi-dimensional sort of Documents.
      match_scorer: A match scorer specification which may be used to
        score documents or in a SortExpression combined with other features.
      limit: The limit on the number of documents to score or sort.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
    """
    self._match_scorer = match_scorer
    self._expressions = _GetList(expressions)
    for candidate in self._expressions:
      if isinstance(candidate, SortExpression):
        continue
      raise TypeError('expression must be a SortExpression, got %s' %
                      candidate.__class__.__name__)
    self._limit = _CheckSortLimit(limit)

  @property
  def expressions(self):
    """A list of SortExpression specifying a multi-dimensional sort."""
    return self._expressions

  @property
  def match_scorer(self):
    """Returns a match scorer to score documents with."""
    return self._match_scorer

  @property
  def limit(self):
    """Returns the limit on the number of documents to score or sort."""
    return self._limit

  def __repr__(self):
    described = [('match_scorer', self.match_scorer),
                 ('expressions', self.expressions),
                 ('limit', self.limit)]
    return _Repr(self, described)
class MatchScorer(object):
  """Assigns a document score based on term frequency.

  If you add a MatchScorer to a SortOptions as in the following code:

      sort_opts = search.SortOptions(match_scorer=search.MatchScorer())

  then, this will sort the documents in descending score order. The scores
  will be positive. If you want to sort in ascending order, then use the
  following code:

      sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
          expressions=[search.SortExpression(
              expression='_score', direction=search.SortExpression.ASCENDING,
              default_value=0.0)])

  The scores in this case will be negative.
  """

  def __init__(self):
    """Initializer. Takes no arguments and stores no state."""

  def __repr__(self):
    no_attributes = []
    return _Repr(self, no_attributes)
class RescoringMatchScorer(MatchScorer):
  """Assigns a document score based on term frequency weighted by doc parts.

  If you add a RescoringMatchScorer to a SortOptions as in the following code:

      sort_opts = search.SortOptions(
          match_scorer=search.RescoringMatchScorer())

  then, this will sort the documents in descending score order. The scores
  will be positive. If you want to sort in ascending order, then use the
  following code:

      sort_opts = search.SortOptions(
          match_scorer=search.RescoringMatchScorer(),
          expressions=[search.SortExpression(
              expression='_score', direction=search.SortExpression.ASCENDING,
              default_value=0.0)])

  The scores in this case will be negative.
  """

  def __init__(self):
    """Initializer. Takes no arguments; defers to MatchScorer.__init__."""
    super(RescoringMatchScorer, self).__init__()
def _CopySortExpressionToProtocolBuffer(sort_expression, pb):
  """Copies a SortExpression to a search_service_pb.SortSpec protocol buffer.

  Args:
    sort_expression: The SortExpression to copy from.
    pb: The search_service_pb.SortSpec protocol buffer to fill in.

  Returns:
    The pb that was passed in.
  """
  pb.set_sort_expression(sort_expression.expression.encode('utf-8'))

  # The descending flag is only written for ascending sorts.
  if sort_expression.direction == SortExpression.ASCENDING:
    pb.set_sort_descending(False)

  default = sort_expression.default_value
  if default is None:
    return pb

  if isinstance(default, basestring):
    pb.set_default_value_text(default.encode('utf-8'))
  elif isinstance(default, (datetime.datetime, datetime.date)):
    # Dates are sent as text holding their epoch time.
    pb.set_default_value_text(str(search_util.EpochTime(default)))
  else:
    pb.set_default_value_numeric(default)
  return pb
def _CopyMatchScorerToScorerSpecProtocolBuffer(match_scorer, limit, pb):
  """Copies a MatchScorer to a search_service_pb.ScorerSpec.

  Args:
    match_scorer: A MatchScorer or RescoringMatchScorer.
    limit: The limit to record on the scorer spec.
    pb: The search_service_pb.ScorerSpec protocol buffer to fill in.

  Returns:
    The pb that was passed in.

  Raises:
    TypeError: If match_scorer is neither a MatchScorer nor a
      RescoringMatchScorer.
  """
  # RescoringMatchScorer derives from MatchScorer, so it is tested first.
  if isinstance(match_scorer, RescoringMatchScorer):
    pb.set_scorer(search_service_pb.ScorerSpec.RESCORING_MATCH_SCORER)
  elif isinstance(match_scorer, MatchScorer):
    pb.set_scorer(search_service_pb.ScorerSpec.MATCH_SCORER)
  else:
    raise TypeError(
        'match_scorer must be a MatchScorer or RescoringMatchScorer, '
        'got %s' % match_scorer.__class__.__name__)
  pb.set_limit(limit)
  return pb
def _CopySortOptionsToProtocolBuffer(sort_options, params):
  """Copies the SortOptions into the SearchParams proto buf.

  Args:
    sort_options: The SortOptions to copy from.
    params: The SearchParams protocol buffer that receives one sort spec per
      sort expression and a scorer spec carrying the limit.
  """
  for expression in sort_options.expressions:
    _CopySortExpressionToProtocolBuffer(expression, params.add_sort_spec())

  scorer_spec = params.mutable_scorer_spec()
  if sort_options.match_scorer:
    # The copy helper records the limit itself, so it is not set again here.
    _CopyMatchScorerToScorerSpecProtocolBuffer(
        sort_options.match_scorer, sort_options.limit, scorer_spec)
  else:
    scorer_spec.set_limit(sort_options.limit)
class SortExpression(object):
  """Sort by a user specified scoring expression.

  For example, the following will sort documents on a numeric field named
  'length' in ascending order, assigning a default value of sys.maxint for
  documents which do not specify a 'length' field.

    SortExpression(expression='length',
                   direction=search.SortExpression.ASCENDING,
                   default_value=sys.maxint)

  The following example will sort documents on a date field named
  'published_date' in descending order, assigning a default value of
  1999-12-31 for documents which do not specify a 'published_date' field.

    SortExpression(expression='published_date',
                   default_value=datetime.date(year=1999, month=12, day=31))

  The following example will sort documents on a text field named 'subject'
  in descending order, assigning a default value of '' for documents which
  do not specify a 'subject' field.

    SortExpression(expression='subject')
  """

  try:
    MAX_FIELD_VALUE = unichr(0x10ffff) * 80
  except ValueError:
    # unichr rejected 0x10ffff; fall back to the largest code point it takes.
    MAX_FIELD_VALUE = unichr(0xffff) * 80

  MIN_FIELD_VALUE = u''

  ASCENDING, DESCENDING = ('ASCENDING', 'DESCENDING')

  _DIRECTIONS = frozenset([ASCENDING, DESCENDING])

  def __init__(self, expression, direction=DESCENDING, default_value=None):
    """Initializer.

    Args:
      expression: An expression to be evaluated on each matching document
        to sort by. The expression must evaluate to a text or numeric value.
        The expression can simply be a field name, or some compound expression
        such as "_score + count(likes) * 0.1" which will add the score from a
        scorer to a count of the values of a likes field times 0.1. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.
      direction: The direction to sort the search results, either ASCENDING
        or DESCENDING
      default_value: The default value of the expression. The default_value is
        returned if expression cannot be calculated, for example, if the
        expression is a field name and no value for that named field exists.
        A text value must be specified for text sorts. A numeric value must be
        specified for numeric sorts. A date value must be specified for date
        sorts.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    self._expression = _ConvertToUnicode(expression)
    self._direction = self._CheckDirection(direction)
    if self._expression is None:
      raise TypeError('expression must be a SortExpression, got None')
    _CheckExpression(self._expression)

    self._default_value = default_value
    if self._default_value is None:
      return

    if isinstance(self.default_value, basestring):
      # Text defaults are stored as unicode and validated as text.
      self._default_value = _ConvertToUnicode(default_value)
      _CheckText(self._default_value, 'default_value')
      return

    accepted_types = (int, long, float, datetime.date, datetime.datetime)
    if not isinstance(self._default_value, accepted_types):
      raise TypeError('default_value must be text, numeric or datetime, got '
                      '%s' % self._default_value.__class__.__name__)

  @property
  def expression(self):
    """Returns the expression to sort by."""
    return self._expression

  @property
  def direction(self):
    """Returns the direction to sort expression: ASCENDING or DESCENDING."""
    return self._direction

  @property
  def default_value(self):
    """Returns a default value for the expression if no value computed."""
    return self._default_value

  def _CheckDirection(self, direction):
    """Checks direction is a valid SortExpression direction and returns it."""
    return _CheckEnum(direction, 'direction', values=self._DIRECTIONS)

  def __repr__(self):
    described = [('expression', self.expression),
                 ('direction', self.direction),
                 ('default_value', self.default_value)]
    return _Repr(self, described)
class ScoredDocument(Document):
  """Represents a scored document returned from a search."""

  def __init__(self, doc_id=None, fields=None, language='en',
               sort_scores=None, expressions=None, cursor=None, rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document which
        does not start with '!'. Whitespace is excluded from ids. If no id is
        provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      sort_scores: The list of scores assigned during sort evaluation. Each
        sort dimension is included. Positive scores are used for ascending
        sorts; negative scores for descending.
      expressions: The list of computed fields which are the result of
        expressions requested.
      cursor: A cursor associated with the document.
      rank: The rank of this document. A rank must be a non-negative integer
        less than sys.maxint. If not specified, the number of seconds since
        1st Jan 2011 is used. Documents are returned in descending order of
        their rank.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    super(ScoredDocument, self).__init__(
        doc_id=doc_id, fields=fields, language=language, rank=rank)
    score_list = _GetList(sort_scores)
    self._sort_scores = self._CheckSortScores(score_list)
    self._expressions = _GetList(expressions)
    # A cursor is optional, but when given it must be a Cursor.
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  @property
  def sort_scores(self):
    """The list of scores assigned during sort evaluation.

    Each sort dimension is included. Positive scores are used for ascending
    sorts; negative scores for descending.

    Returns:
      The list of numeric sort scores.
    """
    return self._sort_scores

  @property
  def expressions(self):
    """The list of computed fields the result of expression evaluation.

    For example, if a request has
      FieldExpression(name='snippet',
                      expression='snippet("good story", content)')
    meaning to compute a snippet field containing HTML snippets extracted
    from the matching of the query 'good story' on the field 'content'.
    This means a field such as the following will be returned in expressions
    for the search result:
      HtmlField(name='snippet', value='that was a <b>good story</b> to finish')

    Returns:
      The computed fields.
    """
    return self._expressions

  @property
  def cursor(self):
    """A cursor associated with a result, a continued search starting point.

    Returns:
      The result cursor given at construction, or None if there was none.
    """
    return self._cursor

  def _CheckSortScores(self, sort_scores):
    """Checks every entry of sort_scores is a number, and returns the list."""
    for score in sort_scores:
      _CheckNumber(score, 'sort_scores')
    return sort_scores

  def __repr__(self):
    described = [('doc_id', self.doc_id),
                 ('fields', self.fields),
                 ('language', self.language),
                 ('rank', self.rank),
                 ('sort_scores', self.sort_scores),
                 ('expressions', self.expressions),
                 ('cursor', self.cursor)]
    return _Repr(self, described)
class SearchResults(object):
  """Represents the result of executing a search request."""

  def __init__(self, number_found, results=None, cursor=None):
    """Initializer.

    Args:
      number_found: The number of documents found for the query.
      results: The list of ScoredDocuments returned from executing a
        search request.
      cursor: A Cursor to continue the search from the end of the
        search results.

    Raises:
      TypeError: If any of the parameters have an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._number_found = _CheckInteger(number_found, 'number_found')
    self._results = _GetList(results)
    # A cursor is optional, but when given it must be a Cursor.
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  def __iter__(self):
    """Yields each result in order."""
    for scored_document in self.results:
      yield scored_document

  @property
  def results(self):
    """Returns the list of ScoredDocuments that matched the query."""
    return self._results

  @property
  def number_found(self):
    """Returns the number of documents which were found for the search.

    Note that this is an approximation and not an exact count.
    If QueryOptions.number_found_accuracy parameter is set to 100
    for example, then number_found <= 100 is accurate.

    Returns:
      The number of documents found.
    """
    return self._number_found

  @property
  def cursor(self):
    """Returns a cursor that can be used to continue search from last result.

    Returns:
      The results cursor given at construction, or None if there was none.
    """
    return self._cursor

  def __repr__(self):
    described = [('results', self.results),
                 ('number_found', self.number_found),
                 ('cursor', self.cursor)]
    return _Repr(self, described)
class GetResponse(object):
  """Represents the result of executing a get request.

  For example, the following code shows how a response could be used
  to walk over the results that were returned.

  response = index.get_range()
  for document in response:
    print('document %s' % document)
  """

  def __init__(self, results=None):
    """Initializer.

    Args:
      results: The results returned from an index ordered by Id.

    Raises:
      TypeError: If any of the parameters have an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._results = _GetList(results)

  def __iter__(self):
    """Yields each result in order."""
    for item in self.results:
      yield item

  @property
  def results(self):
    """Returns a list of results ordered by Id from the index."""
    return self._results

  def __repr__(self):
    described = [('results', self.results)]
    return _Repr(self, described)
class Cursor(object):
  """Specifies how to get the next page of results in a search.

  A cursor returned in a previous set of search results to use as a starting
  point to retrieve the next set of results. This can get you better
  performance, and also improves the consistency of pagination through index
  updates.

  The following shows how to use the cursor to get the next page of results:

  # get the first set of results; the first cursor is used to specify
  # that cursors are to be returned in the SearchResults.
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=Cursor())))

  # get the next set of results
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=results.cursor)))

  If you want to continue search from any one of the ScoredDocuments in
  SearchResults, then you can set Cursor.per_result to True.

  # get the first set of results; the first cursor is used to specify
  # that cursors are to be returned in the SearchResults.
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=Cursor(per_result=True))))

  # this shows how to access the per_document cursors returned from a search
  per_document_cursor = None
  for scored_document in results:
    per_document_cursor = scored_document.cursor

  # get the next set of results
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=per_document_cursor)))
  """

  def __init__(self, web_safe_string=None, per_result=False):
    """Initializer.

    Args:
      web_safe_string: The cursor string returned from the search service to
        be interpreted by the search service to get the next set of results.
        It has the form '<True|False>:<internal cursor>'.
      per_result: A bool when true will return a cursor per ScoredDocument in
        SearchResults, otherwise will return a single cursor for the whole
        SearchResults. If using offset this is ignored, as the user is
        responsible for calculating a next offset if any. When a non-empty
        web_safe_string is given, its prefix overrides this argument.

    Raises:
      ValueError: if the web_safe_string is not of required format.
    """
    self._web_safe_string = _CheckCursor(_ConvertToUnicode(web_safe_string))
    self._per_result = per_result
    # Always define the attribute so that reading it on a cursor built
    # without a web safe string yields None instead of AttributeError.
    self._internal_cursor = None
    if self._web_safe_string:
      parts = self._web_safe_string.split(':', 1)
      if len(parts) != 2 or parts[0] not in ['True', 'False']:
        raise ValueError('invalid format for web_safe_string, got %s' %
                         self._web_safe_string)
      self._internal_cursor = parts[1]
      # The per_result flag encoded in the string takes precedence.
      self._per_result = (parts[0] == 'True')

  @property
  def web_safe_string(self):
    """Returns the cursor string generated by the search service."""
    return self._web_safe_string

  @property
  def per_result(self):
    """Returns whether to return a cursor for each ScoredDocument in results."""
    return self._per_result

  def __repr__(self):
    return _Repr(self, [('web_safe_string', self.web_safe_string)])
def _ToWebSafeString(per_result, internal_cursor):
  """Builds the web safe string '<per_result>:<internal_cursor>'.

  Args:
    per_result: The per-result flag; rendered with str().
    internal_cursor: The cursor string to place after the colon.

  Returns:
    The two parts joined by a single ':'.
  """
  return ':'.join((str(per_result), internal_cursor))
def _CheckQuery(query):
  """Checks a query is a valid query string.

  Args:
    query: The query string to validate.

  Returns:
    The query, unchanged.

  Raises:
    TypeError: If query is None.
    QueryError: If a query that is not blank cannot be parsed.
  """
  # NOTE(review): _ValidateString is defined elsewhere; it presumably raises
  # for values of the wrong type or over MAXIMUM_QUERY_LENGTH - confirm.
  _ValidateString(query, 'query', MAXIMUM_QUERY_LENGTH, empty_ok=True)
  if query is None:
    raise TypeError('query must be unicode, got None')
  # A blank query is accepted without parsing.
  if query.strip():
    try:
      query_parser.Parse(query)
    except query_parser.QueryException:
      # The parser exception is not used, so it is not bound to a name.
      raise QueryError('Failed to parse query "%s"' % query)
  return query
def _CheckLimit(limit):
  """Validates the limit on the number of documents to return.

  Delegates to _CheckInteger, disallowing zero and using
  MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH as the upper bound.
  """
  checked_limit = _CheckInteger(
      limit, 'limit',
      upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH,
      zero_ok=False)
  return checked_limit
def _CheckOffset(offset):
  """Validates the offset into the list of matched documents.

  Delegates to _CheckInteger, allowing zero and using MAXIMUM_SEARCH_OFFSET
  as the upper bound.
  """
  checked_offset = _CheckInteger(
      offset, 'offset',
      upper_bound=MAXIMUM_SEARCH_OFFSET,
      zero_ok=True)
  return checked_offset
def _CheckNumberFoundAccuracy(number_found_accuracy):
  """Validates the requested accuracy for the number of documents found.

  Delegates to _CheckInteger, disallowing zero and using
  MAXIMUM_NUMBER_FOUND_ACCURACY as the upper bound.
  """
  checked_accuracy = _CheckInteger(
      number_found_accuracy, 'number_found_accuracy',
      upper_bound=MAXIMUM_NUMBER_FOUND_ACCURACY,
      zero_ok=False)
  return checked_accuracy
def _CheckCursor(cursor):
  """Validates a cursor string, which may be empty, against the length limit.

  Delegates to _ValidateString with _MAXIMUM_CURSOR_LENGTH and returns its
  result.
  """
  checked_cursor = _ValidateString(
      cursor, 'cursor', _MAXIMUM_CURSOR_LENGTH, empty_ok=True)
  return checked_cursor
def _CheckNumberOfFields(returned_expressions, snippeted_fields,
                         returned_fields):
  """Checks the combined count of all field kinds does not exceed the limit.

  Args:
    returned_expressions: Sized collection of expressions to return.
    snippeted_fields: Sized collection of fields to snippet.
    returned_fields: Sized collection of fields to return.

  Raises:
    ValueError: If the three collections together hold more than
      MAXIMUM_FIELDS_RETURNED_PER_SEARCH items.
  """
  requested = sum(
      len(group)
      for group in (returned_expressions, snippeted_fields, returned_fields))
  if requested <= MAXIMUM_FIELDS_RETURNED_PER_SEARCH:
    return
  raise ValueError(
      'too many fields, snippets or expressions to return %d > maximum %d'
      % (requested, MAXIMUM_FIELDS_RETURNED_PER_SEARCH))
class QueryOptions(object):
  """Options for post-processing results for a query.

  Options include the ability to sort results, control which document fields
  to return, produce snippets of fields and compute and sort by complex
  scoring expressions.

  If you wish to randomly access pages of search results, you can use an
  offset:

  # get the first set of results
  page_size = 10
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(limit=page_size)))

  # calculate pages
  pages = results.number_found / page_size

  # user chooses page and hence an offset into results
  next_page = ith * page_size

  # get the search results for that page
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(limit=page_size, offset=next_page)))
  """

  def __init__(self, limit=20, number_found_accuracy=None, cursor=None,
               offset=None, sort_options=None, returned_fields=None,
               ids_only=False, snippeted_fields=None,
               returned_expressions=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, starting the search from 'cursor token',
    returning another single cursor for the SearchResults, sorting by subject
    in descending order, returning the author, subject, and summary fields as
    well as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
            limit=20,
            cursor=Cursor(),
            sort_options=SortOptions(
                expressions=[
                    SortExpression(expression='subject')],
                limit=1000),
            returned_fields=['author', 'subject', 'summary'],
            snippeted_fields=['content'])))

    Args:
      limit: The limit on number of documents to return in results.
      number_found_accuracy: The minimum accuracy requirement for
        SearchResults.number_found. If set, the number_found will be
        accurate up to at least that number. For example, when set to 100,
        any SearchResults with number_found <= 100 is accurate. This option
        may add considerable latency/expense, especially when used with
        returned_fields.
      cursor: A Cursor describing where to get the next set of results,
        or to provide next cursors in SearchResults.
      offset: The number of documents to skip in search results. This is an
        alternative to using a query cursor, but allows random access into
        the results. Using offsets rather than cursors is more expensive.
        You can use either cursor or offset, but not both. Using an offset
        means that no cursor is returned in SearchResults.cursor, nor in each
        ScoredDocument.cursor.
      sort_options: A SortOptions specifying a multi-dimensional sort over
        search results.
      returned_fields: An iterable of names of fields to return in search
        results.
      ids_only: Only return document ids, do not return any fields.
      snippeted_fields: An iterable of names of fields to snippet and return
        in search result expressions.
      returned_expressions: An iterable of FieldExpression to evaluate and
        return in search results.

    Raises:
      TypeError: If cursor is not a Cursor or sort_options is not a
        SortOptions.
      ValueError: If cursor and offset are used together, or if ids_only and
        returned_fields are used together.
      ExpressionError: If one of the returned expression strings is not
        parseable.
    """
    self._limit = _CheckLimit(limit)
    self._number_found_accuracy = _CheckNumberFoundAccuracy(
        number_found_accuracy)

    if cursor is not None:
      if not isinstance(cursor, Cursor):
        raise TypeError('cursor must be a Cursor, got %s' %
                        cursor.__class__.__name__)
      if offset is not None:
        raise ValueError('cannot set cursor and offset together')
    self._cursor = cursor
    self._offset = _CheckOffset(offset)

    if not (sort_options is None or isinstance(sort_options, SortOptions)):
      raise TypeError('sort_options must be a SortOptions, got %s' %
                      sort_options.__class__.__name__)
    self._sort_options = sort_options

    self._returned_fields = _ConvertToUnicodeList(returned_fields)
    _CheckFieldNames(self._returned_fields)
    self._ids_only = ids_only
    if self._ids_only and self._returned_fields:
      raise ValueError('cannot have ids_only and returned_fields set together')

    self._snippeted_fields = _ConvertToUnicodeList(snippeted_fields)
    _CheckFieldNames(self._snippeted_fields)

    self._returned_expressions = _ConvertToList(returned_expressions)
    for field_expression in self._returned_expressions:
      _CheckFieldName(_ConvertToUnicode(field_expression.name))
      _CheckExpression(_ConvertToUnicode(field_expression.expression))

    # The per-search cap applies to the three kinds of field taken together.
    _CheckNumberOfFields(self._returned_expressions, self._snippeted_fields,
                         self._returned_fields)

  @property
  def limit(self):
    """Returns a limit on number of documents to return in results."""
    return self._limit

  @property
  def number_found_accuracy(self):
    """Returns minimum accuracy requirement for SearchResults.number_found."""
    return self._number_found_accuracy

  @property
  def cursor(self):
    """Returns the Cursor for the query."""
    return self._cursor

  @property
  def offset(self):
    """Returns the number of documents in search results to skip."""
    return self._offset

  @property
  def sort_options(self):
    """Returns a SortOptions."""
    return self._sort_options

  @property
  def returned_fields(self):
    """Returns an iterable of names of fields to return in search results."""
    return self._returned_fields

  @property
  def ids_only(self):
    """Returns whether to return only document ids in search results."""
    return self._ids_only

  @property
  def snippeted_fields(self):
    """Returns iterable of field names to snippet and return in results."""
    return self._snippeted_fields

  @property
  def returned_expressions(self):
    """Returns iterable of FieldExpression to return in results."""
    return self._returned_expressions

  def __repr__(self):
    """Returns a representation listing the options (offset is not shown)."""
    shown = [
        ('limit', self.limit),
        ('number_found_accuracy', self.number_found_accuracy),
        ('cursor', self.cursor),
        ('sort_options', self.sort_options),
        ('returned_fields', self.returned_fields),
        ('ids_only', self.ids_only),
        ('snippeted_fields', self.snippeted_fields),
        ('returned_expressions', self.returned_expressions),
    ]
    return _Repr(self, shown)
def _CopyQueryOptionsObjectToProtocolBuffer(query, options, params):
  """Copies a QueryOptions object to a SearchParams proto buff.

  Args:
    query: The query string, passed through to the field-level copy.
    options: The QueryOptions to read from.
    params: The SearchParams protocol buffer to populate.
  """
  internal_cursor = None
  cursor_kind = None
  offset = options.offset
  query_cursor = options.cursor
  if query_cursor:
    cursor_kind = (search_service_pb.SearchParams.PER_RESULT
                   if query_cursor.per_result
                   else search_service_pb.SearchParams.SINGLE)
    # Only the internal part of the cursor is sent on; the per_result flag
    # travels separately as the cursor type.
    if isinstance(query_cursor, Cursor) and query_cursor.web_safe_string:
      internal_cursor = query_cursor._internal_cursor
  _CopyQueryOptionsToProtocolBuffer(
      query, offset, options.limit, options.number_found_accuracy,
      internal_cursor, cursor_kind, options.ids_only,
      options.returned_fields, options.snippeted_fields,
      options.returned_expressions, options.sort_options, params)
def _CopyQueryOptionsToProtocolBuffer(
    query, offset, limit, number_found_accuracy, cursor, cursor_type, ids_only,
    returned_fields, snippeted_fields, returned_expressions, sort_options,
    params):
  """Copies fields of QueryOptions to params protobuf.

  The limit is always set. Every other value is copied only when it is
  provided: truthy for offset, cursor and ids_only, not None for
  number_found_accuracy, cursor_type and sort_options.
  """
  if offset:
    params.set_offset(offset)
  params.set_limit(limit)
  if number_found_accuracy is not None:
    params.set_matched_count_accuracy(number_found_accuracy)
  if cursor:
    params.set_cursor(cursor.encode('utf-8'))
  if cursor_type is not None:
    params.set_cursor_type(cursor_type)
  if ids_only:
    params.set_keys_only(ids_only)

  wants_field_spec = (
      returned_fields or snippeted_fields or returned_expressions)
  if wants_field_spec:
    spec_pb = params.mutable_field_spec()
    for field_name in returned_fields:
      spec_pb.add_name(field_name.encode('utf-8'))
    for field_name in snippeted_fields:
      # A snippeted field is sent as a snippet() expression over the query.
      snippet = u'snippet(%s, %s)' % (_QuoteString(query), field_name)
      snippet_expression = FieldExpression(
          name=field_name, expression=snippet.encode('utf-8'))
      _CopyFieldExpressionToProtocolBuffer(
          snippet_expression, spec_pb.add_expression())
    for field_expression in returned_expressions:
      _CopyFieldExpressionToProtocolBuffer(
          field_expression, spec_pb.add_expression())

  if sort_options is not None:
    _CopySortOptionsToProtocolBuffer(sort_options, params)
class Query(object):
  """Represents a request on the search service to query the index."""

  def __init__(self, query_string, options=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, starting the search from 'cursor token',
    returning another single document cursor for the results, sorting by
    subject in descending order, returning the author, subject, and summary
    fields as well as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
            limit=20,
            cursor=Cursor(),
            sort_options=SortOptions(
                expressions=[
                    SortExpression(expression='subject')],
                limit=1000),
            returned_fields=['author', 'subject', 'summary'],
            snippeted_fields=['content'])))

    In order to get a Cursor, you specify a Cursor in QueryOptions.cursor
    and extract the Cursor for the next request from results.cursor to
    continue from the last found document, as shown below:

      results = index.search(
          Query(query_string='subject:first good',
                options=QueryOptions(cursor=results.cursor)))

    Args:
      query_string: The query to match against documents in the index. A
        query is a boolean expression containing terms. For example, the
        query
          'job tag:"very important" sent <= 2011-02-28'
        finds documents with the term job in any field, that contain the
        phrase "very important" in a tag field, and a sent date up to and
        including 28th February, 2011. You can use combinations of
          '(cat OR feline) food NOT dog'
        to find documents which contain the term cat or feline as well as
        food, but do not mention the term dog. A further example,
          'category:televisions brand:sony price >= 300 price < 400'
        will return documents which have televisions in a category field, a
        sony brand and a price field which is 300 (inclusive) to 400
        (exclusive). See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of expressions that can be used in queries.
      options: A QueryOptions describing post-processing of search results.

    Raises:
      QueryError: If the query string is not parseable.
    """
    unicode_query = _ConvertToUnicode(query_string)
    self._query_string = unicode_query
    _CheckQuery(unicode_query)
    self._options = options

  @property
  def query_string(self):
    """Returns the query string to be applied to search service."""
    return self._query_string

  @property
  def options(self):
    """Returns QueryOptions defining post-processing on the search results."""
    return self._options
def _CopyQueryToProtocolBuffer(query, params):
  """Copies the query string, UTF-8 encoded, into the params protobuf."""
  encoded_query = query.encode('utf-8')
  params.set_query(encoded_query)
def _CopyQueryObjectToProtocolBuffer(query, params):
  """Copies a Query object, with its options, into the params protobuf.

  A default QueryOptions() is used when the query carries no options.
  """
  _CopyQueryToProtocolBuffer(query.query_string, params)
  options = query.options
  if options is None:
    options = QueryOptions()
  _CopyQueryOptionsObjectToProtocolBuffer(query.query_string, options, params)
2369 class Index(object):
2370 """Represents an index allowing indexing, deleting and searching documents.
2372 The following code fragment shows how to add documents, then search the
2373 index for documents matching a query.
2375 # Get the index.
2376 index = Index(name='index-name')
2378 # Create a document.
2379 doc = Document(doc_id='document-id',
2380 fields=[TextField(name='subject', value='my first email'),
2381 HtmlField(name='body',
2382 value='<html>some content here</html>')])
2384 # Index the document.
2385 try:
2386 index.put(doc)
2387 except search.Error, e:
2388 # possibly retry indexing or log error
2390 # Query the index.
2391 try:
2392 results = index.search('subject:first body:here')
2394 # Iterate through the search results.
2395 for scored_document in results:
2396 print scored_document
2398 except search.Error, e:
2399 # possibly log the failure
2401 Once an index is created with a given specification, that specification is
2402 immutable.
2404 Search results may contain some out of date documents. However, any two
2405 changes to any document stored in an index are applied in the correct order.
2410 RESPONSE_CURSOR, RESULT_CURSOR = ('RESPONSE_CURSOR', 'RESULT_CURSOR')
2412 _CURSOR_TYPES = frozenset([RESPONSE_CURSOR, RESULT_CURSOR])
2414 SEARCH, DATASTORE, CLOUD_STORAGE = ('SEARCH', 'DATASTORE', 'CLOUD_STORAGE')
2416 _SOURCES = frozenset([SEARCH, DATASTORE, CLOUD_STORAGE])
def __init__(self, name, namespace=None, source=SEARCH):
  """Initializer.

  Args:
    name: The name of the index. An index name must be a visible printable
      ASCII string not starting with '!'. Whitespace characters are excluded.
    namespace: The namespace of the index name. If not set, then the current
      namespace is used.
    source: Deprecated as of 1.7.6. The source of
      the index:
        SEARCH - The Index was created by adding documents through this
          search API.
        DATASTORE - The Index was created as a side-effect of putting entities
          into Datastore.
        CLOUD_STORAGE - The Index was created as a side-effect of adding
          objects into a Cloud Storage bucket.

  Raises:
    ValueError: If source is not one of the known sources, or if the
      namespace fails validation.
  """
  if source not in self._SOURCES:
    raise ValueError('source must be one of %s' % self._SOURCES)
  # Compare by value, not identity: a string equal to SEARCH that is a
  # different object must not trigger the deprecation warning.
  if source != self.SEARCH:
    warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
  self._source = source
  self._name = _CheckIndexName(_ConvertToUnicode(name))
  self._namespace = _ConvertToUnicode(namespace)
  # Fall back to the current namespace, then to the empty namespace.
  if self._namespace is None:
    self._namespace = _ConvertToUnicode(namespace_manager.get_namespace())
  if self._namespace is None:
    self._namespace = u''
  namespace_manager.validate_namespace(self._namespace, exception=ValueError)
  # These stay None unless they are filled in after construction.
  self._schema = None
  self._storage_usage = None
  self._storage_limit = None
@property
def schema(self):
  """Returns the schema mapping field names to list of types supported.

  Only valid for Indexes returned by search.get_indexes method.
  """
  return self._schema
@property
def storage_usage(self):
  """The approximate number of bytes used by this index.

  The number may be slightly stale, as it may not reflect the
  results of recent changes.

  Returns None for indexes not obtained from search.get_indexes.
  """
  return self._storage_usage
@property
def storage_limit(self):
  """The maximum allowable storage for this index, in bytes.

  Returns None for indexes not obtained from search.get_indexes.
  """
  return self._storage_limit
@property
def name(self):
  """Returns the name of the index."""
  return self._name
@property
def namespace(self):
  """Returns the namespace of the name of the index."""
  return self._namespace
@property
def source(self):
  """Returns the source of the index.

  Deprecated: from 1.7.6, source is no longer available. Every access
  issues a DeprecationWarning.
  """
  warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
  return self._source
def __eq__(self, other):
  """Returns True if other is of this class and has equal attributes."""
  if not isinstance(other, self.__class__):
    return False
  return self.__dict__ == other.__dict__
def __ne__(self, other):
  """Returns the negation of __eq__."""
  is_equal = self.__eq__(other)
  return not is_equal
def __hash__(self):
  """Returns a hash derived from the index name and namespace."""
  identity = (self._name, self._namespace)
  return hash(identity)
def __repr__(self):
  """Returns a representation listing the index attributes."""
  # Read _source directly: the public source property issues a
  # DeprecationWarning on every access.
  shown = [
      ('name', self.name),
      ('namespace', self.namespace),
      ('source', self._source),
      ('schema', self.schema),
      ('storage_usage', self.storage_usage),
      ('storage_limit', self.storage_limit),
  ]
  return _Repr(self, shown)
def _NewPutResultFromPb(self, status_pb, doc_id):
  """Constructs PutResult from RequestStatus pb and doc_id.

  Status codes missing from _ERROR_OPERATION_CODE_MAP are reported as
  OperationResult.INTERNAL_ERROR.
  """
  if status_pb.has_error_detail():
    detail = _DecodeUTF8(status_pb.error_detail())
  else:
    detail = None
  result_code = _ERROR_OPERATION_CODE_MAP.get(
      status_pb.code(), OperationResult.INTERNAL_ERROR)
  return PutResult(code=result_code, message=detail, id=_DecodeUTF8(doc_id))
def _NewPutResultList(self, response):
  """Builds one PutResult per (status, doc_id) pair in the response."""
  put_results = []
  for status_pb, doc_id in zip(response.status_list(),
                               response.doc_id_list()):
    put_results.append(self._NewPutResultFromPb(status_pb, doc_id))
  return put_results
@datastore_rpc._positional(2)
def put(self, documents, deadline=None):
  """Index the collection of documents.

  If any of the documents are already in the index, then reindex them with
  their corresponding fresh document.

  Args:
    documents: A Document or iterable of Documents to index.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.

  Returns:
    A list of PutResult, one per Document requested to be indexed.

  Raises:
    PutError: If one or more documents failed to index or
      number indexed did not match requested.
    TypeError: If an unknown attribute is passed.
    ValueError: If documents is not a Document or iterable of Document
      or number of the documents is larger than
      MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
  """
  # Synchronous wrapper: start the asynchronous put and block on it.
  pending = self.put_async(documents, deadline=deadline)
  return pending.get_result()
@datastore_rpc._positional(2)
def put_async(self, documents, deadline=None):
  """Asynchronously indexes the collection of documents.

  Identical to put() except that it returns a future. Call
  get_result() on the return value to block on the call and get its result.
  """
  if isinstance(documents, basestring):
    raise TypeError('documents must be a Document or sequence of '
                    'Documents, got %s' % documents.__class__.__name__)
  # A single, non-iterable document is wrapped in a list.
  try:
    docs = list(iter(documents))
  except TypeError:
    docs = [documents]

  if not docs:
    return _WrappedValueFuture([])
  if len(docs) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
    raise ValueError('too many documents to index')

  request = search_service_pb.IndexDocumentRequest()
  response = search_service_pb.IndexDocumentResponse()
  params = request.mutable_params()
  _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())

  # Maps each document id already queued to the document queued under it.
  queued_by_id = {}
  for document in docs:
    doc_id = document.doc_id
    if doc_id and doc_id in queued_by_id:
      if document != queued_by_id[doc_id]:
        raise ValueError(
            'Different documents with the same ID found in the '
            'same call to Index.put()')
      # An identical repeat of an already queued document is sent only once.
      continue
    if doc_id:
      queued_by_id[doc_id] = document
    _CopyDocumentToProtocolBuffer(document, params.add_document())

  def hook():
    results = self._NewPutResultList(response)
    if response.status_size() != len(params.document_list()):
      raise PutError('did not index requested number of documents', results)
    for status in response.status_list():
      if status.code() != search_service_pb.SearchServiceError.OK:
        message = _ConcatenateErrorMessages(
            'one or more put document operations failed', status)
        raise PutError(message, results)
    return results

  return _RpcOperationFuture(
      'IndexDocument', request, response, deadline, hook)
def _NewDeleteResultFromPb(self, status_pb, doc_id):
  """Constructs DeleteResult from RequestStatus pb and doc_id.

  Status codes missing from _ERROR_OPERATION_CODE_MAP are reported as
  OperationResult.INTERNAL_ERROR.
  """
  if status_pb.has_error_detail():
    detail = _DecodeUTF8(status_pb.error_detail())
  else:
    detail = None
  result_code = _ERROR_OPERATION_CODE_MAP.get(
      status_pb.code(), OperationResult.INTERNAL_ERROR)
  return DeleteResult(code=result_code, message=detail, id=doc_id)
def _NewDeleteResultList(self, document_ids, response):
  """Builds one DeleteResult per (status, document id) pair."""
  delete_results = []
  for status_pb, doc_id in zip(response.status_list(), document_ids):
    delete_results.append(self._NewDeleteResultFromPb(status_pb, doc_id))
  return delete_results
@datastore_rpc._positional(2)
def delete(self, document_ids, deadline=None):
  """Delete the documents with the corresponding document ids from the index.

  If no document exists for the identifier in the list, then that document
  identifier is ignored.

  Args:
    document_ids: A single identifier or list of identifiers of documents
      to delete.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.

  Raises:
    DeleteError: If one or more documents failed to remove or
      number removed did not match requested.
    ValueError: If document_ids is not a string or iterable of valid document
      identifiers or number of document ids is larger than
      MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
  """
  # Synchronous wrapper: start the asynchronous delete and block on it.
  pending = self.delete_async(document_ids, deadline=deadline)
  return pending.get_result()
@datastore_rpc._positional(2)
def delete_async(self, document_ids, deadline=None):
  """Asynchronously deletes the documents with the corresponding document ids.

  Identical to delete() except that it returns a future. Call
  get_result() on the return value to block on the call and get its result.
  """
  doc_ids = _ConvertToList(document_ids)
  if not doc_ids:
    return _WrappedValueFuture([])
  # Deletes use the same per-request cap as puts.
  if len(doc_ids) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
    raise ValueError('too many documents to delete')

  request = search_service_pb.DeleteDocumentRequest()
  response = search_service_pb.DeleteDocumentResponse()
  params = request.mutable_params()
  _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
  for doc_id in doc_ids:
    _CheckDocumentId(doc_id)
    params.add_doc_id(doc_id)

  def hook():
    results = self._NewDeleteResultList(doc_ids, response)
    if response.status_size() != len(doc_ids):
      raise DeleteError(
          'did not delete requested number of documents', results)
    for status in response.status_list():
      if status.code() != search_service_pb.SearchServiceError.OK:
        message = _ConcatenateErrorMessages(
            'one or more delete document operations failed', status)
        raise DeleteError(message, results)
    return results

  return _RpcOperationFuture(
      'DeleteDocument', request, response, deadline, hook)
def delete_schema(self):
  """Deprecated in 1.7.4. Delete the schema from the index.

  We are deprecating this method and replacing with more general schema
  and index management.

  A possible use may be remove typed fields which are no longer used. After
  you delete the schema, you need to index one or more documents to rebuild
  the schema. Until you re-index some documents, searches may fail, especially
  searches using field restricts.

  Raises:
    DeleteError: If the schema failed to be deleted.
  """
  warnings.warn('delete_schema is deprecated in 1.7.4.',
                DeprecationWarning, stacklevel=2)
  request = search_service_pb.DeleteSchemaRequest()
  response = search_service_pb.DeleteSchemaResponse()
  schema_params = request.mutable_params()
  _CopyMetadataToProtocolBuffer(self, schema_params.add_index_spec())

  def hook():
    # Exactly one status is expected, for this index's schema. The hook
    # returns nothing on success.
    results = self._NewDeleteResultList([self.name], response)
    if response.status_size() != 1:
      raise DeleteError('did not delete exactly one schema', results)
    only_status = response.status_list()[0]
    if only_status.code() != search_service_pb.SearchServiceError.OK:
      message = _ConcatenateErrorMessages(
          'delete schema operation failed', only_status)
      raise DeleteError(message, results)

  pending = _RpcOperationFuture(
      'DeleteSchema', request, response, None, hook)
  return pending.get_result()
def _NewScoredDocumentFromPb(self, doc_pb, sort_scores, expressions, cursor):
  """Constructs a ScoredDocument from a document_pb.Document protocol buffer.

  Args:
    doc_pb: The document protocol buffer to read from.
    sort_scores: Passed through as the document's sort_scores.
    expressions: Field protocol buffers converted into the document's
      expressions.
    cursor: Passed through as the document's cursor.

  Returns:
    The new ScoredDocument; its language is None when doc_pb has none set.
  """
  language = _DecodeUTF8(doc_pb.language()) if doc_pb.has_language() else None
  document_id = _DecodeUTF8(doc_pb.id())
  document_fields = _NewFieldsFromPb(doc_pb.field_list())
  return ScoredDocument(
      doc_id=document_id,
      fields=document_fields,
      language=language,
      rank=doc_pb.order_id(),
      sort_scores=sort_scores,
      expressions=_NewFieldsFromPb(expressions),
      cursor=cursor)
def _NewSearchResults(self, response, cursor):
  """Returns a SearchResults populated from a search_service response pb.

  Args:
    response: The search response protocol buffer.
    cursor: The Cursor the query was issued with, or None. Cursors are only
      attached to the results when this is a Cursor.
  """
  def wrap_cursor(encoded_cursor):
    # Cursors handed back keep the per_result flag of the query cursor.
    return Cursor(web_safe_string=_ToWebSafeString(
        cursor.per_result, _DecodeUTF8(encoded_cursor)))

  scored_documents = []
  for result_pb in response.result_list():
    document_cursor = None
    if result_pb.has_cursor() and isinstance(cursor, Cursor):
      document_cursor = wrap_cursor(result_pb.cursor())
    scored_documents.append(
        self._NewScoredDocumentFromPb(
            result_pb.document(), result_pb.score_list(),
            result_pb.expression_list(), document_cursor))

  response_cursor = None
  if response.has_cursor() and isinstance(cursor, Cursor):
    response_cursor = wrap_cursor(response.cursor())
  return SearchResults(
      results=scored_documents, number_found=response.matched_count(),
      cursor=response_cursor)
@datastore_rpc._positional(2)
def get(self, doc_id, deadline=None):
  """Retrieve a document by document ID.

  Args:
    doc_id: The ID of the document to retrieve.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.

  Returns:
    If the document ID exists, returns the associated document. Otherwise,
    returns None.

  Raises:
    TypeError: If any of the parameters have invalid types, or an unknown
      attribute is passed.
    ValueError: If any of the parameters have invalid values (e.g., a
      negative deadline).
  """
  # Synchronous wrapper: start the asynchronous get and block on it.
  pending = self.get_async(doc_id, deadline=deadline)
  return pending.get_result()
@datastore_rpc._positional(2)
def get_async(self, doc_id, deadline=None):
  """Asynchronously retrieve a document by document ID.

  Identical to get() except that it returns a future. Call
  get_result() on the return value to block on the call and get its result.
  """
  # Fetch a range of at most one document starting at doc_id.
  range_future = self.get_range_async(
      start_id=doc_id, limit=1, deadline=deadline)

  def hook(response):
    # The range may start at a different document, so the first result is
    # only returned when its ID is exactly the one requested.
    if not response.results:
      return None
    if response.results[0].doc_id == doc_id:
      return response.results[0]
    return None

  return _SimpleOperationFuture(range_future, hook)
@datastore_rpc._positional(2)
def search(self, query, deadline=None, **kwargs):
  """Search the index for documents matching the query.

  For example, the following code fragment requests a search for
  documents where 'first' occurs in subject and 'good' occurs anywhere,
  returning at most 20 documents, starting the search from 'cursor token',
  returning another single cursor for the response, sorting by subject in
  descending order, returning the author, subject, and summary fields as well
  as a snippeted field content.

    results = index.search(
        query=Query('subject:first good',
            options=QueryOptions(limit=20,
                cursor=Cursor(),
                sort_options=SortOptions(
                    expressions=[SortExpression(expression='subject')],
                    limit=1000),
                returned_fields=['author', 'subject', 'summary'],
                snippeted_fields=['content'])))

  The following code fragment shows how to use a results cursor

    cursor = results.cursor
    for result in results:
      # process result

    results = index.search(
        Query('subject:first good', options=QueryOptions(cursor=cursor)))

  The following code fragment shows how to use a per_result cursor

    results = index.search(
        query=Query('subject:first good',
            options=QueryOptions(limit=20,
                cursor=Cursor(per_result=True),
                ...)))

    cursor = None
    for result in results:
      cursor = result.cursor

    results = index.search(
        Query('subject:first good', options=QueryOptions(cursor=cursor)))

  See http://developers.google.com/appengine/docs/python/search/query_strings
  for more information about query syntax.

  Args:
    query: The Query to match against documents in the index.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.

  Returns:
    A SearchResults containing a list of documents matched, number returned
    and number matched by the query.

  Raises:
    TypeError: If any of the parameters have invalid types, or an unknown
      attribute is passed.
    ValueError: If any of the parameters have invalid values (e.g., a
      negative deadline).
  """
  # Synchronous wrapper: start the asynchronous search and block on it.
  pending = self.search_async(query, deadline=deadline, **kwargs)
  return pending.get_result()
@datastore_rpc._positional(2)
def search_async(self, query, deadline=None, **kwargs):
  """Asynchronously searches the index for documents matching the query.

  Identical to search() except that it returns a future. Call
  get_result() on the return value to block on the call and get its result.
  """
  # A bare query string is wrapped in a Query, which has no options.
  if isinstance(query, basestring):
    query = Query(query_string=query)
  request = self._NewSearchRequest(query, deadline, **kwargs)
  response = search_service_pb.SearchResponse()

  def hook():
    _CheckStatus(response.status())
    # The query's cursor, if any, decides how result cursors are built.
    query_cursor = query.options.cursor if query.options else None
    return self._NewSearchResults(response, query_cursor)

  return _RpcOperationFuture('Search', request, response, deadline, hook)
def _NewSearchRequest(self, query, deadline, **kwargs):
  """Builds the SearchRequest protocol buffer for a query on this index.

  Args:
    query: A Query, or a query string which is wrapped in a Query.
    deadline: Not used when building the request.
    **kwargs: Only 'app_id' is accepted.

  Returns:
    The populated search_service_pb.SearchRequest.

  Raises:
    TypeError: If any keyword argument other than app_id is passed.
  """
  app_id = kwargs.pop('app_id', None)
  if kwargs:
    unexpected = ', '.join(kwargs)
    raise TypeError('Invalid arguments: %s' % unexpected)

  request = search_service_pb.SearchRequest()
  if app_id:
    request.set_app_id(app_id)

  params = request.mutable_params()
  if isinstance(query, basestring):
    query = Query(query_string=query)
  _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
  _CopyQueryObjectToProtocolBuffer(query, params)
  return request
def _NewGetResponse(self, response):
  """Returns a GetResponse from the list_documents response pb.

  Each entry of response.document_list() is converted with
  _NewDocumentFromPb, preserving the order of the response.
  """
  return GetResponse(results=[
      _NewDocumentFromPb(doc_pb) for doc_pb in response.document_list()])
@datastore_rpc._positional(5)
def get_range(self, start_id=None, include_start_object=True,
              limit=100, ids_only=False, deadline=None, **kwargs):
  """Get a range of Documents in the index, in id order.

  Blocking counterpart of get_range_async(): starts the asynchronous call
  and waits for its result.

  Args:
    start_id: String containing the Id from which to list
      Documents from. By default, starts at the first Id.
    include_start_object: If true, include the Document with the
      Id specified by the start_id parameter.
    limit: The maximum number of Documents to return.
    ids_only: If true, the Documents returned only contain their keys.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.

  Returns:
    A GetResponse containing a list of Documents, ordered by Id.

  Raises:
    Error: Some subclass of Error is raised if an error occurred processing
      the request.
    TypeError: If any of the parameters have invalid types, or an unknown
      attribute is passed.
    ValueError: If any of the parameters have invalid values (e.g., a
      negative deadline).
  """
  future = self.get_range_async(
      start_id, include_start_object, limit, ids_only, deadline=deadline,
      **kwargs)
  return future.get_result()
@datastore_rpc._positional(5)
def get_range_async(self, start_id=None, include_start_object=True,
                    limit=100, ids_only=False, deadline=None, **kwargs):
  """Asynchronously gets a range of Documents in the index, in id order.

  Identical to get_range() except that it returns a future. Call
  get_result() on the return value to block on the call and get its result.

  Args:
    start_id: Id to start listing from; only sent when truthy.
    include_start_object: Whether the Document with Id start_id is included.
    limit: Maximum number of Documents to return. Validated with
      _CheckInteger (zero not allowed, upper bound
      MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH).
    ids_only: If true, request keys only.
    deadline: Deadline for RPC call in seconds; if None use the default.
    **kwargs: Only 'app_id' is recognised.

  Returns:
    An _RpcOperationFuture for the 'ListDocuments' call.

  Raises:
    TypeError: If any keyword argument other than 'app_id' is passed.
  """
  app_id = kwargs.pop('app_id', None)
  if kwargs:
    raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))

  list_request = search_service_pb.ListDocumentsRequest()
  if app_id:
    list_request.set_app_id(app_id)

  list_params = list_request.mutable_params()
  _CopyMetadataToProtocolBuffer(self, list_params.mutable_index_spec())

  if start_id:
    list_params.set_start_doc_id(start_id)
  list_params.set_include_start_doc(include_start_object)

  checked_limit = _CheckInteger(
      limit, 'limit', zero_ok=False,
      upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH)
  list_params.set_limit(checked_limit)
  list_params.set_keys_only(ids_only)

  list_response = search_service_pb.ListDocumentsResponse()

  def hook():
    # Check the response status before converting it into a GetResponse.
    _CheckStatus(list_response.status())
    return self._NewGetResponse(list_response)

  return _RpcOperationFuture(
      'ListDocuments', list_request, list_response, deadline, hook)
# Maps a cursor type (None, Index.RESPONSE_CURSOR or Index.RESULT_CURSOR) to
# the corresponding search_service_pb.SearchParams cursor-type value.
_CURSOR_TYPE_PB_MAP = {
    None: search_service_pb.SearchParams.NONE,
    Index.RESPONSE_CURSOR: search_service_pb.SearchParams.SINGLE,
    Index.RESULT_CURSOR: search_service_pb.SearchParams.PER_RESULT,
}
# Maps the Index source constants to their search_service_pb.IndexSpec
# source values.
_SOURCES_TO_PB_MAP = {
    Index.SEARCH: search_service_pb.IndexSpec.SEARCH,
    Index.DATASTORE: search_service_pb.IndexSpec.DATASTORE,
    Index.CLOUD_STORAGE: search_service_pb.IndexSpec.CLOUD_STORAGE,
}
# Reverse of _SOURCES_TO_PB_MAP: maps search_service_pb.IndexSpec source
# values back to the Index source constants.
_SOURCE_PB_TO_SOURCES_MAP = dict(
    (pb_source, source) for source, pb_source in _SOURCES_TO_PB_MAP.items())
def _CopyMetadataToProtocolBuffer(index, spec_pb):
  """Copies Index specification to a search_service_pb.IndexSpec.

  Args:
    index: The Index whose name, namespace and source are copied.
    spec_pb: The search_service_pb.IndexSpec to populate in place.
  """
  spec_pb.set_name(index.name.encode('utf-8'))
  spec_pb.set_namespace(index.namespace.encode('utf-8'))

  # The source is only written when it is something other than Index.SEARCH.
  source = index._source
  if source != Index.SEARCH:
    spec_pb.set_source(_SOURCES_TO_PB_MAP.get(source))
# Maps document_pb.FieldValue content types to the public Field type
# constants.
_FIELD_TYPE_MAP = {
    document_pb.FieldValue.TEXT: Field.TEXT,
    document_pb.FieldValue.HTML: Field.HTML,
    document_pb.FieldValue.ATOM: Field.ATOM,
    document_pb.FieldValue.DATE: Field.DATE,
    document_pb.FieldValue.NUMBER: Field.NUMBER,
    document_pb.FieldValue.GEO: Field.GEO_POINT,
}
def _NewSchemaFromPb(field_type_pb_list):
  """Creates map of field name to type list from document_pb.FieldTypes list.

  Args:
    field_type_pb_list: Iterable of field-type protocol buffers, each
      exposing name() and type_list().

  Returns:
    A dict mapping each decoded field name to the list of public field
    types found for it, in the order encountered. An entry with an empty
    type_list() contributes nothing.
  """
  schema = {}
  for field_types_pb in field_type_pb_list:
    for pb_type in field_types_pb.type_list():
      public_type = _FIELD_TYPE_MAP[pb_type]
      field_name = _DecodeUTF8(field_types_pb.name())
      schema.setdefault(field_name, []).append(public_type)
  return schema
def _NewIndexFromIndexSpecPb(index_spec_pb):
  """Creates an Index from a search_service_pb.IndexSpec.

  The namespace is only passed to Index when the spec has one set;
  otherwise Index is constructed from the name and source alone.
  """
  source = _SOURCE_PB_TO_SOURCES_MAP.get(index_spec_pb.source())
  if not index_spec_pb.has_namespace():
    return Index(name=index_spec_pb.name(), source=source)
  return Index(name=index_spec_pb.name(),
               namespace=index_spec_pb.namespace(),
               source=source)
def _NewIndexFromPb(index_metadata_pb):
  """Creates an Index from a search_service_pb.IndexMetadata.

  The index is built from the metadata's index spec. A schema is attached
  when the metadata lists fields, and storage usage and limit are attached
  when the metadata has storage information.
  """
  index = _NewIndexFromIndexSpecPb(index_metadata_pb.index_spec())

  fields = index_metadata_pb.field_list()
  if fields:
    index._schema = _NewSchemaFromPb(fields)

  if index_metadata_pb.has_storage():
    storage = index_metadata_pb.storage()
    index._storage_usage = storage.amount_used()
    index._storage_limit = storage.limit()
  return index
def _MakeSyncSearchServiceCall(call, request, response, deadline):
  """Deprecated: Make a synchronous call to search service.

  If the deadline is not None, waits only until the deadline expires.

  Args:
    call: Method name to call, as a string
    request: The request object
    response: The response object
    deadline: Deadline for RPC call in seconds; if None use the default.

  Raises:
    TypeError: if the deadline is not a number and is not None.
    ValueError: If the deadline is not greater than zero.
    Exception: An apiproxy_errors.ApplicationError from the call is
      re-raised as whatever _ToSearchError returns for it.
  """
  _ValidateDeadline(deadline)
  logging.warning("_MakeSyncSearchServiceCall is deprecated; please use API.")
  try:
    if deadline is None:
      apiproxy_stub_map.MakeSyncCall('search', call, request, response)
    else:
      # A deadline was given, so use an explicit UserRPC that carries it
      # instead of the plain synchronous call.
      rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline)
      rpc.make_call(call, request, response)
      rpc.wait()
      rpc.check_success()
  except apiproxy_errors.ApplicationError as e:
    raise _ToSearchError(e)
def _ValidateDeadline(deadline):
  """Checks that deadline is None or a positive number.

  Args:
    deadline: Deadline for RPC call in seconds, or None.

  Raises:
    TypeError: If deadline is not None and is not an int, long or float,
      or is a bool.
    ValueError: If deadline is a number that is not greater than zero.
  """
  if deadline is not None:
    is_number = isinstance(deadline, (int, long, float))
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(deadline, bool) or not is_number:
      raise TypeError('deadline argument should be int/long/float (%r)'
                      % (deadline,))
    if deadline <= 0:
      raise ValueError('deadline argument must be > 0 (%s)' % (deadline,))