App Engine Python SDK version 1.9.9
[gae.git] / python / google / appengine / api / search / search.py
blob57bff9a540a65ad87e6cf39b599c8214852ec886
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """A Python Search API used by app developers.
23 Contains methods used to interface with Search API.
24 Contains API classes that forward to apiproxy.
25 """
33 import datetime
34 import re
35 import string
36 import sys
37 import warnings
39 from google.appengine.datastore import document_pb
40 from google.appengine.api import apiproxy_stub_map
41 from google.appengine.api import datastore_types
42 from google.appengine.api import namespace_manager
43 from google.appengine.api.search import expression_parser
44 from google.appengine.api.search import query_parser
45 from google.appengine.api.search import search_service_pb
46 from google.appengine.api.search import search_util
47 from google.appengine.datastore import datastore_rpc
48 from google.appengine.runtime import apiproxy_errors
# Names exported by this module when a caller uses a star import.
__all__ = [
    'AtomField',
    'ConcurrentTransactionError',
    'Cursor',
    'DateField',
    'DeleteError',
    'DeleteResult',
    'Document',
    'DOCUMENT_ID_FIELD_NAME',
    'Error',
    'ExpressionError',
    'Field',
    'FieldExpression',
    'HtmlField',
    'GeoField',
    'GeoPoint',
    'get_indexes',
    'GetResponse',
    'Index',
    'InternalError',
    'InvalidRequest',
    'LANGUAGE_FIELD_NAME',
    'MatchScorer',
    'MAXIMUM_DOCUMENT_ID_LENGTH',
    'MAXIMUM_DOCUMENTS_PER_PUT_REQUEST',
    'MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH',
    'MAXIMUM_EXPRESSION_LENGTH',
    'MAXIMUM_FIELD_ATOM_LENGTH',
    'MAXIMUM_FIELD_NAME_LENGTH',
    'MAXIMUM_FIELD_VALUE_LENGTH',
    'MAXIMUM_FIELDS_RETURNED_PER_SEARCH',
    'MAXIMUM_GET_INDEXES_OFFSET',
    'MAXIMUM_INDEX_NAME_LENGTH',
    'MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST',
    'MAXIMUM_NUMBER_FOUND_ACCURACY',
    'MAXIMUM_QUERY_LENGTH',
    'MAXIMUM_SEARCH_OFFSET',
    'MAXIMUM_SORTED_DOCUMENTS',
    'MAX_DATE',
    'MAX_NUMBER_VALUE',
    'MIN_DATE',
    'MIN_NUMBER_VALUE',
    'NumberField',
    'OperationResult',
    'PutError',
    'PutResult',
    'Query',
    'QueryError',
    'QueryOptions',
    'RANK_FIELD_NAME',
    'RescoringMatchScorer',
    'SCORE_FIELD_NAME',
    'ScoredDocument',
    'SearchResults',
    'SortExpression',
    'SortOptions',
    'TextField',
    'Timeout',
    'TIMESTAMP_FIELD_NAME',
    'TransientError',
    ]
# Public size and count limits; the validation helpers below compare
# caller-supplied values against several of these.
MAXIMUM_INDEX_NAME_LENGTH = 100
MAXIMUM_FIELD_VALUE_LENGTH = 1024 * 1024
MAXIMUM_FIELD_ATOM_LENGTH = 500
MAXIMUM_FIELD_NAME_LENGTH = 500
MAXIMUM_DOCUMENT_ID_LENGTH = 500
MAXIMUM_DOCUMENTS_PER_PUT_REQUEST = 200
MAXIMUM_EXPRESSION_LENGTH = 5000
MAXIMUM_QUERY_LENGTH = 2000
MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH = 1000
MAXIMUM_SEARCH_OFFSET = 1000

MAXIMUM_SORTED_DOCUMENTS = 10000
MAXIMUM_NUMBER_FOUND_ACCURACY = 10000
MAXIMUM_FIELDS_RETURNED_PER_SEARCH = 100
MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST = 1000
MAXIMUM_GET_INDEXES_OFFSET = 1000

# Exported field-name constants; all start with an underscore, which the
# user field-name pattern below does not allow.
DOCUMENT_ID_FIELD_NAME = '_doc_id'

LANGUAGE_FIELD_NAME = '_lang'

RANK_FIELD_NAME = '_rank'

SCORE_FIELD_NAME = '_score'

TIMESTAMP_FIELD_NAME = '_timestamp'

# Two arbitrary characters, optionally followed by '_' and two more.
_LANGUAGE_RE = re.compile('^(.{2}|.{2}_.{2})$')

# Default byte limit used by _ValidateString.
_MAXIMUM_STRING_LENGTH = 500
_MAXIMUM_CURSOR_LENGTH = 10000

# Printable ASCII characters with all whitespace characters removed.
_VISIBLE_PRINTABLE_ASCII = frozenset(
    set(string.printable) - set(string.whitespace))
# A letter followed by any number of letters, digits or underscores.
_FIELD_NAME_PATTERN = '^[A-Za-z][A-Za-z0-9_]*$'

# Naive (tzinfo=None) datetimes at the extremes the datetime module supports.
MAX_DATE = datetime.datetime(
    datetime.MAXYEAR, 12, 31, 23, 59, 59, 999999, tzinfo=None)
MIN_DATE = datetime.datetime(
    datetime.MINYEAR, 1, 1, 0, 0, 0, 0, tzinfo=None)

# Inclusive bounds checked by NumberField._CheckValue.
MAX_NUMBER_VALUE = 2147483647
MIN_NUMBER_VALUE = -2147483647

# Protocol buffer field types whose value is read from string_value as-is.
_PROTO_FIELDS_STRING_VALUE = frozenset([document_pb.FieldValue.TEXT,
                                        document_pb.FieldValue.HTML,
                                        document_pb.FieldValue.ATOM])
class Error(Exception):
  """Base class for every failure reported by the search API."""
class InternalError(Error):
  """Raised when a search API call fails inside the backend."""
class TransientError(Error):
  """Raised when a search API call fails but a retry may succeed."""
class InvalidRequest(Error):
  """Raised when the client sends an invalid request to the search API."""
class QueryError(Error):
  """Raised when a query input string cannot be parsed."""
class ExpressionError(Error):
  """Raised when an expression input string cannot be parsed."""
class Timeout(Error):
  """Raised when a search API call does not finish before its deadline."""
class ConcurrentTransactionError(Error):
  """Raised when a search API call fails because of concurrent updates."""
202 def _ConvertToUnicode(some_string):
203 """Convert UTF-8 encoded string to unicode."""
204 if some_string is None:
205 return None
206 if isinstance(some_string, unicode):
207 return some_string
208 return unicode(some_string, 'utf-8')
211 def _ConcatenateErrorMessages(prefix, status):
212 """Returns an error message combining prefix and status.error_detail()."""
213 if status.error_detail():
214 return prefix + ': ' + status.error_detail()
215 return prefix
class OperationResult(object):
  """Outcome of one operation within a batch index or removal request.

  This is an abstract class; PutResult and DeleteResult derive from it.
  """

  OK = 'OK'
  INVALID_REQUEST = 'INVALID_REQUEST'
  TRANSIENT_ERROR = 'TRANSIENT_ERROR'
  INTERNAL_ERROR = 'INTERNAL_ERROR'
  TIMEOUT = 'TIMEOUT'
  CONCURRENT_TRANSACTION = 'CONCURRENT_TRANSACTION'

  # Every code accepted by the initializer.
  _CODES = frozenset([OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR,
                      TIMEOUT, CONCURRENT_TRANSACTION])

  def __init__(self, code, message=None, id=None):
    """Initializer.

    Args:
      code: The error or success code of the operation; one of the class
        level code constants.
      message: An error message associated with any error.
      id: The id of the object some operation was performed on.

    Raises:
      ValueError: If an unknown code is passed.
    """
    self._message = _ConvertToUnicode(message)
    self._code = code
    known_code = self._code in self._CODES
    if not known_code:
      raise ValueError('Unknown operation result code %r, must be one of %s'
                       % (self._code, self._CODES))
    self._id = _ConvertToUnicode(id)

  @property
  def code(self):
    """The code describing the status of the operation."""
    return self._code

  @property
  def message(self):
    """The error message, if any, recorded for the operation."""
    return self._message

  @property
  def id(self):
    """The id of the object the operation was performed on."""
    return self._id

  def __repr__(self):
    attributes = [
        ('code', self.code),
        ('message', self.message),
        ('id', self.id),
    ]
    return _Repr(self, attributes)
# Maps each SearchServiceError protocol buffer code to the matching
# OperationResult code string.
_ERROR_OPERATION_CODE_MAP = {
    search_service_pb.SearchServiceError.OK: OperationResult.OK,
    search_service_pb.SearchServiceError.INVALID_REQUEST:
    OperationResult.INVALID_REQUEST,
    search_service_pb.SearchServiceError.TRANSIENT_ERROR:
    OperationResult.TRANSIENT_ERROR,
    search_service_pb.SearchServiceError.INTERNAL_ERROR:
    OperationResult.INTERNAL_ERROR,
    search_service_pb.SearchServiceError.TIMEOUT:
    OperationResult.TIMEOUT,
    search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
    OperationResult.CONCURRENT_TRANSACTION,
    }
class PutResult(OperationResult):
  """OperationResult describing the indexing of a single object."""
class DeleteResult(OperationResult):
  """OperationResult describing the deletion of a single document."""
class PutError(Error):
  """Raised when indexing at least one of the requested objects failed."""

  def __init__(self, message, results):
    """Initializer.

    Args:
      message: Text explaining why some document could not be indexed.
      results: A list of PutResult, one per object that was requested to be
        indexed.
    """
    self._results = results
    super(PutError, self).__init__(message)

  @property
  def results(self):
    """The list of PutResult matching the objects requested to be indexed."""
    return self._results
class DeleteError(Error):
  """Raised when deleting at least one of the requested objects failed."""

  def __init__(self, message, results):
    """Initializer.

    Args:
      message: Text explaining why some document could not be deleted.
      results: A list of DeleteResult, one per id of the objects that were
        requested to be deleted.
    """
    self._results = results
    super(DeleteError, self).__init__(message)

  @property
  def results(self):
    """The list of DeleteResult matching the ids requested to be deleted."""
    return self._results
# Maps SearchServiceError codes to the exception class raised for them.
# OK is deliberately absent; codes missing from this map are handled by the
# callers (_ToSearchError returns the original error, _CheckStatus raises
# InternalError).
_ERROR_MAP = {
    search_service_pb.SearchServiceError.INVALID_REQUEST: InvalidRequest,
    search_service_pb.SearchServiceError.TRANSIENT_ERROR: TransientError,
    search_service_pb.SearchServiceError.INTERNAL_ERROR: InternalError,
    search_service_pb.SearchServiceError.TIMEOUT: Timeout,
    search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
    ConcurrentTransactionError,
    }
def _ToSearchError(error):
  """Maps an application error onto a search Error where one is defined.

  Args:
    error: The ApplicationError to translate.

  Returns:
    An instance of the Error subclass registered in _ERROR_MAP for the
    error's application_error code, built from its error_detail. If the code
    is not registered, the given error is returned unchanged.
  """
  error_class = _ERROR_MAP.get(error.application_error)
  if error_class is None:
    return error
  return error_class(error.error_detail)
def _CheckInteger(value, name, zero_ok=True, upper_bound=None):
  """Validates that value is an integer no greater than upper_bound.

  Args:
    value: The value to check.
    name: The name of the value, to use in error messages.
    zero_ok: True if zero is allowed.
    upper_bound: The upper (inclusive) bound of the value. Optional.

  Returns:
    The checked value.

  Raises:
    ValueError: If the value is not a int or long, or is out of range.
  """
  datastore_types.ValidateInteger(
      value, name, ValueError, empty_ok=True, zero_ok=zero_ok)
  exceeds_bound = upper_bound is not None and value > upper_bound
  if exceeds_bound:
    raise ValueError('%s, %d must be <= %d' % (name, value, upper_bound))
  return value
383 def _CheckEnum(value, name, values=None):
384 """Checks whether value is a member of the set of values given.
386 Args:
387 value: The value to check.
388 name: The name of the value, to use in error messages.
389 values: The iterable of possible values.
391 Returns:
392 The checked value.
394 Raises:
395 ValueError: If the value is not one of the allowable values.
397 if value not in values:
398 raise ValueError('%s, %r must be in %s' % (name, value, values))
399 return value
402 def _CheckNumber(value, name):
403 """Checks whether value is a number.
405 Args:
406 value: The value to check.
407 name: The name of the value, to use in error messages.
409 Returns:
410 The checked value.
412 Raises:
413 TypeError: If the value is not a number.
415 if not isinstance(value, (int, long, float)):
416 raise TypeError('%s must be a int, long or float, got %s' %
417 (name, value.__class__.__name__))
418 return value
def _CheckStatus(status):
  """Raises the matching search error unless the RequestStatus is OK.

  Args:
    status: The RequestStatus to check.

  Raises:
    Error: A subclass of Error if the value of status is not OK.
      The subclass is looked up in _ERROR_MAP using the status code.
    InternalError: If the status code has no entry in _ERROR_MAP.
  """
  code = status.code()
  if code == search_service_pb.SearchServiceError.OK:
    return
  # Codes without a dedicated exception class surface as InternalError.
  error_class = _ERROR_MAP.get(code, InternalError)
  raise error_class(status.error_detail())
def _ValidateString(value,
                    name='unused',
                    max_len=_MAXIMUM_STRING_LENGTH,
                    empty_ok=False,
                    type_exception=TypeError,
                    value_exception=ValueError):
  """Raises an exception if value is not a valid string or a subclass thereof.

  A string is valid if it's not empty (unless empty_ok is set) and its UTF-8
  encoding is no more than max_len bytes. The exception type can be specified
  with the exception arguments for type and value issues.

  Args:
    value: The value to validate.
    name: The name of this value; used in the exception message.
    max_len: The maximum allowed length, in bytes (inclusive).
    empty_ok: Allow empty value.
    type_exception: The type of exception to raise if not a basestring.
    value_exception: The type of exception to raise if invalid value.

  Returns:
    The checked string, or None when value is None and empty_ok is set.

  Raises:
    TypeError: If value is not a basestring or subclass.
    ValueError: If the value is empty or None while empty_ok is False, or
      its encoded form is longer than max_len.
  """
  if value is None and empty_ok:
    return None
  if value is not None and not isinstance(value, basestring):
    raise type_exception('%s must be a basestring; got %s:' %
                         (name, value.__class__.__name__))
  if not value and not empty_ok:
    raise value_exception('%s must not be empty.' % name)

  # Only unicode needs encoding to measure its size in bytes. Calling
  # encode() on a byte string makes Python 2 decode it as ASCII first, which
  # raises UnicodeDecodeError for already-encoded non-ASCII text.
  if isinstance(value, unicode):
    byte_length = len(value.encode('utf-8'))
  else:
    byte_length = len(value)
  if byte_length > max_len:
    raise value_exception('%s must be under %d bytes.' % (name, max_len))
  return value
def _ValidateVisiblePrintableAsciiNotReserved(value, name):
  """Validates value is visible printable ASCII that does not begin with '!'.

  Whitespace characters are not considered visible. Strings beginning with
  '!' are reserved for internal use.

  Args:
    value: The string to validate.
    name: The name of this string; used in the exception message.

  Returns:
    The checked string.

  Raises:
    ValueError: If the string is not visible printable ASCII, or starts with
      '!'.
  """
  all_visible = all(char in _VISIBLE_PRINTABLE_ASCII for char in value)
  if not all_visible:
    raise ValueError(
        '%r must be visible printable ASCII: %r'
        % (name, value))
  reserved = value.startswith('!')
  if reserved:
    raise ValueError('%r must not start with "!": %r' % (name, value))
  return value
def _CheckIndexName(index_name):
  """Validates an index name and returns it.

  The name must be a non-empty string of at most MAXIMUM_INDEX_NAME_LENGTH
  bytes, consist of visible printable ASCII and not start with '!'.
  """
  _ValidateString(index_name, name='index name',
                  max_len=MAXIMUM_INDEX_NAME_LENGTH)
  checked_name = _ValidateVisiblePrintableAsciiNotReserved(
      index_name, 'index_name')
  return checked_name
def _CheckFieldName(name):
  """Checks field name is not too long and matches field name pattern.

  Field name pattern: "[A-Za-z][A-Za-z0-9_]*".

  Args:
    name: The field name to check.

  Returns:
    The checked name.

  Raises:
    TypeError: If name is not a string.
    ValueError: If name is empty, longer than MAXIMUM_FIELD_NAME_LENGTH
      bytes, or does not match the pattern over its whole length.
  """
  _ValidateString(name, 'name', MAXIMUM_FIELD_NAME_LENGTH)
  match = re.match(_FIELD_NAME_PATTERN, name)
  # '$' also matches just before a trailing newline, so additionally require
  # that the match consumed the entire name ('abc\n' must be rejected).
  if match is None or match.end() != len(name):
    raise ValueError('field name "%s" should match pattern: %s' %
                     (name, _FIELD_NAME_PATTERN))
  return name
def _CheckExpression(expression):
  """Checks the expression is a non-empty string that parses successfully.

  Args:
    expression: The expression string to check.

  Returns:
    The checked expression.

  Raises:
    TypeError: If expression is not a string.
    ValueError: If expression is empty or longer than
      MAXIMUM_EXPRESSION_LENGTH bytes.
    ExpressionError: If the expression parser rejects the expression.
  """
  # Pass a real name so validation errors read "expression must ..." rather
  # than the default "unused must ...".
  expression = _ValidateString(expression, name='expression',
                               max_len=MAXIMUM_EXPRESSION_LENGTH)
  try:
    expression_parser.Parse(expression)
  except expression_parser.ExpressionException:
    raise ExpressionError('Failed to parse expression "%s"' % expression)
  return expression
def _CheckFieldNames(names):
  """Validates every entry of names as a field name and returns names."""
  for field_name in names:
    _CheckFieldName(field_name)
  return names
544 def _GetList(a_list):
545 """Utility function that converts None to the empty list."""
546 if a_list is None:
547 return []
548 else:
549 return list(a_list)
552 def _ConvertToList(arg):
553 """Converts arg to a list, empty if None, single element if not a list."""
554 if isinstance(arg, basestring):
555 return [arg]
556 if arg is not None:
557 try:
558 return list(iter(arg))
559 except TypeError:
560 return [arg]
561 return []
def _ConvertToUnicodeList(arg):
  """Returns arg as a list with every element converted to unicode."""
  converted = []
  for item in _ConvertToList(arg):
    converted.append(_ConvertToUnicode(item))
  return converted
def _CheckDocumentId(doc_id):
  """Validates a document identifier and returns it.

  The id must be a non-empty string of at most MAXIMUM_DOCUMENT_ID_LENGTH
  bytes, consist of visible printable ASCII and not start with '!'.
  """
  _ValidateString(doc_id, name='doc_id', max_len=MAXIMUM_DOCUMENT_ID_LENGTH)
  _ValidateVisiblePrintableAsciiNotReserved(doc_id, name='doc_id')
  return doc_id
def _CheckText(value, name='value', empty_ok=True):
  """Validates a text field value against MAXIMUM_FIELD_VALUE_LENGTH."""
  return _ValidateString(value, name=name,
                         max_len=MAXIMUM_FIELD_VALUE_LENGTH,
                         empty_ok=empty_ok)
def _CheckHtml(html):
  """Validates an HTML field value against MAXIMUM_FIELD_VALUE_LENGTH."""
  return _ValidateString(html, name='html',
                         max_len=MAXIMUM_FIELD_VALUE_LENGTH,
                         empty_ok=True)
def _CheckAtom(atom):
  """Validates an atom field value against MAXIMUM_FIELD_ATOM_LENGTH."""
  return _ValidateString(atom, name='atom',
                         max_len=MAXIMUM_FIELD_ATOM_LENGTH,
                         empty_ok=True)
def _CheckDate(date):
  """Validates that date is a date or datetime within [MIN_DATE, MAX_DATE].

  Args:
    date: A datetime.datetime or datetime.date.

  Returns:
    The checked date.

  Raises:
    TypeError: If date has another type or lies outside the allowed range.
  """
  # datetime.datetime subclasses datetime.date, so it must be tested first.
  if isinstance(date, datetime.datetime):
    earliest, latest = MIN_DATE, MAX_DATE
  elif isinstance(date, datetime.date):
    earliest, latest = MIN_DATE.date(), MAX_DATE.date()
  else:
    raise TypeError('date must be datetime.datetime or datetime.date')
  if date < earliest or date > latest:
    raise TypeError('date must be between %s and %s (got %s)' %
                    (MIN_DATE, MAX_DATE, date))
  return date
def _CheckLanguage(language):
  """Checks language is None or a string that matches _LANGUAGE_RE.

  Args:
    language: None, or a language code string.

  Returns:
    The checked language.

  Raises:
    TypeError: If language is neither None nor a string.
    ValueError: If language does not match _LANGUAGE_RE over its whole
      length.
  """
  if language is None:
    return None
  if not isinstance(language, basestring):
    raise TypeError('language must be a basestring, got %s' %
                    language.__class__.__name__)
  match = _LANGUAGE_RE.match(language)
  # '$' also matches just before a trailing newline; insist that the match
  # covers the complete string so that e.g. 'en\n' is rejected.
  if match is None or match.end() != len(language):
    raise ValueError('invalid language %s. Languages should be two letters.'
                     % language)
  return language
def _CheckDocument(document):
  """Validates a document against server-side requirements.

  Currently the only requirement checked is that no number field name and no
  date field name occurs more than once. Number and date names are tracked
  separately.

  Args:
    document: The search.Document to check for validity.

  Raises:
    ValueError if the document is invalid in a way that would trigger an
    PutError from the server.
  """
  # One set of already-seen names per non-repeatable field class, tested in
  # this order.
  seen_by_class = ((NumberField, set()), (DateField, set()))
  for field in document.fields:
    for field_class, seen_names in seen_by_class:
      if not isinstance(field, field_class):
        continue
      if field.name in seen_names:
        raise ValueError(
            'Invalid document %s: field %s with type date or number may not '
            'be repeated.' % (document.doc_id, field.name))
      seen_names.add(field.name)
      break
def _CheckSortLimit(limit):
  """Validates that limit does not exceed MAXIMUM_SORTED_DOCUMENTS."""
  checked_limit = _CheckInteger(
      limit, 'limit', upper_bound=MAXIMUM_SORTED_DOCUMENTS)
  return checked_limit
659 def _Repr(class_instance, ordered_dictionary):
660 """Generates an unambiguous representation for instance and ordered dict."""
661 return u'search.%s(%s)' % (class_instance.__class__.__name__, ', '.join(
662 ['%s=%r' % (key, value) for (key, value) in ordered_dictionary
663 if value is not None and value != []]))
def _ListIndexesResponsePbToGetResponse(response):
  """Builds a GetResponse from a get_indexes response protocol buffer."""
  indexes = []
  for index_pb in response.index_metadata_list():
    indexes.append(_NewIndexFromPb(index_pb))
  return GetResponse(results=indexes)
@datastore_rpc._positional(7)
def get_indexes(namespace='', offset=None, limit=20,
                start_index_name=None, include_start_index=True,
                index_name_prefix=None, fetch_schema=False, deadline=None,
                **kwargs):
  """Returns a list of available indexes.

  Args:
    namespace: The namespace of indexes to be returned. Passing None selects
      the current namespace.
    offset: The offset of the first returned index.
    limit: The number of indexes to return.
    start_index_name: The name of the first index to be returned.
    include_start_index: Whether or not to return the start index.
    index_name_prefix: The prefix used to select returned indexes.
    fetch_schema: Whether to retrieve Schema for each Index or not.

  Kwargs:
    deadline: Deadline for RPC call in seconds; if None use the default.
    app_id: Optional application id set on the request when given.

  Returns:
    The GetResponse containing a list of available indexes.

  Raises:
    InternalError: If the request fails on internal servers.
    TypeError: If any of the parameters have invalid types, or an unknown
      attribute is passed.
    ValueError: If any of the parameters have invalid values (e.g., a
      negative deadline).
  """
  app_id = kwargs.pop('app_id', None)
  # app_id is the only extra keyword accepted.
  if kwargs:
    raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))

  return _ListIndexesResponsePbToGetResponse(
      _GetIndexes(
          namespace=namespace,
          offset=offset,
          limit=limit,
          start_index_name=start_index_name,
          include_start_index=include_start_index,
          index_name_prefix=index_name_prefix,
          fetch_schema=fetch_schema,
          deadline=deadline,
          app_id=app_id))
def _GetIndexes(namespace='', offset=None, limit=20,
                start_index_name=None, include_start_index=True,
                index_name_prefix=None, fetch_schema=False, deadline=None,
                app_id=None):
  """Issues a ListIndexes call and returns the ListIndexesResponse.

  The arguments have the same meaning as for get_indexes. Each one is
  validated before being copied into the request.

  Raises:
    ValueError: If an argument has an invalid value.
    TypeError: If a string argument has an invalid type.
    Error: A subclass of Error if the response status is not OK.
  """
  request = search_service_pb.ListIndexesRequest()
  params = request.mutable_params()

  # None means "use the current namespace", falling back to the empty one.
  if namespace is None:
    namespace = namespace_manager.get_namespace()
  if namespace is None:
    namespace = u''
  namespace_manager.validate_namespace(namespace, exception=ValueError)
  params.set_namespace(namespace)

  if offset is not None:
    checked_offset = _CheckInteger(offset, 'offset', zero_ok=True,
                                   upper_bound=MAXIMUM_GET_INDEXES_OFFSET)
    params.set_offset(checked_offset)

  checked_limit = _CheckInteger(
      limit, 'limit', zero_ok=False,
      upper_bound=MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST)
  params.set_limit(checked_limit)

  if start_index_name is not None:
    checked_start = _ValidateString(start_index_name, 'start_index_name',
                                    MAXIMUM_INDEX_NAME_LENGTH,
                                    empty_ok=False)
    params.set_start_index_name(checked_start)
  if include_start_index is not None:
    params.set_include_start_index(bool(include_start_index))
  if index_name_prefix is not None:
    checked_prefix = _ValidateString(index_name_prefix, 'index_name_prefix',
                                     MAXIMUM_INDEX_NAME_LENGTH,
                                     empty_ok=False)
    params.set_index_name_prefix(checked_prefix)
  params.set_fetch_schema(fetch_schema)

  response = search_service_pb.ListIndexesResponse()
  if app_id:
    request.set_app_id(app_id)

  _MakeSyncSearchServiceCall('ListIndexes', request, response, deadline)

  _CheckStatus(response.status())
  return response
class Field(object):
  """Abstract base class for a single field of a document.

  Do not instantiate this class directly; use one of its subclasses, which
  supply _CheckValue and _CopyValueToProtocolBuffer.
  """

  TEXT = 'TEXT'
  HTML = 'HTML'
  ATOM = 'ATOM'
  DATE = 'DATE'
  NUMBER = 'NUMBER'
  GEO_POINT = 'GEO_POINT'

  _FIELD_TYPES = frozenset([TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT])

  def __init__(self, name, value, language=None):
    """Initializer.

    Args:
      name: The name of the field. Field names must have maximum length
        MAXIMUM_FIELD_NAME_LENGTH and match pattern "[A-Za-z][A-Za-z0-9_]*".
      value: The value of the field; it is validated by the subclass'
        _CheckValue.
      language: The ISO 639-1 two letter code of the language used in the
        value. See http://www.sil.org/iso639-3/codes.asp?order=639_1&letter=%25
        for a list of valid codes. Correct specification of language code
        will assist in correct tokenization of the field. If None is given,
        then the language code of the document will be used.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    unicode_name = _ConvertToUnicode(name)
    self._name = _CheckFieldName(unicode_name)
    self._value = self._CheckValue(value)
    unicode_language = _ConvertToUnicode(language)
    self._language = _CheckLanguage(unicode_language)

  @property
  def name(self):
    """The name of the field."""
    return self._name

  @property
  def language(self):
    """The code of the language the content in value is written in."""
    return self._language

  @property
  def value(self):
    """The value of the field."""
    return self._value

  def _CheckValue(self, value):
    """Validates value for the concrete field type.

    Args:
      value: The value to check.

    Returns:
      The checked value.
    """
    raise NotImplementedError('_CheckValue is an abstract method')

  def __repr__(self):
    attributes = [
        ('name', self.name),
        ('language', self.language),
        ('value', self.value),
    ]
    return _Repr(self, attributes)

  def __eq__(self, other):
    if not isinstance(other, type(self)):
      return False
    return self.__key() == other.__key()

  def __ne__(self, other):
    return not (self == other)

  def __key(self):
    # Tuple that drives both equality and hashing.
    return (self.name, self.value, self.language)

  def __hash__(self):
    return hash(self.__key())

  def __str__(self):
    return repr(self)

  def _CopyStringValueToProtocolBuffer(self, field_value_pb):
    """Stores the UTF-8 encoded value as the string value of the proto buf."""
    encoded_value = self.value.encode('utf-8')
    field_value_pb.set_string_value(encoded_value)
844 def _CopyFieldToProtocolBuffer(field, pb):
845 """Copies field's contents to a document_pb.Field protocol buffer."""
846 pb.set_name(field.name.encode('utf-8'))
847 field_value_pb = pb.mutable_value()
848 if field.language:
849 field_value_pb.set_language(field.language.encode('utf-8'))
850 if field.value is not None:
851 field._CopyValueToProtocolBuffer(field_value_pb)
852 return pb
class TextField(Field):
  """A Field holding plain text.

  For example, a text field named signature with Polish content:
    TextField(name='signature', value='brzydka pogoda', language='pl')
  """

  def __init__(self, name, value=None, language=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A str or unicode object containing text.
      language: The code of the language the value is encoded in.

    Raises:
      TypeError: If value is not a string.
      ValueError: If value is longer than allowed.
    """
    unicode_value = _ConvertToUnicode(value)
    super(TextField, self).__init__(name, unicode_value, language)

  def _CheckValue(self, value):
    checked = _CheckText(value)
    return checked

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    field_value_pb.set_type(document_pb.FieldValue.TEXT)
    self._CopyStringValueToProtocolBuffer(field_value_pb)
class HtmlField(Field):
  """A Field holding HTML content.

  For example, an html field named content:
    HtmlField(name='content', value='<html>herbata, kawa</html>', language='pl')
  """

  def __init__(self, name, value=None, language=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A str or unicode object containing the searchable content of the
        Field.
      language: The code of the language the value is encoded in.

    Raises:
      TypeError: If value is not a string.
      ValueError: If value is longer than allowed.
    """
    unicode_value = _ConvertToUnicode(value)
    super(HtmlField, self).__init__(name, unicode_value, language)

  def _CheckValue(self, value):
    checked = _CheckHtml(value)
    return checked

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    field_value_pb.set_type(document_pb.FieldValue.HTML)
    self._CopyStringValueToProtocolBuffer(field_value_pb)
class AtomField(Field):
  """A Field whose content is indexed as a single token.

  For example, an atom field named contributor:
    AtomField(name='contributor', value='foo@bar.com')
  """

  def __init__(self, name, value=None, language=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A str or unicode object to be treated as an indivisible text
        value.
      language: The code of the language the value is encoded in.

    Raises:
      TypeError: If value is not a string.
      ValueError: If value is longer than allowed.
    """
    unicode_value = _ConvertToUnicode(value)
    super(AtomField, self).__init__(name, unicode_value, language)

  def _CheckValue(self, value):
    checked = _CheckAtom(value)
    return checked

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    field_value_pb.set_type(document_pb.FieldValue.ATOM)
    self._CopyStringValueToProtocolBuffer(field_value_pb)
class DateField(Field):
  """A Field holding a date or datetime value.

  For example, a date field named creation_date:
    DateField(name='creation_date', value=datetime.date(2011, 3, 11))
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A datetime.date or a datetime.datetime.

    Raises:
      TypeError: If value is not a datetime.date or a datetime.datetime.
    """
    super(DateField, self).__init__(name, value)

  def _CheckValue(self, value):
    checked = _CheckDate(value)
    return checked

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    field_value_pb.set_type(document_pb.FieldValue.DATE)
    serialized = search_util.SerializeDate(self.value)
    field_value_pb.set_string_value(serialized)
class NumberField(Field):
  """A Field that has a numeric value.

  The following example shows a number field named size:
    NumberField(name='size', value=10)
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A numeric value.

    Raises:
      TypeError: If value is not numeric.
      ValueError: If value is out of range.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Checks value is a number within the allowed range.

    Args:
      value: The value to check.

    Returns:
      The checked value.

    Raises:
      TypeError: If value is not an int, long or float.
      ValueError: If value lies outside
        [MIN_NUMBER_VALUE, MAX_NUMBER_VALUE].
    """
    # _CheckNumber rejects None with TypeError, so no None guard is needed.
    value = _CheckNumber(value, 'field value')
    if value < MIN_NUMBER_VALUE or value > MAX_NUMBER_VALUE:
      # Format the offending value with %s: %d truncates floats and raises
      # OverflowError for infinity instead of the documented ValueError.
      raise ValueError('value, %s must be between %d and %d' %
                       (value, MIN_NUMBER_VALUE, MAX_NUMBER_VALUE))
    return value

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Stores the value in the proto buf as a NUMBER in string form."""
    field_value_pb.set_type(document_pb.FieldValue.NUMBER)
    field_value_pb.set_string_value(str(self.value))
class GeoPoint(object):
  """Represents a point on the Earth's surface, in lat, long coordinates.

  Instances compare equal when both coordinates are equal, and equal
  instances hash alike.
  """

  def __init__(self, latitude, longitude):
    """Initializer.

    Args:
      latitude: The angle between the equatorial plan and a line that passes
        through the GeoPoint, between -90 and 90 degrees.
      longitude: The angle east or west from a reference meridian to another
        meridian that passes through the GeoPoint, between -180 and 180
        degrees.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    self._latitude = self._CheckLatitude(latitude)
    self._longitude = self._CheckLongitude(longitude)

  @property
  def latitude(self):
    """Returns the angle between equatorial plan and line thru the geo point."""
    return self._latitude

  @property
  def longitude(self):
    """Returns the angle from a reference meridian to another meridian."""
    return self._longitude

  def _CheckLatitude(self, value):
    """Checks value is a number in [-90, 90] and returns it."""
    _CheckNumber(value, 'latitude')
    if value < -90.0 or value > 90.0:
      raise ValueError('latitude must be between -90 and 90 degrees '
                       'inclusive, was %f' % value)
    return value

  def _CheckLongitude(self, value):
    """Checks value is a number in [-180, 180] and returns it."""
    _CheckNumber(value, 'longitude')
    if value < -180.0 or value > 180.0:
      raise ValueError('longitude must be between -180 and 180 degrees '
                       'inclusive, was %f' % value)
    return value

  def __eq__(self, other):
    # Comparing against an unrelated object must not raise AttributeError.
    if not isinstance(other, GeoPoint):
      return NotImplemented
    return (self.latitude == other.latitude and
            self.longitude == other.longitude)

  def __ne__(self, other):
    # Python 2 does not derive != from ==, so define it explicitly.
    result = self.__eq__(other)
    if result is NotImplemented:
      return result
    return not result

  def __hash__(self):
    # Keep hashing consistent with __eq__ so equal points hash alike.
    return hash((self.latitude, self.longitude))

  def __repr__(self):
    return _Repr(self,
                 [('latitude', self.latitude),
                  ('longitude', self.longitude)])
def _CheckGeoPoint(geo_point):
  """Validates that geo_point is a GeoPoint instance and returns it.

  Raises:
    TypeError: If geo_point is not a GeoPoint.
  """
  if isinstance(geo_point, GeoPoint):
    return geo_point
  raise TypeError('geo_point must be a GeoPoint, got %s' %
                  geo_point.__class__.__name__)
class GeoField(Field):
  """A Field whose value is a GeoPoint.

  The following example shows a geo field named place:

    GeoField(name='place', value=GeoPoint(latitude=-33.84, longitude=151.26))
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A GeoPoint value.

    Raises:
      TypeError: If value is not a GeoPoint (as enforced by _CheckValue;
        presumably invoked by Field.__init__ -- confirm against Field).
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Returns value once it has been confirmed to be a GeoPoint."""
    return _CheckGeoPoint(value)

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Marks field_value_pb as GEO and copies latitude/longitude into it."""
    field_value_pb.set_type(document_pb.FieldValue.GEO)
    geo_pb = field_value_pb.mutable_geo()
    point = self.value
    geo_pb.set_lat(point.latitude)
    geo_pb.set_lng(point.longitude)
def _GetValue(value_pb):
  """Extracts the Python value held by a field value protocol buffer.

  Args:
    value_pb: The field value protocol buffer to read.

  Returns:
    The raw string for string-valued types, a deserialized date for DATE,
    a float for NUMBER, a GeoPoint for GEO, or None when the protocol buffer
    carries no payload for its type.

  Raises:
    TypeError: If the type of value_pb is not one of the handled types.
  """
  value_type = value_pb.type()
  if value_type in _PROTO_FIELDS_STRING_VALUE:
    return value_pb.string_value() if value_pb.has_string_value() else None
  elif value_type == document_pb.FieldValue.DATE:
    if not value_pb.has_string_value():
      return None
    return search_util.DeserializeDate(value_pb.string_value())
  elif value_type == document_pb.FieldValue.NUMBER:
    if not value_pb.has_string_value():
      return None
    return float(value_pb.string_value())
  elif value_type == document_pb.FieldValue.GEO:
    if not value_pb.has_geo():
      return None
    geo_pb = value_pb.geo()
    return GeoPoint(latitude=geo_pb.lat(), longitude=geo_pb.lng())
  raise TypeError('unknown FieldValue type %d' % value_pb.type())
# Field value types whose payload _DecodeValue decodes from UTF-8.
_STRING_TYPES = set((
    document_pb.FieldValue.TEXT,
    document_pb.FieldValue.HTML,
    document_pb.FieldValue.ATOM,
))
1122 def _DecodeUTF8(pb_value):
1123 """Decodes a UTF-8 encoded string into unicode."""
1124 if pb_value is not None:
1125 return pb_value.decode('utf-8')
1126 return None
def _DecodeValue(pb_value, val_type):
  """Decodes pb_value from UTF-8 when val_type is a string type.

  Args:
    pb_value: The value read from a protocol buffer.
    val_type: The field value type that pb_value was stored under.

  Returns:
    The decoded value if val_type is in _STRING_TYPES, otherwise pb_value
    unchanged.
  """
  if val_type not in _STRING_TYPES:
    return pb_value
  return _DecodeUTF8(pb_value)
def _NewFieldFromPb(pb):
  """Constructs a Field from a document_pb.Field protocol buffer.

  Args:
    pb: The field protocol buffer to convert.

  Returns:
    A TextField, HtmlField, AtomField, DateField, NumberField or GeoField,
    chosen by the type of the value stored in pb.

  Raises:
    TypeError: If _GetValue does not recognise the value type.
    InvalidRequest: If the value type has no corresponding Field class.
  """
  name = _DecodeUTF8(pb.name())
  val_type = pb.value().type()
  value = _DecodeValue(_GetValue(pb.value()), val_type)
  lang = None
  if pb.value().has_language():
    lang = _DecodeUTF8(pb.value().language())
  if val_type == document_pb.FieldValue.TEXT:
    return TextField(name, value, lang)
  elif val_type == document_pb.FieldValue.HTML:
    return HtmlField(name, value, lang)
  elif val_type == document_pb.FieldValue.ATOM:
    return AtomField(name, value, lang)
  elif val_type == document_pb.FieldValue.DATE:
    return DateField(name, value)
  elif val_type == document_pb.FieldValue.NUMBER:
    return NumberField(name, value)
  elif val_type == document_pb.FieldValue.GEO:
    return GeoField(name, value)
  # Raise rather than return: handing an exception instance back to callers
  # would let it masquerade as a Field in the resulting document.
  raise InvalidRequest('Unknown field value type %d' % val_type)
class Document(object):
  """Represents a user generated document.

  The following example shows how to create a document consisting of a set
  of fields, some plain text and some in HTML.

    Document(doc_id='document_id',
             fields=[TextField(name='subject', value='going for dinner'),
                     HtmlField(name='body',
                               value='<html>I found a place.</html>'),
                     TextField(name='signature', value='brzydka pogoda',
                               language='pl')],
             language='en')
  """

  # Epoch from which the default rank (in seconds) is measured.
  _FIRST_JAN_2011 = datetime.datetime(2011, 1, 1)

  def __init__(self, doc_id=None, fields=None, language='en', rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document
        which does not start with '!'. Whitespace is excluded from ids. If no
        id is provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      rank: The rank of this document used to specify the order in which
        documents are returned by search. Rank must be a non-negative
        integer. If not specified, the number of seconds since 1st Jan 2011
        is used. Documents are returned in descending order of their rank, in
        absence of sorting or scoring options.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    unicode_id = _ConvertToUnicode(doc_id)
    if unicode_id is not None:
      _CheckDocumentId(unicode_id)
    self._doc_id = unicode_id
    self._fields = _GetList(fields)
    self._language = _CheckLanguage(_ConvertToUnicode(language))

    # Name -> list of fields index, populated on demand by _BuildFieldMap.
    self._field_map = None

    self._rank = self._CheckRank(
        self._GetDefaultRank() if rank is None else rank)

    _CheckDocument(self)

  @property
  def doc_id(self):
    """Returns the document identifier."""
    return self._doc_id

  @property
  def fields(self):
    """Returns a list of fields of the document."""
    return self._fields

  @property
  def language(self):
    """Returns the code of the language the document fields are written in."""
    return self._language

  @property
  def rank(self):
    """Returns the rank of this document."""
    return self._rank

  def field(self, field_name):
    """Returns the single field carrying the provided name.

    Args:
      field_name: The name of the field to return.

    Returns:
      A field with the given name.

    Raises:
      ValueError: There is not exactly one field with the given name.
    """
    matches = self[field_name]
    if len(matches) != 1:
      raise ValueError(
          'Must have exactly one field with name %s, but found %d.' %
          (field_name, len(matches)))
    return matches[0]

  def __getitem__(self, field_name):
    """Returns a list of all fields with the provided field name.

    Args:
      field_name: The name of the field to return.

    Returns:
      All fields with the given name, or an empty list if no field with that
      name exists.
    """
    field_map = self._BuildFieldMap()
    return field_map.get(field_name, [])

  def __iter__(self):
    """Documents do not support iteration.

    This is provided to raise an explicit exception.
    """
    raise TypeError('Documents do not support iteration.')

  def _BuildFieldMap(self):
    """Returns the name -> fields mapping, building it on first use."""
    if self._field_map is None:
      by_name = self._field_map = {}
      for entry in self._fields:
        by_name.setdefault(entry.name, []).append(entry)
    return self._field_map

  def _CheckRank(self, rank):
    """Checks if rank is valid, then returns it."""
    return _CheckInteger(rank, 'rank', upper_bound=sys.maxint)

  def _GetDefaultRank(self):
    """Returns a default rank as total seconds since 1st Jan 2011."""
    elapsed = datetime.datetime.now() - Document._FIRST_JAN_2011
    return elapsed.seconds + (elapsed.days * 24 * 3600)

  def __repr__(self):
    attributes = [('doc_id', self.doc_id), ('fields', self.fields),
                  ('language', self.language), ('rank', self.rank)]
    return _Repr(self, attributes)

  def __eq__(self, other):
    """Compares id, rank, language and fields with a same-typed document."""
    if not isinstance(other, type(self)):
      return False
    return (self.doc_id == other.doc_id and
            self.rank == other.rank and
            self.language == other.language and
            self.fields == other.fields)

  def __ne__(self, other):
    return not self == other

  def __key(self):
    """Returns the value the hash is derived from: the document id."""
    return self.doc_id

  def __hash__(self):
    return hash(self.__key())

  def __str__(self):
    return repr(self)
def _CopyDocumentToProtocolBuffer(document, pb):
  """Copies Document to a document_pb.Document protocol buffer.

  Args:
    document: The Document to copy from.
    pb: The protocol buffer to populate.

  Returns:
    The populated pb.
  """
  pb.set_storage(document_pb.Document.DISK)
  doc_id = document.doc_id
  if doc_id:
    pb.set_id(doc_id.encode('utf-8'))
  language = document.language
  if language:
    pb.set_language(language.encode('utf-8'))
  for field in document.fields:
    _CopyFieldToProtocolBuffer(field, pb.add_field())
  # The document rank is carried in the protocol buffer's order_id.
  pb.set_order_id(document.rank)
  return pb
def _NewFieldsFromPb(field_list):
  """Returns a list of Field built from each protocol buffer in field_list."""
  fields = []
  for field_pb in field_list:
    fields.append(_NewFieldFromPb(field_pb))
  return fields
def _NewDocumentFromPb(doc_pb):
  """Constructs a Document from a document_pb.Document protocol buffer.

  The language is passed as None when doc_pb has no language set; the
  protocol buffer's order_id becomes the document rank.
  """
  language = _DecodeUTF8(doc_pb.language()) if doc_pb.has_language() else None
  doc_id = _DecodeUTF8(doc_pb.id())
  fields = _NewFieldsFromPb(doc_pb.field_list())
  return Document(doc_id=doc_id, fields=fields, language=language,
                  rank=doc_pb.order_id())
1341 def _QuoteString(argument):
1342 return '"' + argument.replace('"', '\\\"') + '"'
class FieldExpression(object):
  """Represents an expression that will be computed for each result returned.

  For example,
    FieldExpression(name='content_snippet',
                    expression='snippet("very important", content)')
  means a computed field 'content_snippet' will be returned with each search
  result, which contains HTML snippets of the 'content' field which match
  the query 'very important'.
  """

  MAXIMUM_EXPRESSION_LENGTH = 1000
  MAXIMUM_OPERATOR_LENGTH = 100

  def __init__(self, name, expression):
    """Initializer.

    Args:
      name: The name of the computed field for the expression.
      expression: The expression to evaluate and return in a field with
        given name in results. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    self._name = _CheckFieldName(_ConvertToUnicode(name))
    # The expression itself must be a string; None and other types are
    # rejected before any parsing is attempted.
    if expression is None:
      raise ValueError('expression must be a FieldExpression, got None')
    elif not isinstance(expression, basestring):
      raise TypeError('expression must be a FieldExpression, got %s' %
                      expression.__class__.__name__)
    unicode_expression = _ConvertToUnicode(expression)
    self._expression = _CheckExpression(unicode_expression)

  @property
  def name(self):
    """Returns name of the expression to return in search results."""
    return self._name

  @property
  def expression(self):
    """Returns a string containing an expression returned in search results."""
    return self._expression

  def __repr__(self):
    attributes = [('name', self.name), ('expression', self.expression)]
    return _Repr(self, attributes)
def _CopyFieldExpressionToProtocolBuffer(field_expression, pb):
  """Copies FieldExpression to a search_service_pb.FieldSpec_Expression.

  Both the name and the expression are written UTF-8 encoded.
  """
  encoded_name = field_expression.name.encode('utf-8')
  pb.set_name(encoded_name)
  encoded_expression = field_expression.expression.encode('utf-8')
  pb.set_expression(encoded_expression)
class SortOptions(object):
  """Represents a multi-dimensional sort of Documents.

  The following code shows how to sort documents based on product rating
  in descending order and then cheapest product within similarly rated
  products, sorting at most 1000 documents:

    SortOptions(expressions=[
        SortExpression(expression='rating',
            direction=SortExpression.DESCENDING, default_value=0),
        SortExpression(expression='price + tax',
            direction=SortExpression.ASCENDING, default_value=999999.99)],
        limit=1000)
  """

  def __init__(self, expressions=None, match_scorer=None, limit=1000):
    """Initializer.

    Args:
      expressions: An iterable of SortExpression representing a
        multi-dimensional sort of Documents.
      match_scorer: A match scorer specification which may be used to
        score documents or in a SortExpression combined with other features.
      limit: The limit on the number of documents to score or sort.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
    """
    self._match_scorer = match_scorer
    self._expressions = _GetList(expressions)
    # Reject the first entry that is not a SortExpression.
    for candidate in self._expressions:
      if isinstance(candidate, SortExpression):
        continue
      raise TypeError('expression must be a SortExpression, got %s' %
                      candidate.__class__.__name__)
    self._limit = _CheckSortLimit(limit)

  @property
  def expressions(self):
    """A list of SortExpression specifying a multi-dimensional sort."""
    return self._expressions

  @property
  def match_scorer(self):
    """Returns a match scorer to score documents with."""
    return self._match_scorer

  @property
  def limit(self):
    """Returns the limit on the number of documents to score or sort."""
    return self._limit

  def __repr__(self):
    attributes = [('match_scorer', self.match_scorer),
                  ('expressions', self.expressions),
                  ('limit', self.limit)]
    return _Repr(self, attributes)
class MatchScorer(object):
  """Assigns a document score based on term frequency.

  If you add a MatchScorer to a SortOptions as in the following code:

    sort_opts = search.SortOptions(match_scorer=search.MatchScorer())

  then, this will sort the documents in descending score order. The scores
  will be positive. If you want to sort in ascending order, then use the
  following code:

    sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
        expressions=[search.SortExpression(
            expression='_score', direction=search.SortExpression.ASCENDING,
            default_value=0.0)])

  The scores in this case will be negative.
  """

  def __init__(self):
    """Initializer.

    Takes no arguments and stores no state.
    """

  def __repr__(self):
    no_attributes = []
    return _Repr(self, no_attributes)
class RescoringMatchScorer(MatchScorer):
  """Assigns a document score based on term frequency weighted by doc parts.

  If you add a RescoringMatchScorer to a SortOptions as in the following code:

    sort_opts = search.SortOptions(
        match_scorer=search.RescoringMatchScorer())

  then, this will sort the documents in descending score order. The scores
  will be positive. If you want to sort in ascending order, then use the
  following code:

    sort_opts = search.SortOptions(
        match_scorer=search.RescoringMatchScorer(),
        expressions=[search.SortExpression(
            expression='_score', direction=search.SortExpression.ASCENDING,
            default_value=0.0)])

  The scores in this case will be negative.
  """

  def __init__(self):
    """Initializer.

    Takes no arguments; simply runs the MatchScorer initializer.
    """
    super(RescoringMatchScorer, self).__init__()
def _CopySortExpressionToProtocolBuffer(sort_expression, pb):
  """Copies a SortExpression to a search_service_pb.SortSpec protocol buffer.

  Args:
    sort_expression: The SortExpression to copy from.
    pb: The protocol buffer to populate.

  Returns:
    The populated pb.
  """
  pb.set_sort_expression(sort_expression.expression.encode('utf-8'))
  # Only an ascending sort is written explicitly; for any other direction
  # the sort_descending setting is left untouched.
  if sort_expression.direction == SortExpression.ASCENDING:
    pb.set_sort_descending(False)
  default_value = sort_expression.default_value
  if isinstance(default_value, basestring):
    pb.set_default_value_text(default_value.encode('utf-8'))
  elif isinstance(default_value, (datetime.datetime, datetime.date)):
    # Dates are sent as a number computed by search_util.EpochTime.
    pb.set_default_value_numeric(search_util.EpochTime(default_value))
  else:
    pb.set_default_value_numeric(default_value)
  return pb
def _CopyMatchScorerToScorerSpecProtocolBuffer(match_scorer, limit, pb):
  """Copies a MatchScorer to a search_service_pb.ScorerSpec.

  Args:
    match_scorer: A MatchScorer or RescoringMatchScorer instance.
    limit: The limit to record on the scorer spec.
    pb: The scorer spec protocol buffer to populate.

  Returns:
    The populated pb.

  Raises:
    TypeError: If match_scorer is neither a MatchScorer nor a
      RescoringMatchScorer.
  """
  # RescoringMatchScorer subclasses MatchScorer, so it must be tested first.
  if isinstance(match_scorer, RescoringMatchScorer):
    pb.set_scorer(search_service_pb.ScorerSpec.RESCORING_MATCH_SCORER)
  elif isinstance(match_scorer, MatchScorer):
    pb.set_scorer(search_service_pb.ScorerSpec.MATCH_SCORER)
  else:
    raise TypeError(
        'match_scorer must be a MatchScorer or RescoringMatchScorer, '
        'got %s' % match_scorer.__class__.__name__)
  pb.set_limit(limit)
  return pb
def _CopySortOptionsToProtocolBuffer(sort_options, params):
  """Copies the SortOptions into the SearchParams proto buf.

  Adds one sort spec per sort expression, then fills in the scorer spec: the
  scorer kind when a match scorer is present, and the limit in every case.
  """
  for sort_expression in sort_options.expressions:
    _CopySortExpressionToProtocolBuffer(sort_expression,
                                        params.add_sort_spec())
  has_scorer = sort_options.match_scorer
  scorer_spec = params.mutable_scorer_spec()
  if has_scorer:
    _CopyMatchScorerToScorerSpecProtocolBuffer(
        sort_options.match_scorer, sort_options.limit, scorer_spec)
  scorer_spec.set_limit(sort_options.limit)
class SortExpression(object):
  """Sort by a user specified scoring expression.

  For example, the following will sort documents on a numeric field named
  'length' in ascending order, assigning a default value of sys.maxint for
  documents which do not specify a 'length' field.

    SortExpression(expression='length',
                   direction=search.SortExpression.ASCENDING,
                   default_value=sys.maxint)

  The following example will sort documents on a date field named
  'published_date' in descending order, assigning a default value of
  1999-12-31 for documents which do not specify a 'published_date' field.

    SortExpression(expression='published_date',
                   default_value=datetime.date(year=1999, month=12, day=31))

  The following example will sort documents on a text field named 'subject'
  in descending order, assigning a default value of '' for documents which
  do not specify a 'subject' field.

    SortExpression(expression='subject')
  """

  # Largest text value: 80 copies of the highest code point unichr accepts.
  try:
    MAX_FIELD_VALUE = unichr(0x10ffff) * 80
  except ValueError:
    # unichr rejected 0x10ffff; fall back to the highest 16-bit code point.
    MAX_FIELD_VALUE = unichr(0xffff) * 80

  MIN_FIELD_VALUE = u''

  ASCENDING = 'ASCENDING'
  DESCENDING = 'DESCENDING'

  _DIRECTIONS = frozenset([ASCENDING, DESCENDING])

  def __init__(self, expression, direction=DESCENDING, default_value=''):
    """Initializer.

    Args:
      expression: An expression to be evaluated on each matching document
        to sort by. The expression must evaluate to a text or numeric value.
        The expression can simply be a field name, or some compound
        expression such as "_score + count(likes) * 0.1" which will add the
        score from a scorer to a count of the values of a likes field times
        0.1. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.
      direction: The direction to sort the search results, either ASCENDING
        or DESCENDING
      default_value: The default value of the expression. The default_value
        is returned if expression cannot be calculated, for example, if the
        expression is a field name and no value for that named field exists.
        A text value must be specified for text sorts. A numeric value must
        be specified for numeric sorts. A date value must be specified for
        date sorts.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    self._expression = _ConvertToUnicode(expression)
    self._direction = self._CheckDirection(direction)
    if self._expression is None:
      raise TypeError('expression must be a SortExpression, got None')
    _CheckExpression(self._expression)
    self._default_value = default_value
    non_text_types = (int, long, float, datetime.date, datetime.datetime)
    if isinstance(self.default_value, basestring):
      # Text defaults are normalised to unicode and validated as text.
      self._default_value = _ConvertToUnicode(default_value)
      _CheckText(self._default_value, 'default_value')
    elif not isinstance(self._default_value, non_text_types):
      raise TypeError('default_value must be text, numeric or datetime, got %s'
                      % self._default_value.__class__.__name__)

  @property
  def expression(self):
    """Returns the expression to sort by."""
    return self._expression

  @property
  def direction(self):
    """Returns the direction to sort expression: ASCENDING or DESCENDING."""
    return self._direction

  @property
  def default_value(self):
    """Returns a default value for the expression if no value computed."""
    return self._default_value

  def _CheckDirection(self, direction):
    """Checks direction is a valid SortExpression direction and returns it."""
    return _CheckEnum(direction, 'direction', values=self._DIRECTIONS)

  def __repr__(self):
    attributes = [('expression', self.expression),
                  ('direction', self.direction),
                  ('default_value', self.default_value)]
    return _Repr(self, attributes)
class ScoredDocument(Document):
  """Represents a scored document returned from a search."""

  def __init__(self, doc_id=None, fields=None, language='en',
               sort_scores=None, expressions=None, cursor=None, rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document
        which does not start with '!'. Whitespace is excluded from ids. If no
        id is provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      sort_scores: The list of scores assigned during sort evaluation. Each
        sort dimension is included. Positive scores are used for ascending
        sorts; negative scores for descending.
      expressions: The list of computed fields which are the result of
        expressions requested.
      cursor: A cursor associated with the document.
      rank: The rank of this document. A rank must be a non-negative integer
        less than sys.maxint. If not specified, the number of seconds since
        1st Jan 2011 is used. Documents are returned in descending order of
        their rank.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    super(ScoredDocument, self).__init__(
        doc_id=doc_id, fields=fields, language=language, rank=rank)
    score_list = _GetList(sort_scores)
    self._sort_scores = self._CheckSortScores(score_list)
    self._expressions = _GetList(expressions)
    # A cursor is optional, but when given it has to be a Cursor.
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  @property
  def sort_scores(self):
    """The list of scores assigned during sort evaluation.

    Each sort dimension is included. Positive scores are used for ascending
    sorts; negative scores for descending.

    Returns:
      The list of numeric sort scores.
    """
    return self._sort_scores

  @property
  def expressions(self):
    """The list of computed fields the result of expression evaluation.

    For example, if a request has
      FieldExpression(name='snippet', 'snippet("good story", content)')
    meaning to compute a snippet field containing HTML snippets extracted
    from the matching of the query 'good story' on the field 'content'.
    This means a field such as the following will be returned in expressions
    for the search result:
      HtmlField(name='snippet', value='that was a <b>good story</b> to finish')

    Returns:
      The computed fields.
    """
    return self._expressions

  @property
  def cursor(self):
    """A cursor associated with a result, a continued search starting point.

    To get this cursor to appear, set the Index.cursor_type to
    Index.RESULT_CURSOR, otherwise this will be None.

    Returns:
      The result cursor.
    """
    return self._cursor

  def _CheckSortScores(self, sort_scores):
    """Checks every entry of sort_scores is a number, and returns the list."""
    for score in sort_scores:
      _CheckNumber(score, 'sort_scores')
    return sort_scores

  def __repr__(self):
    attributes = [('doc_id', self.doc_id),
                  ('fields', self.fields),
                  ('language', self.language),
                  ('rank', self.rank),
                  ('sort_scores', self.sort_scores),
                  ('expressions', self.expressions),
                  ('cursor', self.cursor)]
    return _Repr(self, attributes)
class SearchResults(object):
  """Represents the result of executing a search request."""

  def __init__(self, number_found, results=None, cursor=None):
    """Initializer.

    Args:
      number_found: The number of documents found for the query.
      results: The list of ScoredDocuments returned from executing a
        search request.
      cursor: A Cursor to continue the search from the end of the
        search results.

    Raises:
      TypeError: If any of the parameters have an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._number_found = _CheckInteger(number_found, 'number_found')
    self._results = _GetList(results)
    # A cursor is optional, but when given it has to be a Cursor.
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  def __iter__(self):
    """Yields each result in the order held by results."""
    for scored_document in self.results:
      yield scored_document

  @property
  def results(self):
    """Returns the list of ScoredDocuments that matched the query."""
    return self._results

  @property
  def number_found(self):
    """Returns the number of documents which were found for the search.

    Note that this is an approximation and not an exact count.
    If QueryOptions.number_found_accuracy parameter is set to 100
    for example, then number_found <= 100 is accurate.

    Returns:
      The number of documents found.
    """
    return self._number_found

  @property
  def cursor(self):
    """Returns a cursor that can be used to continue search from last result.

    This corresponds to using a ResultsCursor in QueryOptions,
    otherwise this will be None.

    Returns:
      The results cursor.
    """
    return self._cursor

  def __repr__(self):
    attributes = [('results', self.results),
                  ('number_found', self.number_found),
                  ('cursor', self.cursor)]
    return _Repr(self, attributes)
class GetResponse(object):
  """Represents the result of executing a get request.

  For example, the following code shows how a response could be used
  to iterate over the documents that were returned.

    response = index.get_range()
    for document in response:
      print "document ", document
  """

  def __init__(self, results=None):
    """Initializer.

    Args:
      results: The results returned from an index ordered by Id.

    Raises:
      TypeError: If any of the parameters have an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._results = _GetList(results)

  def __iter__(self):
    """Yields each result in the order held by results."""
    for entry in self.results:
      yield entry

  @property
  def results(self):
    """Returns a list of results ordered by Id from the index."""
    return self._results

  def __repr__(self):
    attributes = [('results', self.results)]
    return _Repr(self, attributes)
class Cursor(object):
  """Specifies how to get the next page of results in a search.

  A cursor returned in a previous set of search results to use as a starting
  point to retrieve the next set of results. This can get you better
  performance, and also improves the consistency of pagination through index
  updates.

  The following shows how to use the cursor to get the next page of results:

    # get the first set of results; the first cursor is used to specify
    # that cursors are to be returned in the SearchResults.
    results = index.search(Query(query_string='some stuff',
        options=QueryOptions(cursor=Cursor())))

    # get the next set of results
    results = index.search(Query(query_string='some stuff',
        options=QueryOptions(cursor=results.cursor)))

  If you want to continue search from any one of the ScoredDocuments in
  SearchResults, then you can set Cursor.per_result to True.

    # get the first set of results; the first cursor is used to specify
    # that cursors are to be returned in the SearchResults.
    results = index.search(Query(query_string='some stuff',
        options=QueryOptions(cursor=Cursor(per_result=True))))

    # this shows how to access the per_document cursors returned from a search
    per_document_cursor = None
    for scored_document in results:
      per_document_cursor = scored_document.cursor

    # get the next set of results
    results = index.search(Query(query_string='some stuff',
        options=QueryOptions(cursor=per_document_cursor)))
  """

  def __init__(self, web_safe_string=None, per_result=False):
    """Initializer.

    Args:
      web_safe_string: The cursor string returned from the search service to
        be interpreted by the search service to get the next set of results.
      per_result: A bool when true will return a cursor per ScoredDocument in
        SearchResults, otherwise will return a single cursor for the whole
        SearchResults. If using offset this is ignored, as the user is
        responsible for calculating a next offset if any.

    Raises:
      ValueError: if the web_safe_string is not of required format.
    """
    self._web_safe_string = _CheckCursor(_ConvertToUnicode(web_safe_string))
    self._per_result = per_result
    if self._web_safe_string:
      # Expected layout is '<True|False>:<internal cursor>'.
      pieces = self._web_safe_string.split(':', 1)
      well_formed = len(pieces) == 2 and pieces[0] in ['True', 'False']
      if not well_formed:
        raise ValueError('invalid format for web_safe_string, got %s' %
                         self._web_safe_string)
      self._internal_cursor = pieces[1]
      # The flag embedded in the string overrides the per_result argument.
      self._per_result = (pieces[0] == 'True')

  @property
  def web_safe_string(self):
    """Returns the cursor string generated by the search service."""
    return self._web_safe_string

  @property
  def per_result(self):
    """Returns whether to return a cursor for each ScoredDocument in results."""
    return self._per_result

  def __repr__(self):
    attributes = [('web_safe_string', self.web_safe_string)]
    return _Repr(self, attributes)
1952 def _ToWebSafeString(per_result, internal_cursor):
1953 """Returns the web safe string combining per_result with internal cursor."""
1954 return str(per_result) + ':' + internal_cursor
def _CheckQuery(query):
  """Checks a query is a valid query string.

  Args:
    query: The query string to validate. An empty or whitespace-only query
      is accepted without being parsed.

  Returns:
    The query, unchanged.

  Raises:
    TypeError: If query is None.
    QueryError: If the query parser rejects a non-blank query.
  """
  _ValidateString(query, 'query', MAXIMUM_QUERY_LENGTH, empty_ok=True)
  if query is None:
    raise TypeError('query must be unicode, got None')
  if query.strip():
    try:
      query_parser.Parse(query)
    # The caught exception is not used, so it is not bound to a name; this
    # form of the clause is also valid on both Python 2 and Python 3.
    except query_parser.QueryException:
      raise QueryError('Failed to parse query "%s"' % query)
  return query
def _CheckLimit(limit):
  """Checks the limit of documents to return is an integer within range.

  Validation is delegated to _CheckInteger with zero_ok=False and an upper
  bound of MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH; its result is returned.
  """
  upper_bound = MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH
  return _CheckInteger(limit, 'limit', zero_ok=False, upper_bound=upper_bound)
def _CheckOffset(offset):
  """Checks the offset in document list is an integer within range.

  Validation is delegated to _CheckInteger with zero_ok=True and an upper
  bound of MAXIMUM_SEARCH_OFFSET; its result is returned.
  """
  upper_bound = MAXIMUM_SEARCH_OFFSET
  return _CheckInteger(offset, 'offset', zero_ok=True, upper_bound=upper_bound)
def _CheckNumberFoundAccuracy(number_found_accuracy):
  """Checks the accuracy is an integer within range.

  Validation is delegated to _CheckInteger with zero_ok=False and an upper
  bound of MAXIMUM_NUMBER_FOUND_ACCURACY; its result is returned.
  """
  upper_bound = MAXIMUM_NUMBER_FOUND_ACCURACY
  return _CheckInteger(number_found_accuracy, 'number_found_accuracy',
                       zero_ok=False, upper_bound=upper_bound)
def _CheckCursor(cursor):
  """Checks the cursor if specified is a string which is not too long.

  Validation is delegated to _ValidateString with a maximum length of
  _MAXIMUM_CURSOR_LENGTH and empty_ok=True; its result is returned.
  """
  max_length = _MAXIMUM_CURSOR_LENGTH
  return _ValidateString(cursor, 'cursor', max_length, empty_ok=True)
def _CheckNumberOfFields(returned_expressions, snippeted_fields,
                         returned_fields):
  """Checks the combined count of all field kinds does not exceed the limit.

  Args:
    returned_expressions: The expressions requested to be returned.
    snippeted_fields: The fields requested to be snippeted.
    returned_fields: The fields requested to be returned.

  Raises:
    ValueError: If the three collections together hold more than
      MAXIMUM_FIELDS_RETURNED_PER_SEARCH entries.
  """
  requested = (returned_expressions, snippeted_fields, returned_fields)
  total = sum(len(group) for group in requested)
  if total <= MAXIMUM_FIELDS_RETURNED_PER_SEARCH:
    return
  raise ValueError(
      'too many fields, snippets or expressions to return  %d > maximum %d'
      % (total, MAXIMUM_FIELDS_RETURNED_PER_SEARCH))
class QueryOptions(object):
  """Options for post-processing results for a query.

  Options include the ability to sort results, control which document fields
  to return, produce snippets of fields and compute and sort by complex
  scoring expressions.

  If you wish to randomly access pages of search results, you can use an
  offset:

    # get the first set of results
    page_size = 10
    results = index.search(Query(query_string='some stuff',
        options=QueryOptions(limit=page_size)))

    # calculate pages
    pages = results.number_found / page_size

    # user chooses page and hence an offset into results
    next_page = ith * page_size

    # get the search results for that page
    results = index.search(Query(query_string='some stuff',
        options=QueryOptions(limit=page_size, offset=next_page)))
  """

  def __init__(self, limit=20, number_found_accuracy=None, cursor=None,
               offset=None, sort_options=None, returned_fields=None,
               ids_only=False, snippeted_fields=None,
               returned_expressions=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, starting the search from 'cursor token',
    returning another single cursor for the SearchResults, sorting by subject in
    descending order, returning the author, subject, and summary fields as well
    as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
              limit=20,
              cursor=Cursor(),
              sort_options=SortOptions(
                  expressions=[
                      SortExpression(expression='subject')],
                  limit=1000),
              returned_fields=['author', 'subject', 'summary'],
              snippeted_fields=['content'])))

    Args:
      limit: The limit on number of documents to return in results.
      number_found_accuracy: The minimum accuracy requirement for
        SearchResults.number_found. If set, the number_found will be
        accurate up to at least that number. For example, when set to 100,
        any SearchResults with number_found <= 100 is accurate. This option
        may add considerable latency/expense, especially when used with
        returned_fields.
      cursor: A Cursor describing where to get the next set of results,
        or to provide next cursors in SearchResults.
      offset: The offset is number of documents to skip in search results. This
        is an alternative to using a query cursor, but allows random access into
        the results. Using offsets rather than cursors are more expensive. You
        can only use either cursor or offset, but not both. Using an offset
        means that no cursor is returned in SearchResults.cursor, nor in each
        ScoredDocument.cursor.
      sort_options: A SortOptions specifying a multi-dimensional sort over
        search results.
      returned_fields: An iterable of names of fields to return in search
        results.
      ids_only: Only return document ids, do not return any fields.
      snippeted_fields: An iterable of names of fields to snippet and return
        in search result expressions.
      returned_expressions: An iterable of FieldExpression to evaluate and
        return in search results.

    Raises:
      TypeError: If cursor is not a Cursor or sort_options is not a
        SortOptions.
      ValueError: If cursor and offset are both set, if ids_only and
        returned_fields are used together, or if too many fields, snippets
        and expressions are requested.
      ExpressionError: If one of the returned expression strings is not
        parseable.
    """
    # The validation order below decides which error a caller sees first, so
    # the statements are deliberately kept in this sequence.
    self._limit = _CheckLimit(limit)
    self._number_found_accuracy = _CheckNumberFoundAccuracy(
        number_found_accuracy)
    if cursor is not None and not isinstance(cursor, Cursor):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    if cursor is not None and offset is not None:
      raise ValueError('cannot set cursor and offset together')
    self._cursor = cursor
    self._offset = _CheckOffset(offset)
    if sort_options is not None and not isinstance(sort_options, SortOptions):
      raise TypeError('sort_options must be a SortOptions, got %s' %
                      sort_options.__class__.__name__)
    self._sort_options = sort_options

    self._returned_fields = _ConvertToUnicodeList(returned_fields)
    _CheckFieldNames(self._returned_fields)
    self._ids_only = ids_only
    if self._ids_only and self._returned_fields:
      raise ValueError('cannot have ids_only and returned_fields set together')
    self._snippeted_fields = _ConvertToUnicodeList(snippeted_fields)
    _CheckFieldNames(self._snippeted_fields)
    self._returned_expressions = _ConvertToList(returned_expressions)
    for expression in self._returned_expressions:
      _CheckFieldName(_ConvertToUnicode(expression.name))
      _CheckExpression(_ConvertToUnicode(expression.expression))
    _CheckNumberOfFields(self._returned_expressions, self._snippeted_fields,
                         self._returned_fields)

  @property
  def limit(self):
    """Returns a limit on number of documents to return in results."""
    return self._limit

  @property
  def number_found_accuracy(self):
    """Returns minimum accuracy requirement for SearchResults.number_found."""
    return self._number_found_accuracy

  @property
  def cursor(self):
    """Returns the Cursor for the query."""
    return self._cursor

  @property
  def offset(self):
    """Returns the number of documents in search results to skip."""
    return self._offset

  @property
  def sort_options(self):
    """Returns a SortOptions."""
    return self._sort_options

  @property
  def returned_fields(self):
    """Returns an iterable of names of fields to return in search results."""
    return self._returned_fields

  @property
  def ids_only(self):
    """Returns whether to return only document ids in search results."""
    return self._ids_only

  @property
  def snippeted_fields(self):
    """Returns iterable of field names to snippet and return in results."""
    return self._snippeted_fields

  @property
  def returned_expressions(self):
    """Returns iterable of FieldExpression to return in results."""
    return self._returned_expressions

  def __repr__(self):
    """Returns a printable form listing every option, including offset."""
    return _Repr(self, [('limit', self.limit),
                        ('number_found_accuracy', self.number_found_accuracy),
                        ('cursor', self.cursor),
                        ('offset', self.offset),
                        ('sort_options', self.sort_options),
                        ('returned_fields', self.returned_fields),
                        ('ids_only', self.ids_only),
                        ('snippeted_fields', self.snippeted_fields),
                        ('returned_expressions', self.returned_expressions)])
def _CopyQueryOptionsObjectToProtocolBuffer(query, options, params):
  """Copies a QueryOptions object to a SearchParams proto buff.

  Args:
    query: The query string; forwarded so snippet expressions can be built.
    options: The QueryOptions whose settings are copied.
    params: The search params protocol buffer to populate.
  """
  internal_cursor = None
  cursor_type = None
  cursor = options.cursor
  if cursor:
    if cursor.per_result:
      cursor_type = search_service_pb.SearchParams.PER_RESULT
    else:
      cursor_type = search_service_pb.SearchParams.SINGLE
    # Only the internal part of the cursor is sent to the service; the web
    # safe string merely signals that the cursor carries a position.
    if isinstance(cursor, Cursor) and cursor.web_safe_string:
      internal_cursor = cursor._internal_cursor
  _CopyQueryOptionsToProtocolBuffer(
      query, options.offset, options.limit, options.number_found_accuracy,
      internal_cursor, cursor_type, options.ids_only, options.returned_fields,
      options.snippeted_fields, options.returned_expressions,
      options.sort_options, params)
def _CopyQueryOptionsToProtocolBuffer(
    query, offset, limit, number_found_accuracy, cursor, cursor_type, ids_only,
    returned_fields, snippeted_fields, returned_expressions, sort_options,
    params):
  """Copies the individual query option values onto the params protobuf.

  Args:
    query: The query string, quoted into each snippet expression.
    offset: Number of results to skip; only set when truthy.
    limit: Maximum number of results; always set.
    number_found_accuracy: Matched count accuracy; set unless None.
    cursor: Cursor string, UTF-8 encoded before being set; skipped if falsy.
    cursor_type: Cursor type value for the params; set unless None.
    ids_only: Whether only keys are wanted; set when truthy.
    returned_fields: Iterable of field names to return.
    snippeted_fields: Iterable of field names to snippet.
    returned_expressions: Iterable of FieldExpression to return.
    sort_options: SortOptions to copy, or None.
    params: The protocol buffer being populated.
  """
  if offset:
    params.set_offset(offset)
  params.set_limit(limit)
  if number_found_accuracy is not None:
    params.set_matched_count_accuracy(number_found_accuracy)
  if cursor:
    params.set_cursor(cursor.encode('utf-8'))
  if cursor_type is not None:
    params.set_cursor_type(cursor_type)
  if ids_only:
    params.set_keys_only(ids_only)

  # The field spec is only created when at least one kind of field is asked
  # for.
  needs_field_spec = (returned_fields or snippeted_fields or
                      returned_expressions)
  if needs_field_spec:
    spec_pb = params.mutable_field_spec()
    for field_name in returned_fields:
      spec_pb.add_name(field_name.encode('utf-8'))
    for field_name in snippeted_fields:
      # A snippeted field is expressed as a snippet() expression named after
      # the field itself.
      snippet_text = u'snippet(%s, %s)' % (_QuoteString(query), field_name)
      snippet_expression = FieldExpression(
          name=field_name, expression=snippet_text.encode('utf-8'))
      _CopyFieldExpressionToProtocolBuffer(
          snippet_expression, spec_pb.add_expression())
    for returned_expression in returned_expressions:
      _CopyFieldExpressionToProtocolBuffer(
          returned_expression, spec_pb.add_expression())

  if sort_options is not None:
    _CopySortOptionsToProtocolBuffer(sort_options, params)
class Query(object):
  """A request to the search service to query an index."""

  def __init__(self, query_string, options=None):
    """Initializer.

    As an example, the fragment below searches for documents where 'first'
    occurs in subject and 'good' occurs anywhere. It returns at most 20
    documents, asks for a single cursor for the results, sorts by subject,
    and returns the author, subject and summary fields together with a
    snippet of the content field.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
              limit=20,
              cursor=Cursor(),
              sort_options=SortOptions(
                  expressions=[
                      SortExpression(expression='subject')],
                  limit=1000),
              returned_fields=['author', 'subject', 'summary'],
              snippeted_fields=['content'])))

    To page through results, pass a Cursor in QueryOptions.cursor and take
    the Cursor for the following request from results.cursor, which continues
    from the last found document:

      results = index.search(
          Query(query_string='subject:first good',
                options=QueryOptions(cursor=results.cursor)))

    Args:
      query_string: The query to match against documents in the index. A query
        is a boolean expression containing terms. For example, the query
          'job tag:"very important" sent <= 2011-02-28'
        finds documents with the term job in any field, that contain the
        phrase "very important" in a tag field, and a sent date up to and
        including 28th February, 2011. You can use combinations of
          '(cat OR feline) food NOT dog'
        to find documents which contain the term cat or feline as well as food,
        but do not mention the term dog. A further example,
          'category:televisions brand:sony price >= 300 price < 400'
        will return documents which have televisions in a category field, a
        sony brand and a price field which is 300 (inclusive) to 400
        (exclusive). See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of expressions that can be used in queries.
      options: A QueryOptions describing post-processing of search results.

    Raises:
      QueryError: If the query string is not parseable.
    """
    unicode_query = _ConvertToUnicode(query_string)
    self._query_string = unicode_query
    _CheckQuery(unicode_query)
    self._options = options

  @property
  def query_string(self):
    """Returns the query string sent to the search service."""
    return self._query_string

  @property
  def options(self):
    """Returns the QueryOptions for post-processing results, or None."""
    return self._options
def _CopyQueryToProtocolBuffer(query, params):
  """Stores the UTF-8 encoded query string on the params protobuf.

  Args:
    query: The query string to encode.
    params: The protocol buffer whose query is set.
  """
  encoded_query = query.encode('utf-8')
  params.set_query(encoded_query)
def _CopyQueryObjectToProtocolBuffer(query, params):
  """Copies a Query object, including its options, to the params protobuf.

  Args:
    query: The Query whose query string and options are copied.
    params: The protocol buffer to populate.
  """
  _CopyQueryToProtocolBuffer(query.query_string, params)
  # A Query without options is sent with default QueryOptions.
  if query.options is None:
    effective_options = QueryOptions()
  else:
    effective_options = query.options
  _CopyQueryOptionsObjectToProtocolBuffer(
      query.query_string, effective_options, params)
class Index(object):
  """Represents an index allowing indexing, deleting and searching documents.

  The following code fragment shows how to add documents, then search the
  index for documents matching a query.

    # Get the index.
    index = Index(name='index-name')

    # Create a document.
    doc = Document(doc_id='document-id',
                   fields=[TextField(name='subject', value='my first email'),
                           HtmlField(name='body',
                                     value='<html>some content here</html>')])

    # Index the document.
    try:
      index.put(doc)
    except search.Error, e:
      # possibly retry indexing or log error

    # Query the index.
    try:
      results = index.search('subject:first body:here')

      # Iterate through the search results.
      for scored_document in results:
         print scored_document

    except search.Error, e:
      # possibly log the failure

  Once an index is created with a given specification, that specification is
  immutable.

  Search results may contain some out of date documents. However, any two
  changes to any document stored in an index are applied in the correct order.
  """

  RESPONSE_CURSOR, RESULT_CURSOR = ('RESPONSE_CURSOR', 'RESULT_CURSOR')

  _CURSOR_TYPES = frozenset([RESPONSE_CURSOR, RESULT_CURSOR])

  SEARCH, DATASTORE, CLOUD_STORAGE = ('SEARCH', 'DATASTORE', 'CLOUD_STORAGE')

  _SOURCES = frozenset([SEARCH, DATASTORE, CLOUD_STORAGE])

  def __init__(self, name, namespace=None, source=SEARCH):
    """Initializer.

    Args:
      name: The name of the index. An index name must be a visible printable
        ASCII string not starting with '!'. Whitespace characters are excluded.
      namespace: The namespace of the index name. If not set, then the current
        namespace is used.
      source: Deprecated as of 1.7.6. The source of
        the index:
          SEARCH - The Index was created by adding documents through this
            search API.
          DATASTORE - The Index was created as a side-effect of putting entities
            into Datastore.
          CLOUD_STORAGE - The Index was created as a side-effect of adding
            objects into a Cloud Storage bucket.

    Raises:
      TypeError: If an unknown attribute is passed.
      ValueError: If source is not one of the known sources or an invalid
        namespace is given.
    """
    if source not in self._SOURCES:
      raise ValueError('source must be one of %s' % self._SOURCES)
    # Compare by value: an equal string that is not the very same object as
    # Index.SEARCH must not be reported as a deprecated source.
    if source != self.SEARCH:
      warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
    self._source = source
    self._name = _CheckIndexName(_ConvertToUnicode(name))
    self._namespace = _ConvertToUnicode(namespace)
    # Fall back to the current namespace, then to the empty namespace.
    if self._namespace is None:
      self._namespace = _ConvertToUnicode(namespace_manager.get_namespace())
    if self._namespace is None:
      self._namespace = u''
    namespace_manager.validate_namespace(self._namespace, exception=ValueError)
    self._schema = None
    self._storage_usage = None
    self._storage_limit = None

  @property
  def schema(self):
    """Returns the schema mapping field names to list of types supported.

    Only valid for Indexes returned by search.get_indexes method."""
    return self._schema

  @property
  def storage_usage(self):
    """The approximate number of bytes used by this index.

    The number may be slightly stale, as it may not reflect the
    results of recent changes.

    Returns None for indexes not obtained from search.get_indexes.
    """
    return self._storage_usage

  @property
  def storage_limit(self):
    """The maximum allowable storage for this index, in bytes.

    Returns None for indexes not obtained from search.get_indexes."""
    return self._storage_limit

  @property
  def name(self):
    """Returns the name of the index."""
    return self._name

  @property
  def namespace(self):
    """Returns the namespace of the name of the index."""
    return self._namespace

  @property
  def source(self):
    """Returns the source of the index.

    Deprecated: from 1.7.6, source is no longer available."""
    warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
    return self._source

  def __eq__(self, other):
    """Returns True if other is of this class and has equal attributes."""
    return (isinstance(other, self.__class__)
            and self.__dict__ == other.__dict__)

  def __ne__(self, other):
    """Returns the negation of __eq__."""
    return not self.__eq__(other)

  def __hash__(self):
    """Hashes on the name and namespace only."""
    return hash((self._name, self._namespace))

  def __repr__(self):
    """Returns a printable form of the index."""
    # Reads _source directly so that printing does not trigger the
    # deprecation warning raised by the source property.
    return _Repr(self, [('name', self.name), ('namespace', self.namespace),
                        ('source', self._source),
                        ('schema', self.schema),
                        ('storage_usage', self.storage_usage),
                        ('storage_limit', self.storage_limit)])

  def _NewPutResultFromPb(self, status_pb, doc_id):
    """Constructs PutResult from RequestStatus pb and doc_id."""
    message = None
    if status_pb.has_error_detail():
      message = _DecodeUTF8(status_pb.error_detail())
    # Status codes without a mapping are reported as internal errors.
    code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code(),
                                         OperationResult.INTERNAL_ERROR)
    return PutResult(code=code, message=message, id=_DecodeUTF8(doc_id))

  def _NewPutResultList(self, response):
    """Pairs each response status with its doc id as a list of PutResult."""
    return [self._NewPutResultFromPb(status, doc_id)
            for status, doc_id in zip(response.status_list(),
                                      response.doc_id_list())]

  @datastore_rpc._positional(2)
  def put(self, documents, deadline=None):
    """Index the collection of documents.

    If any of the documents are already in the index, then reindex them with
    their corresponding fresh document.

    Args:
      documents: A Document or iterable of Documents to index.

    Kwargs:
      deadline: Deadline for RPC call in seconds; if None use the default.

    Returns:
      A list of PutResult, one per Document requested to be indexed.

    Raises:
      PutError: If one or more documents failed to index or
        number indexed did not match requested.
      TypeError: If documents is a string, or an unknown attribute is passed.
      ValueError: If documents is not a Document or iterable of Document
        or number of the documents is larger than
        MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
    """
    if isinstance(documents, basestring):
      raise TypeError('documents must be a Document or sequence of '
                      'Documents, got %s' % documents.__class__.__name__)
    # A single, non-iterable Document is wrapped in a list.
    try:
      docs = list(iter(documents))
    except TypeError:
      docs = [documents]

    if not docs:
      return []

    if len(docs) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
      raise ValueError('too many documents to index')

    request = search_service_pb.IndexDocumentRequest()
    response = search_service_pb.IndexDocumentResponse()

    params = request.mutable_params()
    _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())

    seen_docs = {}
    for document in docs:
      doc_id = document.doc_id
      if doc_id:
        if doc_id in seen_docs:
          if document != seen_docs[doc_id]:
            raise ValueError(
                'Different documents with the same ID found in the '
                'same call to Index.put()')
          # An identical document was already added under this id; send it
          # only once.
          continue
        seen_docs[doc_id] = document
      doc_pb = params.add_document()
      _CopyDocumentToProtocolBuffer(document, doc_pb)

    _MakeSyncSearchServiceCall('IndexDocument', request, response, deadline)

    results = self._NewPutResultList(response)

    if response.status_size() != len(params.document_list()):
      raise PutError('did not index requested number of documents', results)

    for status in response.status_list():
      if status.code() != search_service_pb.SearchServiceError.OK:
        raise PutError(
            _ConcatenateErrorMessages(
                'one or more put document operations failed', status), results)
    return results

  def _NewDeleteResultFromPb(self, status_pb, doc_id):
    """Constructs DeleteResult from RequestStatus pb and doc_id."""
    message = None
    if status_pb.has_error_detail():
      message = _DecodeUTF8(status_pb.error_detail())
    # Status codes without a mapping are reported as internal errors.
    code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code(),
                                         OperationResult.INTERNAL_ERROR)

    return DeleteResult(code=code, message=message, id=doc_id)

  def _NewDeleteResultList(self, document_ids, response):
    """Pairs each response status with a requested id as DeleteResults."""
    return [self._NewDeleteResultFromPb(status, doc_id)
            for status, doc_id in zip(response.status_list(), document_ids)]

  @datastore_rpc._positional(2)
  def delete(self, document_ids, deadline=None):
    """Delete the documents with the corresponding document ids from the index.

    If no document exists for the identifier in the list, then that document
    identifier is ignored.

    Args:
      document_ids: A single identifier or list of identifiers of documents
        to delete.

    Kwargs:
      deadline: Deadline for RPC call in seconds; if None use the default.

    Raises:
      DeleteError: If one or more documents failed to remove or
        number removed did not match requested.
      ValueError: If document_ids is not a string or iterable of valid document
        identifiers or number of document ids is larger than
        MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
    """
    doc_ids = _ConvertToList(document_ids)
    if not doc_ids:
      return

    if len(doc_ids) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
      raise ValueError('too many documents to delete')

    request = search_service_pb.DeleteDocumentRequest()
    response = search_service_pb.DeleteDocumentResponse()
    params = request.mutable_params()
    _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
    for document_id in doc_ids:
      _CheckDocumentId(document_id)
      params.add_doc_id(document_id)

    _MakeSyncSearchServiceCall('DeleteDocument', request, response,
                               deadline)

    results = self._NewDeleteResultList(doc_ids, response)

    if response.status_size() != len(doc_ids):
      raise DeleteError(
          'did not delete requested number of documents', results)

    for status in response.status_list():
      if status.code() != search_service_pb.SearchServiceError.OK:
        raise DeleteError(
            _ConcatenateErrorMessages(
                'one or more delete document operations failed', status),
            results)

  def delete_schema(self):
    """Deprecated in 1.7.4. Delete the schema from the index.

    We are deprecating this method and replacing with more general schema
    and index management.

    A possible use may be remove typed fields which are no longer used. After
    you delete the schema, you need to index one or more documents to rebuild
    the schema. Until you re-index some documents, searches may fail, especially
    searches using field restricts.

    Raises:
      DeleteError: If the schema failed to be deleted.
    """
    warnings.warn('delete_schema is deprecated in 1.7.4.',
                  DeprecationWarning, stacklevel=2)
    request = search_service_pb.DeleteSchemaRequest()
    response = search_service_pb.DeleteSchemaResponse()
    params = request.mutable_params()
    _CopyMetadataToProtocolBuffer(self, params.add_index_spec())

    _MakeSyncSearchServiceCall('DeleteSchema', request, response, None)

    results = self._NewDeleteResultList([self.name], response)

    if response.status_size() != 1:
      raise DeleteError('did not delete exactly one schema', results)

    status = response.status_list()[0]
    if status.code() != search_service_pb.SearchServiceError.OK:
      raise DeleteError(
          _ConcatenateErrorMessages('delete schema operation failed', status),
          results)

  def _NewScoredDocumentFromPb(self, doc_pb, sort_scores, expressions, cursor):
    """Constructs a ScoredDocument from a document_pb.Document protocol buffer."""
    lang = None
    if doc_pb.has_language():
      lang = _DecodeUTF8(doc_pb.language())
    return ScoredDocument(
        doc_id=_DecodeUTF8(doc_pb.id()),
        fields=_NewFieldsFromPb(doc_pb.field_list()),
        language=lang, rank=doc_pb.order_id(), sort_scores=sort_scores,
        expressions=_NewFieldsFromPb(expressions), cursor=cursor)

  def _NewSearchResults(self, response, cursor):
    """Returns a SearchResults populated from a search_service response pb."""
    results = []
    for result_pb in response.result_list():
      per_result_cursor = None
      if result_pb.has_cursor():
        if isinstance(cursor, Cursor):
          # Carry the request cursor's per_result flag over to the cursor
          # handed back for this result.
          per_result_cursor = Cursor(web_safe_string=_ToWebSafeString(
              cursor.per_result, _DecodeUTF8(result_pb.cursor())))
      results.append(
          self._NewScoredDocumentFromPb(
              result_pb.document(), result_pb.score_list(),
              result_pb.expression_list(), per_result_cursor))
    results_cursor = None
    if response.has_cursor():
      if isinstance(cursor, Cursor):
        results_cursor = Cursor(web_safe_string=_ToWebSafeString(
            cursor.per_result, _DecodeUTF8(response.cursor())))
    return SearchResults(
        results=results, number_found=response.matched_count(),
        cursor=results_cursor)

  @datastore_rpc._positional(2)
  def get(self, doc_id, deadline=None):
    """Retrieve a document by document ID.

    Args:
      doc_id: The ID of the document to retrieve.

    Kwargs:
      deadline: Deadline for RPC call in seconds; if None use the default.

    Returns:
      If the document ID exists, returns the associated document. Otherwise,
      returns None.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values (e.g., a
        negative deadline).
    """
    # Fetch a range of one document starting at doc_id; it only counts as a
    # hit when the id of the document returned is exactly the one asked for.
    response = self.get_range(start_id=doc_id, limit=1, deadline=deadline)
    if response.results and response.results[0].doc_id == doc_id:
      return response.results[0]
    return None

  @datastore_rpc._positional(2)
  def search(self, query, deadline=None, **kwargs):
    """Search the index for documents matching the query.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, starting the search from 'cursor token',
    returning another single cursor for the response, sorting by subject in
    descending order, returning the author, subject, and summary fields as well
    as a snippeted field content.

      results = index.search(
          query=Query('subject:first good',
              options=QueryOptions(limit=20,
                  cursor=Cursor(),
                  sort_options=SortOptions(
                      expressions=[SortExpression(expression='subject')],
                      limit=1000),
                  returned_fields=['author', 'subject', 'summary'],
                  snippeted_fields=['content'])))

    The following code fragment shows how to use a results cursor

      cursor = results.cursor
      for result in results:
         # process result

      results = index.search(
          Query('subject:first good', options=QueryOptions(cursor=cursor)))

    The following code fragment shows how to use a per_result cursor

      results = index.search(
          query=Query('subject:first good',
              options=QueryOptions(limit=20,
                  cursor=Cursor(per_result=True),
                  ...)))

      cursor = None
      for result in results:
         cursor = result.cursor

      results = index.search(
          Query('subject:first good', options=QueryOptions(cursor=cursor)))

    Args:
      query: The Query to match against documents in the index. A plain
        query string is also accepted and wrapped in a Query.

    Kwargs:
      deadline: Deadline for RPC call in seconds; if None use the default.

    Returns:
      A SearchResults containing a list of documents matched, number returned
      and number matched by the query.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values (e.g., a
        negative deadline).
    """
    # app_id is the only extra keyword accepted; anything else is rejected.
    app_id = kwargs.pop('app_id', None)
    if kwargs:
      raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))

    request = search_service_pb.SearchRequest()
    if app_id:
      request.set_app_id(app_id)

    params = request.mutable_params()
    if isinstance(query, basestring):
      query = Query(query_string=query)
    _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
    _CopyQueryObjectToProtocolBuffer(query, params)

    response = search_service_pb.SearchResponse()

    _MakeSyncSearchServiceCall('Search', request, response, deadline)

    _CheckStatus(response.status())
    cursor = None
    if query.options:
      cursor = query.options.cursor
    return self._NewSearchResults(response, cursor)

  def _NewGetResponse(self, response):
    """Returns a GetResponse from the list_documents response pb."""
    documents = []
    for doc_proto in response.document_list():
      documents.append(_NewDocumentFromPb(doc_proto))

    return GetResponse(results=documents)

  def _GetRange(self, start_id=None, include_start_object=True,
                limit=100, ids_only=False, deadline=None, app_id=None):
    """Get a range of objects in the index, in id order in a response."""
    request = search_service_pb.ListDocumentsRequest()
    if app_id:
      request.set_app_id(app_id)

    params = request.mutable_params()
    _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())

    if start_id:
      params.set_start_doc_id(start_id)
    params.set_include_start_doc(include_start_object)

    params.set_limit(_CheckInteger(
        limit, 'limit', zero_ok=False,
        upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH))
    params.set_keys_only(ids_only)

    response = search_service_pb.ListDocumentsResponse()
    _MakeSyncSearchServiceCall('ListDocuments', request, response, deadline)

    _CheckStatus(response.status())
    return response

  @datastore_rpc._positional(5)
  def get_range(self, start_id=None, include_start_object=True,
                limit=100, ids_only=False, deadline=None, **kwargs):
    """Get a range of Documents in the index, in id order.

    Args:
      start_id: String containing the Id from which to list
        Documents from. By default, starts at the first Id.
      include_start_object: If true, include the Document with the
        Id specified by the start_id parameter.
      limit: The maximum number of Documents to return.
      ids_only: If true, the Documents returned only contain their keys.

    Kwargs:
      deadline: Deadline for RPC call in seconds; if None use the default.

    Returns:
      A GetResponse containing a list of Documents, ordered by Id.

    Raises:
      Error: Some subclass of Error is raised if an error occurred processing
        the request.
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values (e.g., a
        negative deadline).
    """
    # app_id is the only extra keyword accepted; anything else is rejected.
    app_id = kwargs.pop('app_id', None)
    if kwargs:
      raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
    response = self._GetRange(
        start_id=start_id, include_start_object=include_start_object,
        limit=limit, ids_only=ids_only, deadline=deadline, app_id=app_id)
    return self._NewGetResponse(response)
# Maps the Index cursor type constants (and None) to SearchParams cursor
# types.
_CURSOR_TYPE_PB_MAP = {
    None: search_service_pb.SearchParams.NONE,
    Index.RESPONSE_CURSOR: search_service_pb.SearchParams.SINGLE,
    Index.RESULT_CURSOR: search_service_pb.SearchParams.PER_RESULT,
}


# Maps the Index source constants to IndexSpec sources.
_SOURCES_TO_PB_MAP = {
    Index.SEARCH: search_service_pb.IndexSpec.SEARCH,
    Index.DATASTORE: search_service_pb.IndexSpec.DATASTORE,
    Index.CLOUD_STORAGE: search_service_pb.IndexSpec.CLOUD_STORAGE,
}


# Maps IndexSpec sources back to the Index source constants.
_SOURCE_PB_TO_SOURCES_MAP = {
    search_service_pb.IndexSpec.SEARCH: Index.SEARCH,
    search_service_pb.IndexSpec.DATASTORE: Index.DATASTORE,
    search_service_pb.IndexSpec.CLOUD_STORAGE: Index.CLOUD_STORAGE,
}
def _CopyMetadataToProtocolBuffer(index, spec_pb):
  """Copies an Index's name, namespace and source to an IndexSpec protobuf.

  Args:
    index: The Index whose metadata is copied.
    spec_pb: The search_service_pb.IndexSpec to populate.
  """
  encoded_name = index.name.encode('utf-8')
  spec_pb.set_name(encoded_name)
  encoded_namespace = index.namespace.encode('utf-8')
  spec_pb.set_namespace(encoded_namespace)

  # The source is only written for indexes that did not come from the search
  # API itself.
  index_source = index._source
  if index_source == Index.SEARCH:
    return
  spec_pb.set_source(_SOURCES_TO_PB_MAP.get(index_source))
# Maps document_pb.FieldValue content types to the public Field type
# constants.
_FIELD_TYPE_MAP = {
    document_pb.FieldValue.TEXT: Field.TEXT,
    document_pb.FieldValue.HTML: Field.HTML,
    document_pb.FieldValue.ATOM: Field.ATOM,
    document_pb.FieldValue.DATE: Field.DATE,
    document_pb.FieldValue.NUMBER: Field.NUMBER,
    document_pb.FieldValue.GEO: Field.GEO_POINT,
}
2913 def _NewSchemaFromPb(field_type_pb_list):
2914 """Creates map of field name to type list from document_pb.FieldTypes list."""
2915 field_types = {}
2916 for field_type_pb in field_type_pb_list:
2917 for field_type in field_type_pb.type_list():
2918 public_type = _FIELD_TYPE_MAP[field_type]
2919 name = _DecodeUTF8(field_type_pb.name())
2920 if name in field_types:
2921 field_types[name].append(public_type)
2922 else:
2923 field_types[name] = [public_type]
2924 return field_types
def _NewIndexFromIndexSpecPb(index_spec_pb):
  """Builds an Index from a search_service_pb.IndexSpec.

  Args:
    index_spec_pb: The search_service_pb.IndexSpec to convert.

  Returns:
    An Index constructed with the spec's name and its mapped source. The
    namespace is passed to the Index constructor only when the spec has one.
  """
  index_source = _SOURCE_PB_TO_SOURCES_MAP.get(index_spec_pb.source())
  if not index_spec_pb.has_namespace():
    return Index(name=index_spec_pb.name(), source=index_source)
  return Index(name=index_spec_pb.name(),
               namespace=index_spec_pb.namespace(),
               source=index_source)
def _NewIndexFromPb(index_metadata_pb):
  """Builds an Index from a search_service_pb.IndexMetadata.

  Args:
    index_metadata_pb: The search_service_pb.IndexMetadata to convert.

  Returns:
    The Index built from the metadata's index spec. Its _schema is set when
    the metadata has a non-empty field list, and its _storage_usage and
    _storage_limit are set when the metadata has storage information.
  """
  new_index = _NewIndexFromIndexSpecPb(index_metadata_pb.index_spec())

  field_pbs = index_metadata_pb.field_list()
  if field_pbs:
    new_index._schema = _NewSchemaFromPb(field_pbs)

  if index_metadata_pb.has_storage():
    storage_pb = index_metadata_pb.storage()
    new_index._storage_usage = storage_pb.amount_used()
    new_index._storage_limit = storage_pb.limit()
  return new_index
2951 def _MakeSyncSearchServiceCall(call, request, response, deadline):
2952 """Make a synchronous call to search service.
2954 If the deadline is not None, waits only until the deadline expires.
2956 Args:
2957 call: Method name to call, as a string
2958 request: The request object
2959 response: The response object
2961 Kwargs:
2962 deadline: Deadline for RPC call in seconds; if None use the default.
2964 Raises:
2965 TypeError: if the deadline is not a number and is not None.
2966 ValueError: If the deadline is less than zero.
2968 try:
2969 if deadline is None:
2970 apiproxy_stub_map.MakeSyncCall('search', call, request, response)
2971 else:
2974 if (not isinstance(deadline, (int, long, float))
2975 or isinstance(deadline, (bool,))):
2976 raise TypeError('deadline argument should be int/long/float (%r)'
2977 % (deadline,))
2978 if deadline <= 0:
2979 raise ValueError('deadline argument must be > 0 (%s)' % (deadline,))
2980 rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline)
2981 rpc.make_call(call, request, response)
2982 rpc.wait()
2983 rpc.check_success()
2984 except apiproxy_errors.ApplicationError, e:
2985 raise _ToSearchError(e)