python/google/appengine/api/search/search.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17
  18
  19
  20
  21 """A Python Search API used by app developers.
  22
  23 Contains methods used to interface with Search API.
  24 Contains API classes that forward to apiproxy.
  25 """
  26
  27
  28
  29
  30
  31
  32
  33 import datetime
  34 import re
  35 import string
  36 import sys
  37 import warnings
  38
  39 from google.appengine.datastore import document_pb
  40 from google.appengine.api import apiproxy_stub_map
  41 from google.appengine.api import datastore_types
  42 from google.appengine.api import namespace_manager
  43 from google.appengine.api.search import expression_parser
  44 from google.appengine.api.search import query_parser
  45 from google.appengine.api.search import search_service_pb
  46 from google.appengine.api.search import search_util
  47 from google.appengine.datastore import datastore_rpc
  48 from google.appengine.runtime import apiproxy_errors
  49
  50
# Public names of this module: the only names bound by
# `from google.appengine.api.search.search import *`.
__all__ = [
    'AtomField',
    'ConcurrentTransactionError',
    'Cursor',
    'DateField',
    'DeleteError',
    'DeleteResult',
    'Document',
    'DOCUMENT_ID_FIELD_NAME',
    'Error',
    'ExpressionError',
    'Field',
    'FieldExpression',
    'HtmlField',
    'GeoField',
    'GeoPoint',
    'get_indexes',
    'GetResponse',
    'Index',
    'InternalError',
    'InvalidRequest',
    'LANGUAGE_FIELD_NAME',
    'MatchScorer',
    'MAXIMUM_DOCUMENT_ID_LENGTH',
    'MAXIMUM_DOCUMENTS_PER_PUT_REQUEST',
    'MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH',
    'MAXIMUM_EXPRESSION_LENGTH',
    'MAXIMUM_FIELD_ATOM_LENGTH',
    'MAXIMUM_FIELD_NAME_LENGTH',
    'MAXIMUM_FIELD_VALUE_LENGTH',
    'MAXIMUM_FIELDS_RETURNED_PER_SEARCH',
    'MAXIMUM_GET_INDEXES_OFFSET',
    'MAXIMUM_INDEX_NAME_LENGTH',
    'MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST',
    'MAXIMUM_NUMBER_FOUND_ACCURACY',
    'MAXIMUM_QUERY_LENGTH',
    'MAXIMUM_SEARCH_OFFSET',
    'MAXIMUM_SORTED_DOCUMENTS',
    'MAX_DATE',
    'MAX_NUMBER_VALUE',
    'MIN_DATE',
    'MIN_NUMBER_VALUE',
    'NumberField',
    'OperationResult',
    'PutError',
    'PutResult',
    'Query',
    'QueryError',
    'QueryOptions',
    'RANK_FIELD_NAME',
    'RescoringMatchScorer',
    'SCORE_FIELD_NAME',
    'ScoredDocument',
    'SearchResults',
    'SortExpression',
    'SortOptions',
    'TextField',
    'Timeout',
    'TIMESTAMP_FIELD_NAME',
    'TransientError',
    ]
 112
# Client-side limits. Where a checker is named, the limit is enforced in this
# module; string limits passed to _ValidateString are compared against the
# UTF-8 encoded byte length of the value.
MAXIMUM_INDEX_NAME_LENGTH = 100  # Checked by _CheckIndexName.
MAXIMUM_FIELD_VALUE_LENGTH = 1024 * 1024  # Checked by _CheckText/_CheckHtml.
MAXIMUM_FIELD_ATOM_LENGTH = 500  # Checked by _CheckAtom.
MAXIMUM_FIELD_NAME_LENGTH = 500  # Checked by _CheckFieldName.
MAXIMUM_DOCUMENT_ID_LENGTH = 500  # Checked by _CheckDocumentId.
MAXIMUM_DOCUMENTS_PER_PUT_REQUEST = 200
MAXIMUM_EXPRESSION_LENGTH = 5000  # Checked by _CheckExpression.
MAXIMUM_QUERY_LENGTH = 2000
MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH = 1000
MAXIMUM_SEARCH_OFFSET = 1000

MAXIMUM_SORTED_DOCUMENTS = 10000  # Upper bound used by _CheckSortLimit.
MAXIMUM_NUMBER_FOUND_ACCURACY = 10000
MAXIMUM_FIELDS_RETURNED_PER_SEARCH = 100
# Upper bounds for the limit and offset arguments of get_indexes.
MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST = 1000
MAXIMUM_GET_INDEXES_OFFSET = 1000
 129
 130
# Reserved field names. Each starts with an underscore, which
# _FIELD_NAME_PATTERN does not allow as a first character, so a field name
# accepted by _CheckFieldName can never equal one of these.
# NOTE(review): presumably these name built-in document properties that can
# be referenced in expressions -- confirm against the callers.
DOCUMENT_ID_FIELD_NAME = '_doc_id'

LANGUAGE_FIELD_NAME = '_lang'

RANK_FIELD_NAME = '_rank'

SCORE_FIELD_NAME = '_score'



TIMESTAMP_FIELD_NAME = '_timestamp'
 142
 143
 144
 145
# Accepts any two characters, or two characters, an underscore and two more
# characters (for example 'en' or 'en_US'). '.' is not restricted to letters.
_LANGUAGE_RE = re.compile('^(.{2}|.{2}_.{2})$')

# Default max_len, in UTF-8 bytes, used by _ValidateString.
_MAXIMUM_STRING_LENGTH = 500
_MAXIMUM_CURSOR_LENGTH = 10000

# string.printable minus all whitespace characters; the alphabet allowed for
# index names and document ids.
_VISIBLE_PRINTABLE_ASCII = frozenset(
    set(string.printable) - set(string.whitespace))
# A letter followed by any number of letters, digits or underscores.
_FIELD_NAME_PATTERN = '^[A-Za-z][A-Za-z0-9_]*$'

# Timezone-naive bounds used by _CheckDate; they span the full range that
# datetime.datetime can represent.
MAX_DATE = datetime.datetime(
    datetime.MAXYEAR, 12, 31, 23, 59, 59, 999999, tzinfo=None)
MIN_DATE = datetime.datetime(
    datetime.MINYEAR, 1, 1, 0, 0, 0, 0, tzinfo=None)


# Symmetric bounds: the minimum is -(2**31 - 1), not -(2**31).
MAX_NUMBER_VALUE = 2147483647
MIN_NUMBER_VALUE = -2147483647


# Protocol buffer field value types whose payload is stored as a string.
_PROTO_FIELDS_STRING_VALUE = frozenset([document_pb.FieldValue.TEXT,
                                        document_pb.FieldValue.HTML,
                                        document_pb.FieldValue.ATOM])
 168
 169
class Error(Exception):
  """Indicates a call on the search API has failed.

  Base class of every exception type defined by this module.
  """


class InternalError(Error):
  """Indicates a call on the search API has failed on the internal backend.

  Also raised by _CheckStatus when the status code is not a known error code.
  """


class TransientError(Error):
  """Indicates a call on the search API has failed, but retrying may succeed."""


class InvalidRequest(Error):
  """Indicates an invalid request was made on the search API by the client."""


class QueryError(Error):
  """An error occurred while parsing a query input string."""


class ExpressionError(Error):
  """An error occurred while parsing an expression input string.

  Raised by _CheckExpression when expression_parser rejects the expression.
  """


class Timeout(Error):
  """Indicates a call on the search API could not finish before its deadline."""


class ConcurrentTransactionError(Error):
  """Indicates a call on the search API failed due to concurrent updates."""
 200
 201
 202 def _ConvertToUnicode(some_string):
 203   """Convert UTF-8 encoded string to unicode."""
 204   if some_string is None:
 205     return None
 206   if isinstance(some_string, unicode):
 207     return some_string
 208   return unicode(some_string, 'utf-8')
 209
 210
 211 def _ConcatenateErrorMessages(prefix, status):
 212   """Returns an error message combining prefix and status.error_detail()."""
 213   if status.error_detail():
 214     return prefix + ': ' + status.error_detail()
 215   return prefix
 216
 217
 218 class OperationResult(object):
 219   """Represents result of individual operation of a batch index or removal.
 220
 221   This is an abstract class.
 222   """
 223
 224   (OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR,
 225   TIMEOUT,  CONCURRENT_TRANSACTION) = (
 226       'OK', 'INVALID_REQUEST', 'TRANSIENT_ERROR', 'INTERNAL_ERROR',
 227       'TIMEOUT', 'CONCURRENT_TRANSACTION')
 228
 229   _CODES = frozenset([OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR,
 230                       TIMEOUT, CONCURRENT_TRANSACTION])
 231
 232   def __init__(self, code, message=None, id=None):
 233     """Initializer.
 234
 235     Args:
 236       code: The error or success code of the operation.
 237       message: An error message associated with any error.
 238       id: The id of the object some operation was performed on.
 239
 240     Raises:
 241       TypeError: If an unknown attribute is passed.
 242       ValueError: If an unknown code is passed.
 243     """
 244     self._message = _ConvertToUnicode(message)
 245     self._code = code
 246     if self._code not in self._CODES:
 247       raise ValueError('Unknown operation result code %r, must be one of %s'
 248                        % (self._code, self._CODES))
 249     self._id = _ConvertToUnicode(id)
 250
 251   @property
 252   def code(self):
 253     """Returns the code indicating the status of the operation."""
 254     return self._code
 255
 256   @property
 257   def message(self):
 258     """Returns any associated error message if the operation was in error."""
 259     return self._message
 260
 261   @property
 262   def id(self):
 263     """Returns the Id of the object the operation was performed on."""
 264     return self._id
 265
 266   def __repr__(self):
 267     return _Repr(self, [('code', self.code), ('message', self.message),
 268                         ('id', self.id)])
 269
 270
# Maps each SearchServiceError code of the service protocol buffer to the
# corresponding string code defined on OperationResult.
_ERROR_OPERATION_CODE_MAP = {
    search_service_pb.SearchServiceError.OK: OperationResult.OK,
    search_service_pb.SearchServiceError.INVALID_REQUEST:
    OperationResult.INVALID_REQUEST,
    search_service_pb.SearchServiceError.TRANSIENT_ERROR:
    OperationResult.TRANSIENT_ERROR,
    search_service_pb.SearchServiceError.INTERNAL_ERROR:
    OperationResult.INTERNAL_ERROR,
    search_service_pb.SearchServiceError.TIMEOUT:
    OperationResult.TIMEOUT,
    search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
    OperationResult.CONCURRENT_TRANSACTION,
    }
 284
 285
class PutResult(OperationResult):
  """The result of indexing a single object.

  Adds nothing to OperationResult; the code, message and id properties are
  inherited unchanged.
  """
 288
 289
class DeleteResult(OperationResult):
  """The result of deleting a single document.

  Adds nothing to OperationResult; the code, message and id properties are
  inherited unchanged.
  """
 292
 293
 294 class PutError(Error):
 295   """Indicates some error occurred indexing one of the objects requested."""
 296
 297   def __init__(self, message, results):
 298     """Initializer.
 299
 300     Args:
 301       message: A message detailing the cause of the failure to index some
 302         document.
 303       results: A list of PutResult corresponding to the list of objects
 304         requested to be indexed.
 305     """
 306     super(PutError, self).__init__(message)
 307     self._results = results
 308
 309   @property
 310   def results(self):
 311     """Returns PutResult list corresponding to objects indexed."""
 312     return self._results
 313
 314
 315 class DeleteError(Error):
 316   """Indicates some error occured deleting one of the objects requested."""
 317
 318   def __init__(self, message, results):
 319     """Initializer.
 320
 321     Args:
 322       message: A message detailing the cause of the failure to delete some
 323         document.
 324       results: A list of DeleteResult corresponding to the list of Ids of
 325         objects requested to be deleted.
 326     """
 327     super(DeleteError, self).__init__(message)
 328     self._results = results
 329
 330   @property
 331   def results(self):
 332     """Returns DeleteResult list corresponding to Documents deleted."""
 333     return self._results
 334
 335
# Maps SearchServiceError codes to the exception class raised for them by
# _ToSearchError and _CheckStatus. OK is deliberately absent, and a code
# missing from this map makes _CheckStatus raise InternalError.
_ERROR_MAP = {
    search_service_pb.SearchServiceError.INVALID_REQUEST: InvalidRequest,
    search_service_pb.SearchServiceError.TRANSIENT_ERROR: TransientError,
    search_service_pb.SearchServiceError.INTERNAL_ERROR: InternalError,
    search_service_pb.SearchServiceError.TIMEOUT: Timeout,
    search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
    ConcurrentTransactionError,
    }
 344
 345
 346 def _ToSearchError(error):
 347   """Translate an application error to a search Error, if possible.
 348
 349   Args:
 350     error: An ApplicationError to translate.
 351
 352   Returns:
 353     An Error if the error is known, otherwise the given
 354     apiproxy_errors.ApplicationError.
 355   """
 356   if error.application_error in _ERROR_MAP:
 357     return _ERROR_MAP[error.application_error](error.error_detail)
 358   return error
 359
 360
 361 def _CheckInteger(value, name, zero_ok=True, upper_bound=None):
 362   """Checks whether value is an integer between the lower and upper bounds.
 363
 364   Args:
 365     value: The value to check.
 366     name: The name of the value, to use in error messages.
 367     zero_ok: True if zero is allowed.
 368     upper_bound: The upper (inclusive) bound of the value. Optional.
 369
 370   Returns:
 371     The checked value.
 372
 373   Raises:
 374     ValueError: If the value is not a int or long, or is out of range.
 375   """
 376   datastore_types.ValidateInteger(value, name, ValueError, empty_ok=True,
 377                                   zero_ok=zero_ok)
 378   if upper_bound is not None and value > upper_bound:
 379     raise ValueError('%s, %d must be <= %d' % (name, value, upper_bound))
 380   return value
 381
 382
 383 def _CheckEnum(value, name, values=None):
 384   """Checks whether value is a member of the set of values given.
 385
 386   Args:
 387     value: The value to check.
 388     name: The name of the value, to use in error messages.
 389     values: The iterable of possible values.
 390
 391   Returns:
 392     The checked value.
 393
 394   Raises:
 395     ValueError: If the value is not one of the allowable values.
 396   """
 397   if value not in values:
 398     raise ValueError('%s, %r must be in %s' % (name, value, values))
 399   return value
 400
 401
 402 def _CheckNumber(value, name):
 403   """Checks whether value is a number.
 404
 405   Args:
 406     value: The value to check.
 407     name: The name of the value, to use in error messages.
 408
 409   Returns:
 410     The checked value.
 411
 412   Raises:
 413     TypeError: If the value is not a number.
 414   """
 415   if not isinstance(value, (int, long, float)):
 416     raise TypeError('%s must be a int, long or float, got %s' %
 417                     (name, value.__class__.__name__))
 418   return value
 419
 420
 421 def _CheckStatus(status):
 422   """Checks whether a RequestStatus has a value of OK.
 423
 424   Args:
 425     status: The RequestStatus to check.
 426
 427   Raises:
 428     Error: A subclass of Error if the value of status is not OK.
 429       The subclass of Error is chosen based on value of the status code.
 430     InternalError: If the status value is unknown.
 431   """
 432   if status.code() != search_service_pb.SearchServiceError.OK:
 433     if status.code() in _ERROR_MAP:
 434       raise _ERROR_MAP[status.code()](status.error_detail())
 435     else:
 436       raise InternalError(status.error_detail())
 437
 438
 439 def _ValidateString(value,
 440                     name='unused',
 441                     max_len=_MAXIMUM_STRING_LENGTH,
 442                     empty_ok=False,
 443                     type_exception=TypeError,
 444                     value_exception=ValueError):
 445   """Raises an exception if value is not a valid string or a subclass thereof.
 446
 447   A string is valid if it's not empty, no more than _MAXIMUM_STRING_LENGTH
 448   bytes. The exception type can be specified with the exception
 449   arguments for type and value issues.
 450
 451   Args:
 452     value: The value to validate.
 453     name: The name of this value; used in the exception message.
 454     max_len: The maximum allowed length, in bytes.
 455     empty_ok: Allow empty value.
 456     type_exception: The type of exception to raise if not a basestring.
 457     value_exception: The type of exception to raise if invalid value.
 458
 459   Returns:
 460     The checked string.
 461
 462   Raises:
 463     TypeError: If value is not a basestring or subclass.
 464     ValueError: If the value is None or longer than max_len.
 465   """
 466   if value is None and empty_ok:
 467     return
 468   if value is not None and not isinstance(value, basestring):
 469     raise type_exception('%s must be a basestring; got %s:' %
 470                          (name, value.__class__.__name__))
 471   if not value and not empty_ok:
 472     raise value_exception('%s must not be empty.' % name)
 473
 474   if len(value.encode('utf-8')) > max_len:
 475     raise value_exception('%s must be under %d bytes.' % (name, max_len))
 476   return value
 477
 478
 479 def _ValidateVisiblePrintableAsciiNotReserved(value, name):
 480   """Checks if value is a visible printable ASCII string not starting with '!'.
 481
 482   Whitespace characters are excluded. Printable visible ASCII
 483   strings starting with '!' are reserved for internal use.
 484
 485   Args:
 486     value: The string to validate.
 487     name: The name of this string; used in the exception message.
 488
 489   Returns:
 490     The checked string.
 491
 492   Raises:
 493     ValueError: If the string is not visible printable ASCII, or starts with
 494       '!'.
 495   """
 496   for char in value:
 497     if char not in _VISIBLE_PRINTABLE_ASCII:
 498       raise ValueError(
 499           '%r must be visible printable ASCII: %r'
 500           % (name, value))
 501   if value.startswith('!'):
 502     raise ValueError('%r must not start with "!": %r' % (name, value))
 503   return value
 504
 505
 506 def _CheckIndexName(index_name):
 507   """Checks index_name is a string which is not too long, and returns it.
 508
 509   Index names must be visible printable ASCII and not start with '!'.
 510   """
 511   _ValidateString(index_name, 'index name', MAXIMUM_INDEX_NAME_LENGTH)
 512   return _ValidateVisiblePrintableAsciiNotReserved(index_name, 'index_name')
 513
 514
 515 def _CheckFieldName(name):
 516   """Checks field name is not too long and matches field name pattern.
 517
 518   Field name pattern: "[A-Za-z][A-Za-z0-9_]*".
 519   """
 520   _ValidateString(name, 'name', MAXIMUM_FIELD_NAME_LENGTH)
 521   if not re.match(_FIELD_NAME_PATTERN, name):
 522     raise ValueError('field name "%s" should match pattern: %s' %
 523                      (name, _FIELD_NAME_PATTERN))
 524   return name
 525
 526
 527 def _CheckExpression(expression):
 528   """Checks whether the expression is a string."""
 529   expression = _ValidateString(expression, max_len=MAXIMUM_EXPRESSION_LENGTH)
 530   try:
 531     expression_parser.Parse(expression)
 532   except expression_parser.ExpressionException, e:
 533     raise ExpressionError('Failed to parse expression "%s"' % expression)
 534   return expression
 535
 536
 537 def _CheckFieldNames(names):
 538   """Checks each name in names is a valid field name."""
 539   for name in names:
 540     _CheckFieldName(name)
 541   return names
 542
 543
 544 def _GetList(a_list):
 545   """Utility function that converts None to the empty list."""
 546   if a_list is None:
 547     return []
 548   else:
 549     return list(a_list)
 550
 551
 552 def _ConvertToList(arg):
 553   """Converts arg to a list, empty if None, single element if not a list."""
 554   if isinstance(arg, basestring):
 555     return [arg]
 556   if arg is not None:
 557     try:
 558       return list(iter(arg))
 559     except TypeError:
 560       return [arg]
 561   return []
 562
 563
 564 def _ConvertToUnicodeList(arg):
 565   """Converts arg to a list of unicode objects."""
 566   return [_ConvertToUnicode(value) for value in _ConvertToList(arg)]
 567
 568
 569 def _CheckDocumentId(doc_id):
 570   """Checks doc_id is a valid document identifier, and returns it.
 571
 572   Document ids must be visible printable ASCII and not start with '!'.
 573   """
 574   _ValidateString(doc_id, 'doc_id', MAXIMUM_DOCUMENT_ID_LENGTH)
 575   _ValidateVisiblePrintableAsciiNotReserved(doc_id, 'doc_id')
 576   return doc_id
 577
 578
 579 def _CheckText(value, name='value', empty_ok=True):
 580   """Checks the field text is a valid string."""
 581   return _ValidateString(value, name, MAXIMUM_FIELD_VALUE_LENGTH, empty_ok)
 582
 583
 584 def _CheckHtml(html):
 585   """Checks the field html is a valid HTML string."""
 586   return _ValidateString(html, 'html', MAXIMUM_FIELD_VALUE_LENGTH,
 587                          empty_ok=True)
 588
 589
 590 def _CheckAtom(atom):
 591   """Checks the field atom is a valid string."""
 592   return _ValidateString(atom, 'atom', MAXIMUM_FIELD_ATOM_LENGTH,
 593                          empty_ok=True)
 594
 595
 596 def _CheckDate(date):
 597   """Checks the date is in the correct range."""
 598   if isinstance(date, datetime.datetime):
 599     if date < MIN_DATE or date > MAX_DATE:
 600       raise TypeError('date must be between %s and %s (got %s)' %
 601                       (MIN_DATE, MAX_DATE, date))
 602   elif isinstance(date, datetime.date):
 603     if date < MIN_DATE.date() or date > MAX_DATE.date():
 604       raise TypeError('date must be between %s and %s (got %s)' %
 605                       (MIN_DATE, MAX_DATE, date))
 606   else:
 607     raise TypeError('date must be datetime.datetime or datetime.date')
 608   return date
 609
 610
 611 def _CheckLanguage(language):
 612   """Checks language is None or a string that matches _LANGUAGE_RE."""
 613   if language is None:
 614     return None
 615   if not isinstance(language, basestring):
 616     raise TypeError('language must be a basestring, got %s' %
 617                     language.__class__.__name__)
 618   if not re.match(_LANGUAGE_RE, language):
 619     raise ValueError('invalid language %s. Languages should be two letters.'
 620                      % language)
 621   return language
 622
 623
 624 def _CheckDocument(document):
 625   """Check that the document is valid.
 626
 627   This checks for all server-side requirements on Documents. Currently, that
 628   means ensuring that there are no repeated number or date fields.
 629
 630   Args:
 631     document: The search.Document to check for validity.
 632
 633   Raises:
 634     ValueError if the document is invalid in a way that would trigger an
 635     PutError from the server.
 636   """
 637   no_repeat_date_names = set()
 638   no_repeat_number_names = set()
 639   for field in document.fields:
 640     if isinstance(field, NumberField):
 641       if field.name in no_repeat_number_names:
 642         raise ValueError(
 643             'Invalid document %s: field %s with type date or number may not '
 644             'be repeated.' % (document.doc_id, field.name))
 645       no_repeat_number_names.add(field.name)
 646     elif isinstance(field, DateField):
 647       if field.name in no_repeat_date_names:
 648         raise ValueError(
 649             'Invalid document %s: field %s with type date or number may not '
 650             'be repeated.' % (document.doc_id, field.name))
 651       no_repeat_date_names.add(field.name)
 652
 653
 654 def _CheckSortLimit(limit):
 655   """Checks the limit on number of docs to score or sort is not too large."""
 656   return _CheckInteger(limit, 'limit', upper_bound=MAXIMUM_SORTED_DOCUMENTS)
 657
 658
 659 def _Repr(class_instance, ordered_dictionary):
 660   """Generates an unambiguous representation for instance and ordered dict."""
 661   return u'search.%s(%s)' % (class_instance.__class__.__name__, ', '.join(
 662       ['%s=%r' % (key, value) for (key, value) in ordered_dictionary
 663        if value is not None and value != []]))
 664
 665
 666 def _ListIndexesResponsePbToGetResponse(response):
 667   """Returns a GetResponse constructed from get_indexes response pb."""
 668   return GetResponse(
 669       results=[_NewIndexFromPb(index)
 670                for index in response.index_metadata_list()])
 671
 672 @datastore_rpc._positional(7)
 673 def get_indexes(namespace='', offset=None, limit=20,
 674                 start_index_name=None, include_start_index=True,
 675                 index_name_prefix=None, fetch_schema=False, deadline=None,
 676                 **kwargs):
 677   """Returns a list of available indexes.
 678
 679   Args:
 680     namespace: The namespace of indexes to be returned. If not set
 681       then the current namespace is used.
 682     offset: The offset of the first returned index.
 683     limit: The number of indexes to return.
 684     start_index_name: The name of the first index to be returned.
 685     include_start_index: Whether or not to return the start index.
 686     index_name_prefix: The prefix used to select returned indexes.
 687     fetch_schema: Whether to retrieve Schema for each Index or not.
 688
 689   Kwargs:
 690     deadline: Deadline for RPC call in seconds; if None use the default.
 691
 692   Returns:
 693     The GetResponse containing a list of available indexes.
 694
 695   Raises:
 696     InternalError: If the request fails on internal servers.
 697     TypeError: If any of the parameters have invalid types, or an unknown
 698       attribute is passed.
 699     ValueError: If any of the parameters have invalid values (e.g., a
 700       negative deadline).
 701   """
 702
 703   app_id = kwargs.pop('app_id', None)
 704   if kwargs:
 705     raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
 706
 707   response = _GetIndexes(
 708       namespace=namespace, offset=offset, limit=limit,
 709       start_index_name=start_index_name,
 710       include_start_index=include_start_index,
 711       index_name_prefix=index_name_prefix,
 712       fetch_schema=fetch_schema, deadline=deadline, app_id=app_id)
 713   return _ListIndexesResponsePbToGetResponse(response)
 714
 715
 716 def _GetIndexes(namespace='', offset=None, limit=20,
 717                 start_index_name=None, include_start_index=True,
 718                 index_name_prefix=None, fetch_schema=False, deadline=None,
 719                 app_id=None):
 720   """Returns a ListIndexesResponse."""
 721
 722   request = search_service_pb.ListIndexesRequest()
 723   params = request.mutable_params()
 724
 725   if namespace is None:
 726     namespace = namespace_manager.get_namespace()
 727   if namespace is None:
 728     namespace = u''
 729   namespace_manager.validate_namespace(namespace, exception=ValueError)
 730   params.set_namespace(namespace)
 731   if offset is not None:
 732     params.set_offset(_CheckInteger(offset, 'offset', zero_ok=True,
 733                                     upper_bound=MAXIMUM_GET_INDEXES_OFFSET))
 734   params.set_limit(_CheckInteger(
 735       limit, 'limit', zero_ok=False,
 736       upper_bound=MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST))
 737   if start_index_name is not None:
 738     params.set_start_index_name(
 739         _ValidateString(start_index_name, 'start_index_name',
 740                         MAXIMUM_INDEX_NAME_LENGTH,
 741                         empty_ok=False))
 742   if include_start_index is not None:
 743     params.set_include_start_index(bool(include_start_index))
 744   if index_name_prefix is not None:
 745     params.set_index_name_prefix(
 746         _ValidateString(index_name_prefix, 'index_name_prefix',
 747                         MAXIMUM_INDEX_NAME_LENGTH,
 748                         empty_ok=False))
 749   params.set_fetch_schema(fetch_schema)
 750
 751   response = search_service_pb.ListIndexesResponse()
 752   if app_id:
 753     request.set_app_id(app_id)
 754
 755   _MakeSyncSearchServiceCall('ListIndexes', request, response, deadline)
 756
 757   _CheckStatus(response.status())
 758   return response
 759
 760 class Field(object):
 761   """An abstract base class which represents a field of a document.
 762
 763   This class should not be directly instantiated.
 764   """
 765
 766
 767   TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT = ('TEXT', 'HTML', 'ATOM', 'DATE',
 768                                                'NUMBER', 'GEO_POINT')
 769
 770   _FIELD_TYPES = frozenset([TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT])
 771
 772   def __init__(self, name, value, language=None):
 773     """Initializer.
 774
 775     Args:
 776       name: The name of the field. Field names must have maximum length
 777         MAXIMUM_FIELD_NAME_LENGTH and match pattern "[A-Za-z][A-Za-z0-9_]*".
 778       value: The value of the field which can be a str, unicode or date.
 779       language: The ISO 693-1 two letter code of the language used in the value.
 780         See http://www.sil.org/iso639-3/codes.asp?order=639_1&letter=%25 for a
 781         list of valid codes. Correct specification of language code will assist
 782         in correct tokenization of the field. If None is given, then the
 783         language code of the document will be used.
 784
 785     Raises:
 786       TypeError: If any of the parameters have invalid types, or an unknown
 787         attribute is passed.
 788       ValueError: If any of the parameters have invalid values.
 789     """
 790     self._name = _CheckFieldName(_ConvertToUnicode(name))
 791     self._value = self._CheckValue(value)
 792     self._language = _CheckLanguage(_ConvertToUnicode(language))
 793
 794   @property
 795   def name(self):
 796     """Returns the name of the field."""
 797     return self._name
 798
 799   @property
 800   def language(self):
 801     """Returns the code of the language the content in value is written in."""
 802     return self._language
 803
 804   @property
 805   def value(self):
 806     """Returns the value of the field."""
 807     return self._value
 808
 809   def _CheckValue(self, value):
 810     """Checks the value is valid for the given type.
 811
 812     Args:
 813       value: The value to check.
 814
 815     Returns:
 816       The checked value.
 817     """
 818     raise NotImplementedError('_CheckValue is an abstract method')
 819
 820   def __repr__(self):
 821     return _Repr(self, [('name', self.name), ('language', self.language),
 822                         ('value', self.value)])
 823
 824   def __eq__(self, other):
 825     return isinstance(other, type(self)) and self.__key() == other.__key()
 826
 827   def __ne__(self, other):
 828     return not self == other
 829
 830   def __key(self):
 831     return (self.name, self.value, self.language)
 832
 833   def __hash__(self):
 834     return hash(self.__key())
 835
 836   def __str__(self):
 837     return repr(self)
 838
 839   def _CopyStringValueToProtocolBuffer(self, field_value_pb):
 840     """Copies value to a string value in proto buf."""
 841     field_value_pb.set_string_value(self.value.encode('utf-8'))
 842
 843
 844 def _CopyFieldToProtocolBuffer(field, pb):
 845   """Copies field's contents to a document_pb.Field protocol buffer."""
 846   pb.set_name(field.name.encode('utf-8'))
 847   field_value_pb = pb.mutable_value()
 848   if field.language:
 849     field_value_pb.set_language(field.language.encode('utf-8'))
 850   if field.value is not None:
 851     field._CopyValueToProtocolBuffer(field_value_pb)
 852   return pb
 853
 854
 855 class TextField(Field):
 856   """A Field that has text content.
 857
 858   The following example shows a text field named signature with Polish content:
 859     TextField(name='signature', value='brzydka pogoda', language='pl')
 860   """
 861
 862   def __init__(self, name, value=None, language=None):
 863     """Initializer.
 864
 865     Args:
 866       name: The name of the field.
 867       value: A str or unicode object containing text.
 868       language: The code of the language the value is encoded in.
 869
 870     Raises:
 871       TypeError: If value is not a string.
 872       ValueError: If value is longer than allowed.
 873     """
 874     Field.__init__(self, name, _ConvertToUnicode(value), language)
 875
 876   def _CheckValue(self, value):
 877     return _CheckText(value)
 878
 879   def _CopyValueToProtocolBuffer(self, field_value_pb):
 880     field_value_pb.set_type(document_pb.FieldValue.TEXT)
 881     self._CopyStringValueToProtocolBuffer(field_value_pb)
 882
 883
 884 class HtmlField(Field):
 885   """A Field that has HTML content.
 886
 887   The following example shows an html field named content:
 888     HtmlField(name='content', value='<html>herbata, kawa</html>', language='pl')
 889   """
 890
 891   def __init__(self, name, value=None, language=None):
 892     """Initializer.
 893
 894     Args:
 895       name: The name of the field.
 896       value: A str or unicode object containing the searchable content of the
 897         Field.
 898       language: The code of the language the value is encoded in.
 899
 900     Raises:
 901       TypeError: If value is not a string.
 902       ValueError: If value is longer than allowed.
 903     """
 904     Field.__init__(self, name, _ConvertToUnicode(value), language)
 905
 906   def _CheckValue(self, value):
 907     return _CheckHtml(value)
 908
 909   def _CopyValueToProtocolBuffer(self, field_value_pb):
 910     field_value_pb.set_type(document_pb.FieldValue.HTML)
 911     self._CopyStringValueToProtocolBuffer(field_value_pb)
 912
 913
 914 class AtomField(Field):
 915   """A Field that has content to be treated as a single token for indexing.
 916
 917   The following example shows an atom field named contributor:
 918     AtomField(name='contributor', value='foo@bar.com')
 919   """
 920
 921   def __init__(self, name, value=None, language=None):
 922     """Initializer.
 923
 924     Args:
 925       name: The name of the field.
 926       value: A str or unicode object to be treated as an indivisible text value.
 927       language: The code of the language the value is encoded in.
 928
 929     Raises:
 930       TypeError: If value is not a string.
 931       ValueError: If value is longer than allowed.
 932     """
 933     Field.__init__(self, name, _ConvertToUnicode(value), language)
 934
 935   def _CheckValue(self, value):
 936     return _CheckAtom(value)
 937
 938   def _CopyValueToProtocolBuffer(self, field_value_pb):
 939     field_value_pb.set_type(document_pb.FieldValue.ATOM)
 940     self._CopyStringValueToProtocolBuffer(field_value_pb)
 941
 942
 943 class DateField(Field):
 944   """A Field that has a date or datetime value.
 945
 946   The following example shows a date field named creation_date:
 947     DateField(name='creation_date', value=datetime.date(2011, 03, 11))
 948   """
 949
 950   def __init__(self, name, value=None):
 951     """Initializer.
 952
 953     Args:
 954       name: The name of the field.
 955       value: A datetime.date or a datetime.datetime.
 956
 957     Raises:
 958       TypeError: If value is not a datetime.date or a datetime.datetime.
 959     """
 960     Field.__init__(self, name, value)
 961
 962   def _CheckValue(self, value):
 963     return _CheckDate(value)
 964
 965   def _CopyValueToProtocolBuffer(self, field_value_pb):
 966     field_value_pb.set_type(document_pb.FieldValue.DATE)
 967     field_value_pb.set_string_value(search_util.SerializeDate(self.value))
 968
 969
 970 class NumberField(Field):
 971   """A Field that has a numeric value.
 972
 973   The following example shows a number field named size:
 974     NumberField(name='size', value=10)
 975   """
 976
 977   def __init__(self, name, value=None):
 978     """Initializer.
 979
 980     Args:
 981       name: The name of the field.
 982       value: A numeric value.
 983
 984     Raises:
 985       TypeError: If value is not numeric.
 986       ValueError: If value is out of range.
 987     """
 988     Field.__init__(self, name, value)
 989
 990   def _CheckValue(self, value):
 991     value = _CheckNumber(value, 'field value')
 992     if value is not None and (value < MIN_NUMBER_VALUE or
 993                               value > MAX_NUMBER_VALUE):
 994       raise ValueError('value, %d must be between %d and %d' %
 995                        (value, MIN_NUMBER_VALUE, MAX_NUMBER_VALUE))
 996     return value
 997
 998   def _CopyValueToProtocolBuffer(self, field_value_pb):
 999     field_value_pb.set_type(document_pb.FieldValue.NUMBER)
1000     field_value_pb.set_string_value(str(self.value))
1001
1002
class GeoPoint(object):
  """Represents a point on the Earth's surface, in lat, long coordinates."""

  def __init__(self, latitude, longitude):
    """Initializer.

    Args:
      latitude: The angle between the equatorial plane and a line that passes
        through the GeoPoint, between -90 and 90 degrees.
      longitude: The angle east or west from a reference meridian to another
        meridian that passes through the GeoPoint, between -180 and 180 degrees.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    self._latitude = self._CheckLatitude(latitude)
    self._longitude = self._CheckLongitude(longitude)

  @property
  def latitude(self):
    """Returns the angle between equatorial plane and line thru the geo point."""
    return self._latitude

  @property
  def longitude(self):
    """Returns the angle from a reference meridian to another meridian."""
    return self._longitude

  def _CheckLatitude(self, value):
    """Checks value with _CheckNumber and the [-90, 90] range; returns it."""
    _CheckNumber(value, 'latitude')
    if value < -90.0 or value > 90.0:
      raise ValueError('latitude must be between -90 and 90 degrees '
                       'inclusive, was %f' % value)
    return value

  def _CheckLongitude(self, value):
    """Checks value with _CheckNumber and the [-180, 180] range; returns it."""
    _CheckNumber(value, 'longitude')
    if value < -180.0 or value > 180.0:
      raise ValueError('longitude must be between -180 and 180 degrees '
                       'inclusive, was %f' % value)
    return value

  def __eq__(self, other):
    """Returns True only for a GeoPoint with the same coordinates."""
    # The isinstance guard keeps comparison with unrelated objects (None,
    # strings, ...) from raising AttributeError.
    return (isinstance(other, GeoPoint) and
            self.latitude == other.latitude and
            self.longitude == other.longitude)

  def __ne__(self, other):
    """Returns the negation of __eq__.

    Python 2 does not derive != from __eq__, so it is defined explicitly.
    """
    return not self == other

  def __hash__(self):
    """Hashes on the coordinates so that equal points hash equally."""
    return hash((self.latitude, self.longitude))

  def __repr__(self):
    return _Repr(self,
                 [('latitude', self.latitude),
                  ('longitude', self.longitude)])
1055
1056
def _CheckGeoPoint(geo_point):
  """Returns geo_point unchanged if it is a GeoPoint, else raises TypeError."""
  if isinstance(geo_point, GeoPoint):
    return geo_point
  actual_type = geo_point.__class__.__name__
  raise TypeError('geo_point must be a GeoPoint, got %s' % actual_type)
1063
1064
class GeoField(Field):
  """A Field whose value is a GeoPoint.

  For example, a geo field named place:

    GeoField(name='place', value=GeoPoint(latitude=-33.84, longitude=151.26))
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A GeoPoint value.

    Raises:
      TypeError: If value is not a GeoPoint.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Runs value through _CheckGeoPoint and returns the result."""
    checked = _CheckGeoPoint(value)
    return checked

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Tags field_value_pb as GEO and copies latitude/longitude into it."""
    field_value_pb.set_type(document_pb.FieldValue.GEO)
    point = self.value
    geo_pb = field_value_pb.mutable_geo()
    geo_pb.set_lat(point.latitude)
    geo_pb.set_lng(point.longitude)
1093
1094
def _GetValue(value_pb):
  """Extracts the Python value held by a FieldValue protocol buffer.

  String-valued types yield the raw string, DATE yields the deserialized
  date, NUMBER yields a float and GEO yields a GeoPoint. None is returned
  when the relevant payload is absent.

  Raises:
    TypeError: If the type of value_pb is none of the above.
  """
  value_type = value_pb.type()
  if value_type in _PROTO_FIELDS_STRING_VALUE:
    return value_pb.string_value() if value_pb.has_string_value() else None
  if value_type == document_pb.FieldValue.DATE:
    if not value_pb.has_string_value():
      return None
    return search_util.DeserializeDate(value_pb.string_value())
  if value_type == document_pb.FieldValue.NUMBER:
    if not value_pb.has_string_value():
      return None
    return float(value_pb.string_value())
  if value_type == document_pb.FieldValue.GEO:
    if not value_pb.has_geo():
      return None
    geo_pb = value_pb.geo()
    return GeoPoint(latitude=geo_pb.lat(), longitude=geo_pb.lng())
  raise TypeError('unknown FieldValue type %d' % value_pb.type())
1115
1116
# FieldValue types that _DecodeValue decodes from UTF-8 into unicode.
_STRING_TYPES = set((
    document_pb.FieldValue.TEXT,
    document_pb.FieldValue.HTML,
    document_pb.FieldValue.ATOM,
))
1120
1121
1122 def _DecodeUTF8(pb_value):
1123   """Decodes a UTF-8 encoded string into unicode."""
1124   if pb_value is not None:
1125     return pb_value.decode('utf-8')
1126   return None
1127
1128
def _DecodeValue(pb_value, val_type):
  """Decodes pb_value from UTF-8 when val_type is one of _STRING_TYPES.

  Values of any other type are returned untouched.
  """
  if val_type not in _STRING_TYPES:
    return pb_value
  return _DecodeUTF8(pb_value)
1134
1135
def _NewFieldFromPb(pb):
  """Constructs a Field from a document_pb.Field protocol buffer.

  Args:
    pb: The document_pb.Field to convert.

  Returns:
    A TextField, HtmlField, AtomField, DateField, NumberField or GeoField,
    chosen by the type of the value held in pb.

  Raises:
    InvalidRequest: If the value type is not one of the supported types.
  """
  name = _DecodeUTF8(pb.name())
  val_type = pb.value().type()
  value = _DecodeValue(_GetValue(pb.value()), val_type)
  lang = None
  if pb.value().has_language():
    lang = _DecodeUTF8(pb.value().language())
  if val_type == document_pb.FieldValue.TEXT:
    return TextField(name, value, lang)
  elif val_type == document_pb.FieldValue.HTML:
    return HtmlField(name, value, lang)
  elif val_type == document_pb.FieldValue.ATOM:
    return AtomField(name, value, lang)
  elif val_type == document_pb.FieldValue.DATE:
    return DateField(name, value)
  elif val_type == document_pb.FieldValue.NUMBER:
    return NumberField(name, value)
  elif val_type == document_pb.FieldValue.GEO:
    return GeoField(name, value)
  # Raise rather than return: handing the exception object back would let it
  # masquerade as a Field in the caller's list.
  raise InvalidRequest('Unknown field value type %d' % val_type)
1157
1158
class Document(object):
  """A user supplied document made up of a collection of fields.

  The example below builds a document out of several fields, some holding
  plain text and one holding HTML.

  Document(doc_id='document_id',
           fields=[TextField(name='subject', value='going for dinner'),
                   HtmlField(name='body',
                             value='<html>I found a place.</html>'),
                   TextField(name='signature', value='brzydka pogoda',
                             language='pl')],
           language='en')
  """
  _FIRST_JAN_2011 = datetime.datetime(2011, 1, 1)

  def __init__(self, doc_id=None, fields=None, language='en', rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document which
        does not start with '!'. Whitespace is excluded from ids. If no id is
        provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      rank: The rank of this document used to specify the order in which
        documents are returned by search. Rank must be a non-negative integer.
        If not specified, the number of seconds since 1st Jan 2011 is used.
        Documents are returned in descending order of their rank, in absence
        of sorting or scoring options.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    unicode_id = _ConvertToUnicode(doc_id)
    if unicode_id is not None:
      _CheckDocumentId(unicode_id)
    self._doc_id = unicode_id
    self._fields = _GetList(fields)
    self._language = _CheckLanguage(_ConvertToUnicode(language))

    # Populated on first name lookup by _BuildFieldMap.
    self._field_map = None

    if rank is None:
      rank = self._GetDefaultRank()
    self._rank = self._CheckRank(rank)

    _CheckDocument(self)

  @property
  def doc_id(self):
    """The identifier of the document."""
    return self._doc_id

  @property
  def fields(self):
    """The list of fields making up the document."""
    return self._fields

  @property
  def language(self):
    """The code of the language the document fields are written in."""
    return self._language

  @property
  def rank(self):
    """The rank of the document."""
    return self._rank

  def field(self, field_name):
    """Returns the single field carrying the given name.

    Args:
      field_name: The name of the field to return.

    Returns:
      The one field with the given name.

    Raises:
      ValueError: If zero or several fields carry the given name.
    """
    matches = self[field_name]
    if len(matches) != 1:
      raise ValueError(
          'Must have exactly one field with name %s, but found %d.' %
          (field_name, len(matches)))
    return matches[0]

  def __getitem__(self, field_name):
    """Returns every field carrying the given name.

    Args:
      field_name: The name of the fields to return.

    Returns:
      A list of the fields with that name; empty when there are none.
    """
    field_map = self._BuildFieldMap()
    return field_map.get(field_name, [])

  def __iter__(self):
    """Always raises: Documents are deliberately not iterable.

    Defined only so that attempting iteration fails with an explicit message.
    """
    raise TypeError('Documents do not support iteration.')

  def _BuildFieldMap(self):
    """Returns the name -> [fields] map, building it on first use."""
    if self._field_map is not None:
      return self._field_map
    self._field_map = {}
    for field in self._fields:
      self._field_map.setdefault(field.name, []).append(field)
    return self._field_map

  def _CheckRank(self, rank):
    """Validates rank with _CheckInteger (capped at sys.maxint); returns it."""
    checked = _CheckInteger(rank, 'rank', upper_bound=sys.maxint)
    return checked

  def _GetDefaultRank(self):
    """Returns the whole seconds elapsed between 1st Jan 2011 and now."""
    elapsed = datetime.datetime.now() - Document._FIRST_JAN_2011
    return elapsed.seconds + (elapsed.days * 24 * 3600)

  def __repr__(self):
    attributes = [('doc_id', self.doc_id), ('fields', self.fields),
                  ('language', self.language), ('rank', self.rank)]
    return _Repr(self, attributes)

  def __eq__(self, other):
    """Equal when other is of this type and id, rank, language, fields match."""
    if not isinstance(other, type(self)):
      return False
    return (self.doc_id == other.doc_id and
            self.rank == other.rank and self.language == other.language
            and self.fields == other.fields)

  def __ne__(self, other):
    return not (self == other)

  def __key(self):
    """Returns the value the hash is derived from: the document id."""
    return self.doc_id

  def __hash__(self):
    key = self.__key()
    return hash(key)

  def __str__(self):
    return repr(self)
1309
1310
def _CopyDocumentToProtocolBuffer(document, pb):
  """Fills a document_pb.Document protocol buffer from a Document.

  Storage is set to DISK; the id and language are written only when they are
  non-empty; every field is copied in order; the rank is stored as the
  order id.

  Returns:
    The pb that was passed in.
  """
  pb.set_storage(document_pb.Document.DISK)
  doc_id = document.doc_id
  if doc_id:
    pb.set_id(doc_id.encode('utf-8'))
  language = document.language
  if language:
    pb.set_language(language.encode('utf-8'))
  for field in document.fields:
    _CopyFieldToProtocolBuffer(field, pb.add_field())
  pb.set_order_id(document.rank)
  return pb
1323
1324
def _NewFieldsFromPb(field_list):
  """Converts each field protocol buffer in field_list to a Field.

  Returns:
    A list of the Fields built by _NewFieldFromPb, in the original order.
  """
  fields = []
  for field_pb in field_list:
    fields.append(_NewFieldFromPb(field_pb))
  return fields
1328
1329
def _NewDocumentFromPb(doc_pb):
  """Builds a Document from a document_pb.Document protocol buffer.

  The language is passed as None when doc_pb carries none; the order id of
  doc_pb becomes the rank of the Document.
  """
  lang = _DecodeUTF8(doc_pb.language()) if doc_pb.has_language() else None
  doc_id = _DecodeUTF8(doc_pb.id())
  fields = _NewFieldsFromPb(doc_pb.field_list())
  return Document(doc_id=doc_id, fields=fields, language=lang,
                  rank=doc_pb.order_id())
1339
1340
1341 def _QuoteString(argument):
1342   return '"' + argument.replace('"', '\\\"') + '"'
1343
1344
class FieldExpression(object):
  """An expression computed for every result a search returns.

  For instance,
    FieldExpression(name='content_snippet',
                    expression='snippet("very important", content)')
  asks for a computed field 'content_snippet' to accompany each search
  result, holding HTML snippets of the 'content' field that match the
  query 'very important'.
  """

  MAXIMUM_EXPRESSION_LENGTH = 1000
  MAXIMUM_OPERATOR_LENGTH = 100

  def __init__(self, name, expression):
    """Initializer.

    Args:
      name: The name of the computed field for the expression.
      expression: The expression to evaluate and return in a field with
        given name in results. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    unicode_name = _ConvertToUnicode(name)
    self._name = _CheckFieldName(unicode_name)
    if expression is None:
      raise ValueError('expression must be a FieldExpression, got None')
    if not isinstance(expression, basestring):
      actual_type = expression.__class__.__name__
      raise TypeError('expression must be a FieldExpression, got %s' %
                      actual_type)
    unicode_expression = _ConvertToUnicode(expression)
    self._expression = _CheckExpression(unicode_expression)

  @property
  def name(self):
    """The name under which the expression is returned in search results."""
    return self._name

  @property
  def expression(self):
    """The string form of the expression returned in search results."""
    return self._expression

  def __repr__(self):
    attributes = [('name', self.name), ('expression', self.expression)]
    return _Repr(self, attributes)
1396
1397
1398 def _CopyFieldExpressionToProtocolBuffer(field_expression, pb):
1399   """Copies FieldExpression to a search_service_pb.FieldSpec_Expression."""
1400   pb.set_name(field_expression.name.encode('utf-8'))
1401   pb.set_expression(field_expression.expression.encode('utf-8'))
1402
1403
class SortOptions(object):
  """Represents a multi-dimensional sort of Documents.

   The code below sorts documents by product rating in descending order,
   then by cheapest product among similarly rated products, sorting at most
   1000 documents:

     SortOptions(expressions=[
         SortExpression(expression='rating',
             direction=SortExpression.DESCENDING, default_value=0),
         SortExpression(expression='price + tax',
             direction=SortExpression.ASCENDING, default_value=999999.99)],
         limit=1000)
  """

  def __init__(self, expressions=None, match_scorer=None, limit=1000):
    """Initializer.

    Args:
      expressions: An iterable of SortExpression representing a
        multi-dimensional sort of Documents.
      match_scorer: A match scorer specification which may be used to
        score documents or in a SortExpression combined with other features.
      limit: The limit on the number of documents to score or sort.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
    """
    self._match_scorer = match_scorer
    self._expressions = _GetList(expressions)
    for candidate in self._expressions:
      if isinstance(candidate, SortExpression):
        continue
      raise TypeError('expression must be a SortExpression, got %s' %
                      candidate.__class__.__name__)
    self._limit = _CheckSortLimit(limit)

  @property
  def expressions(self):
    """The list of SortExpression defining the multi-dimensional sort."""
    return self._expressions

  @property
  def match_scorer(self):
    """The match scorer used to score documents, if any."""
    return self._match_scorer

  @property
  def limit(self):
    """The cap on the number of documents to score or sort."""
    return self._limit

  def __repr__(self):
    attributes = [('match_scorer', self.match_scorer),
                  ('expressions', self.expressions),
                  ('limit', self.limit)]
    return _Repr(self, attributes)
1462
1463
class MatchScorer(object):
  """Scores documents according to term frequency.

  Adding a MatchScorer to a SortOptions like this:

      sort_opts = search.SortOptions(match_scorer=search.MatchScorer())

  sorts the documents in descending score order, with positive scores.
  To sort in ascending order instead, use:

      sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
          expressions=[search.SortExpression(
              expression='_score', direction=search.SortExpression.ASCENDING,
              default_value=0.0)])

  in which case the scores will be negative.
  """

  def __init__(self):
    """Initializer; accepts no arguments and stores no state."""

  def __repr__(self):
    no_attributes = []
    return _Repr(self, no_attributes)
1494
1495
class RescoringMatchScorer(MatchScorer):
  """Scores documents by term frequency weighted by document parts.

  Adding a RescoringMatchScorer to a SortOptions like this:

      sort_opts = search.SortOptions(match_scorer=search.RescoringMatchScorer())

  sorts the documents in descending score order, with positive scores.
  To sort in ascending order instead, use:

      sort_opts = search.SortOptions(match_scorer=search.RescoringMatchScorer(),
          expressions=[search.SortExpression(
              expression='_score', direction=search.SortExpression.ASCENDING,
              default_value=0.0)])

  in which case the scores will be negative.
  """

  def __init__(self):
    """Initializer; accepts no arguments and defers to MatchScorer."""
    super(RescoringMatchScorer, self).__init__()
1524
1525
def _CopySortExpressionToProtocolBuffer(sort_expression, pb):
  """Writes a SortExpression into a search_service_pb.SortSpec.

  The expression is stored UTF-8 encoded. The descending flag is only
  touched (set to False) for ASCENDING sorts. A string default is stored as
  text; a date or datetime default is converted with search_util.EpochTime
  and stored as numeric; any other default is stored as numeric unchanged.

  Returns:
    The pb that was passed in.
  """
  pb.set_sort_expression(sort_expression.expression.encode('utf-8'))
  ascending = sort_expression.direction == SortExpression.ASCENDING
  if ascending:
    pb.set_sort_descending(False)
  default = sort_expression.default_value
  if isinstance(default, basestring):
    pb.set_default_value_text(
        sort_expression.default_value.encode('utf-8'))
  elif isinstance(default, (datetime.datetime, datetime.date)):
    epoch = search_util.EpochTime(sort_expression.default_value)
    pb.set_default_value_numeric(epoch)
  else:
    pb.set_default_value_numeric(sort_expression.default_value)
  return pb
1540
1541
def _CopyMatchScorerToScorerSpecProtocolBuffer(match_scorer, limit, pb):
  """Copies a MatchScorer to a search_service_pb.ScorerSpec.

  Args:
    match_scorer: A MatchScorer or RescoringMatchScorer instance.
    limit: The value stored as the limit of the scorer spec.
    pb: The search_service_pb.ScorerSpec to fill in.

  Returns:
    The pb that was passed in.

  Raises:
    TypeError: If match_scorer is neither a MatchScorer nor a
      RescoringMatchScorer.
  """
  # RescoringMatchScorer subclasses MatchScorer, so it must be tested first.
  if isinstance(match_scorer, RescoringMatchScorer):
    pb.set_scorer(search_service_pb.ScorerSpec.RESCORING_MATCH_SCORER)
  elif isinstance(match_scorer, MatchScorer):
    pb.set_scorer(search_service_pb.ScorerSpec.MATCH_SCORER)
  else:
    raise TypeError(
        'match_scorer must be a MatchScorer or RescoringMatchScorer, '
        'got %s' % match_scorer.__class__.__name__)
  pb.set_limit(limit)
  return pb
1554
1555
1556 def _CopySortOptionsToProtocolBuffer(sort_options, params):
1557   """Copies the SortOptions into the SearchParams proto buf."""
1558   for expression in sort_options.expressions:
1559     sort_spec_pb = params.add_sort_spec()
1560     _CopySortExpressionToProtocolBuffer(expression, sort_spec_pb)
1561   if sort_options.match_scorer:
1562     scorer_spec = params.mutable_scorer_spec()
1563     _CopyMatchScorerToScorerSpecProtocolBuffer(
1564         sort_options.match_scorer, sort_options.limit, scorer_spec)
1565     scorer_spec.set_limit(sort_options.limit)
1566   else:
1567     params.mutable_scorer_spec().set_limit(sort_options.limit)
1568
1569
class SortExpression(object):
  """Sorts by a scoring expression supplied by the user.

  For instance, the following sorts documents on a numeric field named
  'length' in ascending order, using sys.maxint as the default for documents
  that have no 'length' field.

    SortExpression(expression='length',
                   direction=sort.SortExpression.ASCENDING,
                   default_value=sys.maxint)

  The next example sorts documents on a date field named 'published_date'
  in descending order, using 1999-12-31 as the default for documents that
  have no 'published_date' field.

    SortExpression(expression='published_date',
                   default_value=datetime.date(year=1999, month=12, day=31))

  The last example sorts documents on a text field named 'subject' in
  descending order, using '' as the default for documents that have no
  'subject' field.

    SortExpression(expression='subject')
  """

  # Prefer the highest code point; unichr raises ValueError when the
  # interpreter cannot represent it, in which case 0xffff is used instead.
  try:
    MAX_FIELD_VALUE = unichr(0x10ffff) * 80
  except ValueError:
    MAX_FIELD_VALUE = unichr(0xffff) * 80

  MIN_FIELD_VALUE = u''

  ASCENDING, DESCENDING = ('ASCENDING', 'DESCENDING')

  _DIRECTIONS = frozenset([ASCENDING, DESCENDING])

  def __init__(self, expression, direction=DESCENDING, default_value=''):
    """Initializer.

    Args:
      expression: An expression to be evaluated on each matching document
        to sort by. The expression must evaluate to a text or numeric value.
        The expression can simply be a field name, or some compound expression
        such as "_score + count(likes) * 0.1" which will add the score from a
        scorer to a count of the values of a likes field times 0.1. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.
      direction: The direction to sort the search results, either ASCENDING
        or DESCENDING
      default_value: The default value of the expression. The default_value is
        returned if expression cannot be calculated, for example, if the
        expression is a field name and no value for that named field exists.
        A text value must be specified for text sorts. A numeric value must be
        specified for numeric sorts. A date value must be specified for date
        sorts.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    self._expression = _ConvertToUnicode(expression)
    self._direction = self._CheckDirection(direction)
    if self._expression is None:
      raise TypeError('expression must be a SortExpression, got None')
    _CheckExpression(self._expression)
    self._default_value = default_value
    if isinstance(default_value, basestring):
      # Text defaults are normalized to unicode and validated as text.
      self._default_value = _ConvertToUnicode(default_value)
      _CheckText(self._default_value, 'default_value')
      return
    allowed_types = (int, long, float, datetime.date, datetime.datetime)
    if not isinstance(default_value, allowed_types):
      raise TypeError('default_value must be text, numeric or datetime, got %s'
                      % default_value.__class__.__name__)

  @property
  def expression(self):
    """The expression documents are sorted by."""
    return self._expression

  @property
  def direction(self):
    """The sort direction of the expression: ASCENDING or DESCENDING."""
    return self._direction

  @property
  def default_value(self):
    """The value used for the expression when none can be computed."""
    return self._default_value

  def _CheckDirection(self, direction):
    """Validates direction against _DIRECTIONS via _CheckEnum; returns it."""
    checked = _CheckEnum(direction, 'direction', values=self._DIRECTIONS)
    return checked

  def __repr__(self):
    attributes = [('expression', self.expression),
                  ('direction', self.direction),
                  ('default_value', self.default_value)]
    return _Repr(self, attributes)
1673
1674
class ScoredDocument(Document):
  """A document returned from a search, together with its scores."""

  def __init__(self, doc_id=None, fields=None, language='en',
               sort_scores=None, expressions=None, cursor=None, rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document which
        does not start with '!'. Whitespace is excluded from ids. If no id is
        provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      sort_scores: The list of scores assigned during sort evaluation. Each
        sort dimension is included. Positive scores are used for ascending
        sorts; negative scores for descending.
      expressions: The list of computed fields which are the result of
        expressions requested.
      cursor: A cursor associated with the document.
      rank: The rank of this document. A rank must be a non-negative integer
        less than sys.maxint. If not specified, the number of seconds since
        1st Jan 2011 is used. Documents are returned in descending order of
        their rank.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    super(ScoredDocument, self).__init__(doc_id=doc_id, fields=fields,
                                         language=language, rank=rank)
    scores = _GetList(sort_scores)
    self._sort_scores = self._CheckSortScores(scores)
    self._expressions = _GetList(expressions)
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  @property
  def sort_scores(self):
    """The scores assigned to the document during sort evaluation.

    There is one score per sort dimension. Ascending sorts use positive
    scores; descending sorts use negative ones.

    Returns:
      The list of numeric sort scores.
    """
    return self._sort_scores

  @property
  def expressions(self):
    """The computed fields produced by evaluating requested expressions.

    For example, suppose a request contains
      FieldExpression(name='snippet', 'snippet("good story", content)')
    which asks for a snippet field holding HTML snippets extracted from
    matching the query 'good story' against the field 'content'.
    Then a field like the following is returned in expressions for the
    search result:
      HtmlField(name='snippet', value='that was a <b>good story</b> to finish')

    Returns:
      The computed fields.
    """
    return self._expressions

  @property
  def cursor(self):
    """The cursor tied to this result, a point to continue a search from.

    For this cursor to be present, set the Index.cursor_type to
    Index.RESULT_CURSOR; otherwise it is None.

    Returns:
      The result cursor.
    """
    return self._cursor

  def _CheckSortScores(self, sort_scores):
    """Runs every score through _CheckNumber, then returns sort_scores."""
    for score in sort_scores:
      _CheckNumber(score, 'sort_scores')
    return sort_scores

  def __repr__(self):
    attributes = [('doc_id', self.doc_id),
                  ('fields', self.fields),
                  ('language', self.language),
                  ('rank', self.rank),
                  ('sort_scores', self.sort_scores),
                  ('expressions', self.expressions),
                  ('cursor', self.cursor)]
    return _Repr(self, attributes)
1769
1770
class SearchResults(object):
  """Holds the outcome of running a search request against an index.

  Iterating over a SearchResults walks its ScoredDocuments in order.
  """

  def __init__(self, number_found, results=None, cursor=None):
    """Initializer.

    Args:
      number_found: How many documents the query matched.
      results: The list of ScoredDocuments produced by the search request.
      cursor: A Cursor for continuing the search after the last of these
        results.

    Raises:
      TypeError: If cursor is neither None nor a Cursor, or another
        parameter has an invalid type.
      ValueError: If any of the parameters have an invalid value.
    """
    self._number_found = _CheckInteger(number_found, 'number_found')
    self._results = _GetList(results)
    cursor_is_acceptable = cursor is None or isinstance(cursor, Cursor)
    if not cursor_is_acceptable:
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  def __iter__(self):
    """Iterates over the ScoredDocuments in results."""
    return iter(self.results)

  @property
  def results(self):
    """Returns the list of ScoredDocuments that matched the query."""
    scored_documents = self._results
    return scored_documents

  @property
  def number_found(self):
    """Returns how many documents were found for the search.

    The value is an approximation rather than an exact count. For instance,
    with QueryOptions.number_found_accuracy set to 100, a number_found of
    100 or less is accurate.

    Returns:
      The number of documents found.
    """
    found = self._number_found
    return found

  @property
  def cursor(self):
    """Returns a cursor for continuing the search after the last result.

    A single cursor for the whole SearchResults is requested by searching
    with a Cursor whose per_result is False in QueryOptions; when none was
    requested this is expected to be None.

    Returns:
      The results cursor.
    """
    results_cursor = self._cursor
    return results_cursor

  def __repr__(self):
    attributes = [
        ('results', self.results),
        ('number_found', self.number_found),
        ('cursor', self.cursor),
    ]
    return _Repr(self, attributes)
1835
1836
class GetResponse(object):
  """Holds the outcome of running a get request against an index.

  For example, the following code shows how a response could be used
  to walk over the documents that were returned.

  response = index.get_range()
  for document in response:
    print "document ", document
  """

  def __init__(self, results=None):
    """Initializer.

    Args:
      results: The results returned from an index ordered by Id.

    Raises:
      TypeError: If any of the parameters have an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._results = _GetList(results)

  def __iter__(self):
    """Iterates over the entries of results."""
    return iter(self.results)

  @property
  def results(self):
    """Returns a list of results ordered by Id from the index."""
    ordered_results = self._results
    return ordered_results

  def __repr__(self):
    attributes = [('results', self.results)]
    return _Repr(self, attributes)
1872
1873
class Cursor(object):
  """Specifies how to get the next page of results in a search.

  A cursor returned in a previous set of search results to use as a starting
  point to retrieve the next set of results. This can get you better
  performance, and also improves the consistency of pagination through index
  updates.

  The following shows how to use the cursor to get the next page of results:

  # get the first set of results; the first cursor is used to specify
  # that cursors are to be returned in the SearchResults.
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=Cursor())))

  # get the next set of results
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=results.cursor)))

  If you want to continue search from any one of the ScoredDocuments in
  SearchResults, then you can set Cursor.per_result to True.

  # get the first set of results; the first cursor is used to specify
  # that cursors are to be returned in the SearchResults.
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=Cursor(per_result=True))))

  # this shows how to access the per_document cursors returned from a search
  per_document_cursor = None
  for scored_document in results:
    per_document_cursor = scored_document.cursor

  # get the next set of results
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=per_document_cursor)))
  """

  def __init__(self, web_safe_string=None, per_result=False):
    """Initializer.

    Args:
      web_safe_string: The cursor string returned from the search service to
        be interpreted by the search service to get the next set of results.
        A non-empty string must look like 'True:<cursor>' or
        'False:<cursor>'; the prefix then decides per_result, overriding
        the per_result argument.
      per_result: A bool when true will return a cursor per ScoredDocument in
        SearchResults, otherwise will return a single cursor for the whole
        SearchResults. If using offset this is ignored, as the user is
        responsible for calculating a next offset if any.

    Raises:
      ValueError: if the web_safe_string is not of required format.
    """
    self._web_safe_string = _CheckCursor(_ConvertToUnicode(web_safe_string))
    self._per_result = per_result
    # Define the attribute on every instance, so that reading it on a cursor
    # built without a web safe string yields None instead of raising
    # AttributeError.
    self._internal_cursor = None
    if self._web_safe_string:
      # Split on the first ':' only; the service-side cursor after the
      # prefix may itself contain colons.
      parts = self._web_safe_string.split(':', 1)
      if len(parts) != 2 or parts[0] not in ['True', 'False']:
        raise ValueError('invalid format for web_safe_string, got %s' %
                         self._web_safe_string)
      self._internal_cursor = parts[1]
      self._per_result = (parts[0] == 'True')

  @property
  def web_safe_string(self):
    """Returns the cursor string generated by the search service."""
    return self._web_safe_string

  @property
  def per_result(self):
    """Returns whether to return a cursor for each ScoredDocument in results."""
    return self._per_result

  def __repr__(self):
    return _Repr(self, [('web_safe_string', self.web_safe_string)])
1950
1951
1952 def _ToWebSafeString(per_result, internal_cursor):
1953   """Returns the web safe string combining per_result with internal cursor."""
1954   return str(per_result) + ':' + internal_cursor
1955
1956
def _CheckQuery(query):
  """Checks a query is a valid query string.

  Args:
    query: The query string to validate. An empty or whitespace-only query
      is accepted without being parsed.

  Returns:
    The query that was passed in.

  Raises:
    TypeError: If query is None.
    QueryError: If a non-blank query cannot be parsed by query_parser.
  """
  # _ValidateString may raise on its own for other invalid input (its
  # definition is elsewhere in this module).
  _ValidateString(query, 'query', MAXIMUM_QUERY_LENGTH, empty_ok=True)
  if query is None:
    raise TypeError('query must be unicode, got None')
  if query.strip():
    try:
      query_parser.Parse(query)
    except query_parser.QueryException:
      # The parser's own exception is deliberately replaced by the public
      # QueryError type.
      raise QueryError('Failed to parse query "%s"' % query)
  return query
1968
1969
def _CheckLimit(limit):
  """Validates the maximum number of documents a search may return.

  Args:
    limit: The value to validate; zero is not accepted and the upper bound
      is MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH.

  Returns:
    Whatever _CheckInteger returns for the limit.
  """
  checked_limit = _CheckInteger(
      limit, 'limit',
      upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH, zero_ok=False)
  return checked_limit
1975
1976
def _CheckOffset(offset):
  """Validates the offset into the list of matching documents.

  Args:
    offset: The value to validate; zero is accepted and the upper bound is
      MAXIMUM_SEARCH_OFFSET.

  Returns:
    Whatever _CheckInteger returns for the offset.
  """
  checked_offset = _CheckInteger(
      offset, 'offset',
      upper_bound=MAXIMUM_SEARCH_OFFSET, zero_ok=True)
  return checked_offset
1982
1983
def _CheckNumberFoundAccuracy(number_found_accuracy):
  """Validates the requested accuracy of SearchResults.number_found.

  Args:
    number_found_accuracy: The value to validate; zero is not accepted and
      the upper bound is MAXIMUM_NUMBER_FOUND_ACCURACY.

  Returns:
    Whatever _CheckInteger returns for the accuracy.
  """
  checked_accuracy = _CheckInteger(
      number_found_accuracy, 'number_found_accuracy',
      upper_bound=MAXIMUM_NUMBER_FOUND_ACCURACY, zero_ok=False)
  return checked_accuracy
1989
1990
def _CheckCursor(cursor):
  """Validates an optional cursor string against the maximum cursor length.

  Args:
    cursor: The cursor string to validate; an empty value is allowed.

  Returns:
    Whatever _ValidateString returns for the cursor.
  """
  checked_cursor = _ValidateString(
      cursor, 'cursor', _MAXIMUM_CURSOR_LENGTH, empty_ok=True)
  return checked_cursor
1995
1996
def _CheckNumberOfFields(returned_expressions, snippeted_fields,
                         returned_fields):
  """Checks the combined count of requested fields does not exceed the limit.

  Args:
    returned_expressions: The expressions requested to be returned.
    snippeted_fields: The names of fields requested to be snippeted.
    returned_fields: The names of fields requested to be returned.

  Raises:
    ValueError: If the three collections together hold more than
      MAXIMUM_FIELDS_RETURNED_PER_SEARCH entries.
  """
  requested = (returned_expressions, snippeted_fields, returned_fields)
  total = sum(len(group) for group in requested)
  if total <= MAXIMUM_FIELDS_RETURNED_PER_SEARCH:
    return
  raise ValueError(
      'too many fields, snippets or expressions to return  %d > maximum %d'
      % (total, MAXIMUM_FIELDS_RETURNED_PER_SEARCH))
2006
2007
class QueryOptions(object):
  """Options controlling how the results of a query are post-processed.

  The options cover sorting results, choosing which document fields come
  back, producing snippets of fields, and computing and sorting by complex
  scoring expressions.

  To access pages of search results at random, use an offset:

  # get the first set of results
  page_size = 10
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(limit=page_size)))

  # calculate pages
  pages = results.number_found / page_size

  # user chooses page and hence an offset into results
  next_page = ith * page_size

  # get the search results for that page
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(limit=page_size, offset=next_page)))
  """

  def __init__(self, limit=20, number_found_accuracy=None, cursor=None,
               offset=None, sort_options=None, returned_fields=None,
               ids_only=False, snippeted_fields=None,
               returned_expressions=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, asking for a single cursor for the
    SearchResults, sorting by subject, and returning the author, subject,
    and summary fields as well as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
            limit=20,
            cursor=Cursor(),
            sort_options=SortOptions(
                expressions=[
                    SortExpression(expression='subject')],
                limit=1000),
            returned_fields=['author', 'subject', 'summary'],
            snippeted_fields=['content'])))

    Args:
      limit: The limit on number of documents to return in results.
      number_found_accuracy: The minimum accuracy requirement for
        SearchResults.number_found. If set, the number_found will be
        accurate up to at least that number. For example, when set to 100,
        any SearchResults with number_found <= 100 is accurate. This option
        may add considerable latency/expense, especially when used with
        returned_fields.
      cursor: A Cursor describing where to get the next set of results,
        or to provide next cursors in SearchResults.
      offset: The number of documents to skip in search results. This is an
        alternative to using a query cursor, but allows random access into
        the results. Using offsets rather than cursors is more expensive.
        Only one of cursor and offset may be given. Using an offset means
        that no cursor is returned in SearchResults.cursor, nor in each
        ScoredDocument.cursor.
      sort_options: A SortOptions specifying a multi-dimensional sort over
        search results.
      returned_fields: An iterable of names of fields to return in search
        results.
      ids_only: Only return document ids, do not return any fields.
      snippeted_fields: An iterable of names of fields to snippet and return
        in search result expressions.
      returned_expressions: An iterable of FieldExpression to evaluate and
        return in search results.

    Raises:
      TypeError: If cursor is not a Cursor or sort_options is not a
        SortOptions.
      ValueError: If cursor and offset are used together, if ids_only and
        returned_fields are used together, or if too many fields, snippets
        and expressions are requested.
      ExpressionError: If one of the returned expression strings is not
        parseable.
    """
    self._limit = _CheckLimit(limit)
    self._number_found_accuracy = _CheckNumberFoundAccuracy(
        number_found_accuracy)
    if cursor is not None:
      if not isinstance(cursor, Cursor):
        raise TypeError('cursor must be a Cursor, got %s' %
                        cursor.__class__.__name__)
      if offset is not None:
        raise ValueError('cannot set cursor and offset together')
    self._cursor = cursor
    self._offset = _CheckOffset(offset)
    if not (sort_options is None or isinstance(sort_options, SortOptions)):
      raise TypeError('sort_options must be a SortOptions, got %s' %
                      sort_options.__class__.__name__)
    self._sort_options = sort_options

    field_names = _ConvertToUnicodeList(returned_fields)
    self._returned_fields = field_names
    _CheckFieldNames(field_names)
    self._ids_only = ids_only
    if ids_only and field_names:
      raise ValueError('cannot have ids_only and returned_fields set together')

    snippet_names = _ConvertToUnicodeList(snippeted_fields)
    self._snippeted_fields = snippet_names
    _CheckFieldNames(snippet_names)

    expressions = _ConvertToList(returned_expressions)
    self._returned_expressions = expressions
    for field_expression in expressions:
      _CheckFieldName(_ConvertToUnicode(field_expression.name))
      _CheckExpression(_ConvertToUnicode(field_expression.expression))
    _CheckNumberOfFields(expressions, snippet_names, field_names)

  @property
  def limit(self):
    """Returns a limit on number of documents to return in results."""
    value = self._limit
    return value

  @property
  def number_found_accuracy(self):
    """Returns minimum accuracy requirement for SearchResults.number_found."""
    value = self._number_found_accuracy
    return value

  @property
  def cursor(self):
    """Returns the Cursor for the query."""
    value = self._cursor
    return value

  @property
  def offset(self):
    """Returns the number of documents in search results to skip."""
    value = self._offset
    return value

  @property
  def sort_options(self):
    """Returns a SortOptions."""
    value = self._sort_options
    return value

  @property
  def returned_fields(self):
    """Returns an iterable of names of fields to return in search results."""
    value = self._returned_fields
    return value

  @property
  def ids_only(self):
    """Returns whether to return only document ids in search results."""
    value = self._ids_only
    return value

  @property
  def snippeted_fields(self):
    """Returns iterable of field names to snippet and return in results."""
    value = self._snippeted_fields
    return value

  @property
  def returned_expressions(self):
    """Returns iterable of FieldExpression to return in results."""
    value = self._returned_expressions
    return value

  def __repr__(self):
    attributes = [
        ('limit', self.limit),
        ('number_found_accuracy', self.number_found_accuracy),
        ('cursor', self.cursor),
        ('sort_options', self.sort_options),
        ('returned_fields', self.returned_fields),
        ('ids_only', self.ids_only),
        ('snippeted_fields', self.snippeted_fields),
        ('returned_expressions', self.returned_expressions),
    ]
    return _Repr(self, attributes)
2175
2176
def _CopyQueryOptionsObjectToProtocolBuffer(query, options, params):
  """Copies a QueryOptions object to a SearchParams proto buff.

  Derives the cursor type and the service-side cursor string from
  options.cursor, then delegates the actual copying to
  _CopyQueryOptionsToProtocolBuffer.

  Args:
    query: The query string; forwarded unchanged to
      _CopyQueryOptionsToProtocolBuffer.
    options: The QueryOptions whose settings are copied.
    params: The protobuf the settings are written to.
  """
  web_safe_string = None
  cursor_type = None
  offset = options.offset
  cursor = options.cursor
  if cursor:
    if cursor.per_result:
      cursor_type = search_service_pb.SearchParams.PER_RESULT
    else:
      cursor_type = search_service_pb.SearchParams.SINGLE
    # A cursor without a web safe string only asks for cursors to be
    # returned; there is no position to resume from, so nothing is sent.
    if isinstance(cursor, Cursor) and cursor.web_safe_string:
      web_safe_string = cursor._internal_cursor
  _CopyQueryOptionsToProtocolBuffer(
      query, offset, options.limit, options.number_found_accuracy,
      web_safe_string, cursor_type, options.ids_only, options.returned_fields,
      options.snippeted_fields, options.returned_expressions,
      options.sort_options, params)
2196
2197
2198 def _CopyQueryOptionsToProtocolBuffer(
2199     query, offset, limit, number_found_accuracy, cursor, cursor_type, ids_only,
2200     returned_fields, snippeted_fields, returned_expressions, sort_options,
2201     params):
2202   """Copies fields of QueryOptions to params protobuf."""
2203   if offset:
2204     params.set_offset(offset)
2205   params.set_limit(limit)
2206   if number_found_accuracy is not None:
2207     params.set_matched_count_accuracy(number_found_accuracy)
2208   if cursor:
2209     params.set_cursor(cursor.encode('utf-8'))
2210   if cursor_type is not None:
2211     params.set_cursor_type(cursor_type)
2212   if ids_only:
2213     params.set_keys_only(ids_only)
2214   if returned_fields or snippeted_fields or returned_expressions:
2215     field_spec_pb = params.mutable_field_spec()
2216     for field in returned_fields:
2217       field_spec_pb.add_name(field.encode('utf-8'))
2218     for snippeted_field in snippeted_fields:
2219       expression = u'snippet(%s, %s)' % (_QuoteString(query), snippeted_field)
2220       _CopyFieldExpressionToProtocolBuffer(
2221           FieldExpression(
2222               name=snippeted_field, expression=expression.encode('utf-8')),
2223           field_spec_pb.add_expression())
2224     for expression in returned_expressions:
2225       _CopyFieldExpressionToProtocolBuffer(
2226           expression, field_spec_pb.add_expression())
2227
2228   if sort_options is not None:
2229     _CopySortOptionsToProtocolBuffer(sort_options, params)
2230
2231
class Query(object):
  """Represents a request on the search service to query the index."""

  def __init__(self, query_string, options=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, asking for a single cursor for the
    results, sorting by subject, and returning the author, subject, and
    summary fields as well as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
              limit=20,
              cursor=Cursor(),
              sort_options=SortOptions(
                  expressions=[
                      SortExpression(expression='subject')],
                  limit=1000),
              returned_fields=['author', 'subject', 'summary'],
              snippeted_fields=['content'])))

    In order to get a Cursor, you specify a Cursor in QueryOptions.cursor
    and extract the Cursor for the next request from results.cursor to
    continue from the last found document, as shown below:

      results = index.search(
          Query(query_string='subject:first good',
                options=QueryOptions(cursor=results.cursor)))

    Args:
      query_string: The query to match against documents in the index. A query
        is a boolean expression containing terms.  For example, the query
          'job tag:"very important" sent <= 2011-02-28'
        finds documents with the term job in any field, that contain the
        phrase "very important" in a tag field, and a sent date up to and
        including 28th February, 2011.  You can use combinations of
          '(cat OR feline) food NOT dog'
        to find documents which contain the term cat or feline as well as food,
        but do not mention the term dog. A further example,
          'category:televisions brand:sony price >= 300 price < 400'
        will return documents which have televisions in a category field, a
        sony brand and a price field which is 300 (inclusive) to 400
        (exclusive).  See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of expressions that can be used in queries.
      options: A QueryOptions describing post-processing of search results.
        It is stored as given, without validation.

    Raises:
      QueryError: If the query string is not parseable.
    """
    self._query_string = _ConvertToUnicode(query_string)
    _CheckQuery(self._query_string)
    self._options = options

  @property
  def query_string(self):
    """Returns the query string to be applied to search service."""
    return self._query_string

  @property
  def options(self):
    """Returns QueryOptions defining post-processing on the search results."""
    return self._options

  def __repr__(self):
    # Rendered through _Repr like the other public classes in this module.
    return _Repr(self, [('query_string', self.query_string),
                        ('options', self.options)])
2301
2302
2303 def _CopyQueryToProtocolBuffer(query, params):
2304   """Copies Query object to params protobuf."""
2305   params.set_query(query.encode('utf-8'))
2306
2307
def _CopyQueryObjectToProtocolBuffer(query, params):
  """Copies a Query object, query string and options, to the params protobuf.

  Args:
    query: The Query to copy. When its options are None, a default
      QueryOptions() is used in their place.
    params: The protobuf that receives the query and its options.
  """
  query_string = query.query_string
  _CopyQueryToProtocolBuffer(query_string, params)
  effective_options = query.options
  if effective_options is None:
    effective_options = QueryOptions()
  _CopyQueryOptionsObjectToProtocolBuffer(
      query_string, effective_options, params)
2314
2315
2316 class Index(object):
2317   """Represents an index allowing indexing, deleting and searching documents.
2318
2319   The following code fragment shows how to add documents, then search the
2320   index for documents matching a query.
2321
2322     # Get the index.
2323     index = Index(name='index-name')
2324
2325     # Create a document.
2326     doc = Document(doc_id='document-id',
2327                    fields=[TextField(name='subject', value='my first email'),
2328                            HtmlField(name='body',
2329                                      value='<html>some content here</html>')])
2330
2331     # Index the document.
2332     try:
2333       index.put(doc)
2334     except search.Error, e:
2335       # possibly retry indexing or log error
2336
2337     # Query the index.
2338     try:
2339       results = index.search('subject:first body:here')
2340
2341       # Iterate through the search results.
2342       for scored_document in results:
2343          print scored_document
2344
2345     except search.Error, e:
2346       # possibly log the failure
2347
2348   Once an index is created with a given specification, that specification is
2349   immutable.
2350
2351   Search results may contain some out of date documents. However, any two
2352   changes to any document stored in an index are applied in the correct order.
2353   """
2354
2355
2356
2357   RESPONSE_CURSOR, RESULT_CURSOR = ('RESPONSE_CURSOR', 'RESULT_CURSOR')
2358
2359   _CURSOR_TYPES = frozenset([RESPONSE_CURSOR, RESULT_CURSOR])
2360
2361   SEARCH, DATASTORE, CLOUD_STORAGE = ('SEARCH', 'DATASTORE', 'CLOUD_STORAGE')
2362
2363   _SOURCES = frozenset([SEARCH, DATASTORE, CLOUD_STORAGE])
2364
2365   def __init__(self, name, namespace=None, source=SEARCH):
2366     """Initializer.
2367
2368     Args:
2369       name: The name of the index. An index name must be a visible printable
2370         ASCII string not starting with '!'. Whitespace characters are excluded.
2371       namespace: The namespace of the index name. If not set, then the current
2372         namespace is used.
2373       source: Deprecated as of 1.7.6. The source of
2374         the index:
2375           SEARCH - The Index was created by adding documents throught this
2376             search API.
2377           DATASTORE - The Index was created as a side-effect of putting entities
2378             into Datastore.
2379           CLOUD_STORAGE - The Index was created as a side-effect of adding
2380             objects into a Cloud Storage bucket.
2381     Raises:
2382       TypeError: If an unknown attribute is passed.
2383       ValueError: If invalid namespace is given.
2384     """
2385     if source not in self._SOURCES:
2386       raise ValueError('source must be one of %s' % self._SOURCES)
2387     if source is not self.SEARCH:
2388       warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
2389     self._source = source
2390     self._name = _CheckIndexName(_ConvertToUnicode(name))
2391     self._namespace = _ConvertToUnicode(namespace)
2392     if self._namespace is None:
2393       self._namespace = _ConvertToUnicode(namespace_manager.get_namespace())
2394     if self._namespace is None:
2395       self._namespace = u''
2396     namespace_manager.validate_namespace(self._namespace, exception=ValueError)
2397     self._schema = None
2398     self._storage_usage = None
2399     self._storage_limit = None
2400
2401   @property
2402   def schema(self):
2403     """Returns the schema mapping field names to list of types supported.
2404
2405     Only valid for Indexes returned by search.get_indexes method."""
2406     return self._schema
2407
2408   @property
2409   def storage_usage(self):
2410     """The approximate number of bytes used by this index.
2411
2412     The number may be slightly stale, as it may not reflect the
2413     results of recent changes.
2414
2415     Returns None for indexes not obtained from search.get_indexes.
2416
2417     """
2418     return self._storage_usage
2419
2420   @property
2421   def storage_limit(self):
2422     """The maximum allowable storage for this index, in bytes.
2423
2424     Returns None for indexes not obtained from search.get_indexes."""
2425     return self._storage_limit
2426
2427   @property
2428   def name(self):
2429     """Returns the name of the index."""
2430     return self._name
2431
2432   @property
2433   def namespace(self):
2434     """Returns the namespace of the name of the index."""
2435     return self._namespace
2436
2437   @property
2438   def source(self):
2439     """Returns the source of the index.
2440
2441     Deprecated: from 1.7.6, source is no longer available."""
2442     warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
2443     return self._source
2444
2445   def __eq__(self, other):
2446     return (isinstance(other, self.__class__)
2447             and self.__dict__ == other.__dict__)
2448
2449   def __ne__(self, other):
2450     return not self.__eq__(other)
2451
2452   def __hash__(self):
2453     return hash((self._name, self._namespace))
2454
2455   def __repr__(self):
2456
2457     return _Repr(self, [('name', self.name), ('namespace', self.namespace),
2458                         ('source', self._source),
2459                         ('schema', self.schema),
2460                         ('storage_usage', self.storage_usage),
2461                         ('storage_limit', self.storage_limit)])
2462
2463   def _NewPutResultFromPb(self, status_pb, doc_id):
2464     """Constructs PutResult from RequestStatus pb and doc_id."""
2465     message = None
2466     if status_pb.has_error_detail():
2467       message = _DecodeUTF8(status_pb.error_detail())
2468     code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code(),
2469                                          OperationResult.INTERNAL_ERROR)
2470     return PutResult(code=code, message=message, id=_DecodeUTF8(doc_id))
2471
2472   def _NewPutResultList(self, response):
2473     return [self._NewPutResultFromPb(status, doc_id)
2474             for status, doc_id in zip(response.status_list(),
2475                                       response.doc_id_list())]
2476
2477   @datastore_rpc._positional(2)
2478   def put(self, documents, deadline=None):
2479     """Index the collection of documents.
2480
2481     If any of the documents are already in the index, then reindex them with
2482     their corresponding fresh document.
2483
2484     Args:
2485       documents: A Document or iterable of Documents to index.
2486
2487     Kwargs:
2488       deadline: Deadline for RPC call in seconds; if None use the default.
2489
2490     Returns:
2491       A list of PutResult, one per Document requested to be indexed.
2492
2493     Raises:
2494       PutError: If one or more documents failed to index or
2495         number indexed did not match requested.
2496       TypeError: If an unknown attribute is passed.
2497       ValueError: If documents is not a Document or iterable of Document
2498         or number of the documents is larger than
2499         MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
2500     """
2501
2502     if isinstance(documents, basestring):
2503       raise TypeError('documents must be a Document or sequence of '
2504                       'Documents, got %s' % documents.__class__.__name__)
2505     try:
2506       docs = list(iter(documents))
2507     except TypeError:
2508       docs = [documents]
2509
2510     if not docs:
2511       return []
2512
2513     if len(docs) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
2514       raise ValueError('too many documents to index')
2515
2516     request = search_service_pb.IndexDocumentRequest()
2517     response = search_service_pb.IndexDocumentResponse()
2518
2519     params = request.mutable_params()
2520     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2521
2522     seen_docs = {}
2523     for document in docs:
2524       doc_id = document.doc_id
2525       if doc_id:
2526         if doc_id in seen_docs:
2527           if document != seen_docs[doc_id]:
2528             raise ValueError(
2529                 'Different documents with the same ID found in the '
2530                 'same call to Index.put()')
2531
2532
2533           continue
2534         seen_docs[doc_id] = document
2535       doc_pb = params.add_document()
2536       _CopyDocumentToProtocolBuffer(document, doc_pb)
2537
2538     _MakeSyncSearchServiceCall('IndexDocument', request, response, deadline)
2539
2540     results = self._NewPutResultList(response)
2541
2542     if response.status_size() != len(params.document_list()):
2543       raise PutError('did not index requested number of documents', results)
2544
2545     for status in response.status_list():
2546       if status.code() != search_service_pb.SearchServiceError.OK:
2547         raise PutError(
2548             _ConcatenateErrorMessages(
2549                 'one or more put document operations failed', status), results)
2550     return results
2551
2552   def _NewDeleteResultFromPb(self, status_pb, doc_id):
2553     """Constructs DeleteResult from RequestStatus pb and doc_id."""
2554     message = None
2555     if status_pb.has_error_detail():
2556       message = _DecodeUTF8(status_pb.error_detail())
2557     code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code(),
2558                                          OperationResult.INTERNAL_ERROR)
2559
2560     return DeleteResult(code=code, message=message, id=doc_id)
2561
2562   def _NewDeleteResultList(self, document_ids, response):
2563     return [self._NewDeleteResultFromPb(status, doc_id)
2564             for status, doc_id in zip(response.status_list(), document_ids)]
2565
2566   @datastore_rpc._positional(2)
2567   def delete(self, document_ids, deadline=None):
2568     """Delete the documents with the corresponding document ids from the index.
2569
2570     If no document exists for the identifier in the list, then that document
2571     identifier is ignored.
2572
2573     Args:
2574       document_ids: A single identifier or list of identifiers of documents
2575         to delete.
2576
2577     Kwargs:
2578       deadline: Deadline for RPC call in seconds; if None use the default.
2579
2580     Raises:
2581       DeleteError: If one or more documents failed to remove or
2582         number removed did not match requested.
2583       ValueError: If document_ids is not a string or iterable of valid document
2584         identifiers or number of document ids is larger than
2585         MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
2586     """
2587     doc_ids = _ConvertToList(document_ids)
2588     if not doc_ids:
2589       return
2590
2591     if len(doc_ids) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
2592       raise ValueError('too many documents to delete')
2593
2594     request = search_service_pb.DeleteDocumentRequest()
2595     response = search_service_pb.DeleteDocumentResponse()
2596     params = request.mutable_params()
2597     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2598     for document_id in doc_ids:
2599       _CheckDocumentId(document_id)
2600       params.add_doc_id(document_id)
2601
2602     _MakeSyncSearchServiceCall('DeleteDocument', request, response,
2603                                deadline)
2604
2605     results = self._NewDeleteResultList(doc_ids, response)
2606
2607     if response.status_size() != len(doc_ids):
2608       raise DeleteError(
2609           'did not delete requested number of documents', results)
2610
2611     for status in response.status_list():
2612       if status.code() != search_service_pb.SearchServiceError.OK:
2613         raise DeleteError(
2614             _ConcatenateErrorMessages(
2615                 'one or more delete document operations failed', status),
2616             results)
2617
2618   def delete_schema(self):
2619     """Deprecated in 1.7.4. Delete the schema from the index.
2620
2621     We are deprecating this method and replacing with more general schema
2622     and index managment.
2623
2624     A possible use may be remove typed fields which are no longer used. After
2625     you delete the schema, you need to index one or more documents to rebuild
2626     the schema. Until you re-index some documents, searches may fail, especially
2627     searches using field restricts.
2628
2629     Raises:
2630       DeleteError: If the schema failed to be deleted.
2631     """
2632     warnings.warn('delete_schema is deprecated in 1.7.4.',
2633                   DeprecationWarning, stacklevel=2)
2634     request = search_service_pb.DeleteSchemaRequest()
2635     response = search_service_pb.DeleteSchemaResponse()
2636     params = request.mutable_params()
2637     _CopyMetadataToProtocolBuffer(self, params.add_index_spec())
2638
2639     _MakeSyncSearchServiceCall('DeleteSchema', request, response, None)
2640
2641     results = self._NewDeleteResultList([self.name], response)
2642
2643     if response.status_size() != 1:
2644       raise DeleteError('did not delete exactly one schema', results)
2645
2646     status = response.status_list()[0]
2647     if status.code() != search_service_pb.SearchServiceError.OK:
2648       raise DeleteError(
2649           _ConcatenateErrorMessages('delete schema operation failed', status),
2650           results)
2651
2652   def _NewScoredDocumentFromPb(self, doc_pb, sort_scores, expressions, cursor):
2653     """Constructs a Document from a document_pb.Document protocol buffer."""
2654     lang = None
2655     if doc_pb.has_language():
2656       lang = _DecodeUTF8(doc_pb.language())
2657     return ScoredDocument(
2658         doc_id=_DecodeUTF8(doc_pb.id()),
2659         fields=_NewFieldsFromPb(doc_pb.field_list()),
2660         language=lang, rank=doc_pb.order_id(), sort_scores=sort_scores,
2661         expressions=_NewFieldsFromPb(expressions), cursor=cursor)
2662
2663   def _NewSearchResults(self, response, cursor):
2664     """Returns a SearchResults populated from a search_service response pb."""
2665     results = []
2666     for result_pb in response.result_list():
2667       per_result_cursor = None
2668       if result_pb.has_cursor():
2669         if isinstance(cursor, Cursor):
2670
2671           per_result_cursor = Cursor(web_safe_string=_ToWebSafeString(
2672               cursor.per_result, _DecodeUTF8(result_pb.cursor())))
2673       results.append(
2674           self._NewScoredDocumentFromPb(
2675               result_pb.document(), result_pb.score_list(),
2676               result_pb.expression_list(), per_result_cursor))
2677     results_cursor = None
2678     if response.has_cursor():
2679       if isinstance(cursor, Cursor):
2680
2681         results_cursor = Cursor(web_safe_string=_ToWebSafeString(
2682             cursor.per_result, _DecodeUTF8(response.cursor())))
2683     return SearchResults(
2684         results=results, number_found=response.matched_count(),
2685         cursor=results_cursor)
2686
2687   @datastore_rpc._positional(2)
2688   def get(self, doc_id, deadline=None):
2689     """Retrieve a document by document ID.
2690
2691     Args:
2692       doc_id: The ID of the document to retreive.
2693
2694     Kwargs:
2695       deadline: Deadline for RPC call in seconds; if None use the default.
2696
2697     Returns:
2698       If the document ID exists, returns the associated document. Otherwise,
2699       returns None.
2700
2701     Raises:
2702       TypeError: If any of the parameters have invalid types, or an unknown
2703         attribute is passed.
2704       ValueError: If any of the parameters have invalid values (e.g., a
2705         negative deadline).
2706     """
2707     response = self.get_range(start_id=doc_id, limit=1, deadline=deadline)
2708     if response.results and response.results[0].doc_id == doc_id:
2709       return response.results[0]
2710     return None
2711
2712   @datastore_rpc._positional(2)
2713   def search(self, query, deadline=None, **kwargs):
2714     """Search the index for documents matching the query.
2715
2716     For example, the following code fragment requests a search for
2717     documents where 'first' occurs in subject and 'good' occurs anywhere,
2718     returning at most 20 documents, starting the search from 'cursor token',
2719     returning another single cursor for the response, sorting by subject in
2720     descending order, returning the author, subject, and summary fields as well
2721     as a snippeted field content.
2722
2723       results = index.search(
2724           query=Query('subject:first good',
2725               options=QueryOptions(limit=20,
2726                   cursor=Cursor(),
2727                   sort_options=SortOptions(
2728                       expressions=[SortExpression(expression='subject')],
2729                       limit=1000),
2730                   returned_fields=['author', 'subject', 'summary'],
2731                   snippeted_fields=['content'])))
2732
2733     The following code fragment shows how to use a results cursor
2734
2735       cursor = results.cursor
2736       for result in response:
2737          # process result
2738
2739       results = index.search(
2740           Query('subject:first good', options=QueryOptions(cursor=cursor)))
2741
2742     The following code fragment shows how to use a per_result cursor
2743
2744       results = index.search(
2745           query=Query('subject:first good',
2746               options=QueryOptions(limit=20,
2747                   cursor=Cursor(per_result=True),
2748                   ...)))
2749
2750       cursor = None
2751       for result in results:
2752          cursor = result.cursor
2753
2754       results = index.search(
2755           Query('subject:first good', options=QueryOptions(cursor=cursor)))
2756
2757     Args:
2758       query: The Query to match against documents in the index.
2759
2760     Kwargs:
2761       deadline: Deadline for RPC call in seconds; if None use the default.
2762
2763     Returns:
2764       A SearchResults containing a list of documents matched, number returned
2765       and number matched by the query.
2766
2767     Raises:
2768       TypeError: If any of the parameters have invalid types, or an unknown
2769         attribute is passed.
2770       ValueError: If any of the parameters have invalid values (e.g., a
2771         negative deadline).
2772     """
2773
2774
2775
2776
2777
2778     app_id = kwargs.pop('app_id', None)
2779     if kwargs:
2780       raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
2781
2782     request = search_service_pb.SearchRequest()
2783     if app_id:
2784       request.set_app_id(app_id)
2785
2786     params = request.mutable_params()
2787     if isinstance(query, basestring):
2788       query = Query(query_string=query)
2789     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2790     _CopyQueryObjectToProtocolBuffer(query, params)
2791
2792     response = search_service_pb.SearchResponse()
2793
2794     _MakeSyncSearchServiceCall('Search', request, response, deadline)
2795
2796     _CheckStatus(response.status())
2797     cursor = None
2798     if query.options:
2799       cursor = query.options.cursor
2800     return self._NewSearchResults(response, cursor)
2801
2802   def _NewGetResponse(self, response):
2803     """Returns a GetResponse from the list_documents response pb."""
2804     documents = []
2805     for doc_proto in response.document_list():
2806       documents.append(_NewDocumentFromPb(doc_proto))
2807
2808     return GetResponse(results=documents)
2809
2810   def _GetRange(self, start_id=None, include_start_object=True,
2811                 limit=100, ids_only=False, deadline=None, app_id=None):
2812     """Get a range of objects in the index, in id order in a response."""
2813     request = search_service_pb.ListDocumentsRequest()
2814     if app_id:
2815       request.set_app_id(app_id)
2816
2817     params = request.mutable_params()
2818     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2819
2820     if start_id:
2821       params.set_start_doc_id(start_id)
2822     params.set_include_start_doc(include_start_object)
2823
2824     params.set_limit(_CheckInteger(
2825         limit, 'limit', zero_ok=False,
2826         upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH))
2827     params.set_keys_only(ids_only)
2828
2829     response = search_service_pb.ListDocumentsResponse()
2830     _MakeSyncSearchServiceCall('ListDocuments', request, response, deadline)
2831
2832     _CheckStatus(response.status())
2833     return response
2834
2835   @datastore_rpc._positional(5)
2836   def get_range(self, start_id=None, include_start_object=True,
2837                 limit=100, ids_only=False, deadline=None, **kwargs):
2838     """Get a range of Documents in the index, in id order.
2839
2840     Args:
2841       start_id: String containing the Id from which to list
2842         Documents from. By default, starts at the first Id.
2843       include_start_object: If true, include the Document with the
2844         Id specified by the start_id parameter.
2845       limit: The maximum number of Documents to return.
2846       ids_only: If true, the Documents returned only contain their keys.
2847
2848     Kwargs:
2849       deadline: Deadline for RPC call in seconds; if None use the default.
2850
2851     Returns:
2852       A GetResponse containing a list of Documents, ordered by Id.
2853
2854     Raises:
2855       Error: Some subclass of Error is raised if an error occurred processing
2856         the request.
2857       TypeError: If any of the parameters have invalid types, or an unknown
2858         attribute is passed.
2859       ValueError: If any of the parameters have invalid values (e.g., a
2860         negative deadline).
2861     """
2862
2863     app_id = kwargs.pop('app_id', None)
2864     if kwargs:
2865       raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
2866     response = self._GetRange(
2867         start_id=start_id, include_start_object=include_start_object,
2868         limit=limit, ids_only=ids_only, deadline=deadline, app_id=app_id)
2869     return self._NewGetResponse(response)
2870
2871
# Maps the Index cursor-type constants (None meaning no cursor) to the
# corresponding search_service_pb.SearchParams cursor type values.
_CURSOR_TYPE_PB_MAP = {
    None: search_service_pb.SearchParams.NONE,
    Index.RESPONSE_CURSOR: search_service_pb.SearchParams.SINGLE,
    Index.RESULT_CURSOR: search_service_pb.SearchParams.PER_RESULT,
}
2877
2878
2879
# Maps the Index source constants to search_service_pb.IndexSpec source
# values; used when copying an Index into an IndexSpec pb.
_SOURCES_TO_PB_MAP = {
    Index.SEARCH: search_service_pb.IndexSpec.SEARCH,
    Index.DATASTORE: search_service_pb.IndexSpec.DATASTORE,
    Index.CLOUD_STORAGE: search_service_pb.IndexSpec.CLOUD_STORAGE,
}
2884
2885
2886
# Maps search_service_pb.IndexSpec source values back to the Index source
# constants; used when building an Index from an IndexSpec pb.
_SOURCE_PB_TO_SOURCES_MAP = {
    search_service_pb.IndexSpec.SEARCH: Index.SEARCH,
    search_service_pb.IndexSpec.DATASTORE: Index.DATASTORE,
    search_service_pb.IndexSpec.CLOUD_STORAGE: Index.CLOUD_STORAGE,
}
2891
2892
def _CopyMetadataToProtocolBuffer(index, spec_pb):
  """Copies an Index's specification into a search_service_pb.IndexSpec.

  Args:
    index: The Index whose name, namespace and source are copied.
    spec_pb: The IndexSpec pb to fill in. The name and namespace are stored
      UTF-8 encoded.
  """
  for setter, text in ((spec_pb.set_name, index.name),
                       (spec_pb.set_namespace, index.namespace)):
    setter(text.encode('utf-8'))

  source = index._source
  if source == Index.SEARCH:
    # The source is only written to the pb when it is not Index.SEARCH.
    return
  spec_pb.set_source(_SOURCES_TO_PB_MAP.get(source))
2901
2902
# Maps document_pb.FieldValue content types to the public Field type
# constants; used when building a schema from field type pbs.
_FIELD_TYPE_MAP = {
    document_pb.FieldValue.TEXT: Field.TEXT,
    document_pb.FieldValue.HTML: Field.HTML,
    document_pb.FieldValue.ATOM: Field.ATOM,
    document_pb.FieldValue.DATE: Field.DATE,
    document_pb.FieldValue.NUMBER: Field.NUMBER,
    document_pb.FieldValue.GEO: Field.GEO_POINT,
}
2911
2912
def _NewSchemaFromPb(field_type_pb_list):
  """Creates map of field name to type list from document_pb.FieldTypes list.

  Args:
    field_type_pb_list: An iterable of FieldTypes pbs, each exposing name()
      and type_list().

  Returns:
    A dict mapping each decoded field name to the list of public Field types
    reported for it, in the order encountered. Names whose type_list is
    empty do not appear in the dict.

  Raises:
    KeyError: If a type is not present in _FIELD_TYPE_MAP.
  """
  schema = {}
  for types_pb in field_type_pb_list:
    for pb_type in types_pb.type_list():
      public_type = _FIELD_TYPE_MAP[pb_type]
      decoded_name = _DecodeUTF8(types_pb.name())
      # Start a new list on first sight of a name, then accumulate.
      schema.setdefault(decoded_name, []).append(public_type)
  return schema
2925
2926
def _NewIndexFromIndexSpecPb(index_spec_pb):
  """Creates an Index from a search_service_pb.IndexSpec.

  Args:
    index_spec_pb: The IndexSpec pb supplying the name, source and, when
      present, the namespace.

  Returns:
    An Index. The namespace argument is only passed to the constructor when
    the pb has a namespace set.
  """
  source = _SOURCE_PB_TO_SOURCES_MAP.get(index_spec_pb.source())
  if not index_spec_pb.has_namespace():
    return Index(name=index_spec_pb.name(), source=source)
  return Index(name=index_spec_pb.name(),
               namespace=index_spec_pb.namespace(),
               source=source)
2938
2939
def _NewIndexFromPb(index_metadata_pb):
  """Creates an Index from a search_service_pb.IndexMetadata.

  Args:
    index_metadata_pb: The IndexMetadata pb describing the index.

  Returns:
    An Index built from the pb's index spec. Its schema is filled in when the
    pb lists any fields, and its storage usage and limit when the pb has
    storage information.
  """
  index = _NewIndexFromIndexSpecPb(index_metadata_pb.index_spec())

  field_pbs = index_metadata_pb.field_list()
  if field_pbs:
    index._schema = _NewSchemaFromPb(field_pbs)

  if index_metadata_pb.has_storage():
    storage_pb = index_metadata_pb.storage()
    index._storage_usage = storage_pb.amount_used()
    index._storage_limit = storage_pb.limit()
  return index
2949
2950
2951 def _MakeSyncSearchServiceCall(call, request, response, deadline):
2952   """Make a synchronous call to search service.
2953
2954   If the deadline is not None, waits only until the deadline expires.
2955
2956   Args:
2957     call: Method name to call, as a string
2958     request: The request object
2959     response: The response object
2960
2961   Kwargs:
2962     deadline: Deadline for RPC call in seconds; if None use the default.
2963
2964   Raises:
2965     TypeError: if the deadline is not a number and is not None.
2966     ValueError: If the deadline is less than zero.
2967   """
2968   try:
2969     if deadline is None:
2970       apiproxy_stub_map.MakeSyncCall('search', call, request, response)
2971     else:
2972
2973
2974       if (not isinstance(deadline, (int, long, float))
2975           or isinstance(deadline, (bool,))):
2976         raise TypeError('deadline argument should be int/long/float (%r)'
2977                         % (deadline,))
2978       if deadline <= 0:
2979         raise ValueError('deadline argument must be > 0 (%s)' % (deadline,))
2980       rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline)
2981       rpc.make_call(call, request, response)
2982       rpc.wait()
2983       rpc.check_success()
2984   except apiproxy_errors.ApplicationError, e:
2985     raise _ToSearchError(e)