python/google/appengine/api/search/search.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2007 Google Inc.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #     http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16 #
  17
  18
  19 """A Python Search API used by app developers.
  20
  21 Contains methods used to interface with Search API.
  22 Contains API classes that forward to apiproxy.
  23 """
  24
  25
  26
  27
  28
  29
  30 import datetime
  31 import logging
  32 import re
  33 import string
  34 import sys
  35 import warnings
  36
  37 from google.appengine.datastore import document_pb
  38 from google.appengine.api import apiproxy_stub_map
  39 from google.appengine.api import datastore_types
  40 from google.appengine.api import namespace_manager
  41 from google.appengine.api.search import expression_parser
  42 from google.appengine.api.search import query_parser
  43 from google.appengine.api.search import search_service_pb
  44 from google.appengine.api.search import search_util
  45 from google.appengine.datastore import datastore_rpc
  46 from google.appengine.runtime import apiproxy_errors
  47
  48
# Public names of this module: the names bound by a wildcard import
# ("from ... import *").
__all__ = [
    'AtomField',
    'ConcurrentTransactionError',
    'Cursor',
    'DateField',
    'DeleteError',
    'DeleteResult',
    'Document',
    'DOCUMENT_ID_FIELD_NAME',
    'Error',
    'ExpressionError',
    'Field',
    'FieldExpression',
    'HtmlField',
    'GeoField',
    'GeoPoint',
    'get_indexes',
    'get_indexes_async',
    'GetResponse',
    'Index',
    'InternalError',
    'InvalidRequest',
    'LANGUAGE_FIELD_NAME',
    'MatchScorer',
    'MAXIMUM_DOCUMENT_ID_LENGTH',
    'MAXIMUM_DOCUMENTS_PER_PUT_REQUEST',
    'MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH',
    'MAXIMUM_EXPRESSION_LENGTH',
    'MAXIMUM_FIELD_ATOM_LENGTH',
    'MAXIMUM_FIELD_NAME_LENGTH',
    'MAXIMUM_FIELD_VALUE_LENGTH',
    'MAXIMUM_FIELDS_RETURNED_PER_SEARCH',
    'MAXIMUM_GET_INDEXES_OFFSET',
    'MAXIMUM_INDEX_NAME_LENGTH',
    'MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST',
    'MAXIMUM_NUMBER_FOUND_ACCURACY',
    'MAXIMUM_QUERY_LENGTH',
    'MAXIMUM_SEARCH_OFFSET',
    'MAXIMUM_SORTED_DOCUMENTS',
    'MAX_DATE',
    'MAX_NUMBER_VALUE',
    'MIN_DATE',
    'MIN_NUMBER_VALUE',
    'NumberField',
    'OperationResult',
    'PutError',
    'PutResult',
    'Query',
    'QueryError',
    'QueryOptions',
    'RANK_FIELD_NAME',
    'RescoringMatchScorer',
    'SCORE_FIELD_NAME',
    'ScoredDocument',
    'SearchResults',
    'SortExpression',
    'SortOptions',
    'TextField',
    'Timeout',
    'TIMESTAMP_FIELD_NAME',
    'TransientError',
    ]
 111
 112 MAXIMUM_INDEX_NAME_LENGTH = 100
 113 MAXIMUM_FIELD_VALUE_LENGTH = 1024 * 1024
 114 MAXIMUM_FIELD_ATOM_LENGTH = 500
 115 MAXIMUM_FIELD_NAME_LENGTH = 500
 116 MAXIMUM_DOCUMENT_ID_LENGTH = 500
 117 MAXIMUM_DOCUMENTS_PER_PUT_REQUEST = 200
 118 MAXIMUM_EXPRESSION_LENGTH = 5000
 119 MAXIMUM_QUERY_LENGTH = 2000
 120 MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH = 1000
 121 MAXIMUM_SEARCH_OFFSET = 1000
 122
 123 MAXIMUM_SORTED_DOCUMENTS = 10000
 124 MAXIMUM_NUMBER_FOUND_ACCURACY = 10000
 125 MAXIMUM_FIELDS_RETURNED_PER_SEARCH = 100
 126 MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST = 1000
 127 MAXIMUM_GET_INDEXES_OFFSET = 1000
 128
 129
 130 DOCUMENT_ID_FIELD_NAME = '_doc_id'
 131
 132 LANGUAGE_FIELD_NAME = '_lang'
 133
 134 RANK_FIELD_NAME = '_rank'
 135
 136 SCORE_FIELD_NAME = '_score'
 137
 138
 139
 140 TIMESTAMP_FIELD_NAME = '_timestamp'
 141
 142
 143
 144
 145 _LANGUAGE_RE = re.compile('^(.{2}|.{2}_.{2})$')
 146
 147 _MAXIMUM_STRING_LENGTH = 500
 148 _MAXIMUM_CURSOR_LENGTH = 10000
 149
 150 _VISIBLE_PRINTABLE_ASCII = frozenset(
 151     set(string.printable) - set(string.whitespace))
 152 _FIELD_NAME_PATTERN = '^[A-Za-z][A-Za-z0-9_]*$'
 153
 154 MAX_DATE = datetime.datetime(
 155     datetime.MAXYEAR, 12, 31, 23, 59, 59, 999999, tzinfo=None)
 156 MIN_DATE = datetime.datetime(
 157     datetime.MINYEAR, 1, 1, 0, 0, 0, 0, tzinfo=None)
 158
 159
 160 MAX_NUMBER_VALUE = 2147483647
 161 MIN_NUMBER_VALUE = -2147483647
 162
 163
 164 _PROTO_FIELDS_STRING_VALUE = frozenset([document_pb.FieldValue.TEXT,
 165                                         document_pb.FieldValue.HTML,
 166                                         document_pb.FieldValue.ATOM])
 167
 168
 169 class Error(Exception):
 170   """Indicates a call on the search API has failed."""
 171
 172
 173 class InternalError(Error):
 174   """Indicates a call on the search API has failed on the internal backend."""
 175
 176
 177 class TransientError(Error):
 178   """Indicates a call on the search API has failed, but retrying may succeed."""
 179
 180
 181 class InvalidRequest(Error):
 182   """Indicates an invalid request was made on the search API by the client."""
 183
 184
 185 class QueryError(Error):
 186   """An error occurred while parsing a query input string."""
 187
 188
 189 class ExpressionError(Error):
 190   """An error occurred while parsing an expression input string."""
 191
 192
 193 class Timeout(Error):
 194   """Indicates a call on the search API could not finish before its deadline."""
 195
 196
 197 class ConcurrentTransactionError(Error):
 198   """Indicates a call on the search API failed due to concurrent updates."""
 199
 200
 201 def _ConvertToUnicode(some_string):
 202   """Convert UTF-8 encoded string to unicode."""
 203   if some_string is None:
 204     return None
 205   if isinstance(some_string, unicode):
 206     return some_string
 207   return unicode(some_string, 'utf-8')
 208
 209
 210 def _ConcatenateErrorMessages(prefix, status):
 211   """Returns an error message combining prefix and status.error_detail()."""
 212   if status.error_detail():
 213     return prefix + ': ' + status.error_detail()
 214   return prefix
 215
 216
 217 class _RpcOperationFuture(object):
 218   """Represents the future result a search RPC sent to a backend."""
 219
 220   def __init__(self, call, request, response, deadline, get_result_hook):
 221     """Initializer.
 222
 223     Args:
 224       call: Method name to call, as a string
 225       request: The request object
 226       response: The response object
 227       deadline: Deadline for RPC call in seconds; if None use the default.
 228       get_result_hook: Required result hook. Must be a function that takes
 229         no arguments. Its return value is returned by get_result().
 230     """
 231     _ValidateDeadline(deadline)
 232     self._get_result_hook = get_result_hook
 233     self._rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline)
 234     self._rpc.make_call(call, request, response)
 235
 236   def get_result(self):
 237     self._rpc.wait();
 238     try:
 239       self._rpc.check_success();
 240     except apiproxy_errors.ApplicationError, e:
 241       raise _ToSearchError(e)
 242     return self._get_result_hook()
 243
 244
 245 class _SimpleOperationFuture(object):
 246   """Adapts a late-binding function to a future."""
 247
 248   def __init__(self, future, function):
 249     self._future = future
 250     self._function = function
 251
 252   def get_result(self):
 253     return self._function(self._future.get_result())
 254
 255
 256 class _WrappedValueFuture(object):
 257   """Adapts an immediately-known result to a future."""
 258
 259   def __init__(self, result):
 260     self._result = result
 261
 262   def get_result(self):
 263     return self._result
 264
 265
 266 class OperationResult(object):
 267   """Represents result of individual operation of a batch index or removal.
 268
 269   This is an abstract class.
 270   """
 271
 272   (OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR,
 273   TIMEOUT,  CONCURRENT_TRANSACTION) = (
 274       'OK', 'INVALID_REQUEST', 'TRANSIENT_ERROR', 'INTERNAL_ERROR',
 275       'TIMEOUT', 'CONCURRENT_TRANSACTION')
 276
 277   _CODES = frozenset([OK, INVALID_REQUEST, TRANSIENT_ERROR, INTERNAL_ERROR,
 278                       TIMEOUT, CONCURRENT_TRANSACTION])
 279
 280   def __init__(self, code, message=None, id=None):
 281     """Initializer.
 282
 283     Args:
 284       code: The error or success code of the operation.
 285       message: An error message associated with any error.
 286       id: The id of the object some operation was performed on.
 287
 288     Raises:
 289       TypeError: If an unknown attribute is passed.
 290       ValueError: If an unknown code is passed.
 291     """
 292     self._message = _ConvertToUnicode(message)
 293     self._code = code
 294     if self._code not in self._CODES:
 295       raise ValueError('Unknown operation result code %r, must be one of %s'
 296                        % (self._code, self._CODES))
 297     self._id = _ConvertToUnicode(id)
 298
 299   @property
 300   def code(self):
 301     """Returns the code indicating the status of the operation."""
 302     return self._code
 303
 304   @property
 305   def message(self):
 306     """Returns any associated error message if the operation was in error."""
 307     return self._message
 308
 309   @property
 310   def id(self):
 311     """Returns the Id of the object the operation was performed on."""
 312     return self._id
 313
 314   def __repr__(self):
 315     return _Repr(self, [('code', self.code), ('message', self.message),
 316                         ('id', self.id)])
 317
 318
 319 _ERROR_OPERATION_CODE_MAP = {
 320     search_service_pb.SearchServiceError.OK: OperationResult.OK,
 321     search_service_pb.SearchServiceError.INVALID_REQUEST:
 322     OperationResult.INVALID_REQUEST,
 323     search_service_pb.SearchServiceError.TRANSIENT_ERROR:
 324     OperationResult.TRANSIENT_ERROR,
 325     search_service_pb.SearchServiceError.INTERNAL_ERROR:
 326     OperationResult.INTERNAL_ERROR,
 327     search_service_pb.SearchServiceError.TIMEOUT:
 328     OperationResult.TIMEOUT,
 329     search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
 330     OperationResult.CONCURRENT_TRANSACTION,
 331     }
 332
 333
 334 class PutResult(OperationResult):
 335   """The result of indexing a single object."""
 336
 337
 338 class DeleteResult(OperationResult):
 339   """The result of deleting a single document."""
 340
 341
 342 class PutError(Error):
 343   """Indicates some error occurred indexing one of the objects requested."""
 344
 345   def __init__(self, message, results):
 346     """Initializer.
 347
 348     Args:
 349       message: A message detailing the cause of the failure to index some
 350         document.
 351       results: A list of PutResult corresponding to the list of objects
 352         requested to be indexed.
 353     """
 354     super(PutError, self).__init__(message)
 355     self._results = results
 356
 357   @property
 358   def results(self):
 359     """Returns PutResult list corresponding to objects indexed."""
 360     return self._results
 361
 362
 363 class DeleteError(Error):
 364   """Indicates some error occured deleting one of the objects requested."""
 365
 366   def __init__(self, message, results):
 367     """Initializer.
 368
 369     Args:
 370       message: A message detailing the cause of the failure to delete some
 371         document.
 372       results: A list of DeleteResult corresponding to the list of Ids of
 373         objects requested to be deleted.
 374     """
 375     super(DeleteError, self).__init__(message)
 376     self._results = results
 377
 378   @property
 379   def results(self):
 380     """Returns DeleteResult list corresponding to Documents deleted."""
 381     return self._results
 382
 383
 384 _ERROR_MAP = {
 385     search_service_pb.SearchServiceError.INVALID_REQUEST: InvalidRequest,
 386     search_service_pb.SearchServiceError.TRANSIENT_ERROR: TransientError,
 387     search_service_pb.SearchServiceError.INTERNAL_ERROR: InternalError,
 388     search_service_pb.SearchServiceError.TIMEOUT: Timeout,
 389     search_service_pb.SearchServiceError.CONCURRENT_TRANSACTION:
 390     ConcurrentTransactionError,
 391     }
 392
 393
 394 def _ToSearchError(error):
 395   """Translate an application error to a search Error, if possible.
 396
 397   Args:
 398     error: An ApplicationError to translate.
 399
 400   Returns:
 401     An Error if the error is known, otherwise the given
 402     apiproxy_errors.ApplicationError.
 403   """
 404   if error.application_error in _ERROR_MAP:
 405     return _ERROR_MAP[error.application_error](error.error_detail)
 406   return error
 407
 408
 409 def _CheckInteger(value, name, zero_ok=True, upper_bound=None):
 410   """Checks whether value is an integer between the lower and upper bounds.
 411
 412   Args:
 413     value: The value to check.
 414     name: The name of the value, to use in error messages.
 415     zero_ok: True if zero is allowed.
 416     upper_bound: The upper (inclusive) bound of the value. Optional.
 417
 418   Returns:
 419     The checked value.
 420
 421   Raises:
 422     ValueError: If the value is not a int or long, or is out of range.
 423   """
 424   datastore_types.ValidateInteger(value, name, ValueError, empty_ok=True,
 425                                   zero_ok=zero_ok)
 426   if upper_bound is not None and value > upper_bound:
 427     raise ValueError('%s, %d must be <= %d' % (name, value, upper_bound))
 428   return value
 429
 430
 431 def _CheckEnum(value, name, values=None):
 432   """Checks whether value is a member of the set of values given.
 433
 434   Args:
 435     value: The value to check.
 436     name: The name of the value, to use in error messages.
 437     values: The iterable of possible values.
 438
 439   Returns:
 440     The checked value.
 441
 442   Raises:
 443     ValueError: If the value is not one of the allowable values.
 444   """
 445   if value not in values:
 446     raise ValueError('%s, %r must be in %s' % (name, value, values))
 447   return value
 448
 449
 450 def _CheckNumber(value, name):
 451   """Checks whether value is a number.
 452
 453   Args:
 454     value: The value to check.
 455     name: The name of the value, to use in error messages.
 456
 457   Returns:
 458     The checked value.
 459
 460   Raises:
 461     TypeError: If the value is not a number.
 462   """
 463   if not isinstance(value, (int, long, float)):
 464     raise TypeError('%s must be a int, long or float, got %s' %
 465                     (name, value.__class__.__name__))
 466   return value
 467
 468
 469 def _CheckStatus(status):
 470   """Checks whether a RequestStatus has a value of OK.
 471
 472   Args:
 473     status: The RequestStatus to check.
 474
 475   Raises:
 476     Error: A subclass of Error if the value of status is not OK.
 477       The subclass of Error is chosen based on value of the status code.
 478     InternalError: If the status value is unknown.
 479   """
 480   if status.code() != search_service_pb.SearchServiceError.OK:
 481     if status.code() in _ERROR_MAP:
 482       raise _ERROR_MAP[status.code()](status.error_detail())
 483     else:
 484       raise InternalError(status.error_detail())
 485
 486
 487 def _ValidateString(value,
 488                     name='unused',
 489                     max_len=_MAXIMUM_STRING_LENGTH,
 490                     empty_ok=False,
 491                     type_exception=TypeError,
 492                     value_exception=ValueError):
 493   """Raises an exception if value is not a valid string or a subclass thereof.
 494
 495   A string is valid if it's not empty, no more than _MAXIMUM_STRING_LENGTH
 496   bytes. The exception type can be specified with the exception
 497   arguments for type and value issues.
 498
 499   Args:
 500     value: The value to validate.
 501     name: The name of this value; used in the exception message.
 502     max_len: The maximum allowed length, in bytes.
 503     empty_ok: Allow empty value.
 504     type_exception: The type of exception to raise if not a basestring.
 505     value_exception: The type of exception to raise if invalid value.
 506
 507   Returns:
 508     The checked string.
 509
 510   Raises:
 511     TypeError: If value is not a basestring or subclass.
 512     ValueError: If the value is None or longer than max_len.
 513   """
 514   if value is None and empty_ok:
 515     return
 516   if value is not None and not isinstance(value, basestring):
 517     raise type_exception('%s must be a basestring; got %s:' %
 518                          (name, value.__class__.__name__))
 519   if not value and not empty_ok:
 520     raise value_exception('%s must not be empty.' % name)
 521
 522   if len(value.encode('utf-8')) > max_len:
 523     raise value_exception('%s must be under %d bytes.' % (name, max_len))
 524   return value
 525
 526
 527 def _ValidateVisiblePrintableAsciiNotReserved(value, name):
 528   """Checks if value is a visible printable ASCII string not starting with '!'.
 529
 530   Whitespace characters are excluded. Printable visible ASCII
 531   strings starting with '!' are reserved for internal use.
 532
 533   Args:
 534     value: The string to validate.
 535     name: The name of this string; used in the exception message.
 536
 537   Returns:
 538     The checked string.
 539
 540   Raises:
 541     ValueError: If the string is not visible printable ASCII, or starts with
 542       '!'.
 543   """
 544   for char in value:
 545     if char not in _VISIBLE_PRINTABLE_ASCII:
 546       raise ValueError(
 547           '%r must be visible printable ASCII: %r'
 548           % (name, value))
 549   if value.startswith('!'):
 550     raise ValueError('%r must not start with "!": %r' % (name, value))
 551   return value
 552
 553
 554 def _CheckIndexName(index_name):
 555   """Checks index_name is a string which is not too long, and returns it.
 556
 557   Index names must be visible printable ASCII and not start with '!'.
 558   """
 559   _ValidateString(index_name, 'index name', MAXIMUM_INDEX_NAME_LENGTH)
 560   return _ValidateVisiblePrintableAsciiNotReserved(index_name, 'index_name')
 561
 562
 563 def _CheckFieldName(name):
 564   """Checks field name is not too long and matches field name pattern.
 565
 566   Field name pattern: "[A-Za-z][A-Za-z0-9_]*".
 567   """
 568   _ValidateString(name, 'name', MAXIMUM_FIELD_NAME_LENGTH)
 569   if not re.match(_FIELD_NAME_PATTERN, name):
 570     raise ValueError('field name "%s" should match pattern: %s' %
 571                      (name, _FIELD_NAME_PATTERN))
 572   return name
 573
 574
 575 def _CheckExpression(expression):
 576   """Checks whether the expression is a string."""
 577   expression = _ValidateString(expression, max_len=MAXIMUM_EXPRESSION_LENGTH)
 578   try:
 579     expression_parser.Parse(expression)
 580   except expression_parser.ExpressionException, e:
 581     raise ExpressionError('Failed to parse expression "%s"' % expression)
 582   return expression
 583
 584
 585 def _CheckFieldNames(names):
 586   """Checks each name in names is a valid field name."""
 587   for name in names:
 588     _CheckFieldName(name)
 589   return names
 590
 591
 592 def _GetList(a_list):
 593   """Utility function that converts None to the empty list."""
 594   if a_list is None:
 595     return []
 596   else:
 597     return list(a_list)
 598
 599
 600 def _ConvertToList(arg):
 601   """Converts arg to a list, empty if None, single element if not a list."""
 602   if isinstance(arg, basestring):
 603     return [arg]
 604   if arg is not None:
 605     try:
 606       return list(iter(arg))
 607     except TypeError:
 608       return [arg]
 609   return []
 610
 611
 612 def _ConvertToUnicodeList(arg):
 613   """Converts arg to a list of unicode objects."""
 614   return [_ConvertToUnicode(value) for value in _ConvertToList(arg)]
 615
 616
 617 def _CheckDocumentId(doc_id):
 618   """Checks doc_id is a valid document identifier, and returns it.
 619
 620   Document ids must be visible printable ASCII and not start with '!'.
 621   """
 622   _ValidateString(doc_id, 'doc_id', MAXIMUM_DOCUMENT_ID_LENGTH)
 623   _ValidateVisiblePrintableAsciiNotReserved(doc_id, 'doc_id')
 624   return doc_id
 625
 626
 627 def _CheckText(value, name='value', empty_ok=True):
 628   """Checks the field text is a valid string."""
 629   return _ValidateString(value, name, MAXIMUM_FIELD_VALUE_LENGTH, empty_ok)
 630
 631
 632 def _CheckHtml(html):
 633   """Checks the field html is a valid HTML string."""
 634   return _ValidateString(html, 'html', MAXIMUM_FIELD_VALUE_LENGTH,
 635                          empty_ok=True)
 636
 637
 638 def _CheckAtom(atom):
 639   """Checks the field atom is a valid string."""
 640   return _ValidateString(atom, 'atom', MAXIMUM_FIELD_ATOM_LENGTH,
 641                          empty_ok=True)
 642
 643
 644 def _CheckDate(date):
 645   """Checks the date is in the correct range."""
 646   if isinstance(date, datetime.datetime):
 647     if date < MIN_DATE or date > MAX_DATE:
 648       raise TypeError('date must be between %s and %s (got %s)' %
 649                       (MIN_DATE, MAX_DATE, date))
 650   elif isinstance(date, datetime.date):
 651     if date < MIN_DATE.date() or date > MAX_DATE.date():
 652       raise TypeError('date must be between %s and %s (got %s)' %
 653                       (MIN_DATE, MAX_DATE, date))
 654   else:
 655     raise TypeError('date must be datetime.datetime or datetime.date')
 656   return date
 657
 658
 659 def _CheckLanguage(language):
 660   """Checks language is None or a string that matches _LANGUAGE_RE."""
 661   if language is None:
 662     return None
 663   if not isinstance(language, basestring):
 664     raise TypeError('language must be a basestring, got %s' %
 665                     language.__class__.__name__)
 666   if not re.match(_LANGUAGE_RE, language):
 667     raise ValueError('invalid language %s. Languages should be two letters.'
 668                      % language)
 669   return language
 670
 671
 672 def _CheckDocument(document):
 673   """Check that the document is valid.
 674
 675   This checks for all server-side requirements on Documents. Currently, that
 676   means ensuring that there are no repeated number or date fields.
 677
 678   Args:
 679     document: The search.Document to check for validity.
 680
 681   Raises:
 682     ValueError if the document is invalid in a way that would trigger an
 683     PutError from the server.
 684   """
 685   no_repeat_date_names = set()
 686   no_repeat_number_names = set()
 687   for field in document.fields:
 688     if isinstance(field, NumberField):
 689       if field.name in no_repeat_number_names:
 690         raise ValueError(
 691             'Invalid document %s: field %s with type date or number may not '
 692             'be repeated.' % (document.doc_id, field.name))
 693       no_repeat_number_names.add(field.name)
 694     elif isinstance(field, DateField):
 695       if field.name in no_repeat_date_names:
 696         raise ValueError(
 697             'Invalid document %s: field %s with type date or number may not '
 698             'be repeated.' % (document.doc_id, field.name))
 699       no_repeat_date_names.add(field.name)
 700
 701
 702 def _CheckSortLimit(limit):
 703   """Checks the limit on number of docs to score or sort is not too large."""
 704   return _CheckInteger(limit, 'limit', upper_bound=MAXIMUM_SORTED_DOCUMENTS)
 705
 706
 707 def _Repr(class_instance, ordered_dictionary):
 708   """Generates an unambiguous representation for instance and ordered dict."""
 709   return u'search.%s(%s)' % (class_instance.__class__.__name__, ', '.join(
 710       ['%s=%r' % (key, value) for (key, value) in ordered_dictionary
 711        if value is not None and value != []]))
 712
 713
 714 def _ListIndexesResponsePbToGetResponse(response):
 715   """Returns a GetResponse constructed from get_indexes response pb."""
 716   return GetResponse(
 717       results=[_NewIndexFromPb(index)
 718                for index in response.index_metadata_list()])
 719
 720
 721 @datastore_rpc._positional(7)
 722 def get_indexes(namespace='', offset=None, limit=20,
 723                 start_index_name=None, include_start_index=True,
 724                 index_name_prefix=None, fetch_schema=False, deadline=None,
 725                 **kwargs):
 726   """Returns a list of available indexes.
 727
 728   Args:
 729     namespace: The namespace of indexes to be returned. If not set
 730       then the current namespace is used.
 731     offset: The offset of the first returned index.
 732     limit: The number of indexes to return.
 733     start_index_name: The name of the first index to be returned.
 734     include_start_index: Whether or not to return the start index.
 735     index_name_prefix: The prefix used to select returned indexes.
 736     fetch_schema: Whether to retrieve Schema for each Index or not.
 737
 738   Kwargs:
 739     deadline: Deadline for RPC call in seconds; if None use the default.
 740
 741   Returns:
 742     The GetResponse containing a list of available indexes.
 743
 744   Raises:
 745     InternalError: If the request fails on internal servers.
 746     TypeError: If any of the parameters have invalid types, or an unknown
 747       attribute is passed.
 748     ValueError: If any of the parameters have invalid values (e.g., a
 749       negative deadline).
 750   """
 751   return get_indexes_async(
 752       namespace, offset, limit, start_index_name, include_start_index,
 753       index_name_prefix, fetch_schema, deadline=deadline, **kwargs).get_result()
 754
 755
 756 @datastore_rpc._positional(7)
 757 def get_indexes_async(namespace='', offset=None, limit=20,
 758                       start_index_name=None, include_start_index=True,
 759                       index_name_prefix=None, fetch_schema=False, deadline=None,
 760                       **kwargs):
 761   """Asynchronously returns a list of available indexes.
 762
 763   Identical to get_indexes() except that it returns a future. Call
 764   get_result() on the return value to block on the call and get its result.
 765   """
 766
 767   app_id = kwargs.pop('app_id', None)
 768   if kwargs:
 769     raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
 770
 771   request = search_service_pb.ListIndexesRequest()
 772   params = request.mutable_params()
 773
 774   if namespace is None:
 775     namespace = namespace_manager.get_namespace()
 776   if namespace is None:
 777     namespace = u''
 778   namespace_manager.validate_namespace(namespace, exception=ValueError)
 779   params.set_namespace(namespace)
 780   if offset is not None:
 781     params.set_offset(_CheckInteger(offset, 'offset', zero_ok=True,
 782                                     upper_bound=MAXIMUM_GET_INDEXES_OFFSET))
 783   params.set_limit(_CheckInteger(
 784       limit, 'limit', zero_ok=False,
 785       upper_bound=MAXIMUM_INDEXES_RETURNED_PER_GET_REQUEST))
 786   if start_index_name is not None:
 787     params.set_start_index_name(
 788         _ValidateString(start_index_name, 'start_index_name',
 789                         MAXIMUM_INDEX_NAME_LENGTH,
 790                         empty_ok=False))
 791   if include_start_index is not None:
 792     params.set_include_start_index(bool(include_start_index))
 793   if index_name_prefix is not None:
 794     params.set_index_name_prefix(
 795         _ValidateString(index_name_prefix, 'index_name_prefix',
 796                         MAXIMUM_INDEX_NAME_LENGTH,
 797                         empty_ok=False))
 798   params.set_fetch_schema(fetch_schema)
 799
 800   response = search_service_pb.ListIndexesResponse()
 801   if app_id:
 802     request.set_app_id(app_id)
 803
 804   def hook():
 805     _CheckStatus(response.status())
 806     return _ListIndexesResponsePbToGetResponse(response)
 807   return _RpcOperationFuture(
 808       'ListIndexes', request, response, deadline, hook)
 809
 810
 811 class Field(object):
 812   """An abstract base class which represents a field of a document.
 813
 814   This class should not be directly instantiated.
 815   """
 816
 817
 818   TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT = ('TEXT', 'HTML', 'ATOM', 'DATE',
 819                                                'NUMBER', 'GEO_POINT')
 820
 821   _FIELD_TYPES = frozenset([TEXT, HTML, ATOM, DATE, NUMBER, GEO_POINT])
 822
 823   def __init__(self, name, value, language=None):
 824     """Initializer.
 825
 826     Args:
 827       name: The name of the field. Field names must have maximum length
 828         MAXIMUM_FIELD_NAME_LENGTH and match pattern "[A-Za-z][A-Za-z0-9_]*".
 829       value: The value of the field which can be a str, unicode or date.
 830       language: The ISO 693-1 two letter code of the language used in the value.
 831         See http://www.sil.org/iso639-3/codes.asp?order=639_1&letter=%25 for a
 832         list of valid codes. Correct specification of language code will assist
 833         in correct tokenization of the field. If None is given, then the
 834         language code of the document will be used.
 835
 836     Raises:
 837       TypeError: If any of the parameters have invalid types, or an unknown
 838         attribute is passed.
 839       ValueError: If any of the parameters have invalid values.
 840     """
 841     self._name = _CheckFieldName(_ConvertToUnicode(name))
 842     self._value = self._CheckValue(value)
 843     self._language = _CheckLanguage(_ConvertToUnicode(language))
 844
 845   @property
 846   def name(self):
 847     """Returns the name of the field."""
 848     return self._name
 849
 850   @property
 851   def language(self):
 852     """Returns the code of the language the content in value is written in."""
 853     return self._language
 854
 855   @property
 856   def value(self):
 857     """Returns the value of the field."""
 858     return self._value
 859
 860   def _CheckValue(self, value):
 861     """Checks the value is valid for the given type.
 862
 863     Args:
 864       value: The value to check.
 865
 866     Returns:
 867       The checked value.
 868     """
 869     raise NotImplementedError('_CheckValue is an abstract method')
 870
 871   def __repr__(self):
 872     return _Repr(self, [('name', self.name), ('language', self.language),
 873                         ('value', self.value)])
 874
 875   def __eq__(self, other):
 876     return isinstance(other, type(self)) and self.__key() == other.__key()
 877
 878   def __ne__(self, other):
 879     return not self == other
 880
 881   def __key(self):
 882     return (self.name, self.value, self.language)
 883
 884   def __hash__(self):
 885     return hash(self.__key())
 886
 887   def __str__(self):
 888     return repr(self)
 889
 890   def _CopyStringValueToProtocolBuffer(self, field_value_pb):
 891     """Copies value to a string value in proto buf."""
 892     field_value_pb.set_string_value(self.value.encode('utf-8'))
 893
 894
 895 def _CopyFieldToProtocolBuffer(field, pb):
 896   """Copies field's contents to a document_pb.Field protocol buffer."""
 897   pb.set_name(field.name.encode('utf-8'))
 898   field_value_pb = pb.mutable_value()
 899   if field.language:
 900     field_value_pb.set_language(field.language.encode('utf-8'))
 901   if field.value is not None:
 902     field._CopyValueToProtocolBuffer(field_value_pb)
 903   return pb
 904
 905
 906 class TextField(Field):
 907   """A Field that has text content.
 908
 909   The following example shows a text field named signature with Polish content:
 910     TextField(name='signature', value='brzydka pogoda', language='pl')
 911   """
 912
 913   def __init__(self, name, value=None, language=None):
 914     """Initializer.
 915
 916     Args:
 917       name: The name of the field.
 918       value: A str or unicode object containing text.
 919       language: The code of the language the value is encoded in.
 920
 921     Raises:
 922       TypeError: If value is not a string.
 923       ValueError: If value is longer than allowed.
 924     """
 925     Field.__init__(self, name, _ConvertToUnicode(value), language)
 926
 927   def _CheckValue(self, value):
 928     return _CheckText(value)
 929
 930   def _CopyValueToProtocolBuffer(self, field_value_pb):
 931     field_value_pb.set_type(document_pb.FieldValue.TEXT)
 932     self._CopyStringValueToProtocolBuffer(field_value_pb)
 933
 934
 935 class HtmlField(Field):
 936   """A Field that has HTML content.
 937
 938   The following example shows an html field named content:
 939     HtmlField(name='content', value='<html>herbata, kawa</html>', language='pl')
 940   """
 941
 942   def __init__(self, name, value=None, language=None):
 943     """Initializer.
 944
 945     Args:
 946       name: The name of the field.
 947       value: A str or unicode object containing the searchable content of the
 948         Field.
 949       language: The code of the language the value is encoded in.
 950
 951     Raises:
 952       TypeError: If value is not a string.
 953       ValueError: If value is longer than allowed.
 954     """
 955     Field.__init__(self, name, _ConvertToUnicode(value), language)
 956
 957   def _CheckValue(self, value):
 958     return _CheckHtml(value)
 959
 960   def _CopyValueToProtocolBuffer(self, field_value_pb):
 961     field_value_pb.set_type(document_pb.FieldValue.HTML)
 962     self._CopyStringValueToProtocolBuffer(field_value_pb)
 963
 964
 965 class AtomField(Field):
 966   """A Field that has content to be treated as a single token for indexing.
 967
 968   The following example shows an atom field named contributor:
 969     AtomField(name='contributor', value='foo@bar.com')
 970   """
 971
 972   def __init__(self, name, value=None, language=None):
 973     """Initializer.
 974
 975     Args:
 976       name: The name of the field.
 977       value: A str or unicode object to be treated as an indivisible text value.
 978       language: The code of the language the value is encoded in.
 979
 980     Raises:
 981       TypeError: If value is not a string.
 982       ValueError: If value is longer than allowed.
 983     """
 984     Field.__init__(self, name, _ConvertToUnicode(value), language)
 985
 986   def _CheckValue(self, value):
 987     return _CheckAtom(value)
 988
 989   def _CopyValueToProtocolBuffer(self, field_value_pb):
 990     field_value_pb.set_type(document_pb.FieldValue.ATOM)
 991     self._CopyStringValueToProtocolBuffer(field_value_pb)
 992
 993
 994 class DateField(Field):
 995   """A Field that has a date or datetime value.
 996
 997   The following example shows a date field named creation_date:
 998     DateField(name='creation_date', value=datetime.date(2011, 03, 11))
 999   """
1000
1001   def __init__(self, name, value=None):
1002     """Initializer.
1003
1004     Args:
1005       name: The name of the field.
1006       value: A datetime.date or a datetime.datetime.
1007
1008     Raises:
1009       TypeError: If value is not a datetime.date or a datetime.datetime.
1010     """
1011     Field.__init__(self, name, value)
1012
1013   def _CheckValue(self, value):
1014     return _CheckDate(value)
1015
1016   def _CopyValueToProtocolBuffer(self, field_value_pb):
1017     field_value_pb.set_type(document_pb.FieldValue.DATE)
1018     field_value_pb.set_string_value(search_util.SerializeDate(self.value))
1019
1020
class NumberField(Field):
  """A Field that has a numeric value.

  The following example shows a number field named size:
    NumberField(name='size', value=10)
  """

  def __init__(self, name, value=None):
    """Initializer.

    Args:
      name: The name of the field.
      value: A numeric value.

    Raises:
      TypeError: If value is not numeric.
      ValueError: If value is out of range.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Checks that value is a number within the allowed range.

    Args:
      value: The candidate value; validated with _CheckNumber first.

    Returns:
      The value returned by _CheckNumber.

    Raises:
      ValueError: If the value is below MIN_NUMBER_VALUE or above
        MAX_NUMBER_VALUE.
    """
    value = _CheckNumber(value, 'field value')
    if value is not None and (value < MIN_NUMBER_VALUE or
                              value > MAX_NUMBER_VALUE):
      # The offending value is formatted with %s, not %d: %d truncates floats
      # (so a value just past a limit would be reported as the limit itself)
      # and raises OverflowError for infinite values, masking the ValueError.
      raise ValueError('value, %s must be between %d and %d' %
                       (value, MIN_NUMBER_VALUE, MAX_NUMBER_VALUE))
    return value

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Marks field_value_pb as NUMBER and stores str(value) in it."""
    field_value_pb.set_type(document_pb.FieldValue.NUMBER)
    field_value_pb.set_string_value(str(self.value))
1052
1053
class GeoPoint(object):
  """Represents a point on the Earth's surface, in lat, long coordinates.

  Two GeoPoints compare equal, and hash equally, when both their latitude and
  their longitude are equal. A GeoPoint never compares equal to an object that
  is not a GeoPoint.
  """

  def __init__(self, latitude, longitude):
    """Initializer.

    Args:
      latitude: The angle between the equatorial plane and a line that passes
        through the GeoPoint, between -90 and 90 degrees.
      longitude: The angle east or west from a reference meridian to another
        meridian that passes through the GeoPoint, between -180 and 180 degrees.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    self._latitude = self._CheckLatitude(latitude)
    self._longitude = self._CheckLongitude(longitude)

  @property
  def latitude(self):
    """Returns the angle between equatorial plane and line thru the geo point."""
    return self._latitude

  @property
  def longitude(self):
    """Returns the angle from a reference meridian to another meridian."""
    return self._longitude

  def _CheckLatitude(self, value):
    """Checks value is a number within [-90, 90] and returns it."""
    _CheckNumber(value, 'latitude')
    if value < -90.0 or value > 90.0:
      raise ValueError('latitude must be between -90 and 90 degrees '
                       'inclusive, was %f' % value)
    return value

  def _CheckLongitude(self, value):
    """Checks value is a number within [-180, 180] and returns it."""
    _CheckNumber(value, 'longitude')
    if value < -180.0 or value > 180.0:
      raise ValueError('longitude must be between -180 and 180 degrees '
                       'inclusive, was %f' % value)
    return value

  def __eq__(self, other):
    """Returns True if other is a GeoPoint with the same coordinates."""
    # Without this guard, comparing with an arbitrary object (None, a string)
    # raised AttributeError instead of evaluating to False.
    if not isinstance(other, GeoPoint):
      return NotImplemented
    return (self.latitude == other.latitude and
            self.longitude == other.longitude)

  def __ne__(self, other):
    """Returns the negation of ==; Python 2 does not derive it from __eq__."""
    return not self == other

  def __hash__(self):
    """Returns a hash consistent with __eq__, based on both coordinates."""
    return hash((self.latitude, self.longitude))

  def __repr__(self):
    return _Repr(self,
                 [('latitude', self.latitude),
                  ('longitude', self.longitude)])
1106
1107
def _CheckGeoPoint(geo_point):
  """Returns geo_point unchanged after verifying that it is a GeoPoint.

  Raises:
    TypeError: If geo_point is not a GeoPoint instance.
  """
  if isinstance(geo_point, GeoPoint):
    return geo_point
  raise TypeError('geo_point must be a GeoPoint, got %s' %
                  geo_point.__class__.__name__)
1114
1115
class GeoField(Field):
  """A Field whose value is a GeoPoint.

  For instance, a geo field called place:

    GeoField(name='place', value=GeoPoint(latitude=-33.84, longitude=151.26))
  """

  def __init__(self, name, value=None):
    """Initializer.

    No language is passed on to the Field initializer.

    Args:
      name: The name of the field.
      value: A GeoPoint value.

    Raises:
      TypeError: If value is not a GeoPoint.
    """
    Field.__init__(self, name, value)

  def _CheckValue(self, value):
    """Returns the result of validating value with _CheckGeoPoint."""
    return _CheckGeoPoint(value)

  def _CopyValueToProtocolBuffer(self, field_value_pb):
    """Marks field_value_pb as GEO and copies latitude and longitude into it."""
    field_value_pb.set_type(document_pb.FieldValue.GEO)
    geo_pb = field_value_pb.mutable_geo()
    point = self.value
    geo_pb.set_lat(point.latitude)
    geo_pb.set_lng(point.longitude)
1144
1145
def _GetValue(value_pb):
  """Extracts the Python value held by a field value protocol buffer.

  Args:
    value_pb: The protocol buffer to read; its type() selects the conversion.

  Returns:
    The raw string value for types in _PROTO_FIELDS_STRING_VALUE, the result
    of search_util.DeserializeDate for DATE, a float for NUMBER and a GeoPoint
    for GEO. None if the protocol buffer does not carry the corresponding
    value.

  Raises:
    TypeError: If the type of value_pb is none of the above.
  """
  value_type = value_pb.type()
  if value_type in _PROTO_FIELDS_STRING_VALUE:
    convert = None
  elif value_type == document_pb.FieldValue.DATE:
    convert = search_util.DeserializeDate
  elif value_type == document_pb.FieldValue.NUMBER:
    convert = float
  elif value_type == document_pb.FieldValue.GEO:
    # Geo values live in their own sub-message rather than the string value.
    if not value_pb.has_geo():
      return None
    geo_pb = value_pb.geo()
    return GeoPoint(latitude=geo_pb.lat(), longitude=geo_pb.lng())
  else:
    raise TypeError('unknown FieldValue type %d' % value_type)

  # All remaining types are transported in the string value.
  if not value_pb.has_string_value():
    return None
  raw_value = value_pb.string_value()
  if convert is None:
    return raw_value
  return convert(raw_value)
1166
1167
# Field value types whose payload _DecodeValue decodes from UTF-8.
_STRING_TYPES = set((
    document_pb.FieldValue.TEXT,
    document_pb.FieldValue.HTML,
    document_pb.FieldValue.ATOM,
))
1171
1172
def _DecodeUTF8(pb_value):
  """Decodes a UTF-8 encoded string into unicode; None is passed through."""
  if pb_value is None:
    return None
  return pb_value.decode('utf-8')
1178
1179
def _DecodeValue(pb_value, val_type):
  """Decodes pb_value from UTF-8 when val_type is one of _STRING_TYPES.

  Values of any other type are returned unchanged.
  """
  if val_type not in _STRING_TYPES:
    return pb_value
  return _DecodeUTF8(pb_value)
1185
1186
def _NewFieldFromPb(pb):
  """Constructs a Field from a document_pb.Field protocol buffer.

  Args:
    pb: The document_pb.Field protocol buffer to convert.

  Returns:
    A TextField, HtmlField, AtomField, DateField, NumberField or GeoField,
    chosen by the type of the protocol buffer's value. Only the three string
    field kinds receive the language.

  Raises:
    TypeError: If _GetValue does not recognize the value type.
    InvalidRequest: If the value type is not handled here.
  """
  name = _DecodeUTF8(pb.name())
  value_pb = pb.value()
  val_type = value_pb.type()
  value = _DecodeValue(_GetValue(value_pb), val_type)
  lang = None
  if value_pb.has_language():
    lang = _DecodeUTF8(value_pb.language())
  if val_type == document_pb.FieldValue.TEXT:
    return TextField(name, value, lang)
  elif val_type == document_pb.FieldValue.HTML:
    return HtmlField(name, value, lang)
  elif val_type == document_pb.FieldValue.ATOM:
    return AtomField(name, value, lang)
  elif val_type == document_pb.FieldValue.DATE:
    return DateField(name, value)
  elif val_type == document_pb.FieldValue.NUMBER:
    return NumberField(name, value)
  elif val_type == document_pb.FieldValue.GEO:
    return GeoField(name, value)
  # Raise rather than return: returning the exception instance would hand
  # callers an InvalidRequest object in place of a Field.
  raise InvalidRequest('Unknown field value type %d' % val_type)
1208
1209
class Document(object):
  """Represents a user generated document.

  The example below builds a document from a set of fields, some holding
  plain text and one holding HTML.

  Document(doc_id='document_id',
           fields=[TextField(name='subject', value='going for dinner'),
                   HtmlField(name='body',
                             value='<html>I found a place.</html>'),
                   TextField(name='signature', value='brzydka pogoda',
                             language='pl')],
           language='en')
  """
  # Reference instant from which default ranks are measured.
  _FIRST_JAN_2011 = datetime.datetime(2011, 1, 1)

  def __init__(self, doc_id=None, fields=None, language='en', rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document which
        does not start with '!'. Whitespace is excluded from ids. If no id is
        provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      rank: The rank of this document used to specify the order in which
        documents are returned by search. Rank must be a non-negative integer.
        If not specified, the number of seconds since 1st Jan 2011 is used.
        Documents are returned in descending order of their rank, in absence
        of sorting or scoring options.

    Raises:
      TypeError: If any of the parameters have invalid types, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    unicode_id = _ConvertToUnicode(doc_id)
    if unicode_id is not None:
      _CheckDocumentId(unicode_id)
    self._doc_id = unicode_id
    self._fields = _GetList(fields)
    self._language = _CheckLanguage(_ConvertToUnicode(language))

    # Field name -> list of fields; filled in lazily by _BuildFieldMap().
    self._field_map = None

    if rank is None:
      rank = self._GetDefaultRank()
    self._rank = self._CheckRank(rank)

    _CheckDocument(self)

  @property
  def doc_id(self):
    """Returns the document identifier."""
    return self._doc_id

  @property
  def fields(self):
    """Returns a list of fields of the document."""
    return self._fields

  @property
  def language(self):
    """Returns the code of the language the document fields are written in."""
    return self._language

  @property
  def rank(self):
    """Returns the rank of this document."""
    return self._rank

  def field(self, field_name):
    """Returns the single field that has the provided field name.

    Args:
      field_name: The name of the field to return.

    Returns:
      The field with the given name.

    Raises:
      ValueError: There is not exactly one field with the given name.
    """
    matches = self[field_name]
    if len(matches) != 1:
      raise ValueError(
          'Must have exactly one field with name %s, but found %d.' %
          (field_name, len(matches)))
    return matches[0]

  def __getitem__(self, field_name):
    """Returns a list of all fields with the provided field name.

    Args:
      field_name: The name of the fields to return.

    Returns:
      All fields with the given name, or an empty list if no field with that
      name exists.
    """
    field_map = self._BuildFieldMap()
    return field_map.get(field_name, [])

  def __iter__(self):
    """Always raises: Documents do not support iteration.

    Defined only so that iterating fails with an explicit exception.
    """
    raise TypeError('Documents do not support iteration.')

  def _BuildFieldMap(self):
    """Returns the name -> fields map, building it on first use."""
    if self._field_map is not None:
      return self._field_map
    field_map = self._field_map = {}
    for field in self._fields:
      field_map.setdefault(field.name, []).append(field)
    return field_map

  def _CheckRank(self, rank):
    """Returns rank after validating it with _CheckInteger."""
    return _CheckInteger(rank, 'rank', upper_bound=sys.maxint)

  def _GetDefaultRank(self):
    """Returns a default rank: whole seconds elapsed since 1st Jan 2011."""
    elapsed = datetime.datetime.now() - Document._FIRST_JAN_2011
    return elapsed.seconds + (elapsed.days * 24 * 3600)

  def __repr__(self):
    attributes = [
        ('doc_id', self.doc_id),
        ('fields', self.fields),
        ('language', self.language),
        ('rank', self.rank),
    ]
    return _Repr(self, attributes)

  def __eq__(self, other):
    """Documents are equal when id, rank, language and fields all match."""
    if not isinstance(other, type(self)):
      return False
    return (self.doc_id == other.doc_id and
            self.rank == other.rank and
            self.language == other.language and
            self.fields == other.fields)

  def __ne__(self, other):
    is_equal = (self == other)
    return not is_equal

  def __key(self):
    """Returns the value the hash is derived from: the document id."""
    return self.doc_id

  def __hash__(self):
    key = self.__key()
    return hash(key)

  def __str__(self):
    return repr(self)
1360
1361
def _CopyDocumentToProtocolBuffer(document, pb):
  """Populates a document_pb.Document protocol buffer from a Document.

  Storage is always set to DISK. The id and language are written only when
  they are truthy. Every field is copied, and the rank is stored as the
  order id.

  Args:
    document: The Document to copy from.
    pb: The document_pb.Document protocol buffer to fill in.

  Returns:
    The pb that was passed in.
  """
  pb.set_storage(document_pb.Document.DISK)
  doc_id = document.doc_id
  if doc_id:
    pb.set_id(doc_id.encode('utf-8'))
  language = document.language
  if language:
    pb.set_language(language.encode('utf-8'))
  for field in document.fields:
    _CopyFieldToProtocolBuffer(field, pb.add_field())
  pb.set_order_id(document.rank)
  return pb
1374
1375
def _NewFieldsFromPb(field_list):
  """Returns a list holding one Field per protocol buffer in field_list.

  Each element is converted with _NewFieldFromPb, preserving order.
  """
  fields = []
  for field_pb in field_list:
    fields.append(_NewFieldFromPb(field_pb))
  return fields
1379
1380
def _NewDocumentFromPb(doc_pb):
  """Builds a Document from a document_pb.Document protocol buffer.

  The language is passed as None when the protocol buffer has none. The
  protocol buffer's order id becomes the document's rank.
  """
  lang = _DecodeUTF8(doc_pb.language()) if doc_pb.has_language() else None
  doc_id = _DecodeUTF8(doc_pb.id())
  fields = _NewFieldsFromPb(doc_pb.field_list())
  return Document(doc_id=doc_id, fields=fields, language=lang,
                  rank=doc_pb.order_id())
1390
1391
def _QuoteString(argument):
  """Wraps argument in double quotes, backslash-escaping embedded ones."""
  escaped = argument.replace('"', '\\"')
  return '"' + escaped + '"'
1394
1395
class FieldExpression(object):
  """Represents an expression that will be computed for each result returned.

  As an illustration,
    FieldExpression(name='content_snippet',
                    expression='snippet("very important", content)')
  asks for a computed field 'content_snippet' to be returned with each search
  result, holding HTML snippets of the 'content' field which match the query
  'very important'.
  """

  MAXIMUM_EXPRESSION_LENGTH = 1000
  MAXIMUM_OPERATOR_LENGTH = 100

  def __init__(self, name, expression):
    """Initializer.

    Args:
      name: The name of the computed field for the expression.
      expression: The expression to evaluate and return in a field with
        given name in results. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    unicode_name = _ConvertToUnicode(name)
    self._name = _CheckFieldName(unicode_name)
    # None is rejected separately so that it raises ValueError, not TypeError.
    if expression is None:
      raise ValueError('expression must be a FieldExpression, got None')
    if not isinstance(expression, basestring):
      raise TypeError('expression must be a FieldExpression, got %s' %
                      expression.__class__.__name__)
    unicode_expression = _ConvertToUnicode(expression)
    self._expression = _CheckExpression(unicode_expression)

  @property
  def name(self):
    """Returns name of the expression to return in search results."""
    return self._name

  @property
  def expression(self):
    """Returns a string containing an expression returned in search results."""
    return self._expression

  def __repr__(self):
    attributes = [('name', self.name), ('expression', self.expression)]
    return _Repr(self, attributes)
1447
1448
def _CopyFieldExpressionToProtocolBuffer(field_expression, pb):
  """Copies FieldExpression to a search_service_pb.FieldSpec_Expression.

  Both the name and the expression are written UTF-8 encoded.
  """
  encoded_name = field_expression.name.encode('utf-8')
  pb.set_name(encoded_name)
  encoded_expression = field_expression.expression.encode('utf-8')
  pb.set_expression(encoded_expression)
1453
1454
class SortOptions(object):
  """Represents a multi-dimensional sort of Documents.

   The code below sorts documents by product rating in descending order and
   then by cheapest product among similarly rated products, sorting at most
   1000 documents:

     SortOptions(expressions=[
         SortExpression(expression='rating',
             direction=SortExpression.DESCENDING, default_value=0),
         SortExpression(expression='price + tax',
             direction=SortExpression.ASCENDING, default_value=999999.99)],
         limit=1000)
  """

  def __init__(self, expressions=None, match_scorer=None, limit=1000):
    """Initializer.

    Args:
      expressions: An iterable of SortExpression representing a
        multi-dimensional sort of Documents.
      match_scorer: A match scorer specification which may be used to
        score documents or in a SortExpression combined with other features.
      limit: The limit on the number of documents to score or sort.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
    """
    self._match_scorer = match_scorer
    self._expressions = _GetList(expressions)
    for candidate in self._expressions:
      if isinstance(candidate, SortExpression):
        continue
      raise TypeError('expression must be a SortExpression, got %s' %
                      candidate.__class__.__name__)
    self._limit = _CheckSortLimit(limit)

  @property
  def expressions(self):
    """A list of SortExpression specifying a multi-dimensional sort."""
    return self._expressions

  @property
  def match_scorer(self):
    """Returns a match scorer to score documents with."""
    return self._match_scorer

  @property
  def limit(self):
    """Returns the limit on the number of documents to score or sort."""
    return self._limit

  def __repr__(self):
    attributes = [
        ('match_scorer', self.match_scorer),
        ('expressions', self.expressions),
        ('limit', self.limit),
    ]
    return _Repr(self, attributes)
1513
1514
class MatchScorer(object):
  """Assigns a document score based on term frequency.

  Adding a MatchScorer to a SortOptions, as in:

      sort_opts = search.SortOptions(match_scorer=search.MatchScorer())

  sorts the documents in descending score order, and the scores will be
  positive. To sort in ascending order instead, use:

      sort_opts = search.SortOptions(match_scorer=search.MatchScorer(),
          expressions=[search.SortExpression(
              expression='_score', direction=search.SortExpression.ASCENDING,
              default_value=0.0)])

  The scores in this case will be negative.
  """

  def __init__(self):
    """Initializer; takes no arguments and stores no state."""

  def __repr__(self):
    no_attributes = []
    return _Repr(self, no_attributes)
1545
1546
class RescoringMatchScorer(MatchScorer):
  """Assigns a document score based on term frequency weighted by doc parts.

  If you add a RescoringMatchScorer to a SortOptions as in the following code:

      sort_opts = search.SortOptions(match_scorer=search.RescoringMatchScorer())

  then, this will sort the documents in descending score order. The scores
  will be positive.  If you want to sort in ascending order, then use the
  following code:

      sort_opts = search.SortOptions(match_scorer=search.RescoringMatchScorer(),
          expressions=[search.SortExpression(
              expression='_score', direction=search.SortExpression.ASCENDING,
              default_value=0.0)])

  The scores in this case will be negative.
  """

  def __init__(self):
    """Initializer.

    Takes no arguments; only delegates to the MatchScorer initializer.
    """
    # Cooperative call, so the MatchScorer initializer runs via the MRO.
    super(RescoringMatchScorer, self).__init__()
1575
1576
def _CopySortExpressionToProtocolBuffer(sort_expression, pb):
  """Copies a SortExpression to a search_service_pb.SortSpec protocol buffer.

  The descending flag is only touched (set to False) for ascending sorts. A
  default value, if any, is stored as text for strings and for dates (the
  latter as str() of search_util.EpochTime), and as a numeric default
  otherwise.

  Args:
    sort_expression: The SortExpression to copy from.
    pb: The search_service_pb.SortSpec protocol buffer to fill in.

  Returns:
    The pb that was passed in.
  """
  pb.set_sort_expression(sort_expression.expression.encode('utf-8'))
  if sort_expression.direction == SortExpression.ASCENDING:
    pb.set_sort_descending(False)

  default = sort_expression.default_value
  if default is None:
    return pb
  if isinstance(default, basestring):
    pb.set_default_value_text(default.encode('utf-8'))
  elif isinstance(default, (datetime.datetime, datetime.date)):
    pb.set_default_value_text(str(search_util.EpochTime(default)))
  else:
    pb.set_default_value_numeric(default)
  return pb
1592
1593
def _CopyMatchScorerToScorerSpecProtocolBuffer(match_scorer, limit, pb):
  """Copies a MatchScorer to a search_service_pb.ScorerSpec.

  Args:
    match_scorer: A MatchScorer or RescoringMatchScorer instance.
    limit: The value stored as the scorer spec's limit.
    pb: The search_service_pb.ScorerSpec protocol buffer to fill in.

  Returns:
    The pb that was passed in.

  Raises:
    TypeError: If match_scorer is not a MatchScorer.
  """
  # RescoringMatchScorer derives from MatchScorer, so this single check
  # rejects everything that is neither of the two.
  if not isinstance(match_scorer, MatchScorer):
    raise TypeError(
        'match_scorer must be a MatchScorer or RescoringMatchRescorer, '
        'got %s' % match_scorer.__class__.__name__)
  if isinstance(match_scorer, RescoringMatchScorer):
    scorer = search_service_pb.ScorerSpec.RESCORING_MATCH_SCORER
  else:
    scorer = search_service_pb.ScorerSpec.MATCH_SCORER
  pb.set_scorer(scorer)
  pb.set_limit(limit)
  return pb
1606
1607
def _CopySortOptionsToProtocolBuffer(sort_options, params):
  """Copies the SortOptions into the SearchParams proto buf.

  One sort spec is added per sort expression. The scorer spec always receives
  the limit; the scorer itself is copied only when a match scorer is set.

  Args:
    sort_options: The SortOptions to copy from.
    params: The SearchParams protocol buffer to fill in.
  """
  for expression in sort_options.expressions:
    _CopySortExpressionToProtocolBuffer(expression, params.add_sort_spec())
  scorer = sort_options.match_scorer
  scorer_spec = params.mutable_scorer_spec()
  if scorer:
    _CopyMatchScorerToScorerSpecProtocolBuffer(
        scorer, sort_options.limit, scorer_spec)
  scorer_spec.set_limit(sort_options.limit)
1620
1621
class SortExpression(object):
  """Sort by a user specified scoring expression.

  The following sorts documents on a numeric field named 'length' in
  ascending order, using sys.maxint as the default for documents that do not
  specify a 'length' field.

    SortExpression(expression='length',
                   direction=SortExpression.ASCENDING,
                   default_value=sys.maxint)

  The following sorts documents on a date field named 'published_date' in
  descending order, using 1999-12-31 as the default for documents that do not
  specify a 'published_date' field.

    SortExpression(expression='published_date',
                   default_value=datetime.date(year=1999, month=12, day=31))

  The following sorts documents on a text field named 'subject' in descending
  order, using '' as the default for documents that do not specify a
  'subject' field.

    SortExpression(expression='subject')
  """

  # unichr(0x10ffff) raises ValueError on narrow Python builds, in which case
  # the highest code point of the basic plane is used instead.
  try:
    MAX_FIELD_VALUE = unichr(0x10ffff) * 80
  except ValueError:
    MAX_FIELD_VALUE = unichr(0xffff) * 80

  MIN_FIELD_VALUE = u''

  ASCENDING, DESCENDING = ('ASCENDING', 'DESCENDING')

  _DIRECTIONS = frozenset([ASCENDING, DESCENDING])

  def __init__(self, expression, direction=DESCENDING, default_value=None):
    """Initializer.

    Args:
      expression: An expression to be evaluated on each matching document
        to sort by. The expression must evaluate to a text or numeric value.
        The expression can simply be a field name, or some compound expression
        such as "_score + count(likes) * 0.1" which will add the score from a
        scorer to a count of the values of a likes field times 0.1. See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of legal expressions.
      direction: The direction to sort the search results, either ASCENDING
        or DESCENDING
      default_value: The default value of the expression. The default_value is
        returned if expression cannot be calculated, for example, if the
        expression is a field name and no value for that named field exists.
        A text value must be specified for text sorts. A numeric value must be
        specified for numeric sorts. A date value must be specified for date
        sorts.

    Raises:
      TypeError: If any of the parameters has an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters has an invalid value.
      ExpressionError: If the expression string is not parseable.
    """
    self._expression = _ConvertToUnicode(expression)
    self._direction = self._CheckDirection(direction)
    if self._expression is None:
      raise TypeError('expression must be a SortExpression, got None')
    _CheckExpression(self._expression)

    self._default_value = default_value
    if default_value is None:
      return
    if isinstance(default_value, basestring):
      # Text defaults are stored as unicode and validated like field text.
      self._default_value = _ConvertToUnicode(default_value)
      _CheckText(self._default_value, 'default_value')
      return
    allowed_types = (int, long, float, datetime.date, datetime.datetime)
    if not isinstance(default_value, allowed_types):
      raise TypeError('default_value must be text, numeric or datetime, got '
                      '%s' % default_value.__class__.__name__)

  @property
  def expression(self):
    """Returns the expression to sort by."""
    return self._expression

  @property
  def direction(self):
    """Returns the direction to sort expression: ASCENDING or DESCENDING."""
    return self._direction

  @property
  def default_value(self):
    """Returns a default value for the expression if no value computed."""
    return self._default_value

  def _CheckDirection(self, direction):
    """Returns direction after validating it against _DIRECTIONS."""
    return _CheckEnum(direction, 'direction', values=self._DIRECTIONS)

  def __repr__(self):
    attributes = [
        ('expression', self.expression),
        ('direction', self.direction),
        ('default_value', self.default_value),
    ]
    return _Repr(self, attributes)
1726
1727
class ScoredDocument(Document):
  """A Document returned from a search, together with its scoring details.

  On top of the Document attributes it carries the sort scores, the computed
  expression fields and an optional cursor for the individual result.
  """

  def __init__(self, doc_id=None, fields=None, language='en',
               sort_scores=None, expressions=None, cursor=None, rank=None):
    """Initializer.

    Args:
      doc_id: The visible printable ASCII string identifying the document which
        does not start with '!'. Whitespace is excluded from ids. If no id is
        provided, the search service will provide one.
      fields: An iterable of Field instances representing the content of the
        document.
      language: The code of the language used in the field values.
      sort_scores: The list of scores assigned during sort evaluation, one per
        sort dimension. Positive scores are used for ascending sorts;
        negative scores for descending.
      expressions: The list of computed fields which are the result of the
        expressions requested.
      cursor: A Cursor associated with the document, or None.
      rank: The rank of this document. A rank must be a non-negative integer
        less than sys.maxint. If not specified, the number of seconds since
        1st Jan 2011 is used. Documents are returned in descending order of
        their rank.

    Raises:
      TypeError: If any of the parameters have invalid types (for example
        cursor is not a Cursor), or an unknown attribute is passed.
      ValueError: If any of the parameters have invalid values.
    """
    super(ScoredDocument, self).__init__(doc_id=doc_id, fields=fields,
                                         language=language, rank=rank)
    score_list = _GetList(sort_scores)
    self._sort_scores = self._CheckSortScores(score_list)
    self._expressions = _GetList(expressions)
    # Anything other than None or a Cursor instance is rejected.
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  @property
  def sort_scores(self):
    """The scores assigned to this document during sort evaluation.

    There is one score per sort dimension. Positive scores are used for
    ascending sorts; negative scores for descending.

    Returns:
      The list of numeric sort scores.
    """
    return self._sort_scores

  @property
  def expressions(self):
    """The computed fields produced by evaluating requested expressions.

    For example, a request containing
      FieldExpression(name='snippet', 'snippet("good story", content)')
    asks for a snippet field holding HTML snippets extracted from matching the
    query 'good story' against the field 'content'. The search result would
    then carry, in expressions, a field such as:
      HtmlField(name='snippet', value='that was a <b>good story</b> to finish')

    Returns:
      The computed fields.
    """
    return self._expressions

  @property
  def cursor(self):
    """The cursor for this result, a starting point to continue a search.

    A per-result cursor is requested by searching with a Cursor whose
    per_result is True; otherwise this is expected to be None.

    Returns:
      The result cursor.
    """
    return self._cursor

  def _CheckSortScores(self, sort_scores):
    """Runs _CheckNumber on every sort score and returns the scores."""
    for score in sort_scores:
      _CheckNumber(score, 'sort_scores')
    return sort_scores

  def __repr__(self):
    """Returns the _Repr rendering of the document and scoring attributes."""
    attributes = [
        ('doc_id', self.doc_id),
        ('fields', self.fields),
        ('language', self.language),
        ('rank', self.rank),
        ('sort_scores', self.sort_scores),
        ('expressions', self.expressions),
        ('cursor', self.cursor),
    ]
    return _Repr(self, attributes)
1822
1823
class SearchResults(object):
  """Holds what a search request returned: matches, count and cursor."""

  def __init__(self, number_found, results=None, cursor=None):
    """Initializer.

    Args:
      number_found: The number of documents found for the query.
      results: The list of ScoredDocuments returned from executing a
        search request.
      cursor: A Cursor to continue the search from the end of the
        search results, or None.

    Raises:
      TypeError: If any of the parameters have an invalid type (for example
        cursor is not a Cursor), or an unknown attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._number_found = _CheckInteger(number_found, 'number_found')
    self._results = _GetList(results)
    # Anything other than None or a Cursor instance is rejected.
    if not (cursor is None or isinstance(cursor, Cursor)):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    self._cursor = cursor

  def __iter__(self):
    """Yields each result in the order held by results."""
    for scored_document in self.results:
      yield scored_document

  @property
  def results(self):
    """The list of ScoredDocuments that matched the query."""
    return self._results

  @property
  def number_found(self):
    """The number of documents which were found for the search.

    Note that this is an approximation and not an exact count. If, for
    example, QueryOptions.number_found_accuracy is set to 100, then
    number_found <= 100 is accurate.

    Returns:
      The number of documents found.
    """
    return self._number_found

  @property
  def cursor(self):
    """A cursor that can be used to continue the search from the last result.

    This is populated when a Cursor was supplied in QueryOptions;
    otherwise this will be None.

    Returns:
      The results cursor.
    """
    return self._cursor

  def __repr__(self):
    """Returns the _Repr rendering of results, number_found and cursor."""
    attributes = [
        ('results', self.results),
        ('number_found', self.number_found),
        ('cursor', self.cursor),
    ]
    return _Repr(self, attributes)
1888
1889
class GetResponse(object):
  """Represents the result of executing a get request.

  For example, the following code shows how a response could be used
  to walk over the documents that were retrieved.

  response = index.get_range()
  for document in response:
    print "document ", document
  """

  def __init__(self, results=None):
    """Initializer.

    Args:
      results: The results returned from an index ordered by Id.

    Raises:
      TypeError: If any of the parameters have an invalid type, or an unknown
        attribute is passed.
      ValueError: If any of the parameters have an invalid value.
    """
    self._results = _GetList(results)

  def __iter__(self):
    """Yields each result in the order held by results."""
    for item in self.results:
      yield item

  @property
  def results(self):
    """The list of results, ordered by Id, returned from the index."""
    return self._results

  def __repr__(self):
    """Returns the _Repr rendering of the results."""
    attributes = [('results', self.results)]
    return _Repr(self, attributes)
1925
1926
class Cursor(object):
  """Specifies how to get the next page of results in a search.

  A cursor returned in a previous set of search results to use as a starting
  point to retrieve the next set of results. This can get you better
  performance, and also improves the consistency of pagination through index
  updates.

  The following shows how to use the cursor to get the next page of results:

  # get the first set of results; the first cursor is used to specify
  # that cursors are to be returned in the SearchResults.
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=Cursor())))

  # get the next set of results
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=results.cursor)))

  If you want to continue search from any one of the ScoredDocuments in
  SearchResults, then you can set Cursor.per_result to True.

  # get the first set of results; the first cursor is used to specify
  # that cursors are to be returned in the SearchResults.
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=Cursor(per_result=True))))

  # this shows how to access the per_document cursors returned from a search
  per_document_cursor = None
  for scored_document in results:
    per_document_cursor = scored_document.cursor

  # get the next set of results
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(cursor=per_document_cursor)))
  """

  def __init__(self, web_safe_string=None, per_result=False):
    """Initializer.

    Args:
      web_safe_string: The cursor string returned from the search service to
        be interpreted by the search service to get the next set of results.
        It has the form '<True|False>:<internal cursor>'.
      per_result: A bool when true will return a cursor per ScoredDocument in
        SearchResults, otherwise will return a single cursor for the whole
        SearchResults. If using offset this is ignored, as the user is
        responsible for calculating a next offset if any. When a non-empty
        web_safe_string is given, the flag encoded in that string replaces
        this argument.

    Raises:
      ValueError: if the web_safe_string is not of required format.
    """
    self._web_safe_string = _CheckCursor(_ConvertToUnicode(web_safe_string))
    self._per_result = per_result
    # Always define the attribute so that reading it on a cursor created
    # without a web_safe_string yields None instead of an AttributeError.
    self._internal_cursor = None
    if self._web_safe_string:
      parts = self._web_safe_string.split(':', 1)
      if len(parts) != 2 or parts[0] not in ('True', 'False'):
        raise ValueError('invalid format for web_safe_string, got %s' %
                         self._web_safe_string)
      self._internal_cursor = parts[1]
      # The prefix of the web safe string carries the per_result setting.
      self._per_result = (parts[0] == 'True')

  @property
  def web_safe_string(self):
    """Returns the cursor string generated by the search service."""
    return self._web_safe_string

  @property
  def per_result(self):
    """Returns whether to return a cursor for each ScoredDocument in results."""
    return self._per_result

  def __repr__(self):
    """Returns the _Repr rendering of the web safe string."""
    return _Repr(self, [('web_safe_string', self.web_safe_string)])
2003
2004
2005 def _ToWebSafeString(per_result, internal_cursor):
2006   """Returns the web safe string combining per_result with internal cursor."""
2007   return str(per_result) + ':' + internal_cursor
2008
2009
def _CheckQuery(query):
  """Checks a query is a valid query string.

  Args:
    query: The query string to validate.

  Returns:
    The query, unchanged, when it is valid.

  Raises:
    TypeError: If query is None.
    QueryError: If the query is not blank and cannot be parsed.
    Other errors may come from _ValidateString, which checks the query
    against MAXIMUM_QUERY_LENGTH.
  """
  _ValidateString(query, 'query', MAXIMUM_QUERY_LENGTH, empty_ok=True)
  if query is None:
    raise TypeError('query must be unicode, got None')
  # Blank queries are accepted as-is; only non-blank ones are parsed.
  if query.strip():
    try:
      query_parser.Parse(query)
    except query_parser.QueryException:
      raise QueryError('Failed to parse query "%s"' % query)
  return query
2021
2022
def _CheckLimit(limit):
  """Validates the limit on the number of documents to return.

  Args:
    limit: The candidate limit.

  Returns:
    The result of _CheckInteger for limit, called with zero_ok=False and
    an upper bound of MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH.
  """
  checked_limit = _CheckInteger(
      limit, 'limit',
      upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH,
      zero_ok=False)
  return checked_limit
2028
2029
def _CheckOffset(offset):
  """Validates the offset into the list of matching documents.

  Args:
    offset: The candidate offset.

  Returns:
    The result of _CheckInteger for offset, called with zero_ok=True and
    an upper bound of MAXIMUM_SEARCH_OFFSET.
  """
  checked_offset = _CheckInteger(
      offset, 'offset',
      upper_bound=MAXIMUM_SEARCH_OFFSET,
      zero_ok=True)
  return checked_offset
2035
2036
def _CheckNumberFoundAccuracy(number_found_accuracy):
  """Validates the requested accuracy of number_found.

  Args:
    number_found_accuracy: The candidate accuracy.

  Returns:
    The result of _CheckInteger for the accuracy, called with zero_ok=False
    and an upper bound of MAXIMUM_NUMBER_FOUND_ACCURACY.
  """
  checked_accuracy = _CheckInteger(
      number_found_accuracy, 'number_found_accuracy',
      upper_bound=MAXIMUM_NUMBER_FOUND_ACCURACY,
      zero_ok=False)
  return checked_accuracy
2042
2043
def _CheckCursor(cursor):
  """Validates a cursor string, if one is specified.

  Args:
    cursor: The candidate cursor string.

  Returns:
    The result of _ValidateString for cursor, called with a maximum length
    of _MAXIMUM_CURSOR_LENGTH and empty_ok=True.
  """
  checked_cursor = _ValidateString(
      cursor, 'cursor', _MAXIMUM_CURSOR_LENGTH, empty_ok=True)
  return checked_cursor
2048
2049
def _CheckNumberOfFields(returned_expressions, snippeted_fields,
                         returned_fields):
  """Checks the combined count of requested fields is within the limit.

  Args:
    returned_expressions: The expressions to evaluate and return.
    snippeted_fields: The names of fields to snippet.
    returned_fields: The names of fields to return.

  Raises:
    ValueError: If the three collections together hold more than
      MAXIMUM_FIELDS_RETURNED_PER_SEARCH entries.
  """
  field_groups = (returned_expressions, snippeted_fields, returned_fields)
  total_requested = sum(len(group) for group in field_groups)
  if total_requested <= MAXIMUM_FIELDS_RETURNED_PER_SEARCH:
    return
  raise ValueError(
      'too many fields, snippets or expressions to return  %d > maximum %d'
      % (total_requested, MAXIMUM_FIELDS_RETURNED_PER_SEARCH))
2059
2060
class QueryOptions(object):
  """Options for post-processing results for a query.

  Options include the ability to sort results, control which document fields
  to return, produce snippets of fields and compute and sort by complex
  scoring expressions.

  If you wish to randomly access pages of search results, you can use an
  offset:

  # get the first set of results
  page_size = 10
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(limit=page_size)))

  # calculate pages
  pages = results.number_found / page_size

  # user chooses page and hence an offset into results
  next_page = ith * page_size

  # get the search results for that page
  results = index.search(Query(query_string='some stuff',
      options=QueryOptions(limit=page_size, offset=next_page)))
  """

  def __init__(self, limit=20, number_found_accuracy=None, cursor=None,
               offset=None, sort_options=None, returned_fields=None,
               ids_only=False, snippeted_fields=None,
               returned_expressions=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, starting the search from 'cursor token',
    returning another single cursor for the SearchResults, sorting by subject in
    descending order, returning the author, subject, and summary fields as well
    as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
            limit=20,
            cursor=Cursor(),
            sort_options=SortOptions(
                expressions=[
                    SortExpression(expression='subject')],
                limit=1000),
            returned_fields=['author', 'subject', 'summary'],
            snippeted_fields=['content'])))

    Args:
      limit: The limit on number of documents to return in results.
      number_found_accuracy: The minimum accuracy requirement for
        SearchResults.number_found. If set, the number_found will be
        accurate up to at least that number. For example, when set to 100,
        any SearchResults with number_found <= 100 is accurate. This option
        may add considerable latency/expense, especially when used with
        returned_fields.
      cursor: A Cursor describing where to get the next set of results,
        or to provide next cursors in SearchResults.
      offset: The offset is number of documents to skip in search results. This
        is an alternative to using a query cursor, but allows random access into
        the results. Using offsets rather than cursors are more expensive. You
        can only use either cursor or offset, but not both. Using an offset
        means that no cursor is returned in SearchResults.cursor, nor in each
        ScoredDocument.cursor.
      sort_options: A SortOptions specifying a multi-dimensional sort over
        search results.
      returned_fields: An iterable of names of fields to return in search
        results.
      ids_only: Only return document ids, do not return any fields.
      snippeted_fields: An iterable of names of fields to snippet and return
        in search result expressions.
      returned_expressions: An iterable of FieldExpression to evaluate and
        return in search results.

    Raises:
      TypeError: If cursor is not a Cursor or sort_options is not a
        SortOptions.
      ValueError: If cursor and offset are used together, if ids_only and
        returned_fields are used together, or if too many fields, snippets
        and expressions are requested.
      ExpressionError: If one of the returned expression strings is not
        parseable.
    """
    self._limit = _CheckLimit(limit)
    self._number_found_accuracy = _CheckNumberFoundAccuracy(
        number_found_accuracy)
    if cursor is not None and not isinstance(cursor, Cursor):
      raise TypeError('cursor must be a Cursor, got %s' %
                      cursor.__class__.__name__)
    # Cursor-based and offset-based paging are mutually exclusive.
    if cursor is not None and offset is not None:
      raise ValueError('cannot set cursor and offset together')
    self._cursor = cursor
    self._offset = _CheckOffset(offset)
    if sort_options is not None and not isinstance(sort_options, SortOptions):
      raise TypeError('sort_options must be a SortOptions, got %s' %
                      sort_options.__class__.__name__)
    self._sort_options = sort_options

    self._returned_fields = _ConvertToUnicodeList(returned_fields)
    _CheckFieldNames(self._returned_fields)
    self._ids_only = ids_only
    if self._ids_only and self._returned_fields:
      raise ValueError('cannot have ids_only and returned_fields set together')
    self._snippeted_fields = _ConvertToUnicodeList(snippeted_fields)
    _CheckFieldNames(self._snippeted_fields)
    self._returned_expressions = _ConvertToList(returned_expressions)
    for expression in self._returned_expressions:
      _CheckFieldName(_ConvertToUnicode(expression.name))
      _CheckExpression(_ConvertToUnicode(expression.expression))
    _CheckNumberOfFields(self._returned_expressions, self._snippeted_fields,
                         self._returned_fields)

  @property
  def limit(self):
    """Returns a limit on number of documents to return in results."""
    return self._limit

  @property
  def number_found_accuracy(self):
    """Returns minimum accuracy requirement for SearchResults.number_found."""
    return self._number_found_accuracy

  @property
  def cursor(self):
    """Returns the Cursor for the query."""
    return self._cursor

  @property
  def offset(self):
    """Returns the number of documents in search results to skip."""
    return self._offset

  @property
  def sort_options(self):
    """Returns a SortOptions."""
    return self._sort_options

  @property
  def returned_fields(self):
    """Returns an iterable of names of fields to return in search results."""
    return self._returned_fields

  @property
  def ids_only(self):
    """Returns whether to return only document ids in search results."""
    return self._ids_only

  @property
  def snippeted_fields(self):
    """Returns iterable of field names to snippet and return in results."""
    return self._snippeted_fields

  @property
  def returned_expressions(self):
    """Returns iterable of FieldExpression to return in results."""
    return self._returned_expressions

  def __repr__(self):
    """Returns the _Repr rendering of every option, including offset."""
    return _Repr(self, [('limit', self.limit),
                        ('number_found_accuracy', self.number_found_accuracy),
                        ('cursor', self.cursor),
                        ('offset', self.offset),
                        ('sort_options', self.sort_options),
                        ('returned_fields', self.returned_fields),
                        ('ids_only', self.ids_only),
                        ('snippeted_fields', self.snippeted_fields),
                        ('returned_expressions', self.returned_expressions)])
2228
2229
def _CopyQueryOptionsObjectToProtocolBuffer(query, options, params):
  """Copies a QueryOptions object to a SearchParams proto buff.

  Args:
    query: The query string; forwarded so snippet expressions can be built.
    options: The QueryOptions whose settings are copied.
    params: The SearchParams protocol buffer to populate.
  """
  web_safe_string = None
  cursor_type = None
  offset = options.offset
  if options.cursor:
    cursor = options.cursor
    if cursor.per_result:
      cursor_type = search_service_pb.SearchParams.PER_RESULT
    else:
      cursor_type = search_service_pb.SearchParams.SINGLE
    # Only the internal part of the cursor is sent on; the per_result prefix
    # of the web safe string is conveyed through cursor_type instead.
    if isinstance(cursor, Cursor) and cursor.web_safe_string:
      web_safe_string = cursor._internal_cursor
  _CopyQueryOptionsToProtocolBuffer(
      query, offset, options.limit, options.number_found_accuracy,
      web_safe_string, cursor_type, options.ids_only, options.returned_fields,
      options.snippeted_fields, options.returned_expressions,
      options.sort_options, params)
2249
2250
2251 def _CopyQueryOptionsToProtocolBuffer(
2252     query, offset, limit, number_found_accuracy, cursor, cursor_type, ids_only,
2253     returned_fields, snippeted_fields, returned_expressions, sort_options,
2254     params):
2255   """Copies fields of QueryOptions to params protobuf."""
2256   if offset:
2257     params.set_offset(offset)
2258   params.set_limit(limit)
2259   if number_found_accuracy is not None:
2260     params.set_matched_count_accuracy(number_found_accuracy)
2261   if cursor:
2262     params.set_cursor(cursor.encode('utf-8'))
2263   if cursor_type is not None:
2264     params.set_cursor_type(cursor_type)
2265   if ids_only:
2266     params.set_keys_only(ids_only)
2267   if returned_fields or snippeted_fields or returned_expressions:
2268     field_spec_pb = params.mutable_field_spec()
2269     for field in returned_fields:
2270       field_spec_pb.add_name(field.encode('utf-8'))
2271     for snippeted_field in snippeted_fields:
2272       expression = u'snippet(%s, %s)' % (_QuoteString(query), snippeted_field)
2273       _CopyFieldExpressionToProtocolBuffer(
2274           FieldExpression(
2275               name=snippeted_field, expression=expression.encode('utf-8')),
2276           field_spec_pb.add_expression())
2277     for expression in returned_expressions:
2278       _CopyFieldExpressionToProtocolBuffer(
2279           expression, field_spec_pb.add_expression())
2280
2281   if sort_options is not None:
2282     _CopySortOptionsToProtocolBuffer(sort_options, params)
2283
2284
class Query(object):
  """A search request: a query string plus optional QueryOptions."""

  def __init__(self, query_string, options=None):
    """Initializer.

    For example, the following code fragment requests a search for
    documents where 'first' occurs in subject and 'good' occurs anywhere,
    returning at most 20 documents, starting the search from 'cursor token',
    returning another single document cursor for the results, sorting by
    subject in descending order, returning the author, subject, and summary
    fields as well as a snippeted field content.

      results = index.search(Query(
          query_string='subject:first good',
          options=QueryOptions(
              limit=20,
              cursor=Cursor(),
              sort_options=SortOptions(
                  expressions=[
                      SortExpression(expression='subject')],
                  limit=1000),
              returned_fields=['author', 'subject', 'summary'],
              snippeted_fields=['content'])))

    In order to get a Cursor, you specify a Cursor in QueryOptions.cursor
    and extract the Cursor for the next request from results.cursor to
    continue from the last found document, as shown below:

      results = index.search(
          Query(query_string='subject:first good',
                options=QueryOptions(cursor=results.cursor)))

    Args:
      query_string: The query to match against documents in the index. A query
        is a boolean expression containing terms.  For example, the query
          'job tag:"very important" sent <= 2011-02-28'
        finds documents with the term job in any field, that contain the
        phrase "very important" in a tag field, and a sent date up to and
        including 28th February, 2011.  You can use combinations of
          '(cat OR feline) food NOT dog'
        to find documents which contain the term cat or feline as well as food,
        but do not mention the term dog. A further example,
          'category:televisions brand:sony price >= 300 price < 400'
        will return documents which have televisions in a category field, a
        sony brand and a price field which is 300 (inclusive) to 400
        (exclusive).  See
        https://developers.google.com/appengine/docs/python/search/overview#Expressions
        for a list of expressions that can be used in queries.
      options: A QueryOptions describing post-processing of search results.
        It is stored as given.

    Raises:
      QueryError: If the query string is not parseable.
    """
    unicode_query = _ConvertToUnicode(query_string)
    self._query_string = unicode_query
    _CheckQuery(unicode_query)
    self._options = options

  @property
  def query_string(self):
    """The query string that is sent to the search service."""
    return self._query_string

  @property
  def options(self):
    """The QueryOptions defining post-processing of results, or None."""
    return self._options
2354
2355
2356 def _CopyQueryToProtocolBuffer(query, params):
2357   """Copies Query object to params protobuf."""
2358   params.set_query(query.encode('utf-8'))
2359
2360
def _CopyQueryObjectToProtocolBuffer(query, params):
  """Copies a Query object, string and options, into the params protobuf.

  Args:
    query: The Query to copy. When it has no options, a default
      QueryOptions() is used in their place.
    params: The protocol buffer being populated.
  """
  query_string = query.query_string
  _CopyQueryToProtocolBuffer(query_string, params)
  if query.options is None:
    effective_options = QueryOptions()
  else:
    effective_options = query.options
  _CopyQueryOptionsObjectToProtocolBuffer(
      query.query_string, effective_options, params)
2367
2368
2369 class Index(object):
2370   """Represents an index allowing indexing, deleting and searching documents.
2371
2372   The following code fragment shows how to add documents, then search the
2373   index for documents matching a query.
2374
2375     # Get the index.
2376     index = Index(name='index-name')
2377
2378     # Create a document.
2379     doc = Document(doc_id='document-id',
2380                    fields=[TextField(name='subject', value='my first email'),
2381                            HtmlField(name='body',
2382                                      value='<html>some content here</html>')])
2383
2384     # Index the document.
2385     try:
2386       index.put(doc)
2387     except search.Error, e:
2388       # possibly retry indexing or log error
2389
2390     # Query the index.
2391     try:
2392       results = index.search('subject:first body:here')
2393
2394       # Iterate through the search results.
2395       for scored_document in results:
2396          print scored_document
2397
2398     except search.Error, e:
2399       # possibly log the failure
2400
2401   Once an index is created with a given specification, that specification is
2402   immutable.
2403
2404   Search results may contain some out of date documents. However, any two
2405   changes to any document stored in an index are applied in the correct order.
2406   """
2407
2408
2409
2410   RESPONSE_CURSOR, RESULT_CURSOR = ('RESPONSE_CURSOR', 'RESULT_CURSOR')
2411
2412   _CURSOR_TYPES = frozenset([RESPONSE_CURSOR, RESULT_CURSOR])
2413
2414   SEARCH, DATASTORE, CLOUD_STORAGE = ('SEARCH', 'DATASTORE', 'CLOUD_STORAGE')
2415
2416   _SOURCES = frozenset([SEARCH, DATASTORE, CLOUD_STORAGE])
2417
2418   def __init__(self, name, namespace=None, source=SEARCH):
2419     """Initializer.
2420
2421     Args:
2422       name: The name of the index. An index name must be a visible printable
2423         ASCII string not starting with '!'. Whitespace characters are excluded.
2424       namespace: The namespace of the index name. If not set, then the current
2425         namespace is used.
2426       source: Deprecated as of 1.7.6. The source of
2427         the index:
2428           SEARCH - The Index was created by adding documents throught this
2429             search API.
2430           DATASTORE - The Index was created as a side-effect of putting entities
2431             into Datastore.
2432           CLOUD_STORAGE - The Index was created as a side-effect of adding
2433             objects into a Cloud Storage bucket.
2434     Raises:
2435       TypeError: If an unknown attribute is passed.
2436       ValueError: If invalid namespace is given.
2437     """
2438     if source not in self._SOURCES:
2439       raise ValueError('source must be one of %s' % self._SOURCES)
2440     if source is not self.SEARCH:
2441       warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
2442     self._source = source
2443     self._name = _CheckIndexName(_ConvertToUnicode(name))
2444     self._namespace = _ConvertToUnicode(namespace)
2445     if self._namespace is None:
2446       self._namespace = _ConvertToUnicode(namespace_manager.get_namespace())
2447     if self._namespace is None:
2448       self._namespace = u''
2449     namespace_manager.validate_namespace(self._namespace, exception=ValueError)
2450     self._schema = None
2451     self._storage_usage = None
2452     self._storage_limit = None
2453
2454   @property
2455   def schema(self):
2456     """Returns the schema mapping field names to list of types supported.
2457
2458     Only valid for Indexes returned by search.get_indexes method."""
2459     return self._schema
2460
2461   @property
2462   def storage_usage(self):
2463     """The approximate number of bytes used by this index.
2464
2465     The number may be slightly stale, as it may not reflect the
2466     results of recent changes.
2467
2468     Returns None for indexes not obtained from search.get_indexes.
2469
2470     """
2471     return self._storage_usage
2472
2473   @property
2474   def storage_limit(self):
2475     """The maximum allowable storage for this index, in bytes.
2476
2477     Returns None for indexes not obtained from search.get_indexes."""
2478     return self._storage_limit
2479
2480   @property
2481   def name(self):
2482     """Returns the name of the index."""
2483     return self._name
2484
2485   @property
2486   def namespace(self):
2487     """Returns the namespace of the name of the index."""
2488     return self._namespace
2489
2490   @property
2491   def source(self):
2492     """Returns the source of the index.
2493
2494     Deprecated: from 1.7.6, source is no longer available."""
2495     warnings.warn('source is deprecated.', DeprecationWarning, stacklevel=2)
2496     return self._source
2497
2498   def __eq__(self, other):
2499     return (isinstance(other, self.__class__)
2500             and self.__dict__ == other.__dict__)
2501
2502   def __ne__(self, other):
2503     return not self.__eq__(other)
2504
2505   def __hash__(self):
2506     return hash((self._name, self._namespace))
2507
2508   def __repr__(self):
2509
2510     return _Repr(self, [('name', self.name), ('namespace', self.namespace),
2511                         ('source', self._source),
2512                         ('schema', self.schema),
2513                         ('storage_usage', self.storage_usage),
2514                         ('storage_limit', self.storage_limit)])
2515
2516   def _NewPutResultFromPb(self, status_pb, doc_id):
2517     """Constructs PutResult from RequestStatus pb and doc_id."""
2518     message = None
2519     if status_pb.has_error_detail():
2520       message = _DecodeUTF8(status_pb.error_detail())
2521     code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code(),
2522                                          OperationResult.INTERNAL_ERROR)
2523     return PutResult(code=code, message=message, id=_DecodeUTF8(doc_id))
2524
2525   def _NewPutResultList(self, response):
2526     return [self._NewPutResultFromPb(status, doc_id)
2527             for status, doc_id in zip(response.status_list(),
2528                                       response.doc_id_list())]
2529
2530   @datastore_rpc._positional(2)
2531   def put(self, documents, deadline=None):
2532     """Index the collection of documents.
2533
2534     If any of the documents are already in the index, then reindex them with
2535     their corresponding fresh document.
2536
2537     Args:
2538       documents: A Document or iterable of Documents to index.
2539
2540     Kwargs:
2541       deadline: Deadline for RPC call in seconds; if None use the default.
2542
2543     Returns:
2544       A list of PutResult, one per Document requested to be indexed.
2545
2546     Raises:
2547       PutError: If one or more documents failed to index or
2548         number indexed did not match requested.
2549       TypeError: If an unknown attribute is passed.
2550       ValueError: If documents is not a Document or iterable of Document
2551         or number of the documents is larger than
2552         MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
2553     """
2554     return self.put_async(documents, deadline=deadline).get_result()
2555
2556   @datastore_rpc._positional(2)
2557   def put_async(self, documents, deadline=None):
2558     """Asynchronously indexes the collection of documents.
2559
2560     Identical to put() except that it returns a future. Call
2561     get_result() on the return value to block on the call and get its result.
2562     """
2563     if isinstance(documents, basestring):
2564       raise TypeError('documents must be a Document or sequence of '
2565                       'Documents, got %s' % documents.__class__.__name__)
2566     try:
2567       docs = list(iter(documents))
2568     except TypeError:
2569       docs = [documents]
2570
2571     if not docs:
2572       return _WrappedValueFuture([])
2573
2574     if len(docs) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
2575       raise ValueError('too many documents to index')
2576
2577     request = search_service_pb.IndexDocumentRequest()
2578     response = search_service_pb.IndexDocumentResponse()
2579
2580     params = request.mutable_params()
2581     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2582
2583     seen_docs = {}
2584     for document in docs:
2585       doc_id = document.doc_id
2586       if doc_id:
2587         if doc_id in seen_docs:
2588           if document != seen_docs[doc_id]:
2589             raise ValueError(
2590                 'Different documents with the same ID found in the '
2591                 'same call to Index.put()')
2592
2593
2594           continue
2595         seen_docs[doc_id] = document
2596       doc_pb = params.add_document()
2597       _CopyDocumentToProtocolBuffer(document, doc_pb)
2598
2599     def hook():
2600       results = self._NewPutResultList(response)
2601
2602       if response.status_size() != len(params.document_list()):
2603         raise PutError('did not index requested number of documents', results)
2604
2605       for status in response.status_list():
2606         if status.code() != search_service_pb.SearchServiceError.OK:
2607           raise PutError(
2608               _ConcatenateErrorMessages(
2609                   'one or more put document operations failed', status), results)
2610       return results
2611     return _RpcOperationFuture(
2612         'IndexDocument', request, response, deadline, hook)
2613
2614   def _NewDeleteResultFromPb(self, status_pb, doc_id):
2615     """Constructs DeleteResult from RequestStatus pb and doc_id."""
2616     message = None
2617     if status_pb.has_error_detail():
2618       message = _DecodeUTF8(status_pb.error_detail())
2619     code = _ERROR_OPERATION_CODE_MAP.get(status_pb.code(),
2620                                          OperationResult.INTERNAL_ERROR)
2621
2622     return DeleteResult(code=code, message=message, id=doc_id)
2623
2624   def _NewDeleteResultList(self, document_ids, response):
2625     return [self._NewDeleteResultFromPb(status, doc_id)
2626             for status, doc_id in zip(response.status_list(), document_ids)]
2627
2628   @datastore_rpc._positional(2)
2629   def delete(self, document_ids, deadline=None):
2630     """Delete the documents with the corresponding document ids from the index.
2631
2632     If no document exists for the identifier in the list, then that document
2633     identifier is ignored.
2634
2635     Args:
2636       document_ids: A single identifier or list of identifiers of documents
2637         to delete.
2638
2639     Kwargs:
2640       deadline: Deadline for RPC call in seconds; if None use the default.
2641
2642     Raises:
2643       DeleteError: If one or more documents failed to remove or
2644         number removed did not match requested.
2645       ValueError: If document_ids is not a string or iterable of valid document
2646         identifiers or number of document ids is larger than
2647         MAXIMUM_DOCUMENTS_PER_PUT_REQUEST or deadline is a negative number.
2648     """
2649     return self.delete_async(document_ids, deadline=deadline).get_result()
2650
2651   @datastore_rpc._positional(2)
2652   def delete_async(self, document_ids, deadline=None):
2653     """Asynchronously deletes the documents with the corresponding document ids.
2654
2655     Identical to delete() except that it returns a future. Call
2656     get_result() on the return value to block on the call and get its result.
2657     """
2658     doc_ids = _ConvertToList(document_ids)
2659     if not doc_ids:
2660       return _WrappedValueFuture([])
2661
2662     if len(doc_ids) > MAXIMUM_DOCUMENTS_PER_PUT_REQUEST:
2663       raise ValueError('too many documents to delete')
2664
2665     request = search_service_pb.DeleteDocumentRequest()
2666     response = search_service_pb.DeleteDocumentResponse()
2667     params = request.mutable_params()
2668     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2669     for document_id in doc_ids:
2670       _CheckDocumentId(document_id)
2671       params.add_doc_id(document_id)
2672
2673     def hook():
2674       results = self._NewDeleteResultList(doc_ids, response)
2675
2676       if response.status_size() != len(doc_ids):
2677         raise DeleteError(
2678             'did not delete requested number of documents', results)
2679
2680       for status in response.status_list():
2681         if status.code() != search_service_pb.SearchServiceError.OK:
2682           raise DeleteError(
2683               _ConcatenateErrorMessages(
2684                   'one or more delete document operations failed', status),
2685               results)
2686       return results
2687     return _RpcOperationFuture(
2688         'DeleteDocument', request, response, deadline, hook)
2689
2690   def delete_schema(self):
2691     """Deprecated in 1.7.4. Delete the schema from the index.
2692
2693     We are deprecating this method and replacing with more general schema
2694     and index managment.
2695
2696     A possible use may be remove typed fields which are no longer used. After
2697     you delete the schema, you need to index one or more documents to rebuild
2698     the schema. Until you re-index some documents, searches may fail, especially
2699     searches using field restricts.
2700
2701     Raises:
2702       DeleteError: If the schema failed to be deleted.
2703     """
2704     warnings.warn('delete_schema is deprecated in 1.7.4.',
2705                   DeprecationWarning, stacklevel=2)
2706     request = search_service_pb.DeleteSchemaRequest()
2707     response = search_service_pb.DeleteSchemaResponse()
2708     params = request.mutable_params()
2709     _CopyMetadataToProtocolBuffer(self, params.add_index_spec())
2710
2711     def hook():
2712
2713       results = self._NewDeleteResultList([self.name], response)
2714
2715       if response.status_size() != 1:
2716         raise DeleteError('did not delete exactly one schema', results)
2717
2718       status = response.status_list()[0]
2719       if status.code() != search_service_pb.SearchServiceError.OK:
2720         raise DeleteError(
2721             _ConcatenateErrorMessages('delete schema operation failed', status),
2722             results)
2723     return _RpcOperationFuture(
2724         'DeleteSchema', request, response, None, hook).get_result()
2725
2726   def _NewScoredDocumentFromPb(self, doc_pb, sort_scores, expressions, cursor):
2727     """Constructs a Document from a document_pb.Document protocol buffer."""
2728     lang = None
2729     if doc_pb.has_language():
2730       lang = _DecodeUTF8(doc_pb.language())
2731     return ScoredDocument(
2732         doc_id=_DecodeUTF8(doc_pb.id()),
2733         fields=_NewFieldsFromPb(doc_pb.field_list()),
2734         language=lang, rank=doc_pb.order_id(), sort_scores=sort_scores,
2735         expressions=_NewFieldsFromPb(expressions), cursor=cursor)
2736
2737   def _NewSearchResults(self, response, cursor):
2738     """Returns a SearchResults populated from a search_service response pb."""
2739     results = []
2740     for result_pb in response.result_list():
2741       per_result_cursor = None
2742       if result_pb.has_cursor():
2743         if isinstance(cursor, Cursor):
2744
2745           per_result_cursor = Cursor(web_safe_string=_ToWebSafeString(
2746               cursor.per_result, _DecodeUTF8(result_pb.cursor())))
2747       results.append(
2748           self._NewScoredDocumentFromPb(
2749               result_pb.document(), result_pb.score_list(),
2750               result_pb.expression_list(), per_result_cursor))
2751     results_cursor = None
2752     if response.has_cursor():
2753       if isinstance(cursor, Cursor):
2754
2755         results_cursor = Cursor(web_safe_string=_ToWebSafeString(
2756             cursor.per_result, _DecodeUTF8(response.cursor())))
2757     return SearchResults(
2758         results=results, number_found=response.matched_count(),
2759         cursor=results_cursor)
2760
2761   @datastore_rpc._positional(2)
2762   def get(self, doc_id, deadline=None):
2763     """Retrieve a document by document ID.
2764
2765     Args:
2766       doc_id: The ID of the document to retreive.
2767
2768     Kwargs:
2769       deadline: Deadline for RPC call in seconds; if None use the default.
2770
2771     Returns:
2772       If the document ID exists, returns the associated document. Otherwise,
2773       returns None.
2774
2775     Raises:
2776       TypeError: If any of the parameters have invalid types, or an unknown
2777         attribute is passed.
2778       ValueError: If any of the parameters have invalid values (e.g., a
2779         negative deadline).
2780     """
2781     return self.get_async(doc_id, deadline=deadline).get_result()
2782
2783   @datastore_rpc._positional(2)
2784   def get_async(self, doc_id, deadline=None):
2785     """Asynchronously retrieve a document by document ID.
2786
2787     Identical to get() except that it returns a future. Call
2788     get_result() on the return value to block on the call and get its result.
2789     """
2790     future = self.get_range_async(start_id=doc_id, limit=1, deadline=deadline)
2791     def hook(response):
2792       if response.results and response.results[0].doc_id == doc_id:
2793         return response.results[0]
2794       return None
2795     return _SimpleOperationFuture(future, hook)
2796
2797   @datastore_rpc._positional(2)
2798   def search(self, query, deadline=None, **kwargs):
2799     """Search the index for documents matching the query.
2800
2801     For example, the following code fragment requests a search for
2802     documents where 'first' occurs in subject and 'good' occurs anywhere,
2803     returning at most 20 documents, starting the search from 'cursor token',
2804     returning another single cursor for the response, sorting by subject in
2805     descending order, returning the author, subject, and summary fields as well
2806     as a snippeted field content.
2807
2808       results = index.search(
2809           query=Query('subject:first good',
2810               options=QueryOptions(limit=20,
2811                   cursor=Cursor(),
2812                   sort_options=SortOptions(
2813                       expressions=[SortExpression(expression='subject')],
2814                       limit=1000),
2815                   returned_fields=['author', 'subject', 'summary'],
2816                   snippeted_fields=['content'])))
2817
2818     The following code fragment shows how to use a results cursor
2819
2820       cursor = results.cursor
2821       for result in results:
2822          # process result
2823
2824       results = index.search(
2825           Query('subject:first good', options=QueryOptions(cursor=cursor)))
2826
2827     The following code fragment shows how to use a per_result cursor
2828
2829       results = index.search(
2830           query=Query('subject:first good',
2831               options=QueryOptions(limit=20,
2832                   cursor=Cursor(per_result=True),
2833                   ...)))
2834
2835       cursor = None
2836       for result in results:
2837          cursor = result.cursor
2838
2839       results = index.search(
2840           Query('subject:first good', options=QueryOptions(cursor=cursor)))
2841
2842     See http://developers.google.com/appengine/docs/python/search/query_strings
2843     for more information about query syntax.
2844
2845     Args:
2846       query: The Query to match against documents in the index.
2847
2848     Kwargs:
2849       deadline: Deadline for RPC call in seconds; if None use the default.
2850
2851     Returns:
2852       A SearchResults containing a list of documents matched, number returned
2853       and number matched by the query.
2854
2855     Raises:
2856       TypeError: If any of the parameters have invalid types, or an unknown
2857         attribute is passed.
2858       ValueError: If any of the parameters have invalid values (e.g., a
2859         negative deadline).
2860     """
2861     return self.search_async(query, deadline=deadline, **kwargs).get_result()
2862
2863   @datastore_rpc._positional(2)
2864   def search_async(self, query, deadline=None, **kwargs):
2865     """Asynchronously searches the index for documents matching the query.
2866
2867     Identical to search() except that it returns a future. Call
2868     get_result() on the return value to block on the call and get its result.
2869     """
2870     if isinstance(query, basestring):
2871       query = Query(query_string=query)
2872     request = self._NewSearchRequest(query, deadline, **kwargs)
2873     response = search_service_pb.SearchResponse()
2874     def hook():
2875       _CheckStatus(response.status())
2876       cursor = None
2877       if query.options:
2878         cursor = query.options.cursor
2879       return self._NewSearchResults(response, cursor)
2880     return _RpcOperationFuture('Search', request, response, deadline, hook)
2881
2882   def _NewSearchRequest(self, query, deadline, **kwargs):
2883
2884     app_id = kwargs.pop('app_id', None)
2885     if kwargs:
2886       raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
2887
2888     request = search_service_pb.SearchRequest()
2889     if app_id:
2890       request.set_app_id(app_id)
2891
2892     params = request.mutable_params()
2893     if isinstance(query, basestring):
2894       query = Query(query_string=query)
2895     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2896     _CopyQueryObjectToProtocolBuffer(query, params)
2897     return request
2898
2899   def _NewGetResponse(self, response):
2900     """Returns a GetResponse from the list_documents response pb."""
2901     documents = []
2902     for doc_proto in response.document_list():
2903       documents.append(_NewDocumentFromPb(doc_proto))
2904
2905     return GetResponse(results=documents)
2906
2907   @datastore_rpc._positional(5)
2908   def get_range(self, start_id=None, include_start_object=True,
2909                 limit=100, ids_only=False, deadline=None, **kwargs):
2910     """Get a range of Documents in the index, in id order.
2911
2912     Args:
2913       start_id: String containing the Id from which to list
2914         Documents from. By default, starts at the first Id.
2915       include_start_object: If true, include the Document with the
2916         Id specified by the start_id parameter.
2917       limit: The maximum number of Documents to return.
2918       ids_only: If true, the Documents returned only contain their keys.
2919
2920     Kwargs:
2921       deadline: Deadline for RPC call in seconds; if None use the default.
2922
2923     Returns:
2924       A GetResponse containing a list of Documents, ordered by Id.
2925
2926     Raises:
2927       Error: Some subclass of Error is raised if an error occurred processing
2928         the request.
2929       TypeError: If any of the parameters have invalid types, or an unknown
2930         attribute is passed.
2931       ValueError: If any of the parameters have invalid values (e.g., a
2932         negative deadline).
2933     """
2934     return self.get_range_async(
2935         start_id, include_start_object, limit, ids_only, deadline=deadline,
2936         **kwargs).get_result()
2937
2938   @datastore_rpc._positional(5)
2939   def get_range_async(self, start_id=None, include_start_object=True,
2940                       limit=100, ids_only=False, deadline=None, **kwargs):
2941     """Asynchronously gets a range of Documents in the index, in id order.
2942
2943     Identical to get_range() except that it returns a future. Call
2944     get_result() on the return value to block on the call and get its result.
2945     """
2946
2947     app_id = kwargs.pop('app_id', None)
2948     if kwargs:
2949       raise TypeError('Invalid arguments: %s' % ', '.join(kwargs))
2950     request = search_service_pb.ListDocumentsRequest()
2951     if app_id:
2952       request.set_app_id(app_id)
2953
2954     params = request.mutable_params()
2955     _CopyMetadataToProtocolBuffer(self, params.mutable_index_spec())
2956
2957     if start_id:
2958       params.set_start_doc_id(start_id)
2959     params.set_include_start_doc(include_start_object)
2960
2961     params.set_limit(_CheckInteger(
2962         limit, 'limit', zero_ok=False,
2963         upper_bound=MAXIMUM_DOCUMENTS_RETURNED_PER_SEARCH))
2964     params.set_keys_only(ids_only)
2965
2966     response = search_service_pb.ListDocumentsResponse()
2967     def hook():
2968       _CheckStatus(response.status())
2969       return self._NewGetResponse(response)
2970     return _RpcOperationFuture(
2971         'ListDocuments', request, response, deadline, hook)
2972
2973
# Maps the cursor type constants defined on Index (None meaning no cursor)
# to the corresponding search_service_pb.SearchParams cursor type values.
_CURSOR_TYPE_PB_MAP = {
    None: search_service_pb.SearchParams.NONE,
    Index.RESPONSE_CURSOR: search_service_pb.SearchParams.SINGLE,
    Index.RESULT_CURSOR: search_service_pb.SearchParams.PER_RESULT,
}


# Maps the source constants defined on Index to the corresponding
# search_service_pb.IndexSpec source values.
_SOURCES_TO_PB_MAP = {
    Index.SEARCH: search_service_pb.IndexSpec.SEARCH,
    Index.DATASTORE: search_service_pb.IndexSpec.DATASTORE,
    Index.CLOUD_STORAGE: search_service_pb.IndexSpec.CLOUD_STORAGE,
}


# The reverse direction: search_service_pb.IndexSpec source values to the
# source constants defined on Index.
_SOURCE_PB_TO_SOURCES_MAP = {
    search_service_pb.IndexSpec.SEARCH: Index.SEARCH,
    search_service_pb.IndexSpec.DATASTORE: Index.DATASTORE,
    search_service_pb.IndexSpec.CLOUD_STORAGE: Index.CLOUD_STORAGE,
}
2993
2994
def _CopyMetadataToProtocolBuffer(index, spec_pb):
  """Copies an Index specification to a search_service_pb.IndexSpec.

  The name and namespace are always copied, encoded as UTF-8. The source is
  only written when it differs from Index.SEARCH.

  Args:
    index: The Index whose name, namespace and source are read.
    spec_pb: The search_service_pb.IndexSpec to fill in.
  """
  encoded_name = index.name.encode('utf-8')
  spec_pb.set_name(encoded_name)
  encoded_namespace = index.namespace.encode('utf-8')
  spec_pb.set_namespace(encoded_namespace)

  if index._source == Index.SEARCH:
    # Index.SEARCH is never written to the spec.
    return
  spec_pb.set_source(_SOURCES_TO_PB_MAP.get(index._source))
3003
3004
# Maps document_pb.FieldValue content types to the field type constants
# defined on Field.
_FIELD_TYPE_MAP = {
    document_pb.FieldValue.TEXT: Field.TEXT,
    document_pb.FieldValue.HTML: Field.HTML,
    document_pb.FieldValue.ATOM: Field.ATOM,
    document_pb.FieldValue.DATE: Field.DATE,
    document_pb.FieldValue.NUMBER: Field.NUMBER,
    document_pb.FieldValue.GEO: Field.GEO_POINT,
}
3013
3014
def _NewSchemaFromPb(field_type_pb_list):
  """Creates a map of field name to type list from a FieldTypes pb list.

  Args:
    field_type_pb_list: An iterable of document_pb.FieldTypes protocol
      buffers, each giving a field name and the types seen for it.

  Returns:
    A dict mapping each decoded field name to the list of public field types
    (looked up in _FIELD_TYPE_MAP), in the order the types were encountered.
    Entries sharing a name are merged into one list.

  Raises:
    KeyError: If a type is not present in _FIELD_TYPE_MAP.
  """
  schema = {}
  for field_type_pb in field_type_pb_list:
    for pb_type in field_type_pb.type_list():
      public_type = _FIELD_TYPE_MAP[pb_type]
      field_name = _DecodeUTF8(field_type_pb.name())
      schema.setdefault(field_name, []).append(public_type)
  return schema
3027
3028
def _NewIndexFromIndexSpecPb(index_spec_pb):
  """Creates an Index from a search_service_pb.IndexSpec.

  The namespace is only passed to the Index constructor when the spec has
  one set; otherwise the constructor's own default applies. The source is
  translated through _SOURCE_PB_TO_SOURCES_MAP (None when unmapped).
  """
  source = _SOURCE_PB_TO_SOURCES_MAP.get(index_spec_pb.source())
  if not index_spec_pb.has_namespace():
    return Index(name=index_spec_pb.name(), source=source)
  return Index(name=index_spec_pb.name(),
               namespace=index_spec_pb.namespace(),
               source=source)
3040
3041
def _NewIndexFromPb(index_metadata_pb):
  """Creates an Index from a search_service_pb.IndexMetadata.

  The Index is built from the metadata's index spec. Its schema is filled in
  only when the metadata lists fields, and its storage usage and limit only
  when the metadata has storage information.
  """
  index = _NewIndexFromIndexSpecPb(index_metadata_pb.index_spec())

  field_pbs = index_metadata_pb.field_list()
  if field_pbs:
    index._schema = _NewSchemaFromPb(field_pbs)

  if index_metadata_pb.has_storage():
    storage_pb = index_metadata_pb.storage()
    index._storage_usage = storage_pb.amount_used()
    index._storage_limit = storage_pb.limit()
  return index
3051
3052
def _MakeSyncSearchServiceCall(call, request, response, deadline):
  """Deprecated: Makes a synchronous call to the search service.

  If the deadline is not None, waits only until the deadline expires. A
  warning about the deprecation is logged on every call.

  Args:
    call: Method name to call, as a string
    request: The request object
    response: The response object
    deadline: Deadline for RPC call in seconds; if None use the default.

  Raises:
    TypeError: if the deadline is not a number and is not None.
    ValueError: If the deadline is not greater than zero.
    Exception: An apiproxy_errors.ApplicationError raised by the call is
      converted with _ToSearchError and the converted error is raised.
  """
  _ValidateDeadline(deadline)
  logging.warning("_MakeSyncSearchServiceCall is deprecated; please use API.")
  try:
    if deadline is not None:
      # A deadline needs an explicit UserRPC; MakeSyncCall is given none.
      rpc = apiproxy_stub_map.UserRPC('search', deadline=deadline)
      rpc.make_call(call, request, response)
      rpc.wait()
      rpc.check_success()
    else:
      apiproxy_stub_map.MakeSyncCall('search', call, request, response)
  except apiproxy_errors.ApplicationError:
    # sys.exc_info()[1] is the exception instance currently being handled.
    raise _ToSearchError(sys.exc_info()[1])
3084
3085 def _ValidateDeadline(deadline):
3086   if deadline is None:
3087     return
3088   if (not isinstance(deadline, (int, long, float))
3089       or isinstance(deadline, (bool,))):
3090     raise TypeError('deadline argument should be int/long/float (%r)'
3091                     % (deadline,))
3092   if deadline <= 0:
3093     raise ValueError('deadline argument must be > 0 (%s)' % (deadline,))