Fix some case-insensitive cases for StartsWith.
[chromium-blink-merge.git] / components / omnibox / search_suggestion_parser.cc
blob1e04d5fda21488563a67b9b8e6eb2040e9d6b3cb
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/omnibox/search_suggestion_parser.h"
7 #include <algorithm>
9 #include "base/i18n/icu_string_conversions.h"
10 #include "base/json/json_string_value_serializer.h"
11 #include "base/json/json_writer.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram_macros.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/values.h"
18 #include "components/omnibox/autocomplete_input.h"
19 #include "components/omnibox/url_prefix.h"
20 #include "components/url_fixer/url_fixer.h"
21 #include "net/base/net_util.h"
22 #include "net/http/http_response_headers.h"
23 #include "net/url_request/url_fetcher.h"
24 #include "url/url_constants.h"
26 namespace {
28 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
29 if (type == "CALCULATOR")
30 return AutocompleteMatchType::CALCULATOR;
31 if (type == "ENTITY")
32 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
33 if (type == "TAIL")
34 return AutocompleteMatchType::SEARCH_SUGGEST_TAIL;
35 if (type == "PERSONALIZED_QUERY")
36 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
37 if (type == "PROFILE")
38 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
39 if (type == "NAVIGATION")
40 return AutocompleteMatchType::NAVSUGGEST;
41 if (type == "PERSONALIZED_NAVIGATION")
42 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
43 return AutocompleteMatchType::SEARCH_SUGGEST;
46 } // namespace
48 // SearchSuggestionParser::Result ----------------------------------------------
50 SearchSuggestionParser::Result::Result(bool from_keyword_provider,
51 int relevance,
52 bool relevance_from_server,
53 AutocompleteMatchType::Type type,
54 const std::string& deletion_url)
55 : from_keyword_provider_(from_keyword_provider),
56 type_(type),
57 relevance_(relevance),
58 relevance_from_server_(relevance_from_server),
59 received_after_last_keystroke_(true),
60 deletion_url_(deletion_url) {}
62 SearchSuggestionParser::Result::~Result() {}
64 // SearchSuggestionParser::SuggestResult ---------------------------------------
66 SearchSuggestionParser::SuggestResult::SuggestResult(
67 const base::string16& suggestion,
68 AutocompleteMatchType::Type type,
69 const base::string16& match_contents,
70 const base::string16& match_contents_prefix,
71 const base::string16& annotation,
72 const base::string16& answer_contents,
73 const base::string16& answer_type,
74 scoped_ptr<SuggestionAnswer> answer,
75 const std::string& suggest_query_params,
76 const std::string& deletion_url,
77 bool from_keyword_provider,
78 int relevance,
79 bool relevance_from_server,
80 bool should_prefetch,
81 const base::string16& input_text)
82 : Result(from_keyword_provider,
83 relevance,
84 relevance_from_server,
85 type,
86 deletion_url),
87 suggestion_(suggestion),
88 match_contents_prefix_(match_contents_prefix),
89 annotation_(annotation),
90 suggest_query_params_(suggest_query_params),
91 answer_contents_(answer_contents),
92 answer_type_(answer_type),
93 answer_(answer.Pass()),
94 should_prefetch_(should_prefetch) {
95 match_contents_ = match_contents;
96 DCHECK(!match_contents_.empty());
97 ClassifyMatchContents(true, input_text);
100 SearchSuggestionParser::SuggestResult::SuggestResult(
101 const SuggestResult& result)
102 : Result(result),
103 suggestion_(result.suggestion_),
104 match_contents_prefix_(result.match_contents_prefix_),
105 annotation_(result.annotation_),
106 suggest_query_params_(result.suggest_query_params_),
107 answer_contents_(result.answer_contents_),
108 answer_type_(result.answer_type_),
109 answer_(SuggestionAnswer::copy(result.answer_.get())),
110 should_prefetch_(result.should_prefetch_) {
113 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
115 SearchSuggestionParser::SuggestResult&
116 SearchSuggestionParser::SuggestResult::operator=(const SuggestResult& rhs) {
117 if (this == &rhs)
118 return *this;
120 // Assign via parent class first.
121 Result::operator=(rhs);
123 suggestion_ = rhs.suggestion_;
124 match_contents_prefix_ = rhs.match_contents_prefix_;
125 annotation_ = rhs.annotation_;
126 suggest_query_params_ = rhs.suggest_query_params_;
127 answer_contents_ = rhs.answer_contents_;
128 answer_type_ = rhs.answer_type_;
129 answer_ = SuggestionAnswer::copy(rhs.answer_.get());
130 should_prefetch_ = rhs.should_prefetch_;
132 return *this;
135 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
136 const bool allow_bolding_all,
137 const base::string16& input_text) {
138 if (input_text.empty()) {
139 // In case of zero-suggest results, do not highlight matches.
140 match_contents_class_.push_back(
141 ACMatchClassification(0, ACMatchClassification::NONE));
142 return;
145 base::string16 lookup_text = input_text;
146 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_TAIL) {
147 const size_t contents_index =
148 suggestion_.length() - match_contents_.length();
149 // Ensure the query starts with the input text, and ends with the match
150 // contents, and the input text has an overlap with contents.
151 if (base::StartsWith(suggestion_, input_text,
152 base::CompareCase::SENSITIVE) &&
153 base::EndsWith(suggestion_, match_contents_, true) &&
154 (input_text.length() > contents_index)) {
155 lookup_text = input_text.substr(contents_index);
158 // Do a case-insensitive search for |lookup_text|.
159 base::string16::const_iterator lookup_position = std::search(
160 match_contents_.begin(), match_contents_.end(), lookup_text.begin(),
161 lookup_text.end(), base::CaseInsensitiveCompare<base::char16>());
162 if (!allow_bolding_all && (lookup_position == match_contents_.end())) {
163 // Bail if the code below to update the bolding would bold the whole
164 // string. Note that the string may already be entirely bolded; if
165 // so, leave it as is.
166 return;
168 match_contents_class_.clear();
169 // We do intra-string highlighting for suggestions - the suggested segment
170 // will be highlighted, e.g. for input_text = "you" the suggestion may be
171 // "youtube", so we'll bold the "tube" section: you*tube*.
172 if (input_text != match_contents_) {
173 if (lookup_position == match_contents_.end()) {
174 // The input text is not a substring of the query string, e.g. input
175 // text is "slasdot" and the query string is "slashdot", so we bold the
176 // whole thing.
177 match_contents_class_.push_back(
178 ACMatchClassification(0, ACMatchClassification::MATCH));
179 } else {
180 // We don't iterate over the string here annotating all matches because
181 // it looks odd to have every occurrence of a substring that may be as
182 // short as a single character highlighted in a query suggestion result,
183 // e.g. for input text "s" and query string "southwest airlines", it
184 // looks odd if both the first and last s are highlighted.
185 const size_t lookup_index = lookup_position - match_contents_.begin();
186 if (lookup_index != 0) {
187 match_contents_class_.push_back(
188 ACMatchClassification(0, ACMatchClassification::MATCH));
190 match_contents_class_.push_back(
191 ACMatchClassification(lookup_index, ACMatchClassification::NONE));
192 size_t next_fragment_position = lookup_index + lookup_text.length();
193 if (next_fragment_position < match_contents_.length()) {
194 match_contents_class_.push_back(ACMatchClassification(
195 next_fragment_position, ACMatchClassification::MATCH));
198 } else {
199 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
200 // for the default provider or a keyword search provider.
201 match_contents_class_.push_back(
202 ACMatchClassification(0, ACMatchClassification::NONE));
206 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
207 const AutocompleteInput& input,
208 bool keyword_provider_requested) const {
209 if (!from_keyword_provider_ && keyword_provider_requested)
210 return 100;
211 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
214 // SearchSuggestionParser::NavigationResult ------------------------------------
216 SearchSuggestionParser::NavigationResult::NavigationResult(
217 const AutocompleteSchemeClassifier& scheme_classifier,
218 const GURL& url,
219 AutocompleteMatchType::Type type,
220 const base::string16& description,
221 const std::string& deletion_url,
222 bool from_keyword_provider,
223 int relevance,
224 bool relevance_from_server,
225 const base::string16& input_text,
226 const std::string& languages)
227 : Result(from_keyword_provider, relevance, relevance_from_server, type,
228 deletion_url),
229 url_(url),
230 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
231 url, net::FormatUrl(url, languages,
232 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
233 net::UnescapeRule::SPACES, NULL, NULL, NULL),
234 scheme_classifier)),
235 description_(description) {
236 DCHECK(url_.is_valid());
237 CalculateAndClassifyMatchContents(true, input_text, languages);
240 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
242 void
243 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
244 const bool allow_bolding_nothing,
245 const base::string16& input_text,
246 const std::string& languages) {
247 if (input_text.empty()) {
248 // In case of zero-suggest results, do not highlight matches.
249 match_contents_class_.push_back(
250 ACMatchClassification(0, ACMatchClassification::NONE));
251 return;
254 // First look for the user's input inside the formatted url as it would be
255 // without trimming the scheme, so we can find matches at the beginning of the
256 // scheme.
257 const URLPrefix* prefix =
258 URLPrefix::BestURLPrefix(formatted_url_, input_text);
259 size_t match_start = (prefix == NULL) ?
260 formatted_url_.find(input_text) : prefix->prefix.length();
261 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
262 (!prefix || (match_start != 0));
263 const net::FormatUrlTypes format_types =
264 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP);
266 base::string16 match_contents = net::FormatUrl(url_, languages, format_types,
267 net::UnescapeRule::SPACES, NULL, NULL, &match_start);
268 // If the first match in the untrimmed string was inside a scheme that we
269 // trimmed, look for a subsequent match.
270 if (match_start == base::string16::npos)
271 match_start = match_contents.find(input_text);
272 // Update |match_contents_| and |match_contents_class_| if it's allowed.
273 if (allow_bolding_nothing || (match_start != base::string16::npos)) {
274 match_contents_ = match_contents;
275 // Safe if |match_start| is npos; also safe if the input is longer than the
276 // remaining contents after |match_start|.
277 AutocompleteMatch::ClassifyLocationInString(match_start,
278 input_text.length(), match_contents_.length(),
279 ACMatchClassification::URL, &match_contents_class_);
283 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
284 const AutocompleteInput& input,
285 bool keyword_provider_requested) const {
286 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
289 // SearchSuggestionParser::Results ---------------------------------------------
291 SearchSuggestionParser::Results::Results()
292 : verbatim_relevance(-1),
293 field_trial_triggered(false),
294 relevances_from_server(false) {}
296 SearchSuggestionParser::Results::~Results() {}
298 void SearchSuggestionParser::Results::Clear() {
299 suggest_results.clear();
300 navigation_results.clear();
301 verbatim_relevance = -1;
302 metadata.clear();
305 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
306 if (verbatim_relevance >= 0)
307 return true;
309 // Right now either all results of one type will be server-scored or they will
310 // all be locally scored, but in case we change this later, we'll just check
311 // them all.
312 for (SuggestResults::const_iterator i(suggest_results.begin());
313 i != suggest_results.end(); ++i) {
314 if (i->relevance_from_server())
315 return true;
317 for (NavigationResults::const_iterator i(navigation_results.begin());
318 i != navigation_results.end(); ++i) {
319 if (i->relevance_from_server())
320 return true;
323 return false;
326 // SearchSuggestionParser ------------------------------------------------------
328 // static
329 std::string SearchSuggestionParser::ExtractJsonData(
330 const net::URLFetcher* source) {
331 const net::HttpResponseHeaders* const response_headers =
332 source->GetResponseHeaders();
333 std::string json_data;
334 source->GetResponseAsString(&json_data);
336 // JSON is supposed to be UTF-8, but some suggest service providers send
337 // JSON files in non-UTF-8 encodings. The actual encoding is usually
338 // specified in the Content-Type header field.
339 if (response_headers) {
340 std::string charset;
341 if (response_headers->GetCharset(&charset)) {
342 base::string16 data_16;
343 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
344 if (base::CodepageToUTF16(json_data, charset.c_str(),
345 base::OnStringConversionError::FAIL,
346 &data_16))
347 json_data = base::UTF16ToUTF8(data_16);
350 return json_data;
353 // static
354 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
355 base::StringPiece json_data) {
356 // The JSON response should be an array.
357 for (size_t response_start_index = json_data.find("["), i = 0;
358 response_start_index != base::StringPiece::npos && i < 5;
359 response_start_index = json_data.find("[", 1), i++) {
360 // Remove any XSSI guards to allow for JSON parsing.
361 json_data.remove_prefix(response_start_index);
363 JSONStringValueDeserializer deserializer(json_data);
364 deserializer.set_allow_trailing_comma(true);
365 int error_code = 0;
366 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
367 if (error_code == 0)
368 return data.Pass();
370 return scoped_ptr<base::Value>();
373 // static
374 bool SearchSuggestionParser::ParseSuggestResults(
375 const base::Value& root_val,
376 const AutocompleteInput& input,
377 const AutocompleteSchemeClassifier& scheme_classifier,
378 int default_result_relevance,
379 const std::string& languages,
380 bool is_keyword_result,
381 Results* results) {
382 base::string16 query;
383 const base::ListValue* root_list = NULL;
384 const base::ListValue* results_list = NULL;
386 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
387 query != input.text() || !root_list->GetList(1, &results_list))
388 return false;
390 // 3rd element: Description list.
391 const base::ListValue* descriptions = NULL;
392 root_list->GetList(2, &descriptions);
394 // 4th element: Disregard the query URL list for now.
396 // Reset suggested relevance information.
397 results->verbatim_relevance = -1;
399 // 5th element: Optional key-value pairs from the Suggest server.
400 const base::ListValue* types = NULL;
401 const base::ListValue* relevances = NULL;
402 const base::ListValue* suggestion_details = NULL;
403 const base::DictionaryValue* extras = NULL;
404 int prefetch_index = -1;
405 if (root_list->GetDictionary(4, &extras)) {
406 extras->GetList("google:suggesttype", &types);
408 // Discard this list if its size does not match that of the suggestions.
409 if (extras->GetList("google:suggestrelevance", &relevances) &&
410 (relevances->GetSize() != results_list->GetSize()))
411 relevances = NULL;
412 extras->GetInteger("google:verbatimrelevance",
413 &results->verbatim_relevance);
415 // Check if the active suggest field trial (if any) has triggered either
416 // for the default provider or keyword provider.
417 results->field_trial_triggered = false;
418 extras->GetBoolean("google:fieldtrialtriggered",
419 &results->field_trial_triggered);
421 const base::DictionaryValue* client_data = NULL;
422 if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
423 client_data->GetInteger("phi", &prefetch_index);
425 if (extras->GetList("google:suggestdetail", &suggestion_details) &&
426 suggestion_details->GetSize() != results_list->GetSize())
427 suggestion_details = NULL;
429 // Store the metadata that came with the response in case we need to pass it
430 // along with the prefetch query to Instant.
431 JSONStringValueSerializer json_serializer(&results->metadata);
432 json_serializer.Serialize(*extras);
435 // Clear the previous results now that new results are available.
436 results->suggest_results.clear();
437 results->navigation_results.clear();
438 results->answers_image_urls.clear();
440 base::string16 suggestion;
441 std::string type;
442 int relevance = default_result_relevance;
443 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
444 const bool allow_navsuggest =
445 input.type() != metrics::OmniboxInputType::FORCED_QUERY;
446 const base::string16& trimmed_input =
447 base::CollapseWhitespace(input.text(), false);
448 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
449 // Google search may return empty suggestions for weird input characters,
450 // they make no sense at all and can cause problems in our code.
451 if (suggestion.empty())
452 continue;
454 // Apply valid suggested relevance scores; discard invalid lists.
455 if (relevances != NULL && !relevances->GetInteger(index, &relevance))
456 relevances = NULL;
457 AutocompleteMatchType::Type match_type =
458 AutocompleteMatchType::SEARCH_SUGGEST;
459 if (types && types->GetString(index, &type))
460 match_type = GetAutocompleteMatchType(type);
461 const base::DictionaryValue* suggestion_detail = NULL;
462 std::string deletion_url;
464 if (suggestion_details &&
465 suggestion_details->GetDictionary(index, &suggestion_detail))
466 suggestion_detail->GetString("du", &deletion_url);
468 if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
469 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
470 // Do not blindly trust the URL coming from the server to be valid.
471 GURL url(
472 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
473 if (url.is_valid() && allow_navsuggest) {
474 base::string16 title;
475 if (descriptions != NULL)
476 descriptions->GetString(index, &title);
477 results->navigation_results.push_back(NavigationResult(
478 scheme_classifier, url, match_type, title, deletion_url,
479 is_keyword_result, relevance, relevances != NULL, input.text(),
480 languages));
482 } else {
483 // TODO(dschuyler) If the "= " is no longer sent from the back-end
484 // then this may be removed.
485 if ((match_type == AutocompleteMatchType::CALCULATOR) &&
486 !suggestion.compare(0, 2, base::UTF8ToUTF16("= ")))
487 suggestion.erase(0, 2);
489 base::string16 match_contents = suggestion;
490 base::string16 match_contents_prefix;
491 base::string16 annotation;
492 base::string16 answer_contents;
493 base::string16 answer_type_str;
494 scoped_ptr<SuggestionAnswer> answer;
495 std::string suggest_query_params;
497 if (suggestion_details) {
498 suggestion_details->GetDictionary(index, &suggestion_detail);
499 if (suggestion_detail) {
500 suggestion_detail->GetString("t", &match_contents);
501 suggestion_detail->GetString("mp", &match_contents_prefix);
502 // Error correction for bad data from server.
503 if (match_contents.empty())
504 match_contents = suggestion;
505 suggestion_detail->GetString("a", &annotation);
506 suggestion_detail->GetString("q", &suggest_query_params);
508 // Extract the Answer, if provided.
509 const base::DictionaryValue* answer_json = NULL;
510 if (suggestion_detail->GetDictionary("ansa", &answer_json) &&
511 suggestion_detail->GetString("ansb", &answer_type_str)) {
512 bool answer_parsed_successfully = false;
513 answer = SuggestionAnswer::ParseAnswer(answer_json);
514 int answer_type = 0;
515 if (answer && base::StringToInt(answer_type_str, &answer_type)) {
516 answer_parsed_successfully = true;
518 answer->set_type(answer_type);
519 answer->AddImageURLsTo(&results->answers_image_urls);
521 std::string contents;
522 base::JSONWriter::Write(*answer_json, &contents);
523 answer_contents = base::UTF8ToUTF16(contents);
524 } else {
525 answer_type_str = base::string16();
527 UMA_HISTOGRAM_BOOLEAN("Omnibox.AnswerParseSuccess",
528 answer_parsed_successfully);
533 bool should_prefetch = static_cast<int>(index) == prefetch_index;
534 results->suggest_results.push_back(SuggestResult(
535 base::CollapseWhitespace(suggestion, false), match_type,
536 base::CollapseWhitespace(match_contents, false),
537 match_contents_prefix, annotation, answer_contents, answer_type_str,
538 answer.Pass(), suggest_query_params, deletion_url, is_keyword_result,
539 relevance, relevances != NULL, should_prefetch, trimmed_input));
542 results->relevances_from_server = relevances != NULL;
543 return true;