Revert of Add address3 autofill heuristics. (patchset #6 id:100001 of https://coderev...
[chromium-blink-merge.git] / components / autofill / core / browser / form_structure.cc
blob4baffa7e294d74bace21901dfd0bae37feb7b2d9
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/form_structure.h"
7 #include <utility>
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/i18n/case_conversion.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/sha1.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/time/time.h"
20 #include "components/autofill/core/browser/autofill_metrics.h"
21 #include "components/autofill/core/browser/autofill_type.h"
22 #include "components/autofill/core/browser/autofill_xml_parser.h"
23 #include "components/autofill/core/browser/field_types.h"
24 #include "components/autofill/core/browser/form_field.h"
25 #include "components/autofill/core/common/autofill_constants.h"
26 #include "components/autofill/core/common/form_data.h"
27 #include "components/autofill/core/common/form_data_predictions.h"
28 #include "components/autofill/core/common/form_field_data.h"
29 #include "components/autofill/core/common/form_field_data_predictions.h"
30 #include "third_party/icu/source/i18n/unicode/regex.h"
31 #include "third_party/webrtc/libjingle/xmllite/xmlelement.h"
33 namespace autofill {
34 namespace {
// Version string sent as the "clientversion" attribute of server requests.
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";

// Declaration prepended to every serialized XML request.
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// XML element names.
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementFieldAssignments[] = "fieldassignments";
const char kXMLElementField[] = "field";
const char kXMLElementFields[] = "fields";
const char kXMLElementForm[] = "form";

// XML attribute names.
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFieldID[] = "fieldid";
const char kAttributeFieldType[] = "fieldtype";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeName[] = "name";
const char kAttributeSignature[] = "signature";

// Mode tokens. NOTE(review): their uses are outside this part of the file;
// presumably the "billing"/"shipping" autocomplete section hints - confirm.
const char kBillingMode[] = "billing";
const char kShippingMode[] = "shipping";

// Regular expression matching runs of five or more consecutive digits. Such
// runs are stripped from field names before they enter the form signature.
const char kIgnorePatternInFieldName[] = "\\d{5,}+";
60 // Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
61 // |available_field_types| and returns the hex representation as a string.
62 std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
63 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
64 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
65 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
67 // Pack the types in |available_field_types| into |bit_field|.
68 std::vector<uint8> bit_field(kNumBytes, 0);
69 for (ServerFieldTypeSet::const_iterator field_type =
70 available_field_types.begin();
71 field_type != available_field_types.end();
72 ++field_type) {
73 // Set the appropriate bit in the field. The bit we set is the one
74 // |field_type| % 8 from the left of the byte.
75 const size_t byte = *field_type / 8;
76 const size_t bit = 0x80 >> (*field_type % 8);
77 DCHECK(byte < bit_field.size());
78 bit_field[byte] |= bit;
81 // Discard any trailing zeroes.
82 // If there are no available types, we return the empty string.
83 size_t data_end = bit_field.size();
84 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
87 // Print all meaningfull bytes into a string.
88 std::string data_presence;
89 data_presence.reserve(data_end * 2 + 1);
90 for (size_t i = 0; i < data_end; ++i) {
91 base::StringAppendF(&data_presence, "%02x", bit_field[i]);
94 return data_presence;
97 // Helper for |EncodeFormRequest()| that creates XmlElements for the given field
98 // in upload xml, and also add them to the parent XmlElement.
99 void EncodeFieldForUpload(const AutofillField& field,
100 buzz::XmlElement* parent) {
101 // Don't upload checkable fields.
102 if (field.is_checkable)
103 return;
105 ServerFieldTypeSet types = field.possible_types();
106 // |types| could be empty in unit-tests only.
107 for (ServerFieldTypeSet::iterator field_type = types.begin();
108 field_type != types.end(); ++field_type) {
109 buzz::XmlElement *field_element = new buzz::XmlElement(
110 buzz::QName(kXMLElementField));
112 field_element->SetAttr(buzz::QName(kAttributeSignature),
113 field.FieldSignature());
114 field_element->SetAttr(buzz::QName(kAttributeAutofillType),
115 base::IntToString(*field_type));
116 parent->AddElement(field_element);
120 // Helper for |EncodeFormRequest()| that creates XmlElement for the given field
121 // in query xml, and also add it to the parent XmlElement.
122 void EncodeFieldForQuery(const AutofillField& field,
123 buzz::XmlElement* parent) {
124 buzz::XmlElement *field_element = new buzz::XmlElement(
125 buzz::QName(kXMLElementField));
126 field_element->SetAttr(buzz::QName(kAttributeSignature),
127 field.FieldSignature());
128 parent->AddElement(field_element);
131 // Helper for |EncodeFormRequest()| that creates XmlElements for the given field
132 // in field assignments xml, and also add them to the parent XmlElement.
133 void EncodeFieldForFieldAssignments(const AutofillField& field,
134 buzz::XmlElement* parent) {
135 ServerFieldTypeSet types = field.possible_types();
136 for (ServerFieldTypeSet::iterator field_type = types.begin();
137 field_type != types.end(); ++field_type) {
138 buzz::XmlElement *field_element = new buzz::XmlElement(
139 buzz::QName(kXMLElementFields));
141 field_element->SetAttr(buzz::QName(kAttributeFieldID),
142 field.FieldSignature());
143 field_element->SetAttr(buzz::QName(kAttributeFieldType),
144 base::IntToString(*field_type));
145 field_element->SetAttr(buzz::QName(kAttributeName),
146 base::UTF16ToUTF8(field.name));
147 parent->AddElement(field_element);
// Returns |true| iff |token| is one of the contact type hints recognized here
// ("home", "work" or "mobile"), as specified in the implementation section of
// http://is.gd/whatwg_autocomplete
// Note that "fax" and "pager" are intentionally ignored, as Chrome does not
// support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static const char* const kContactTypeHints[] = { "home", "work", "mobile" };
  const size_t hint_count =
      sizeof(kContactTypeHints) / sizeof(kContactTypeHints[0]);
  for (size_t i = 0; i < hint_count; ++i) {
    if (token == kContactTypeHints[i])
      return true;
  }
  return false;
}
159 // Returns |true| iff the |token| is a type hint appropriate for a field of the
160 // given |field_type|, as specified in the implementation section of
161 // http://is.gd/whatwg_autocomplete
162 bool ContactTypeHintMatchesFieldType(const std::string& token,
163 HtmlFieldType field_type) {
164 // The "home" and "work" type hints are only appropriate for email and phone
165 // number field types.
166 if (token == "home" || token == "work") {
167 return field_type == HTML_TYPE_EMAIL ||
168 (field_type >= HTML_TYPE_TEL &&
169 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
172 // The "mobile" type hint is only appropriate for phone number field types.
173 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
174 // support filling either type of information.
175 if (token == "mobile") {
176 return field_type >= HTML_TYPE_TEL &&
177 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
180 return false;
183 // Returns the Chrome Autofill-supported field type corresponding to the given
184 // |autocomplete_attribute_value|, if there is one, in the context of the given
185 // |field|. Chrome Autofill supports a subset of the field types listed at
186 // http://is.gd/whatwg_autocomplete
187 HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
188 const std::string& autocomplete_attribute_value,
189 const AutofillField& field) {
190 if (autocomplete_attribute_value == "name")
191 return HTML_TYPE_NAME;
193 if (autocomplete_attribute_value == "given-name")
194 return HTML_TYPE_GIVEN_NAME;
196 if (autocomplete_attribute_value == "additional-name") {
197 if (field.max_length == 1)
198 return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
199 else
200 return HTML_TYPE_ADDITIONAL_NAME;
203 if (autocomplete_attribute_value == "family-name")
204 return HTML_TYPE_FAMILY_NAME;
206 if (autocomplete_attribute_value == "organization")
207 return HTML_TYPE_ORGANIZATION;
209 if (autocomplete_attribute_value == "street-address")
210 return HTML_TYPE_STREET_ADDRESS;
212 if (autocomplete_attribute_value == "address-line1")
213 return HTML_TYPE_ADDRESS_LINE1;
215 if (autocomplete_attribute_value == "address-line2")
216 return HTML_TYPE_ADDRESS_LINE2;
218 if (autocomplete_attribute_value == "address-line3")
219 return HTML_TYPE_ADDRESS_LINE3;
221 // TODO(estade): remove support for "locality" and "region".
222 if (autocomplete_attribute_value == "locality")
223 return HTML_TYPE_ADDRESS_LEVEL2;
225 if (autocomplete_attribute_value == "region")
226 return HTML_TYPE_ADDRESS_LEVEL1;
228 if (autocomplete_attribute_value == "address-level1")
229 return HTML_TYPE_ADDRESS_LEVEL1;
231 if (autocomplete_attribute_value == "address-level2")
232 return HTML_TYPE_ADDRESS_LEVEL2;
234 if (autocomplete_attribute_value == "address-level3")
235 return HTML_TYPE_ADDRESS_LEVEL3;
237 if (autocomplete_attribute_value == "country")
238 return HTML_TYPE_COUNTRY_CODE;
240 if (autocomplete_attribute_value == "country-name")
241 return HTML_TYPE_COUNTRY_NAME;
243 if (autocomplete_attribute_value == "postal-code")
244 return HTML_TYPE_POSTAL_CODE;
246 // content_switches.h isn't accessible from here, hence we have
247 // to copy the string literal. This should be removed soon anyway.
248 if (autocomplete_attribute_value == "address" &&
249 CommandLine::ForCurrentProcess()->HasSwitch(
250 "enable-experimental-web-platform-features")) {
251 return HTML_TYPE_FULL_ADDRESS;
254 if (autocomplete_attribute_value == "cc-name")
255 return HTML_TYPE_CREDIT_CARD_NAME;
257 if (autocomplete_attribute_value == "cc-number")
258 return HTML_TYPE_CREDIT_CARD_NUMBER;
260 if (autocomplete_attribute_value == "cc-exp") {
261 if (field.max_length == 5)
262 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
263 else if (field.max_length == 7)
264 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
265 else
266 return HTML_TYPE_CREDIT_CARD_EXP;
269 if (autocomplete_attribute_value == "cc-exp-month")
270 return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
272 if (autocomplete_attribute_value == "cc-exp-year") {
273 if (field.max_length == 2)
274 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
275 else if (field.max_length == 4)
276 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
277 else
278 return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
281 if (autocomplete_attribute_value == "cc-csc")
282 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
284 if (autocomplete_attribute_value == "cc-type")
285 return HTML_TYPE_CREDIT_CARD_TYPE;
287 if (autocomplete_attribute_value == "transaction-amount")
288 return HTML_TYPE_TRANSACTION_AMOUNT;
290 if (autocomplete_attribute_value == "transaction-currency")
291 return HTML_TYPE_TRANSACTION_CURRENCY;
293 if (autocomplete_attribute_value == "tel")
294 return HTML_TYPE_TEL;
296 if (autocomplete_attribute_value == "tel-country-code")
297 return HTML_TYPE_TEL_COUNTRY_CODE;
299 if (autocomplete_attribute_value == "tel-national")
300 return HTML_TYPE_TEL_NATIONAL;
302 if (autocomplete_attribute_value == "tel-area-code")
303 return HTML_TYPE_TEL_AREA_CODE;
305 if (autocomplete_attribute_value == "tel-local")
306 return HTML_TYPE_TEL_LOCAL;
308 if (autocomplete_attribute_value == "tel-local-prefix")
309 return HTML_TYPE_TEL_LOCAL_PREFIX;
311 if (autocomplete_attribute_value == "tel-local-suffix")
312 return HTML_TYPE_TEL_LOCAL_SUFFIX;
314 if (autocomplete_attribute_value == "email")
315 return HTML_TYPE_EMAIL;
317 return HTML_TYPE_UNKNOWN;
320 std::string StripDigitsIfRequired(const base::string16& input) {
321 UErrorCode status = U_ZERO_ERROR;
322 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
323 (kIgnorePatternInFieldName));
324 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
325 (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
326 DCHECK_EQ(status, U_ZERO_ERROR);
328 icu::UnicodeString icu_input(input.data(), input.length());
329 matcher.reset(icu_input);
331 icu::UnicodeString replaced_string = matcher.replaceAll("", status);
333 std::string return_string;
334 status = U_ZERO_ERROR;
335 base::UTF16ToUTF8(replaced_string.getBuffer(),
336 static_cast<size_t>(replaced_string.length()),
337 &return_string);
338 if (status != U_ZERO_ERROR) {
339 DVLOG(1) << "Couldn't strip digits in " << base::UTF16ToUTF8(input);
340 return base::UTF16ToUTF8(input);
343 return return_string;
346 } // namespace
348 FormStructure::FormStructure(const FormData& form)
349 : form_name_(form.name),
350 source_url_(form.origin),
351 target_url_(form.action),
352 autofill_count_(0),
353 active_field_count_(0),
354 upload_required_(USE_UPLOAD_RATES),
355 has_author_specified_types_(false),
356 has_password_field_(false) {
357 // Copy the form fields.
358 std::map<base::string16, size_t> unique_names;
359 for (std::vector<FormFieldData>::const_iterator field =
360 form.fields.begin();
361 field != form.fields.end(); ++field) {
362 if (!ShouldSkipField(*field)) {
363 // Add all supported form fields (including with empty names) to the
364 // signature. This is a requirement for Autofill servers.
365 form_signature_field_names_.append("&");
366 form_signature_field_names_.append(StripDigitsIfRequired(field->name));
368 ++active_field_count_;
371 if (field->form_control_type == "password")
372 has_password_field_ = true;
374 // Generate a unique name for this field by appending a counter to the name.
375 // Make sure to prepend the counter with a non-numeric digit so that we are
376 // guaranteed to avoid collisions.
377 if (!unique_names.count(field->name))
378 unique_names[field->name] = 1;
379 else
380 ++unique_names[field->name];
381 base::string16 unique_name = field->name + base::ASCIIToUTF16("_") +
382 base::IntToString16(unique_names[field->name]);
383 fields_.push_back(new AutofillField(*field, unique_name));
387 FormStructure::~FormStructure() {}
389 void FormStructure::DetermineHeuristicTypes(
390 const AutofillMetrics& metric_logger) {
391 // First, try to detect field types based on each field's |autocomplete|
392 // attribute value. If there is at least one form field that specifies an
393 // autocomplete type hint, don't try to apply other heuristics to match fields
394 // in this form.
395 bool has_author_specified_sections;
396 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
397 &has_author_specified_sections);
399 if (!has_author_specified_types_) {
400 ServerFieldTypeMap field_type_map;
401 FormField::ParseFormFields(fields_.get(), &field_type_map);
402 for (size_t i = 0; i < field_count(); ++i) {
403 AutofillField* field = fields_[i];
404 ServerFieldTypeMap::iterator iter =
405 field_type_map.find(field->unique_name());
406 if (iter != field_type_map.end())
407 field->set_heuristic_type(iter->second);
411 UpdateAutofillCount();
412 IdentifySections(has_author_specified_sections);
414 if (IsAutofillable()) {
415 metric_logger.LogDeveloperEngagementMetric(
416 AutofillMetrics::FILLABLE_FORM_PARSED);
417 if (has_author_specified_types_) {
418 metric_logger.LogDeveloperEngagementMetric(
419 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
424 bool FormStructure::EncodeUploadRequest(
425 const ServerFieldTypeSet& available_field_types,
426 bool form_was_autofilled,
427 std::string* encoded_xml) const {
428 DCHECK(ShouldBeCrowdsourced());
430 // Verify that |available_field_types| agrees with the possible field types we
431 // are uploading.
432 for (std::vector<AutofillField*>::const_iterator field = begin();
433 field != end();
434 ++field) {
435 for (ServerFieldTypeSet::const_iterator type =
436 (*field)->possible_types().begin();
437 type != (*field)->possible_types().end();
438 ++type) {
439 DCHECK(*type == UNKNOWN_TYPE ||
440 *type == EMPTY_TYPE ||
441 available_field_types.count(*type));
445 // Set up the <autofillupload> element and its attributes.
446 buzz::XmlElement autofill_request_xml(
447 (buzz::QName(kXMLElementAutofillUpload)));
448 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
449 kClientVersion);
450 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
451 FormSignature());
452 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
453 form_was_autofilled ? "true" : "false");
454 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
455 EncodeFieldTypes(available_field_types).c_str());
457 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
458 return false; // Malformed form, skip it.
460 // Obtain the XML structure as a string.
461 *encoded_xml = kXMLDeclaration;
462 *encoded_xml += autofill_request_xml.Str().c_str();
464 // To enable this logging, run with the flag --vmodule="form_structure=2".
465 VLOG(2) << "\n" << *encoded_xml;
467 return true;
470 bool FormStructure::EncodeFieldAssignments(
471 const ServerFieldTypeSet& available_field_types,
472 std::string* encoded_xml) const {
473 DCHECK(ShouldBeCrowdsourced());
475 // Set up the <fieldassignments> element and its attributes.
476 buzz::XmlElement autofill_request_xml(
477 (buzz::QName(kXMLElementFieldAssignments)));
478 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
479 FormSignature());
481 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
482 &autofill_request_xml))
483 return false; // Malformed form, skip it.
485 // Obtain the XML structure as a string.
486 *encoded_xml = kXMLDeclaration;
487 *encoded_xml += autofill_request_xml.Str().c_str();
489 return true;
492 // static
493 bool FormStructure::EncodeQueryRequest(
494 const std::vector<FormStructure*>& forms,
495 std::vector<std::string>* encoded_signatures,
496 std::string* encoded_xml) {
497 DCHECK(encoded_signatures);
498 DCHECK(encoded_xml);
499 encoded_xml->clear();
500 encoded_signatures->clear();
501 encoded_signatures->reserve(forms.size());
503 // Set up the <autofillquery> element and attributes.
504 buzz::XmlElement autofill_request_xml(
505 (buzz::QName(kXMLElementAutofillQuery)));
506 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
507 kClientVersion);
509 // Some badly formatted web sites repeat forms - detect that and encode only
510 // one form as returned data would be the same for all the repeated forms.
511 std::set<std::string> processed_forms;
512 for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
513 it != forms.end();
514 ++it) {
515 std::string signature((*it)->FormSignature());
516 if (processed_forms.find(signature) != processed_forms.end())
517 continue;
518 processed_forms.insert(signature);
519 scoped_ptr<buzz::XmlElement> encompassing_xml_element(
520 new buzz::XmlElement(buzz::QName(kXMLElementForm)));
521 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
522 signature);
524 if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
525 encompassing_xml_element.get()))
526 continue; // Malformed form, skip it.
528 autofill_request_xml.AddElement(encompassing_xml_element.release());
529 encoded_signatures->push_back(signature);
532 if (!encoded_signatures->size())
533 return false;
535 // Note: Chrome used to also set 'accepts="e"' (where 'e' is for experiments),
536 // but no longer sets this because support for experiments is deprecated. If
537 // it ever resurfaces, re-add code here to set the attribute accordingly.
539 // Obtain the XML structure as a string.
540 *encoded_xml = kXMLDeclaration;
541 *encoded_xml += autofill_request_xml.Str().c_str();
543 return true;
546 // static
547 void FormStructure::ParseQueryResponse(
548 const std::string& response_xml,
549 const std::vector<FormStructure*>& forms,
550 const AutofillMetrics& metric_logger) {
551 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
553 // Parse the field types from the server response to the query.
554 std::vector<AutofillServerFieldInfo> field_infos;
555 UploadRequired upload_required;
556 AutofillQueryXmlParser parse_handler(&field_infos,
557 &upload_required);
558 buzz::XmlParser parser(&parse_handler);
559 parser.Parse(response_xml.c_str(), response_xml.length(), true);
560 if (!parse_handler.succeeded())
561 return;
563 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
565 bool heuristics_detected_fillable_field = false;
566 bool query_response_overrode_heuristics = false;
568 // Copy the field types into the actual form.
569 std::vector<AutofillServerFieldInfo>::iterator current_info =
570 field_infos.begin();
571 for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
572 iter != forms.end(); ++iter) {
573 FormStructure* form = *iter;
574 form->upload_required_ = upload_required;
576 for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
577 field != form->fields_.end(); ++field) {
578 if (form->ShouldSkipField(**field))
579 continue;
581 // In some cases *successful* response does not return all the fields.
582 // Quit the update of the types then.
583 if (current_info == field_infos.end())
584 break;
586 // If |form->has_author_specified_types| only password fields should be
587 // updated.
588 if (!form->has_author_specified_types_ ||
589 (*field)->form_control_type == "password") {
590 // UNKNOWN_TYPE is reserved for use by the client.
591 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
593 ServerFieldType heuristic_type = (*field)->heuristic_type();
594 if (heuristic_type != UNKNOWN_TYPE)
595 heuristics_detected_fillable_field = true;
597 (*field)->set_server_type(current_info->field_type);
598 if (heuristic_type != (*field)->Type().GetStorableType())
599 query_response_overrode_heuristics = true;
601 // Copy default value into the field if available.
602 if (!current_info->default_value.empty())
603 (*field)->set_default_value(current_info->default_value);
606 ++current_info;
609 form->UpdateAutofillCount();
610 form->IdentifySections(false);
613 AutofillMetrics::ServerQueryMetric metric;
614 if (query_response_overrode_heuristics) {
615 if (heuristics_detected_fillable_field) {
616 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
617 } else {
618 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
620 } else {
621 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
623 metric_logger.LogServerQueryMetric(metric);
626 // static
627 void FormStructure::GetFieldTypePredictions(
628 const std::vector<FormStructure*>& form_structures,
629 std::vector<FormDataPredictions>* forms) {
630 forms->clear();
631 forms->reserve(form_structures.size());
632 for (size_t i = 0; i < form_structures.size(); ++i) {
633 FormStructure* form_structure = form_structures[i];
634 FormDataPredictions form;
635 form.data.name = form_structure->form_name_;
636 form.data.origin = form_structure->source_url_;
637 form.data.action = form_structure->target_url_;
638 form.signature = form_structure->FormSignature();
640 for (std::vector<AutofillField*>::const_iterator field =
641 form_structure->fields_.begin();
642 field != form_structure->fields_.end(); ++field) {
643 form.data.fields.push_back(FormFieldData(**field));
645 FormFieldDataPredictions annotated_field;
646 annotated_field.signature = (*field)->FieldSignature();
647 annotated_field.heuristic_type =
648 AutofillType((*field)->heuristic_type()).ToString();
649 annotated_field.server_type =
650 AutofillType((*field)->server_type()).ToString();
651 annotated_field.overall_type = (*field)->Type().ToString();
652 form.fields.push_back(annotated_field);
655 forms->push_back(form);
659 std::string FormStructure::FormSignature() const {
660 std::string scheme(target_url_.scheme());
661 std::string host(target_url_.host());
663 // If target host or scheme is empty, set scheme and host of source url.
664 // This is done to match the Toolbar's behavior.
665 if (scheme.empty() || host.empty()) {
666 scheme = source_url_.scheme();
667 host = source_url_.host();
670 std::string form_string = scheme + "://" + host + "&" +
671 base::UTF16ToUTF8(form_name_) +
672 form_signature_field_names_;
674 return Hash64Bit(form_string);
677 bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
678 return field.is_checkable;
681 bool FormStructure::IsAutofillable() const {
682 if (autofill_count() < kRequiredAutofillFields)
683 return false;
685 return ShouldBeParsed();
688 void FormStructure::UpdateAutofillCount() {
689 autofill_count_ = 0;
690 for (std::vector<AutofillField*>::const_iterator iter = begin();
691 iter != end(); ++iter) {
692 AutofillField* field = *iter;
693 if (field && field->IsFieldFillable())
694 ++autofill_count_;
698 bool FormStructure::ShouldBeParsed() const {
699 if (active_field_count() < kRequiredAutofillFields)
700 return false;
702 // Rule out http(s)://*/search?...
703 // e.g. http://www.google.com/search?q=...
704 // http://search.yahoo.com/search?p=...
705 if (target_url_.path() == "/search")
706 return false;
708 bool has_text_field = false;
709 for (std::vector<AutofillField*>::const_iterator it = begin();
710 it != end() && !has_text_field; ++it) {
711 has_text_field |= (*it)->form_control_type != "select-one";
714 return has_text_field;
717 bool FormStructure::ShouldBeCrowdsourced() const {
718 return (has_password_field_ || !has_author_specified_types_) &&
719 ShouldBeParsed();
722 void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
723 // Map from field signatures to cached fields.
724 std::map<std::string, const AutofillField*> cached_fields;
725 for (size_t i = 0; i < cached_form.field_count(); ++i) {
726 const AutofillField* field = cached_form.field(i);
727 cached_fields[field->FieldSignature()] = field;
730 for (std::vector<AutofillField*>::const_iterator iter = begin();
731 iter != end(); ++iter) {
732 AutofillField* field = *iter;
734 std::map<std::string, const AutofillField*>::const_iterator
735 cached_field = cached_fields.find(field->FieldSignature());
736 if (cached_field != cached_fields.end()) {
737 if (field->form_control_type != "select-one" &&
738 field->value == cached_field->second->value) {
739 // From the perspective of learning user data, text fields containing
740 // default values are equivalent to empty fields.
741 field->value = base::string16();
744 field->set_heuristic_type(cached_field->second->heuristic_type());
745 field->set_server_type(cached_field->second->server_type());
749 UpdateAutofillCount();
751 // The form signature should match between query and upload requests to the
752 // server. On many websites, form elements are dynamically added, removed, or
753 // rearranged via JavaScript between page load and form submission, so we
754 // copy over the |form_signature_field_names_| corresponding to the query
755 // request.
756 DCHECK_EQ(cached_form.form_name_, form_name_);
757 DCHECK_EQ(cached_form.source_url_, source_url_);
758 DCHECK_EQ(cached_form.target_url_, target_url_);
759 form_signature_field_names_ = cached_form.form_signature_field_names_;
762 void FormStructure::LogQualityMetrics(
763 const AutofillMetrics& metric_logger,
764 const base::TimeTicks& load_time,
765 const base::TimeTicks& interaction_time,
766 const base::TimeTicks& submission_time) const {
767 size_t num_detected_field_types = 0;
768 bool did_autofill_all_possible_fields = true;
769 bool did_autofill_some_possible_fields = false;
770 for (size_t i = 0; i < field_count(); ++i) {
771 const AutofillField* field = this->field(i);
773 // No further logging for empty fields nor for fields where the entered data
774 // does not appear to already exist in the user's stored Autofill data.
775 const ServerFieldTypeSet& field_types = field->possible_types();
776 DCHECK(!field_types.empty());
777 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
778 continue;
780 // Similarly, no further logging for password fields. Those are primarily
781 // related to a different feature code path, and so make more sense to track
782 // outside of this metric.
783 if (field->form_control_type == "password")
784 continue;
786 ++num_detected_field_types;
787 if (field->is_autofilled)
788 did_autofill_some_possible_fields = true;
789 else
790 did_autofill_all_possible_fields = false;
792 // Collapse field types that Chrome treats as identical, e.g. home and
793 // billing address fields.
794 ServerFieldTypeSet collapsed_field_types;
795 for (ServerFieldTypeSet::const_iterator it = field_types.begin();
796 it != field_types.end();
797 ++it) {
798 // Since we currently only support US phone numbers, the (city code + main
799 // digits) number is almost always identical to the whole phone number.
800 // TODO(isherman): Improve this logic once we add support for
801 // international numbers.
802 if (*it == PHONE_HOME_CITY_AND_NUMBER)
803 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
804 else
805 collapsed_field_types.insert(AutofillType(*it).GetStorableType());
808 // Capture the field's type, if it is unambiguous.
809 ServerFieldType field_type = UNKNOWN_TYPE;
810 if (collapsed_field_types.size() == 1)
811 field_type = *collapsed_field_types.begin();
813 ServerFieldType heuristic_type =
814 AutofillType(field->heuristic_type()).GetStorableType();
815 ServerFieldType server_type =
816 AutofillType(field->server_type()).GetStorableType();
817 ServerFieldType predicted_type = field->Type().GetStorableType();
819 // Log heuristic, server, and overall type quality metrics, independently of
820 // whether the field was autofilled.
821 if (heuristic_type == UNKNOWN_TYPE) {
822 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
823 field_type);
824 } else if (field_types.count(heuristic_type)) {
825 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
826 field_type);
827 } else {
828 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
829 field_type);
832 if (server_type == NO_SERVER_DATA) {
833 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
834 field_type);
835 } else if (field_types.count(server_type)) {
836 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
837 field_type);
838 } else {
839 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
840 field_type);
843 if (predicted_type == UNKNOWN_TYPE) {
844 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
845 field_type);
846 } else if (field_types.count(predicted_type)) {
847 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
848 field_type);
849 } else {
850 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
851 field_type);
855 if (num_detected_field_types < kRequiredAutofillFields) {
856 metric_logger.LogUserHappinessMetric(
857 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
858 } else {
859 if (did_autofill_all_possible_fields) {
860 metric_logger.LogUserHappinessMetric(
861 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
862 } else if (did_autofill_some_possible_fields) {
863 metric_logger.LogUserHappinessMetric(
864 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
865 } else {
866 metric_logger.LogUserHappinessMetric(
867 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
870 // Unlike the other times, the |submission_time| should always be available.
871 DCHECK(!submission_time.is_null());
873 // The |load_time| might be unset, in the case that the form was dynamically
874 // added to the DOM.
875 if (!load_time.is_null()) {
876 // Submission should always chronologically follow form load.
877 DCHECK(submission_time > load_time);
878 base::TimeDelta elapsed = submission_time - load_time;
879 if (did_autofill_some_possible_fields)
880 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
881 else
882 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
885 // The |interaction_time| might be unset, in the case that the user
886 // submitted a blank form.
887 if (!interaction_time.is_null()) {
888 // Submission should always chronologically follow interaction.
889 DCHECK(submission_time > interaction_time);
890 base::TimeDelta elapsed = submission_time - interaction_time;
891 if (did_autofill_some_possible_fields) {
892 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
893 } else {
894 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
895 elapsed);
901 const AutofillField* FormStructure::field(size_t index) const {
902 if (index >= fields_.size()) {
903 NOTREACHED();
904 return NULL;
907 return fields_[index];
910 AutofillField* FormStructure::field(size_t index) {
911 return const_cast<AutofillField*>(
912 static_cast<const FormStructure*>(this)->field(index));
915 size_t FormStructure::field_count() const {
916 return fields_.size();
919 size_t FormStructure::active_field_count() const {
920 return active_field_count_;
923 FormData FormStructure::ToFormData() const {
924 // |data.user_submitted| will always be false.
925 FormData data;
926 data.name = form_name_;
927 data.origin = source_url_;
928 data.action = target_url_;
930 for (size_t i = 0; i < fields_.size(); ++i) {
931 data.fields.push_back(FormFieldData(*fields_[i]));
934 return data;
937 bool FormStructure::operator==(const FormData& form) const {
938 // TODO(jhawkins): Is this enough to differentiate a form?
939 if (form_name_ == form.name &&
940 source_url_ == form.origin &&
941 target_url_ == form.action) {
942 return true;
945 // TODO(jhawkins): Compare field names, IDs and labels once we have labels
946 // set up.
948 return false;
951 bool FormStructure::operator!=(const FormData& form) const {
952 return !operator==(form);
955 std::string FormStructure::Hash64Bit(const std::string& str) {
956 std::string hash_bin = base::SHA1HashString(str);
957 DCHECK_EQ(20U, hash_bin.length());
959 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
960 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
961 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
962 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
963 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
964 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
965 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
966 ((static_cast<uint64>(hash_bin[7])) & 0xFF);
968 return base::Uint64ToString(hash64);
971 bool FormStructure::EncodeFormRequest(
972 FormStructure::EncodeRequestType request_type,
973 buzz::XmlElement* encompassing_xml_element) const {
974 if (!field_count()) // Nothing to add.
975 return false;
977 // Some badly formatted web sites repeat fields - limit number of fields to
978 // 48, which is far larger than any valid form and XML still fits into 2K.
979 // Do not send requests for forms with more than this many fields, as they are
980 // near certainly not valid/auto-fillable.
981 const size_t kMaxFieldsOnTheForm = 48;
982 if (field_count() > kMaxFieldsOnTheForm)
983 return false;
985 // Add the child nodes for the form fields.
986 for (size_t index = 0; index < field_count(); ++index) {
987 const AutofillField* field = fields_[index];
988 switch (request_type) {
989 case FormStructure::UPLOAD:
990 EncodeFieldForUpload(*field, encompassing_xml_element);
991 break;
992 case FormStructure::QUERY:
993 if (ShouldSkipField(*field))
994 continue;
995 EncodeFieldForQuery(*field, encompassing_xml_element);
996 break;
997 case FormStructure::FIELD_ASSIGNMENTS:
998 EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
999 break;
1002 return true;
1005 void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1006 bool* found_types,
1007 bool* found_sections) {
1008 const std::string kDefaultSection = "-default";
1010 *found_types = false;
1011 *found_sections = false;
1012 for (std::vector<AutofillField*>::iterator it = fields_.begin();
1013 it != fields_.end(); ++it) {
1014 AutofillField* field = *it;
1016 // To prevent potential section name collisions, add a default suffix for
1017 // other fields. Without this, 'autocomplete' attribute values
1018 // "section--shipping street-address" and "shipping street-address" would be
1019 // parsed identically, given the section handling code below. We do this
1020 // before any validation so that fields with invalid attributes still end up
1021 // in the default section. These default section names will be overridden
1022 // by subsequent heuristic parsing steps if there are no author-specified
1023 // section names.
1024 field->set_section(kDefaultSection);
1026 // Canonicalize the attribute value by trimming whitespace, collapsing
1027 // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1028 std::string autocomplete_attribute =
1029 base::CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1030 autocomplete_attribute = base::StringToLowerASCII(autocomplete_attribute);
1032 // The autocomplete attribute is overloaded: it can specify either a field
1033 // type hint or whether autocomplete should be enabled at all. Ignore the
1034 // latter type of attribute value.
1035 if (autocomplete_attribute.empty() ||
1036 autocomplete_attribute == "on" ||
1037 autocomplete_attribute == "off") {
1038 continue;
1041 // Any other value, even it is invalid, is considered to be a type hint.
1042 // This allows a website's author to specify an attribute like
1043 // autocomplete="other" on a field to disable all Autofill heuristics for
1044 // the form.
1045 *found_types = true;
1047 // Tokenize the attribute value. Per the spec, the tokens are parsed in
1048 // reverse order.
1049 std::vector<std::string> tokens;
1050 Tokenize(autocomplete_attribute, " ", &tokens);
1052 // The final token must be the field type.
1053 // If it is not one of the known types, abort.
1054 DCHECK(!tokens.empty());
1055 std::string field_type_token = tokens.back();
1056 tokens.pop_back();
1057 HtmlFieldType field_type =
1058 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1059 if (field_type == HTML_TYPE_UNKNOWN)
1060 continue;
1062 // The preceding token, if any, may be a type hint.
1063 if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1064 // If it is, it must match the field type; otherwise, abort.
1065 // Note that an invalid token invalidates the entire attribute value, even
1066 // if the other tokens are valid.
1067 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1068 continue;
1070 // Chrome Autofill ignores these type hints.
1071 tokens.pop_back();
1074 // The preceding token, if any, may be a fixed string that is either
1075 // "shipping" or "billing". Chrome Autofill treats these as implicit
1076 // section name suffixes.
1077 DCHECK_EQ(kDefaultSection, field->section());
1078 std::string section = field->section();
1079 HtmlFieldMode mode = HTML_MODE_NONE;
1080 if (!tokens.empty()) {
1081 if (tokens.back() == kShippingMode)
1082 mode = HTML_MODE_SHIPPING;
1083 else if (tokens.back() == kBillingMode)
1084 mode = HTML_MODE_BILLING;
1087 if (mode != HTML_MODE_NONE) {
1088 section = "-" + tokens.back();
1089 tokens.pop_back();
1092 // The preceding token, if any, may be a named section.
1093 const std::string kSectionPrefix = "section-";
1094 if (!tokens.empty() &&
1095 StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1096 // Prepend this section name to the suffix set in the preceding block.
1097 section = tokens.back().substr(kSectionPrefix.size()) + section;
1098 tokens.pop_back();
1101 // No other tokens are allowed. If there are any remaining, abort.
1102 if (!tokens.empty())
1103 continue;
1105 if (section != kDefaultSection) {
1106 *found_sections = true;
1107 field->set_section(section);
1110 // No errors encountered while parsing!
1111 // Update the |field|'s type based on what was parsed from the attribute.
1112 field->SetHtmlType(field_type, mode);
1116 bool FormStructure::FillFields(
1117 const std::vector<ServerFieldType>& types,
1118 const InputFieldComparator& matches,
1119 const base::Callback<base::string16(const AutofillType&)>& get_info,
1120 const std::string& address_language_code,
1121 const std::string& app_locale) {
1122 bool filled_something = false;
1123 for (size_t i = 0; i < field_count(); ++i) {
1124 for (size_t j = 0; j < types.size(); ++j) {
1125 if (matches.Run(types[j], *field(i))) {
1126 AutofillField::FillFormField(*field(i),
1127 get_info.Run(field(i)->Type()),
1128 address_language_code,
1129 app_locale,
1130 field(i));
1131 filled_something = true;
1132 break;
1136 return filled_something;
1139 std::set<base::string16> FormStructure::PossibleValues(ServerFieldType type) {
1140 std::set<base::string16> values;
1141 AutofillType target_type(type);
1142 for (std::vector<AutofillField*>::iterator iter = fields_.begin();
1143 iter != fields_.end(); ++iter) {
1144 AutofillField* field = *iter;
1145 if (field->Type().GetStorableType() != target_type.GetStorableType() ||
1146 field->Type().group() != target_type.group()) {
1147 continue;
1150 // No option values; anything goes.
1151 if (field->option_values.empty())
1152 return std::set<base::string16>();
1154 for (size_t i = 0; i < field->option_values.size(); ++i) {
1155 if (!field->option_values[i].empty())
1156 values.insert(base::i18n::ToUpper(field->option_values[i]));
1159 for (size_t i = 0; i < field->option_contents.size(); ++i) {
1160 if (!field->option_contents[i].empty())
1161 values.insert(base::i18n::ToUpper(field->option_contents[i]));
1165 return values;
1168 base::string16 FormStructure::GetUniqueValue(HtmlFieldType type) const {
1169 base::string16 value;
1170 for (std::vector<AutofillField*>::const_iterator iter = fields_.begin();
1171 iter != fields_.end(); ++iter) {
1172 const AutofillField* field = *iter;
1173 if (field->html_type() != type)
1174 continue;
1176 // More than one value found; abort rather than choosing one arbitrarily.
1177 if (!value.empty() && !field->value.empty())
1178 return base::string16();
1180 value = field->value;
1183 return value;
1186 void FormStructure::IdentifySections(bool has_author_specified_sections) {
1187 if (fields_.empty())
1188 return;
1190 if (!has_author_specified_sections) {
1191 // Name sections after the first field in the section.
1192 base::string16 current_section = fields_.front()->unique_name();
1194 // Keep track of the types we've seen in this section.
1195 std::set<ServerFieldType> seen_types;
1196 ServerFieldType previous_type = UNKNOWN_TYPE;
1198 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1199 field != fields_.end(); ++field) {
1200 const ServerFieldType current_type = (*field)->Type().GetStorableType();
1202 bool already_saw_current_type = seen_types.count(current_type) > 0;
1204 // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1205 // evening phone number. Our phone number detection is also generally a
1206 // little off. Hence, ignore this field type as a signal here.
1207 if (AutofillType(current_type).group() == PHONE_HOME)
1208 already_saw_current_type = false;
1210 // Ignore non-focusable field while inferring boundaries between sections.
1211 if (!(*field)->is_focusable)
1212 already_saw_current_type = false;
1214 // Some forms have adjacent fields of the same type. Two common examples:
1215 // * Forms with two email fields, where the second is meant to "confirm"
1216 // the first.
1217 // * Forms with a <select> menu for states in some countries, and a
1218 // freeform <input> field for states in other countries. (Usually,
1219 // only one of these two will be visible for any given choice of
1220 // country.)
1221 // Generally, adjacent fields of the same type belong in the same logical
1222 // section.
1223 if (current_type == previous_type)
1224 already_saw_current_type = false;
1226 previous_type = current_type;
1228 if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1229 // We reached the end of a section, so start a new section.
1230 seen_types.clear();
1231 current_section = (*field)->unique_name();
1234 seen_types.insert(current_type);
1235 (*field)->set_section(base::UTF16ToUTF8(current_section));
1239 // Ensure that credit card and address fields are in separate sections.
1240 // This simplifies the section-aware logic in autofill_manager.cc.
1241 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1242 field != fields_.end(); ++field) {
1243 FieldTypeGroup field_type_group = (*field)->Type().group();
1244 if (field_type_group == CREDIT_CARD)
1245 (*field)->set_section((*field)->section() + "-cc");
1246 else
1247 (*field)->set_section((*field)->section() + "-default");
1251 } // namespace autofill