1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/password_form_conversion_utils.h"
9 #include "base/i18n/case_conversion.h"
10 #include "base/lazy_instance.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_util.h"
14 #include "components/autofill/content/renderer/form_autofill_util.h"
15 #include "components/autofill/core/common/password_form.h"
16 #include "components/autofill/core/common/password_form_field_prediction_map.h"
17 #include "third_party/WebKit/public/platform/WebString.h"
18 #include "third_party/WebKit/public/web/WebDocument.h"
19 #include "third_party/WebKit/public/web/WebFormControlElement.h"
20 #include "third_party/WebKit/public/web/WebInputElement.h"
21 #include "third_party/icu/source/i18n/unicode/regex.h"
23 using blink::WebDocument
;
24 using blink::WebFormControlElement
;
25 using blink::WebFormElement
;
26 using blink::WebInputElement
;
27 using blink::WebString
;
28 using blink::WebVector
;
33 // Layout classification of password forms
// A layout sequence of a form is the sequence of its non-password and password
35 // input fields, represented by "N" and "P", respectively. A form like this
37 // <input type='text' ...>
38 // <input type='hidden' ...>
39 // <input type='password' ...>
40 // <input type='submit' ...>
// has the layout sequence "NP" -- "N" for the first field, and "P" for the
// third. The second and fourth fields are ignored, because they are not text
// fields.
46 // The code below classifies the layout (see PasswordForm::Layout) of a form
47 // based on its layout sequence. This is done by assigning layouts regular
48 // expressions over the alphabet {N, P}. LAYOUT_OTHER is implicitly the type
49 // corresponding to all layout sequences not matching any other layout.
51 // LAYOUT_LOGIN_AND_SIGNUP is classified by NPN+P.*. This corresponds to a form
52 // which starts with a login section (NP) and continues with a sign-up section
53 // (N+P.*). The aim is to distinguish such forms from change password-forms
54 // (N*PPP?.*) and forms which use password fields to store private but
55 // non-password data (could look like, e.g., PN+P.*).
// Pattern source for LAYOUT_LOGIN_AND_SIGNUP over the alphabet {N, P}. It is
// compiled into an icu::RegexMatcher by LoginAndSignupLazyInstanceTraits::New.
const char kLoginAndSignupRegex[] =
    "NP"   // Login section.
    "N+P"  // Sign-up section.
    ".*";  // Anything beyond that.
61 struct LoginAndSignupLazyInstanceTraits
62 : public base::DefaultLazyInstanceTraits
<icu::RegexMatcher
> {
63 static icu::RegexMatcher
* New(void* instance
) {
64 const icu::UnicodeString
icu_pattern(kLoginAndSignupRegex
);
66 UErrorCode status
= U_ZERO_ERROR
;
67 // Use placement new to initialize the instance in the preallocated space.
68 // The "(instance)" is very important to force POD type initialization.
69 scoped_ptr
<icu::RegexMatcher
> matcher(new (instance
) icu::RegexMatcher(
70 icu_pattern
, UREGEX_CASE_INSENSITIVE
, status
));
71 DCHECK(U_SUCCESS(status
));
72 return matcher
.release();
// Lazily constructed matcher for |kLoginAndSignupRegex|; it is created through
// LoginAndSignupLazyInstanceTraits::New and accessed by
// MatchesLoginAndSignupPattern().
base::LazyInstance<icu::RegexMatcher, LoginAndSignupLazyInstanceTraits>
    login_and_signup_matcher = LAZY_INSTANCE_INITIALIZER;
79 bool MatchesLoginAndSignupPattern(base::StringPiece layout_sequence
) {
80 icu::RegexMatcher
* matcher
= login_and_signup_matcher
.Pointer();
81 icu::UnicodeString
icu_input(icu::UnicodeString::fromUTF8(
82 icu::StringPiece(layout_sequence
.data(), layout_sequence
.length())));
83 matcher
->reset(icu_input
);
85 UErrorCode status
= U_ZERO_ERROR
;
86 UBool match
= matcher
->find(0, status
);
87 DCHECK(U_SUCCESS(status
));
91 // Given the sequence of non-password and password text input fields of a form,
92 // represented as a string of Ns (non-password) and Ps (password), computes the
93 // layout type of that form.
94 PasswordForm::Layout
SequenceToLayout(base::StringPiece layout_sequence
) {
95 if (MatchesLoginAndSignupPattern(layout_sequence
))
96 return PasswordForm::Layout::LAYOUT_LOGIN_AND_SIGNUP
;
97 return PasswordForm::Layout::LAYOUT_OTHER
;
100 // Checks in a case-insensitive way if the autocomplete attribute for the given
101 // |element| is present and has the specified |value_in_lowercase|.
102 bool HasAutocompleteAttributeValue(const WebInputElement
& element
,
103 const char* value_in_lowercase
) {
104 return base::LowerCaseEqualsASCII(element
.getAttribute("autocomplete"),
108 // Helper to determine which password is the main (current) one, and which is
109 // the new password (e.g., on a sign-up or change password form), if any.
110 bool LocateSpecificPasswords(std::vector
<WebInputElement
> passwords
,
111 WebInputElement
* current_password
,
112 WebInputElement
* new_password
) {
113 DCHECK(current_password
&& current_password
->isNull());
114 DCHECK(new_password
&& new_password
->isNull());
116 // First, look for elements marked with either autocomplete='current-password'
117 // or 'new-password' -- if we find any, take the hint, and treat the first of
118 // each kind as the element we are looking for.
119 for (std::vector
<WebInputElement
>::const_iterator it
= passwords
.begin();
120 it
!= passwords
.end(); it
++) {
121 if (HasAutocompleteAttributeValue(*it
, "current-password") &&
122 current_password
->isNull()) {
123 *current_password
= *it
;
124 } else if (HasAutocompleteAttributeValue(*it
, "new-password") &&
125 new_password
->isNull()) {
130 // If we have seen an element with either of autocomplete attributes above,
131 // take that as a signal that the page author must have intentionally left the
132 // rest of the password fields unmarked. Perhaps they are used for other
133 // purposes, e.g., PINs, OTPs, and the like. So we skip all the heuristics we
134 // normally do, and ignore the rest of the password fields.
135 if (!current_password
->isNull() || !new_password
->isNull())
138 if (passwords
.empty())
141 switch (passwords
.size()) {
143 // Single password, easy.
144 *current_password
= passwords
[0];
147 if (!passwords
[0].value().isEmpty() &&
148 passwords
[0].value() == passwords
[1].value()) {
149 // Two identical non-empty passwords: assume we are seeing a new
150 // password with a confirmation. This can be either a sign-up form or a
151 // password change form that does not ask for the old password.
152 *new_password
= passwords
[0];
154 // Assume first is old password, second is new (no choice but to guess).
155 // This case also includes empty passwords in order to allow filling of
156 // password change forms (that also could autofill for sign up form, but
157 // we can't do anything with this using only client side information).
158 *current_password
= passwords
[0];
159 *new_password
= passwords
[1];
163 if (!passwords
[0].value().isEmpty() &&
164 passwords
[0].value() == passwords
[1].value() &&
165 passwords
[0].value() == passwords
[2].value()) {
166 // All three passwords are the same and non-empty? This does not make
167 // any sense, give up.
169 } else if (passwords
[1].value() == passwords
[2].value()) {
170 // New password is the duplicated one, and comes second; or empty form
171 // with 3 password fields, in which case we will assume this layout.
172 *current_password
= passwords
[0];
173 *new_password
= passwords
[1];
174 } else if (passwords
[0].value() == passwords
[1].value()) {
175 // It is strange that the new password comes first, but trust more which
176 // fields are duplicated than the ordering of fields. Assume that
177 // any password fields after the new password contain sensitive
178 // information that isn't actually a password (security hint, SSN, etc.)
179 *new_password
= passwords
[0];
181 // Three different passwords, or first and last match with middle
182 // different. No idea which is which, so no luck.
189 void FindPredictedElements(
190 const WebFormElement
& form
,
191 const std::map
<autofill::FormData
,
192 autofill::PasswordFormFieldPredictionMap
>& form_predictions
,
193 WebVector
<WebFormControlElement
>* control_elements
,
194 std::map
<autofill::PasswordFormFieldPredictionType
, WebInputElement
>*
195 predicted_elements
) {
197 if (!WebFormElementToFormData(form
, WebFormControlElement(), EXTRACT_NONE
,
198 &form_data
, nullptr)) {
202 // Matching only requires that action and name of the form match to allow
203 // the username to be updated even if the form is changed after page load.
204 // See https://crbug.com/476092 for more details.
205 auto predictions_iterator
= form_predictions
.begin();
206 for (;predictions_iterator
!= form_predictions
.end();
207 ++predictions_iterator
) {
208 if (predictions_iterator
->first
.action
== form_data
.action
&&
209 predictions_iterator
->first
.name
== form_data
.name
) {
214 if (predictions_iterator
== form_predictions
.end())
217 std::vector
<blink::WebFormControlElement
> autofillable_elements
=
218 ExtractAutofillableElementsFromSet(*control_elements
);
220 const autofill::PasswordFormFieldPredictionMap
& field_predictions
=
221 predictions_iterator
->second
;
222 for (autofill::PasswordFormFieldPredictionMap::const_iterator prediction
=
223 field_predictions
.begin();
224 prediction
!= field_predictions
.end(); ++prediction
) {
225 const autofill::PasswordFormFieldPredictionType
& type
= prediction
->first
;
226 const autofill::FormFieldData
& target_field
= prediction
->second
;
228 for (size_t i
= 0; i
< autofillable_elements
.size(); ++i
) {
229 if (autofillable_elements
[i
].nameForAutofill() == target_field
.name
) {
230 WebInputElement
* input_element
=
231 toWebInputElement(&autofillable_elements
[i
]);
233 (*predicted_elements
)[type
] = *input_element
;
241 // Get information about a login form encapsulated in a PasswordForm struct.
242 // If an element of |form| has an entry in |nonscript_modified_values|, the
243 // associated string is used instead of the element's value to create
245 void GetPasswordForm(
246 const WebFormElement
& form
,
247 PasswordForm
* password_form
,
248 const std::map
<const blink::WebInputElement
, blink::WebString
>*
249 nonscript_modified_values
,
250 const std::map
<autofill::FormData
,
251 autofill::PasswordFormFieldPredictionMap
>*
253 WebInputElement latest_input_element
;
254 WebInputElement username_element
;
255 password_form
->username_marked_by_site
= false;
256 std::vector
<WebInputElement
> passwords
;
257 std::vector
<base::string16
> other_possible_usernames
;
259 WebVector
<WebFormControlElement
> control_elements
;
260 form
.getFormControlElements(control_elements
);
262 std::string layout_sequence
;
263 layout_sequence
.reserve(control_elements
.size());
264 for (size_t i
= 0; i
< control_elements
.size(); ++i
) {
265 WebFormControlElement control_element
= control_elements
[i
];
266 if (control_element
.isActivatedSubmit())
267 password_form
->submit_element
= control_element
.formControlName();
269 WebInputElement
* input_element
= toWebInputElement(&control_element
);
270 if (!input_element
|| !input_element
->isEnabled())
273 if (input_element
->isTextField()) {
274 if (input_element
->isPasswordField())
275 layout_sequence
.push_back('P');
277 layout_sequence
.push_back('N');
280 // If the password field is readonly, the page is likely using a virtual
281 // keyboard and bypassing the password field value (see
282 // http://crbug.com/475488). There is nothing Chrome can do to fill
283 // passwords for now. Continue processing in case when the password field
284 // was made readonly by JavaScript before submission. We can do this by
285 // checking whether password element was updated not from JavaScript.
286 if (input_element
->isPasswordField() &&
287 (!input_element
->isReadOnly() ||
288 (nonscript_modified_values
&&
289 nonscript_modified_values
->find(*input_element
) !=
290 nonscript_modified_values
->end()) ||
291 HasAutocompleteAttributeValue(*input_element
, "current_password") ||
292 HasAutocompleteAttributeValue(*input_element
, "new-password"))) {
293 passwords
.push_back(*input_element
);
294 // If we have not yet considered any element to be the username so far,
295 // provisionally select the input element just before the first password
296 // element to be the username. This choice will be overruled if we later
297 // find an element with autocomplete='username'.
298 if (username_element
.isNull() && !latest_input_element
.isNull()) {
299 username_element
= latest_input_element
;
300 // Remove the selected username from other_possible_usernames.
301 if (!latest_input_element
.value().isEmpty()) {
302 DCHECK(!other_possible_usernames
.empty());
303 DCHECK_EQ(base::string16(latest_input_element
.value()),
304 other_possible_usernames
.back());
305 other_possible_usernames
.pop_back();
310 // Various input types such as text, url, email can be a username field.
311 if (input_element
->isTextField() && !input_element
->isPasswordField()) {
312 if (HasAutocompleteAttributeValue(*input_element
, "username")) {
313 if (password_form
->username_marked_by_site
) {
314 // A second or subsequent element marked with autocomplete='username'.
315 // This makes us less confident that we have understood the form. We
316 // will stick to our choice that the first such element was the real
317 // username, but will start collecting other_possible_usernames from
318 // the extra elements marked with autocomplete='username'. Note that
319 // unlike username_element, other_possible_usernames is used only for
320 // autofill, not for form identification, and blank autofill entries
321 // are not useful, so we do not collect empty strings.
322 if (!input_element
->value().isEmpty())
323 other_possible_usernames
.push_back(input_element
->value());
325 // The first element marked with autocomplete='username'. Take the
326 // hint and treat it as the username (overruling the tentative choice
327 // we might have made before). Furthermore, drop all other possible
328 // usernames we have accrued so far: they come from fields not marked
329 // with the autocomplete attribute, making them unlikely alternatives.
330 username_element
= *input_element
;
331 password_form
->username_marked_by_site
= true;
332 other_possible_usernames
.clear();
335 if (password_form
->username_marked_by_site
) {
336 // Having seen elements with autocomplete='username', elements without
337 // this attribute are no longer interesting. No-op.
339 // No elements marked with autocomplete='username' so far whatsoever.
340 // If we have not yet selected a username element even provisionally,
341 // then remember this element for the case when the next field turns
342 // out to be a password. Save a non-empty username as a possible
343 // alternative, at least for now.
344 if (username_element
.isNull())
345 latest_input_element
= *input_element
;
346 if (!input_element
->value().isEmpty())
347 other_possible_usernames
.push_back(input_element
->value());
352 password_form
->layout
= SequenceToLayout(layout_sequence
);
354 std::map
<autofill::PasswordFormFieldPredictionType
, WebInputElement
>
356 if (form_predictions
) {
357 FindPredictedElements(form
, *form_predictions
, &control_elements
,
358 &predicted_elements
);
360 // Let server predictions override the selection of the username field. This
361 // allows instant adjusting without changing Chromium code.
362 if (!predicted_elements
[autofill::PREDICTION_USERNAME
].isNull() &&
363 username_element
!= predicted_elements
[autofill::PREDICTION_USERNAME
]) {
365 find(other_possible_usernames
.begin(), other_possible_usernames
.end(),
366 predicted_elements
[autofill::PREDICTION_USERNAME
].value());
367 if (it
!= other_possible_usernames
.end())
368 other_possible_usernames
.erase(it
);
369 if (!username_element
.isNull()) {
370 other_possible_usernames
.push_back(username_element
.value());
372 username_element
= predicted_elements
[autofill::PREDICTION_USERNAME
];
373 password_form
->was_parsed_using_autofill_predictions
= true;
376 if (!username_element
.isNull()) {
377 password_form
->username_element
= username_element
.nameForAutofill();
378 base::string16 username_value
= username_element
.value();
379 if (nonscript_modified_values
!= nullptr) {
380 auto username_iterator
=
381 nonscript_modified_values
->find(username_element
);
382 if (username_iterator
!= nonscript_modified_values
->end()) {
383 base::string16 typed_username_value
= username_iterator
->second
;
384 if (!base::StartsWith(
385 base::i18n::ToLower(username_value
),
386 base::i18n::ToLower(typed_username_value
),
387 base::CompareCase::SENSITIVE
)) {
388 // We check that |username_value| was not obtained by autofilling
389 // |typed_username_value|. In case when it was, |typed_username_value|
390 // is incomplete, so we should leave autofilled value.
391 username_value
= typed_username_value
;
395 password_form
->username_value
= username_value
;
398 WebInputElement password
;
399 WebInputElement new_password
;
400 if (!LocateSpecificPasswords(passwords
, &password
, &new_password
))
403 password_form
->action
= GetCanonicalActionForForm(form
);
404 if (!password_form
->action
.is_valid())
407 password_form
->origin
= GetCanonicalOriginForDocument(form
.document());
408 GURL::Replacements rep
;
410 password_form
->signon_realm
=
411 password_form
->origin
.ReplaceComponents(rep
).spec();
412 password_form
->other_possible_usernames
.swap(other_possible_usernames
);
414 if (!password
.isNull()) {
415 password_form
->password_element
= password
.nameForAutofill();
416 blink::WebString password_value
= password
.value();
417 if (nonscript_modified_values
!= nullptr) {
418 auto password_iterator
= nonscript_modified_values
->find(password
);
419 if (password_iterator
!= nonscript_modified_values
->end())
420 password_value
= password_iterator
->second
;
422 password_form
->password_value
= password_value
;
424 if (!new_password
.isNull()) {
425 password_form
->new_password_element
= new_password
.nameForAutofill();
426 password_form
->new_password_value
= new_password
.value();
427 if (HasAutocompleteAttributeValue(new_password
, "new-password"))
428 password_form
->new_password_marked_by_site
= true;
431 if (username_element
.isNull()) {
432 // To get a better idea on how password forms without a username field
433 // look like, report the total number of text and password fields.
434 UMA_HISTOGRAM_COUNTS_100(
435 "PasswordManager.EmptyUsernames.TextAndPasswordFieldCount",
436 layout_sequence
.size());
437 // For comparison, also report the number of password fields.
438 UMA_HISTOGRAM_COUNTS_100(
439 "PasswordManager.EmptyUsernames.PasswordFieldCount",
440 std::count(layout_sequence
.begin(), layout_sequence
.end(), 'P'));
443 password_form
->scheme
= PasswordForm::SCHEME_HTML
;
444 password_form
->ssl_valid
= false;
445 password_form
->preferred
= false;
446 password_form
->blacklisted_by_user
= false;
447 password_form
->type
= PasswordForm::TYPE_MANUAL
;
450 GURL
StripAuthAndParams(const GURL
& gurl
) {
451 // We want to keep the path but strip any authentication data, as well as
452 // query and ref portions of URL, for the form action and form origin.
453 GURL::Replacements rep
;
458 return gurl
.ReplaceComponents(rep
);
463 GURL
GetCanonicalActionForForm(const WebFormElement
& form
) {
464 WebString action
= form
.action();
466 action
= WebString(""); // missing 'action' attribute implies current URL
467 GURL
full_action(form
.document().completeURL(action
));
468 return StripAuthAndParams(full_action
);
471 GURL
GetCanonicalOriginForDocument(const WebDocument
& document
) {
472 GURL
full_origin(document
.url());
473 return StripAuthAndParams(full_origin
);
476 scoped_ptr
<PasswordForm
> CreatePasswordForm(
477 const WebFormElement
& web_form
,
478 const std::map
<const blink::WebInputElement
, blink::WebString
>*
479 nonscript_modified_values
,
480 const std::map
<autofill::FormData
,
481 autofill::PasswordFormFieldPredictionMap
>*
483 if (web_form
.isNull())
484 return scoped_ptr
<PasswordForm
>();
486 scoped_ptr
<PasswordForm
> password_form(new PasswordForm());
487 GetPasswordForm(web_form
, password_form
.get(), nonscript_modified_values
,
490 if (!password_form
->action
.is_valid())
491 return scoped_ptr
<PasswordForm
>();
493 WebFormElementToFormData(web_form
,
494 blink::WebFormControlElement(),
496 &password_form
->form_data
,
497 NULL
/* FormFieldData */);
499 return password_form
.Pass();
502 } // namespace autofill