Fix some case-insensitive cases for StartsWith.
[chromium-blink-merge.git] / components / autofill / content / renderer / password_form_conversion_utils.cc
blob22be42e8da278c8374a5d11229bcd879e187df4b
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/password_form_conversion_utils.h"
7 #include <vector>
9 #include "base/i18n/case_conversion.h"
10 #include "base/lazy_instance.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/strings/string_util.h"
14 #include "components/autofill/content/renderer/form_autofill_util.h"
15 #include "components/autofill/core/common/password_form.h"
16 #include "components/autofill/core/common/password_form_field_prediction_map.h"
17 #include "third_party/WebKit/public/platform/WebString.h"
18 #include "third_party/WebKit/public/web/WebDocument.h"
19 #include "third_party/WebKit/public/web/WebFormControlElement.h"
20 #include "third_party/WebKit/public/web/WebInputElement.h"
21 #include "third_party/icu/source/i18n/unicode/regex.h"
23 using blink::WebDocument;
24 using blink::WebFormControlElement;
25 using blink::WebFormElement;
26 using blink::WebInputElement;
27 using blink::WebString;
28 using blink::WebVector;
30 namespace autofill {
31 namespace {
33 // Layout classification of password forms
34 // A layout sequence of a form is the sequence of its non-password and password
35 // input fields, represented by "N" and "P", respectively. A form like this
36 // <form>
37 // <input type='text' ...>
38 // <input type='hidden' ...>
39 // <input type='password' ...>
40 // <input type='submit' ...>
41 // </form>
42 // has the layout sequence "NP" -- "N" for the first field, and "P" for the
43 // third. The second and fourth fields are ignored, because they are not text
44 // fields.
46 // The code below classifies the layout (see PasswordForm::Layout) of a form
47 // based on its layout sequence. This is done by assigning layouts regular
48 // expressions over the alphabet {N, P}. LAYOUT_OTHER is implicitly the type
49 // corresponding to all layout sequences not matching any other layout.
// Regular expression over the alphabet {N, P} that identifies
// LAYOUT_LOGIN_AND_SIGNUP forms. It is made of three consecutive parts:
//   "NP"   the login section,
//   "N+P"  the sign-up section,
//   ".*"   anything beyond that.
// The aim is to tell such forms apart from change-password forms (N*PPP?.*)
// and from forms which use password fields to store private but non-password
// data (which could look like, e.g., PN+P.*).
const char kLoginAndSignupRegex[] = "NPN+P.*";
61 struct LoginAndSignupLazyInstanceTraits
62 : public base::DefaultLazyInstanceTraits<icu::RegexMatcher> {
63 static icu::RegexMatcher* New(void* instance) {
64 const icu::UnicodeString icu_pattern(kLoginAndSignupRegex);
66 UErrorCode status = U_ZERO_ERROR;
67 // Use placement new to initialize the instance in the preallocated space.
68 // The "(instance)" is very important to force POD type initialization.
69 scoped_ptr<icu::RegexMatcher> matcher(new (instance) icu::RegexMatcher(
70 icu_pattern, UREGEX_CASE_INSENSITIVE, status));
71 DCHECK(U_SUCCESS(status));
72 return matcher.release();
76 base::LazyInstance<icu::RegexMatcher, LoginAndSignupLazyInstanceTraits>
77 login_and_signup_matcher = LAZY_INSTANCE_INITIALIZER;
79 bool MatchesLoginAndSignupPattern(base::StringPiece layout_sequence) {
80 icu::RegexMatcher* matcher = login_and_signup_matcher.Pointer();
81 icu::UnicodeString icu_input(icu::UnicodeString::fromUTF8(
82 icu::StringPiece(layout_sequence.data(), layout_sequence.length())));
83 matcher->reset(icu_input);
85 UErrorCode status = U_ZERO_ERROR;
86 UBool match = matcher->find(0, status);
87 DCHECK(U_SUCCESS(status));
88 return match == TRUE;
91 // Given the sequence of non-password and password text input fields of a form,
92 // represented as a string of Ns (non-password) and Ps (password), computes the
93 // layout type of that form.
94 PasswordForm::Layout SequenceToLayout(base::StringPiece layout_sequence) {
95 if (MatchesLoginAndSignupPattern(layout_sequence))
96 return PasswordForm::Layout::LAYOUT_LOGIN_AND_SIGNUP;
97 return PasswordForm::Layout::LAYOUT_OTHER;
100 // Checks in a case-insensitive way if the autocomplete attribute for the given
101 // |element| is present and has the specified |value_in_lowercase|.
102 bool HasAutocompleteAttributeValue(const WebInputElement& element,
103 const char* value_in_lowercase) {
104 return base::LowerCaseEqualsASCII(element.getAttribute("autocomplete"),
105 value_in_lowercase);
108 // Helper to determine which password is the main (current) one, and which is
109 // the new password (e.g., on a sign-up or change password form), if any.
110 bool LocateSpecificPasswords(std::vector<WebInputElement> passwords,
111 WebInputElement* current_password,
112 WebInputElement* new_password) {
113 DCHECK(current_password && current_password->isNull());
114 DCHECK(new_password && new_password->isNull());
116 // First, look for elements marked with either autocomplete='current-password'
117 // or 'new-password' -- if we find any, take the hint, and treat the first of
118 // each kind as the element we are looking for.
119 for (std::vector<WebInputElement>::const_iterator it = passwords.begin();
120 it != passwords.end(); it++) {
121 if (HasAutocompleteAttributeValue(*it, "current-password") &&
122 current_password->isNull()) {
123 *current_password = *it;
124 } else if (HasAutocompleteAttributeValue(*it, "new-password") &&
125 new_password->isNull()) {
126 *new_password = *it;
130 // If we have seen an element with either of autocomplete attributes above,
131 // take that as a signal that the page author must have intentionally left the
132 // rest of the password fields unmarked. Perhaps they are used for other
133 // purposes, e.g., PINs, OTPs, and the like. So we skip all the heuristics we
134 // normally do, and ignore the rest of the password fields.
135 if (!current_password->isNull() || !new_password->isNull())
136 return true;
138 if (passwords.empty())
139 return false;
141 switch (passwords.size()) {
142 case 1:
143 // Single password, easy.
144 *current_password = passwords[0];
145 break;
146 case 2:
147 if (!passwords[0].value().isEmpty() &&
148 passwords[0].value() == passwords[1].value()) {
149 // Two identical non-empty passwords: assume we are seeing a new
150 // password with a confirmation. This can be either a sign-up form or a
151 // password change form that does not ask for the old password.
152 *new_password = passwords[0];
153 } else {
154 // Assume first is old password, second is new (no choice but to guess).
155 // This case also includes empty passwords in order to allow filling of
156 // password change forms (that also could autofill for sign up form, but
157 // we can't do anything with this using only client side information).
158 *current_password = passwords[0];
159 *new_password = passwords[1];
161 break;
162 default:
163 if (!passwords[0].value().isEmpty() &&
164 passwords[0].value() == passwords[1].value() &&
165 passwords[0].value() == passwords[2].value()) {
166 // All three passwords are the same and non-empty? This does not make
167 // any sense, give up.
168 return false;
169 } else if (passwords[1].value() == passwords[2].value()) {
170 // New password is the duplicated one, and comes second; or empty form
171 // with 3 password fields, in which case we will assume this layout.
172 *current_password = passwords[0];
173 *new_password = passwords[1];
174 } else if (passwords[0].value() == passwords[1].value()) {
175 // It is strange that the new password comes first, but trust more which
176 // fields are duplicated than the ordering of fields. Assume that
177 // any password fields after the new password contain sensitive
178 // information that isn't actually a password (security hint, SSN, etc.)
179 *new_password = passwords[0];
180 } else {
181 // Three different passwords, or first and last match with middle
182 // different. No idea which is which, so no luck.
183 return false;
186 return true;
189 void FindPredictedElements(
190 const WebFormElement& form,
191 const std::map<autofill::FormData,
192 autofill::PasswordFormFieldPredictionMap>& form_predictions,
193 WebVector<WebFormControlElement>* control_elements,
194 std::map<autofill::PasswordFormFieldPredictionType, WebInputElement>*
195 predicted_elements) {
196 FormData form_data;
197 if (!WebFormElementToFormData(form, WebFormControlElement(), EXTRACT_NONE,
198 &form_data, nullptr)) {
199 return;
202 // Matching only requires that action and name of the form match to allow
203 // the username to be updated even if the form is changed after page load.
204 // See https://crbug.com/476092 for more details.
205 auto predictions_iterator = form_predictions.begin();
206 for (;predictions_iterator != form_predictions.end();
207 ++predictions_iterator) {
208 if (predictions_iterator->first.action == form_data.action &&
209 predictions_iterator->first.name == form_data.name) {
210 break;
214 if (predictions_iterator == form_predictions.end())
215 return;
217 std::vector<blink::WebFormControlElement> autofillable_elements =
218 ExtractAutofillableElementsFromSet(*control_elements);
220 const autofill::PasswordFormFieldPredictionMap& field_predictions =
221 predictions_iterator->second;
222 for (autofill::PasswordFormFieldPredictionMap::const_iterator prediction =
223 field_predictions.begin();
224 prediction != field_predictions.end(); ++prediction) {
225 const autofill::PasswordFormFieldPredictionType& type = prediction->first;
226 const autofill::FormFieldData& target_field = prediction->second;
228 for (size_t i = 0; i < autofillable_elements.size(); ++i) {
229 if (autofillable_elements[i].nameForAutofill() == target_field.name) {
230 WebInputElement* input_element =
231 toWebInputElement(&autofillable_elements[i]);
232 if (input_element) {
233 (*predicted_elements)[type] = *input_element;
235 break;
241 // Get information about a login form encapsulated in a PasswordForm struct.
242 // If an element of |form| has an entry in |nonscript_modified_values|, the
243 // associated string is used instead of the element's value to create
244 // the PasswordForm.
245 void GetPasswordForm(
246 const WebFormElement& form,
247 PasswordForm* password_form,
248 const std::map<const blink::WebInputElement, blink::WebString>*
249 nonscript_modified_values,
250 const std::map<autofill::FormData,
251 autofill::PasswordFormFieldPredictionMap>*
252 form_predictions) {
253 WebInputElement latest_input_element;
254 WebInputElement username_element;
255 password_form->username_marked_by_site = false;
256 std::vector<WebInputElement> passwords;
257 std::vector<base::string16> other_possible_usernames;
259 WebVector<WebFormControlElement> control_elements;
260 form.getFormControlElements(control_elements);
262 std::string layout_sequence;
263 layout_sequence.reserve(control_elements.size());
264 for (size_t i = 0; i < control_elements.size(); ++i) {
265 WebFormControlElement control_element = control_elements[i];
266 if (control_element.isActivatedSubmit())
267 password_form->submit_element = control_element.formControlName();
269 WebInputElement* input_element = toWebInputElement(&control_element);
270 if (!input_element || !input_element->isEnabled())
271 continue;
273 if (input_element->isTextField()) {
274 if (input_element->isPasswordField())
275 layout_sequence.push_back('P');
276 else
277 layout_sequence.push_back('N');
280 // If the password field is readonly, the page is likely using a virtual
281 // keyboard and bypassing the password field value (see
282 // http://crbug.com/475488). There is nothing Chrome can do to fill
283 // passwords for now. Continue processing in case when the password field
284 // was made readonly by JavaScript before submission. We can do this by
285 // checking whether password element was updated not from JavaScript.
286 if (input_element->isPasswordField() &&
287 (!input_element->isReadOnly() ||
288 (nonscript_modified_values &&
289 nonscript_modified_values->find(*input_element) !=
290 nonscript_modified_values->end()) ||
291 HasAutocompleteAttributeValue(*input_element, "current_password") ||
292 HasAutocompleteAttributeValue(*input_element, "new-password"))) {
293 passwords.push_back(*input_element);
294 // If we have not yet considered any element to be the username so far,
295 // provisionally select the input element just before the first password
296 // element to be the username. This choice will be overruled if we later
297 // find an element with autocomplete='username'.
298 if (username_element.isNull() && !latest_input_element.isNull()) {
299 username_element = latest_input_element;
300 // Remove the selected username from other_possible_usernames.
301 if (!latest_input_element.value().isEmpty()) {
302 DCHECK(!other_possible_usernames.empty());
303 DCHECK_EQ(base::string16(latest_input_element.value()),
304 other_possible_usernames.back());
305 other_possible_usernames.pop_back();
310 // Various input types such as text, url, email can be a username field.
311 if (input_element->isTextField() && !input_element->isPasswordField()) {
312 if (HasAutocompleteAttributeValue(*input_element, "username")) {
313 if (password_form->username_marked_by_site) {
314 // A second or subsequent element marked with autocomplete='username'.
315 // This makes us less confident that we have understood the form. We
316 // will stick to our choice that the first such element was the real
317 // username, but will start collecting other_possible_usernames from
318 // the extra elements marked with autocomplete='username'. Note that
319 // unlike username_element, other_possible_usernames is used only for
320 // autofill, not for form identification, and blank autofill entries
321 // are not useful, so we do not collect empty strings.
322 if (!input_element->value().isEmpty())
323 other_possible_usernames.push_back(input_element->value());
324 } else {
325 // The first element marked with autocomplete='username'. Take the
326 // hint and treat it as the username (overruling the tentative choice
327 // we might have made before). Furthermore, drop all other possible
328 // usernames we have accrued so far: they come from fields not marked
329 // with the autocomplete attribute, making them unlikely alternatives.
330 username_element = *input_element;
331 password_form->username_marked_by_site = true;
332 other_possible_usernames.clear();
334 } else {
335 if (password_form->username_marked_by_site) {
336 // Having seen elements with autocomplete='username', elements without
337 // this attribute are no longer interesting. No-op.
338 } else {
339 // No elements marked with autocomplete='username' so far whatsoever.
340 // If we have not yet selected a username element even provisionally,
341 // then remember this element for the case when the next field turns
342 // out to be a password. Save a non-empty username as a possible
343 // alternative, at least for now.
344 if (username_element.isNull())
345 latest_input_element = *input_element;
346 if (!input_element->value().isEmpty())
347 other_possible_usernames.push_back(input_element->value());
352 password_form->layout = SequenceToLayout(layout_sequence);
354 std::map<autofill::PasswordFormFieldPredictionType, WebInputElement>
355 predicted_elements;
356 if (form_predictions) {
357 FindPredictedElements(form, *form_predictions, &control_elements,
358 &predicted_elements);
360 // Let server predictions override the selection of the username field. This
361 // allows instant adjusting without changing Chromium code.
362 if (!predicted_elements[autofill::PREDICTION_USERNAME].isNull() &&
363 username_element != predicted_elements[autofill::PREDICTION_USERNAME]) {
364 auto it =
365 find(other_possible_usernames.begin(), other_possible_usernames.end(),
366 predicted_elements[autofill::PREDICTION_USERNAME].value());
367 if (it != other_possible_usernames.end())
368 other_possible_usernames.erase(it);
369 if (!username_element.isNull()) {
370 other_possible_usernames.push_back(username_element.value());
372 username_element = predicted_elements[autofill::PREDICTION_USERNAME];
373 password_form->was_parsed_using_autofill_predictions = true;
376 if (!username_element.isNull()) {
377 password_form->username_element = username_element.nameForAutofill();
378 base::string16 username_value = username_element.value();
379 if (nonscript_modified_values != nullptr) {
380 auto username_iterator =
381 nonscript_modified_values->find(username_element);
382 if (username_iterator != nonscript_modified_values->end()) {
383 base::string16 typed_username_value = username_iterator->second;
384 if (!base::StartsWith(
385 base::i18n::ToLower(username_value),
386 base::i18n::ToLower(typed_username_value),
387 base::CompareCase::SENSITIVE)) {
388 // We check that |username_value| was not obtained by autofilling
389 // |typed_username_value|. In case when it was, |typed_username_value|
390 // is incomplete, so we should leave autofilled value.
391 username_value = typed_username_value;
395 password_form->username_value = username_value;
398 WebInputElement password;
399 WebInputElement new_password;
400 if (!LocateSpecificPasswords(passwords, &password, &new_password))
401 return;
403 password_form->action = GetCanonicalActionForForm(form);
404 if (!password_form->action.is_valid())
405 return;
407 password_form->origin = GetCanonicalOriginForDocument(form.document());
408 GURL::Replacements rep;
409 rep.SetPathStr("");
410 password_form->signon_realm =
411 password_form->origin.ReplaceComponents(rep).spec();
412 password_form->other_possible_usernames.swap(other_possible_usernames);
414 if (!password.isNull()) {
415 password_form->password_element = password.nameForAutofill();
416 blink::WebString password_value = password.value();
417 if (nonscript_modified_values != nullptr) {
418 auto password_iterator = nonscript_modified_values->find(password);
419 if (password_iterator != nonscript_modified_values->end())
420 password_value = password_iterator->second;
422 password_form->password_value = password_value;
424 if (!new_password.isNull()) {
425 password_form->new_password_element = new_password.nameForAutofill();
426 password_form->new_password_value = new_password.value();
427 if (HasAutocompleteAttributeValue(new_password, "new-password"))
428 password_form->new_password_marked_by_site = true;
431 if (username_element.isNull()) {
432 // To get a better idea on how password forms without a username field
433 // look like, report the total number of text and password fields.
434 UMA_HISTOGRAM_COUNTS_100(
435 "PasswordManager.EmptyUsernames.TextAndPasswordFieldCount",
436 layout_sequence.size());
437 // For comparison, also report the number of password fields.
438 UMA_HISTOGRAM_COUNTS_100(
439 "PasswordManager.EmptyUsernames.PasswordFieldCount",
440 std::count(layout_sequence.begin(), layout_sequence.end(), 'P'));
443 password_form->scheme = PasswordForm::SCHEME_HTML;
444 password_form->ssl_valid = false;
445 password_form->preferred = false;
446 password_form->blacklisted_by_user = false;
447 password_form->type = PasswordForm::TYPE_MANUAL;
450 GURL StripAuthAndParams(const GURL& gurl) {
451 // We want to keep the path but strip any authentication data, as well as
452 // query and ref portions of URL, for the form action and form origin.
453 GURL::Replacements rep;
454 rep.ClearUsername();
455 rep.ClearPassword();
456 rep.ClearQuery();
457 rep.ClearRef();
458 return gurl.ReplaceComponents(rep);
461 } // namespace
463 GURL GetCanonicalActionForForm(const WebFormElement& form) {
464 WebString action = form.action();
465 if (action.isNull())
466 action = WebString(""); // missing 'action' attribute implies current URL
467 GURL full_action(form.document().completeURL(action));
468 return StripAuthAndParams(full_action);
471 GURL GetCanonicalOriginForDocument(const WebDocument& document) {
472 GURL full_origin(document.url());
473 return StripAuthAndParams(full_origin);
476 scoped_ptr<PasswordForm> CreatePasswordForm(
477 const WebFormElement& web_form,
478 const std::map<const blink::WebInputElement, blink::WebString>*
479 nonscript_modified_values,
480 const std::map<autofill::FormData,
481 autofill::PasswordFormFieldPredictionMap>*
482 form_predictions) {
483 if (web_form.isNull())
484 return scoped_ptr<PasswordForm>();
486 scoped_ptr<PasswordForm> password_form(new PasswordForm());
487 GetPasswordForm(web_form, password_form.get(), nonscript_modified_values,
488 form_predictions);
490 if (!password_form->action.is_valid())
491 return scoped_ptr<PasswordForm>();
493 WebFormElementToFormData(web_form,
494 blink::WebFormControlElement(),
495 EXTRACT_NONE,
496 &password_form->form_data,
497 NULL /* FormFieldData */);
499 return password_form.Pass();
502 } // namespace autofill