net/cert/internal/verify_name_match.cc

   1 // Copyright 2015 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "net/cert/internal/verify_name_match.h"
   6
   7 #include <string.h>
   8
   9 #include "base/stl_util.h"
  10 #include "base/strings/string16.h"
  11 #include "base/strings/string_util.h"
  12 #include "base/strings/utf_string_conversion_utils.h"
  13 #include "base/strings/utf_string_conversions.h"
  14 #include "base/sys_byteorder.h"
  15 #include "base/third_party/icu/icu_utf.h"
  16 #include "base/tuple.h"
  17 #include "net/der/input.h"
  18 #include "net/der/parser.h"
  19 #include "net/der/tag.h"
  20
  21 namespace net {
  22
  23 namespace {
  24
  25 // Types of character set checking that NormalizeDirectoryString can perform.
  26 enum CharsetEnforcement {
  27   NO_ENFORCEMENT,
  28   ENFORCE_PRINTABLE_STRING,
  29   ENFORCE_ASCII,
  30 };
  31
  32 // Normalizes |output|, a UTF-8 encoded string, as if it contained
  33 // only ASCII characters.
  34 //
  35 // This could be considered a partial subset of RFC 5280 rules, and
  36 // is compatible with RFC 2459/3280.
  37 //
  38 // In particular, RFC 5280, Section 7.1 describes how UTF8String
  39 // and PrintableString should be compared - using the LDAP StringPrep
  40 // profile of RFC 4518, with case folding and whitespace compression.
  41 // However, because it is optional for 2459/3280 implementations and because
  42 // it's desirable to avoid the size cost of the StringPrep tables,
  43 // this function treats |output| as if it was composed of ASCII.
  44 //
  45 // That is, rather than folding all whitespace characters, it only
  46 // folds ' '. Rather than case folding using locale-aware handling,
  47 // it only folds A-Z to a-z.
  48 //
  49 // This gives better results than outright rejecting (due to mismatched
  50 // encodings), or from doing a strict binary comparison (the minimum
  51 // required by RFC 3280), and is sufficient for those certificates
  52 // publicly deployed.
  53 //
  54 // If |charset_enforcement| is not NO_ENFORCEMENT and |output| contains any
  55 // characters not allowed in the specified charset, returns false.
  56 //
  57 // NOTE: |output| will be modified regardless of the return.
  58 WARN_UNUSED_RESULT bool NormalizeDirectoryString(
  59     CharsetEnforcement charset_enforcement,
  60     std::string* output) {
  61   // Normalized version will always be equal or shorter than input.
  62   // Normalize in place and then truncate the output if necessary.
  63   std::string::const_iterator read_iter = output->begin();
  64   std::string::iterator write_iter = output->begin();
  65
  66   for (; read_iter != output->end() && *read_iter == ' '; ++read_iter) {
  67     // Ignore leading whitespace.
  68   }
  69
  70   for (; read_iter != output->end(); ++read_iter) {
  71     const unsigned char c = *read_iter;
  72     if (c == ' ') {
  73       // If there are non-whitespace characters remaining in input, compress
  74       // multiple whitespace chars to a single space, otherwise ignore trailing
  75       // whitespace.
  76       std::string::const_iterator next_iter = read_iter + 1;
  77       if (next_iter != output->end() && *next_iter != ' ')
  78         *(write_iter++) = ' ';
  79     } else if (c >= 'A' && c <= 'Z') {
  80       // Fold case.
  81       *(write_iter++) = c + ('a' - 'A');
  82     } else {
  83       // Note that these checks depend on the characters allowed by earlier
  84       // conditions also being valid for the enforced charset.
  85       switch (charset_enforcement) {
  86         case ENFORCE_PRINTABLE_STRING:
  87           // See NormalizePrintableStringValue comment for the acceptable list
  88           // of characters.
  89           if (!((c >= 'a' && c <= 'z') || (c >= '\'' && c <= ':') || c == '=' ||
  90                 c == '?'))
  91             return false;
  92           break;
  93         case ENFORCE_ASCII:
  94           if (c > 0x7F)
  95             return false;
  96           break;
  97         case NO_ENFORCEMENT:
  98           break;
  99       }
 100       *(write_iter++) = c;
 101     }
 102   }
 103   if (write_iter != output->end())
 104     output->erase(write_iter, output->end());
 105   return true;
 106 }
 107
 108 // Normalizes the DER-encoded PrintableString value |in| according to
 109 // RFC 2459, Section 4.1.2.4
 110 //
 111 // Briefly, normalization involves removing leading and trailing
 112 // whitespace, folding multiple whitespace characters into a single
 113 // whitespace character, and normalizing on case (this function
 114 // normalizes to lowercase).
 115 //
 116 // During normalization, this function also validates that |in|
 117 // is properly encoded - that is, that it restricts to the character
 118 // set defined in X.680 (2008), Section 41.4, Table 10. X.680 defines
 119 // the valid characters as
 120 //   a-z A-Z 0-9 (space) ' ( ) + , - . / : = ?
 121 //
 122 // However, due to an old OpenSSL encoding bug, a number of
 123 // certificates have also included '*', which has historically been
 124 // allowed by implementations, and so is also allowed here.
 125 //
 126 // If |in| can be normalized, returns true and sets |output| to the
 127 // case folded, normalized value. If |in| is invalid, returns false.
 128 // NOTE: |output| will be modified regardless of the return.
 129 WARN_UNUSED_RESULT bool NormalizePrintableStringValue(const der::Input& in,
 130                                                       std::string* output) {
 131   in.AsString().swap(*output);
 132   return NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING, output);
 133 }
 134
 135 // Normalized a UTF8String value. See the comment for NormalizeDirectoryString
 136 // for details.
 137 //
 138 // If |in| can be normalized, returns true and sets |output| to the
 139 // case folded, normalized value. If |in| is invalid, returns false.
 140 // NOTE: |output| will be modified regardless of the return.
 141 WARN_UNUSED_RESULT bool NormalizeUtf8StringValue(const der::Input& in,
 142                                                  std::string* output) {
 143   in.AsString().swap(*output);
 144   return NormalizeDirectoryString(NO_ENFORCEMENT, output);
 145 }
 146
 147 // IA5String is ISO/IEC Registrations 1 and 6 from the ISO
 148 // "International Register of Coded Character Sets to be used
 149 // with Escape Sequences", plus space and delete. That's just the
 150 // polite way of saying 0x00 - 0x7F, aka ASCII (or, more formally,
 151 // ISO/IEC 646)
 152 //
 153 // If |in| can be normalized, returns true and sets |output| to the case folded,
 154 // normalized value. If |in| is invalid, returns false.
 155 // NOTE: |output| will be modified regardless of the return.
 156 WARN_UNUSED_RESULT bool NormalizeIA5StringValue(const der::Input& in,
 157                                                 std::string* output) {
 158   in.AsString().swap(*output);
 159   return NormalizeDirectoryString(ENFORCE_ASCII, output);
 160 }
 161
 162 // Converts BMPString value to UTF-8 and then normalizes it. See the comment for
 163 // NormalizeDirectoryString for details.
 164 //
 165 // If |in| can be normalized, returns true and sets |output| to the case folded,
 166 // normalized value. If |in| is invalid, returns false.
 167 // NOTE: |output| will be modified regardless of the return.
 168 WARN_UNUSED_RESULT bool NormalizeBmpStringValue(const der::Input& in,
 169                                                 std::string* output) {
 170   if (in.Length() % 2 != 0)
 171     return false;
 172
 173   base::string16 in_16bit;
 174   if (in.Length()) {
 175     memcpy(base::WriteInto(&in_16bit, in.Length() / 2 + 1), in.UnsafeData(),
 176            in.Length());
 177   }
 178   for (base::char16& c : in_16bit) {
 179     // BMPString is UCS-2 in big-endian order.
 180     c = base::NetToHost16(c);
 181
 182     // BMPString only supports codepoints in the Basic Multilingual Plane;
 183     // surrogates are not allowed.
 184     if (CBU_IS_SURROGATE(c))
 185       return false;
 186   }
 187   if (!base::UTF16ToUTF8(in_16bit.data(), in_16bit.size(), output))
 188     return false;
 189   return NormalizeDirectoryString(NO_ENFORCEMENT, output);
 190 }
 191
 192 // Converts UniversalString value to UTF-8 and then normalizes it. See the
 193 // comment for NormalizeDirectoryString for details.
 194 //
 195 // If |in| can be normalized, returns true and sets |output| to the case folded,
 196 // normalized value. If |in| is invalid, returns false.
 197 // NOTE: |output| will be modified regardless of the return.
 198 WARN_UNUSED_RESULT bool NormalizeUniversalStringValue(const der::Input& in,
 199                                                       std::string* output) {
 200   if (in.Length() % 4 != 0)
 201     return false;
 202
 203   std::vector<uint32_t> in_32bit(in.Length() / 4);
 204   if (in.Length())
 205     memcpy(vector_as_array(&in_32bit), in.UnsafeData(), in.Length());
 206   for (const uint32_t c : in_32bit) {
 207     // UniversalString is UCS-4 in big-endian order.
 208     uint32_t codepoint = base::NetToHost32(c);
 209     if (!CBU_IS_UNICODE_CHAR(codepoint))
 210       return false;
 211
 212     base::WriteUnicodeCharacter(codepoint, output);
 213   }
 214   return NormalizeDirectoryString(NO_ENFORCEMENT, output);
 215 }
 216
 217 // Converts the string |value| to UTF-8, normalizes it, and stores in |output|.
 218 // |tag| must one of the types for which IsNormalizableDirectoryString is true.
 219 //
 220 // If |value| can be normalized, returns true and sets |output| to the case
 221 // folded, normalized value. If |value| is invalid, returns false.
 222 // NOTE: |output| will be modified regardless of the return.
 223 WARN_UNUSED_RESULT bool NormalizeValue(const der::Tag tag,
 224                                        const der::Input& value,
 225                                        std::string* output) {
 226   switch (tag) {
 227     case der::kPrintableString:
 228       return NormalizePrintableStringValue(value, output);
 229     case der::kUtf8String:
 230       return NormalizeUtf8StringValue(value, output);
 231     case der::kIA5String:
 232       return NormalizeIA5StringValue(value, output);
 233     case der::kUniversalString:
 234       return NormalizeUniversalStringValue(value, output);
 235     case der::kBmpString:
 236       return NormalizeBmpStringValue(value, output);
 237     default:
 238       NOTREACHED();
 239       return false;
 240   }
 241 }
 242
 243 // Returns true if |tag| is a string type that NormalizeValue can handle.
 244 bool IsNormalizableDirectoryString(der::Tag tag) {
 245   switch (tag) {
 246     case der::kPrintableString:
 247     case der::kUtf8String:
 248     // RFC 5280 only requires handling IA5String for comparing domainComponent
 249     // values, but handling it here avoids the need to special case anything.
 250     case der::kIA5String:
 251     case der::kUniversalString:
 252     case der::kBmpString:
 253       return true;
 254     // TeletexString isn't normalized. Section 8 of RFC 5280 briefly
 255     // describes the historical confusion between treating TeletexString
 256     // as Latin1String vs T.61, and there are even incompatibilities within
 257     // T.61 implementations. As this time is virtually unused, simply
 258     // treat it with a binary comparison, as permitted by RFC 3280/5280.
 259     default:
 260       return false;
 261   }
 262 }
 263
 264 // Returns true if the AttributeValue (|a_tag|, |a_value|) matches (|b_tag|,
 265 // |b_value|).
 266 bool VerifyValueMatch(const der::Tag a_tag,
 267                       const der::Input& a_value,
 268                       const der::Tag b_tag,
 269                       const der::Input& b_value) {
 270   if (IsNormalizableDirectoryString(a_tag) &&
 271       IsNormalizableDirectoryString(b_tag)) {
 272     std::string a_normalized, b_normalized;
 273     if (!NormalizeValue(a_tag, a_value, &a_normalized) ||
 274         !NormalizeValue(b_tag, b_value, &b_normalized))
 275       return false;
 276     return a_normalized == b_normalized;
 277   }
 278   // Attributes encoded with different types may be assumed to be unequal.
 279   if (a_tag != b_tag)
 280     return false;
 281   // All other types use binary comparison.
 282   return a_value.Equals(b_value);
 283 }
 284
 285 struct AttributeTypeAndValue {
 286   AttributeTypeAndValue(der::Input in_type,
 287                         der::Tag in_value_tag,
 288                         der::Input in_value)
 289       : type(in_type), value_tag(in_value_tag), value(in_value) {}
 290   der::Input type;
 291   der::Tag value_tag;
 292   der::Input value;
 293 };
 294
 295 // Parses all the ASN.1 AttributeTypeAndValue elements in |parser| and stores
 296 // each as an AttributeTypeAndValue object in |out|.
 297 //
 298 // AttributeTypeAndValue is defined in RFC 5280 section 4.1.2.4:
 299 //
 300 // AttributeTypeAndValue ::= SEQUENCE {
 301 //   type     AttributeType,
 302 //   value    AttributeValue }
 303 //
 304 // AttributeType ::= OBJECT IDENTIFIER
 305 //
 306 // AttributeValue ::= ANY -- DEFINED BY AttributeType
 307 //
 308 // DirectoryString ::= CHOICE {
 309 //       teletexString           TeletexString (SIZE (1..MAX)),
 310 //       printableString         PrintableString (SIZE (1..MAX)),
 311 //       universalString         UniversalString (SIZE (1..MAX)),
 312 //       utf8String              UTF8String (SIZE (1..MAX)),
 313 //       bmpString               BMPString (SIZE (1..MAX)) }
 314 //
 315 // The type of the component AttributeValue is determined by the AttributeType;
 316 // in general it will be a DirectoryString.
 317 WARN_UNUSED_RESULT bool ReadRdn(der::Parser* parser,
 318                                 std::vector<AttributeTypeAndValue>* out) {
 319   while (parser->HasMore()) {
 320     der::Parser attr_type_and_value;
 321     if (!parser->ReadSequence(&attr_type_and_value))
 322       return false;
 323     // Read the attribute type, which must be an OBJECT IDENTIFIER.
 324     der::Input type;
 325     if (!attr_type_and_value.ReadTag(der::kOid, &type))
 326       return false;
 327
 328     // Read the attribute value.
 329     der::Tag tag;
 330     der::Input value;
 331     if (!attr_type_and_value.ReadTagAndValue(&tag, &value))
 332       return false;
 333
 334     // There should be no more elements in the sequence after reading the
 335     // attribute type and value.
 336     if (attr_type_and_value.HasMore())
 337       return false;
 338
 339     out->push_back(AttributeTypeAndValue(type, tag, value));
 340   }
 341   return true;
 342 }
 343
 344 // Verifies that |a_parser| and |b_parser| are the same length and that every
 345 // AttributeTypeAndValue in |a_parser| has a matching AttributeTypeAndValue in
 346 // |b_parser|.
 347 bool VerifyRdnMatch(der::Parser* a_parser, der::Parser* b_parser) {
 348   std::vector<AttributeTypeAndValue> a_type_and_values, b_type_and_values;
 349   if (!ReadRdn(a_parser, &a_type_and_values) ||
 350       !ReadRdn(b_parser, &b_type_and_values))
 351     return false;
 352
 353   // RFC 5280 section 4.1.2.4
 354   // RelativeDistinguishedName ::= SET SIZE (1..MAX) OF AttributeTypeAndValue
 355   if (a_type_and_values.empty() || b_type_and_values.empty())
 356     return false;
 357
 358   // RFC 5280 section 7.1:
 359   // Two relative distinguished names RDN1 and RDN2 match if they have the same
 360   // number of naming attributes and for each naming attribute in RDN1 there is
 361   // a matching naming attribute in RDN2.
 362   if (a_type_and_values.size() != b_type_and_values.size())
 363     return false;
 364
 365   // The ordering of elements may differ due to denormalized values sorting
 366   // differently in the DER encoding. Since the number of elements should be
 367   // small, a naive linear search for each element should be fine. (Hostile
 368   // certificates already have ways to provoke pathological behavior.)
 369   for (const auto& a : a_type_and_values) {
 370     std::vector<AttributeTypeAndValue>::iterator b_iter =
 371         b_type_and_values.begin();
 372     for (; b_iter != b_type_and_values.end(); ++b_iter) {
 373       const auto& b = *b_iter;
 374       if (a.type.Equals(b.type) &&
 375           VerifyValueMatch(a.value_tag, a.value, b.value_tag, b.value)) {
 376         break;
 377       }
 378     }
 379     if (b_iter == b_type_and_values.end())
 380       return false;
 381     // Remove the matched element from b_type_and_values to ensure duplicate
 382     // elements in a_type_and_values can't match the same element in
 383     // b_type_and_values multiple times.
 384     b_type_and_values.erase(b_iter);
 385   }
 386
 387   // Every element in |a_type_and_values| had a matching element in
 388   // |b_type_and_values|.
 389   return true;
 390 }
 391
 392 }  // namespace
 393
 394 // |a| and |b| are ASN.1 RDNSequence values (not including the Sequence tag),
 395 // defined in RFC 5280 section 4.1.2.4:
 396 //
 397 // Name ::= CHOICE { -- only one possibility for now --
 398 //   rdnSequence  RDNSequence }
 399 //
 400 // RDNSequence ::= SEQUENCE OF RelativeDistinguishedName
 401 //
 402 // RelativeDistinguishedName ::=
 403 //   SET SIZE (1..MAX) OF AttributeTypeAndValue
 404 bool VerifyNameMatch(const der::Input& a, const der::Input& b) {
 405   // Empty Names are allowed.  RFC 5280 section 4.1.2.4 requires "The issuer
 406   // field MUST contain a non-empty distinguished name (DN)", while section
 407   // 4.1.2.6 allows for the Subject to be empty in certain cases. The caller is
 408   // assumed to have verified those conditions.
 409
 410   // RFC 5280 section 7.1:
 411   // Two distinguished names DN1 and DN2 match if they have the same number of
 412   // RDNs, for each RDN in DN1 there is a matching RDN in DN2, and the matching
 413   // RDNs appear in the same order in both DNs.
 414
 415   // First just check if the inputs have the same number of RDNs:
 416   der::Parser a_rdn_sequence_counter(a);
 417   der::Parser b_rdn_sequence_counter(b);
 418   while (a_rdn_sequence_counter.HasMore() && b_rdn_sequence_counter.HasMore()) {
 419     if (!a_rdn_sequence_counter.SkipTag(der::kSet) ||
 420         !b_rdn_sequence_counter.SkipTag(der::kSet)) {
 421       return false;
 422     }
 423   }
 424   if (a_rdn_sequence_counter.HasMore() || b_rdn_sequence_counter.HasMore())
 425     return false;
 426
 427   // Same number of RDNs, now check if they match.
 428   der::Parser a_rdn_sequence(a);
 429   der::Parser b_rdn_sequence(b);
 430   while (a_rdn_sequence.HasMore() && b_rdn_sequence.HasMore()) {
 431     der::Parser a_rdn, b_rdn;
 432     if (!a_rdn_sequence.ReadConstructed(der::kSet, &a_rdn) ||
 433         !b_rdn_sequence.ReadConstructed(der::kSet, &b_rdn)) {
 434       return false;
 435     }
 436     if (!VerifyRdnMatch(&a_rdn, &b_rdn))
 437       return false;
 438   }
 439
 440   return true;
 441 }
 442
 443 }  // namespace net