1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/cert/internal/verify_name_match.h"
9 #include "base/stl_util.h"
10 #include "base/strings/string16.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversion_utils.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/sys_byteorder.h"
15 #include "base/third_party/icu/icu_utf.h"
16 #include "base/tuple.h"
17 #include "net/der/input.h"
18 #include "net/der/parser.h"
19 #include "net/der/tag.h"
// Types of character set checking that NormalizeDirectoryString can perform.
enum CharsetEnforcement {
  // No character set validation; every byte value is accepted.
  NO_ENFORCEMENT,
  // Reject any character outside the PrintableString alphabet (see the
  // comment on NormalizePrintableStringValue for the accepted list).
  ENFORCE_PRINTABLE_STRING,
  // Reject any byte outside the 7-bit ASCII range (0x00 - 0x7F).
  ENFORCE_ASCII,
};
32 // Normalizes |output|, a UTF-8 encoded string, as if it contained
33 // only ASCII characters.
35 // This could be considered a partial subset of RFC 5280 rules, and
36 // is compatible with RFC 2459/3280.
38 // In particular, RFC 5280, Section 7.1 describes how UTF8String
39 // and PrintableString should be compared - using the LDAP StringPrep
40 // profile of RFC 4518, with case folding and whitespace compression.
41 // However, because it is optional for 2459/3280 implementations and because
42 // it's desirable to avoid the size cost of the StringPrep tables,
43 // this function treats |output| as if it was composed of ASCII.
45 // That is, rather than folding all whitespace characters, it only
46 // folds ' '. Rather than case folding using locale-aware handling,
47 // it only folds A-Z to a-z.
49 // This gives better results than outright rejecting (due to mismatched
50 // encodings), or from doing a strict binary comparison (the minimum
51 // required by RFC 3280), and is sufficient for those certificates
54 // If |charset_enforcement| is not NO_ENFORCEMENT and |output| contains any
55 // characters not allowed in the specified charset, returns false.
57 // NOTE: |output| will be modified regardless of the return.
58 WARN_UNUSED_RESULT
bool NormalizeDirectoryString(
59 CharsetEnforcement charset_enforcement
,
60 std::string
* output
) {
61 // Normalized version will always be equal or shorter than input.
62 // Normalize in place and then truncate the output if necessary.
63 std::string::const_iterator read_iter
= output
->begin();
64 std::string::iterator write_iter
= output
->begin();
66 for (; read_iter
!= output
->end() && *read_iter
== ' '; ++read_iter
) {
67 // Ignore leading whitespace.
70 for (; read_iter
!= output
->end(); ++read_iter
) {
71 const unsigned char c
= *read_iter
;
73 // If there are non-whitespace characters remaining in input, compress
74 // multiple whitespace chars to a single space, otherwise ignore trailing
76 std::string::const_iterator next_iter
= read_iter
+ 1;
77 if (next_iter
!= output
->end() && *next_iter
!= ' ')
78 *(write_iter
++) = ' ';
79 } else if (c
>= 'A' && c
<= 'Z') {
81 *(write_iter
++) = c
+ ('a' - 'A');
83 // Note that these checks depend on the characters allowed by earlier
84 // conditions also being valid for the enforced charset.
85 switch (charset_enforcement
) {
86 case ENFORCE_PRINTABLE_STRING
:
87 // See NormalizePrintableStringValue comment for the acceptable list
89 if (!((c
>= 'a' && c
<= 'z') || (c
>= '\'' && c
<= ':') || c
== '=' ||
103 if (write_iter
!= output
->end())
104 output
->erase(write_iter
, output
->end());
108 // Normalizes the DER-encoded PrintableString value |in| according to
109 // RFC 2459, Section 4.1.2.4
111 // Briefly, normalization involves removing leading and trailing
112 // whitespace, folding multiple whitespace characters into a single
113 // whitespace character, and normalizing on case (this function
114 // normalizes to lowercase).
116 // During normalization, this function also validates that |in|
117 // is properly encoded - that is, that it restricts to the character
118 // set defined in X.680 (2008), Section 41.4, Table 10. X.680 defines
119 // the valid characters as
120 // a-z A-Z 0-9 (space) ' ( ) + , - . / : = ?
122 // However, due to an old OpenSSL encoding bug, a number of
123 // certificates have also included '*', which has historically been
124 // allowed by implementations, and so is also allowed here.
126 // If |in| can be normalized, returns true and sets |output| to the
127 // case folded, normalized value. If |in| is invalid, returns false.
128 // NOTE: |output| will be modified regardless of the return.
129 WARN_UNUSED_RESULT
bool NormalizePrintableStringValue(const der::Input
& in
,
130 std::string
* output
) {
131 in
.AsString().swap(*output
);
132 return NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING
, output
);
135 // Normalized a UTF8String value. See the comment for NormalizeDirectoryString
138 // If |in| can be normalized, returns true and sets |output| to the
139 // case folded, normalized value. If |in| is invalid, returns false.
140 // NOTE: |output| will be modified regardless of the return.
141 WARN_UNUSED_RESULT
bool NormalizeUtf8StringValue(const der::Input
& in
,
142 std::string
* output
) {
143 in
.AsString().swap(*output
);
144 return NormalizeDirectoryString(NO_ENFORCEMENT
, output
);
147 // IA5String is ISO/IEC Registrations 1 and 6 from the ISO
148 // "International Register of Coded Character Sets to be used
149 // with Escape Sequences", plus space and delete. That's just the
150 // polite way of saying 0x00 - 0x7F, aka ASCII (or, more formally,
153 // If |in| can be normalized, returns true and sets |output| to the case folded,
154 // normalized value. If |in| is invalid, returns false.
155 // NOTE: |output| will be modified regardless of the return.
156 WARN_UNUSED_RESULT
bool NormalizeIA5StringValue(const der::Input
& in
,
157 std::string
* output
) {
158 in
.AsString().swap(*output
);
159 return NormalizeDirectoryString(ENFORCE_ASCII
, output
);
162 // Converts BMPString value to UTF-8 and then normalizes it. See the comment for
163 // NormalizeDirectoryString for details.
165 // If |in| can be normalized, returns true and sets |output| to the case folded,
166 // normalized value. If |in| is invalid, returns false.
167 // NOTE: |output| will be modified regardless of the return.
168 WARN_UNUSED_RESULT
bool NormalizeBmpStringValue(const der::Input
& in
,
169 std::string
* output
) {
170 if (in
.Length() % 2 != 0)
173 base::string16 in_16bit
;
175 memcpy(base::WriteInto(&in_16bit
, in
.Length() / 2 + 1), in
.UnsafeData(),
178 for (base::char16
& c
: in_16bit
) {
179 // BMPString is UCS-2 in big-endian order.
180 c
= base::NetToHost16(c
);
182 // BMPString only supports codepoints in the Basic Multilingual Plane;
183 // surrogates are not allowed.
184 if (CBU_IS_SURROGATE(c
))
187 if (!base::UTF16ToUTF8(in_16bit
.data(), in_16bit
.size(), output
))
189 return NormalizeDirectoryString(NO_ENFORCEMENT
, output
);
192 // Converts UniversalString value to UTF-8 and then normalizes it. See the
193 // comment for NormalizeDirectoryString for details.
195 // If |in| can be normalized, returns true and sets |output| to the case folded,
196 // normalized value. If |in| is invalid, returns false.
197 // NOTE: |output| will be modified regardless of the return.
198 WARN_UNUSED_RESULT
bool NormalizeUniversalStringValue(const der::Input
& in
,
199 std::string
* output
) {
200 if (in
.Length() % 4 != 0)
203 std::vector
<uint32_t> in_32bit(in
.Length() / 4);
205 memcpy(vector_as_array(&in_32bit
), in
.UnsafeData(), in
.Length());
206 for (const uint32_t c
: in_32bit
) {
207 // UniversalString is UCS-4 in big-endian order.
208 uint32_t codepoint
= base::NetToHost32(c
);
209 if (!CBU_IS_UNICODE_CHAR(codepoint
))
212 base::WriteUnicodeCharacter(codepoint
, output
);
214 return NormalizeDirectoryString(NO_ENFORCEMENT
, output
);
217 // Converts the string |value| to UTF-8, normalizes it, and stores in |output|.
218 // |tag| must one of the types for which IsNormalizableDirectoryString is true.
220 // If |value| can be normalized, returns true and sets |output| to the case
221 // folded, normalized value. If |value| is invalid, returns false.
222 // NOTE: |output| will be modified regardless of the return.
223 WARN_UNUSED_RESULT
bool NormalizeValue(const der::Tag tag
,
224 const der::Input
& value
,
225 std::string
* output
) {
227 case der::kPrintableString
:
228 return NormalizePrintableStringValue(value
, output
);
229 case der::kUtf8String
:
230 return NormalizeUtf8StringValue(value
, output
);
231 case der::kIA5String
:
232 return NormalizeIA5StringValue(value
, output
);
233 case der::kUniversalString
:
234 return NormalizeUniversalStringValue(value
, output
);
235 case der::kBmpString
:
236 return NormalizeBmpStringValue(value
, output
);
243 // Returns true if |tag| is a string type that NormalizeValue can handle.
244 bool IsNormalizableDirectoryString(der::Tag tag
) {
246 case der::kPrintableString
:
247 case der::kUtf8String
:
248 // RFC 5280 only requires handling IA5String for comparing domainComponent
249 // values, but handling it here avoids the need to special case anything.
250 case der::kIA5String
:
251 case der::kUniversalString
:
252 case der::kBmpString
:
254 // TeletexString isn't normalized. Section 8 of RFC 5280 briefly
255 // describes the historical confusion between treating TeletexString
256 // as Latin1String vs T.61, and there are even incompatibilities within
257 // T.61 implementations. As this time is virtually unused, simply
258 // treat it with a binary comparison, as permitted by RFC 3280/5280.
264 // Returns true if the AttributeValue (|a_tag|, |a_value|) matches (|b_tag|,
266 bool VerifyValueMatch(const der::Tag a_tag
,
267 const der::Input
& a_value
,
268 const der::Tag b_tag
,
269 const der::Input
& b_value
) {
270 if (IsNormalizableDirectoryString(a_tag
) &&
271 IsNormalizableDirectoryString(b_tag
)) {
272 std::string a_normalized
, b_normalized
;
273 if (!NormalizeValue(a_tag
, a_value
, &a_normalized
) ||
274 !NormalizeValue(b_tag
, b_value
, &b_normalized
))
276 return a_normalized
== b_normalized
;
278 // Attributes encoded with different types may be assumed to be unequal.
281 // All other types use binary comparison.
282 return a_value
.Equals(b_value
);
285 struct AttributeTypeAndValue
{
286 AttributeTypeAndValue(der::Input in_type
,
287 der::Tag in_value_tag
,
289 : type(in_type
), value_tag(in_value_tag
), value(in_value
) {}
295 // Parses all the ASN.1 AttributeTypeAndValue elements in |parser| and stores
296 // each as an AttributeTypeAndValue object in |out|.
298 // AttributeTypeAndValue is defined in RFC 5280 section 4.1.2.4:
300 // AttributeTypeAndValue ::= SEQUENCE {
301 // type AttributeType,
302 // value AttributeValue }
304 // AttributeType ::= OBJECT IDENTIFIER
306 // AttributeValue ::= ANY -- DEFINED BY AttributeType
308 // DirectoryString ::= CHOICE {
309 // teletexString TeletexString (SIZE (1..MAX)),
310 // printableString PrintableString (SIZE (1..MAX)),
311 // universalString UniversalString (SIZE (1..MAX)),
312 // utf8String UTF8String (SIZE (1..MAX)),
313 // bmpString BMPString (SIZE (1..MAX)) }
315 // The type of the component AttributeValue is determined by the AttributeType;
316 // in general it will be a DirectoryString.
317 WARN_UNUSED_RESULT
bool ReadRdn(der::Parser
* parser
,
318 std::vector
<AttributeTypeAndValue
>* out
) {
319 while (parser
->HasMore()) {
320 der::Parser attr_type_and_value
;
321 if (!parser
->ReadSequence(&attr_type_and_value
))
323 // Read the attribute type, which must be an OBJECT IDENTIFIER.
325 if (!attr_type_and_value
.ReadTag(der::kOid
, &type
))
328 // Read the attribute value.
331 if (!attr_type_and_value
.ReadTagAndValue(&tag
, &value
))
334 // There should be no more elements in the sequence after reading the
335 // attribute type and value.
336 if (attr_type_and_value
.HasMore())
339 out
->push_back(AttributeTypeAndValue(type
, tag
, value
));
344 // Verifies that |a_parser| and |b_parser| are the same length and that every
345 // AttributeTypeAndValue in |a_parser| has a matching AttributeTypeAndValue in
347 bool VerifyRdnMatch(der::Parser
* a_parser
, der::Parser
* b_parser
) {
348 std::vector
<AttributeTypeAndValue
> a_type_and_values
, b_type_and_values
;
349 if (!ReadRdn(a_parser
, &a_type_and_values
) ||
350 !ReadRdn(b_parser
, &b_type_and_values
))
353 // RFC 5280 section 4.1.2.4
354 // RelativeDistinguishedName ::= SET SIZE (1..MAX) OF AttributeTypeAndValue
355 if (a_type_and_values
.empty() || b_type_and_values
.empty())
358 // RFC 5280 section 7.1:
359 // Two relative distinguished names RDN1 and RDN2 match if they have the same
360 // number of naming attributes and for each naming attribute in RDN1 there is
361 // a matching naming attribute in RDN2.
362 if (a_type_and_values
.size() != b_type_and_values
.size())
365 // The ordering of elements may differ due to denormalized values sorting
366 // differently in the DER encoding. Since the number of elements should be
367 // small, a naive linear search for each element should be fine. (Hostile
368 // certificates already have ways to provoke pathological behavior.)
369 for (const auto& a
: a_type_and_values
) {
370 std::vector
<AttributeTypeAndValue
>::iterator b_iter
=
371 b_type_and_values
.begin();
372 for (; b_iter
!= b_type_and_values
.end(); ++b_iter
) {
373 const auto& b
= *b_iter
;
374 if (a
.type
.Equals(b
.type
) &&
375 VerifyValueMatch(a
.value_tag
, a
.value
, b
.value_tag
, b
.value
)) {
379 if (b_iter
== b_type_and_values
.end())
381 // Remove the matched element from b_type_and_values to ensure duplicate
382 // elements in a_type_and_values can't match the same element in
383 // b_type_and_values multiple times.
384 b_type_and_values
.erase(b_iter
);
387 // Every element in |a_type_and_values| had a matching element in
388 // |b_type_and_values|.
394 // |a| and |b| are ASN.1 RDNSequence values (not including the Sequence tag),
395 // defined in RFC 5280 section 4.1.2.4:
397 // Name ::= CHOICE { -- only one possibility for now --
398 // rdnSequence RDNSequence }
400 // RDNSequence ::= SEQUENCE OF RelativeDistinguishedName
402 // RelativeDistinguishedName ::=
403 // SET SIZE (1..MAX) OF AttributeTypeAndValue
404 bool VerifyNameMatch(const der::Input
& a
, const der::Input
& b
) {
405 // Empty Names are allowed. RFC 5280 section 4.1.2.4 requires "The issuer
406 // field MUST contain a non-empty distinguished name (DN)", while section
407 // 4.1.2.6 allows for the Subject to be empty in certain cases. The caller is
408 // assumed to have verified those conditions.
410 // RFC 5280 section 7.1:
411 // Two distinguished names DN1 and DN2 match if they have the same number of
412 // RDNs, for each RDN in DN1 there is a matching RDN in DN2, and the matching
413 // RDNs appear in the same order in both DNs.
415 // First just check if the inputs have the same number of RDNs:
416 der::Parser
a_rdn_sequence_counter(a
);
417 der::Parser
b_rdn_sequence_counter(b
);
418 while (a_rdn_sequence_counter
.HasMore() && b_rdn_sequence_counter
.HasMore()) {
419 if (!a_rdn_sequence_counter
.SkipTag(der::kSet
) ||
420 !b_rdn_sequence_counter
.SkipTag(der::kSet
)) {
424 if (a_rdn_sequence_counter
.HasMore() || b_rdn_sequence_counter
.HasMore())
427 // Same number of RDNs, now check if they match.
428 der::Parser
a_rdn_sequence(a
);
429 der::Parser
b_rdn_sequence(b
);
430 while (a_rdn_sequence
.HasMore() && b_rdn_sequence
.HasMore()) {
431 der::Parser a_rdn
, b_rdn
;
432 if (!a_rdn_sequence
.ReadConstructed(der::kSet
, &a_rdn
) ||
433 !b_rdn_sequence
.ReadConstructed(der::kSet
, &b_rdn
)) {
436 if (!VerifyRdnMatch(&a_rdn
, &b_rdn
))