1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult
;
19 using clang::analyze_format_string::FormatStringHandler
;
20 using clang::analyze_format_string::LengthModifier
;
21 using clang::analyze_format_string::OptionalAmount
;
22 using clang::analyze_format_string::ConversionSpecifier
;
23 using clang::analyze_printf::PrintfSpecifier
;
25 using namespace clang
;
// Convenience alias: the SpecifierResult template instantiated for
// PrintfSpecifier. It is the return type of ParsePrintfSpecifier() below.
27 typedef clang::analyze_format_string::SpecifierResult
<PrintfSpecifier
>
28 PrintfSpecifierResult
;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount
;
// Parses the precision portion of a printf specifier and records it on FS.
//
// Two parsing paths are visible in this block:
//   * a non-positional amount, read with ParseNonPositionAmount(Beg, E,
//     *argIndex) and stored through FS.setPrecision();
//   * a positional amount, read with ParsePositionAmount(H, Start, Beg, E,
//     PrecisionPos) into the local Amt.
//
// The caller in this file passes 0 for argIndex when FS uses positional
// arguments, and the address of its running argument counter otherwise.
//
// NOTE(review): the lines that select between the two paths and that produce
// the bool result are not present in this listing -- confirm against the
// complete file before relying on the exact return convention.
36 static bool ParsePrecision(FormatStringHandler
&H
, PrintfSpecifier
&FS
,
37 const char *Start
, const char *&Beg
, const char *E
,
40 FS
.setPrecision(ParseNonPositionAmount(Beg
, E
, *argIndex
));
43 const OptionalAmount Amt
= ParsePositionAmount(H
, Start
, Beg
, E
,
44 analyze_format_string::PrecisionPos
);
// Parses a single printf conversion specification and returns it wrapped in a
// PrintfSpecifierResult.
//
// Phases visible in this block, in order:
//   1. scan forward for the '%' that starts a specifier (Start records it);
//   2. ParseArgPosition() for an optional positional argument;
//   3. the flag characters '-', '+', ' ', '#' and '0', each recorded on FS
//      together with its position I;
//   4. ParseFieldWidth();
//   5. ParsePrecision();
//   6. ParseLengthModifier();
//   7. the conversion character, mapped to a ConversionSpecifier::Kind.
//
// Running out of input is reported through
// H.HandleIncompleteSpecifier(Start, E - Start) (see the "No more characters
// left?" comments). A conversion character that matches none of the cases
// leaves k at InvalidSpecifier and is reported through
// H.HandleInvalidPrintfConversionSpecifier(); the negation of that call's
// result is what gets returned on that path.
//
// argIndex is the running index of the next data argument. It is handed to
// the width/precision parsers only when FS does not use positional arguments
// (0 is passed otherwise), and it is post-incremented here when the parsed
// conversion consumes a data argument and is non-positional.
//
// NOTE(review): this listing omits a number of original lines (the embedded
// line numbers skip), including the remaining parameters, the declaration of
// the cursor I, and the conditions guarding several of the handler calls.
// Treat the phase outline above as a reading aid, not a complete control-flow
// description.
52 static PrintfSpecifierResult
ParsePrintfSpecifier(FormatStringHandler
&H
,
57 using namespace clang::analyze_format_string
;
58 using namespace clang::analyze_printf
;
61 const char *Start
= 0;
62 UpdateOnReturn
<const char*> UpdateBeg(Beg
, I
);
64 // Look for a '%' character that indicates the start of a format specifier.
65 for ( ; I
!= E
; ++I
) {
68 // Detect spurious null characters, which are likely errors.
73 Start
= I
++; // Record the start of the format specifier.
78 // No format specifier found?
83 // No more characters left?
84 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
89 if (ParseArgPosition(H
, FS
, Start
, I
, E
))
93 // No more characters left?
94 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
98 // Look for flags (if any).
100 for ( ; I
!= E
; ++I
) {
102 default: hasMore
= false; break;
103 case '-': FS
.setIsLeftJustified(I
); break;
104 case '+': FS
.setHasPlusPrefix(I
); break;
105 case ' ': FS
.setHasSpacePrefix(I
); break;
106 case '#': FS
.setHasAlternativeForm(I
); break;
107 case '0': FS
.setHasLeadingZeros(I
); break;
114 // No more characters left?
115 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
119 // Look for the field width (if any).
120 if (ParseFieldWidth(H
, FS
, Start
, I
, E
,
121 FS
.usesPositionalArg() ? 0 : &argIndex
))
125 // No more characters left?
126 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
130 // Look for the precision (if any).
134 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
138 if (ParsePrecision(H
, FS
, Start
, I
, E
,
139 FS
.usesPositionalArg() ? 0 : &argIndex
))
143 // No more characters left?
144 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
149 // Look for the length modifier.
150 if (ParseLengthModifier(FS
, I
, E
) && I
== E
) {
151 // No more characters left?
152 H
.HandleIncompleteSpecifier(Start
, E
- Start
);
157 // Detect spurious null characters, which are likely errors.
162 // Finally, look for the conversion specifier.
163 const char *conversionPosition
= I
++;
// k stays InvalidSpecifier unless one of the cases below matches.
164 ConversionSpecifier::Kind k
= ConversionSpecifier::InvalidSpecifier
;
165 switch (*conversionPosition
) {
168 // C99: 7.19.6.1 (section 8).
169 case '%': k
= ConversionSpecifier::PercentArg
; break;
170 case 'A': k
= ConversionSpecifier::AArg
; break;
171 case 'E': k
= ConversionSpecifier::EArg
; break;
172 case 'F': k
= ConversionSpecifier::FArg
; break;
173 case 'G': k
= ConversionSpecifier::GArg
; break;
174 case 'X': k
= ConversionSpecifier::XArg
; break;
175 case 'a': k
= ConversionSpecifier::aArg
; break;
176 case 'c': k
= ConversionSpecifier::cArg
; break;
177 case 'd': k
= ConversionSpecifier::dArg
; break;
178 case 'e': k
= ConversionSpecifier::eArg
; break;
179 case 'f': k
= ConversionSpecifier::fArg
; break;
180 case 'g': k
= ConversionSpecifier::gArg
; break;
181 case 'i': k
= ConversionSpecifier::iArg
; break;
182 case 'n': k
= ConversionSpecifier::nArg
; break;
183 case 'o': k
= ConversionSpecifier::oArg
; break;
184 case 'p': k
= ConversionSpecifier::pArg
; break;
185 case 's': k
= ConversionSpecifier::sArg
; break;
186 case 'u': k
= ConversionSpecifier::uArg
; break;
187 case 'x': k
= ConversionSpecifier::xArg
; break;
188 // Mac OS X (unicode) specific
189 case 'C': k
= ConversionSpecifier::CArg
; break;
190 case 'S': k
= ConversionSpecifier::SArg
; break;
192 case '@': k
= ConversionSpecifier::ObjCObjArg
; break;
194 case 'm': k
= ConversionSpecifier::PrintErrno
; break;
196 PrintfConversionSpecifier
CS(conversionPosition
, k
);
197 FS
.setConversionSpecifier(CS
);
// Only non-positional specifiers draw from (and advance) the running index.
198 if (CS
.consumesDataArgument() && !FS
.usesPositionalArg())
199 FS
.setArgIndex(argIndex
++);
201 if (k
== ConversionSpecifier::InvalidSpecifier
) {
202 // Assume the conversion takes one argument.
203 return !H
.HandleInvalidPrintfConversionSpecifier(FS
, Start
, I
- Start
);
205 return PrintfSpecifierResult(Start
, FS
);
// Entry point for analysing a whole printf format string.
//
// Starting with argIndex == 0, it calls ParsePrintfSpecifier() repeatedly
// (per the comment below, until the string is exhausted). For each result:
//   * FSR.shouldStop() is checked first, to detect a fail-stop error;
//   * a successfully parsed specifier is forwarded to
//     H.HandlePrintfSpecifier() with its value and start position.
// Once parsing is done the function asserts that the cursor I reached E.
//
// NOTE(review): the remaining parameters, the loop header and the return
// statements are not present in this listing, so the meaning of the bool
// result cannot be confirmed from here.
208 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler
&H
,
212 unsigned argIndex
= 0;
214 // Keep looking for a format specifier until we have exhausted the string.
216 const PrintfSpecifierResult
&FSR
= ParsePrintfSpecifier(H
, I
, E
, argIndex
);
217 // Did a fail-stop error of any kind occur when parsing the specifier?
218 // If so, don't do any more processing.
219 if (FSR
.shouldStop())
221 // Did we exhaust the string or encounter an error that
222 // we can recover from?
225 // We have a format specifier. Pass it to the callback.
226 if (!H
.HandlePrintfSpecifier(FSR
.getValue(), FSR
.getStart(),
230 assert(I
== E
&& "Format string not exhausted");
234 //===----------------------------------------------------------------------===//
235 // Methods on ConversionSpecifier.
236 //===----------------------------------------------------------------------===//
// Returns the spelling of this conversion specifier as it appears after the
// '%' in a format string (for example dArg -> "d", PercentArg -> "%",
// ScanListArg -> "[").
//
// InvalidSpecifier has no spelling and yields NULL, so callers must be
// prepared for a null result.
//
// NOTE(review): the switch header is not present in this listing; presumably
// it switches on this specifier's kind -- confirm against the complete file.
237 const char *ConversionSpecifier::toString() const {
239 case dArg
: return "d";
240 case iArg
: return "i";
241 case oArg
: return "o";
242 case uArg
: return "u";
243 case xArg
: return "x";
244 case XArg
: return "X";
245 case fArg
: return "f";
246 case FArg
: return "F";
247 case eArg
: return "e";
248 case EArg
: return "E";
249 case gArg
: return "g";
250 case GArg
: return "G";
251 case aArg
: return "a";
252 case AArg
: return "A";
253 case cArg
: return "c";
254 case sArg
: return "s";
255 case pArg
: return "p";
256 case nArg
: return "n";
257 case PercentArg
: return "%";
258 case ScanListArg
: return "[";
259 case InvalidSpecifier
: return NULL
;
261 // MacOS X unicode extensions.
262 case CArg
: return "C";
263 case SArg
: return "S";
265 // Objective-C specific specifiers.
266 case ObjCObjArg
: return "@";
268 // GlibC specific specifiers.
269 case PrintErrno
: return "m";
274 //===----------------------------------------------------------------------===//
275 // Methods on PrintfSpecifier.
276 //===----------------------------------------------------------------------===//
// Computes the argument type that this specifier expects, from its conversion
// specifier (CS) and length modifier (LM).
//
// Visible behaviour:
//   * a conversion that consumes no data argument yields
//     ArgTypeResult::Invalid();
//   * cArg: no length modifier -> int, AsLong -> WIntTy, otherwise Invalid();
//   * a first length-modifier switch returns the signed types (int, signed
//     char, short, long, long long, size type, pointer-diff type);
//   * a second length-modifier switch returns the unsigned counterparts;
//   * for double conversions, AsLongDouble selects long double;
//   * sArg yields WCStrTy with AsWideChar and CStrTy otherwise; SArg yields
//     WCStrTy; pArg yields CPointerTy.
//
// A default-constructed ArgTypeResult is returned where the code carries a
// FIXME (intmax_t, unsigned size_t / ptrdiff_t) and for any conversion not
// handled above. AsLongDouble on either integer switch is Invalid().
//
// NOTE(review): the conditions that select the signed and the unsigned switch,
// and the result for CArg, are not present in this listing.
278 ArgTypeResult
PrintfSpecifier::getArgType(ASTContext
&Ctx
) const {
279 const PrintfConversionSpecifier
&CS
= getConversionSpecifier();
281 if (!CS
.consumesDataArgument())
282 return ArgTypeResult::Invalid();
284 if (CS
.getKind() == ConversionSpecifier::cArg
)
285 switch (LM
.getKind()) {
286 case LengthModifier::None
: return Ctx
.IntTy
;
287 case LengthModifier::AsLong
: return ArgTypeResult::WIntTy
;
289 return ArgTypeResult::Invalid();
293 switch (LM
.getKind()) {
294 case LengthModifier::AsLongDouble
:
295 return ArgTypeResult::Invalid();
296 case LengthModifier::None
: return Ctx
.IntTy
;
297 case LengthModifier::AsChar
: return Ctx
.SignedCharTy
;
298 case LengthModifier::AsShort
: return Ctx
.ShortTy
;
299 case LengthModifier::AsLong
: return Ctx
.LongTy
;
300 case LengthModifier::AsLongLong
: return Ctx
.LongLongTy
;
301 case LengthModifier::AsIntMax
:
302 // FIXME: Return unknown for now.
303 return ArgTypeResult();
304 case LengthModifier::AsSizeT
: return Ctx
.getSizeType();
305 case LengthModifier::AsPtrDiff
: return Ctx
.getPointerDiffType();
309 switch (LM
.getKind()) {
310 case LengthModifier::AsLongDouble
:
311 return ArgTypeResult::Invalid();
312 case LengthModifier::None
: return Ctx
.UnsignedIntTy
;
313 case LengthModifier::AsChar
: return Ctx
.UnsignedCharTy
;
314 case LengthModifier::AsShort
: return Ctx
.UnsignedShortTy
;
315 case LengthModifier::AsLong
: return Ctx
.UnsignedLongTy
;
316 case LengthModifier::AsLongLong
: return Ctx
.UnsignedLongLongTy
;
317 case LengthModifier::AsIntMax
:
318 // FIXME: Return unknown for now.
319 return ArgTypeResult();
320 case LengthModifier::AsSizeT
:
321 // FIXME: How to get the corresponding unsigned
322 // version of size_t?
323 return ArgTypeResult();
324 case LengthModifier::AsPtrDiff
:
325 // FIXME: How to get the corresponding unsigned
326 // version of ptrdiff_t?
327 return ArgTypeResult();
330 if (CS
.isDoubleArg()) {
331 if (LM
.getKind() == LengthModifier::AsLongDouble
)
332 return Ctx
.LongDoubleTy
;
336 switch (CS
.getKind()) {
337 case ConversionSpecifier::sArg
:
338 return ArgTypeResult(LM
.getKind() == LengthModifier::AsWideChar
?
339 ArgTypeResult::WCStrTy
: ArgTypeResult::CStrTy
);
340 case ConversionSpecifier::SArg
:
341 // FIXME: This appears to be Mac OS X specific.
342 return ArgTypeResult::WCStrTy
;
343 case ConversionSpecifier::CArg
:
345 case ConversionSpecifier::pArg
:
346 return ArgTypeResult::CPointerTy
;
351 // FIXME: Handle other cases.
352 return ArgTypeResult();
// Rewrites this specifier in place so that it matches an argument of type QT.
//
// Steps visible in this block:
//   1. Pointers to any character type become a string conversion (sArg);
//      the '#' and '0' flags are cleared, and a wide pointee selects the
//      AsWideChar length modifier.
//   2. Anything that is not a builtin type is rejected.
//   3. The length modifier is derived from the builtin kind (char kinds ->
//      AsChar, short -> AsShort, wchar/long -> AsLong, long long ->
//      AsLongLong, long double -> AsLongDouble, everything else -> None).
//   4. The conversion specifier is chosen and the flags that do not apply to
//      it are cleared: character -> cArg, floating -> fArg, pointer -> pArg,
//      signed integer -> dArg, unsigned integer -> uArg.
//
// Fix applied in step 4: %c is now chosen only when QT is NOT spelled through
// a typedef. The previous condition tested isa<TypedefType>(QT) without the
// negation, which selected %c exactly for typedefs such as uint8_t and sent
// plain char down the integer path -- the opposite of what the comment on
// that branch describes.
//
// NOTE(review): the return statements and the break/default lines of the
// switch are not present in this listing; they are left as they are in the
// complete file.
355 bool PrintfSpecifier::fixType(QualType QT
) {
356 // Handle strings first (char *, wchar_t *)
357 if (QT
->isPointerType() && (QT
->getPointeeType()->isAnyCharacterType())) {
358 CS
.setKind(ConversionSpecifier::sArg
);
360 // Disable irrelevant flags
361 HasAlternativeForm
= 0;
362 HasLeadingZeroes
= 0;
364 // Set the long length modifier for wide characters
365 if (QT
->getPointeeType()->isWideCharType())
366 LM
.setKind(LengthModifier::AsWideChar
);
371 // We can only work with builtin types.
372 if (!QT
->isBuiltinType())
375 // Everything else should be a base type
376 const BuiltinType
*BT
= QT
->getAs
<BuiltinType
>();
378 // Set length modifier
379 switch (BT
->getKind()) {
381 // The rest of the conversions are either optional or for non-builtin types
382 LM
.setKind(LengthModifier::None
);
385 case BuiltinType::Char_U
:
386 case BuiltinType::UChar
:
387 case BuiltinType::Char_S
:
388 case BuiltinType::SChar
:
389 LM
.setKind(LengthModifier::AsChar
);
392 case BuiltinType::Short
:
393 case BuiltinType::UShort
:
394 LM
.setKind(LengthModifier::AsShort
);
397 case BuiltinType::WChar
:
398 case BuiltinType::Long
:
399 case BuiltinType::ULong
:
400 LM
.setKind(LengthModifier::AsLong
);
403 case BuiltinType::LongLong
:
404 case BuiltinType::ULongLong
:
405 LM
.setKind(LengthModifier::AsLongLong
);
408 case BuiltinType::LongDouble
:
409 LM
.setKind(LengthModifier::AsLongDouble
);
413 // Set conversion specifier and disable any flags which do not apply to it.
414 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
// isa<> inspects the type as written, so a typedef'd character type skips
// this branch and is handled by the integer branches further down.
415 if (!isa
<TypedefType
>(QT
) && QT
->isAnyCharacterType()) {
416 CS
.setKind(ConversionSpecifier::cArg
);
417 LM
.setKind(LengthModifier::None
);
418 Precision
.setHowSpecified(OptionalAmount::NotSpecified
);
419 HasAlternativeForm
= 0;
420 HasLeadingZeroes
= 0;
423 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
424 else if (QT
->isRealFloatingType()) {
425 CS
.setKind(ConversionSpecifier::fArg
);
427 else if (QT
->isPointerType()) {
428 CS
.setKind(ConversionSpecifier::pArg
);
429 Precision
.setHowSpecified(OptionalAmount::NotSpecified
);
430 HasAlternativeForm
= 0;
431 HasLeadingZeroes
= 0;
434 else if (QT
->isSignedIntegerType()) {
435 CS
.setKind(ConversionSpecifier::dArg
);
436 HasAlternativeForm
= 0;
438 else if (QT
->isUnsignedIntegerType()) {
439 CS
.setKind(ConversionSpecifier::uArg
);
440 HasAlternativeForm
= 0;
// Writes this specifier to os as format-string text.
//
// Pieces emitted by the visible code, in order: the positional argument index
// followed by "$" (only when positional arguments are used), the flag
// characters "-", "+", " ", "#" and "0" for each flag that is set, the field
// width, and the precision.
//
// NOTE(review): the lines that emit the leading '%', the length modifier and
// the conversion specifier itself are not present in this listing.
450 void PrintfSpecifier::toString(llvm::raw_ostream
&os
) const {
451 // Whilst some features have no defined order, we are using the order
452 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) section 7.19.6.1)
456 if (usesPositionalArg()) {
457 os
<< getPositionalArgIndex() << "$";
461 if (IsLeftJustified
) os
<< "-";
462 if (HasPlusPrefix
) os
<< "+";
463 if (HasSpacePrefix
) os
<< " ";
464 if (HasAlternativeForm
) os
<< "#";
465 if (HasLeadingZeroes
) os
<< "0";
467 // Minimum field width
468 FieldWidth
.toString(os
);
470 Precision
.toString(os
);
473 // Conversion specifier
// Reports whether the '+' flag is acceptable for the current conversion.
//
// The switch singles out the signed integer and floating-point conversions
// (d i f F e E g G a A).
//
// NOTE(review): the early-out before the switch and the return statements are
// not present in this listing; presumably the listed cases return true and
// everything else false -- confirm against the complete file.
477 bool PrintfSpecifier::hasValidPlusPrefix() const {
481 // The plus prefix only makes sense for signed conversions
482 switch (CS
.getKind()) {
483 case ConversionSpecifier::dArg
:
484 case ConversionSpecifier::iArg
:
485 case ConversionSpecifier::fArg
:
486 case ConversionSpecifier::FArg
:
487 case ConversionSpecifier::eArg
:
488 case ConversionSpecifier::EArg
:
489 case ConversionSpecifier::gArg
:
490 case ConversionSpecifier::GArg
:
491 case ConversionSpecifier::aArg
:
492 case ConversionSpecifier::AArg
:
// Reports whether the '#' (alternative form) flag is acceptable for the
// current conversion.
//
// The function first tests whether the flag is set at all, then switches on
// the conversion kind; the cases listed are o x X a A e E f F g G.
//
// NOTE(review): the return statements are not present in this listing;
// presumably an unset flag and the listed cases return true -- confirm.
500 bool PrintfSpecifier::hasValidAlternativeForm() const {
501 if (!HasAlternativeForm
)
504 // Alternate form flag only valid with the oxXaAeEfFgG conversions
505 switch (CS
.getKind()) {
506 case ConversionSpecifier::oArg
:
507 case ConversionSpecifier::xArg
:
508 case ConversionSpecifier::XArg
:
509 case ConversionSpecifier::aArg
:
510 case ConversionSpecifier::AArg
:
511 case ConversionSpecifier::eArg
:
512 case ConversionSpecifier::EArg
:
513 case ConversionSpecifier::fArg
:
514 case ConversionSpecifier::FArg
:
515 case ConversionSpecifier::gArg
:
516 case ConversionSpecifier::GArg
:
// Reports whether the '0' (leading zeroes) flag is acceptable for the current
// conversion.
//
// The function first tests whether the flag is set at all, then switches on
// the conversion kind; the cases listed are d i o u x X a A e E f F g G.
//
// NOTE(review): the return statements are not present in this listing;
// presumably an unset flag and the listed cases return true -- confirm.
524 bool PrintfSpecifier::hasValidLeadingZeros() const {
525 if (!HasLeadingZeroes
)
528 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
529 switch (CS
.getKind()) {
530 case ConversionSpecifier::dArg
:
531 case ConversionSpecifier::iArg
:
532 case ConversionSpecifier::oArg
:
533 case ConversionSpecifier::uArg
:
534 case ConversionSpecifier::xArg
:
535 case ConversionSpecifier::XArg
:
536 case ConversionSpecifier::aArg
:
537 case ConversionSpecifier::AArg
:
538 case ConversionSpecifier::eArg
:
539 case ConversionSpecifier::EArg
:
540 case ConversionSpecifier::fArg
:
541 case ConversionSpecifier::FArg
:
542 case ConversionSpecifier::gArg
:
543 case ConversionSpecifier::GArg
:
// Reports whether the ' ' (space prefix) flag is acceptable for the current
// conversion.
//
// The switch singles out the signed integer and floating-point conversions
// (d i f F e E g G a A), the same set used for the '+' flag.
//
// NOTE(review): the early-out before the switch and the return statements are
// not present in this listing; presumably the listed cases return true and
// everything else false -- confirm against the complete file.
551 bool PrintfSpecifier::hasValidSpacePrefix() const {
555 // The space prefix only makes sense for signed conversions
556 switch (CS
.getKind()) {
557 case ConversionSpecifier::dArg
:
558 case ConversionSpecifier::iArg
:
559 case ConversionSpecifier::fArg
:
560 case ConversionSpecifier::FArg
:
561 case ConversionSpecifier::eArg
:
562 case ConversionSpecifier::EArg
:
563 case ConversionSpecifier::gArg
:
564 case ConversionSpecifier::GArg
:
565 case ConversionSpecifier::aArg
:
566 case ConversionSpecifier::AArg
:
// Reports whether the '-' (left-justify) flag is acceptable for the current
// conversion.
//
// The function first tests whether the flag is set at all, then switches on
// the conversion kind; nArg is the only case singled out.
//
// NOTE(review): the return statements are not present in this listing;
// presumably nArg returns false and everything else true -- confirm.
574 bool PrintfSpecifier::hasValidLeftJustified() const {
575 if (!IsLeftJustified
)
578 // The left justified flag is valid for all conversions except n
579 switch (CS
.getKind()) {
580 case ConversionSpecifier::nArg
:
// Reports whether a precision is acceptable for the current conversion.
//
// The function first tests whether a precision was specified at all
// (OptionalAmount::NotSpecified), then switches on the conversion kind; the
// cases listed are d i o u x X a A e E f F g G s.
//
// NOTE(review): the return statements are not present in this listing;
// presumably an unspecified precision and the listed cases return true --
// confirm against the complete file.
588 bool PrintfSpecifier::hasValidPrecision() const {
589 if (Precision
.getHowSpecified() == OptionalAmount::NotSpecified
)
592 // Precision is only valid with the diouxXaAeEfFgGs conversions
593 switch (CS
.getKind()) {
594 case ConversionSpecifier::dArg
:
595 case ConversionSpecifier::iArg
:
596 case ConversionSpecifier::oArg
:
597 case ConversionSpecifier::uArg
:
598 case ConversionSpecifier::xArg
:
599 case ConversionSpecifier::XArg
:
600 case ConversionSpecifier::aArg
:
601 case ConversionSpecifier::AArg
:
602 case ConversionSpecifier::eArg
:
603 case ConversionSpecifier::EArg
:
604 case ConversionSpecifier::fArg
:
605 case ConversionSpecifier::FArg
:
606 case ConversionSpecifier::gArg
:
607 case ConversionSpecifier::GArg
:
608 case ConversionSpecifier::sArg
:
// Reports whether a field width is acceptable for the current conversion.
//
// The function first tests whether a field width was specified at all
// (OptionalAmount::NotSpecified), then switches on the conversion kind; nArg
// is the only case visible.
//
// NOTE(review): the rest of this function lies beyond the visible listing, so
// the return values cannot be confirmed from here.
615 bool PrintfSpecifier::hasValidFieldWidth() const {
616 if (FieldWidth
.getHowSpecified() == OptionalAmount::NotSpecified
)
619 // The field width is valid for all conversions except n
620 switch (CS
.getKind()) {
621 case ConversionSpecifier::nArg
: