1 //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
6 //===----------------------------------------------------------------------===//
8 // Scanf/printf implementation for use in *Sanitizer interceptors.
9 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
10 // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
11 // with a few common GNU extensions.
13 //===----------------------------------------------------------------------===//
16 static const char *parse_number(const char *p, int *out) {
17 *out = internal_atoll(p);
18 while (*p >= '0' && *p <= '9')
23 static const char *maybe_parse_param_index(const char *p, int *out) {
25 if (*p >= '0' && *p <= '9') {
27 const char *q = parse_number(p, &number);
35 // Otherwise, do not change p. This will be re-parsed later as the field
40 static bool char_is_one_of(char c, const char *s) {
41 return !!internal_strchr(s, c);
44 static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
45 if (char_is_one_of(*p, "jztLq")) {
48 } else if (*p == 'h') {
55 } else if (*p == 'l') {
66 // Returns true if the character is an integer conversion specifier.
67 static bool format_is_integer_conv(char c) {
68 return char_is_one_of(c, "diouxXn");
71 // Returns true if the character is a floating-point conversion specifier.
72 static bool format_is_float_conv(char c) {
73 return char_is_one_of(c, "aAeEfFgG");
76 // Returns string output character size for string-like conversions,
77 // or 0 if the conversion is invalid.
78 static int format_get_char_size(char convSpecifier,
79 const char lengthModifier[2]) {
80 if (char_is_one_of(convSpecifier, "CS")) {
81 return sizeof(wchar_t);
84 if (char_is_one_of(convSpecifier, "cs[")) {
85 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
86 return sizeof(wchar_t);
87 else if (lengthModifier[0] == '\0')
94 enum FormatStoreSize {
95 // Store size not known in advance; can be calculated as wcslen() of the
96 // destination buffer.
98 // Store size not known in advance; can be calculated as strlen() of the
99 // destination buffer.
101 // Invalid conversion specifier.
105 // Returns the memory size of a format directive (if >0), or a value of
107 static int format_get_value_size(char convSpecifier,
108 const char lengthModifier[2],
109 bool promote_float) {
110 if (format_is_integer_conv(convSpecifier)) {
111 switch (lengthModifier[0]) {
113 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
115 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
117 return sizeof(long long);
119 return sizeof(long long);
121 return sizeof(INTMAX_T);
123 return sizeof(SIZE_T);
125 return sizeof(PTRDIFF_T);
133 if (format_is_float_conv(convSpecifier)) {
134 switch (lengthModifier[0]) {
137 return sizeof(long double);
139 return lengthModifier[1] == 'l' ? sizeof(long double)
142 // Printf promotes floats to doubles but scanf does not
143 return promote_float ? sizeof(double) : sizeof(float);
149 if (convSpecifier == 'p') {
150 if (lengthModifier[0] != 0)
152 return sizeof(void *);
158 struct ScanfDirective {
159 int argIdx; // argument index, or -1 if not specified ("%n$")
163 bool suppressed; // suppress assignment ("*")
164 bool allocate; // allocate space ("m")
165 char lengthModifier[2];
170 // Parse scanf format string. If a valid directive is encountered, it is
171 // returned in dir. This function returns the pointer to the first
172 // unprocessed character, or 0 in case of error.
173 // In case of the end-of-string, a pointer to the closing \0 is returned.
174 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
175 ScanfDirective *dir) {
176 internal_memset(dir, 0, sizeof(*dir));
195 p = maybe_parse_param_index(p, &dir->argIdx);
199 dir->suppressed = true;
203 if (*p >= '0' && *p <= '9') {
204 p = parse_number(p, &dir->fieldWidth);
206 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
211 dir->allocate = true;
215 p = maybe_parse_length_modifier(p, dir->lengthModifier);
216 // Conversion specifier.
217 dir->convSpecifier = *p++;
218 // Consume %[...] expression.
219 if (dir->convSpecifier == '[') {
224 while (*p && *p != ']')
227 return 0; // unexpected end of string
228 // Consume the closing ']'.
231 // This is unfortunately ambiguous between old GNU extension
232 // of %as, %aS and %a[...] and newer POSIX %a followed by
233 // letters s, S or [.
234 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
235 !dir->lengthModifier[0]) {
236 if (*p == 's' || *p == 'S') {
237 dir->maybeGnuMalloc = true;
239 } else if (*p == '[') {
240 // Watch for %a[h-j%d], if % appears in the
241 // [...] range, then we need to give up, we don't know
242 // if scanf will parse it as POSIX %a [h-j %d ] or
243 // GNU allocation of string with range dh-j plus %.
244 const char *q = p + 1;
249 while (*q && *q != ']' && *q != '%')
251 if (*q == 0 || *q == '%')
253 p = q + 1; // Consume the closing ']'.
254 dir->maybeGnuMalloc = true;
263 static int scanf_get_value_size(ScanfDirective *dir) {
265 if (!char_is_one_of(dir->convSpecifier, "cCsS["))
267 return sizeof(char *);
270 if (dir->maybeGnuMalloc) {
271 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
273 // This is ambiguous, so check the smaller size of char * (if it is
274 // a GNU extension of %as, %aS or %a[...]) and float (if it is
275 // POSIX %a followed by s, S or [ letters).
276 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
279 if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
280 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
282 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
285 if (dir->fieldWidth == 0) {
286 if (!needsTerminator)
288 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
290 return (dir->fieldWidth + needsTerminator) * charSize;
293 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
296 // Common part of *scanf interceptors.
297 // Process format string and va_list, and report all store ranges.
298 // Stops when "consuming" n_inputs input items.
299 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
300 const char *format, va_list aq) {
301 CHECK_GT(n_inputs, 0);
302 const char *p = format;
304 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
308 p = scanf_parse_next(p, allowGnuMalloc, &dir);
311 if (dir.convSpecifier == 0) {
312 // This can only happen at the end of the format string.
316 // Here the directive is valid. Do what it says.
317 if (dir.argIdx != -1) {
323 int size = scanf_get_value_size(&dir);
324 if (size == FSS_INVALID) {
325 Report("WARNING: unexpected format specifier in scanf interceptor: "
326 "%.*s\n", dir.end - dir.begin, dir.begin);
329 void *argp = va_arg(aq, void *);
330 if (dir.convSpecifier != 'n')
334 if (size == FSS_STRLEN) {
335 size = internal_strlen((const char *)argp) + 1;
336 } else if (size == FSS_WCSLEN) {
337 // FIXME: actually use wcslen() to calculate it.
340 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
344 #if SANITIZER_INTERCEPT_PRINTF
346 struct PrintfDirective {
349 int argIdx; // width argument index, or -1 if not specified ("%*n$")
350 int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
354 bool starredPrecision;
355 char lengthModifier[2];
359 static const char *maybe_parse_number(const char *p, int *out) {
360 if (*p >= '0' && *p <= '9')
361 p = parse_number(p, out);
365 static const char *maybe_parse_number_or_star(const char *p, int *out,
372 p = maybe_parse_number(p, out);
377 // Parse printf format string. Same as scanf_parse_next.
378 static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
379 internal_memset(dir, 0, sizeof(*dir));
381 dir->precisionIdx = -1;
399 p = maybe_parse_param_index(p, &dir->precisionIdx);
402 while (char_is_one_of(*p, "'-+ #0")) {
406 p = maybe_parse_number_or_star(p, &dir->fieldWidth,
413 // Actual precision is optional (surprise!)
414 p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
415 &dir->starredPrecision);
419 if (dir->starredPrecision) {
420 p = maybe_parse_param_index(p, &dir->precisionIdx);
425 p = maybe_parse_length_modifier(p, dir->lengthModifier);
426 // Conversion specifier.
427 dir->convSpecifier = *p++;
434 static int printf_get_value_size(PrintfDirective *dir) {
435 if (dir->convSpecifier == 'm') {
436 return sizeof(char *);
439 if (char_is_one_of(dir->convSpecifier, "cCsS")) {
441 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
444 if (char_is_one_of(dir->convSpecifier, "sS")) {
445 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
450 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
453 #define SKIP_SCALAR_ARG(aq, convSpecifier, size) \
455 if (format_is_float_conv(convSpecifier)) { \
458 va_arg(*aq, double); \
461 va_arg(*aq, long double); \
464 Report("WARNING: unexpected floating-point arg size" \
465 " in printf interceptor: %d\n", size); \
479 Report("WARNING: unexpected arg size" \
480 " in printf interceptor: %d\n", size); \
486 // Common part of *printf interceptors.
487 // Process format string and va_list, and report all load ranges.
488 static void printf_common(void *ctx, const char *format, va_list aq) {
489 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
491 const char *p = format;
495 p = printf_parse_next(p, &dir);
498 if (dir.convSpecifier == 0) {
499 // This can only happen at the end of the format string.
503 // Here the directive is valid. Do what it says.
504 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
508 if (dir.starredWidth) {
510 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
512 if (dir.starredPrecision) {
514 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
516 int size = printf_get_value_size(&dir);
517 if (size == FSS_INVALID) {
518 Report("WARNING: unexpected format specifier in printf "
519 "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
522 if (dir.convSpecifier == 'n') {
523 void *argp = va_arg(aq, void *);
524 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
526 } else if (size == FSS_STRLEN) {
527 if (void *argp = va_arg(aq, void *)) {
528 if (dir.starredPrecision) {
529 // FIXME: properly support starred precision for strings.
531 } else if (dir.fieldPrecision > 0) {
532 // Won't read more than "precision" symbols.
533 size = internal_strnlen((const char *)argp, dir.fieldPrecision);
534 if (size < dir.fieldPrecision) size++;
536 // Whole string will be accessed.
537 size = internal_strlen((const char *)argp) + 1;
539 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
541 } else if (size == FSS_WCSLEN) {
542 if (void *argp = va_arg(aq, void *)) {
543 // FIXME: Properly support wide-character strings (via wcsrtombs).
545 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
548 // Skip non-pointer args
549 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
554 #endif // SANITIZER_INTERCEPT_PRINTF