1 //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Scanf/printf implementation for use in *Sanitizer interceptors.
10 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
11 // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
12 // with a few common GNU extensions.
14 //===----------------------------------------------------------------------===//
// Parses a decimal number at p: *out is assigned internal_atoll(p), and the
// while loop then tests for ASCII digits starting at p.
// NOTE(review): the loop body and the return statement are elided in this
// extract; presumably the pointer just past the digits is returned -- confirm.
18 static const char *parse_number(const char *p, int *out) {
19 *out = internal_atoll(p);
20 while (*p >= '0' && *p <= '9')
// Optional "n$" parameter index. When p starts with an ASCII digit, the number
// is read with parse_number() into a temporary and q marks the position
// returned by that call.
// NOTE(review): the lines that inspect the character at q and assign *out are
// elided in this extract; presumably *out and p are only updated when the
// number is followed by '$' -- confirm against the full source.
25 static const char *maybe_parse_param_index(const char *p, int *out) {
27 if (*p >= '0' && *p <= '9') {
29 const char *q = parse_number(p, &number);
37 // Otherwise, do not change p. This will be re-parsed later as the field
// Returns true when internal_strchr(s, c) yields a non-null result, i.e. the
// lookup of c in the set string s succeeded.
// NOTE(review): if internal_strchr mirrors strchr(), c == '\0' matches the
// terminator of s and this returns true -- confirm that callers never reach
// here with '\0', or that they tolerate it.
42 static bool char_is_one_of(char c, const char *s) {
43 return !!internal_strchr(s, c);
// Recognizes an optional length modifier at p. Three cases are distinguished:
// a single character out of "jztLq", a modifier starting with 'h', and a
// modifier starting with 'l'.
// NOTE(review): the branch bodies are elided in this extract. Going by the
// lengthModifier[1] == 'h' / 'l' checks elsewhere in the file, ll[] presumably
// receives one or two modifier characters and the advanced pointer is
// returned -- confirm.
46 static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
47 if (char_is_one_of(*p, "jztLq")) {
50 } else if (*p == 'h') {
57 } else if (*p == 'l') {
68 // Returns true if the character is an integer conversion specifier.
// The accepted set is "diouxXn"; note that 'n' is grouped with the numeric
// conversions here.
69 static bool format_is_integer_conv(char c) {
70 return char_is_one_of(c, "diouxXn");
73 // Returns true if the character is a floating-point conversion specifier.
// The accepted set is "aAeEfFgG".
74 static bool format_is_float_conv(char c) {
75 return char_is_one_of(c, "aAeEfFgG");
78 // Returns string output character size for string-like conversions,
79 // or 0 if the conversion is invalid.
//   convSpecifier  - conversion character of the directive.
//   lengthModifier - two-character length modifier buffer of the directive.
// NOTE(review): the returns for the empty-modifier case and for the fall
// through are elided in this extract (presumably sizeof(char) and 0) --
// confirm.
80 static int format_get_char_size(char convSpecifier,
81 const char lengthModifier[2]) {
// 'C' and 'S' are wide regardless of the length modifier.
82 if (char_is_one_of(convSpecifier, "CS")) {
83 return sizeof(wchar_t);
// 'c', 's' and '[' depend on the modifier: exactly "l" selects wide chars.
86 if (char_is_one_of(convSpecifier, "cs[")) {
87 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
88 return sizeof(wchar_t);
89 else if (lengthModifier[0] == '\0')
// Special results used in place of a byte count by the *_get_value_size
// helpers.
// NOTE(review): only the comments of the enumerators survive in this extract.
// The names used elsewhere in the file are FSS_WCSLEN, FSS_STRLEN and
// FSS_INVALID; presumably they are declared in the order of the comments
// below -- confirm.
96 enum FormatStoreSize {
97 // Store size not known in advance; can be calculated as wcslen() of the
98 // destination buffer.
100 // Store size not known in advance; can be calculated as strlen() of the
101 // destination buffer.
103 // Invalid conversion specifier.
107 // Returns the memory size of a format directive (if >0), or a value of
// NOTE(review): the remainder of the sentence above is elided in this extract;
// callers in this file compare the result against FSS_INVALID.
//   convSpecifier  - conversion character of the directive.
//   lengthModifier - two-character length modifier buffer of the directive.
//   promote_float  - when true, an unmodified floating-point conversion is
//                    sized as double instead of float.
// NOTE(review): the case labels of both switches and the fall-through returns
// are elided here, so the mapping from modifier character to each visible
// return cannot be read off this extract.
109 static int format_get_value_size(char convSpecifier,
110 const char lengthModifier[2],
111 bool promote_float) {
// Integer conversions: the size is selected by the first modifier character,
// with the second character telling "hh" from "h" and "ll" from "l".
112 if (format_is_integer_conv(convSpecifier)) {
113 switch (lengthModifier[0]) {
115 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
117 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
119 return sizeof(long long);
121 return sizeof(long long);
123 return sizeof(INTMAX_T);
125 return sizeof(SIZE_T);
127 return sizeof(PTRDIFF_T);
// Floating-point conversions: again selected by the first modifier character.
135 if (format_is_float_conv(convSpecifier)) {
136 switch (lengthModifier[0]) {
139 return sizeof(long double);
141 return lengthModifier[1] == 'l' ? sizeof(long double)
144 // Printf promotes floats to doubles but scanf does not
145 return promote_float ? sizeof(double) : sizeof(float);
// Pointer conversion: sizeof(void *). The statement guarded by the
// non-empty-modifier check is elided (presumably an FSS_INVALID return --
// confirm).
151 if (convSpecifier == 'p') {
152 if (lengthModifier[0] != 0)
154 return sizeof(void *);
// Parsed form of a single scanf conversion directive, filled in by
// scanf_parse_next().
// NOTE(review): several members are elided in this extract. Other code in the
// file also accesses begin, end, fieldWidth, convSpecifier and maybeGnuMalloc
// on this struct.
160 struct ScanfDirective {
161 int argIdx; // argument index, or -1 if not specified ("%n$")
165 bool suppressed; // suppress assignment ("*")
166 bool allocate; // allocate space ("m")
// Length modifier characters; compared against 'h', 'l' and '\0' by the
// size helpers.
167 char lengthModifier[2];
172 // Parse scanf format string. If a valid directive is encountered, it is
173 // returned in dir. This function returns the pointer to the first
174 // unprocessed character, or 0 in case of error.
175 // In case of the end-of-string, a pointer to the closing \0 is returned.
//   p              - current position in the format string.
//   allowGnuMalloc - enables the check for the old GNU "%as", "%aS" and
//                    "%a[...]" forms, recorded in dir->maybeGnuMalloc.
//   dir            - output directive; zeroed on entry.
// NOTE(review): this extract omits many short lines of the body (the loop
// header, several conditions, returns and closing braces). The comments below
// describe only the statements that are visible.
176 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
177 ScanfDirective *dir) {
178 internal_memset(dir, 0, sizeof(*dir));
// Optional positional index, written to dir->argIdx.
197 p = maybe_parse_param_index(p, &dir->argIdx);
// Assignment suppression flag (the condition guarding it is elided).
201 dir->suppressed = true;
// Optional field width: a decimal number starting at a digit.
205 if (*p >= '0' && *p <= '9') {
206 p = parse_number(p, &dir->fieldWidth);
208 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
// Allocation flag (the condition guarding it is elided).
213 dir->allocate = true;
// Optional length modifier; dir->lengthModifier is handed to the helper.
217 p = maybe_parse_length_modifier(p, dir->lengthModifier);
218 // Conversion specifier.
219 dir->convSpecifier = *p++;
220 // Consume %[...] expression.
221 if (dir->convSpecifier == '[') {
// Scan forward until ']' or the end of the string.
226 while (*p && *p != ']')
229 return nullptr; // unexpected end of string
230 // Consume the closing ']'.
233 // This is unfortunately ambiguous between old GNU extension
234 // of %as, %aS and %a[...] and newer POSIX %a followed by
235 // letters s, S or [.
// Only considered for a bare "%a" (no length modifier) when the caller
// allows it.
236 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
237 !dir->lengthModifier[0]) {
238 if (*p == 's' || *p == 'S') {
239 dir->maybeGnuMalloc = true;
241 } else if (*p == '[') {
242 // Watch for %a[h-j%d], if % appears in the
243 // [...] range, then we need to give up, we don't know
244 // if scanf will parse it as POSIX %a [h-j %d ] or
245 // GNU allocation of string with range dh-j plus %.
// q scans the bracket expression without committing p.
246 const char *q = p + 1;
251 while (*q && *q != ']' && *q != '%')
// End of string or '%' inside the brackets: the ambiguous case described
// above (the statement taken here is elided).
253 if (*q == 0 || *q == '%')
255 p = q + 1; // Consume the closing ']'.
256 dir->maybeGnuMalloc = true;
// Returns the number of bytes a scanf directive stores through its argument,
// or one of the FSS_* values when the size is not a plain byte count.
// NOTE(review): several guards and returns are elided in this extract; the
// comments below describe only what the visible statements establish.
265 static int scanf_get_value_size(ScanfDirective *dir) {
// The enclosing guard of the next two statements is elided (presumably
// dir->allocate, given the sizeof(char *) result -- confirm). Only the
// string-like conversions "cCsS[" reach the sizeof(char *) return.
267 if (!char_is_one_of(dir->convSpecifier, "cCsS["))
269 return sizeof(char *);
272 if (dir->maybeGnuMalloc) {
// maybeGnuMalloc is only meaningful for a bare "%a"; anything else takes the
// elided statement that follows this check.
273 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
275 // This is ambiguous, so check the smaller size of char * (if it is
276 // a GNU extension of %as, %aS or %a[...]) and float (if it is
277 // POSIX %a followed by s, S or [ letters).
278 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
// String-like conversions: size derives from the field width and the
// character size.
281 if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
// 's', 'S' and '[' store a terminator in addition to the field; 'c'/'C' do
// not.
282 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
284 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
// No explicit width: the store size has to be derived from the resulting
// string (narrow or wide, according to charSize).
287 if (dir->fieldWidth == 0) {
288 if (!needsTerminator)
290 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
// Explicit width: fieldWidth characters, plus one for the terminator when
// needed.
292 return (dir->fieldWidth + needsTerminator) * charSize;
// Everything else is a scalar; promote_float is false on the scanf side.
295 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
298 // Common part of *scanf interceptors.
299 // Process format string and va_list, and report all store ranges.
300 // Stops when "consuming" n_inputs input items.
301 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
302 const char *format, va_list aq) {
303 CHECK_GT(n_inputs, 0);
304 const char *p = format;
306 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
310 p = scanf_parse_next(p, allowGnuMalloc, &dir);
313 if (dir.convSpecifier == 0) {
314 // This can only happen at the end of the format string.
318 // Here the directive is valid. Do what it says.
319 if (dir.argIdx != -1) {
325 int size = scanf_get_value_size(&dir);
326 if (size == FSS_INVALID) {
327 Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
328 SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
331 void *argp = va_arg(aq, void *);
332 if (dir.convSpecifier != 'n')
336 if (size == FSS_STRLEN) {
337 size = internal_strlen((const char *)argp) + 1;
338 } else if (size == FSS_WCSLEN) {
339 // FIXME: actually use wcslen() to calculate it.
342 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
346 #if SANITIZER_INTERCEPT_PRINTF
// Parsed form of a single printf conversion directive, filled in by
// printf_parse_next().
// NOTE(review): several members are elided in this extract. Other code in the
// file also accesses begin, end, fieldWidth, fieldPrecision, starredWidth and
// convSpecifier on this struct.
348 struct PrintfDirective {
351 int argIdx; // width argument index, or -1 if not specified ("%*n$")
352 int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
// Set when the precision is given as '*', i.e. taken from the argument list.
356 bool starredPrecision;
// Length modifier characters; compared against 'h', 'l' and '\0' by the
// size helpers.
357 char lengthModifier[2];
// Calls parse_number() only when p starts with an ASCII digit; in the visible
// code *out is not written otherwise.
// NOTE(review): the return statement is elided in this extract; presumably p
// (advanced or unchanged) is returned -- confirm.
361 static const char *maybe_parse_number(const char *p, int *out) {
362 if (*p >= '0' && *p <= '9')
363 p = parse_number(p, out);
// Parses an optional number at p into *out via maybe_parse_number().
// NOTE(review): the rest of the parameter list and the handling of '*' are
// elided in this extract. Callers pass the address of a starredWidth /
// starredPrecision flag as the last argument, so presumably that flag is set
// when a '*' is seen instead of a number -- confirm.
367 static const char *maybe_parse_number_or_star(const char *p, int *out,
374 p = maybe_parse_number(p, out);
379 // Parse printf format string. Same as scanf_parse_next.
//   p   - current position in the format string.
//   dir - output directive; zeroed on entry, then precisionIdx is set to -1.
// NOTE(review): this extract omits many short lines of the body (the loop
// header, several conditions, returns and closing braces). The comments below
// describe only the statements that are visible.
380 static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
381 internal_memset(dir, 0, sizeof(*dir));
383 dir->precisionIdx = -1;
// Optional leading positional index.
// NOTE(review): it is written to precisionIdx, the same field that receives
// the starred-precision index further down; argIdx may have been intended.
// printf_common() stops on either field being set, so the effect there is the
// same -- confirm before changing.
401 p = maybe_parse_param_index(p, &dir->precisionIdx);
// Flag characters.
404 while (char_is_one_of(*p, "'-+ #0")) {
// Field width: a number or a star (the starred-width argument is on the
// elided continuation line).
408 p = maybe_parse_number_or_star(p, &dir->fieldWidth,
415 // Actual precision is optional (surprise!)
416 p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
417 &dir->starredPrecision);
// A starred precision may carry its own positional index.
421 if (dir->starredPrecision) {
422 p = maybe_parse_param_index(p, &dir->precisionIdx);
// Optional length modifier; dir->lengthModifier is handed to the helper.
427 p = maybe_parse_length_modifier(p, dir->lengthModifier);
428 // Conversion specifier.
429 dir->convSpecifier = *p++;
// Returns the size of the argument a printf directive reads, or one of the
// FSS_* values when the size is not a plain byte count.
// NOTE(review): a few lines (the charSize declaration, an invalid-size check
// and the return for 'c'/'C') are elided in this extract.
436 static int printf_get_value_size(PrintfDirective *dir) {
// Character and string conversions are sized via format_get_char_size().
437 if (char_is_one_of(dir->convSpecifier, "cCsS")) {
439 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
// Strings have no fixed size: report whether strlen or wcslen applies.
442 if (char_is_one_of(dir->convSpecifier, "sS")) {
443 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
// Everything else is a scalar; promote_float is true on the printf side.
448 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
// Advances a va_list past one scalar argument.
//   aq            - pointer to the va_list (it is dereferenced as *aq).
//   convSpecifier - conversion character; format_is_float_conv() selects the
//                   floating-point branch.
//   size          - argument size in bytes, used to pick the va_arg type.
// In the floating-point branch the visible va_arg calls read a double or a
// long double; an unexpected size is reported through Report().
// NOTE(review): the case labels, the integer branch and the closing lines of
// the macro are elided in this extract.
451 #define SKIP_SCALAR_ARG(aq, convSpecifier, size) \
453 if (format_is_float_conv(convSpecifier)) { \
456 va_arg(*aq, double); \
459 va_arg(*aq, long double); \
462 va_arg(*aq, long double); \
465 Report("WARNING: unexpected floating-point arg size" \
466 " in printf interceptor: %d\n", size); \
480 Report("WARNING: unexpected arg size" \
481 " in printf interceptor: %d\n", size); \
487 // Common part of *printf interceptors.
488 // Process format string and va_list, and report all load ranges.
//   ctx    - interceptor context forwarded to the range macros.
//   format - NUL-terminated printf format string.
//   aq     - argument list that goes with format.
// NOTE(review): this extract omits many short lines of the body (the loop
// header, break/continue statements, "size = 0" style assignments and closing
// braces). The comments below describe only the statements that are visible.
489 static void printf_common(void *ctx, const char *format, va_list aq) {
// The format string itself is read in full, terminator included.
490 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
492 const char *p = format;
496 p = printf_parse_next(p, &dir);
499 if (dir.convSpecifier == 0) {
500 // This can only happen at the end of the format string.
504 // Here the directive is valid. Do what it says.
// Positional indices get separate handling (the statement taken is elided).
505 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
// A '*' width or precision each consume one int argument.
509 if (dir.starredWidth) {
511 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
513 if (dir.starredPrecision) {
515 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
517 // %m does not require an argument: strlen(errno).
518 if (dir.convSpecifier == 'm')
520 int size = printf_get_value_size(&dir);
521 if (size == FSS_INVALID) {
// Function-local static used for the "reported once per process" warning.
// NOTE(review): dir.end - dir.begin is a pointer difference passed as the
// precision of "%.*s"; confirm that Report() reads that argument with a
// matching type, or cast it to int.
522 static int ReportedOnce;
525 "%s: WARNING: unexpected format specifier in printf "
526 "interceptor: %.*s (reported once per process)\n",
527 SanitizerToolName, dir.end - dir.begin, dir.begin);
// %n is the one conversion that writes through its argument.
530 if (dir.convSpecifier == 'n') {
531 void *argp = va_arg(aq, void *);
532 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
// Narrow strings: a null pointer argument is skipped without a range report.
534 } else if (size == FSS_STRLEN) {
535 if (void *argp = va_arg(aq, void *)) {
536 if (dir.starredPrecision) {
537 // FIXME: properly support starred precision for strings.
539 } else if (dir.fieldPrecision > 0) {
540 // Won't read more than "precision" symbols.
541 size = internal_strnlen((const char *)argp, dir.fieldPrecision);
// A string shorter than the precision also has its terminator read.
542 if (size < dir.fieldPrecision) size++;
544 // Whole string will be accessed.
545 size = internal_strlen((const char *)argp) + 1;
547 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
// Wide strings: likewise skipped when the pointer argument is null.
549 } else if (size == FSS_WCSLEN) {
550 if (void *argp = va_arg(aq, void *)) {
551 // FIXME: Properly support wide-character strings (via wcsrtombs).
553 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
556 // Skip non-pointer args
557 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
562 #endif // SANITIZER_INTERCEPT_PRINTF