(arm_comp_type_attributes): Simplify and comment tests on type attributes.
[official-gcc.git] / gcc / cppfiles.c
blob592cb6b9201a395442d23eb2944a61dbcd986740
1 /* Part of CPP library. (include file handling)
2 Copyright (C) 1986, 87, 89, 92-95, 98, 99, 2000 Free Software Foundation, Inc.
3 Written by Per Bothner, 1994.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Split out of cpplib.c, Zack Weinberg, Oct 1998
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 In other words, you are welcome to use, share and improve this program.
23 You are forbidden to forbid anyone else to use, share and improve
24 what you give them. Help stamp out software-hoarding! */
26 #include "config.h"
27 #include "system.h"
28 #include "cpplib.h"
29 #include "intl.h"
31 /* The entry points to this file are: find_include_file, finclude,
32 include_hash, append_include_chain, deps_output, and file_cleanup.
33 file_cleanup is only called through CPP_BUFFER(pfile)->cleanup,
34 so it's static anyway. */
36 static struct include_hash *redundant_include_p
37 PARAMS ((cpp_reader *,
38 struct include_hash *,
39 struct file_name_list *));
40 static struct file_name_map *read_name_map
41 PARAMS ((cpp_reader *, const char *));
42 static char *read_filename_string PARAMS ((int, FILE *));
43 static char *remap_filename PARAMS ((cpp_reader *, char *,
44 struct file_name_list *));
45 static long read_and_prescan PARAMS ((cpp_reader *, cpp_buffer *,
46 int, size_t));
47 static struct file_name_list *actual_directory
48 PARAMS ((cpp_reader *, const char *));
49 static void initialize_input_buffer PARAMS ((cpp_reader *, int,
50 struct stat *));
51 static int file_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
52 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *,
53 unsigned long *));
55 #if 0
56 static void hack_vms_include_specification PARAMS ((char *));
57 #endif
/* INO_T_EQ (A, B) is nonzero when inode numbers A and B are equal.

   - VMS has non-numeric inodes, so they are compared bytewise.
   - Windows and MSDOS do not natively support inodes, and Cygwin's
     emulation can generate non-unique ones; there the macro is
     always 0, i.e. no two inodes ever compare equal.
   - Everywhere else a plain == is used.  */
#if defined (VMS)
# define INO_T_EQ(a, b) (!bcmp((char *) &(a), (char *) &(b), sizeof (a)))
#elif (defined (_WIN32) && !defined (_UWIN)) || defined (__MSDOS__)
# define INO_T_EQ(a, b) 0
#else
# define INO_T_EQ(a, b) ((a) == (b))
#endif

/* Extra bytes added to the pathname buffer allocated when searching
   the include path; zero unless defined elsewhere.  */
#if !defined (INCLUDE_LEN_FUDGE)
# define INCLUDE_LEN_FUDGE 0
#endif
75 /* Merge the four include chains together in the order quote, bracket,
76 system, after. Remove duplicate dirs (as determined by
77 INO_T_EQ()). The system_include and after_include chains are never
78 referred to again after this function; all access is through the
79 bracket_include path.
81 For the future: Check if the directory is empty (but
82 how?) and possibly preload the include hash. */
84 void
85 merge_include_chains (opts)
86 struct cpp_options *opts;
88 struct file_name_list *prev, *cur, *other;
89 struct file_name_list *quote, *brack, *systm, *after;
90 struct file_name_list *qtail, *btail, *stail, *atail;
92 qtail = opts->pending->quote_tail;
93 btail = opts->pending->brack_tail;
94 stail = opts->pending->systm_tail;
95 atail = opts->pending->after_tail;
97 quote = opts->pending->quote_head;
98 brack = opts->pending->brack_head;
99 systm = opts->pending->systm_head;
100 after = opts->pending->after_head;
102 /* Paste together bracket, system, and after include chains. */
103 if (stail)
104 stail->next = after;
105 else
106 systm = after;
107 if (btail)
108 btail->next = systm;
109 else
110 brack = systm;
112 /* This is a bit tricky.
113 First we drop dupes from the quote-include list.
114 Then we drop dupes from the bracket-include list.
115 Finally, if qtail and brack are the same directory,
116 we cut out qtail.
118 We can't just merge the lists and then uniquify them because
119 then we may lose directories from the <> search path that should
120 be there; consider -Ifoo -Ibar -I- -Ifoo -Iquux. It is however
121 safe to treat -Ibar -Ifoo -I- -Ifoo -Iquux as if written
122 -Ibar -I- -Ifoo -Iquux.
124 Note that this algorithm is quadratic in the number of -I switches,
125 which is acceptable since there aren't usually that many of them. */
127 for (cur = quote, prev = NULL; cur; cur = cur->next)
129 for (other = quote; other != cur; other = other->next)
130 if (INO_T_EQ (cur->ino, other->ino)
131 && cur->dev == other->dev)
133 if (opts->verbose)
134 fprintf (stderr, _("ignoring duplicate directory `%s'\n"),
135 cur->name);
137 prev->next = cur->next;
138 free (cur->name);
139 free (cur);
140 cur = prev;
141 break;
143 prev = cur;
145 qtail = prev;
147 for (cur = brack; cur; cur = cur->next)
149 for (other = brack; other != cur; other = other->next)
150 if (INO_T_EQ (cur->ino, other->ino)
151 && cur->dev == other->dev)
153 if (opts->verbose)
154 fprintf (stderr, _("ignoring duplicate directory `%s'\n"),
155 cur->name);
157 prev->next = cur->next;
158 free (cur->name);
159 free (cur);
160 cur = prev;
161 break;
163 prev = cur;
166 if (quote)
168 if (INO_T_EQ (qtail->ino, brack->ino) && qtail->dev == brack->dev)
170 if (quote == qtail)
172 if (opts->verbose)
173 fprintf (stderr, _("ignoring duplicate directory `%s'\n"),
174 quote->name);
176 free (quote->name);
177 free (quote);
178 quote = brack;
180 else
182 cur = quote;
183 while (cur->next != qtail)
184 cur = cur->next;
185 cur->next = brack;
186 if (opts->verbose)
187 fprintf (stderr, _("ignoring duplicate directory `%s'\n"),
188 qtail->name);
190 free (qtail->name);
191 free (qtail);
194 else
195 qtail->next = brack;
197 else
198 quote = brack;
200 opts->quote_include = quote;
201 opts->bracket_include = brack;
204 /* Look up or add an entry to the table of all includes. This table
205 is indexed by the name as it appears in the #include line. The
206 ->next_this_file chain stores all different files with the same
207 #include name (there are at least three ways this can happen). The
208 hash function could probably be improved a bit. */
210 struct include_hash *
211 include_hash (pfile, fname, add)
212 cpp_reader *pfile;
213 const char *fname;
214 int add;
216 unsigned int hash = 0;
217 struct include_hash *l, *m;
218 const char *f = fname;
220 while (*f)
221 hash += *f++;
223 l = pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE];
224 m = 0;
225 for (; l; m = l, l = l->next)
226 if (!strcmp (l->nshort, fname))
227 return l;
229 if (!add)
230 return 0;
232 l = (struct include_hash *) xmalloc (sizeof (struct include_hash));
233 l->next = NULL;
234 l->next_this_file = NULL;
235 l->foundhere = NULL;
236 l->buf = NULL;
237 l->limit = NULL;
238 if (m)
239 m->next = l;
240 else
241 pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE] = l;
243 return l;
246 /* Return 0 if the file pointed to by IHASH has never been included before,
247 -1 if it has been included before and need not be again,
248 or a pointer to an IHASH entry which is the file to be reread.
249 "Never before" is with respect to the position in ILIST.
251 This will not detect redundancies involving odd uses of the
252 `current directory' rule for "" includes. They aren't quite
253 pathological, but I think they are rare enough not to worry about.
254 The simplest example is:
256 top.c:
257 #include "a/a.h"
258 #include "b/b.h"
260 a/a.h:
261 #include "../b/b.h"
263 and the problem is that for `current directory' includes,
264 ihash->foundhere is not on any of the global include chains,
265 so the test below (i->foundhere == l) may be false even when
266 the directories are in fact the same. */
268 static struct include_hash *
269 redundant_include_p (pfile, ihash, ilist)
270 cpp_reader *pfile;
271 struct include_hash *ihash;
272 struct file_name_list *ilist;
274 struct file_name_list *l;
275 struct include_hash *i;
277 if (! ihash->foundhere)
278 return 0;
280 for (i = ihash; i; i = i->next_this_file)
281 for (l = ilist; l; l = l->next)
282 if (i->foundhere == l)
283 /* The control_macro works like this: If it's NULL, the file
284 is to be included again. If it's "", the file is never to
285 be included again. If it's a string, the file is not to be
286 included again if the string is the name of a defined macro. */
287 return (i->control_macro
288 && (i->control_macro[0] == '\0'
289 || cpp_defined (pfile, i->control_macro, -1)))
290 ? (struct include_hash *)-1 : i;
292 return 0;
295 static int
296 file_cleanup (pbuf, pfile)
297 cpp_buffer *pbuf;
298 cpp_reader *pfile;
300 if (pbuf->buf)
302 free (pbuf->buf);
303 pbuf->buf = 0;
305 if (pfile->system_include_depth)
306 pfile->system_include_depth--;
307 return 0;
310 /* Search for include file FNAME in the include chain starting at
311 SEARCH_START. Return -2 if this file doesn't need to be included
312 (because it was included already and it's marked idempotent),
313 -1 if an error occurred, or a file descriptor open on the file.
314 *IHASH is set to point to the include hash entry for this file, and
315 *BEFORE is 1 if the file was included before (but needs to be read
316 again). */
318 find_include_file (pfile, fname, search_start, ihash, before)
319 cpp_reader *pfile;
320 const char *fname;
321 struct file_name_list *search_start;
322 struct include_hash **ihash;
323 int *before;
325 struct file_name_list *l;
326 struct include_hash *ih, *jh;
327 int f, len;
328 char *name;
330 ih = include_hash (pfile, fname, 1);
331 jh = redundant_include_p (pfile, ih,
332 fname[0] == '/' ? ABSOLUTE_PATH : search_start);
334 if (jh != 0)
336 *before = 1;
337 *ihash = jh;
339 if (jh == (struct include_hash *)-1)
340 return -2;
341 else
342 return open (jh->name, O_RDONLY, 0666);
345 if (ih->foundhere)
346 /* A file is already known by this name, but it's not the same file.
347 Allocate another include_hash block and add it to the next_this_file
348 chain. */
350 jh = (struct include_hash *)xmalloc (sizeof (struct include_hash));
351 while (ih->next_this_file) ih = ih->next_this_file;
353 ih->next_this_file = jh;
354 jh = ih;
355 ih = ih->next_this_file;
357 ih->next = NULL;
358 ih->next_this_file = NULL;
359 ih->buf = NULL;
360 ih->limit = NULL;
362 *before = 0;
363 *ihash = ih;
364 ih->nshort = xstrdup (fname);
365 ih->control_macro = NULL;
367 /* If the pathname is absolute, just open it. */
368 if (fname[0] == '/')
370 ih->foundhere = ABSOLUTE_PATH;
371 ih->name = ih->nshort;
372 return open (ih->name, O_RDONLY, 0666);
375 /* Search directory path, trying to open the file. */
377 len = strlen (fname);
378 name = xmalloc (len + pfile->max_include_len + 2 + INCLUDE_LEN_FUDGE);
380 for (l = search_start; l; l = l->next)
382 bcopy (l->name, name, l->nlen);
383 name[l->nlen] = '/';
384 strcpy (&name[l->nlen+1], fname);
385 simplify_pathname (name);
386 if (CPP_OPTIONS (pfile)->remap)
387 name = remap_filename (pfile, name, l);
389 f = open (name, O_RDONLY|O_NONBLOCK|O_NOCTTY, 0666);
390 #ifdef EACCES
391 if (f == -1 && errno == EACCES)
393 cpp_error(pfile, "included file `%s' exists but is not readable",
394 name);
395 return -1;
397 #endif
399 if (f >= 0)
401 ih->foundhere = l;
402 ih->name = xrealloc (name, strlen (name)+1);
403 return f;
407 if (jh)
409 jh->next_this_file = NULL;
410 free (ih);
412 free (name);
413 *ihash = (struct include_hash *)-1;
414 return -1;
/* The file_name_map structure holds a mapping of file names for a
   particular directory.  This mapping is read from the file named
   FILE_NAME_MAP_FILE in that directory.  Such a file can be used to
   map filenames on a file system with severe filename restrictions,
   such as DOS.  The format of the file name map file is just a series
   of lines with two tokens on each line.  The first token is the name
   to map, and the second token is the actual name to use.  */

struct file_name_map
{
  struct file_name_map *map_next;	/* Next mapping for the directory.  */
  char *map_from;			/* Name to map.  */
  char *map_to;				/* Actual name to use.  */
};

#define FILE_NAME_MAP_FILE "header.gcc"
/* Read a space delimited string of unlimited length from stdio file
   F.  CH is a character already read from F and is taken as the
   first character of the string; if it is itself white space the
   result is the empty string.  The character that ended the string
   is pushed back onto F.  Returns freshly allocated storage.  */

static char *
read_filename_string (ch, f)
     int ch;
     FILE *f;
{
  int cap = 20;		/* Characters that fit, not counting the NUL.  */
  int used = 0;
  char *str = xmalloc (cap + 1);

  if (! is_space(ch))
    {
      str[used++] = ch;
      for (;;)
	{
	  ch = getc (f);
	  if (ch == EOF || is_space(ch))
	    break;

	  /* Double the buffer whenever it fills up.  */
	  if (used == cap)
	    {
	      cap *= 2;
	      str = xrealloc (str, cap + 1);
	    }
	  str[used++] = ch;
	}
    }

  str[used] = '\0';
  ungetc (ch, f);
  return str;
}
/* This structure holds a linked list of file name maps, one per directory.  */

struct file_name_map_list
{
  struct file_name_map_list *map_list_next;	/* Next directory's node.  */
  char *map_list_name;				/* Directory name.  */
  struct file_name_map *map_list_map;		/* Mappings read for it.  */
};
475 /* Read the file name map file for DIRNAME. */
477 static struct file_name_map *
478 read_name_map (pfile, dirname)
479 cpp_reader *pfile;
480 const char *dirname;
482 register struct file_name_map_list *map_list_ptr;
483 char *name;
484 FILE *f;
486 for (map_list_ptr = CPP_OPTIONS (pfile)->map_list; map_list_ptr;
487 map_list_ptr = map_list_ptr->map_list_next)
488 if (! strcmp (map_list_ptr->map_list_name, dirname))
489 return map_list_ptr->map_list_map;
491 map_list_ptr = ((struct file_name_map_list *)
492 xmalloc (sizeof (struct file_name_map_list)));
493 map_list_ptr->map_list_name = xstrdup (dirname);
495 name = (char *) alloca (strlen (dirname) + strlen (FILE_NAME_MAP_FILE) + 2);
496 strcpy (name, dirname);
497 if (*dirname)
498 strcat (name, "/");
499 strcat (name, FILE_NAME_MAP_FILE);
500 f = fopen (name, "r");
501 if (!f)
502 map_list_ptr->map_list_map = (struct file_name_map *)-1;
503 else
505 int ch;
506 int dirlen = strlen (dirname);
508 while ((ch = getc (f)) != EOF)
510 char *from, *to;
511 struct file_name_map *ptr;
513 if (is_space(ch))
514 continue;
515 from = read_filename_string (ch, f);
516 while ((ch = getc (f)) != EOF && is_hspace(ch))
518 to = read_filename_string (ch, f);
520 ptr = ((struct file_name_map *)
521 xmalloc (sizeof (struct file_name_map)));
522 ptr->map_from = from;
524 /* Make the real filename absolute. */
525 if (*to == '/')
526 ptr->map_to = to;
527 else
529 ptr->map_to = xmalloc (dirlen + strlen (to) + 2);
530 strcpy (ptr->map_to, dirname);
531 ptr->map_to[dirlen] = '/';
532 strcpy (ptr->map_to + dirlen + 1, to);
533 free (to);
536 ptr->map_next = map_list_ptr->map_list_map;
537 map_list_ptr->map_list_map = ptr;
539 while ((ch = getc (f)) != '\n')
540 if (ch == EOF)
541 break;
543 fclose (f);
546 map_list_ptr->map_list_next = CPP_OPTIONS (pfile)->map_list;
547 CPP_OPTIONS (pfile)->map_list = map_list_ptr;
549 return map_list_ptr->map_list_map;
552 /* Remap NAME based on the file_name_map (if any) for LOC. */
554 static char *
555 remap_filename (pfile, name, loc)
556 cpp_reader *pfile;
557 char *name;
558 struct file_name_list *loc;
560 struct file_name_map *map;
561 const char *from, *p, *dir;
563 if (! loc->name_map)
564 loc->name_map = read_name_map (pfile,
565 loc->name
566 ? loc->name : ".");
568 if (loc->name_map == (struct file_name_map *)-1)
569 return name;
571 from = name + strlen (loc->name) + 1;
573 for (map = loc->name_map; map; map = map->map_next)
574 if (!strcmp (map->map_from, from))
575 return map->map_to;
577 /* Try to find a mapping file for the particular directory we are
578 looking in. Thus #include <sys/types.h> will look up sys/types.h
579 in /usr/include/header.gcc and look up types.h in
580 /usr/include/sys/header.gcc. */
581 p = rindex (name, '/');
582 if (!p)
583 p = name;
584 if (loc && loc->name
585 && strlen (loc->name) == (size_t) (p - name)
586 && !strncmp (loc->name, name, p - name))
587 /* FILENAME is in SEARCHPTR, which we've already checked. */
588 return name;
590 if (p == name)
592 dir = ".";
593 from = name;
595 else
597 char * newdir = (char *) alloca (p - name + 1);
598 bcopy (name, newdir, p - name);
599 newdir[p - name] = '\0';
600 dir = newdir;
601 from = p + 1;
604 for (map = read_name_map (pfile, dir); map; map = map->map_next)
605 if (! strcmp (map->map_from, name))
606 return map->map_to;
608 return name;
611 /* Read the contents of FD into the buffer on the top of PFILE's stack.
612 IHASH points to the include hash entry for the file associated with
615 The caller is responsible for the cpp_push_buffer. */
618 finclude (pfile, fd, ihash)
619 cpp_reader *pfile;
620 int fd;
621 struct include_hash *ihash;
623 struct stat st;
624 size_t st_size;
625 long length;
626 cpp_buffer *fp;
628 if (fstat (fd, &st) < 0)
629 goto perror_fail;
630 if (fcntl (fd, F_SETFL, 0) == -1) /* turn off nonblocking mode */
631 goto perror_fail;
633 fp = CPP_BUFFER (pfile);
635 /* If fd points to a plain file, we know how big it is, so we can
636 allocate the buffer all at once. If fd is a pipe or terminal, we
637 can't. Most C source files are 4k or less, so we guess that. If
638 fd is something weird, like a block device or a directory, we
639 don't want to read it at all.
641 Unfortunately, different systems use different st.st_mode values
642 for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
643 zero the entire struct stat except a couple fields. Hence the
644 mess below.
646 In all cases, read_and_prescan will resize the buffer if it
647 turns out there's more data than we thought. */
649 if (S_ISREG (st.st_mode))
651 /* off_t might have a wider range than size_t - in other words,
652 the max size of a file might be bigger than the address
653 space. We can't handle a file that large. (Anyone with
654 a single source file bigger than 4GB needs to rethink
655 their coding style.) */
656 st_size = (size_t) st.st_size;
657 if ((unsigned HOST_WIDEST_INT) st_size
658 != (unsigned HOST_WIDEST_INT) st.st_size)
660 cpp_error (pfile, "file `%s' is too large", ihash->name);
661 goto fail;
664 else if (S_ISFIFO (st.st_mode) || S_ISSOCK (st.st_mode)
665 /* Permit any kind of character device: the sensible ones are
666 ttys and /dev/null, but weeding out the others is too hard. */
667 || S_ISCHR (st.st_mode)
668 /* Some 4.x (x<4) derivatives have a bug that makes fstat() of a
669 socket or pipe return a stat struct with most fields zeroed. */
670 || (st.st_mode == 0 && st.st_nlink == 0 && st.st_size == 0))
672 /* Cannot get its file size before reading. 4k is a decent
673 first guess. */
674 st_size = 4096;
676 else
678 cpp_error (pfile, "`%s' is not a file, pipe, or tty", ihash->name);
679 goto fail;
682 if (pfile->input_buffer == NULL)
683 initialize_input_buffer (pfile, fd, &st);
685 /* Read the file, converting end-of-line characters and trigraphs
686 (if enabled). */
687 fp->ihash = ihash;
688 fp->nominal_fname = fp->fname = ihash->name;
689 length = read_and_prescan (pfile, fp, fd, st_size);
690 if (length < 0)
691 goto fail;
692 if (length == 0)
693 ihash->control_macro = ""; /* never re-include */
695 close (fd);
696 fp->rlimit = fp->alimit = fp->buf + length;
697 fp->cur = fp->buf;
698 if (ihash->foundhere != ABSOLUTE_PATH)
699 fp->system_header_p = ihash->foundhere->sysp;
700 fp->lineno = 1;
701 fp->colno = 1;
702 fp->line_base = fp->buf;
703 fp->cleanup = file_cleanup;
705 /* The ->actual_dir field is only used when ignore_srcdir is not in effect;
706 see do_include */
707 if (!CPP_OPTIONS (pfile)->ignore_srcdir)
708 fp->actual_dir = actual_directory (pfile, fp->fname);
710 pfile->input_stack_listing_current = 0;
711 return 1;
713 perror_fail:
714 cpp_error_from_errno (pfile, ihash->name);
715 fail:
716 cpp_pop_buffer (pfile);
717 close (fd);
718 return 0;
721 /* Given a path FNAME, extract the directory component and place it
722 onto the actual_dirs list. Return a pointer to the allocated
723 file_name_list structure. These structures are used to implement
724 current-directory "" include searching. */
726 static struct file_name_list *
727 actual_directory (pfile, fname)
728 cpp_reader *pfile;
729 const char *fname;
731 char *last_slash, *dir;
732 size_t dlen;
733 struct file_name_list *x;
735 dir = xstrdup (fname);
736 last_slash = rindex (dir, '/');
737 if (last_slash)
739 if (last_slash == dir)
741 dlen = 1;
742 last_slash[1] = '\0';
744 else
746 dlen = last_slash - dir;
747 *last_slash = '\0';
750 else
752 dir[0] = '.';
753 dir[1] = '\0';
754 dlen = 1;
757 if (dlen > pfile->max_include_len)
758 pfile->max_include_len = dlen;
760 for (x = pfile->actual_dirs; x; x = x->alloc)
761 if (!strcmp (x->name, dir))
763 free (dir);
764 return x;
767 /* Not found, make a new one. */
768 x = (struct file_name_list *) xmalloc (sizeof (struct file_name_list));
769 x->name = dir;
770 x->nlen = dlen;
771 x->next = CPP_OPTIONS (pfile)->quote_include;
772 x->alloc = pfile->actual_dirs;
773 x->sysp = CPP_BUFFER (pfile)->system_header_p;
774 x->name_map = NULL;
776 pfile->actual_dirs = x;
777 return x;
780 /* Determine the current line and column. Used only by read_and_prescan. */
781 static U_CHAR *
782 find_position (start, limit, linep)
783 U_CHAR *start;
784 U_CHAR *limit;
785 unsigned long *linep;
787 unsigned long line = *linep;
788 U_CHAR *lbase = start;
789 while (start < limit)
791 U_CHAR ch = *start++;
792 if (ch == '\n' || ch == '\r')
794 line++;
795 lbase = start;
798 *linep = line;
799 return lbase;
802 /* Read the entire contents of file DESC into buffer BUF. LEN is how
803 much memory to allocate initially; more will be allocated if
804 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
805 canonical form (\n). If enabled, convert and/or warn about
806 trigraphs. Convert backslash-newline to a one-character escape
807 (\r) and remove it from "embarrassing" places (i.e. the middle of a
808 token). If there is no newline at the end of the file, add one and
809 warn. Returns -1 on failure, or the actual length of the data to
810 be scanned.
812 This function does a lot of work, and can be a serious performance
813 bottleneck. It has been tuned heavily; make sure you understand it
814 before hacking. The common case - no trigraphs, Unix style line
815 breaks, backslash-newline set off by whitespace, newline at EOF -
816 has been optimized at the expense of the others. The performance
817 penalty for DOS style line breaks (\r\n) is about 15%.
819 Warnings lose particularly heavily since we have to determine the
820 line number, which involves scanning from the beginning of the file
821 or from the last warning. The penalty for the absence of a newline
822 at the end of reload1.c is about 60%. (reload1.c is 329k.)
824 If your file has more than one kind of end-of-line marker, you
825 will get messed-up line numbering. */
827 /* Table of characters that can't be handled in the inner loop.
828 Keep these contiguous to optimize the performance of the code generated
829 for the switch that uses them. */
830 #define SPECCASE_EMPTY 0
831 #define SPECCASE_NUL 1
832 #define SPECCASE_CR 2
833 #define SPECCASE_BACKSLASH 3
834 #define SPECCASE_QUESTION 4
836 static long
837 read_and_prescan (pfile, fp, desc, len)
838 cpp_reader *pfile;
839 cpp_buffer *fp;
840 int desc;
841 size_t len;
843 U_CHAR *buf = (U_CHAR *) xmalloc (len);
844 U_CHAR *ip, *op, *line_base;
845 U_CHAR *ibase;
846 U_CHAR *speccase = pfile->input_speccase;
847 unsigned long line;
848 unsigned int deferred_newlines;
849 int count;
850 size_t offset;
852 offset = 0;
853 op = buf;
854 line_base = buf;
855 line = 1;
856 ibase = pfile->input_buffer + 2;
857 deferred_newlines = 0;
859 for (;;)
861 read_next:
863 count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len);
864 if (count < 0)
865 goto error;
866 else if (count == 0)
867 break;
869 offset += count;
870 ip = ibase;
871 ibase = pfile->input_buffer + 2;
872 ibase[count] = ibase[count+1] = '\0';
874 if (offset > len)
876 size_t delta_op;
877 size_t delta_line_base;
878 len *= 2;
879 if (offset > len)
880 /* len overflowed.
881 This could happen if the file is larger than half the
882 maximum address space of the machine. */
883 goto too_big;
885 delta_op = op - buf;
886 delta_line_base = line_base - buf;
887 buf = (U_CHAR *) xrealloc (buf, len);
888 op = buf + delta_op;
889 line_base = buf + delta_line_base;
892 for (;;)
894 unsigned int span = 0;
896 /* Deal with \-newline in the middle of a token. */
897 if (deferred_newlines)
899 while (speccase[ip[span]] == SPECCASE_EMPTY
900 && ip[span] != '\n'
901 && ip[span] != '\t'
902 && ip[span] != ' ')
903 span++;
904 memcpy (op, ip, span);
905 op += span;
906 ip += span;
907 /* If ip[0] is SPECCASE_EMPTY, we have hit white space.
908 Dump out the remaining deferred \-newlines. */
909 if (speccase[ip[0]] == SPECCASE_EMPTY)
910 while (deferred_newlines)
911 deferred_newlines--, *op++ = '\r';
912 span = 0;
915 /* Copy as much as we can without special treatment. */
916 while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
917 memcpy (op, ip, span);
918 op += span;
919 ip += span;
921 switch (speccase[*ip++])
923 case SPECCASE_NUL: /* \0 */
924 ibase[-1] = op[-1];
925 goto read_next;
927 case SPECCASE_CR: /* \r */
928 if (ip[-2] == '\n')
929 continue;
930 else if (*ip == '\n')
931 ip++;
932 else if (*ip == '\0')
934 *--ibase = '\r';
935 goto read_next;
937 *op++ = '\n';
938 break;
940 case SPECCASE_BACKSLASH: /* \ */
941 backslash:
943 /* If we're at the end of the intermediate buffer,
944 we have to shift the backslash down to the start
945 and come back next pass. */
946 if (*ip == '\0')
948 *--ibase = '\\';
949 goto read_next;
951 else if (*ip == '\n')
953 ip++;
954 if (*ip == '\r') ip++;
955 if (*ip == '\n' || *ip == '\t' || *ip == ' ')
956 *op++ = '\r';
957 else if (op[-1] == '\t' || op[-1] == ' '
958 || op[-1] == '\r' || op[-1] == '\n')
959 *op++ = '\r';
960 else
961 deferred_newlines++;
963 else if (*ip == '\r')
965 ip++;
966 if (*ip == '\n') ip++;
967 else if (*ip == '\0')
969 *--ibase = '\r';
970 *--ibase = '\\';
971 goto read_next;
973 else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
974 *op++ = '\r';
975 else
976 deferred_newlines++;
978 else
979 *op++ = '\\';
981 break;
983 case SPECCASE_QUESTION: /* ? */
985 unsigned int d, t;
986 /* If we're at the end of the intermediate buffer,
987 we have to shift the ?'s down to the start and
988 come back next pass. */
989 d = ip[0];
990 if (d == '\0')
992 *--ibase = '?';
993 goto read_next;
995 if (d != '?')
997 *op++ = '?';
998 break;
1000 d = ip[1];
1001 if (d == '\0')
1003 *--ibase = '?';
1004 *--ibase = '?';
1005 goto read_next;
1008 /* Trigraph map:
1009 * from to from to from to
1010 * ?? = # ?? ) ] ?? ! |
1011 * ?? ( [ ?? ' ^ ?? > }
1012 * ?? / \ ?? < { ?? - ~
1014 if (d == '=') t = '#';
1015 else if (d == ')') t = ']';
1016 else if (d == '!') t = '|';
1017 else if (d == '(') t = '[';
1018 else if (d == '\'') t = '^';
1019 else if (d == '>') t = '}';
1020 else if (d == '/') t = '\\';
1021 else if (d == '<') t = '{';
1022 else if (d == '-') t = '~';
1023 else
1025 *op++ = '?';
1026 break;
1028 ip += 2;
1029 if (CPP_OPTIONS (pfile)->warn_trigraphs)
1031 unsigned long col;
1032 line_base = find_position (line_base, op, &line);
1033 col = op - line_base + 1;
1034 if (CPP_OPTIONS (pfile)->trigraphs)
1035 cpp_warning_with_line (pfile, line, col,
1036 "trigraph ??%c converted to %c", d, t);
1037 else
1038 cpp_warning_with_line (pfile, line, col,
1039 "trigraph ??%c ignored", d);
1041 if (CPP_OPTIONS (pfile)->trigraphs)
1043 if (t == '\\')
1044 goto backslash;
1045 else
1046 *op++ = t;
1048 else
1050 *op++ = '?';
1051 *op++ = '?';
1052 *op++ = d;
1059 if (offset == 0)
1060 return 0;
1062 /* Deal with pushed-back chars at true EOF.
1063 This may be any of: ?? ? \ \r \n \\r \\n.
1064 \r must become \n, \\r or \\n must become \r.
1065 We know we have space already. */
1066 if (ibase == pfile->input_buffer)
1068 if (*ibase == '?')
1070 *op++ = '?';
1071 *op++ = '?';
1073 else
1074 *op++ = '\r';
1076 else if (ibase == pfile->input_buffer + 1)
1078 if (*ibase == '\r')
1079 *op++ = '\n';
1080 else
1081 *op++ = *ibase;
1084 if (op[-1] != '\n')
1086 unsigned long col;
1087 line_base = find_position (line_base, op, &line);
1088 col = op - line_base + 1;
1089 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1090 if (offset + 1 > len)
1092 len += 1;
1093 if (offset + 1 > len)
1094 goto too_big;
1095 buf = (U_CHAR *) xrealloc (buf, len);
1096 op = buf + offset;
1098 *op++ = '\n';
1101 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1102 return op - buf;
1104 too_big:
1105 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1106 free (buf);
1107 return -1;
1109 error:
1110 cpp_error_from_errno (pfile, fp->fname);
1111 free (buf);
1112 return -1;
1115 /* Initialize the `input_buffer' and `input_speccase' tables.
1116 These are only used by read_and_prescan, but they're large and
1117 somewhat expensive to set up, so we want them allocated once for
1118 the duration of the cpp run. */
1120 static void
1121 initialize_input_buffer (pfile, fd, st)
1122 cpp_reader *pfile;
1123 int fd;
1124 struct stat *st;
1126 long pipe_buf;
1127 U_CHAR *tmp;
1129 /* Table of characters that cannot be handled by the
1130 read_and_prescan inner loop. The number of non-EMPTY entries
1131 should be as small as humanly possible. */
1133 tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT);
1134 memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT);
1135 tmp['\0'] = SPECCASE_NUL;
1136 tmp['\r'] = SPECCASE_CR;
1137 tmp['\\'] = SPECCASE_BACKSLASH;
1138 if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
1139 tmp['?'] = SPECCASE_QUESTION;
1141 pfile->input_speccase = tmp;
1143 /* Determine the appropriate size for the input buffer. Normal C
1144 source files are smaller than eight K. If we are reading a pipe,
1145 we want to make sure the input buffer is bigger than the kernel's
1146 pipe buffer. */
1147 pipe_buf = -1;
1149 if (! S_ISREG (st->st_mode))
1151 #ifdef _PC_PIPE_BUF
1152 pipe_buf = fpathconf (fd, _PC_PIPE_BUF);
1153 #endif
1154 if (pipe_buf == -1)
1156 #ifdef PIPE_BUF
1157 pipe_buf = PIPE_BUF;
1158 #else
1159 pipe_buf = 8192;
1160 #endif
1164 if (pipe_buf < 8192)
1165 pipe_buf = 8192;
1166 /* PIPE_BUF bytes of buffer proper, 2 to detect running off the end
1167 without address arithmetic all the time, and 2 for pushback in
1168 the case there's a potential trigraph or end-of-line digraph at
1169 the end of a block. */
1171 tmp = (U_CHAR *) xmalloc (pipe_buf + 2 + 2);
1172 pfile->input_buffer = tmp;
1173 pfile->input_buffer_len = pipe_buf;
1176 /* Add output to `deps_buffer' for the -M switch.
1177 STRING points to the text to be output.
1178 SPACER is ':' for targets, ' ' for dependencies, zero for text
1179 to be inserted literally. */
1181 void
1182 deps_output (pfile, string, spacer)
1183 cpp_reader *pfile;
1184 const char *string;
1185 int spacer;
1187 int size;
1188 int cr = 0;
1190 if (!*string)
1191 return;
1193 size = strlen (string);
1195 #ifndef MAX_OUTPUT_COLUMNS
1196 #define MAX_OUTPUT_COLUMNS 72
1197 #endif
1198 if (pfile->deps_column > 0
1199 && (pfile->deps_column + size) > MAX_OUTPUT_COLUMNS)
1201 cr = 5;
1202 pfile->deps_column = 0;
1205 if (pfile->deps_size + size + cr + 8 > pfile->deps_allocated_size)
1207 pfile->deps_allocated_size = (pfile->deps_size + size + 50) * 2;
1208 pfile->deps_buffer = (char *) xrealloc (pfile->deps_buffer,
1209 pfile->deps_allocated_size);
1212 if (cr)
1214 bcopy (" \\\n ", &pfile->deps_buffer[pfile->deps_size], 5);
1215 pfile->deps_size += 5;
1218 if (spacer == ' ' && pfile->deps_column > 0)
1219 pfile->deps_buffer[pfile->deps_size++] = ' ';
1220 bcopy (string, &pfile->deps_buffer[pfile->deps_size], size);
1221 pfile->deps_size += size;
1222 pfile->deps_column += size + 1; /* count spacer too */
1223 if (spacer == ':')
1224 pfile->deps_buffer[pfile->deps_size++] = ':';
1225 pfile->deps_buffer[pfile->deps_size] = 0;
/* Simplify a path name in place, deleting redundant components.  This
   reduces OS overhead and guarantees that equivalent paths compare
   the same (modulo symlinks).

   Transforms made:
   foo/bar/../quux	foo/quux
   foo/./bar		foo/bar
   foo//bar		foo/bar
   foo/bar/		foo/bar
   /../quux		/quux
   //quux		//quux  (POSIX allows leading // as a namespace escape)

   Guarantees no trailing slash, except for the slash(es) that form
   the root of an absolute path ("/" and "//" are left alone).  No
   transform lengthens the string, with one exception: a path that
   simplifies to nothing becomes ".", so that stat() on the result
   always works.  PATH must therefore have room for at least two
   bytes even when it holds the empty string.  */

void
simplify_pathname (path)
     char *path;
{
  char *from, *to;
  char *base;
  char *root;
  int absolute = 0;

#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
  /* Convert all backslashes to slashes.  */
  for (from = path; *from; from++)
    if (*from == '\\')
      *from = '/';

  /* Skip over leading drive letter if present.  */
  if (ISALPHA (path[0]) && path[1] == ':')
    from = to = &path[2];
  else
    from = to = path;
#else
  from = to = path;
#endif

  /* Remove redundant initial /s.  */
  if (*from == '/')
    {
      absolute = 1;
      to++;
      from++;
      if (*from == '/')
	{
	  if (*++from == '/')
	    {
	      /* 3 or more initial /s are equivalent to 1 /.  */
	      while (*++from == '/')
		;
	    }
	  else
	    /* On some hosts // differs from /; Posix allows this.  */
	    to++;
	}
    }

  /* ROOT marks the end of the drive/root prefix and never moves.
     BASE is the point ".." may not back up past; it advances when a
     relative path keeps a leading "../".  */
  base = root = to;

  for (;;)
    {
      while (*from == '/')
	from++;

      if (from[0] == '.' && from[1] == '/')
	from += 2;
      else if (from[0] == '.' && from[1] == '\0')
	goto done;
      else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
	{
	  if (base == to)
	    {
	      /* Nothing to cancel against.  "/.." is "/", but a
		 relative path has to keep its leading "../".  */
	      if (absolute)
		from += 3;
	      else
		{
		  *to++ = *from++;
		  *to++ = *from++;
		  *to++ = *from++;
		  base = to;
		}
	    }
	  else
	    {
	      /* Drop the previous component: step back over its
		 trailing slash, then to the slash before it.  */
	      to -= 2;
	      while (to > base && *to != '/')
		to--;
	      if (*to == '/')
		to++;
	      from += 3;
	    }
	}
      else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
	{
	  if (base == to)
	    {
	      if (!absolute)
		{
		  *to++ = *from++;
		  *to++ = *from++;
		}
	    }
	  else
	    {
	      to -= 2;
	      while (to > base && *to != '/')
		to--;
	      if (*to == '/')
		to++;
	    }
	  goto done;
	}
      else
	{
	  /* Copy this component and trailing /, if any.  */
	  while ((*to++ = *from++) != '/')
	    {
	      if (!to[-1])
		{
		  to--;
		  goto done;
		}
	    }
	}
    }

 done:
  /* TO points just past the last character kept, so the character to
     examine is to[-1]; to[0] only holds leftover input.  Never trim
     into the root prefix.  */
  if (to > root && to[-1] == '/')
    to--;

  /* Change the empty string to "." so that stat() on the result
     will always work.  */
  if (to == path)
    *to++ = '.';

  *to = '\0';

  return;
}
/* It is not clear when this should be used if at all, so I've
   disabled it until someone who understands VMS can look at it.  */
#if 0

/* Under VMS we need to fix up the "include" specification filename.

   Rules for possible conversions

	fullname		tried paths

	name			name
	./dir/name		[.dir]name
	/dir/name		dir:name
	/name			[000000]name, name
	dir/name		dir:[000000]name, dir:name, dir/name
	dir1/dir2/name		dir1:[dir2]name, dir1:[000000.dir2]name
	path:/name		path:[000000]name, path:name
	path:/dir/name		path:[000000.dir]name, path:[dir]name
	path:dir/name		path:[dir]name
	[path]:[dir]name	[path.dir]name
	path/[dir]name		[path.dir]name

   The path:/name input is constructed when expanding <> includes.

   FULLNAME is rewritten in place.  Returns 0 if FULLNAME was left
   untouched (it contains no '/', or it is too long to convert in the
   local work buffer), 1 otherwise.  The rewritten name can be longer
   than the original, so the caller must leave spare room in
   FULLNAME.  */

static int
hack_vms_include_specification (fullname)
     char *fullname;
{
  register char *basename, *unixname, *local_ptr, *first_slash;
  int f, check_filename_before_returning, must_revert;
  char Local[512];

  check_filename_before_returning = 0;
  must_revert = 0;
  /* See if we can find a 1st slash.  If not, there's no path information.  */
  first_slash = index (fullname, '/');
  if (first_slash == 0)
    return 0;				/* Nothing to do!!! */

  /* The converted name is assembled in Local with no further bounds
     checks.  The margin of 16 is meant to cover the fixed text that
     may be added below (':', '[', '.', "000000]", ']' and the NUL).  */
  if (strlen (fullname) + 16 > sizeof Local)
    return 0;

  /* construct device spec if none given.  */

  if (index (fullname, ':') == 0)
    {
      /* If fullname has a slash, take it as device spec.  */

      if (first_slash == fullname)
	{
	  first_slash = index (fullname + 1, '/');	/* 2nd slash ? */
	  if (first_slash)
	    *first_slash = ':';			/* make device spec */
	  for (basename = fullname; *basename != 0; basename++)
	    *basename = *(basename + 1);	/* remove leading slash */
	}
      else if ((first_slash[-1] != '.')		/* keep ':/', './' */
	       && (first_slash[-1] != ':')
	       && (first_slash[-1] != ']'))	/* or a vms path */
	{
	  *first_slash = ':';
	}
      else if ((first_slash[1] == '[')		/* skip './' in './[dir' */
	       && (first_slash[-1] == '.'))
	fullname += 2;
    }

  /* Get part after first ':' (basename[-1] == ':')
     or last '/' (basename[-1] == '/').  */

  basename = base_name (fullname);

  local_ptr = Local;			/* initialize */

  /* We are trying to do a number of things here.  First of all, we are
     trying to hammer the filenames into a standard format, such that later
     processing can handle them.

     If the file name contains something like [dir.], then it recognizes this
     as a root, and strips the ".]".  Later processing will add whatever is
     needed to get things working properly.

     If no device is specified, then the first directory name is taken to be
     a device name (or a rooted logical).  */

  /* Point to the UNIX filename part (which needs to be fixed!)
     but skip vms path information.
     [basename != fullname since first_slash != 0].  */

  if ((basename[-1] == ':')		/* vms path spec.  */
      || (basename[-1] == ']')
      || (basename[-1] == '>'))
    unixname = basename;
  else
    unixname = fullname;

  if (*unixname == '/')
    unixname++;

  /* If the directory spec is not rooted, we can just copy
     the UNIX filename part and we are done.  */

  if (((basename - fullname) > 1)
      && ((basename[-1] == ']')
	  || (basename[-1] == '>')))
    {
      if (basename[-2] != '.')
	{
	  /* The VMS part ends in a `]', and the preceding character is not
	     a `.'.
	     -> PATH]:/name (basename = '/name', unixname = 'name')
	     We strip the `]', and then splice the two parts of the name in
	     the usual way.  Given the default locations for include files in
	     cccp.c, we will only use this code if the user specifies
	     alternate locations with the /include (-I) switch on the command
	     line.  */

	  basename -= 1;	/* Strip "]" */
	  unixname--;		/* backspace */
	}
      else
	{
	  /* The VMS part has a ".]" at the end, and this will not do.  Later
	     processing will add a second directory spec, and this would be a
	     syntax error.  Thus we strip the ".]", and thus merge the
	     directory specs.  We also backspace unixname, so that it points
	     to a '/'.  This inhibits the generation of the 000000 root
	     directory spec (which does not belong here in this case).  */

	  basename -= 2;	/* Strip ".]" */
	  unixname--;		/* backspace */
	}
    }
  else
    {
      /* We drop in here if there is no VMS style directory specification
	 yet.  If there is no device specification either, we make the first
	 dir a device and try that.  If we do not do this, then we will be
	 essentially searching the users default directory (as if they did a
	 #include "asdf.h").

	 Then all we need to do is to push a '[' into the output string.
	 Later processing will fill this in, and close the bracket.  */

      if ((unixname != fullname)	/* vms path spec found.  */
	  && (basename[-1] != ':'))
	*local_ptr++ = ':';		/* dev not in spec.  take first dir */

      *local_ptr++ = '[';		/* Open the directory specification */
    }

  if (unixname == fullname)		/* no vms dir spec.  */
    {
      must_revert = 1;
      if ((first_slash != 0)		/* unix dir spec.  */
	  && (*unixname != '/')		/* not beginning with '/' */
	  && (*unixname != '.'))	/* or './' or '../' */
	*local_ptr++ = '.';		/* dir is local ! */
    }

  /* at this point we assume that we have the device spec, and (at least
     the opening "[" for a directory specification.  We may have directories
     specified already.

     If there are no other slashes then the filename will be
     in the "root" directory.  Otherwise, we need to add
     directory specifications.  */

  if (index (unixname, '/') == 0)
    {
      /* if no directories specified yet and none are following.  */
      if (local_ptr[-1] == '[')
	{
	  /* Just add "000000]" as the directory string */
	  strcpy (local_ptr, "000000]");
	  local_ptr += strlen (local_ptr);
	  check_filename_before_returning = 1;	/* we might need to fool
						   with this later */
	}
    }
  else
    {
      /* As long as there are still subdirectories to add, do them.  */
      while (index (unixname, '/') != 0)
	{
	  /* If this token is "." we can ignore it
	     if it's not at the beginning of a path.  */
	  if ((unixname[0] == '.') && (unixname[1] == '/'))
	    {
	      /* remove it at beginning of path.  */
	      if (((unixname == fullname)		/* no device spec */
		   && (fullname + 2 != basename))	/* starts with ./ */
							/* or */
		  || ((basename[-1] == ':')		/* device spec */
		      && (unixname - 1 == basename)))	/* and ./ afterwards */
		*local_ptr++ = '.';		/* make '[.' start of path.  */
	      unixname += 2;
	      continue;
	    }

	  /* Add a subdirectory spec.  Do not duplicate "." */
	  if (local_ptr[-1] != '.'
	      && local_ptr[-1] != '['
	      && local_ptr[-1] != '<')
	    *local_ptr++ = '.';

	  /* If this is ".." then the spec becomes "-" */
	  if ((unixname[0] == '.')
	      && (unixname[1] == '.')
	      && (unixname[2] == '/'))
	    {
	      /* Add "-" and skip the ".." */
	      if ((local_ptr[-1] == '.')
		  && (local_ptr[-2] == '['))
		local_ptr--;			/* prevent [.- */
	      *local_ptr++ = '-';
	      unixname += 3;
	      continue;
	    }

	  /* Copy the subdirectory */
	  while (*unixname != '/')
	    *local_ptr++ = *unixname++;

	  unixname++;			/* Skip the "/" */
	}

      /* Close the directory specification */
      if (local_ptr[-1] == '.')		/* no trailing periods */
	local_ptr--;

      if (local_ptr[-1] == '[')		/* no dir needed */
	local_ptr--;
      else
	*local_ptr++ = ']';
    }

  /* Now add the filename.  */

  while (*unixname)
    *local_ptr++ = *unixname++;
  *local_ptr = 0;

  /* Now append it to the original VMS spec.  */

  strcpy ((must_revert == 1) ? fullname : basename, Local);

  /* If we put a [000000] in the filename, try to open it first.  If this
     fails, remove the [000000], and return that name.  This provides
     flexibility to the user in that they can use both rooted and non-rooted
     logical names to point to the location of the file.  */

  if (check_filename_before_returning)
    {
      f = open (fullname, O_RDONLY, 0666);
      if (f >= 0)
	{
	  /* The file name is OK as it is, so return it as is.  */
	  close (f);
	  return 1;
	}

      /* The filename did not work.  Try to remove the [000000] from the name,
	 and return it.  */

      basename = index (fullname, '[');
      local_ptr = index (fullname, ']') + 1;
      /* Source and destination are parts of the same string, so the
	 copy must tolerate overlap; strcpy does not.  */
      memmove (basename, local_ptr, strlen (local_ptr) + 1);
    }

  return 1;
}
#endif	/* 0 */