oops - omitted from previous delta fixing UNIQUE_SECTION
[official-gcc.git] / gcc / cppfiles.c
bloba89a0cafc04bd32d31a6669a5f7c309c213e10d8
1 /* Part of CPP library. (include file handling)
2 Copyright (C) 1986, 87, 89, 92-95, 98, 99, 2000 Free Software Foundation, Inc.
3 Written by Per Bothner, 1994.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Split out of cpplib.c, Zack Weinberg, Oct 1998
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 In other words, you are welcome to use, share and improve this program.
23 You are forbidden to forbid anyone else to use, share and improve
24 what you give them. Help stamp out software-hoarding! */
26 #include "config.h"
27 #include "system.h"
28 #include "cpplib.h"
30 /* The entry points to this file are: find_include_file, finclude,
31 include_hash, append_include_chain, deps_output, and file_cleanup.
32 file_cleanup is only called through CPP_BUFFER(pfile)->cleanup,
33 so it's static anyway. */
35 static struct include_hash *redundant_include_p
36 PARAMS ((cpp_reader *,
37 struct include_hash *,
38 struct file_name_list *));
39 static struct file_name_map *read_name_map
40 PARAMS ((cpp_reader *, const char *));
41 static char *read_filename_string PARAMS ((int, FILE *));
42 static char *remap_filename PARAMS ((cpp_reader *, char *,
43 struct file_name_list *));
44 static long read_and_prescan PARAMS ((cpp_reader *, cpp_buffer *,
45 int, size_t));
46 static struct file_name_list *actual_directory
47 PARAMS ((cpp_reader *, const char *));
48 static void initialize_input_buffer PARAMS ((cpp_reader *, int,
49 struct stat *));
50 static int file_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
51 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *,
52 unsigned long *));
54 #if 0
55 static void hack_vms_include_specification PARAMS ((char *));
56 #endif
/* INO_T_EQ (A, B) is nonzero when inode numbers A and B are equal.

   Windows does not natively support inodes, and neither does MSDOS.
   Cygwin's emulation can generate non-unique inodes, so don't use it;
   on those hosts no two inodes ever compare equal.
   VMS has non-numeric inodes, so they are compared bytewise.  */
#ifdef VMS
#define INO_T_EQ(a, b) (!bcmp((char *) &(a), (char *) &(b), sizeof (a)))
#elif (defined _WIN32 && ! defined (_UWIN)) \
  || defined __MSDOS__
#define INO_T_EQ(a, b) 0
#else
#define INO_T_EQ(a, b) ((a) == (b))
#endif
70 /* Merge the four include chains together in the order quote, bracket,
71 system, after. Remove duplicate dirs (as determined by
72 INO_T_EQ()). The system_include and after_include chains are never
73 referred to again after this function; all access is through the
74 bracket_include path.
76 For the future: Check if the directory is empty (but
77 how?) and possibly preload the include hash. */
79 void
80 merge_include_chains (opts)
81 struct cpp_options *opts;
83 struct file_name_list *prev, *cur, *other;
84 struct file_name_list *quote, *brack, *systm, *after;
85 struct file_name_list *qtail, *btail, *stail, *atail;
87 qtail = opts->pending->quote_tail;
88 btail = opts->pending->brack_tail;
89 stail = opts->pending->systm_tail;
90 atail = opts->pending->after_tail;
92 quote = opts->pending->quote_head;
93 brack = opts->pending->brack_head;
94 systm = opts->pending->systm_head;
95 after = opts->pending->after_head;
97 /* Paste together bracket, system, and after include chains. */
98 if (stail)
99 stail->next = after;
100 else
101 systm = after;
102 if (btail)
103 btail->next = systm;
104 else
105 brack = systm;
107 /* This is a bit tricky.
108 First we drop dupes from the quote-include list.
109 Then we drop dupes from the bracket-include list.
110 Finally, if qtail and brack are the same directory,
111 we cut out qtail.
113 We can't just merge the lists and then uniquify them because
114 then we may lose directories from the <> search path that should
115 be there; consider -Ifoo -Ibar -I- -Ifoo -Iquux. It is however
116 safe to treat -Ibar -Ifoo -I- -Ifoo -Iquux as if written
117 -Ibar -I- -Ifoo -Iquux.
119 Note that this algorithm is quadratic in the number of -I switches,
120 which is acceptable since there aren't usually that many of them. */
122 for (cur = quote, prev = NULL; cur; cur = cur->next)
124 for (other = quote; other != cur; other = other->next)
125 if (INO_T_EQ (cur->ino, other->ino)
126 && cur->dev == other->dev)
128 if (opts->verbose)
129 cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);
131 prev->next = cur->next;
132 free (cur->name);
133 free (cur);
134 cur = prev;
135 break;
137 prev = cur;
139 qtail = prev;
141 for (cur = brack; cur; cur = cur->next)
143 for (other = brack; other != cur; other = other->next)
144 if (INO_T_EQ (cur->ino, other->ino)
145 && cur->dev == other->dev)
147 if (opts->verbose)
148 cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);
150 prev->next = cur->next;
151 free (cur->name);
152 free (cur);
153 cur = prev;
154 break;
156 prev = cur;
159 if (quote)
161 if (INO_T_EQ (qtail->ino, brack->ino) && qtail->dev == brack->dev)
163 if (quote == qtail)
165 if (opts->verbose)
166 cpp_notice ("ignoring duplicate directory `%s'\n",
167 quote->name);
169 free (quote->name);
170 free (quote);
171 quote = brack;
173 else
175 cur = quote;
176 while (cur->next != qtail)
177 cur = cur->next;
178 cur->next = brack;
179 if (opts->verbose)
180 cpp_notice ("ignoring duplicate directory `%s'\n",
181 qtail->name);
183 free (qtail->name);
184 free (qtail);
187 else
188 qtail->next = brack;
190 else
191 quote = brack;
193 opts->quote_include = quote;
194 opts->bracket_include = brack;
197 /* Look up or add an entry to the table of all includes. This table
198 is indexed by the name as it appears in the #include line. The
199 ->next_this_file chain stores all different files with the same
200 #include name (there are at least three ways this can happen). The
201 hash function could probably be improved a bit. */
203 struct include_hash *
204 include_hash (pfile, fname, add)
205 cpp_reader *pfile;
206 const char *fname;
207 int add;
209 unsigned int hash = 0;
210 struct include_hash *l, *m;
211 const char *f = fname;
213 while (*f)
214 hash += *f++;
216 l = pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE];
217 m = 0;
218 for (; l; m = l, l = l->next)
219 if (!strcmp (l->nshort, fname))
220 return l;
222 if (!add)
223 return 0;
225 l = (struct include_hash *) xmalloc (sizeof (struct include_hash));
226 l->next = NULL;
227 l->next_this_file = NULL;
228 l->foundhere = NULL;
229 l->buf = NULL;
230 l->limit = NULL;
231 if (m)
232 m->next = l;
233 else
234 pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE] = l;
236 return l;
239 /* Return 0 if the file pointed to by IHASH has never been included before,
240 -1 if it has been included before and need not be again,
241 or a pointer to an IHASH entry which is the file to be reread.
242 "Never before" is with respect to the position in ILIST.
244 This will not detect redundancies involving odd uses of the
245 `current directory' rule for "" includes. They aren't quite
246 pathological, but I think they are rare enough not to worry about.
247 The simplest example is:
249 top.c:
250 #include "a/a.h"
251 #include "b/b.h"
253 a/a.h:
254 #include "../b/b.h"
256 and the problem is that for `current directory' includes,
257 ihash->foundhere is not on any of the global include chains,
258 so the test below (i->foundhere == l) may be false even when
259 the directories are in fact the same. */
261 static struct include_hash *
262 redundant_include_p (pfile, ihash, ilist)
263 cpp_reader *pfile;
264 struct include_hash *ihash;
265 struct file_name_list *ilist;
267 struct file_name_list *l;
268 struct include_hash *i;
270 if (! ihash->foundhere)
271 return 0;
273 for (i = ihash; i; i = i->next_this_file)
274 for (l = ilist; l; l = l->next)
275 if (i->foundhere == l)
276 /* The control_macro works like this: If it's NULL, the file
277 is to be included again. If it's "", the file is never to
278 be included again. If it's a string, the file is not to be
279 included again if the string is the name of a defined macro. */
280 return (i->control_macro
281 && (i->control_macro[0] == '\0'
282 || cpp_lookup (pfile, i->control_macro, -1, -1)))
283 ? (struct include_hash *)-1 : i;
285 return 0;
288 static int
289 file_cleanup (pbuf, pfile)
290 cpp_buffer *pbuf;
291 cpp_reader *pfile;
293 if (pbuf->buf)
295 free (pbuf->buf);
296 pbuf->buf = 0;
298 if (pfile->system_include_depth)
299 pfile->system_include_depth--;
300 return 0;
303 /* Search for include file FNAME in the include chain starting at
304 SEARCH_START. Return -2 if this file doesn't need to be included
305 (because it was included already and it's marked idempotent),
306 -1 if an error occurred, or a file descriptor open on the file.
307 *IHASH is set to point to the include hash entry for this file, and
308 *BEFORE is 1 if the file was included before (but needs to be read
309 again). */
311 find_include_file (pfile, fname, search_start, ihash, before)
312 cpp_reader *pfile;
313 const char *fname;
314 struct file_name_list *search_start;
315 struct include_hash **ihash;
316 int *before;
318 struct file_name_list *l;
319 struct include_hash *ih, *jh;
320 int f, len;
321 char *name;
323 ih = include_hash (pfile, fname, 1);
324 jh = redundant_include_p (pfile, ih,
325 fname[0] == '/' ? ABSOLUTE_PATH : search_start);
327 if (jh != 0)
329 *before = 1;
330 *ihash = jh;
332 if (jh == (struct include_hash *)-1)
333 return -2;
334 else
335 return open (jh->name, O_RDONLY, 0666);
338 if (ih->foundhere)
339 /* A file is already known by this name, but it's not the same file.
340 Allocate another include_hash block and add it to the next_this_file
341 chain. */
343 jh = (struct include_hash *)xmalloc (sizeof (struct include_hash));
344 while (ih->next_this_file) ih = ih->next_this_file;
346 ih->next_this_file = jh;
347 jh = ih;
348 ih = ih->next_this_file;
350 ih->next = NULL;
351 ih->next_this_file = NULL;
352 ih->buf = NULL;
353 ih->limit = NULL;
355 *before = 0;
356 *ihash = ih;
357 ih->nshort = xstrdup (fname);
358 ih->control_macro = NULL;
360 /* If the pathname is absolute, just open it. */
361 if (fname[0] == '/')
363 ih->foundhere = ABSOLUTE_PATH;
364 ih->name = ih->nshort;
365 return open (ih->name, O_RDONLY, 0666);
368 /* Search directory path, trying to open the file. */
370 len = strlen (fname);
371 name = xmalloc (len + pfile->max_include_len + 2 + INCLUDE_LEN_FUDGE);
373 for (l = search_start; l; l = l->next)
375 bcopy (l->name, name, l->nlen);
376 name[l->nlen] = '/';
377 strcpy (&name[l->nlen+1], fname);
378 simplify_pathname (name);
379 if (CPP_OPTIONS (pfile)->remap)
380 name = remap_filename (pfile, name, l);
382 f = open (name, O_RDONLY|O_NONBLOCK|O_NOCTTY, 0666);
383 #ifdef EACCES
384 if (f == -1 && errno == EACCES)
386 cpp_error(pfile, "included file `%s' exists but is not readable",
387 name);
388 return -1;
390 #endif
392 if (f >= 0)
394 ih->foundhere = l;
395 ih->name = xrealloc (name, strlen (name)+1);
396 return f;
400 if (jh)
402 jh->next_this_file = NULL;
403 free (ih);
405 free (name);
406 *ihash = (struct include_hash *)-1;
407 return -1;
/* The file_name_map structure holds a mapping of file names for a
   particular directory.  This mapping is read from the file named
   FILE_NAME_MAP_FILE in that directory.  Such a file can be used to
   map filenames on a file system with severe filename restrictions,
   such as DOS.  The format of the file name map file is just a series
   of lines with two tokens on each line.  The first token is the name
   to map, and the second token is the actual name to use.  */

struct file_name_map
{
  struct file_name_map *map_next;   /* Next mapping for the same directory.  */
  char *map_from;                   /* Name as written in the map file.  */
  char *map_to;                     /* Actual file name to use.  */
};

#define FILE_NAME_MAP_FILE "header.gcc"
/* Read a space delimited string of unlimited length from a stdio
   file.  CH is the first character of the string, already read from
   F.  The character that terminated the string is pushed back onto F.
   Returns a freshly allocated, NUL-terminated copy; it is the empty
   string when CH is itself a space.  */

static char *
read_filename_string (ch, f)
     int ch;
     FILE *f;
{
  char *alloc, *set;
  int len;

  len = 20;
  set = alloc = xmalloc (len + 1);
  if (! is_space(ch))
    {
      *set++ = ch;
      while ((ch = getc (f)) != EOF && ! is_space(ch))
        {
          if (set - alloc == len)
            {
              /* Buffer full: double it and re-derive the write position,
                 which sits at the old length.  */
              len *= 2;
              alloc = xrealloc (alloc, len + 1);
              set = alloc + len / 2;
            }
          *set++ = ch;
        }
    }
  *set = '\0';
  ungetc (ch, f);
  return alloc;
}
/* This structure holds a linked list of file name maps, one per directory. */

struct file_name_map_list
{
  struct file_name_map_list *map_list_next;  /* Next directory's entry.  */
  char *map_list_name;                       /* Directory name.  */
  struct file_name_map *map_list_map;        /* Mappings for that directory.  */
};
468 /* Read the file name map file for DIRNAME. */
470 static struct file_name_map *
471 read_name_map (pfile, dirname)
472 cpp_reader *pfile;
473 const char *dirname;
475 register struct file_name_map_list *map_list_ptr;
476 char *name;
477 FILE *f;
479 for (map_list_ptr = CPP_OPTIONS (pfile)->map_list; map_list_ptr;
480 map_list_ptr = map_list_ptr->map_list_next)
481 if (! strcmp (map_list_ptr->map_list_name, dirname))
482 return map_list_ptr->map_list_map;
484 map_list_ptr = ((struct file_name_map_list *)
485 xmalloc (sizeof (struct file_name_map_list)));
486 map_list_ptr->map_list_name = xstrdup (dirname);
488 name = (char *) alloca (strlen (dirname) + strlen (FILE_NAME_MAP_FILE) + 2);
489 strcpy (name, dirname);
490 if (*dirname)
491 strcat (name, "/");
492 strcat (name, FILE_NAME_MAP_FILE);
493 f = fopen (name, "r");
494 if (!f)
495 map_list_ptr->map_list_map = (struct file_name_map *)-1;
496 else
498 int ch;
499 int dirlen = strlen (dirname);
501 while ((ch = getc (f)) != EOF)
503 char *from, *to;
504 struct file_name_map *ptr;
506 if (is_space(ch))
507 continue;
508 from = read_filename_string (ch, f);
509 while ((ch = getc (f)) != EOF && is_hspace(ch))
511 to = read_filename_string (ch, f);
513 ptr = ((struct file_name_map *)
514 xmalloc (sizeof (struct file_name_map)));
515 ptr->map_from = from;
517 /* Make the real filename absolute. */
518 if (*to == '/')
519 ptr->map_to = to;
520 else
522 ptr->map_to = xmalloc (dirlen + strlen (to) + 2);
523 strcpy (ptr->map_to, dirname);
524 ptr->map_to[dirlen] = '/';
525 strcpy (ptr->map_to + dirlen + 1, to);
526 free (to);
529 ptr->map_next = map_list_ptr->map_list_map;
530 map_list_ptr->map_list_map = ptr;
532 while ((ch = getc (f)) != '\n')
533 if (ch == EOF)
534 break;
536 fclose (f);
539 map_list_ptr->map_list_next = CPP_OPTIONS (pfile)->map_list;
540 CPP_OPTIONS (pfile)->map_list = map_list_ptr;
542 return map_list_ptr->map_list_map;
545 /* Remap NAME based on the file_name_map (if any) for LOC. */
547 static char *
548 remap_filename (pfile, name, loc)
549 cpp_reader *pfile;
550 char *name;
551 struct file_name_list *loc;
553 struct file_name_map *map;
554 const char *from, *p, *dir;
556 if (! loc->name_map)
557 loc->name_map = read_name_map (pfile,
558 loc->name
559 ? loc->name : ".");
561 if (loc->name_map == (struct file_name_map *)-1)
562 return name;
564 from = name + strlen (loc->name) + 1;
566 for (map = loc->name_map; map; map = map->map_next)
567 if (!strcmp (map->map_from, from))
568 return map->map_to;
570 /* Try to find a mapping file for the particular directory we are
571 looking in. Thus #include <sys/types.h> will look up sys/types.h
572 in /usr/include/header.gcc and look up types.h in
573 /usr/include/sys/header.gcc. */
574 p = rindex (name, '/');
575 if (!p)
576 p = name;
577 if (loc && loc->name
578 && strlen (loc->name) == (size_t) (p - name)
579 && !strncmp (loc->name, name, p - name))
580 /* FILENAME is in SEARCHPTR, which we've already checked. */
581 return name;
583 if (p == name)
585 dir = ".";
586 from = name;
588 else
590 char * newdir = (char *) alloca (p - name + 1);
591 bcopy (name, newdir, p - name);
592 newdir[p - name] = '\0';
593 dir = newdir;
594 from = p + 1;
597 for (map = read_name_map (pfile, dir); map; map = map->map_next)
598 if (! strcmp (map->map_from, name))
599 return map->map_to;
601 return name;
604 /* Read the contents of FD into the buffer on the top of PFILE's stack.
605 IHASH points to the include hash entry for the file associated with
608 The caller is responsible for the cpp_push_buffer. */
611 finclude (pfile, fd, ihash)
612 cpp_reader *pfile;
613 int fd;
614 struct include_hash *ihash;
616 struct stat st;
617 size_t st_size;
618 long length;
619 cpp_buffer *fp;
621 if (fstat (fd, &st) < 0)
622 goto perror_fail;
623 if (fcntl (fd, F_SETFL, 0) == -1) /* turn off nonblocking mode */
624 goto perror_fail;
626 fp = CPP_BUFFER (pfile);
628 /* If fd points to a plain file, we know how big it is, so we can
629 allocate the buffer all at once. If fd is a pipe or terminal, we
630 can't. Most C source files are 4k or less, so we guess that. If
631 fd is something weird, like a block device or a directory, we
632 don't want to read it at all.
634 Unfortunately, different systems use different st.st_mode values
635 for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
636 zero the entire struct stat except a couple fields. Hence the
637 mess below.
639 In all cases, read_and_prescan will resize the buffer if it
640 turns out there's more data than we thought. */
642 if (S_ISREG (st.st_mode))
644 /* off_t might have a wider range than size_t - in other words,
645 the max size of a file might be bigger than the address
646 space. We can't handle a file that large. (Anyone with
647 a single source file bigger than 4GB needs to rethink
648 their coding style.) */
649 st_size = (size_t) st.st_size;
650 if ((unsigned HOST_WIDEST_INT) st_size
651 != (unsigned HOST_WIDEST_INT) st.st_size)
653 cpp_error (pfile, "file `%s' is too large", ihash->name);
654 goto fail;
657 else if (S_ISFIFO (st.st_mode) || S_ISSOCK (st.st_mode)
658 /* Permit any kind of character device: the sensible ones are
659 ttys and /dev/null, but weeding out the others is too hard. */
660 || S_ISCHR (st.st_mode)
661 /* Some 4.x (x<4) derivatives have a bug that makes fstat() of a
662 socket or pipe return a stat struct with most fields zeroed. */
663 || (st.st_mode == 0 && st.st_nlink == 0 && st.st_size == 0))
665 /* Cannot get its file size before reading. 4k is a decent
666 first guess. */
667 st_size = 4096;
669 else
671 cpp_error (pfile, "`%s' is not a file, pipe, or tty", ihash->name);
672 goto fail;
675 if (pfile->input_buffer == NULL)
676 initialize_input_buffer (pfile, fd, &st);
678 /* Read the file, converting end-of-line characters and trigraphs
679 (if enabled). */
680 fp->ihash = ihash;
681 fp->nominal_fname = fp->fname = ihash->name;
682 length = read_and_prescan (pfile, fp, fd, st_size);
683 if (length < 0)
684 goto fail;
685 if (length == 0)
686 ihash->control_macro = ""; /* never re-include */
688 close (fd);
689 fp->rlimit = fp->alimit = fp->buf + length;
690 fp->cur = fp->buf;
691 if (ihash->foundhere != ABSOLUTE_PATH)
692 fp->system_header_p = ihash->foundhere->sysp;
693 fp->lineno = 1;
694 fp->colno = 1;
695 fp->line_base = fp->buf;
696 fp->cleanup = file_cleanup;
698 /* The ->actual_dir field is only used when ignore_srcdir is not in effect;
699 see do_include */
700 if (!CPP_OPTIONS (pfile)->ignore_srcdir)
701 fp->actual_dir = actual_directory (pfile, fp->fname);
703 pfile->input_stack_listing_current = 0;
704 return 1;
706 perror_fail:
707 cpp_error_from_errno (pfile, ihash->name);
708 fail:
709 cpp_pop_buffer (pfile);
710 close (fd);
711 return 0;
714 /* Given a path FNAME, extract the directory component and place it
715 onto the actual_dirs list. Return a pointer to the allocated
716 file_name_list structure. These structures are used to implement
717 current-directory "" include searching. */
719 static struct file_name_list *
720 actual_directory (pfile, fname)
721 cpp_reader *pfile;
722 const char *fname;
724 char *last_slash, *dir;
725 size_t dlen;
726 struct file_name_list *x;
728 dir = xstrdup (fname);
729 last_slash = rindex (dir, '/');
730 if (last_slash)
732 if (last_slash == dir)
734 dlen = 1;
735 last_slash[1] = '\0';
737 else
739 dlen = last_slash - dir;
740 *last_slash = '\0';
743 else
745 dir[0] = '.';
746 dir[1] = '\0';
747 dlen = 1;
750 if (dlen > pfile->max_include_len)
751 pfile->max_include_len = dlen;
753 for (x = pfile->actual_dirs; x; x = x->alloc)
754 if (!strcmp (x->name, dir))
756 free (dir);
757 return x;
760 /* Not found, make a new one. */
761 x = (struct file_name_list *) xmalloc (sizeof (struct file_name_list));
762 x->name = dir;
763 x->nlen = dlen;
764 x->next = CPP_OPTIONS (pfile)->quote_include;
765 x->alloc = pfile->actual_dirs;
766 x->sysp = CPP_BUFFER (pfile)->system_header_p;
767 x->name_map = NULL;
769 pfile->actual_dirs = x;
770 return x;
773 /* Determine the current line and column. Used only by read_and_prescan. */
774 static U_CHAR *
775 find_position (start, limit, linep)
776 U_CHAR *start;
777 U_CHAR *limit;
778 unsigned long *linep;
780 unsigned long line = *linep;
781 U_CHAR *lbase = start;
782 while (start < limit)
784 U_CHAR ch = *start++;
785 if (ch == '\n' || ch == '\r')
787 line++;
788 lbase = start;
791 *linep = line;
792 return lbase;
795 /* Read the entire contents of file DESC into buffer BUF. LEN is how
796 much memory to allocate initially; more will be allocated if
797 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
798 canonical form (\n). If enabled, convert and/or warn about
799 trigraphs. Convert backslash-newline to a one-character escape
800 (\r) and remove it from "embarrassing" places (i.e. the middle of a
801 token). If there is no newline at the end of the file, add one and
802 warn. Returns -1 on failure, or the actual length of the data to
803 be scanned.
805 This function does a lot of work, and can be a serious performance
806 bottleneck. It has been tuned heavily; make sure you understand it
807 before hacking. The common case - no trigraphs, Unix style line
808 breaks, backslash-newline set off by whitespace, newline at EOF -
809 has been optimized at the expense of the others. The performance
810 penalty for DOS style line breaks (\r\n) is about 15%.
812 Warnings lose particularly heavily since we have to determine the
813 line number, which involves scanning from the beginning of the file
814 or from the last warning. The penalty for the absence of a newline
815 at the end of reload1.c is about 60%. (reload1.c is 329k.)
817 If your file has more than one kind of end-of-line marker, you
818 will get messed-up line numbering. */
820 /* Table of characters that can't be handled in the inner loop.
821 Keep these contiguous to optimize the performance of the code generated
822 for the switch that uses them. */
823 #define SPECCASE_EMPTY 0
824 #define SPECCASE_NUL 1
825 #define SPECCASE_CR 2
826 #define SPECCASE_BACKSLASH 3
827 #define SPECCASE_QUESTION 4
829 static long
830 read_and_prescan (pfile, fp, desc, len)
831 cpp_reader *pfile;
832 cpp_buffer *fp;
833 int desc;
834 size_t len;
836 U_CHAR *buf = (U_CHAR *) xmalloc (len);
837 U_CHAR *ip, *op, *line_base;
838 U_CHAR *ibase;
839 U_CHAR *speccase = pfile->input_speccase;
840 unsigned long line;
841 unsigned int deferred_newlines;
842 int count;
843 size_t offset;
845 offset = 0;
846 op = buf;
847 line_base = buf;
848 line = 1;
849 ibase = pfile->input_buffer + 2;
850 deferred_newlines = 0;
852 for (;;)
854 read_next:
856 count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len);
857 if (count < 0)
858 goto error;
859 else if (count == 0)
860 break;
862 offset += count;
863 ip = ibase;
864 ibase = pfile->input_buffer + 2;
865 ibase[count] = ibase[count+1] = '\0';
867 if (offset > len)
869 size_t delta_op;
870 size_t delta_line_base;
871 len *= 2;
872 if (offset > len)
873 /* len overflowed.
874 This could happen if the file is larger than half the
875 maximum address space of the machine. */
876 goto too_big;
878 delta_op = op - buf;
879 delta_line_base = line_base - buf;
880 buf = (U_CHAR *) xrealloc (buf, len);
881 op = buf + delta_op;
882 line_base = buf + delta_line_base;
885 for (;;)
887 unsigned int span = 0;
889 /* Deal with \-newline in the middle of a token. */
890 if (deferred_newlines)
892 while (speccase[ip[span]] == SPECCASE_EMPTY
893 && ip[span] != '\n'
894 && ip[span] != '\t'
895 && ip[span] != ' ')
896 span++;
897 memcpy (op, ip, span);
898 op += span;
899 ip += span;
900 if (*ip == '\n' || *ip == '\t'
901 || *ip == ' ' || *ip == ' ')
902 while (deferred_newlines)
903 deferred_newlines--, *op++ = '\r';
904 span = 0;
907 /* Copy as much as we can without special treatment. */
908 while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
909 memcpy (op, ip, span);
910 op += span;
911 ip += span;
913 switch (speccase[*ip++])
915 case SPECCASE_NUL: /* \0 */
916 ibase[-1] = op[-1];
917 goto read_next;
919 case SPECCASE_CR: /* \r */
920 if (*ip == '\n')
921 ip++;
922 else if (*ip == '\0')
924 *--ibase = '\r';
925 goto read_next;
927 else if (ip[-2] == '\n')
928 continue;
929 *op++ = '\n';
930 break;
932 case SPECCASE_BACKSLASH: /* \ */
933 backslash:
935 /* If we're at the end of the intermediate buffer,
936 we have to shift the backslash down to the start
937 and come back next pass. */
938 if (*ip == '\0')
940 *--ibase = '\\';
941 goto read_next;
943 else if (*ip == '\n')
945 ip++;
946 if (*ip == '\r') ip++;
947 if (*ip == '\n' || *ip == '\t' || *ip == ' ')
948 *op++ = '\r';
949 else if (op[-1] == '\t' || op[-1] == ' '
950 || op[-1] == '\r' || op[-1] == '\n')
951 *op++ = '\r';
952 else
953 deferred_newlines++;
955 else if (*ip == '\r')
957 ip++;
958 if (*ip == '\n') ip++;
959 else if (*ip == '\0')
961 *--ibase = '\r';
962 *--ibase = '\\';
963 goto read_next;
965 else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
966 *op++ = '\r';
967 else
968 deferred_newlines++;
970 else
971 *op++ = '\\';
973 break;
975 case SPECCASE_QUESTION: /* ? */
977 unsigned int d, t;
978 /* If we're at the end of the intermediate buffer,
979 we have to shift the ?'s down to the start and
980 come back next pass. */
981 d = ip[0];
982 if (d == '\0')
984 *--ibase = '?';
985 goto read_next;
987 if (d != '?')
989 *op++ = '?';
990 break;
992 d = ip[1];
993 if (d == '\0')
995 *--ibase = '?';
996 *--ibase = '?';
997 goto read_next;
1000 /* Trigraph map:
1001 * from to from to from to
1002 * ?? = # ?? ) ] ?? ! |
1003 * ?? ( [ ?? ' ^ ?? > }
1004 * ?? / \ ?? < { ?? - ~
1006 if (d == '=') t = '#';
1007 else if (d == ')') t = ']';
1008 else if (d == '!') t = '|';
1009 else if (d == '(') t = '[';
1010 else if (d == '\'') t = '^';
1011 else if (d == '>') t = '}';
1012 else if (d == '/') t = '\\';
1013 else if (d == '<') t = '{';
1014 else if (d == '-') t = '~';
1015 else
1017 *op++ = '?';
1018 break;
1020 if (CPP_OPTIONS (pfile)->warn_trigraphs)
1022 unsigned long col;
1023 line_base = find_position (line_base, op, &line);
1024 col = op - line_base + 1;
1025 if (CPP_OPTIONS (pfile)->trigraphs)
1026 cpp_warning_with_line (pfile, line, col,
1027 "trigraph ??%c converted to %c", d, t);
1028 else
1029 cpp_warning_with_line (pfile, line, col,
1030 "trigraph ??%c ignored", d);
1032 if (CPP_OPTIONS (pfile)->trigraphs)
1034 if (t == '\\')
1035 goto backslash;
1036 else
1037 *op++ = t;
1039 else
1041 *op++ = '?';
1042 *op++ = '?';
1043 *op++ = d;
1045 ip += 2;
1051 if (offset == 0)
1052 return 0;
1054 /* Deal with pushed-back chars at true EOF.
1055 This may be any of: ?? ? \ \r \n \\r \\n.
1056 \r must become \n, \\r or \\n must become \r.
1057 We know we have space already. */
1058 if (ibase == pfile->input_buffer)
1060 if (*ibase == '?')
1062 *op++ = '?';
1063 *op++ = '?';
1065 else
1066 *op++ = '\r';
1068 else if (ibase == pfile->input_buffer + 1)
1070 if (*ibase == '\r')
1071 *op++ = '\n';
1072 else
1073 *op++ = *ibase;
1076 if (op[-1] != '\n')
1078 unsigned long col;
1079 line_base = find_position (line_base, op, &line);
1080 col = op - line_base + 1;
1081 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1082 if (offset + 1 > len)
1084 len += 1;
1085 if (offset + 1 > len)
1086 goto too_big;
1087 buf = (U_CHAR *) xrealloc (buf, len);
1088 op = buf + offset;
1090 *op++ = '\n';
1093 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1094 return op - buf;
1096 too_big:
1097 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1098 free (buf);
1099 return -1;
1101 error:
1102 cpp_error_from_errno (pfile, fp->fname);
1103 free (buf);
1104 return -1;
1107 /* Initialize the `input_buffer' and `input_speccase' tables.
1108 These are only used by read_and_prescan, but they're large and
1109 somewhat expensive to set up, so we want them allocated once for
1110 the duration of the cpp run. */
1112 static void
1113 initialize_input_buffer (pfile, fd, st)
1114 cpp_reader *pfile;
1115 int fd;
1116 struct stat *st;
1118 long pipe_buf;
1119 U_CHAR *tmp;
1121 /* Table of characters that cannot be handled by the
1122 read_and_prescan inner loop. The number of non-EMPTY entries
1123 should be as small as humanly possible. */
1125 tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT);
1126 memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT);
1127 tmp['\0'] = SPECCASE_NUL;
1128 tmp['\r'] = SPECCASE_CR;
1129 tmp['\\'] = SPECCASE_BACKSLASH;
1130 if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
1131 tmp['?'] = SPECCASE_QUESTION;
1133 pfile->input_speccase = tmp;
1135 /* Determine the appropriate size for the input buffer. Normal C
1136 source files are smaller than eight K. If we are reading a pipe,
1137 we want to make sure the input buffer is bigger than the kernel's
1138 pipe buffer. */
1139 pipe_buf = -1;
1141 if (! S_ISREG (st->st_mode))
1143 #ifdef _PC_PIPE_BUF
1144 pipe_buf = fpathconf (fd, _PC_PIPE_BUF);
1145 #endif
1146 if (pipe_buf == -1)
1148 #ifdef PIPE_BUF
1149 pipe_buf = PIPE_BUF;
1150 #else
1151 pipe_buf = 8192;
1152 #endif
1156 if (pipe_buf < 8192)
1157 pipe_buf = 8192;
1158 /* PIPE_BUF bytes of buffer proper, 2 to detect running off the end
1159 without address arithmetic all the time, and 2 for pushback in
1160 the case there's a potential trigraph or end-of-line digraph at
1161 the end of a block. */
1163 tmp = (U_CHAR *) xmalloc (pipe_buf + 2 + 2);
1164 pfile->input_buffer = tmp;
1165 pfile->input_buffer_len = pipe_buf;
1168 /* Add output to `deps_buffer' for the -M switch.
1169 STRING points to the text to be output.
1170 SPACER is ':' for targets, ' ' for dependencies, zero for text
1171 to be inserted literally. */
1173 void
1174 deps_output (pfile, string, spacer)
1175 cpp_reader *pfile;
1176 const char *string;
1177 int spacer;
1179 int size;
1180 int cr = 0;
1182 if (!*string)
1183 return;
1185 size = strlen (string);
1187 #ifndef MAX_OUTPUT_COLUMNS
1188 #define MAX_OUTPUT_COLUMNS 72
1189 #endif
1190 if (pfile->deps_column > 0
1191 && (pfile->deps_column + size) > MAX_OUTPUT_COLUMNS)
1193 cr = 5;
1194 pfile->deps_column = 0;
1197 if (pfile->deps_size + size + cr + 8 > pfile->deps_allocated_size)
1199 pfile->deps_allocated_size = (pfile->deps_size + size + 50) * 2;
1200 pfile->deps_buffer = (char *) xrealloc (pfile->deps_buffer,
1201 pfile->deps_allocated_size);
1204 if (cr)
1206 bcopy (" \\\n ", &pfile->deps_buffer[pfile->deps_size], 5);
1207 pfile->deps_size += 5;
1210 if (spacer == ' ' && pfile->deps_column > 0)
1211 pfile->deps_buffer[pfile->deps_size++] = ' ';
1212 bcopy (string, &pfile->deps_buffer[pfile->deps_size], size);
1213 pfile->deps_size += size;
1214 pfile->deps_column += size;
1215 if (spacer == ':')
1216 pfile->deps_buffer[pfile->deps_size++] = ':';
1217 pfile->deps_buffer[pfile->deps_size] = 0;
/* Simplify a path name in place, deleting redundant components.  This
   reduces OS overhead and guarantees that equivalent paths compare
   the same (modulo symlinks).

   Transforms made:
   foo/bar/../quux	foo/quux
   foo/./bar		foo/bar
   foo//bar		foo/bar
   /../quux		/quux
   //quux		//quux  (POSIX allows leading // as a namespace escape)
   foo/			foo

   Guarantees no trailing slashes, except that a path consisting only
   of its root ("/", "//", or the slash following a DOS drive letter)
   keeps that root.  All transforms reduce the length of the string,
   with one exception: the empty string becomes ".", so PATH must
   have room for at least two bytes.  */

void
simplify_pathname (path)
     char *path;
{
  char *from, *to;
  char *base;		/* ".." may not back up past this point.  */
  char *start;		/* First byte after any drive letter.  */
  int absolute = 0;

#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
  /* Convert all backslashes to slashes.  */
  for (from = path; *from; from++)
    if (*from == '\\')
      *from = '/';

  /* Skip over leading drive letter if present.  */
  if (ISALPHA (path[0]) && path[1] == ':')
    from = to = &path[2];
  else
    from = to = path;
#else
  from = to = path;
#endif
  start = to;

  /* Remove redundant initial /s.  */
  if (*from == '/')
    {
      absolute = 1;
      to++;
      from++;
      if (*from == '/')
	{
	  if (*++from == '/')
	    {
	      /* 3 or more initial /s are equivalent to 1 /.  */
	      while (*++from == '/')
		;
	    }
	  else
	    /* On some hosts // differs from /; Posix allows this.  */
	    to++;
	}
    }
  base = to;

  for (;;)
    {
      while (*from == '/')
	from++;

      if (from[0] == '.' && from[1] == '/')
	from += 2;
      else if (from[0] == '.' && from[1] == '\0')
	goto done;
      else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
	{
	  if (base == to)
	    {
	      /* Nothing to cancel against.  "/.." is "/"; a relative
		 "../" has to be kept, and later ".."s must not be
		 allowed to eat it, so BASE moves past it.  */
	      if (absolute)
		from += 3;
	      else
		{
		  *to++ = *from++;
		  *to++ = *from++;
		  *to++ = *from++;
		  base = to;
		}
	    }
	  else
	    {
	      /* Drop the previous component: step back over its
		 trailing slash, then to the slash before it (or to
		 BASE if it was the first component).  */
	      to -= 2;
	      while (to > base && *to != '/')
		to--;
	      if (*to == '/')
		to++;
	      from += 3;
	    }
	}
      else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
	{
	  if (base == to)
	    {
	      if (!absolute)
		{
		  *to++ = *from++;
		  *to++ = *from++;
		}
	    }
	  else
	    {
	      to -= 2;
	      while (to > base && *to != '/')
		to--;
	      if (*to == '/')
		to++;
	    }
	  goto done;
	}
      else
	{
	  /* Copy this component and trailing /, if any.  */
	  while ((*to++ = *from++) != '/')
	    {
	      if (!to[-1])
		{
		  to--;
		  goto done;
		}
	    }
	}
    }

 done:
  /* Trim a trailing slash.  TO is one past the last byte of output,
     so the slash to examine is to[-1]; to[0] is leftover input and
     must not be consulted.  The root of an absolute path is never
     trimmed, and neither is anything before START.  */
  if (to > (absolute ? base : start) && to[-1] == '/')
    to--;

  /* Change the empty string to "." so that stat() on the result
     will always work.  */
  if (to == path)
    *to++ = '.';

  *to = '\0';
}
/* It is not clear when this should be used if at all, so I've
   disabled it until someone who understands VMS can look at it. */
#if 0

/* Under VMS we need to fix up the "include" specification filename.

   Rules for possible conversions

	fullname		tried paths

	name			name
	./dir/name		[.dir]name
	/dir/name		dir:name
	/name			[000000]name, name
	dir/name		dir:[000000]name, dir:name, dir/name
	dir1/dir2/name		dir1:[dir2]name, dir1:[000000.dir2]name
	path:/name		path:[000000]name, path:name
	path:/dir/name		path:[000000.dir]name, path:[dir]name
	path:dir/name		path:[dir]name
	[path]:[dir]name	[path.dir]name
	path/[dir]name		[path.dir]name

   The path:/name input is constructed when expanding <> includes.

   FULLNAME is rewritten in place.  The converted name can be longer
   than the original (for instance when "[000000]" is inserted), so
   the caller has to supply a buffer with room to spare.  A name with
   no '/' in it, or one too long for the internal scratch buffer, is
   left untouched.  */

static void
hack_vms_include_specification (fullname)
     char *fullname;
{
  char *basename, *unixname, *local_ptr, *first_slash;
  int f, check_filename_before_returning, must_revert;
  /* Scratch area for the converted name.  One sentinel byte sits in
     front of it so that the `local_ptr[-1]' and `local_ptr[-2]' tests
     below are well defined even when nothing has been written yet.  */
  char storage[1 + 512];
  char *Local = storage + 1;

  storage[0] = 0;
  check_filename_before_returning = 0;
  must_revert = 0;

  /* See if we can find a 1st slash. If not, there's no path information.  */
  first_slash = index (fullname, '/');
  if (first_slash == 0)
    return;				/* Nothing to do!!! */

  /* The conversion adds at most a handful of punctuation characters
     plus "000000]" to the input; refuse anything that could overrun
     the scratch buffer rather than smash the stack.  */
  if (strlen (fullname) + 16 > sizeof storage)
    return;

  /* construct device spec if none given.  */

  if (index (fullname, ':') == 0)
    {
      /* If fullname has a slash, take it as device spec.  */

      if (first_slash == fullname)
	{
	  first_slash = index (fullname + 1, '/');	/* 2nd slash ? */
	  if (first_slash)
	    *first_slash = ':';				/* make device spec  */
	  for (basename = fullname; *basename != 0; basename++)
	    *basename = *(basename + 1);		/* remove leading slash  */
	}
      else if ((first_slash[-1] != '.')		/* keep ':/', './' */
	       && (first_slash[-1] != ':')
	       && (first_slash[-1] != ']'))	/* or a vms path  */
	{
	  *first_slash = ':';
	}
      else if ((first_slash[1] == '[')		/* skip './' in './[dir'  */
	       && (first_slash[-1] == '.'))
	fullname += 2;
    }

  /* Get part after first ':' (basename[-1] == ':')
     or last '/' (basename[-1] == '/').  */

  basename = base_name (fullname);

  local_ptr = Local;			/* initialize */

  /* We are trying to do a number of things here.  First of all, we are
     trying to hammer the filenames into a standard format, such that later
     processing can handle them.

     If the file name contains something like [dir.], then it recognizes this
     as a root, and strips the ".]".  Later processing will add whatever is
     needed to get things working properly.

     If no device is specified, then the first directory name is taken to be
     a device name (or a rooted logical).  */

  /* Point to the UNIX filename part (which needs to be fixed!)
     but skip vms path information.
     [basename != fullname since first_slash != 0].  */

  if ((basename[-1] == ':')		/* vms path spec.  */
      || (basename[-1] == ']')
      || (basename[-1] == '>'))
    unixname = basename;
  else
    unixname = fullname;

  if (*unixname == '/')
    unixname++;

  /* If the directory spec is not rooted, we can just copy
     the UNIX filename part and we are done.  */

  if (((basename - fullname) > 1)
      && ((basename[-1] == ']')
	  || (basename[-1] == '>')))
    {
      if (basename[-2] != '.')
	{
	  /* The VMS part ends in a `]', and the preceding character is not
	     a `.'.
	     -> PATH]:/name (basename = '/name', unixname = 'name')
	     We strip the `]', and then splice the two parts of the name in
	     the usual way.  Given the default locations for include files in
	     cccp.c, we will only use this code if the user specifies
	     alternate locations with the /include (-I) switch on the command
	     line.  */

	  basename -= 1;	/* Strip "]" */
	  unixname--;		/* backspace */
	}
      else
	{
	  /* The VMS part has a ".]" at the end, and this will not do.  Later
	     processing will add a second directory spec, and this would be a
	     syntax error.  Thus we strip the ".]", and thus merge the
	     directory specs.  We also backspace unixname, so that it points
	     to a '/'.  This inhibits the generation of the 000000 root
	     directory spec (which does not belong here in this case).  */

	  basename -= 2;	/* Strip ".]" */
	  unixname--;		/* backspace */
	}
    }
  else
    {
      /* We drop in here if there is no VMS style directory specification
	 yet.  If there is no device specification either, we make the first
	 dir a device and try that.  If we do not do this, then we will be
	 essentially searching the users default directory (as if they did a
	 #include "asdf.h").

	 Then all we need to do is to push a '[' into the output string.
	 Later processing will fill this in, and close the bracket.  */

      if ((unixname != fullname)	/* vms path spec found.  */
	  && (basename[-1] != ':'))
	*local_ptr++ = ':';		/* dev not in spec.  take first dir */

      *local_ptr++ = '[';		/* Open the directory specification */
    }

  if (unixname == fullname)		/* no vms dir spec.  */
    {
      must_revert = 1;
      if ((first_slash != 0)		/* unix dir spec.  */
	  && (*unixname != '/')		/* not beginning with '/'  */
	  && (*unixname != '.'))	/* or './' or '../'  */
	*local_ptr++ = '.';		/* dir is local !  */
    }

  /* at this point we assume that we have the device spec, and (at least
     the opening "[" for a directory specification.  We may have directories
     specified already.

     If there are no other slashes then the filename will be
     in the "root" directory.  Otherwise, we need to add
     directory specifications.  */

  if (index (unixname, '/') == 0)
    {
      /* if no directories specified yet and none are following.  */
      if (local_ptr[-1] == '[')
	{
	  /* Just add "000000]" as the directory string */
	  strcpy (local_ptr, "000000]");
	  local_ptr += strlen (local_ptr);
	  /* we might need to fool with this later */
	  check_filename_before_returning = 1;
	}
    }
  else
    {
      /* As long as there are still subdirectories to add, do them.  */
      while (index (unixname, '/') != 0)
	{
	  /* If this token is "." we can ignore it
	     if it's not at the beginning of a path.  */
	  if ((unixname[0] == '.') && (unixname[1] == '/'))
	    {
	      /* remove it at beginning of path.  */
	      if (((unixname == fullname)		/* no device spec  */
		   && (fullname + 2 != basename))	/* starts with ./ */
							/* or  */
		  || ((basename[-1] == ':')		/* device spec  */
		      && (unixname - 1 == basename)))	/* and ./ afterwards  */
		*local_ptr++ = '.';		/* make '[.' start of path.  */
	      unixname += 2;
	      continue;
	    }

	  /* Add a subdirectory spec. Do not duplicate "." */
	  if (local_ptr[-1] != '.'
	      && local_ptr[-1] != '['
	      && local_ptr[-1] != '<')
	    *local_ptr++ = '.';

	  /* If this is ".." then the spec becomes "-" */
	  if ((unixname[0] == '.')
	      && (unixname[1] == '.')
	      && (unixname[2] == '/'))
	    {
	      /* Add "-" and skip the ".." */
	      if ((local_ptr[-1] == '.')
		  && (local_ptr[-2] == '['))
		local_ptr--;			/* prevent [.-  */
	      *local_ptr++ = '-';
	      unixname += 3;
	      continue;
	    }

	  /* Copy the subdirectory */
	  while (*unixname != '/')
	    *local_ptr++ = *unixname++;

	  unixname++;			/* Skip the "/" */
	}

      /* Close the directory specification */
      if (local_ptr[-1] == '.')		/* no trailing periods */
	local_ptr--;

      if (local_ptr[-1] == '[')		/* no dir needed */
	local_ptr--;
      else
	*local_ptr++ = ']';
    }

  /* Now add the filename.  */

  while (*unixname)
    *local_ptr++ = *unixname++;
  *local_ptr = 0;

  /* Now append it to the original VMS spec.  */

  strcpy ((must_revert == 1) ? fullname : basename, Local);

  /* If we put a [000000] in the filename, try to open it first.  If this
     fails, remove the [000000], and return that name.  This provides
     flexibility to the user in that they can use both rooted and non-rooted
     logical names to point to the location of the file.  */

  if (check_filename_before_returning)
    {
      f = open (fullname, O_RDONLY, 0666);
      if (f >= 0)
	{
	  /* The file name is OK as it is, so return it as is.  */
	  close (f);
	  return;
	}

      /* The filename did not work.  Try to remove the [000000] from the
	 name, and return it.  Source and destination both lie inside
	 FULLNAME and overlap, which strcpy does not permit, so slide the
	 tail down one byte at a time; copying forward is safe because the
	 destination precedes the source.  */

      basename = index (fullname, '[');
      local_ptr = index (fullname, ']') + 1;
      while ((*basename++ = *local_ptr++) != 0)
	;
    }
}
#endif	/* VMS */