more copyright nits
[official-gcc.git] / gcc / cppfiles.c
blob95f1196c7f4777b14967911837db66f741c77625
1 /* Part of CPP library. (include file handling)
2 Copyright (C) 1986, 87, 89, 92-95, 98, 99, 2000 Free Software Foundation, Inc.
3 Written by Per Bothner, 1994.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Split out of cpplib.c, Zack Weinberg, Oct 1998
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 In other words, you are welcome to use, share and improve this program.
23 You are forbidden to forbid anyone else to use, share and improve
24 what you give them. Help stamp out software-hoarding! */
26 #include "config.h"
27 #include "system.h"
28 #include "cpplib.h"
30 /* The entry points to this file are: find_include_file, finclude,
31 include_hash, append_include_chain, deps_output, and file_cleanup.
32 file_cleanup is only called through CPP_BUFFER(pfile)->cleanup,
33 so it's static anyway. */
35 static struct include_hash *redundant_include_p
36 PARAMS ((cpp_reader *,
37 struct include_hash *,
38 struct file_name_list *));
39 static struct file_name_map *read_name_map PARAMS ((cpp_reader *,
40 const char *));
41 static char *read_filename_string PARAMS ((int, FILE *));
42 static char *remap_filename PARAMS ((cpp_reader *, char *,
43 struct file_name_list *));
44 static long read_and_prescan PARAMS ((cpp_reader *, cpp_buffer *,
45 int, size_t));
46 static struct file_name_list *actual_directory PARAMS ((cpp_reader *,
47 const char *));
48 static void initialize_input_buffer PARAMS ((cpp_reader *, int,
49 struct stat *));
50 static int file_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
51 static void find_position PARAMS ((U_CHAR *, U_CHAR *,
52 unsigned long *,
53 unsigned long *));
55 #if 0
56 static void hack_vms_include_specification PARAMS ((char *));
57 #endif
/* INO_T_EQ (A, B) compares two inode values.

   - VMS has non-numeric inodes, so they are compared bytewise.
   - Windows and MSDOS do not natively support inodes, and Cygwin's
     emulation can generate non-unique inodes, so the comparison is
     hard-wired to "never equal" there.
   - Everywhere else a plain == is used.  */
#if defined (VMS)
# define INO_T_EQ(a, b) (!bcmp((char *) &(a), (char *) &(b), sizeof (a)))
#elif (defined _WIN32 && ! defined (_UWIN)) \
  || defined __MSDOS__
# define INO_T_EQ(a, b) 0
#else
# define INO_T_EQ(a, b) ((a) == (b))
#endif
71 /* Merge the four include chains together in the order quote, bracket,
72 system, after. Remove duplicate dirs (as determined by
73 INO_T_EQ()). The system_include and after_include chains are never
74 referred to again after this function; all access is through the
75 bracket_include path.
77 For the future: Check if the directory is empty (but
78 how?) and possibly preload the include hash. */
80 void
81 merge_include_chains (opts)
82 struct cpp_options *opts;
84 struct file_name_list *prev, *cur, *other;
85 struct file_name_list *quote, *brack, *systm, *after;
86 struct file_name_list *qtail, *btail, *stail, *atail;
88 qtail = opts->pending->quote_tail;
89 btail = opts->pending->brack_tail;
90 stail = opts->pending->systm_tail;
91 atail = opts->pending->after_tail;
93 quote = opts->pending->quote_head;
94 brack = opts->pending->brack_head;
95 systm = opts->pending->systm_head;
96 after = opts->pending->after_head;
98 /* Paste together bracket, system, and after include chains. */
99 if (stail)
100 stail->next = after;
101 else
102 systm = after;
103 if (btail)
104 btail->next = systm;
105 else
106 brack = systm;
108 /* This is a bit tricky.
109 First we drop dupes from the quote-include list.
110 Then we drop dupes from the bracket-include list.
111 Finally, if qtail and brack are the same directory,
112 we cut out qtail.
114 We can't just merge the lists and then uniquify them because
115 then we may lose directories from the <> search path that should
116 be there; consider -Ifoo -Ibar -I- -Ifoo -Iquux. It is however
117 safe to treat -Ibar -Ifoo -I- -Ifoo -Iquux as if written
118 -Ibar -I- -Ifoo -Iquux.
120 Note that this algorithm is quadratic in the number of -I switches,
121 which is acceptable since there aren't usually that many of them. */
123 for (cur = quote, prev = NULL; cur; cur = cur->next)
125 for (other = quote; other != cur; other = other->next)
126 if (INO_T_EQ (cur->ino, other->ino)
127 && cur->dev == other->dev)
129 if (opts->verbose)
130 cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);
132 prev->next = cur->next;
133 free (cur->name);
134 free (cur);
135 cur = prev;
136 break;
138 prev = cur;
140 qtail = prev;
142 for (cur = brack; cur; cur = cur->next)
144 for (other = brack; other != cur; other = other->next)
145 if (INO_T_EQ (cur->ino, other->ino)
146 && cur->dev == other->dev)
148 if (opts->verbose)
149 cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);
151 prev->next = cur->next;
152 free (cur->name);
153 free (cur);
154 cur = prev;
155 break;
157 prev = cur;
160 if (quote)
162 if (INO_T_EQ (qtail->ino, brack->ino) && qtail->dev == brack->dev)
164 if (quote == qtail)
166 if (opts->verbose)
167 cpp_notice ("ignoring duplicate directory `%s'\n",
168 quote->name);
170 free (quote->name);
171 free (quote);
172 quote = brack;
174 else
176 cur = quote;
177 while (cur->next != qtail)
178 cur = cur->next;
179 cur->next = brack;
180 if (opts->verbose)
181 cpp_notice ("ignoring duplicate directory `%s'\n",
182 qtail->name);
184 free (qtail->name);
185 free (qtail);
188 else
189 qtail->next = brack;
191 else
192 quote = brack;
194 opts->quote_include = quote;
195 opts->bracket_include = brack;
198 /* Look up or add an entry to the table of all includes. This table
199 is indexed by the name as it appears in the #include line. The
200 ->next_this_file chain stores all different files with the same
201 #include name (there are at least three ways this can happen). The
202 hash function could probably be improved a bit. */
204 struct include_hash *
205 include_hash (pfile, fname, add)
206 cpp_reader *pfile;
207 const char *fname;
208 int add;
210 unsigned int hash = 0;
211 struct include_hash *l, *m;
212 const char *f = fname;
214 while (*f)
215 hash += *f++;
217 l = pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE];
218 m = 0;
219 for (; l; m = l, l = l->next)
220 if (!strcmp (l->nshort, fname))
221 return l;
223 if (!add)
224 return 0;
226 l = (struct include_hash *) xmalloc (sizeof (struct include_hash));
227 l->next = NULL;
228 l->next_this_file = NULL;
229 l->foundhere = NULL;
230 l->buf = NULL;
231 l->limit = NULL;
232 if (m)
233 m->next = l;
234 else
235 pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE] = l;
237 return l;
240 /* Return 0 if the file pointed to by IHASH has never been included before,
241 -1 if it has been included before and need not be again,
242 or a pointer to an IHASH entry which is the file to be reread.
243 "Never before" is with respect to the position in ILIST.
245 This will not detect redundancies involving odd uses of the
246 `current directory' rule for "" includes. They aren't quite
247 pathological, but I think they are rare enough not to worry about.
248 The simplest example is:
250 top.c:
251 #include "a/a.h"
252 #include "b/b.h"
254 a/a.h:
255 #include "../b/b.h"
257 and the problem is that for `current directory' includes,
258 ihash->foundhere is not on any of the global include chains,
259 so the test below (i->foundhere == l) may be false even when
260 the directories are in fact the same. */
262 static struct include_hash *
263 redundant_include_p (pfile, ihash, ilist)
264 cpp_reader *pfile;
265 struct include_hash *ihash;
266 struct file_name_list *ilist;
268 struct file_name_list *l;
269 struct include_hash *i;
271 if (! ihash->foundhere)
272 return 0;
274 for (i = ihash; i; i = i->next_this_file)
275 for (l = ilist; l; l = l->next)
276 if (i->foundhere == l)
277 /* The control_macro works like this: If it's NULL, the file
278 is to be included again. If it's "", the file is never to
279 be included again. If it's a string, the file is not to be
280 included again if the string is the name of a defined macro. */
281 return (i->control_macro
282 && (i->control_macro[0] == '\0'
283 || cpp_lookup (pfile, i->control_macro, -1, -1)))
284 ? (struct include_hash *)-1 : i;
286 return 0;
289 static int
290 file_cleanup (pbuf, pfile)
291 cpp_buffer *pbuf;
292 cpp_reader *pfile;
294 if (pbuf->buf)
296 free (pbuf->buf);
297 pbuf->buf = 0;
299 if (pfile->system_include_depth)
300 pfile->system_include_depth--;
301 return 0;
304 /* Search for include file FNAME in the include chain starting at
305 SEARCH_START. Return -2 if this file doesn't need to be included
306 (because it was included already and it's marked idempotent),
307 -1 if an error occurred, or a file descriptor open on the file.
308 *IHASH is set to point to the include hash entry for this file, and
309 *BEFORE is 1 if the file was included before (but needs to be read
310 again). */
312 find_include_file (pfile, fname, search_start, ihash, before)
313 cpp_reader *pfile;
314 const char *fname;
315 struct file_name_list *search_start;
316 struct include_hash **ihash;
317 int *before;
319 struct file_name_list *l;
320 struct include_hash *ih, *jh;
321 int f, len;
322 char *name;
324 ih = include_hash (pfile, fname, 1);
325 jh = redundant_include_p (pfile, ih,
326 fname[0] == '/' ? ABSOLUTE_PATH : search_start);
328 if (jh != 0)
330 *before = 1;
331 *ihash = jh;
333 if (jh == (struct include_hash *)-1)
334 return -2;
335 else
336 return open (jh->name, O_RDONLY, 0666);
339 if (ih->foundhere)
340 /* A file is already known by this name, but it's not the same file.
341 Allocate another include_hash block and add it to the next_this_file
342 chain. */
344 jh = (struct include_hash *)xmalloc (sizeof (struct include_hash));
345 while (ih->next_this_file) ih = ih->next_this_file;
347 ih->next_this_file = jh;
348 jh = ih;
349 ih = ih->next_this_file;
351 ih->next = NULL;
352 ih->next_this_file = NULL;
353 ih->buf = NULL;
354 ih->limit = NULL;
356 *before = 0;
357 *ihash = ih;
358 ih->nshort = xstrdup (fname);
359 ih->control_macro = NULL;
361 /* If the pathname is absolute, just open it. */
362 if (fname[0] == '/')
364 ih->foundhere = ABSOLUTE_PATH;
365 ih->name = ih->nshort;
366 return open (ih->name, O_RDONLY, 0666);
369 /* Search directory path, trying to open the file. */
371 len = strlen (fname);
372 name = xmalloc (len + pfile->max_include_len + 2 + INCLUDE_LEN_FUDGE);
374 for (l = search_start; l; l = l->next)
376 bcopy (l->name, name, l->nlen);
377 name[l->nlen] = '/';
378 strcpy (&name[l->nlen+1], fname);
379 simplify_pathname (name);
380 if (CPP_OPTIONS (pfile)->remap)
381 name = remap_filename (pfile, name, l);
383 f = open (name, O_RDONLY|O_NONBLOCK|O_NOCTTY, 0666);
384 #ifdef EACCES
385 if (f == -1 && errno == EACCES)
387 cpp_error(pfile, "included file `%s' exists but is not readable",
388 name);
389 return -1;
391 #endif
393 if (f >= 0)
395 ih->foundhere = l;
396 ih->name = xrealloc (name, strlen (name)+1);
397 return f;
401 if (jh)
403 jh->next_this_file = NULL;
404 free (ih);
406 free (name);
407 *ihash = (struct include_hash *)-1;
408 return -1;
/* The file_name_map structure holds a mapping of file names for a
   particular directory.  This mapping is read from the file named
   FILE_NAME_MAP_FILE in that directory.  Such a file can be used to
   map filenames on a file system with severe filename restrictions,
   such as DOS.  The format of the file name map file is just a series
   of lines with two tokens on each line.  The first token is the name
   to map, and the second token is the actual name to use.  */

struct file_name_map
{
  /* Next mapping for the same directory.  */
  struct file_name_map *map_next;
  /* Name to map (first token of the line).  */
  char *map_from;
  /* Actual name to use (second token of the line).  */
  char *map_to;
};

/* Name of the per-directory map file.  */
#define FILE_NAME_MAP_FILE "header.gcc"
/* Read a space delimited string of unlimited length from a stdio
   file.

   CH is the first character of the string, already read by the
   caller; further characters are taken from F until EOF or a
   character for which is_space is true.  That terminating character
   is pushed back onto F.  If CH is itself EOF or a space, the result
   is the empty string.

   Returns a NUL-terminated buffer obtained from xmalloc; the caller
   owns it.  */

static char *
read_filename_string (ch, f)
     int ch;
     FILE *f;
{
  char *alloc, *set;
  int len;

  len = 20;
  set = alloc = xmalloc (len + 1);
  /* EOF is checked first so it is never handed to is_space.  */
  if (ch != EOF && ! is_space(ch))
    {
      *set++ = ch;
      while ((ch = getc (f)) != EOF && ! is_space(ch))
        {
          if (set - alloc == len)
            {
              /* Buffer full: double it and re-derive the write
                 position, since xrealloc may move the block.  */
              len *= 2;
              alloc = xrealloc (alloc, len + 1);
              set = alloc + len / 2;
            }
          *set++ = ch;
        }
    }
  *set = '\0';
  ungetc (ch, f);
  return alloc;
}
/* This structure holds a linked list of file name maps, one per
   directory.  */

struct file_name_map_list
{
  /* Next directory's record.  */
  struct file_name_map_list *map_list_next;
  /* Directory name this record was read for.  */
  char *map_list_name;
  /* Head of that directory's mappings.  */
  struct file_name_map *map_list_map;
};
469 /* Read the file name map file for DIRNAME. */
471 static struct file_name_map *
472 read_name_map (pfile, dirname)
473 cpp_reader *pfile;
474 const char *dirname;
476 register struct file_name_map_list *map_list_ptr;
477 char *name;
478 FILE *f;
480 for (map_list_ptr = CPP_OPTIONS (pfile)->map_list; map_list_ptr;
481 map_list_ptr = map_list_ptr->map_list_next)
482 if (! strcmp (map_list_ptr->map_list_name, dirname))
483 return map_list_ptr->map_list_map;
485 map_list_ptr = ((struct file_name_map_list *)
486 xmalloc (sizeof (struct file_name_map_list)));
487 map_list_ptr->map_list_name = xstrdup (dirname);
489 name = (char *) alloca (strlen (dirname) + strlen (FILE_NAME_MAP_FILE) + 2);
490 strcpy (name, dirname);
491 if (*dirname)
492 strcat (name, "/");
493 strcat (name, FILE_NAME_MAP_FILE);
494 f = fopen (name, "r");
495 if (!f)
496 map_list_ptr->map_list_map = (struct file_name_map *)-1;
497 else
499 int ch;
500 int dirlen = strlen (dirname);
502 while ((ch = getc (f)) != EOF)
504 char *from, *to;
505 struct file_name_map *ptr;
507 if (is_space(ch))
508 continue;
509 from = read_filename_string (ch, f);
510 while ((ch = getc (f)) != EOF && is_hspace(ch))
512 to = read_filename_string (ch, f);
514 ptr = ((struct file_name_map *)
515 xmalloc (sizeof (struct file_name_map)));
516 ptr->map_from = from;
518 /* Make the real filename absolute. */
519 if (*to == '/')
520 ptr->map_to = to;
521 else
523 ptr->map_to = xmalloc (dirlen + strlen (to) + 2);
524 strcpy (ptr->map_to, dirname);
525 ptr->map_to[dirlen] = '/';
526 strcpy (ptr->map_to + dirlen + 1, to);
527 free (to);
530 ptr->map_next = map_list_ptr->map_list_map;
531 map_list_ptr->map_list_map = ptr;
533 while ((ch = getc (f)) != '\n')
534 if (ch == EOF)
535 break;
537 fclose (f);
540 map_list_ptr->map_list_next = CPP_OPTIONS (pfile)->map_list;
541 CPP_OPTIONS (pfile)->map_list = map_list_ptr;
543 return map_list_ptr->map_list_map;
546 /* Remap NAME based on the file_name_map (if any) for LOC. */
548 static char *
549 remap_filename (pfile, name, loc)
550 cpp_reader *pfile;
551 char *name;
552 struct file_name_list *loc;
554 struct file_name_map *map;
555 const char *from, *p, *dir;
557 if (! loc->name_map)
558 loc->name_map = read_name_map (pfile,
559 loc->name
560 ? loc->name : ".");
562 if (loc->name_map == (struct file_name_map *)-1)
563 return name;
565 from = name + strlen (loc->name) + 1;
567 for (map = loc->name_map; map; map = map->map_next)
568 if (!strcmp (map->map_from, from))
569 return map->map_to;
571 /* Try to find a mapping file for the particular directory we are
572 looking in. Thus #include <sys/types.h> will look up sys/types.h
573 in /usr/include/header.gcc and look up types.h in
574 /usr/include/sys/header.gcc. */
575 p = rindex (name, '/');
576 if (!p)
577 p = name;
578 if (loc && loc->name
579 && strlen (loc->name) == (size_t) (p - name)
580 && !strncmp (loc->name, name, p - name))
581 /* FILENAME is in SEARCHPTR, which we've already checked. */
582 return name;
584 if (p == name)
586 dir = ".";
587 from = name;
589 else
591 char * newdir = (char *) alloca (p - name + 1);
592 bcopy (name, newdir, p - name);
593 newdir[p - name] = '\0';
594 dir = newdir;
595 from = p + 1;
598 for (map = read_name_map (pfile, dir); map; map = map->map_next)
599 if (! strcmp (map->map_from, name))
600 return map->map_to;
602 return name;
605 /* Read the contents of FD into the buffer on the top of PFILE's stack.
606 IHASH points to the include hash entry for the file associated with
609 The caller is responsible for the cpp_push_buffer. */
612 finclude (pfile, fd, ihash)
613 cpp_reader *pfile;
614 int fd;
615 struct include_hash *ihash;
617 struct stat st;
618 size_t st_size;
619 long length;
620 cpp_buffer *fp;
622 if (fstat (fd, &st) < 0)
623 goto perror_fail;
624 if (fcntl (fd, F_SETFL, 0) == -1) /* turn off nonblocking mode */
625 goto perror_fail;
627 fp = CPP_BUFFER (pfile);
629 /* If fd points to a plain file, we know how big it is, so we can
630 allocate the buffer all at once. If fd is a pipe or terminal, we
631 can't. Most C source files are 4k or less, so we guess that. If
632 fd is something weird, like a block device or a directory, we
633 don't want to read it at all.
635 Unfortunately, different systems use different st.st_mode values
636 for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
637 zero the entire struct stat except a couple fields. Hence the
638 mess below.
640 In all cases, read_and_prescan will resize the buffer if it
641 turns out there's more data than we thought. */
643 if (S_ISREG (st.st_mode))
645 /* off_t might have a wider range than size_t - in other words,
646 the max size of a file might be bigger than the address
647 space. We can't handle a file that large. (Anyone with
648 a single source file bigger than 4GB needs to rethink
649 their coding style.) */
650 st_size = (size_t) st.st_size;
651 if ((unsigned HOST_WIDEST_INT) st_size
652 != (unsigned HOST_WIDEST_INT) st.st_size)
654 cpp_error (pfile, "file `%s' is too large", ihash->name);
655 goto fail;
658 else if (S_ISFIFO (st.st_mode) || S_ISSOCK (st.st_mode)
659 /* Permit any kind of character device: the sensible ones are
660 ttys and /dev/null, but weeding out the others is too hard. */
661 || S_ISCHR (st.st_mode)
662 /* Some 4.x (x<4) derivatives have a bug that makes fstat() of a
663 socket or pipe return a stat struct with most fields zeroed. */
664 || (st.st_mode == 0 && st.st_nlink == 0 && st.st_size == 0))
666 /* Cannot get its file size before reading. 4k is a decent
667 first guess. */
668 st_size = 4096;
670 else
672 cpp_error (pfile, "`%s' is not a file, pipe, or tty", ihash->name);
673 goto fail;
676 if (pfile->input_buffer == NULL)
677 initialize_input_buffer (pfile, fd, &st);
679 /* Read the file, converting end-of-line characters and trigraphs
680 (if enabled). */
681 fp->ihash = ihash;
682 fp->nominal_fname = fp->fname = ihash->name;
683 length = read_and_prescan (pfile, fp, fd, st_size);
684 if (length < 0)
685 goto fail;
686 if (length == 0)
687 ihash->control_macro = ""; /* never re-include */
689 close (fd);
690 fp->rlimit = fp->alimit = fp->buf + length;
691 fp->cur = fp->buf;
692 if (ihash->foundhere != ABSOLUTE_PATH)
693 fp->system_header_p = ihash->foundhere->sysp;
694 fp->lineno = 1;
695 fp->colno = 1;
696 fp->line_base = fp->buf;
697 fp->cleanup = file_cleanup;
699 /* The ->actual_dir field is only used when ignore_srcdir is not in effect;
700 see do_include */
701 if (!CPP_OPTIONS (pfile)->ignore_srcdir)
702 fp->actual_dir = actual_directory (pfile, fp->fname);
704 pfile->input_stack_listing_current = 0;
705 return 1;
707 perror_fail:
708 cpp_error_from_errno (pfile, ihash->name);
709 fail:
710 cpp_pop_buffer (pfile);
711 close (fd);
712 return 0;
715 /* Given a path FNAME, extract the directory component and place it
716 onto the actual_dirs list. Return a pointer to the allocated
717 file_name_list structure. These structures are used to implement
718 current-directory "" include searching. */
720 static struct file_name_list *
721 actual_directory (pfile, fname)
722 cpp_reader *pfile;
723 const char *fname;
725 char *last_slash, *dir;
726 size_t dlen;
727 struct file_name_list *x;
729 dir = xstrdup (fname);
730 last_slash = rindex (dir, '/');
731 if (last_slash)
733 if (last_slash == dir)
735 dlen = 1;
736 last_slash[1] = '\0';
738 else
740 dlen = last_slash - dir;
741 *last_slash = '\0';
744 else
746 dir[0] = '.';
747 dir[1] = '\0';
748 dlen = 1;
751 if (dlen > pfile->max_include_len)
752 pfile->max_include_len = dlen;
754 for (x = pfile->actual_dirs; x; x = x->alloc)
755 if (!strcmp (x->name, dir))
757 free (dir);
758 return x;
761 /* Not found, make a new one. */
762 x = (struct file_name_list *) xmalloc (sizeof (struct file_name_list));
763 x->name = dir;
764 x->nlen = dlen;
765 x->next = CPP_OPTIONS (pfile)->quote_include;
766 x->alloc = pfile->actual_dirs;
767 x->sysp = CPP_BUFFER (pfile)->system_header_p;
768 x->name_map = NULL;
770 pfile->actual_dirs = x;
771 return x;
774 /* Determine the current line and column. Used only by read_and_prescan. */
775 static void
776 find_position (start, limit, linep, colp)
777 U_CHAR *start;
778 U_CHAR *limit;
779 unsigned long *linep;
780 unsigned long *colp;
782 unsigned long line = *linep, col = 0;
783 while (start < limit)
785 U_CHAR ch = *start++;
786 if (ch == '\n' || ch == '\r')
787 line++, col = 1;
788 else
789 col++;
791 *linep = line, *colp = col;
794 /* Read the entire contents of file DESC into buffer BUF. LEN is how
795 much memory to allocate initially; more will be allocated if
796 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
797 canonical form (\n). If enabled, convert and/or warn about
798 trigraphs. Convert backslash-newline to a one-character escape
799 (\r) and remove it from "embarrassing" places (i.e. the middle of a
800 token). If there is no newline at the end of the file, add one and
801 warn. Returns -1 on failure, or the actual length of the data to
802 be scanned.
804 This function does a lot of work, and can be a serious performance
805 bottleneck. It has been tuned heavily; make sure you understand it
806 before hacking. The common case - no trigraphs, Unix style line
807 breaks, backslash-newline set off by whitespace, newline at EOF -
808 has been optimized at the expense of the others. The performance
809 penalty for DOS style line breaks (\r\n) is about 15%.
811 Warnings lose particularly heavily since we have to determine the
812 line number, which involves scanning from the beginning of the file
813 or from the last warning. The penalty for the absence of a newline
814 at the end of reload1.c is about 60%. (reload1.c is 329k.)
816 If your file has more than one kind of end-of-line marker, you
817 will get messed-up line numbering. */
819 /* Table of characters that can't be handled in the inner loop.
820 Keep these contiguous to optimize the performance of the code generated
821 for the switch that uses them. */
822 #define SPECCASE_EMPTY 0
823 #define SPECCASE_NUL 1
824 #define SPECCASE_CR 2
825 #define SPECCASE_BACKSLASH 3
826 #define SPECCASE_QUESTION 4
828 static long
829 read_and_prescan (pfile, fp, desc, len)
830 cpp_reader *pfile;
831 cpp_buffer *fp;
832 int desc;
833 size_t len;
835 U_CHAR *buf = (U_CHAR *) xmalloc (len);
836 U_CHAR *ip, *op, *line_base;
837 U_CHAR *ibase;
838 U_CHAR *speccase = pfile->input_speccase;
839 unsigned long line;
840 unsigned int deferred_newlines;
841 int count;
842 size_t offset;
844 offset = 0;
845 op = buf;
846 line_base = buf;
847 line = 1;
848 ibase = pfile->input_buffer + 2;
849 deferred_newlines = 0;
851 for (;;)
853 read_next:
855 count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len);
856 if (count < 0)
857 goto error;
858 else if (count == 0)
859 break;
861 offset += count;
862 ip = ibase;
863 ibase = pfile->input_buffer + 2;
864 ibase[count] = ibase[count+1] = '\0';
866 if (offset > len)
868 size_t delta_op;
869 size_t delta_line_base;
870 len *= 2;
871 if (offset > len)
872 /* len overflowed.
873 This could happen if the file is larger than half the
874 maximum address space of the machine. */
875 goto too_big;
877 delta_op = op - buf;
878 delta_line_base = line_base - buf;
879 buf = (U_CHAR *) xrealloc (buf, len);
880 op = buf + delta_op;
881 line_base = buf + delta_line_base;
884 for (;;)
886 unsigned int span = 0;
888 /* Deal with \-newline in the middle of a token. */
889 if (deferred_newlines)
891 while (speccase[ip[span]] == SPECCASE_EMPTY
892 && ip[span] != '\n'
893 && ip[span] != '\t'
894 && ip[span] != ' ')
895 span++;
896 memcpy (op, ip, span);
897 op += span;
898 ip += span;
899 if (*ip == '\n' || *ip == '\t'
900 || *ip == ' ' || *ip == ' ')
901 while (deferred_newlines)
902 deferred_newlines--, *op++ = '\r';
903 span = 0;
906 /* Copy as much as we can without special treatment. */
907 while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
908 memcpy (op, ip, span);
909 op += span;
910 ip += span;
912 switch (speccase[*ip++])
914 case SPECCASE_NUL: /* \0 */
915 ibase[-1] = op[-1];
916 goto read_next;
918 case SPECCASE_CR: /* \r */
919 if (*ip == '\n')
920 ip++;
921 else if (*ip == '\0')
923 *--ibase = '\r';
924 goto read_next;
926 else if (ip[-2] == '\n')
927 continue;
928 *op++ = '\n';
929 break;
931 case SPECCASE_BACKSLASH: /* \ */
932 backslash:
934 /* If we're at the end of the intermediate buffer,
935 we have to shift the backslash down to the start
936 and come back next pass. */
937 if (*ip == '\0')
939 *--ibase = '\\';
940 goto read_next;
942 else if (*ip == '\n')
944 ip++;
945 if (*ip == '\r') ip++;
946 if (*ip == '\n' || *ip == '\t' || *ip == ' ')
947 *op++ = '\r';
948 else if (op[-1] == '\t' || op[-1] == ' '
949 || op[-1] == '\r' || op[-1] == '\n')
950 *op++ = '\r';
951 else
952 deferred_newlines++;
953 line++;
954 line_base = op;
956 else if (*ip == '\r')
958 ip++;
959 if (*ip == '\n') ip++;
960 else if (*ip == '\0')
962 *--ibase = '\r';
963 *--ibase = '\\';
964 goto read_next;
966 else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
967 *op++ = '\r';
968 else
969 deferred_newlines++;
970 line++;
971 line_base = op;
973 else
974 *op++ = '\\';
976 break;
978 case SPECCASE_QUESTION: /* ? */
980 unsigned int d, t;
981 /* If we're at the end of the intermediate buffer,
982 we have to shift the ?'s down to the start and
983 come back next pass. */
984 d = ip[0];
985 if (d == '\0')
987 *--ibase = '?';
988 goto read_next;
990 if (d != '?')
992 *op++ = '?';
993 break;
995 d = ip[1];
996 if (d == '\0')
998 *--ibase = '?';
999 *--ibase = '?';
1000 goto read_next;
1003 /* Trigraph map:
1004 * from to from to from to
1005 * ?? = # ?? ) ] ?? ! |
1006 * ?? ( [ ?? ' ^ ?? > }
1007 * ?? / \ ?? < { ?? - ~
1009 if (d == '=') t = '#';
1010 else if (d == ')') t = ']';
1011 else if (d == '!') t = '|';
1012 else if (d == '(') t = '[';
1013 else if (d == '\'') t = '^';
1014 else if (d == '>') t = '}';
1015 else if (d == '/') t = '\\';
1016 else if (d == '<') t = '{';
1017 else if (d == '-') t = '~';
1018 else
1020 *op++ = '?';
1021 break;
1023 if (CPP_OPTIONS (pfile)->warn_trigraphs)
1025 unsigned long col;
1026 find_position (line_base, op, &line, &col);
1027 line_base = op - col;
1028 cpp_warning_with_line (pfile, line, col,
1029 "trigraph ??%c encountered", d);
1031 if (CPP_OPTIONS (pfile)->trigraphs)
1033 if (t == '\\')
1034 goto backslash;
1035 else
1036 *op++ = t;
1038 else
1040 *op++ = '?';
1041 *op++ = '?';
1042 *op++ = d;
1044 ip += 2;
1050 if (offset == 0)
1051 return 0;
1053 /* Deal with pushed-back chars at true EOF.
1054 This may be any of: ?? ? \ \r \n \\r \\n.
1055 \r must become \n, \\r or \\n must become \r.
1056 We know we have space already. */
1057 if (ibase == pfile->input_buffer)
1059 if (*ibase == '?')
1061 *op++ = '?';
1062 *op++ = '?';
1064 else
1065 *op++ = '\r';
1067 else if (ibase == pfile->input_buffer + 1)
1069 if (*ibase == '\r')
1070 *op++ = '\n';
1071 else
1072 *op++ = *ibase;
1075 if (op[-1] != '\n')
1077 unsigned long col;
1078 find_position (line_base, op, &line, &col);
1079 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1080 if (offset + 1 > len)
1082 len += 1;
1083 if (offset + 1 > len)
1084 goto too_big;
1085 buf = (U_CHAR *) xrealloc (buf, len);
1086 op = buf + offset;
1088 *op++ = '\n';
1091 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1092 return op - buf;
1094 too_big:
1095 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1096 free (buf);
1097 return -1;
1099 error:
1100 cpp_error_from_errno (pfile, fp->fname);
1101 free (buf);
1102 return -1;
1105 /* Initialize the `input_buffer' and `input_speccase' tables.
1106 These are only used by read_and_prescan, but they're large and
1107 somewhat expensive to set up, so we want them allocated once for
1108 the duration of the cpp run. */
1110 static void
1111 initialize_input_buffer (pfile, fd, st)
1112 cpp_reader *pfile;
1113 int fd;
1114 struct stat *st;
1116 long pipe_buf;
1117 U_CHAR *tmp;
1119 /* Table of characters that cannot be handled by the
1120 read_and_prescan inner loop. The number of non-EMPTY entries
1121 should be as small as humanly possible. */
1123 tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT);
1124 memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT);
1125 tmp['\0'] = SPECCASE_NUL;
1126 tmp['\r'] = SPECCASE_CR;
1127 tmp['\\'] = SPECCASE_BACKSLASH;
1128 if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
1129 tmp['?'] = SPECCASE_QUESTION;
1131 pfile->input_speccase = tmp;
1133 /* Determine the appropriate size for the input buffer. Normal C
1134 source files are smaller than eight K. If we are reading a pipe,
1135 we want to make sure the input buffer is bigger than the kernel's
1136 pipe buffer. */
1137 pipe_buf = -1;
1139 if (! S_ISREG (st->st_mode))
1141 #ifdef _PC_PIPE_BUF
1142 pipe_buf = fpathconf (fd, _PC_PIPE_BUF);
1143 #endif
1144 if (pipe_buf == -1)
1146 #ifdef PIPE_BUF
1147 pipe_buf = PIPE_BUF;
1148 #else
1149 pipe_buf = 8192;
1150 #endif
1154 if (pipe_buf < 8192)
1155 pipe_buf = 8192;
1156 /* PIPE_BUF bytes of buffer proper, 2 to detect running off the end
1157 without address arithmetic all the time, and 2 for pushback in
1158 the case there's a potential trigraph or end-of-line digraph at
1159 the end of a block. */
1161 tmp = (U_CHAR *) xmalloc (pipe_buf + 2 + 2);
1162 pfile->input_buffer = tmp;
1163 pfile->input_buffer_len = pipe_buf;
1166 /* Add output to `deps_buffer' for the -M switch.
1167 STRING points to the text to be output.
1168 SPACER is ':' for targets, ' ' for dependencies, zero for text
1169 to be inserted literally. */
1171 void
1172 deps_output (pfile, string, spacer)
1173 cpp_reader *pfile;
1174 const char *string;
1175 int spacer;
1177 int size;
1178 int cr = 0;
1180 if (!*string)
1181 return;
1183 size = strlen (string);
1185 #ifndef MAX_OUTPUT_COLUMNS
1186 #define MAX_OUTPUT_COLUMNS 72
1187 #endif
1188 if (pfile->deps_column > 0
1189 && (pfile->deps_column + size) > MAX_OUTPUT_COLUMNS)
1191 cr = 5;
1192 pfile->deps_column = 0;
1195 if (pfile->deps_size + size + cr + 8 > pfile->deps_allocated_size)
1197 pfile->deps_allocated_size = (pfile->deps_size + size + 50) * 2;
1198 pfile->deps_buffer = (char *) xrealloc (pfile->deps_buffer,
1199 pfile->deps_allocated_size);
1202 if (cr)
1204 bcopy (" \\\n ", &pfile->deps_buffer[pfile->deps_size], 5);
1205 pfile->deps_size += 5;
1208 if (spacer == ' ' && pfile->deps_column > 0)
1209 pfile->deps_buffer[pfile->deps_size++] = ' ';
1210 bcopy (string, &pfile->deps_buffer[pfile->deps_size], size);
1211 pfile->deps_size += size;
1212 pfile->deps_column += size;
1213 if (spacer == ':')
1214 pfile->deps_buffer[pfile->deps_size++] = ':';
1215 pfile->deps_buffer[pfile->deps_size] = 0;
/* Simplify a path name in place, deleting redundant components.  This
   reduces OS overhead and guarantees that equivalent paths compare
   the same (modulo symlinks).

   Transforms made:
   foo/bar/../quux      foo/quux
   foo/./bar            foo/bar
   foo//bar             foo/bar
   /../quux             /quux
   //quux               //quux  (POSIX allows leading // as a namespace escape)
   foo/                 foo

   Guarantees no trailing slash, except for the slash(es) that make up
   the root of an absolute path ("/" and "//" are returned unchanged).
   All transforms reduce the length of the string, with one exception:
   a path that simplifies to nothing is replaced by ".", so PATH must
   have room for at least two bytes even when it is the empty string.

   When HAVE_DOS_BASED_FILE_SYSTEM is defined, backslashes are first
   converted to slashes and a leading drive letter ("c:") is kept
   verbatim.  */

void
simplify_pathname (path)
     char *path;
{
  char *from, *to;	/* Read and write cursors; to <= from always.  */
  char *base;		/* ".." may not back up past this point.  */
  char *root;		/* End of the drive/leading-slash prefix.  */
  int absolute = 0;

#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
  /* Convert all backslashes to slashes.  */
  for (from = path; *from; from++)
    if (*from == '\\')
      *from = '/';

  /* Skip over leading drive letter if present.  */
  if (ISALPHA (path[0]) && path[1] == ':')
    from = to = &path[2];
  else
    from = to = path;
#else
  from = to = path;
#endif

  /* Remove redundant initial /s.  */
  if (*from == '/')
    {
      absolute = 1;
      to++;
      from++;
      if (*from == '/')
        {
          if (*++from == '/')
            {
              /* 3 or more initial /s are equivalent to 1 /.  */
              while (*++from == '/')
                ;
            }
          else
            /* On some hosts // differs from /; Posix allows this.  */
            to++;
        }
    }
  base = to;
  root = to;

  for (;;)
    {
      while (*from == '/')
        from++;

      if (from[0] == '.' && from[1] == '/')
        from += 2;
      else if (from[0] == '.' && from[1] == '\0')
        goto done;
      else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
        {
          if (base == to)
            {
              /* Nothing to back up over.  "/.." is "/"; in a relative
                 path the "../" has to stay, and later ".." components
                 must not be allowed to cancel it.  */
              if (absolute)
                from += 3;
              else
                {
                  *to++ = *from++;
                  *to++ = *from++;
                  *to++ = *from++;
                  base = to;
                }
            }
          else
            {
              /* Drop the previous component: step over its trailing
                 slash and last character, then scan back to the slash
                 before it (or to BASE).  */
              to -= 2;
              while (to > base && *to != '/')
                to--;
              if (*to == '/')
                to++;
              from += 3;
            }
        }
      else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
        {
          if (base == to)
            {
              if (!absolute)
                {
                  *to++ = *from++;
                  *to++ = *from++;
                }
            }
          else
            {
              to -= 2;
              while (to > base && *to != '/')
                to--;
              if (*to == '/')
                to++;
            }
          goto done;
        }
      else
        {
          /* Copy this component and trailing /, if any.  */
          while ((*to++ = *from++) != '/')
            {
              if (!to[-1])
                {
                  /* Copied the terminator: end of input.  */
                  to--;
                  goto done;
                }
            }
        }
    }

 done:
  /* Trim trailing slash.  TO points one past the last byte written,
     so the byte to examine is to[-1]; to[0] is only stale input or
     the terminator.  Never trim into the root prefix, so that "/",
     "//" and a DOS "c:/" survive intact.  */
  if (to > root && to[-1] == '/')
    to--;

  /* Change the empty string to "." so that stat() on the result
     will always work.  */
  if (to == path)
    *to++ = '.';

  *to = '\0';

  return;
}
/* It is not clear when this should be used if at all, so I've
   disabled it until someone who understands VMS can look at it.  */
#if 0

/* Under VMS we need to fix up the "include" specification filename.

   Rules for possible conversions

        fullname                tried paths

        name                    name
        ./dir/name              [.dir]name
        /dir/name               dir:name
        /name                   [000000]name, name
        dir/name                dir:[000000]name, dir:name, dir/name
        dir1/dir2/name          dir1:[dir2]name, dir1:[000000.dir2]name
        path:/name              path:[000000]name, path:name
        path:/dir/name          path:[000000.dir]name, path:[dir]name
        path:dir/name           path:[dir]name
        [path]:[dir]name        [path.dir]name
        path/[dir]name          [path.dir]name

   The path:/name input is constructed when expanding <> includes.

   FULLNAME is rewritten in place.  A name that contains no '/' is
   left untouched.  The converted text is assembled in a fixed-size
   local buffer and then copied back over FULLNAME.

   NOTE(review): nothing bounds the length of the converted name
   against either the local buffer or the caller's storage for
   FULLNAME; confirm the callers' guarantees before enabling this.  */

static void
hack_vms_include_specification (fullname)
     char *fullname;
{
  char *basename, *unixname, *local_ptr, *first_slash;
  int f, check_filename_before_returning, must_revert;
  char Local[512];

  check_filename_before_returning = 0;
  must_revert = 0;
  /* See if we can find a 1st slash.  If not, there's no path information.  */
  first_slash = strchr (fullname, '/');
  if (first_slash == 0)
    return;                             /* Nothing to do!!! */

  /* construct device spec if none given.  */

  if (strchr (fullname, ':') == 0)
    {
      /* If fullname has a slash, take it as device spec.  */

      if (first_slash == fullname)
        {
          first_slash = strchr (fullname + 1, '/');     /* 2nd slash ? */
          if (first_slash)
            *first_slash = ':';                         /* make device spec */
          for (basename = fullname; *basename != 0; basename++)
            *basename = *(basename + 1);                /* remove leading slash */
        }
      else if ((first_slash[-1] != '.')         /* keep ':/', './' */
               && (first_slash[-1] != ':')
               && (first_slash[-1] != ']'))     /* or a vms path */
        {
          *first_slash = ':';
        }
      else if ((first_slash[1] == '[')          /* skip './' in './[dir' */
               && (first_slash[-1] == '.'))
        fullname += 2;
    }

  /* Get part after first ':' (basename[-1] == ':')
     or last '/' (basename[-1] == '/').  */

  basename = base_name (fullname);

  local_ptr = Local;                    /* initialize */

  /* We are trying to do a number of things here.  First of all, we are
     trying to hammer the filenames into a standard format, such that later
     processing can handle them.

     If the file name contains something like [dir.], then it recognizes this
     as a root, and strips the ".]".  Later processing will add whatever is
     needed to get things working properly.

     If no device is specified, then the first directory name is taken to be
     a device name (or a rooted logical).  */

  /* Point to the UNIX filename part (which needs to be fixed!)
     but skip vms path information.
     [basename != fullname since first_slash != 0].  */

  if ((basename[-1] == ':')             /* vms path spec.  */
      || (basename[-1] == ']')
      || (basename[-1] == '>'))
    unixname = basename;
  else
    unixname = fullname;

  if (*unixname == '/')
    unixname++;

  /* If the directory spec is not rooted, we can just copy
     the UNIX filename part and we are done.  */

  if (((basename - fullname) > 1)
      && ((basename[-1] == ']')
          || (basename[-1] == '>')))
    {
      if (basename[-2] != '.')
        {
          /* The VMS part ends in a `]', and the preceding character is
             not a `.'.
             -> PATH]:/name (basename = '/name', unixname = 'name')
             We strip the `]', and then splice the two parts of the name
             in the usual way.  Given the default locations for include
             files in cccp.c, we will only use this code if the user
             specifies alternate locations with the /include (-I) switch
             on the command line.  */

          basename -= 1;        /* Strip "]" */
          unixname--;           /* backspace */
        }
      else
        {
          /* The VMS part has a ".]" at the end, and this will not do.
             Later processing will add a second directory spec, and this
             would be a syntax error.  Thus we strip the ".]", and thus
             merge the directory specs.  We also backspace unixname, so
             that it points to a '/'.  This inhibits the generation of
             the 000000 root directory spec (which does not belong here
             in this case).  */

          basename -= 2;        /* Strip ".]" */
          unixname--;           /* backspace */
        }
    }
  else
    {
      /* We drop in here if there is no VMS style directory specification
         yet.  If there is no device specification either, we make the
         first dir a device and try that.  If we do not do this, then we
         will be essentially searching the users default directory (as if
         they did a #include "asdf.h").

         Then all we need to do is to push a '[' into the output string.
         Later processing will fill this in, and close the bracket.  */

      if ((unixname != fullname)        /* vms path spec found.  */
          && (basename[-1] != ':'))
        *local_ptr++ = ':';             /* dev not in spec.  take first dir */

      *local_ptr++ = '[';               /* Open the directory specification */
    }

  if (unixname == fullname)             /* no vms dir spec.  */
    {
      must_revert = 1;
      if ((first_slash != 0)            /* unix dir spec.  */
          && (*unixname != '/')         /* not beginning with '/' */
          && (*unixname != '.'))        /* or './' or '../' */
        *local_ptr++ = '.';             /* dir is local ! */
    }

  /* at this point we assume that we have the device spec, and (at least
     the opening "[" for a directory specification.  We may have directories
     specified already.

     If there are no other slashes then the filename will be
     in the "root" directory.  Otherwise, we need to add
     directory specifications.  */

  if (strchr (unixname, '/') == 0)
    {
      /* if no directories specified yet and none are following.  */
      if (local_ptr[-1] == '[')
        {
          /* Just add "000000]" as the directory string */
          strcpy (local_ptr, "000000]");
          local_ptr += strlen (local_ptr);
          check_filename_before_returning = 1; /* we might need to fool with this later */
        }
    }
  else
    {
      /* As long as there are still subdirectories to add, do them.  */
      while (strchr (unixname, '/') != 0)
        {
          /* If this token is "." we can ignore it
             if it's not at the beginning of a path.  */
          if ((unixname[0] == '.') && (unixname[1] == '/'))
            {
              /* remove it at beginning of path.  */
              if (((unixname == fullname)               /* no device spec */
                   && (fullname + 2 != basename))       /* starts with ./ */
                                                        /* or */
                  || ((basename[-1] == ':')             /* device spec */
                      && (unixname - 1 == basename)))   /* and ./ afterwards */
                *local_ptr++ = '.';                     /* make '[.' start of path.  */
              unixname += 2;
              continue;
            }

          /* Add a subdirectory spec.  Do not duplicate "." */
          if (local_ptr[-1] != '.'
              && local_ptr[-1] != '['
              && local_ptr[-1] != '<')
            *local_ptr++ = '.';

          /* If this is ".." then the spec becomes "-" */
          if ((unixname[0] == '.')
              && (unixname[1] == '.')
              && (unixname[2] == '/'))
            {
              /* Add "-" and skip the ".." */
              if ((local_ptr[-1] == '.')
                  && (local_ptr[-2] == '['))
                local_ptr--;                    /* prevent [.- */
              *local_ptr++ = '-';
              unixname += 3;
              continue;
            }

          /* Copy the subdirectory.  The loop condition above guarantees
             that a '/' is still ahead of us.  */
          while (*unixname != '/')
            *local_ptr++ = *unixname++;

          unixname++;                   /* Skip the "/" */
        }

      /* Close the directory specification */
      if (local_ptr[-1] == '.')         /* no trailing periods */
        local_ptr--;

      if (local_ptr[-1] == '[')         /* no dir needed */
        local_ptr--;
      else
        *local_ptr++ = ']';
    }

  /* Now add the filename.  */

  while (*unixname)
    *local_ptr++ = *unixname++;
  *local_ptr = 0;

  /* Now append it to the original VMS spec.  */

  strcpy ((must_revert == 1) ? fullname : basename, Local);

  /* If we put a [000000] in the filename, try to open it first.  If this
     fails, remove the [000000], and return that name.  This provides
     flexibility to the user in that they can use both rooted and
     non-rooted logical names to point to the location of the file.  */

  if (check_filename_before_returning)
    {
      f = open (fullname, O_RDONLY, 0666);
      if (f >= 0)
        {
          /* The file name is OK as it is, so return it as is.  */
          close (f);
          return;
        }

      /* The filename did not work.  Try to remove the [000000] from the
         name, and return it.  Source and destination both lie inside
         FULLNAME and overlap, so the tail must be shifted down with
         memmove rather than strcpy.  */

      basename = strchr (fullname, '[');
      local_ptr = strchr (fullname, ']') + 1;
      memmove (basename, local_ptr, strlen (local_ptr) + 1);
    }

  return;
}
#endif  /* VMS */