Daily bump.
[official-gcc.git] / gcc / cppfiles.c
blob1e70d3e6c529bb91ef295c9f104b450d3030fe3b
1 /* Part of CPP library. (include file handling)
2 Copyright (C) 1986, 87, 89, 92 - 95, 98, 1999 Free Software Foundation, Inc.
3 Written by Per Bothner, 1994.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Split out of cpplib.c, Zack Weinberg, Oct 1998
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 In other words, you are welcome to use, share and improve this program.
23 You are forbidden to forbid anyone else to use, share and improve
24 what you give them. Help stamp out software-hoarding! */
26 #include "config.h"
27 #include "system.h"
28 #include "cpplib.h"
30 /* The entry points to this file are: find_include_file, finclude,
31 include_hash, append_include_chain, deps_output, and file_cleanup.
32 file_cleanup is only called through CPP_BUFFER(pfile)->cleanup,
33 so it's static anyway. */
35 static struct include_hash *redundant_include_p
36 PROTO ((cpp_reader *,
37 struct include_hash *,
38 struct file_name_list *));
39 static struct file_name_map *read_name_map PROTO ((cpp_reader *,
40 const char *));
41 static char *read_filename_string PROTO ((int, FILE *));
42 static char *remap_filename PROTO ((cpp_reader *, char *,
43 struct file_name_list *));
44 static long read_and_prescan PROTO ((cpp_reader *, cpp_buffer *,
45 int, size_t));
46 static struct file_name_list *actual_directory PROTO ((cpp_reader *, char *));
48 static void initialize_input_buffer PROTO ((cpp_reader *, int,
49 struct stat *));
51 #if 0
52 static void hack_vms_include_specification PROTO ((char *));
53 #endif
55 /* Windows does not natively support inodes, and neither does MSDOS.
56 VMS has non-numeric inodes. */
57 #ifdef VMS
58 #define INO_T_EQ(a, b) (!bcmp((char *) &(a), (char *) &(b), sizeof (a)))
59 #elif (defined _WIN32 && !defined CYGWIN && ! defined (_UWIN)) \
60 || defined __MSDOS__
61 #define INO_T_EQ(a, b) 0
62 #else
63 #define INO_T_EQ(a, b) ((a) == (b))
64 #endif
66 /* Merge the four include chains together in the order quote, bracket,
67 system, after. Remove duplicate dirs (as determined by
68 INO_T_EQ()). The system_include and after_include chains are never
69 referred to again after this function; all access is through the
70 bracket_include path.
72 For the future: Check if the directory is empty (but
73 how?) and possibly preload the include hash. */
75 void
76 merge_include_chains (opts)
77 struct cpp_options *opts;
79 struct file_name_list *prev, *cur, *other;
80 struct file_name_list *quote, *brack, *systm, *after;
81 struct file_name_list *qtail, *btail, *stail, *atail;
83 qtail = opts->pending->quote_tail;
84 btail = opts->pending->brack_tail;
85 stail = opts->pending->systm_tail;
86 atail = opts->pending->after_tail;
88 quote = opts->pending->quote_head;
89 brack = opts->pending->brack_head;
90 systm = opts->pending->systm_head;
91 after = opts->pending->after_head;
93 /* Paste together bracket, system, and after include chains. */
94 if (stail)
95 stail->next = after;
96 else
97 systm = after;
98 if (btail)
99 btail->next = systm;
100 else
101 brack = systm;
103 /* This is a bit tricky.
104 First we drop dupes from the quote-include list.
105 Then we drop dupes from the bracket-include list.
106 Finally, if qtail and brack are the same directory,
107 we cut out qtail.
109 We can't just merge the lists and then uniquify them because
110 then we may lose directories from the <> search path that should
111 be there; consider -Ifoo -Ibar -I- -Ifoo -Iquux. It is however
112 safe to treat -Ibar -Ifoo -I- -Ifoo -Iquux as if written
113 -Ibar -I- -Ifoo -Iquux.
115 Note that this algorithm is quadratic in the number of -I switches,
116 which is acceptable since there aren't usually that many of them. */
118 for (cur = quote, prev = NULL; cur; cur = cur->next)
120 for (other = quote; other != cur; other = other->next)
121 if (INO_T_EQ (cur->ino, other->ino)
122 && cur->dev == other->dev)
124 if (opts->verbose)
125 cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);
127 prev->next = cur->next;
128 free (cur->name);
129 free (cur);
130 cur = prev;
131 break;
133 prev = cur;
135 qtail = prev;
137 for (cur = brack; cur; cur = cur->next)
139 for (other = brack; other != cur; other = other->next)
140 if (INO_T_EQ (cur->ino, other->ino)
141 && cur->dev == other->dev)
143 if (opts->verbose)
144 cpp_notice ("ignoring duplicate directory `%s'\n", cur->name);
146 prev->next = cur->next;
147 free (cur->name);
148 free (cur);
149 cur = prev;
150 break;
152 prev = cur;
155 if (quote)
157 if (INO_T_EQ (qtail->ino, brack->ino) && qtail->dev == brack->dev)
159 if (quote == qtail)
161 if (opts->verbose)
162 cpp_notice ("ignoring duplicate directory `%s'\n",
163 quote->name);
165 free (quote->name);
166 free (quote);
167 quote = brack;
169 else
171 cur = quote;
172 while (cur->next != qtail)
173 cur = cur->next;
174 cur->next = brack;
175 if (opts->verbose)
176 cpp_notice ("ignoring duplicate directory `%s'\n",
177 qtail->name);
179 free (qtail->name);
180 free (qtail);
183 else
184 qtail->next = brack;
186 else
187 quote = brack;
189 opts->quote_include = quote;
190 opts->bracket_include = brack;
193 /* Look up or add an entry to the table of all includes. This table
194 is indexed by the name as it appears in the #include line. The
195 ->next_this_file chain stores all different files with the same
196 #include name (there are at least three ways this can happen). The
197 hash function could probably be improved a bit. */
199 struct include_hash *
200 include_hash (pfile, fname, add)
201 cpp_reader *pfile;
202 char *fname;
203 int add;
205 unsigned int hash = 0;
206 struct include_hash *l, *m;
207 char *f = fname;
209 while (*f)
210 hash += *f++;
212 l = pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE];
213 m = 0;
214 for (; l; m = l, l = l->next)
215 if (!strcmp (l->nshort, fname))
216 return l;
218 if (!add)
219 return 0;
221 l = (struct include_hash *) xmalloc (sizeof (struct include_hash));
222 l->next = NULL;
223 l->next_this_file = NULL;
224 l->foundhere = NULL;
225 l->buf = NULL;
226 l->limit = NULL;
227 if (m)
228 m->next = l;
229 else
230 pfile->all_include_files[hash % ALL_INCLUDE_HASHSIZE] = l;
232 return l;
235 /* Return 0 if the file pointed to by IHASH has never been included before,
236 -1 if it has been included before and need not be again,
237 or a pointer to an IHASH entry which is the file to be reread.
238 "Never before" is with respect to the position in ILIST.
240 This will not detect redundancies involving odd uses of the
241 `current directory' rule for "" includes. They aren't quite
242 pathological, but I think they are rare enough not to worry about.
243 The simplest example is:
245 top.c:
246 #include "a/a.h"
247 #include "b/b.h"
249 a/a.h:
250 #include "../b/b.h"
252 and the problem is that for `current directory' includes,
253 ihash->foundhere is not on any of the global include chains,
254 so the test below (i->foundhere == l) may be false even when
255 the directories are in fact the same. */
257 static struct include_hash *
258 redundant_include_p (pfile, ihash, ilist)
259 cpp_reader *pfile;
260 struct include_hash *ihash;
261 struct file_name_list *ilist;
263 struct file_name_list *l;
264 struct include_hash *i;
266 if (! ihash->foundhere)
267 return 0;
269 for (i = ihash; i; i = i->next_this_file)
270 for (l = ilist; l; l = l->next)
271 if (i->foundhere == l)
272 /* The control_macro works like this: If it's NULL, the file
273 is to be included again. If it's "", the file is never to
274 be included again. If it's a string, the file is not to be
275 included again if the string is the name of a defined macro. */
276 return (i->control_macro
277 && (i->control_macro[0] == '\0'
278 || cpp_lookup (pfile, i->control_macro, -1, -1)))
279 ? (struct include_hash *)-1 : i;
281 return 0;
284 static int
285 file_cleanup (pbuf, pfile)
286 cpp_buffer *pbuf;
287 cpp_reader *pfile;
289 if (pbuf->buf)
291 free (pbuf->buf);
292 pbuf->buf = 0;
294 if (pfile->system_include_depth)
295 pfile->system_include_depth--;
296 return 0;
299 /* Search for include file FNAME in the include chain starting at
300 SEARCH_START. Return -2 if this file doesn't need to be included
301 (because it was included already and it's marked idempotent),
302 -1 if an error occurred, or a file descriptor open on the file.
303 *IHASH is set to point to the include hash entry for this file, and
304 *BEFORE is 1 if the file was included before (but needs to be read
305 again). */
307 find_include_file (pfile, fname, search_start, ihash, before)
308 cpp_reader *pfile;
309 char *fname;
310 struct file_name_list *search_start;
311 struct include_hash **ihash;
312 int *before;
314 struct file_name_list *l;
315 struct include_hash *ih, *jh;
316 int f, len;
317 char *name;
319 ih = include_hash (pfile, fname, 1);
320 jh = redundant_include_p (pfile, ih,
321 fname[0] == '/' ? ABSOLUTE_PATH : search_start);
323 if (jh != 0)
325 *before = 1;
326 *ihash = jh;
328 if (jh == (struct include_hash *)-1)
329 return -2;
330 else
331 return open (jh->name, O_RDONLY, 0666);
334 if (ih->foundhere)
335 /* A file is already known by this name, but it's not the same file.
336 Allocate another include_hash block and add it to the next_this_file
337 chain. */
339 jh = (struct include_hash *)xmalloc (sizeof (struct include_hash));
340 while (ih->next_this_file) ih = ih->next_this_file;
342 ih->next_this_file = jh;
343 jh = ih;
344 ih = ih->next_this_file;
346 ih->next = NULL;
347 ih->next_this_file = NULL;
348 ih->buf = NULL;
349 ih->limit = NULL;
351 *before = 0;
352 *ihash = ih;
353 ih->nshort = xstrdup (fname);
354 ih->control_macro = NULL;
356 /* If the pathname is absolute, just open it. */
357 if (fname[0] == '/')
359 ih->foundhere = ABSOLUTE_PATH;
360 ih->name = ih->nshort;
361 return open (ih->name, O_RDONLY, 0666);
364 /* Search directory path, trying to open the file. */
366 len = strlen (fname);
367 name = xmalloc (len + pfile->max_include_len + 2 + INCLUDE_LEN_FUDGE);
369 for (l = search_start; l; l = l->next)
371 bcopy (l->name, name, l->nlen);
372 name[l->nlen] = '/';
373 strcpy (&name[l->nlen+1], fname);
374 simplify_pathname (name);
375 if (CPP_OPTIONS (pfile)->remap)
376 name = remap_filename (pfile, name, l);
378 f = open (name, O_RDONLY|O_NONBLOCK|O_NOCTTY, 0666);
379 #ifdef EACCES
380 if (f == -1 && errno == EACCES)
382 cpp_error(pfile, "included file `%s' exists but is not readable",
383 name);
384 return -1;
386 #endif
388 if (f >= 0)
390 ih->foundhere = l;
391 ih->name = xrealloc (name, strlen (name)+1);
392 return f;
396 if (jh)
398 jh->next_this_file = NULL;
399 free (ih);
401 free (name);
402 *ihash = (struct include_hash *)-1;
403 return -1;
/* The file_name_map structure holds a mapping of file names for a
   particular directory.  This mapping is read from the file named
   FILE_NAME_MAP_FILE in that directory.  Such a file can be used to
   map filenames on a file system with severe filename restrictions,
   such as DOS.  The format of the file name map file is just a series
   of lines with two tokens on each line.  The first token is the name
   to map, and the second token is the actual name to use.  */

struct file_name_map
{
  struct file_name_map *map_next;  /* Next mapping for the same directory.  */
  char *map_from;                  /* Name as written in the map file.  */
  char *map_to;                    /* Actual file name to use instead.  */
};

#define FILE_NAME_MAP_FILE "header.gcc"
423 /* Read a space delimited string of unlimited length from a stdio
424 file. */
426 static char *
427 read_filename_string (ch, f)
428 int ch;
429 FILE *f;
431 char *alloc, *set;
432 int len;
434 len = 20;
435 set = alloc = xmalloc (len + 1);
436 if (! is_space[ch])
438 *set++ = ch;
439 while ((ch = getc (f)) != EOF && ! is_space[ch])
441 if (set - alloc == len)
443 len *= 2;
444 alloc = xrealloc (alloc, len + 1);
445 set = alloc + len / 2;
447 *set++ = ch;
450 *set = '\0';
451 ungetc (ch, f);
452 return alloc;
/* This structure holds a linked list of file name maps, one per directory.  */

struct file_name_map_list
{
  struct file_name_map_list *map_list_next;  /* Next directory's entry.  */
  char *map_list_name;                       /* Name of the directory.  */
  struct file_name_map *map_list_map;        /* Mappings for that directory.  */
};
464 /* Read the file name map file for DIRNAME. */
466 static struct file_name_map *
467 read_name_map (pfile, dirname)
468 cpp_reader *pfile;
469 const char *dirname;
471 register struct file_name_map_list *map_list_ptr;
472 char *name;
473 FILE *f;
475 for (map_list_ptr = CPP_OPTIONS (pfile)->map_list; map_list_ptr;
476 map_list_ptr = map_list_ptr->map_list_next)
477 if (! strcmp (map_list_ptr->map_list_name, dirname))
478 return map_list_ptr->map_list_map;
480 map_list_ptr = ((struct file_name_map_list *)
481 xmalloc (sizeof (struct file_name_map_list)));
482 map_list_ptr->map_list_name = xstrdup (dirname);
484 name = (char *) alloca (strlen (dirname) + strlen (FILE_NAME_MAP_FILE) + 2);
485 strcpy (name, dirname);
486 if (*dirname)
487 strcat (name, "/");
488 strcat (name, FILE_NAME_MAP_FILE);
489 f = fopen (name, "r");
490 if (!f)
491 map_list_ptr->map_list_map = (struct file_name_map *)-1;
492 else
494 int ch;
495 int dirlen = strlen (dirname);
497 while ((ch = getc (f)) != EOF)
499 char *from, *to;
500 struct file_name_map *ptr;
502 if (is_space[ch])
503 continue;
504 from = read_filename_string (ch, f);
505 while ((ch = getc (f)) != EOF && is_hor_space[ch])
507 to = read_filename_string (ch, f);
509 ptr = ((struct file_name_map *)
510 xmalloc (sizeof (struct file_name_map)));
511 ptr->map_from = from;
513 /* Make the real filename absolute. */
514 if (*to == '/')
515 ptr->map_to = to;
516 else
518 ptr->map_to = xmalloc (dirlen + strlen (to) + 2);
519 strcpy (ptr->map_to, dirname);
520 ptr->map_to[dirlen] = '/';
521 strcpy (ptr->map_to + dirlen + 1, to);
522 free (to);
525 ptr->map_next = map_list_ptr->map_list_map;
526 map_list_ptr->map_list_map = ptr;
528 while ((ch = getc (f)) != '\n')
529 if (ch == EOF)
530 break;
532 fclose (f);
535 map_list_ptr->map_list_next = CPP_OPTIONS (pfile)->map_list;
536 CPP_OPTIONS (pfile)->map_list = map_list_ptr;
538 return map_list_ptr->map_list_map;
541 /* Remap NAME based on the file_name_map (if any) for LOC. */
543 static char *
544 remap_filename (pfile, name, loc)
545 cpp_reader *pfile;
546 char *name;
547 struct file_name_list *loc;
549 struct file_name_map *map;
550 const char *from, *p, *dir;
552 if (! loc->name_map)
553 loc->name_map = read_name_map (pfile,
554 loc->name
555 ? loc->name : ".");
557 if (loc->name_map == (struct file_name_map *)-1)
558 return name;
560 from = name + strlen (loc->name) + 1;
562 for (map = loc->name_map; map; map = map->map_next)
563 if (!strcmp (map->map_from, from))
564 return map->map_to;
566 /* Try to find a mapping file for the particular directory we are
567 looking in. Thus #include <sys/types.h> will look up sys/types.h
568 in /usr/include/header.gcc and look up types.h in
569 /usr/include/sys/header.gcc. */
570 p = rindex (name, '/');
571 if (!p)
572 p = name;
573 if (loc && loc->name
574 && strlen (loc->name) == (size_t) (p - name)
575 && !strncmp (loc->name, name, p - name))
576 /* FILENAME is in SEARCHPTR, which we've already checked. */
577 return name;
579 if (p == name)
581 dir = ".";
582 from = name;
584 else
586 char * newdir = (char *) alloca (p - name + 1);
587 bcopy (name, newdir, p - name);
588 newdir[p - name] = '\0';
589 dir = newdir;
590 from = p + 1;
593 for (map = read_name_map (pfile, dir); map; map = map->map_next)
594 if (! strcmp (map->map_from, name))
595 return map->map_to;
597 return name;
600 /* Read the contents of FD into the buffer on the top of PFILE's stack.
601 IHASH points to the include hash entry for the file associated with
604 The caller is responsible for the cpp_push_buffer. */
607 finclude (pfile, fd, ihash)
608 cpp_reader *pfile;
609 int fd;
610 struct include_hash *ihash;
612 struct stat st;
613 size_t st_size;
614 long length;
615 cpp_buffer *fp;
617 if (fstat (fd, &st) < 0)
618 goto perror_fail;
619 if (fcntl (fd, F_SETFL, 0) == -1) /* turn off nonblocking mode */
620 goto perror_fail;
622 fp = CPP_BUFFER (pfile);
624 /* If fd points to a plain file, we know how big it is, so we can
625 allocate the buffer all at once. If fd is a pipe or terminal, we
626 can't. Most C source files are 4k or less, so we guess that. If
627 fd is something weird, like a block device or a directory, we
628 don't want to read it at all.
630 Unfortunately, different systems use different st.st_mode values
631 for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
632 zero the entire struct stat except a couple fields. Hence the
633 mess below.
635 In all cases, read_and_prescan will resize the buffer if it
636 turns out there's more data than we thought. */
638 if (S_ISREG (st.st_mode))
640 /* off_t might have a wider range than size_t - in other words,
641 the max size of a file might be bigger than the address
642 space. We can't handle a file that large. (Anyone with
643 a single source file bigger than 4GB needs to rethink
644 their coding style.) */
645 st_size = (size_t) st.st_size;
646 if ((unsigned HOST_WIDEST_INT) st_size
647 != (unsigned HOST_WIDEST_INT) st.st_size)
649 cpp_error (pfile, "file `%s' is too large", ihash->name);
650 goto fail;
653 else if (S_ISFIFO (st.st_mode) || S_ISSOCK (st.st_mode)
654 /* Permit any kind of character device: the sensible ones are
655 ttys and /dev/null, but weeding out the others is too hard. */
656 || S_ISCHR (st.st_mode)
657 /* Some 4.x (x<4) derivatives have a bug that makes fstat() of a
658 socket or pipe return a stat struct with most fields zeroed. */
659 || (st.st_mode == 0 && st.st_nlink == 0 && st.st_size == 0))
661 /* Cannot get its file size before reading. 4k is a decent
662 first guess. */
663 st_size = 4096;
665 else
667 cpp_error (pfile, "`%s' is not a file, pipe, or tty", ihash->name);
668 goto fail;
671 if (pfile->input_buffer == NULL)
672 initialize_input_buffer (pfile, fd, &st);
674 /* Read the file, converting end-of-line characters and trigraphs
675 (if enabled). */
676 fp->ihash = ihash;
677 fp->nominal_fname = fp->fname = ihash->name;
678 length = read_and_prescan (pfile, fp, fd, st_size);
679 if (length < 0)
680 goto fail;
681 if (length == 0)
682 ihash->control_macro = ""; /* never re-include */
684 close (fd);
685 fp->rlimit = fp->alimit = fp->buf + length;
686 fp->cur = fp->buf;
687 if (ihash->foundhere != ABSOLUTE_PATH)
688 fp->system_header_p = ihash->foundhere->sysp;
689 fp->lineno = 1;
690 fp->colno = 1;
691 fp->line_base = fp->buf;
692 fp->cleanup = file_cleanup;
694 /* The ->actual_dir field is only used when ignore_srcdir is not in effect;
695 see do_include */
696 if (!CPP_OPTIONS (pfile)->ignore_srcdir)
697 fp->actual_dir = actual_directory (pfile, fp->fname);
699 pfile->input_stack_listing_current = 0;
700 return 1;
702 perror_fail:
703 cpp_error_from_errno (pfile, ihash->name);
704 fail:
705 cpp_pop_buffer (pfile);
706 close (fd);
707 return 0;
710 /* Given a path FNAME, extract the directory component and place it
711 onto the actual_dirs list. Return a pointer to the allocated
712 file_name_list structure. These structures are used to implement
713 current-directory "" include searching. */
715 static struct file_name_list *
716 actual_directory (pfile, fname)
717 cpp_reader *pfile;
718 char *fname;
720 char *last_slash, *dir;
721 size_t dlen;
722 struct file_name_list *x;
724 dir = xstrdup (fname);
725 last_slash = rindex (dir, '/');
726 if (last_slash)
728 if (last_slash == dir)
730 dlen = 1;
731 last_slash[1] = '\0';
733 else
735 dlen = last_slash - dir;
736 *last_slash = '\0';
739 else
741 dir[0] = '.';
742 dir[1] = '\0';
743 dlen = 1;
746 if (dlen > pfile->max_include_len)
747 pfile->max_include_len = dlen;
749 for (x = pfile->actual_dirs; x; x = x->alloc)
750 if (!strcmp (x->name, dir))
752 free (dir);
753 return x;
756 /* Not found, make a new one. */
757 x = (struct file_name_list *) xmalloc (sizeof (struct file_name_list));
758 x->name = dir;
759 x->nlen = dlen;
760 x->next = CPP_OPTIONS (pfile)->quote_include;
761 x->alloc = pfile->actual_dirs;
762 x->sysp = CPP_BUFFER (pfile)->system_header_p;
763 x->name_map = NULL;
765 pfile->actual_dirs = x;
766 return x;
769 /* Determine the current line and column. Used only by read_and_prescan. */
770 static void
771 find_position (start, limit, linep, colp)
772 U_CHAR *start;
773 U_CHAR *limit;
774 unsigned long *linep;
775 unsigned long *colp;
777 unsigned long line = *linep, col = 0;
778 while (start < limit)
780 U_CHAR ch = *start++;
781 if (ch == '\n' || ch == '\r')
782 line++, col = 1;
783 else
784 col++;
786 *linep = line, *colp = col;
789 /* Read the entire contents of file DESC into buffer BUF. LEN is how
790 much memory to allocate initially; more will be allocated if
791 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
792 canonical form (\n). If enabled, convert and/or warn about
793 trigraphs. Convert backslash-newline to a one-character escape
794 (\r) and remove it from "embarrassing" places (i.e. the middle of a
795 token). If there is no newline at the end of the file, add one and
796 warn. Returns -1 on failure, or the actual length of the data to
797 be scanned.
799 This function does a lot of work, and can be a serious performance
800 bottleneck. It has been tuned heavily; make sure you understand it
801 before hacking. The common case - no trigraphs, Unix style line
802 breaks, backslash-newline set off by whitespace, newline at EOF -
803 has been optimized at the expense of the others. The performance
804 penalty for DOS style line breaks (\r\n) is about 15%.
806 Warnings lose particularly heavily since we have to determine the
807 line number, which involves scanning from the beginning of the file
808 or from the last warning. The penalty for the absence of a newline
809 at the end of reload1.c is about 60%. (reload1.c is 329k.)
811 If your file has more than one kind of end-of-line marker, you
812 will get messed-up line numbering. */
814 /* Table of characters that can't be handled in the inner loop.
815 Keep these contiguous to optimize the performance of the code generated
816 for the switch that uses them. */
817 #define SPECCASE_EMPTY 0
818 #define SPECCASE_NUL 1
819 #define SPECCASE_CR 2
820 #define SPECCASE_BACKSLASH 3
821 #define SPECCASE_QUESTION 4
823 static long
824 read_and_prescan (pfile, fp, desc, len)
825 cpp_reader *pfile;
826 cpp_buffer *fp;
827 int desc;
828 size_t len;
830 U_CHAR *buf = (U_CHAR *) xmalloc (len);
831 U_CHAR *ip, *op, *line_base;
832 U_CHAR *ibase;
833 U_CHAR *speccase = pfile->input_speccase;
834 unsigned long line;
835 unsigned int deferred_newlines;
836 int count;
837 size_t offset;
839 offset = 0;
840 op = buf;
841 line_base = buf;
842 line = 1;
843 ibase = pfile->input_buffer + 2;
844 deferred_newlines = 0;
846 for (;;)
848 read_next:
850 count = read (desc, pfile->input_buffer + 2, pfile->input_buffer_len);
851 if (count < 0)
852 goto error;
853 else if (count == 0)
854 break;
856 offset += count;
857 ip = ibase;
858 ibase = pfile->input_buffer + 2;
859 ibase[count] = ibase[count+1] = '\0';
861 if (offset > len)
863 size_t delta_op;
864 size_t delta_line_base;
865 len *= 2;
866 if (offset > len)
867 /* len overflowed.
868 This could happen if the file is larger than half the
869 maximum address space of the machine. */
870 goto too_big;
872 delta_op = op - buf;
873 delta_line_base = line_base - buf;
874 buf = (U_CHAR *) xrealloc (buf, len);
875 op = buf + delta_op;
876 line_base = buf + delta_line_base;
879 for (;;)
881 unsigned int span = 0;
883 /* Deal with \-newline in the middle of a token. */
884 if (deferred_newlines)
886 while (speccase[ip[span]] == SPECCASE_EMPTY
887 && ip[span] != '\n'
888 && ip[span] != '\t'
889 && ip[span] != ' ')
890 span++;
891 memcpy (op, ip, span);
892 op += span;
893 ip += span;
894 if (*ip == '\n' || *ip == '\t'
895 || *ip == ' ' || *ip == ' ')
896 while (deferred_newlines)
897 deferred_newlines--, *op++ = '\r';
898 span = 0;
901 /* Copy as much as we can without special treatment. */
902 while (speccase[ip[span]] == SPECCASE_EMPTY) span++;
903 memcpy (op, ip, span);
904 op += span;
905 ip += span;
907 switch (speccase[*ip++])
909 case SPECCASE_NUL: /* \0 */
910 ibase[-1] = op[-1];
911 goto read_next;
913 case SPECCASE_CR: /* \r */
914 if (*ip == '\n')
915 ip++;
916 else if (*ip == '\0')
918 *--ibase = '\r';
919 goto read_next;
921 else if (ip[-2] == '\n')
922 continue;
923 *op++ = '\n';
924 break;
926 case SPECCASE_BACKSLASH: /* \ */
927 backslash:
929 /* If we're at the end of the intermediate buffer,
930 we have to shift the backslash down to the start
931 and come back next pass. */
932 if (*ip == '\0')
934 *--ibase = '\\';
935 goto read_next;
937 else if (*ip == '\n')
939 ip++;
940 if (*ip == '\r') ip++;
941 if (*ip == '\n' || *ip == '\t' || *ip == ' ')
942 *op++ = '\r';
943 else if (op[-1] == '\t' || op[-1] == ' '
944 || op[-1] == '\r' || op[-1] == '\n')
945 *op++ = '\r';
946 else
947 deferred_newlines++;
948 line++;
949 line_base = op;
951 else if (*ip == '\r')
953 ip++;
954 if (*ip == '\n') ip++;
955 else if (*ip == '\0')
957 *--ibase = '\r';
958 *--ibase = '\\';
959 goto read_next;
961 else if (*ip == '\r' || *ip == '\t' || *ip == ' ')
962 *op++ = '\r';
963 else
964 deferred_newlines++;
965 line++;
966 line_base = op;
968 else
969 *op++ = '\\';
971 break;
973 case SPECCASE_QUESTION: /* ? */
975 unsigned int d;
976 /* If we're at the end of the intermediate buffer,
977 we have to shift the ?'s down to the start and
978 come back next pass. */
979 d = ip[0];
980 if (d == '\0')
982 *--ibase = '?';
983 goto read_next;
985 if (d != '?')
987 *op++ = '?';
988 break;
990 d = ip[1];
991 if (d == '\0')
993 *--ibase = '?';
994 *--ibase = '?';
995 goto read_next;
997 if (!trigraph_table[d])
999 *op++ = '?';
1000 break;
1003 if (CPP_OPTIONS (pfile)->warn_trigraphs)
1005 unsigned long col;
1006 find_position (line_base, op, &line, &col);
1007 line_base = op - col;
1008 cpp_warning_with_line (pfile, line, col,
1009 "trigraph ??%c encountered", d);
1011 if (CPP_OPTIONS (pfile)->trigraphs)
1013 if (trigraph_table[d] == '\\')
1014 goto backslash;
1015 else
1016 *op++ = trigraph_table[d];
1018 else
1020 *op++ = '?';
1021 *op++ = '?';
1022 *op++ = d;
1024 ip += 2;
1030 if (offset == 0)
1031 return 0;
1033 /* Deal with pushed-back chars at true EOF.
1034 This may be any of: ?? ? \ \r \n \\r \\n.
1035 \r must become \n, \\r or \\n must become \r.
1036 We know we have space already. */
1037 if (ibase == pfile->input_buffer)
1039 if (*ibase == '?')
1041 *op++ = '?';
1042 *op++ = '?';
1044 else
1045 *op++ = '\r';
1047 else if (ibase == pfile->input_buffer + 1)
1049 if (*ibase == '\r')
1050 *op++ = '\n';
1051 else
1052 *op++ = *ibase;
1055 if (op[-1] != '\n')
1057 unsigned long col;
1058 find_position (line_base, op, &line, &col);
1059 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1060 if (offset + 1 > len)
1062 len += 1;
1063 if (offset + 1 > len)
1064 goto too_big;
1065 buf = (U_CHAR *) xrealloc (buf, len);
1066 op = buf + offset;
1068 *op++ = '\n';
1071 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1072 return op - buf;
1074 too_big:
1075 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1076 free (buf);
1077 return -1;
1079 error:
1080 cpp_error_from_errno (pfile, fp->fname);
1081 free (buf);
1082 return -1;
1085 /* Initialize the `input_buffer' and `input_speccase' tables.
1086 These are only used by read_and_prescan, but they're large and
1087 somewhat expensive to set up, so we want them allocated once for
1088 the duration of the cpp run. */
1090 static void
1091 initialize_input_buffer (pfile, fd, st)
1092 cpp_reader *pfile;
1093 int fd;
1094 struct stat *st;
1096 long pipe_buf;
1097 U_CHAR *tmp;
1099 /* Table of characters that cannot be handled by the
1100 read_and_prescan inner loop. The number of non-EMPTY entries
1101 should be as small as humanly possible. */
1103 tmp = (U_CHAR *) xmalloc (1 << CHAR_BIT);
1104 memset (tmp, SPECCASE_EMPTY, 1 << CHAR_BIT);
1105 tmp['\0'] = SPECCASE_NUL;
1106 tmp['\r'] = SPECCASE_CR;
1107 tmp['\\'] = SPECCASE_BACKSLASH;
1108 if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs)
1109 tmp['?'] = SPECCASE_QUESTION;
1111 pfile->input_speccase = tmp;
1113 /* Determine the appropriate size for the input buffer. Normal C
1114 source files are smaller than eight K. If we are reading a pipe,
1115 we want to make sure the input buffer is bigger than the kernel's
1116 pipe buffer. */
1117 pipe_buf = -1;
1119 if (! S_ISREG (st->st_mode))
1121 #ifdef _PC_PIPE_BUF
1122 pipe_buf = fpathconf (fd, _PC_PIPE_BUF);
1123 #endif
1124 if (pipe_buf == -1)
1126 #ifdef PIPE_BUF
1127 pipe_buf = PIPE_BUF;
1128 #else
1129 pipe_buf = 8192;
1130 #endif
1134 if (pipe_buf < 8192)
1135 pipe_buf = 8192;
1136 /* PIPE_BUF bytes of buffer proper, 2 to detect running off the end
1137 without address arithmetic all the time, and 2 for pushback in
1138 the case there's a potential trigraph or end-of-line digraph at
1139 the end of a block. */
1141 tmp = (U_CHAR *) xmalloc (pipe_buf + 2 + 2);
1142 pfile->input_buffer = tmp;
1143 pfile->input_buffer_len = pipe_buf;
1146 /* Add output to `deps_buffer' for the -M switch.
1147 STRING points to the text to be output.
1148 SPACER is ':' for targets, ' ' for dependencies, zero for text
1149 to be inserted literally. */
1151 void
1152 deps_output (pfile, string, spacer)
1153 cpp_reader *pfile;
1154 char *string;
1155 int spacer;
1157 int size;
1158 int cr = 0;
1160 if (!*string)
1161 return;
1163 size = strlen (string);
1165 #ifndef MAX_OUTPUT_COLUMNS
1166 #define MAX_OUTPUT_COLUMNS 72
1167 #endif
1168 if (pfile->deps_column > 0
1169 && (pfile->deps_column + size) > MAX_OUTPUT_COLUMNS)
1171 cr = 5;
1172 pfile->deps_column = 0;
1175 if (pfile->deps_size + size + cr + 8 > pfile->deps_allocated_size)
1177 pfile->deps_allocated_size = (pfile->deps_size + size + 50) * 2;
1178 pfile->deps_buffer = (char *) xrealloc (pfile->deps_buffer,
1179 pfile->deps_allocated_size);
1182 if (cr)
1184 bcopy (" \\\n ", &pfile->deps_buffer[pfile->deps_size], 5);
1185 pfile->deps_size += 5;
1188 if (spacer == ' ' && pfile->deps_column > 0)
1189 pfile->deps_buffer[pfile->deps_size++] = ' ';
1190 bcopy (string, &pfile->deps_buffer[pfile->deps_size], size);
1191 pfile->deps_size += size;
1192 pfile->deps_column += size;
1193 if (spacer == ':')
1194 pfile->deps_buffer[pfile->deps_size++] = ':';
1195 pfile->deps_buffer[pfile->deps_size] = 0;
/* Simplify a path name in place, deleting redundant components.  This
   reduces OS overhead and guarantees that equivalent paths compare
   the same (modulo symlinks).

   Transforms made:
   foo/bar/../quux      foo/quux
   foo/./bar            foo/bar
   foo//bar             foo/bar
   /../quux             /quux
   //quux               //quux  (POSIX allows leading // as a namespace escape)
   foo/bar/             foo/bar

   Guarantees no trailing slashes, except that a path consisting only
   of the root ("/" or "//") is left alone.  All transforms reduce the
   length of the string, with one exception: the empty string becomes
   ".", so PATH must have room for at least two bytes.  */

void
simplify_pathname (path)
     char *path;
{
  char *from, *to;
  char *base;
  int absolute = 0;

#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
  /* Convert all backslashes to slashes.  */
  for (from = path; *from; from++)
    if (*from == '\\') *from = '/';

  /* Skip over leading drive letter if present.  */
  if (ISALPHA (path[0]) && path[1] == ':')
    from = to = &path[2];
  else
    from = to = path;
#else
  from = to = path;
#endif

  /* Remove redundant initial /s.  */
  if (*from == '/')
    {
      absolute = 1;
      to++;
      from++;
      if (*from == '/')
        {
          if (*++from == '/')
            /* 3 or more initial /s are equivalent to 1 /.  */
            while (*++from == '/');
          else
            /* On some hosts // differs from /; Posix allows this.  */
            to++;
        }
    }
  /* BASE marks the part of the output that ".." may not remove: the
     root of an absolute path, or the leading "../"s of a relative
     one.  */
  base = to;

  for (;;)
    {
      while (*from == '/')
        from++;

      if (from[0] == '.' && from[1] == '/')
        from += 2;
      else if (from[0] == '.' && from[1] == '\0')
        goto done;
      else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
        {
          if (base == to)
            {
              if (absolute)
                /* "/.." is the same as "/".  */
                from += 3;
              else
                {
                  /* Nothing to cancel against; keep the "../" and
                     protect it from later ".."s.  */
                  *to++ = *from++;
                  *to++ = *from++;
                  *to++ = *from++;
                  base = to;
                }
            }
          else
            {
              /* Drop the previous component.  TO is just past its
                 trailing slash, so step over that slash and back up
                 to the one before the component (or to BASE).  */
              to -= 2;
              while (to > base && *to != '/') to--;
              if (*to == '/')
                to++;
              from += 3;
            }
        }
      else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
        {
          if (base == to)
            {
              if (!absolute)
                {
                  *to++ = *from++;
                  *to++ = *from++;
                }
            }
          else
            {
              to -= 2;
              while (to > base && *to != '/') to--;
              if (*to == '/')
                to++;
            }
          goto done;
        }
      else
        /* Copy this component and trailing /, if any.  */
        while ((*to++ = *from++) != '/')
          {
            if (!to[-1])
              {
                to--;
                goto done;
              }
          }
    }

 done:
  /* Trim trailing slash.  TO points one past the last character kept,
     so the slash to look at is to[-1].  The root of an absolute path
     (everything before BASE) is never trimmed.  */
  if (to > (absolute ? base : path) && to[-1] == '/')
    to--;

  /* Change the empty string to "." so that stat() on the result
     will always work.  */
  if (to == path)
    *to++ = '.';

  *to = '\0';

  return;
}
1331 /* It is not clear when this should be used if at all, so I've
1332 disabled it until someone who understands VMS can look at it. */
1333 #if 0
1335 /* Under VMS we need to fix up the "include" specification filename.
1337 Rules for possible conversions
1339 fullname tried paths
1341 name name
1342 ./dir/name [.dir]name
1343 /dir/name dir:name
1344 /name [000000]name, name
1345 dir/name dir:[000000]name, dir:name, dir/name
1346 dir1/dir2/name dir1:[dir2]name, dir1:[000000.dir2]name
1347 path:/name path:[000000]name, path:name
1348 path:/dir/name path:[000000.dir]name, path:[dir]name
1349 path:dir/name path:[dir]name
1350 [path]:[dir]name [path.dir]name
1351 path/[dir]name [path.dir]name
1353 The path:/name input is constructed when expanding <> includes. */
static void
hack_vms_include_specification (fullname)
     char *fullname;
{
  /* Rewrite the Unix-style include name in FULLNAME, in place, into a
     VMS file specification.  The new name is assembled in Local and
     then copied back over FULLNAME (or over its base-name part).
     Nothing is returned; a name without any '/' is left untouched.

     NOTE(review): nothing bounds the writes into Local[512] or the copy
     back into the caller's buffer, which can be longer than the text it
     replaces (e.g. when "[000000]" is inserted).  Confirm the callers'
     buffer sizes before enabling this code.  */
  register char *basename, *unixname, *local_ptr, *first_slash;
  int f, check_filename_before_returning, must_revert;
  char Local[512];

  check_filename_before_returning = 0;
  must_revert = 0;
  /* See if we can find a 1st slash.  If not, there's no path information.  */
  first_slash = index (fullname, '/');
  if (first_slash == 0)
    return;				/* Nothing to do!!! */

  /* construct device spec if none given.  */

  if (index (fullname, ':') == 0)
    {

      /* If fullname has a slash, take it as device spec.  */

      if (first_slash == fullname)
	{
	  first_slash = index (fullname+1, '/');	/* 2nd slash ? */
	  if (first_slash)
	    *first_slash = ':';				/* make device spec */
	  for (basename = fullname; *basename != 0; basename++)
	    *basename = *(basename+1);			/* remove leading slash */
	}
      else if ((first_slash[-1] != '.')		/* keep ':/', './' */
	       && (first_slash[-1] != ':')
	       && (first_slash[-1] != ']'))	/* or a vms path */
	{
	  *first_slash = ':';
	}
      else if ((first_slash[1] == '[')		/* skip './' in './[dir' */
	       && (first_slash[-1] == '.'))
	fullname += 2;
    }

  /* Get part after first ':' (basename[-1] == ':')
     or last '/' (basename[-1] == '/').  */

  basename = base_name (fullname);

  local_ptr = Local;			/* initialize */

  /* We are trying to do a number of things here.  First of all, we are
     trying to hammer the filenames into a standard format, such that later
     processing can handle them.

     If the file name contains something like [dir.], then it recognizes this
     as a root, and strips the ".]".  Later processing will add whatever is
     needed to get things working properly.

     If no device is specified, then the first directory name is taken to be
     a device name (or a rooted logical).  */

  /* Point to the UNIX filename part (which needs to be fixed!)
     but skip vms path information.
     [basename != fullname since first_slash != 0].  */

  if ((basename[-1] == ':')		/* vms path spec. */
      || (basename[-1] == ']')
      || (basename[-1] == '>'))
    unixname = basename;
  else
    unixname = fullname;

  if (*unixname == '/')
    unixname++;

  /* If the directory spec is not rooted, we can just copy
     the UNIX filename part and we are done.  */

  if (((basename - fullname) > 1)
      && ((basename[-1] == ']')
	  || (basename[-1] == '>')))
    {
      if (basename[-2] != '.')
	{

	  /* The VMS part ends in a `]', and the preceding character is not a `.'.
	     -> PATH]:/name (basename = '/name', unixname = 'name')
	     We strip the `]', and then splice the two parts of the name in the
	     usual way.  Given the default locations for include files in cccp.c,
	     we will only use this code if the user specifies alternate locations
	     with the /include (-I) switch on the command line.  */

	  basename -= 1;	/* Strip "]" */
	  unixname--;		/* backspace */
	}
      else
	{

	  /* The VMS part has a ".]" at the end, and this will not do.  Later
	     processing will add a second directory spec, and this would be a syntax
	     error.  Thus we strip the ".]", and thus merge the directory specs.
	     We also backspace unixname, so that it points to a '/'.  This inhibits the
	     generation of the 000000 root directory spec (which does not belong here
	     in this case).  */

	  basename -= 2;	/* Strip ".]" */
	  unixname--;		/* backspace */
	}
    }

  else

    {

      /* We drop in here if there is no VMS style directory specification yet.
	 If there is no device specification either, we make the first dir a
	 device and try that.  If we do not do this, then we will be essentially
	 searching the users default directory (as if they did a #include "asdf.h").

	 Then all we need to do is to push a '[' into the output string.  Later
	 processing will fill this in, and close the bracket.  */

      if ((unixname != fullname)	/* vms path spec found. */
	  && (basename[-1] != ':'))
	*local_ptr++ = ':';		/* dev not in spec. take first dir */

      *local_ptr++ = '[';		/* Open the directory specification */
    }

  if (unixname == fullname)		/* no vms dir spec. */
    {
      must_revert = 1;
      if ((first_slash != 0)		/* unix dir spec. */
	  && (*unixname != '/')		/* not beginning with '/' */
	  && (*unixname != '.'))	/* or './' or '../' */
	*local_ptr++ = '.';		/* dir is local ! */
    }

  /* at this point we assume that we have the device spec, and (at least
     the opening "[" for a directory specification.  We may have directories
     specified already.

     If there are no other slashes then the filename will be
     in the "root" directory.  Otherwise, we need to add
     directory specifications.  */

  if (index (unixname, '/') == 0)
    {
      /* if no directories specified yet and none are following.  */
      if (local_ptr[-1] == '[')
	{
	  /* Just add "000000]" as the directory string */
	  strcpy (local_ptr, "000000]");
	  local_ptr += strlen (local_ptr);
	  check_filename_before_returning = 1; /* we might need to fool with this later */
	}
    }
  else
    {

      /* As long as there are still subdirectories to add, do them.  */
      while (index (unixname, '/') != 0)
	{
	  /* If this token is "." we can ignore it
	     if it's not at the beginning of a path.  */
	  if ((unixname[0] == '.') && (unixname[1] == '/'))
	    {
	      /* remove it at beginning of path.  */
	      if (((unixname == fullname)		/* no device spec */
		   && (fullname+2 != basename))		/* starts with ./ */
							/* or */
		  || ((basename[-1] == ':')		/* device spec */
		      && (unixname-1 == basename)))	/* and ./ afterwards */
		*local_ptr++ = '.';			/* make '[.' start of path. */
	      unixname += 2;
	      continue;
	    }

	  /* Add a subdirectory spec.  Do not duplicate "." */
	  if (local_ptr[-1] != '.'
	      && local_ptr[-1] != '['
	      && local_ptr[-1] != '<')
	    *local_ptr++ = '.';

	  /* If this is ".." then the spec becomes "-" */
	  if ((unixname[0] == '.')
	      && (unixname[1] == '.')
	      && (unixname[2] == '/'))
	    {
	      /* Add "-" and skip the ".." */
	      if ((local_ptr[-1] == '.')
		  && (local_ptr[-2] == '['))
		local_ptr--;			/* prevent [.- */
	      *local_ptr++ = '-';
	      unixname += 3;
	      continue;
	    }

	  /* Copy the subdirectory */
	  while (*unixname != '/')
	    *local_ptr++= *unixname++;

	  unixname++;			/* Skip the "/" */
	}

      /* Close the directory specification */
      if (local_ptr[-1] == '.')		/* no trailing periods */
	local_ptr--;

      if (local_ptr[-1] == '[')		/* no dir needed */
	local_ptr--;
      else
	*local_ptr++ = ']';
    }

  /* Now add the filename.  */

  while (*unixname)
    *local_ptr++ = *unixname++;
  *local_ptr = 0;

  /* Now append it to the original VMS spec.  */

  strcpy ((must_revert==1)?fullname:basename, Local);

  /* If we put a [000000] in the filename, try to open it first.  If this fails,
     remove the [000000], and return that name.  This provides flexibility
     to the user in that they can use both rooted and non-rooted logical names
     to point to the location of the file.  */

  if (check_filename_before_returning)
    {
      f = open (fullname, O_RDONLY, 0666);
      if (f >= 0)
	{
	  /* The file name is OK as it is, so return it as is.  */
	  close (f);
	  return;
	}

      /* The filename did not work.  Try to remove the [000000] from the name,
	 and return it.  */

      basename = index (fullname, '[');
      local_ptr = index (fullname, ']') + 1;

      /* Source and destination are overlapping parts of FULLNAME, which
	 strcpy does not allow.  The destination lies below the source,
	 so a plain forward copy (terminator included) is safe.  */
      while ((*basename++ = *local_ptr++) != 0)
	;
    }

  return;
}
1605 #endif /* VMS */