gcc:
[official-gcc.git] / gcc / fortran / scanner.c
blob738e17280a0603c951cb7ae57ca87b37d28a14ce
1 /* Character scanner.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by Andy Vaught
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 /* Set of subroutines to (ultimately) return the next character to the
24 various matching subroutines. This file's job is to read files and
25 build up lines that are parsed by the parser. This means that we
26 handle continuation lines and "include" lines.
28 The first thing the scanner does is to load an entire file into
29 memory. We load the entire file into memory for a couple reasons.
30 The first is that we want to be able to deal with nonseekable input
31 (pipes, stdin) and there is a lot of backing up involved during
32 parsing.
34 The second is that we want to be able to print the locus of errors,
35 and an error on line 999999 could conflict with something on line
36 one. Given nonseekable input, we've got to store the whole thing.
38 One thing that helps are the column truncation limits that give us
39 an upper bound on the size of individual lines. We don't store the
40 truncated stuff.
42 From the scanner's viewpoint, the higher level subroutines ask for
43 new characters and do a lot of jumping backwards. */
45 #include "config.h"
46 #include "system.h"
47 #include "gfortran.h"
49 /* Structure for holding module and include file search path. */
50 typedef struct gfc_directorylist
52 char *path;
53 struct gfc_directorylist *next;
55 gfc_directorylist;
57 /* List of include file search directories. */
58 static gfc_directorylist *include_dirs;
60 static gfc_file *file_head, *current_file;
62 static int continue_flag, end_flag;
64 gfc_source_form gfc_current_form;
65 static gfc_linebuf *line_head, *line_tail;
67 locus gfc_current_locus;
68 const char *gfc_source_file;
71 /* Main scanner initialization. */
73 void
74 gfc_scanner_init_1 (void)
76 file_head = NULL;
77 line_head = NULL;
78 line_tail = NULL;
80 end_flag = 0;
84 /* Main scanner destructor. */
86 void
87 gfc_scanner_done_1 (void)
89 gfc_linebuf *lb;
90 gfc_file *f;
92 while(line_head != NULL)
94 lb = line_head->next;
95 gfc_free(line_head);
96 line_head = lb;
99 while(file_head != NULL)
101 f = file_head->next;
102 gfc_free(file_head->filename);
103 gfc_free(file_head);
104 file_head = f;
110 /* Adds path to the list pointed to by list. */
112 void
113 gfc_add_include_path (const char *path)
115 gfc_directorylist *dir;
116 const char *p;
118 p = path;
119 while (*p == ' ' || *p == '\t') /* someone might do 'gfortran "-I include"' */
120 if (*p++ == '\0')
121 return;
123 dir = include_dirs;
124 if (!dir)
126 dir = include_dirs = gfc_getmem (sizeof (gfc_directorylist));
128 else
130 while (dir->next)
131 dir = dir->next;
133 dir->next = gfc_getmem (sizeof (gfc_directorylist));
134 dir = dir->next;
137 dir->next = NULL;
138 dir->path = gfc_getmem (strlen (p) + 2);
139 strcpy (dir->path, p);
140 strcat (dir->path, "/"); /* make '/' last character */
144 /* Release resources allocated for options. */
146 void
147 gfc_release_include_path (void)
149 gfc_directorylist *p;
151 gfc_free (gfc_option.module_dir);
152 while (include_dirs != NULL)
154 p = include_dirs;
155 include_dirs = include_dirs->next;
156 gfc_free (p->path);
157 gfc_free (p);
161 /* Opens file for reading, searching through the include directories
162 given if necessary. */
164 FILE *
165 gfc_open_included_file (const char *name)
167 char *fullname;
168 gfc_directorylist *p;
169 FILE *f;
171 f = gfc_open_file (name);
172 if (f != NULL)
173 return f;
175 for (p = include_dirs; p; p = p->next)
177 fullname = (char *) alloca(strlen (p->path) + strlen (name) + 1);
178 strcpy (fullname, p->path);
179 strcat (fullname, name);
181 f = gfc_open_file (fullname);
182 if (f != NULL)
183 return f;
186 return NULL;
189 /* Test to see if we're at the end of the main source file. */
192 gfc_at_end (void)
195 return end_flag;
199 /* Test to see if we're at the end of the current file. */
202 gfc_at_eof (void)
205 if (gfc_at_end ())
206 return 1;
208 if (line_head == NULL)
209 return 1; /* Null file */
211 if (gfc_current_locus.lb == NULL)
212 return 1;
214 return 0;
218 /* Test to see if we're at the beginning of a new line. */
221 gfc_at_bol (void)
223 if (gfc_at_eof ())
224 return 1;
226 return (gfc_current_locus.nextc == gfc_current_locus.lb->line);
230 /* Test to see if we're at the end of a line. */
233 gfc_at_eol (void)
236 if (gfc_at_eof ())
237 return 1;
239 return (*gfc_current_locus.nextc == '\0');
243 /* Advance the current line pointer to the next line. */
245 void
246 gfc_advance_line (void)
248 if (gfc_at_end ())
249 return;
251 if (gfc_current_locus.lb == NULL)
253 end_flag = 1;
254 return;
257 gfc_current_locus.lb = gfc_current_locus.lb->next;
259 if (gfc_current_locus.lb != NULL)
260 gfc_current_locus.nextc = gfc_current_locus.lb->line;
261 else
263 gfc_current_locus.nextc = NULL;
264 end_flag = 1;
269 /* Get the next character from the input, advancing gfc_current_file's
270 locus. When we hit the end of the line or the end of the file, we
271 start returning a '\n' in order to complete the current statement.
272 No Fortran line conventions are implemented here.
274 Requiring explicit advances to the next line prevents the parse
275 pointer from being on the wrong line if the current statement ends
276 prematurely. */
278 static int
279 next_char (void)
281 int c;
283 if (gfc_current_locus.nextc == NULL)
284 return '\n';
286 c = *gfc_current_locus.nextc++;
287 if (c == '\0')
289 gfc_current_locus.nextc--; /* Remain on this line. */
290 c = '\n';
293 return c;
296 /* Skip a comment. When we come here the parse pointer is positioned
297 immediately after the comment character. If we ever implement
298 compiler directives withing comments, here is where we parse the
299 directive. */
301 static void
302 skip_comment_line (void)
304 char c;
308 c = next_char ();
310 while (c != '\n');
312 gfc_advance_line ();
316 /* Comment lines are null lines, lines containing only blanks or lines
317 on which the first nonblank line is a '!'. */
319 static void
320 skip_free_comments (void)
322 locus start;
323 char c;
325 for (;;)
327 start = gfc_current_locus;
328 if (gfc_at_eof ())
329 break;
333 c = next_char ();
335 while (gfc_is_whitespace (c));
337 if (c == '\n')
339 gfc_advance_line ();
340 continue;
343 if (c == '!')
345 skip_comment_line ();
346 continue;
349 break;
352 gfc_current_locus = start;
356 /* Skip comment lines in fixed source mode. We have the same rules as
357 in skip_free_comment(), except that we can have a 'c', 'C' or '*'
358 in column 1, and a '!' cannot be in column 6. Also, we deal with
359 lines with 'd' or 'D' in column 1, if the user requested this. */
361 static void
362 skip_fixed_comments (void)
364 locus start;
365 int col;
366 char c;
368 for (;;)
370 start = gfc_current_locus;
371 if (gfc_at_eof ())
372 break;
374 c = next_char ();
375 if (c == '\n')
377 gfc_advance_line ();
378 continue;
381 if (c == '!' || c == 'c' || c == 'C' || c == '*')
383 skip_comment_line ();
384 continue;
387 if (gfc_option.flag_d_lines != -1 && (c == 'd' || c == 'D'))
389 if (gfc_option.flag_d_lines == 0)
391 skip_comment_line ();
392 continue;
394 else
395 *start.nextc = c = ' ';
398 col = 1;
400 while (gfc_is_whitespace (c))
402 c = next_char ();
403 col++;
406 if (c == '\n')
408 gfc_advance_line ();
409 continue;
412 if (col != 6 && c == '!')
414 skip_comment_line ();
415 continue;
418 break;
421 gfc_current_locus = start;
425 /* Skips the current line if it is a comment. Assumes that we are at
426 the start of the current line. */
428 void
429 gfc_skip_comments (void)
432 if (!gfc_at_bol () || gfc_current_form == FORM_FREE)
433 skip_free_comments ();
434 else
435 skip_fixed_comments ();
439 /* Get the next character from the input, taking continuation lines
440 and end-of-line comments into account. This implies that comment
441 lines between continued lines must be eaten here. For higher-level
442 subroutines, this flattens continued lines into a single logical
443 line. The in_string flag denotes whether we're inside a character
444 context or not. */
447 gfc_next_char_literal (int in_string)
449 locus old_loc;
450 int i, c;
452 continue_flag = 0;
454 restart:
455 c = next_char ();
456 if (gfc_at_end ())
457 return c;
459 if (gfc_current_form == FORM_FREE)
462 if (!in_string && c == '!')
464 /* This line can't be continued */
467 c = next_char ();
469 while (c != '\n');
471 /* Avoid truncation warnings for comment ending lines. */
472 gfc_current_locus.lb->truncated = 0;
474 goto done;
477 if (c != '&')
478 goto done;
480 /* If the next nonblank character is a ! or \n, we've got a
481 continuation line. */
482 old_loc = gfc_current_locus;
484 c = next_char ();
485 while (gfc_is_whitespace (c))
486 c = next_char ();
488 /* Character constants to be continued cannot have commentary
489 after the '&'. */
491 if (in_string && c != '\n')
493 gfc_current_locus = old_loc;
494 c = '&';
495 goto done;
498 if (c != '!' && c != '\n')
500 gfc_current_locus = old_loc;
501 c = '&';
502 goto done;
505 continue_flag = 1;
506 if (c == '!')
507 skip_comment_line ();
508 else
509 gfc_advance_line ();
511 /* We've got a continuation line and need to find where it continues.
512 First eat any comment lines. */
513 gfc_skip_comments ();
515 /* Now that we have a non-comment line, probe ahead for the
516 first non-whitespace character. If it is another '&', then
517 reading starts at the next character, otherwise we must back
518 up to where the whitespace started and resume from there. */
520 old_loc = gfc_current_locus;
522 c = next_char ();
523 while (gfc_is_whitespace (c))
524 c = next_char ();
526 if (c != '&')
527 gfc_current_locus = old_loc;
530 else
532 /* Fixed form continuation. */
533 if (!in_string && c == '!')
535 /* Skip comment at end of line. */
538 c = next_char ();
540 while (c != '\n');
542 /* Avoid truncation warnings for comment ending lines. */
543 gfc_current_locus.lb->truncated = 0;
546 if (c != '\n')
547 goto done;
549 continue_flag = 1;
550 old_loc = gfc_current_locus;
552 gfc_advance_line ();
553 gfc_skip_comments ();
555 /* See if this line is a continuation line. */
556 for (i = 0; i < 5; i++)
558 c = next_char ();
559 if (c != ' ')
560 goto not_continuation;
563 c = next_char ();
564 if (c == '0' || c == ' ')
565 goto not_continuation;
568 /* Ready to read first character of continuation line, which might
569 be another continuation line! */
570 goto restart;
572 not_continuation:
573 c = '\n';
574 gfc_current_locus = old_loc;
576 done:
577 continue_flag = 0;
578 return c;
582 /* Get the next character of input, folded to lowercase. In fixed
583 form mode, we also ignore spaces. When matcher subroutines are
584 parsing character literals, they have to call
585 gfc_next_char_literal(). */
588 gfc_next_char (void)
590 int c;
594 c = gfc_next_char_literal (0);
596 while (gfc_current_form == FORM_FIXED && gfc_is_whitespace (c));
598 return TOLOWER (c);
603 gfc_peek_char (void)
605 locus old_loc;
606 int c;
608 old_loc = gfc_current_locus;
609 c = gfc_next_char ();
610 gfc_current_locus = old_loc;
612 return c;
616 /* Recover from an error. We try to get past the current statement
617 and get lined up for the next. The next statement follows a '\n'
618 or a ';'. We also assume that we are not within a character
619 constant, and deal with finding a '\'' or '"'. */
621 void
622 gfc_error_recovery (void)
624 char c, delim;
626 if (gfc_at_eof ())
627 return;
629 for (;;)
631 c = gfc_next_char ();
632 if (c == '\n' || c == ';')
633 break;
635 if (c != '\'' && c != '"')
637 if (gfc_at_eof ())
638 break;
639 continue;
641 delim = c;
643 for (;;)
645 c = next_char ();
647 if (c == delim)
648 break;
649 if (c == '\n')
650 return;
651 if (c == '\\')
653 c = next_char ();
654 if (c == '\n')
655 return;
658 if (gfc_at_eof ())
659 break;
664 /* Read ahead until the next character to be read is not whitespace. */
666 void
667 gfc_gobble_whitespace (void)
669 locus old_loc;
670 int c;
674 old_loc = gfc_current_locus;
675 c = gfc_next_char_literal (0);
677 while (gfc_is_whitespace (c));
679 gfc_current_locus = old_loc;
683 /* Load a single line into pbuf.
685 If pbuf points to a NULL pointer, it is allocated.
686 We truncate lines that are too long, unless we're dealing with
687 preprocessor lines or if the option -ffixed-line-length-none is set,
688 in which case we reallocate the buffer to fit the entire line, if
689 need be.
690 In fixed mode, we expand a tab that occurs within the statement
691 label region to expand to spaces that leave the next character in
692 the source region.
693 load_line returns wether the line was truncated. */
695 static int
696 load_line (FILE * input, char **pbuf, int *pbuflen)
698 int c, maxlen, i, preprocessor_flag, buflen = *pbuflen;
699 int trunc_flag = 0;
700 char *buffer;
702 /* Determine the maximum allowed line length. */
703 if (gfc_current_form == FORM_FREE)
704 maxlen = GFC_MAX_LINE;
705 else
706 maxlen = gfc_option.fixed_line_length;
708 if (*pbuf == NULL)
710 /* Allocate the line buffer, storing its length into buflen. */
711 if (maxlen > 0)
712 buflen = maxlen;
713 else
714 buflen = GFC_MAX_LINE;
716 *pbuf = gfc_getmem (buflen + 1);
719 i = 0;
720 buffer = *pbuf;
722 preprocessor_flag = 0;
723 c = fgetc (input);
724 if (c == '#')
725 /* In order to not truncate preprocessor lines, we have to
726 remember that this is one. */
727 preprocessor_flag = 1;
728 ungetc (c, input);
730 for (;;)
732 c = fgetc (input);
734 if (c == EOF)
735 break;
736 if (c == '\n')
737 break;
739 if (c == '\r')
740 continue; /* Gobble characters. */
741 if (c == '\0')
742 continue;
744 if (c == '\032')
746 /* Ctrl-Z ends the file. */
747 while (fgetc (input) != EOF);
748 break;
751 if (gfc_current_form == FORM_FIXED && c == '\t' && i <= 6)
752 { /* Tab expansion. */
753 while (i <= 6)
755 *buffer++ = ' ';
756 i++;
759 continue;
762 *buffer++ = c;
763 i++;
765 if (maxlen == 0 || preprocessor_flag)
767 if (i >= buflen)
769 /* Reallocate line buffer to double size to hold the
770 overlong line. */
771 buflen = buflen * 2;
772 *pbuf = xrealloc (*pbuf, buflen + 1);
773 buffer = (*pbuf)+i;
776 else if (i >= maxlen)
778 /* Truncate the rest of the line. */
779 for (;;)
781 c = fgetc (input);
782 if (c == '\n' || c == EOF)
783 break;
785 trunc_flag = 1;
788 ungetc ('\n', input);
792 /* Pad lines to the selected line length in fixed form. */
793 if (gfc_current_form == FORM_FIXED
794 && gfc_option.fixed_line_length > 0
795 && !preprocessor_flag
796 && c != EOF)
797 while (i++ < gfc_option.fixed_line_length)
798 *buffer++ = ' ';
800 *buffer = '\0';
801 *pbuflen = buflen;
803 return trunc_flag;
807 /* Get a gfc_file structure, initialize it and add it to
808 the file stack. */
810 static gfc_file *
811 get_file (const char *name, enum lc_reason reason ATTRIBUTE_UNUSED)
813 gfc_file *f;
815 f = gfc_getmem (sizeof (gfc_file));
817 f->filename = gfc_getmem (strlen (name) + 1);
818 strcpy (f->filename, name);
820 f->next = file_head;
821 file_head = f;
823 f->included_by = current_file;
824 if (current_file != NULL)
825 f->inclusion_line = current_file->line;
827 #ifdef USE_MAPPED_LOCATION
828 linemap_add (&line_table, reason, false, f->filename, 1);
829 #endif
831 return f;
834 /* Deal with a line from the C preprocessor. The
835 initial octothorp has already been seen. */
837 static void
838 preprocessor_line (char *c)
840 bool flag[5];
841 int i, line;
842 char *filename;
843 gfc_file *f;
844 int escaped;
846 c++;
847 while (*c == ' ' || *c == '\t')
848 c++;
850 if (*c < '0' || *c > '9')
851 goto bad_cpp_line;
853 line = atoi (c);
855 c = strchr (c, ' ');
856 if (c == NULL)
858 /* No file name given. Set new line number. */
859 current_file->line = line;
860 return;
863 /* Skip spaces. */
864 while (*c == ' ' || *c == '\t')
865 c++;
867 /* Skip quote. */
868 if (*c != '"')
869 goto bad_cpp_line;
870 ++c;
872 filename = c;
874 /* Make filename end at quote. */
875 escaped = false;
876 while (*c && ! (! escaped && *c == '"'))
878 if (escaped)
879 escaped = false;
880 else
881 escaped = *c == '\\';
882 ++c;
885 if (! *c)
886 /* Preprocessor line has no closing quote. */
887 goto bad_cpp_line;
889 *c++ = '\0';
893 /* Get flags. */
895 flag[1] = flag[2] = flag[3] = flag[4] = false;
897 for (;;)
899 c = strchr (c, ' ');
900 if (c == NULL)
901 break;
903 c++;
904 i = atoi (c);
906 if (1 <= i && i <= 4)
907 flag[i] = true;
910 /* Interpret flags. */
912 if (flag[1]) /* Starting new file. */
914 f = get_file (filename, LC_RENAME);
915 f->up = current_file;
916 current_file = f;
919 if (flag[2]) /* Ending current file. */
921 if (!current_file->up
922 || strcmp (current_file->up->filename, filename) != 0)
924 gfc_warning_now ("%s:%d: file %s left but not entered",
925 current_file->filename, current_file->line,
926 filename);
927 return;
929 current_file = current_file->up;
932 /* The name of the file can be a temporary file produced by
933 cpp. Replace the name if it is different. */
935 if (strcmp (current_file->filename, filename) != 0)
937 gfc_free (current_file->filename);
938 current_file->filename = gfc_getmem (strlen (filename) + 1);
939 strcpy (current_file->filename, filename);
942 /* Set new line number. */
943 current_file->line = line;
944 return;
946 bad_cpp_line:
947 gfc_warning_now ("%s:%d: Illegal preprocessor directive",
948 current_file->filename, current_file->line);
949 current_file->line++;
953 static try load_file (const char *, bool);
955 /* include_line()-- Checks a line buffer to see if it is an include
956 line. If so, we call load_file() recursively to load the included
957 file. We never return a syntax error because a statement like
958 "include = 5" is perfectly legal. We return false if no include was
959 processed or true if we matched an include. */
961 static bool
962 include_line (char *line)
964 char quote, *c, *begin, *stop;
966 c = line;
967 while (*c == ' ' || *c == '\t')
968 c++;
970 if (strncasecmp (c, "include", 7))
971 return false;
973 c += 7;
974 while (*c == ' ' || *c == '\t')
975 c++;
977 /* Find filename between quotes. */
979 quote = *c++;
980 if (quote != '"' && quote != '\'')
981 return false;
983 begin = c;
985 while (*c != quote && *c != '\0')
986 c++;
988 if (*c == '\0')
989 return false;
991 stop = c++;
993 while (*c == ' ' || *c == '\t')
994 c++;
996 if (*c != '\0' && *c != '!')
997 return false;
999 /* We have an include line at this point. */
1001 *stop = '\0'; /* It's ok to trash the buffer, as this line won't be
1002 read by anything else. */
1004 load_file (begin, false);
1005 return true;
1008 /* Load a file into memory by calling load_line until the file ends. */
1010 static try
1011 load_file (const char *filename, bool initial)
1013 char *line;
1014 gfc_linebuf *b;
1015 gfc_file *f;
1016 FILE *input;
1017 int len, line_len;
1019 for (f = current_file; f; f = f->up)
1020 if (strcmp (filename, f->filename) == 0)
1022 gfc_error_now ("File '%s' is being included recursively", filename);
1023 return FAILURE;
1026 if (initial)
1028 input = gfc_open_file (filename);
1029 if (input == NULL)
1031 gfc_error_now ("Can't open file '%s'", filename);
1032 return FAILURE;
1035 else
1037 input = gfc_open_included_file (filename);
1038 if (input == NULL)
1040 gfc_error_now ("Can't open included file '%s'", filename);
1041 return FAILURE;
1045 /* Load the file. */
1047 f = get_file (filename, initial ? LC_RENAME : LC_ENTER);
1048 f->up = current_file;
1049 current_file = f;
1050 current_file->line = 1;
1051 line = NULL;
1052 line_len = 0;
1054 for (;;)
1056 int trunc = load_line (input, &line, &line_len);
1058 len = strlen (line);
1059 if (feof (input) && len == 0)
1060 break;
1062 /* There are three things this line can be: a line of Fortran
1063 source, an include line or a C preprocessor directive. */
1065 if (line[0] == '#')
1067 preprocessor_line (line);
1068 continue;
1071 if (include_line (line))
1073 current_file->line++;
1074 continue;
1077 /* Add line. */
1079 b = gfc_getmem (gfc_linebuf_header_size + len + 1);
1081 #ifdef USE_MAPPED_LOCATION
1082 b->location
1083 = linemap_line_start (&line_table, current_file->line++, 120);
1084 #else
1085 b->linenum = current_file->line++;
1086 #endif
1087 b->file = current_file;
1088 b->truncated = trunc;
1089 strcpy (b->line, line);
1091 if (line_head == NULL)
1092 line_head = b;
1093 else
1094 line_tail->next = b;
1096 line_tail = b;
1099 /* Release the line buffer allocated in load_line. */
1100 gfc_free (line);
1102 fclose (input);
1104 current_file = current_file->up;
1105 #ifdef USE_MAPPED_LOCATION
1106 linemap_add (&line_table, LC_LEAVE, 0, NULL, 0);
1107 #endif
1108 return SUCCESS;
1112 /* Open a new file and start scanning from that file. Returns SUCCESS
1113 if everything went OK, FAILURE otherwise. If form == FORM_UKNOWN
1114 it tries to determine the source form from the filename, defaulting
1115 to free form. */
1118 gfc_new_file (void)
1120 try result;
1122 result = load_file (gfc_source_file, true);
1124 gfc_current_locus.lb = line_head;
1125 gfc_current_locus.nextc = (line_head == NULL) ? NULL : line_head->line;
1127 #if 0 /* Debugging aid. */
1128 for (; line_head; line_head = line_head->next)
1129 gfc_status ("%s:%3d %s\n", line_head->file->filename,
1130 #ifdef USE_MAPPED_LOCATION
1131 LOCATION_LINE (line_head->location),
1132 #else
1133 line_head->linenum,
1134 #endif
1135 line_head->line);
1137 exit (0);
1138 #endif
1140 return result;