1 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
28 /* format.c-- parse a FORMAT string into a binary format suitable for
29 * interpretation during I/O statements */
36 #define FARRAY_SIZE 64
38 typedef struct fnode_array
40 struct fnode_array
*next
;
41 fnode array
[FARRAY_SIZE
];
45 typedef struct format_data
47 char *format_string
, *string
;
50 format_token saved_token
;
51 int value
, format_string_len
, reversion_ok
;
53 const fnode
*saved_format
;
59 static const fnode colon_node
= { FMT_COLON
, 0, NULL
, NULL
, {{ 0, 0, 0 }}, 0,
64 static const char posint_required
[] = "Positive width required in format",
65 period_required
[] = "Period required in format",
66 nonneg_required
[] = "Nonnegative width required in format",
67 unexpected_element
[] = "Unexpected element '%c' in format\n",
68 unexpected_end
[] = "Unexpected end of format string",
69 bad_string
[] = "Unterminated character constant in format",
70 bad_hollerith
[] = "Hollerith constant extends past the end of the format",
71 reversion_error
[] = "Exhausted data descriptors in format",
72 zero_width
[] = "Zero width in format descriptor";
74 /* The following routines support caching format data from parsed format strings
75 into a hash table. This avoids repeatedly parsing duplicate format strings
76 or format strings in I/O statements that are repeated in loops. */
79 /* Traverse the table and free all data. */
82 free_format_hash_table (gfc_unit
*u
)
86 /* free_format_data handles any NULL pointers. */
87 for (i
= 0; i
< FORMAT_HASH_SIZE
; i
++)
89 if (u
->format_hash_table
[i
].hashed_fmt
!= NULL
)
91 free_format_data (u
->format_hash_table
[i
].hashed_fmt
);
92 free_mem (u
->format_hash_table
[i
].key
);
94 u
->format_hash_table
[i
].key
= NULL
;
95 u
->format_hash_table
[i
].key_len
= 0;
96 u
->format_hash_table
[i
].hashed_fmt
= NULL
;
100 /* Traverse the format_data structure and reset the fnode counters. */
103 reset_node (fnode
*fn
)
110 if (fn
->format
!= FMT_LPAREN
)
113 for (f
= fn
->u
.child
; f
; f
= f
->next
)
115 if (f
->format
== FMT_RPAREN
)
122 reset_fnode_counters (st_parameter_dt
*dtp
)
129 /* Clear this pointer at the head so things start at the right place. */
130 fmt
->array
.array
[0].current
= NULL
;
132 for (f
= fmt
->last
->array
[0].u
.child
; f
; f
= f
->next
)
137 /* A simple hashing function to generate an index into the hash table. */
140 uint32_t format_hash (st_parameter_dt
*dtp
)
143 gfc_charlen_type key_len
;
147 /* Hash the format string. Super simple, but what the heck! */
149 key_len
= dtp
->format_len
;
150 for (i
= 0; i
< key_len
; i
++)
152 hash
&= (FORMAT_HASH_SIZE
- 1);
158 save_parsed_format (st_parameter_dt
*dtp
)
163 hash
= format_hash (dtp
);
164 u
= dtp
->u
.p
.current_unit
;
166 /* Index into the hash table. We are simply replacing whatever is there
167 relying on probability. */
168 if (u
->format_hash_table
[hash
].hashed_fmt
!= NULL
)
169 free_format_data (u
->format_hash_table
[hash
].hashed_fmt
);
170 u
->format_hash_table
[hash
].hashed_fmt
= NULL
;
172 if (u
->format_hash_table
[hash
].key
!= NULL
)
173 free_mem (u
->format_hash_table
[hash
].key
);
174 u
->format_hash_table
[hash
].key
= get_mem (dtp
->format_len
);
175 memcpy (u
->format_hash_table
[hash
].key
, dtp
->format
, dtp
->format_len
);
177 u
->format_hash_table
[hash
].key_len
= dtp
->format_len
;
178 u
->format_hash_table
[hash
].hashed_fmt
= dtp
->u
.p
.fmt
;
183 find_parsed_format (st_parameter_dt
*dtp
)
188 hash
= format_hash (dtp
);
189 u
= dtp
->u
.p
.current_unit
;
191 if (u
->format_hash_table
[hash
].key
!= NULL
)
193 /* See if it matches. */
194 if (u
->format_hash_table
[hash
].key_len
== dtp
->format_len
)
196 /* So far so good. */
197 if (strncmp (u
->format_hash_table
[hash
].key
,
198 dtp
->format
, dtp
->format_len
) == 0)
199 return u
->format_hash_table
[hash
].hashed_fmt
;
206 /* next_char()-- Return the next character in the format string.
207 * Returns -1 when the string is done. If the literal flag is set,
208 * spaces are significant, otherwise they are not. */
211 next_char (format_data
*fmt
, int literal
)
217 if (fmt
->format_string_len
== 0)
220 fmt
->format_string_len
--;
221 c
= toupper (*fmt
->format_string
++);
222 fmt
->error_element
= c
;
224 while ((c
== ' ' || c
== '\t') && !literal
);
230 /* unget_char()-- Back up one character position. */
232 #define unget_char(fmt) \
233 { fmt->format_string--; fmt->format_string_len++; }
236 /* get_fnode()-- Allocate a new format node, inserting it into the
237 * current singly linked list. These are initially allocated from the
241 get_fnode (format_data
*fmt
, fnode
**head
, fnode
**tail
, format_token t
)
245 if (fmt
->avail
== &fmt
->last
->array
[FARRAY_SIZE
])
247 fmt
->last
->next
= get_mem (sizeof (fnode_array
));
248 fmt
->last
= fmt
->last
->next
;
249 fmt
->last
->next
= NULL
;
250 fmt
->avail
= &fmt
->last
->array
[0];
253 memset (f
, '\0', sizeof (fnode
));
265 f
->source
= fmt
->format_string
;
270 /* free_format_data()-- Free all allocated format data. */
273 free_format_data (format_data
*fmt
)
275 fnode_array
*fa
, *fa_next
;
281 for (fa
= fmt
->array
.next
; fa
; fa
= fa_next
)
292 /* format_lex()-- Simple lexical analyzer for getting the next token
293 * in a FORMAT string. We support a one-level token pushback in the
294 * fmt->saved_token variable. */
297 format_lex (format_data
*fmt
)
304 if (fmt
->saved_token
!= FMT_NONE
)
306 token
= fmt
->saved_token
;
307 fmt
->saved_token
= FMT_NONE
;
312 c
= next_char (fmt
, 0);
329 c
= next_char (fmt
, 0);
336 fmt
->value
= c
- '0';
340 c
= next_char (fmt
, 0);
344 fmt
->value
= 10 * fmt
->value
+ c
- '0';
350 fmt
->value
= -fmt
->value
;
351 token
= FMT_SIGNED_INT
;
364 fmt
->value
= c
- '0';
368 c
= next_char (fmt
, 0);
372 fmt
->value
= 10 * fmt
->value
+ c
- '0';
376 token
= (fmt
->value
== 0) ? FMT_ZERO
: FMT_POSINT
;
400 switch (next_char (fmt
, 0))
421 switch (next_char (fmt
, 0))
438 switch (next_char (fmt
, 0))
458 fmt
->string
= fmt
->format_string
;
459 fmt
->value
= 0; /* This is the length of the string */
463 c
= next_char (fmt
, 1);
466 token
= FMT_BADSTRING
;
467 fmt
->error
= bad_string
;
473 c
= next_char (fmt
, 1);
477 token
= FMT_BADSTRING
;
478 fmt
->error
= bad_string
;
516 switch (next_char (fmt
, 0))
548 switch (next_char (fmt
, 0))
576 /* parse_format_list()-- Parse a format list. Assumes that a left
577 * paren has already been seen. Returns a list representing the
578 * parenthesis node which contains the rest of the list. */
581 parse_format_list (st_parameter_dt
*dtp
, bool *save_ok
)
584 format_token t
, u
, t2
;
586 format_data
*fmt
= dtp
->u
.p
.fmt
;
592 /* Get the next format item */
594 t
= format_lex (fmt
);
601 t
= format_lex (fmt
);
605 get_fnode (fmt
, &head
, &tail
, FMT_LPAREN
);
606 tail
->repeat
= repeat
;
607 tail
->u
.child
= parse_format_list (dtp
, &saveit
);
608 if (fmt
->error
!= NULL
)
614 get_fnode (fmt
, &head
, &tail
, FMT_SLASH
);
615 tail
->repeat
= repeat
;
619 get_fnode (fmt
, &head
, &tail
, FMT_X
);
621 tail
->u
.k
= fmt
->value
;
632 get_fnode (fmt
, &head
, &tail
, FMT_LPAREN
);
634 tail
->u
.child
= parse_format_list (dtp
, &saveit
);
635 if (fmt
->error
!= NULL
)
640 case FMT_SIGNED_INT
: /* Signed integer can only precede a P format. */
641 case FMT_ZERO
: /* Same for zero. */
642 t
= format_lex (fmt
);
645 fmt
->error
= "Expected P edit descriptor in format";
650 get_fnode (fmt
, &head
, &tail
, FMT_P
);
651 tail
->u
.k
= fmt
->value
;
654 t
= format_lex (fmt
);
655 if (t
== FMT_F
|| t
== FMT_EN
|| t
== FMT_ES
|| t
== FMT_D
656 || t
== FMT_G
|| t
== FMT_E
)
662 fmt
->saved_token
= t
;
665 case FMT_P
: /* P and X require a prior number */
666 fmt
->error
= "P descriptor requires leading scale factor";
673 If we would be pedantic in the library, we would have to reject
674 an X descriptor without an integer prefix:
676 fmt->error = "X descriptor requires leading space count";
679 However, this is an extension supported by many Fortran compilers,
680 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
681 runtime library, and make the front end reject it if the compiler
682 is in pedantic mode. The interpretation of 'X' is '1X'.
684 get_fnode (fmt
, &head
, &tail
, FMT_X
);
690 /* TODO: Find out why is is necessary to turn off format caching. */
692 get_fnode (fmt
, &head
, &tail
, FMT_STRING
);
693 tail
->u
.string
.p
= fmt
->string
;
694 tail
->u
.string
.length
= fmt
->value
;
700 notify_std (&dtp
->common
, GFC_STD_F2003
, "Fortran 2003: DC or DP "
701 "descriptor not allowed");
708 get_fnode (fmt
, &head
, &tail
, t
);
713 get_fnode (fmt
, &head
, &tail
, FMT_COLON
);
718 get_fnode (fmt
, &head
, &tail
, FMT_SLASH
);
724 get_fnode (fmt
, &head
, &tail
, FMT_DOLLAR
);
726 notify_std (&dtp
->common
, GFC_STD_GNU
, "Extension: $ descriptor");
732 t2
= format_lex (fmt
);
733 if (t2
!= FMT_POSINT
)
735 fmt
->error
= posint_required
;
738 get_fnode (fmt
, &head
, &tail
, t
);
739 tail
->u
.n
= fmt
->value
;
759 get_fnode (fmt
, &head
, &tail
, FMT_STRING
);
760 if (fmt
->format_string_len
< 1)
762 fmt
->error
= bad_hollerith
;
766 tail
->u
.string
.p
= fmt
->format_string
;
767 tail
->u
.string
.length
= 1;
770 fmt
->format_string
++;
771 fmt
->format_string_len
--;
776 fmt
->error
= unexpected_end
;
786 fmt
->error
= unexpected_element
;
790 /* In this state, t must currently be a data descriptor. Deal with
791 things that can/must follow the descriptor */
796 t
= format_lex (fmt
);
799 fmt
->error
= "Repeat count cannot follow P descriptor";
803 fmt
->saved_token
= t
;
804 get_fnode (fmt
, &head
, &tail
, FMT_P
);
809 t
= format_lex (fmt
);
812 if (notification_std(GFC_STD_GNU
) == ERROR
)
814 fmt
->error
= posint_required
;
819 fmt
->saved_token
= t
;
820 fmt
->value
= 1; /* Default width */
821 notify_std (&dtp
->common
, GFC_STD_GNU
, posint_required
);
825 get_fnode (fmt
, &head
, &tail
, FMT_L
);
826 tail
->u
.n
= fmt
->value
;
827 tail
->repeat
= repeat
;
831 t
= format_lex (fmt
);
834 fmt
->error
= zero_width
;
840 fmt
->saved_token
= t
;
841 fmt
->value
= -1; /* Width not present */
844 get_fnode (fmt
, &head
, &tail
, FMT_A
);
845 tail
->repeat
= repeat
;
846 tail
->u
.n
= fmt
->value
;
855 get_fnode (fmt
, &head
, &tail
, t
);
856 tail
->repeat
= repeat
;
858 u
= format_lex (fmt
);
859 if (t
== FMT_G
&& u
== FMT_ZERO
)
861 if (notification_std (GFC_STD_F2008
) == ERROR
862 || dtp
->u
.p
.mode
== READING
)
864 fmt
->error
= zero_width
;
868 u
= format_lex (fmt
);
871 fmt
->saved_token
= u
;
875 u
= format_lex (fmt
);
878 fmt
->error
= posint_required
;
881 tail
->u
.real
.d
= fmt
->value
;
884 if (t
== FMT_F
|| dtp
->u
.p
.mode
== WRITING
)
886 if (u
!= FMT_POSINT
&& u
!= FMT_ZERO
)
888 fmt
->error
= nonneg_required
;
896 fmt
->error
= posint_required
;
901 tail
->u
.real
.w
= fmt
->value
;
903 t
= format_lex (fmt
);
906 /* We treat a missing decimal descriptor as 0. Note: This is only
907 allowed if -std=legacy, otherwise an error occurs. */
908 if (compile_options
.warn_std
!= 0)
910 fmt
->error
= period_required
;
913 fmt
->saved_token
= t
;
918 t
= format_lex (fmt
);
919 if (t
!= FMT_ZERO
&& t
!= FMT_POSINT
)
921 fmt
->error
= nonneg_required
;
925 tail
->u
.real
.d
= fmt
->value
;
927 if (t
== FMT_D
|| t
== FMT_F
)
932 /* Look for optional exponent */
933 t
= format_lex (fmt
);
935 fmt
->saved_token
= t
;
938 t
= format_lex (fmt
);
941 fmt
->error
= "Positive exponent width required in format";
945 tail
->u
.real
.e
= fmt
->value
;
951 if (repeat
> fmt
->format_string_len
)
953 fmt
->error
= bad_hollerith
;
957 get_fnode (fmt
, &head
, &tail
, FMT_STRING
);
958 tail
->u
.string
.p
= fmt
->format_string
;
959 tail
->u
.string
.length
= repeat
;
962 fmt
->format_string
+= fmt
->value
;
963 fmt
->format_string_len
-= repeat
;
971 get_fnode (fmt
, &head
, &tail
, t
);
972 tail
->repeat
= repeat
;
974 t
= format_lex (fmt
);
976 if (dtp
->u
.p
.mode
== READING
)
980 fmt
->error
= posint_required
;
986 if (t
!= FMT_ZERO
&& t
!= FMT_POSINT
)
988 fmt
->error
= nonneg_required
;
993 tail
->u
.integer
.w
= fmt
->value
;
994 tail
->u
.integer
.m
= -1;
996 t
= format_lex (fmt
);
999 fmt
->saved_token
= t
;
1003 t
= format_lex (fmt
);
1004 if (t
!= FMT_ZERO
&& t
!= FMT_POSINT
)
1006 fmt
->error
= nonneg_required
;
1010 tail
->u
.integer
.m
= fmt
->value
;
1013 if (tail
->u
.integer
.w
!= 0 && tail
->u
.integer
.m
> tail
->u
.integer
.w
)
1015 fmt
->error
= "Minimum digits exceeds field width";
1022 fmt
->error
= unexpected_element
;
1026 /* Between a descriptor and what comes next */
1028 t
= format_lex (fmt
);
1039 get_fnode (fmt
, &head
, &tail
, t
);
1041 goto optional_comma
;
1044 fmt
->error
= unexpected_end
;
1048 /* Assume a missing comma, this is a GNU extension */
1052 /* Optional comma is a weird between state where we've just finished
1053 reading a colon, slash or P descriptor. */
1055 t
= format_lex (fmt
);
1064 default: /* Assume that we have another format item */
1065 fmt
->saved_token
= t
;
1079 /* format_error()-- Generate an error message for a format statement.
1080 * If the node that gives the location of the error is NULL, the error
1081 * is assumed to happen at parse time, and the current location of the
1084 * We generate a message showing where the problem is. We take extra
1085 * care to print only the relevant part of the format if it is longer
1086 * than a standard 80 column display. */
1089 format_error (st_parameter_dt
*dtp
, const fnode
*f
, const char *message
)
1091 int width
, i
, j
, offset
;
1092 char *p
, buffer
[300];
1093 format_data
*fmt
= dtp
->u
.p
.fmt
;
1096 fmt
->format_string
= f
->source
;
1098 if (message
== unexpected_element
)
1099 sprintf (buffer
, message
, fmt
->error_element
);
1101 sprintf (buffer
, "%s\n", message
);
1103 j
= fmt
->format_string
- dtp
->format
;
1105 offset
= (j
> 60) ? j
- 40 : 0;
1108 width
= dtp
->format_len
- offset
;
1113 /* Show the format */
1115 p
= strchr (buffer
, '\0');
1117 memcpy (p
, dtp
->format
+ offset
, width
);
1122 /* Show where the problem is */
1124 for (i
= 1; i
< j
; i
++)
1130 generate_error (&dtp
->common
, LIBERROR_FORMAT
, buffer
);
1134 /* revert()-- Do reversion of the format. Control reverts to the left
1135 * parenthesis that matches the rightmost right parenthesis. From our
1136 * tree structure, we are looking for the rightmost parenthesis node
1137 * at the second level, the first level always being a single
1138 * parenthesis node. If this node doesn't exit, we use the top
1142 revert (st_parameter_dt
*dtp
)
1145 format_data
*fmt
= dtp
->u
.p
.fmt
;
1147 dtp
->u
.p
.reversion_flag
= 1;
1151 for (f
= fmt
->array
.array
[0].u
.child
; f
; f
= f
->next
)
1152 if (f
->format
== FMT_LPAREN
)
1155 /* If r is NULL because no node was found, the whole tree will be used */
1157 fmt
->array
.array
[0].current
= r
;
1158 fmt
->array
.array
[0].count
= 0;
1161 /* parse_format()-- Parse a format string. */
1164 parse_format (st_parameter_dt
*dtp
)
1167 bool format_cache_ok
;
1169 format_cache_ok
= !is_internal_unit (dtp
);
1171 /* Lookup format string to see if it has already been parsed. */
1172 if (format_cache_ok
)
1174 dtp
->u
.p
.fmt
= find_parsed_format (dtp
);
1176 if (dtp
->u
.p
.fmt
!= NULL
)
1178 dtp
->u
.p
.fmt
->reversion_ok
= 0;
1179 dtp
->u
.p
.fmt
->saved_token
= FMT_NONE
;
1180 dtp
->u
.p
.fmt
->saved_format
= NULL
;
1181 reset_fnode_counters (dtp
);
1186 /* Not found so proceed as follows. */
1188 dtp
->u
.p
.fmt
= fmt
= get_mem (sizeof (format_data
));
1189 fmt
->format_string
= dtp
->format
;
1190 fmt
->format_string_len
= dtp
->format_len
;
1193 fmt
->saved_token
= FMT_NONE
;
1197 /* Initialize variables used during traversal of the tree. */
1199 fmt
->reversion_ok
= 0;
1200 fmt
->saved_format
= NULL
;
1202 /* Allocate the first format node as the root of the tree. */
1204 fmt
->last
= &fmt
->array
;
1205 fmt
->last
->next
= NULL
;
1206 fmt
->avail
= &fmt
->array
.array
[0];
1208 memset (fmt
->avail
, 0, sizeof (*fmt
->avail
));
1209 fmt
->avail
->format
= FMT_LPAREN
;
1210 fmt
->avail
->repeat
= 1;
1213 if (format_lex (fmt
) == FMT_LPAREN
)
1214 fmt
->array
.array
[0].u
.child
= parse_format_list (dtp
, &format_cache_ok
);
1216 fmt
->error
= "Missing initial left parenthesis in format";
1220 format_error (dtp
, NULL
, fmt
->error
);
1221 free_format_hash_table (dtp
->u
.p
.current_unit
);
1225 if (format_cache_ok
)
1226 save_parsed_format (dtp
);
1228 dtp
->u
.p
.format_not_saved
= 1;
1232 /* next_format0()-- Get the next format node without worrying about
1233 * reversion. Returns NULL when we hit the end of the list.
1234 * Parenthesis nodes are incremented after the list has been
1235 * exhausted, other nodes are incremented before they are returned. */
1237 static const fnode
*
1238 next_format0 (fnode
* f
)
1245 if (f
->format
!= FMT_LPAREN
)
1248 if (f
->count
<= f
->repeat
)
1255 /* Deal with a parenthesis node */
1257 for (; f
->count
< f
->repeat
; f
->count
++)
1259 if (f
->current
== NULL
)
1260 f
->current
= f
->u
.child
;
1262 for (; f
->current
!= NULL
; f
->current
= f
->current
->next
)
1264 r
= next_format0 (f
->current
);
1275 /* next_format()-- Return the next format node. If the format list
1276 * ends up being exhausted, we do reversion. Reversion is only
1277 * allowed if we've seen a data descriptor since the
1278 * initialization or the last reversion. We return NULL if there
1279 * are no more data descriptors to return (which is an error
1283 next_format (st_parameter_dt
*dtp
)
1287 format_data
*fmt
= dtp
->u
.p
.fmt
;
1289 if (fmt
->saved_format
!= NULL
)
1290 { /* Deal with a pushed-back format node */
1291 f
= fmt
->saved_format
;
1292 fmt
->saved_format
= NULL
;
1296 f
= next_format0 (&fmt
->array
.array
[0]);
1299 if (!fmt
->reversion_ok
)
1302 fmt
->reversion_ok
= 0;
1305 f
= next_format0 (&fmt
->array
.array
[0]);
1308 format_error (dtp
, NULL
, reversion_error
);
1312 /* Push the first reverted token and return a colon node in case
1313 * there are no more data items. */
1315 fmt
->saved_format
= f
;
1319 /* If this is a data edit descriptor, then reversion has become OK. */
1323 if (!fmt
->reversion_ok
&&
1324 (t
== FMT_I
|| t
== FMT_B
|| t
== FMT_O
|| t
== FMT_Z
|| t
== FMT_F
||
1325 t
== FMT_E
|| t
== FMT_EN
|| t
== FMT_ES
|| t
== FMT_G
|| t
== FMT_L
||
1326 t
== FMT_A
|| t
== FMT_D
))
1327 fmt
->reversion_ok
= 1;
1332 /* unget_format()-- Push the given format back so that it will be
1333 * returned on the next call to next_format() without affecting
1334 * counts. This is necessary when we've encountered a data
1335 * descriptor, but don't know what the data item is yet. The format
1336 * node is pushed back, and we return control to the main program,
1337 * which calls the library back with the data item (or not). */
1340 unget_format (st_parameter_dt
*dtp
, const fnode
*f
)
1342 dtp
->u
.p
.fmt
->saved_format
= f
;