1 /* Copyright (C) 2002-2013 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
5 This file is part of the GNU Fortran runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
36 typedef unsigned char uchar
;
38 /* read.c -- Deal with formatted reads */
41 /* set_integer()-- All of the integer assignments come here to
42 actually place the value into memory. */
45 set_integer (void *dest
, GFC_INTEGER_LARGEST value
, int length
)
49 #ifdef HAVE_GFC_INTEGER_16
50 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
54 GFC_INTEGER_16 tmp
= value
;
55 memcpy (dest
, (void *) &tmp
, length
);
61 GFC_INTEGER_8 tmp
= value
;
62 memcpy (dest
, (void *) &tmp
, length
);
67 GFC_INTEGER_4 tmp
= value
;
68 memcpy (dest
, (void *) &tmp
, length
);
73 GFC_INTEGER_2 tmp
= value
;
74 memcpy (dest
, (void *) &tmp
, length
);
79 GFC_INTEGER_1 tmp
= value
;
80 memcpy (dest
, (void *) &tmp
, length
);
84 internal_error (NULL
, "Bad integer kind");
89 /* Max signed value of size give by length argument. */
94 GFC_UINTEGER_LARGEST value
;
98 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
102 for (int n
= 1; n
< 4 * length
; n
++)
103 value
= (value
<< 2) + 3;
107 return GFC_INTEGER_8_HUGE
;
109 return GFC_INTEGER_4_HUGE
;
111 return GFC_INTEGER_2_HUGE
;
113 return GFC_INTEGER_1_HUGE
;
115 internal_error (NULL
, "Bad integer kind");
120 /* convert_real()-- Convert a character representation of a floating
121 point number to the machine number. Returns nonzero if there is an
122 invalid input. Note: many architectures (e.g. IA-64, HP-PA)
123 require that the storage pointed to by the dest argument is
124 properly aligned for the type in question. */
127 convert_real (st_parameter_dt
*dtp
, void *dest
, const char *buffer
, int length
)
134 *((GFC_REAL_4
*) dest
) =
135 #if defined(HAVE_STRTOF)
136 gfc_strtof (buffer
, &endptr
);
138 (GFC_REAL_4
) gfc_strtod (buffer
, &endptr
);
143 *((GFC_REAL_8
*) dest
) = gfc_strtod (buffer
, &endptr
);
146 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
148 *((GFC_REAL_10
*) dest
) = gfc_strtold (buffer
, &endptr
);
152 #if defined(HAVE_GFC_REAL_16)
153 # if defined(GFC_REAL_16_IS_FLOAT128)
155 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, &endptr
);
157 # elif defined(HAVE_STRTOLD)
159 *((GFC_REAL_16
*) dest
) = gfc_strtold (buffer
, &endptr
);
165 internal_error (&dtp
->common
, "Unsupported real kind during IO");
168 if (buffer
== endptr
)
170 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
171 "Error during floating point read");
172 next_record (dtp
, 1);
179 /* convert_infnan()-- Convert character INF/NAN representation to the
180 machine number. Note: many architectures (e.g. IA-64, HP-PA) require
181 that the storage pointed to by the dest argument is properly aligned
182 for the type in question. */
185 convert_infnan (st_parameter_dt
*dtp
, void *dest
, const char *buffer
,
188 const char *s
= buffer
;
189 int is_inf
, plus
= 1;
205 *((GFC_REAL_4
*) dest
) = plus
? __builtin_inff () : -__builtin_inff ();
207 *((GFC_REAL_4
*) dest
) = plus
? __builtin_nanf ("") : -__builtin_nanf ("");
212 *((GFC_REAL_8
*) dest
) = plus
? __builtin_inf () : -__builtin_inf ();
214 *((GFC_REAL_8
*) dest
) = plus
? __builtin_nan ("") : -__builtin_nan ("");
217 #if defined(HAVE_GFC_REAL_10)
220 *((GFC_REAL_10
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
222 *((GFC_REAL_10
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
226 #if defined(HAVE_GFC_REAL_16)
227 # if defined(GFC_REAL_16_IS_FLOAT128)
229 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, NULL
);
234 *((GFC_REAL_16
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
236 *((GFC_REAL_16
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
242 internal_error (&dtp
->common
, "Unsupported real kind during IO");
249 /* read_l()-- Read a logical value */
252 read_l (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
259 p
= read_block_form (dtp
, &w
);
282 set_integer (dest
, (GFC_INTEGER_LARGEST
) 1, length
);
286 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
290 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
291 "Bad value on logical read");
292 next_record (dtp
, 1);
299 read_utf8 (st_parameter_dt
*dtp
, int *nbytes
)
301 static const uchar masks
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
302 static const uchar patns
[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
309 s
= read_block_form (dtp
, nbytes
);
313 /* If this is a short read, just return. */
321 /* The number of leading 1-bits in the first byte indicates how many
323 for (nb
= 2; nb
< 7; nb
++)
324 if ((c
& ~masks
[nb
-1]) == patns
[nb
-1])
329 c
= (c
& masks
[nb
-1]);
332 s
= read_block_form (dtp
, &nread
);
335 /* Decode the bytes read. */
336 for (i
= 1; i
< nb
; i
++)
338 gfc_char4_t n
= *s
++;
340 if ((n
& 0xC0) != 0x80)
343 c
= ((c
<< 6) + (n
& 0x3F));
346 /* Make sure the shortest possible encoding was used. */
347 if (c
<= 0x7F && nb
> 1) goto invalid
;
348 if (c
<= 0x7FF && nb
> 2) goto invalid
;
349 if (c
<= 0xFFFF && nb
> 3) goto invalid
;
350 if (c
<= 0x1FFFFF && nb
> 4) goto invalid
;
351 if (c
<= 0x3FFFFFF && nb
> 5) goto invalid
;
353 /* Make sure the character is valid. */
354 if (c
> 0x7FFFFFFF || (c
>= 0xD800 && c
<= 0xDFFF))
360 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
, "Invalid UTF-8 encoding");
361 return (gfc_char4_t
) '?';
366 read_utf8_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
373 len
= (width
< len
) ? len
: width
;
377 /* Proceed with decoding one character at a time. */
378 for (j
= 0; j
< len
; j
++, dest
++)
380 c
= read_utf8 (dtp
, &nbytes
);
382 /* Check for a short read and if so, break out. */
386 *dest
= c
> 255 ? '?' : (uchar
) c
;
389 /* If there was a short read, pad the remaining characters. */
390 for (i
= j
; i
< len
; i
++)
396 read_default_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
401 s
= read_block_form (dtp
, &width
);
408 m
= (width
> len
) ? len
: width
;
413 memset (p
+ m
, ' ', n
);
418 read_utf8_char4 (st_parameter_dt
*dtp
, void *p
, int len
, int width
)
424 len
= (width
< len
) ? len
: width
;
426 dest
= (gfc_char4_t
*) p
;
428 /* Proceed with decoding one character at a time. */
429 for (j
= 0; j
< len
; j
++, dest
++)
431 *dest
= read_utf8 (dtp
, &nbytes
);
433 /* Check for a short read and if so, break out. */
438 /* If there was a short read, pad the remaining characters. */
439 for (i
= j
; i
< len
; i
++)
440 *dest
++ = (gfc_char4_t
) ' ';
446 read_default_char4 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
451 if (is_char4_unit(dtp
))
455 s4
= (gfc_char4_t
*) read_block_form4 (dtp
, &width
);
462 m
= ((int) width
> len
) ? len
: (int) width
;
464 dest
= (gfc_char4_t
*) p
;
466 for (n
= 0; n
< m
; n
++)
469 for (n
= 0; n
< len
- (int) width
; n
++)
470 *dest
++ = (gfc_char4_t
) ' ';
476 s
= read_block_form (dtp
, &width
);
483 m
= ((int) width
> len
) ? len
: (int) width
;
485 dest
= (gfc_char4_t
*) p
;
487 for (n
= 0; n
< m
; n
++, dest
++, s
++)
488 *dest
= (unsigned char ) *s
;
490 for (n
= 0; n
< len
- (int) width
; n
++, dest
++)
491 *dest
= (unsigned char) ' ';
496 /* read_a()-- Read a character record into a KIND=1 character destination,
497 processing UTF-8 encoding if necessary. */
500 read_a (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
506 if (wi
== -1) /* '(A)' edit descriptor */
510 /* Read in w characters, treating comma as not a separator. */
511 dtp
->u
.p
.sf_read_comma
= 0;
513 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
514 read_utf8_char1 (dtp
, p
, length
, w
);
516 read_default_char1 (dtp
, p
, length
, w
);
518 dtp
->u
.p
.sf_read_comma
=
519 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
523 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
524 processing UTF-8 encoding if necessary. */
527 read_a_char4 (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
532 if (w
== -1) /* '(A)' edit descriptor */
535 /* Read in w characters, treating comma as not a separator. */
536 dtp
->u
.p
.sf_read_comma
= 0;
538 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
539 read_utf8_char4 (dtp
, p
, length
, w
);
541 read_default_char4 (dtp
, p
, length
, w
);
543 dtp
->u
.p
.sf_read_comma
=
544 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
547 /* eat_leading_spaces()-- Given a character pointer and a width,
548 * ignore the leading spaces. */
551 eat_leading_spaces (int *width
, char *p
)
555 if (*width
== 0 || *p
!= ' ')
567 next_char (st_parameter_dt
*dtp
, char **p
, int *w
)
582 if (dtp
->u
.p
.blank_status
!= BLANK_UNSPECIFIED
)
583 return ' '; /* return a blank to signal a null */
585 /* At this point, the rest of the field has to be trailing blanks */
599 /* read_decimal()-- Read a decimal integer value. The values here are
603 read_decimal (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
605 GFC_UINTEGER_LARGEST value
, maxv
, maxv_10
;
606 GFC_INTEGER_LARGEST v
;
612 p
= read_block_form (dtp
, &w
);
617 p
= eat_leading_spaces (&w
, p
);
620 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
642 maxv
= si_max (length
);
647 /* At this point we have a digit-string */
652 c
= next_char (dtp
, &p
, &w
);
658 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
659 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
662 if (c
< '0' || c
> '9')
671 if (value
> maxv
- c
)
681 set_integer (dest
, v
, length
);
685 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
686 "Bad value during integer read");
687 next_record (dtp
, 1);
691 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
692 "Value overflowed during integer read");
693 next_record (dtp
, 1);
698 /* read_radix()-- This function reads values for non-decimal radixes.
699 * The difference here is that we treat the values here as unsigned
700 * values for the purposes of overflow. If minus sign is present and
701 * the top bit is set, the value will be incorrect. */
704 read_radix (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
,
707 GFC_UINTEGER_LARGEST value
, maxv
, maxv_r
;
708 GFC_INTEGER_LARGEST v
;
714 p
= read_block_form (dtp
, &w
);
719 p
= eat_leading_spaces (&w
, p
);
722 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
726 /* Maximum unsigned value, assuming two's complement. */
727 maxv
= 2 * si_max (length
) + 1;
728 maxv_r
= maxv
/ radix
;
749 /* At this point we have a digit-string */
754 c
= next_char (dtp
, &p
, &w
);
759 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
760 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
766 if (c
< '0' || c
> '1')
771 if (c
< '0' || c
> '7')
796 c
= c
- 'a' + '9' + 1;
805 c
= c
- 'A' + '9' + 1;
819 value
= radix
* value
;
821 if (maxv
- c
< value
)
830 set_integer (dest
, v
, length
);
834 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
835 "Bad value during integer read");
836 next_record (dtp
, 1);
840 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
841 "Value overflowed during integer read");
842 next_record (dtp
, 1);
847 /* read_f()-- Read a floating point number with F-style editing, which
848 is what all of the other floating point descriptors behave as. The
849 tricky part is that optional spaces are allowed after an E or D,
850 and the implicit decimal point if a decimal point is not present in
854 read_f (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
856 int w
, seen_dp
, exponent
;
861 int seen_int_digit
; /* Seen a digit before the decimal point? */
862 int seen_dec_digit
; /* Seen a digit after the decimal point? */
871 /* Read in the next block. */
872 p
= read_block_form (dtp
, &w
);
875 p
= eat_leading_spaces (&w
, (char*) p
);
879 /* In this buffer we're going to re-format the number cleanly to be parsed
880 by convert_real in the end; this assures we're using strtod from the
881 C library for parsing and thus probably get the best accuracy possible.
882 This process may add a '+0.0' in front of the number as well as change the
883 exponent because of an implicit decimal point or the like. Thus allocating
884 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
885 original buffer had should be enough. */
886 buffer
= gfc_alloca (w
+ 11);
890 if (*p
== '-' || *p
== '+')
898 p
= eat_leading_spaces (&w
, (char*) p
);
902 /* Check for Infinity or NaN. */
903 if (unlikely ((w
>= 3 && (*p
== 'i' || *p
== 'I' || *p
== 'n' || *p
== 'N'))))
908 /* Scan through the buffer keeping track of spaces and parenthesis. We
909 null terminate the string as soon as we see a left paren or if we are
910 BLANK_NULL mode. Leading spaces have already been skipped above,
911 trailing spaces are ignored by converting to '\0'. A space
912 between "NaN" and the optional perenthesis is not permitted. */
919 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
933 if (seen_paren
++ != 1)
947 if (seen_paren
!= 0 && seen_paren
!= 2)
950 if ((strcmp (save
, "inf") == 0) || (strcmp (save
, "infinity") == 0))
955 else if (strcmp (save
, "nan") != 0)
958 convert_infnan (dtp
, dest
, buffer
, length
);
962 /* Process the mantissa string. */
968 if (dtp
->u
.p
.current_unit
->decimal_status
!= DECIMAL_COMMA
)
981 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
986 else if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
989 /* TODO: Should we check instead that there are only trailing
990 blanks here, as is done below for exponents? */
1033 /* No exponent has been seen, so we use the current scale factor. */
1034 exponent
= - dtp
->u
.p
.scale_factor
;
1037 /* At this point the start of an exponent has been found. */
1039 p
= eat_leading_spaces (&w
, (char*) p
);
1040 if (*p
== '-' || *p
== '+')
1048 /* At this point a digit string is required. We calculate the value
1049 of the exponent in order to take account of the scale factor and
1050 the d parameter before explict conversion takes place. */
1055 if (dtp
->u
.p
.blank_status
== BLANK_UNSPECIFIED
)
1057 while (w
> 0 && isdigit (*p
))
1060 exponent
+= *p
- '0';
1065 /* Only allow trailing blanks. */
1074 else /* BZ or BN status is enabled. */
1080 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
1083 assert (dtp
->u
.p
.blank_status
== BLANK_NULL
);
1085 else if (!isdigit (*p
))
1090 exponent
+= *p
- '0';
1098 exponent
*= exponent_sign
;
1101 /* Use the precision specified in the format if no decimal point has been
1104 exponent
-= f
->u
.real
.d
;
1106 /* Output a trailing '0' after decimal point if not yet found. */
1107 if (seen_dp
&& !seen_dec_digit
)
1109 /* Handle input of style "E+NN" by inserting a 0 for the
1111 else if (!seen_int_digit
&& !seen_dec_digit
)
1113 notify_std (&dtp
->common
, GFC_STD_LEGACY
,
1114 "REAL input of style 'E+NN'");
1118 /* Print out the exponent to finish the reformatted number. Maximum 4
1119 digits for the exponent. */
1128 exponent
= - exponent
;
1131 assert (exponent
< 10000);
1132 for (dig
= 3; dig
>= 0; --dig
)
1134 out
[dig
] = (char) ('0' + exponent
% 10);
1141 /* Do the actual conversion. */
1142 convert_real (dtp
, dest
, buffer
, length
);
1146 /* The value read is zero. */
1151 *((GFC_REAL_4
*) dest
) = 0.0;
1155 *((GFC_REAL_8
*) dest
) = 0.0;
1158 #ifdef HAVE_GFC_REAL_10
1160 *((GFC_REAL_10
*) dest
) = 0.0;
1164 #ifdef HAVE_GFC_REAL_16
1166 *((GFC_REAL_16
*) dest
) = 0.0;
1171 internal_error (&dtp
->common
, "Unsupported real kind during IO");
1176 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
1177 "Bad value during floating point read");
1178 next_record (dtp
, 1);
1183 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1184 * and never look at it. */
1187 read_x (st_parameter_dt
*dtp
, int n
)
1191 if ((dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
|| is_internal_unit (dtp
))
1192 && dtp
->u
.p
.current_unit
->bytes_left
< n
)
1193 n
= dtp
->u
.p
.current_unit
->bytes_left
;
1200 if (is_internal_unit (dtp
))
1202 mem_alloc_r (dtp
->u
.p
.current_unit
->s
, &length
);
1203 if (unlikely (length
< n
))
1208 if (dtp
->u
.p
.sf_seen_eor
)
1214 q
= fbuf_getc (dtp
->u
.p
.current_unit
);
1217 else if (q
== '\n' || q
== '\r')
1219 /* Unexpected end of line. Set the position. */
1220 dtp
->u
.p
.sf_seen_eor
= 1;
1222 /* If we see an EOR during non-advancing I/O, we need to skip
1223 the rest of the I/O statement. Set the corresponding flag. */
1224 if (dtp
->u
.p
.advance_status
== ADVANCE_NO
|| dtp
->u
.p
.seen_dollar
)
1225 dtp
->u
.p
.eor_condition
= 1;
1227 /* If we encounter a CR, it might be a CRLF. */
1228 if (q
== '\r') /* Probably a CRLF */
1230 /* See if there is an LF. */
1231 q2
= fbuf_getc (dtp
->u
.p
.current_unit
);
1233 dtp
->u
.p
.sf_seen_eor
= 2;
1234 else if (q2
!= EOF
) /* Oops, seek back. */
1235 fbuf_seek (dtp
->u
.p
.current_unit
, -1, SEEK_CUR
);
1243 if ((dtp
->common
.flags
& IOPARM_DT_HAS_SIZE
) != 0)
1244 dtp
->u
.p
.size_used
+= (GFC_IO_INT
) n
;
1245 dtp
->u
.p
.current_unit
->bytes_left
-= n
;
1246 dtp
->u
.p
.current_unit
->strm_pos
+= (gfc_offset
) n
;