1 /* Copyright (C) 2002-2013 Free Software Foundation, Inc.
2 Contributed by Andy Vaught
3 F2003 I/O support contributed by Jerry DeLisle
5 This file is part of the GNU Fortran runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
36 typedef unsigned char uchar
;
38 /* read.c -- Deal with formatted reads */
41 /* set_integer()-- All of the integer assignments come here to
42 actually place the value into memory. */
45 set_integer (void *dest
, GFC_INTEGER_LARGEST value
, int length
)
49 #ifdef HAVE_GFC_INTEGER_16
50 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
54 GFC_INTEGER_16 tmp
= value
;
55 memcpy (dest
, (void *) &tmp
, length
);
61 GFC_INTEGER_8 tmp
= value
;
62 memcpy (dest
, (void *) &tmp
, length
);
67 GFC_INTEGER_4 tmp
= value
;
68 memcpy (dest
, (void *) &tmp
, length
);
73 GFC_INTEGER_2 tmp
= value
;
74 memcpy (dest
, (void *) &tmp
, length
);
79 GFC_INTEGER_1 tmp
= value
;
80 memcpy (dest
, (void *) &tmp
, length
);
84 internal_error (NULL
, "Bad integer kind");
89 /* Max signed value of size give by length argument. */
94 GFC_UINTEGER_LARGEST value
;
98 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
102 for (int n
= 1; n
< 4 * length
; n
++)
103 value
= (value
<< 2) + 3;
107 return GFC_INTEGER_8_HUGE
;
109 return GFC_INTEGER_4_HUGE
;
111 return GFC_INTEGER_2_HUGE
;
113 return GFC_INTEGER_1_HUGE
;
115 internal_error (NULL
, "Bad integer kind");
120 /* convert_real()-- Convert a character representation of a floating
121 point number to the machine number. Returns nonzero if there is an
122 invalid input. Note: many architectures (e.g. IA-64, HP-PA)
123 require that the storage pointed to by the dest argument is
124 properly aligned for the type in question. */
127 convert_real (st_parameter_dt
*dtp
, void *dest
, const char *buffer
, int length
)
134 *((GFC_REAL_4
*) dest
) =
135 #if defined(HAVE_STRTOF)
136 gfc_strtof (buffer
, &endptr
);
138 (GFC_REAL_4
) gfc_strtod (buffer
, &endptr
);
143 *((GFC_REAL_8
*) dest
) = gfc_strtod (buffer
, &endptr
);
146 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
148 *((GFC_REAL_10
*) dest
) = gfc_strtold (buffer
, &endptr
);
152 #if defined(HAVE_GFC_REAL_16)
153 # if defined(GFC_REAL_16_IS_FLOAT128)
155 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, &endptr
);
157 # elif defined(HAVE_STRTOLD)
159 *((GFC_REAL_16
*) dest
) = gfc_strtold (buffer
, &endptr
);
165 internal_error (&dtp
->common
, "Unsupported real kind during IO");
168 if (buffer
== endptr
)
170 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
171 "Error during floating point read");
172 next_record (dtp
, 1);
179 /* convert_infnan()-- Convert character INF/NAN representation to the
180 machine number. Note: many architectures (e.g. IA-64, HP-PA) require
181 that the storage pointed to by the dest argument is properly aligned
182 for the type in question. */
185 convert_infnan (st_parameter_dt
*dtp
, void *dest
, const char *buffer
,
188 const char *s
= buffer
;
189 int is_inf
, plus
= 1;
205 *((GFC_REAL_4
*) dest
) = plus
? __builtin_inff () : -__builtin_inff ();
207 *((GFC_REAL_4
*) dest
) = plus
? __builtin_nanf ("") : -__builtin_nanf ("");
212 *((GFC_REAL_8
*) dest
) = plus
? __builtin_inf () : -__builtin_inf ();
214 *((GFC_REAL_8
*) dest
) = plus
? __builtin_nan ("") : -__builtin_nan ("");
217 #if defined(HAVE_GFC_REAL_10)
220 *((GFC_REAL_10
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
222 *((GFC_REAL_10
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
226 #if defined(HAVE_GFC_REAL_16)
227 # if defined(GFC_REAL_16_IS_FLOAT128)
229 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, NULL
);
234 *((GFC_REAL_16
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
236 *((GFC_REAL_16
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
242 internal_error (&dtp
->common
, "Unsupported real kind during IO");
249 /* read_l()-- Read a logical value */
252 read_l (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
259 p
= read_block_form (dtp
, &w
);
282 set_integer (dest
, (GFC_INTEGER_LARGEST
) 1, length
);
286 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
290 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
291 "Bad value on logical read");
292 next_record (dtp
, 1);
299 read_utf8 (st_parameter_dt
*dtp
, int *nbytes
)
301 static const uchar masks
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
302 static const uchar patns
[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
309 s
= read_block_form (dtp
, nbytes
);
313 /* If this is a short read, just return. */
321 /* The number of leading 1-bits in the first byte indicates how many
323 for (nb
= 2; nb
< 7; nb
++)
324 if ((c
& ~masks
[nb
-1]) == patns
[nb
-1])
329 c
= (c
& masks
[nb
-1]);
332 s
= read_block_form (dtp
, &nread
);
335 /* Decode the bytes read. */
336 for (i
= 1; i
< nb
; i
++)
338 gfc_char4_t n
= *s
++;
340 if ((n
& 0xC0) != 0x80)
343 c
= ((c
<< 6) + (n
& 0x3F));
346 /* Make sure the shortest possible encoding was used. */
347 if (c
<= 0x7F && nb
> 1) goto invalid
;
348 if (c
<= 0x7FF && nb
> 2) goto invalid
;
349 if (c
<= 0xFFFF && nb
> 3) goto invalid
;
350 if (c
<= 0x1FFFFF && nb
> 4) goto invalid
;
351 if (c
<= 0x3FFFFFF && nb
> 5) goto invalid
;
353 /* Make sure the character is valid. */
354 if (c
> 0x7FFFFFFF || (c
>= 0xD800 && c
<= 0xDFFF))
360 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
, "Invalid UTF-8 encoding");
361 return (gfc_char4_t
) '?';
366 read_utf8_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
373 len
= (width
< len
) ? len
: width
;
377 /* Proceed with decoding one character at a time. */
378 for (j
= 0; j
< len
; j
++, dest
++)
380 c
= read_utf8 (dtp
, &nbytes
);
382 /* Check for a short read and if so, break out. */
386 *dest
= c
> 255 ? '?' : (uchar
) c
;
389 /* If there was a short read, pad the remaining characters. */
390 for (i
= j
; i
< len
; i
++)
396 read_default_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
401 s
= read_block_form (dtp
, &width
);
408 m
= (width
> len
) ? len
: width
;
413 memset (p
+ m
, ' ', n
);
418 read_utf8_char4 (st_parameter_dt
*dtp
, void *p
, int len
, int width
)
424 len
= (width
< len
) ? len
: width
;
426 dest
= (gfc_char4_t
*) p
;
428 /* Proceed with decoding one character at a time. */
429 for (j
= 0; j
< len
; j
++, dest
++)
431 *dest
= read_utf8 (dtp
, &nbytes
);
433 /* Check for a short read and if so, break out. */
438 /* If there was a short read, pad the remaining characters. */
439 for (i
= j
; i
< len
; i
++)
440 *dest
++ = (gfc_char4_t
) ' ';
446 read_default_char4 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
451 if (is_char4_unit(dtp
))
455 s4
= (gfc_char4_t
*) read_block_form4 (dtp
, &width
);
462 m
= ((int) width
> len
) ? len
: (int) width
;
464 dest
= (gfc_char4_t
*) p
;
466 for (n
= 0; n
< m
; n
++)
469 for (n
= 0; n
< len
- (int) width
; n
++)
470 *dest
++ = (gfc_char4_t
) ' ';
476 s
= read_block_form (dtp
, &width
);
483 m
= ((int) width
> len
) ? len
: (int) width
;
485 dest
= (gfc_char4_t
*) p
;
487 for (n
= 0; n
< m
; n
++, dest
++, s
++)
488 *dest
= (unsigned char ) *s
;
490 for (n
= 0; n
< len
- (int) width
; n
++, dest
++)
491 *dest
= (unsigned char) ' ';
496 /* read_a()-- Read a character record into a KIND=1 character destination,
497 processing UTF-8 encoding if necessary. */
500 read_a (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
506 if (wi
== -1) /* '(A)' edit descriptor */
510 /* Read in w characters, treating comma as not a separator. */
511 dtp
->u
.p
.sf_read_comma
= 0;
513 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
514 read_utf8_char1 (dtp
, p
, length
, w
);
516 read_default_char1 (dtp
, p
, length
, w
);
518 dtp
->u
.p
.sf_read_comma
=
519 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
523 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
524 processing UTF-8 encoding if necessary. */
527 read_a_char4 (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
532 if (w
== -1) /* '(A)' edit descriptor */
535 /* Read in w characters, treating comma as not a separator. */
536 dtp
->u
.p
.sf_read_comma
= 0;
538 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
539 read_utf8_char4 (dtp
, p
, length
, w
);
541 read_default_char4 (dtp
, p
, length
, w
);
543 dtp
->u
.p
.sf_read_comma
=
544 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
547 /* eat_leading_spaces()-- Given a character pointer and a width,
548 * ignore the leading spaces. */
551 eat_leading_spaces (int *width
, char *p
)
555 if (*width
== 0 || *p
!= ' ')
567 next_char (st_parameter_dt
*dtp
, char **p
, int *w
)
582 if (dtp
->u
.p
.blank_status
!= BLANK_UNSPECIFIED
)
583 return ' '; /* return a blank to signal a null */
585 /* At this point, the rest of the field has to be trailing blanks */
599 /* read_decimal()-- Read a decimal integer value. The values here are
603 read_decimal (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
605 GFC_UINTEGER_LARGEST value
, maxv
, maxv_10
;
606 GFC_INTEGER_LARGEST v
;
612 p
= read_block_form (dtp
, &w
);
617 p
= eat_leading_spaces (&w
, p
);
620 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
642 maxv
= si_max (length
);
647 /* At this point we have a digit-string */
652 c
= next_char (dtp
, &p
, &w
);
658 if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
661 for ( ; w
> 0; p
++, w
--)
662 if (*p
!= ' ') break;
665 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
668 if (c
< '0' || c
> '9')
677 if (value
> maxv
- c
)
687 set_integer (dest
, v
, length
);
691 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
692 "Bad value during integer read");
693 next_record (dtp
, 1);
697 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
698 "Value overflowed during integer read");
699 next_record (dtp
, 1);
704 /* read_radix()-- This function reads values for non-decimal radixes.
705 * The difference here is that we treat the values here as unsigned
706 * values for the purposes of overflow. If minus sign is present and
707 * the top bit is set, the value will be incorrect. */
710 read_radix (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
,
713 GFC_UINTEGER_LARGEST value
, maxv
, maxv_r
;
714 GFC_INTEGER_LARGEST v
;
720 p
= read_block_form (dtp
, &w
);
725 p
= eat_leading_spaces (&w
, p
);
728 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
732 /* Maximum unsigned value, assuming two's complement. */
733 maxv
= 2 * si_max (length
) + 1;
734 maxv_r
= maxv
/ radix
;
755 /* At this point we have a digit-string */
760 c
= next_char (dtp
, &p
, &w
);
765 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
766 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
772 if (c
< '0' || c
> '1')
777 if (c
< '0' || c
> '7')
802 c
= c
- 'a' + '9' + 1;
811 c
= c
- 'A' + '9' + 1;
825 value
= radix
* value
;
827 if (maxv
- c
< value
)
836 set_integer (dest
, v
, length
);
840 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
841 "Bad value during integer read");
842 next_record (dtp
, 1);
846 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
847 "Value overflowed during integer read");
848 next_record (dtp
, 1);
853 /* read_f()-- Read a floating point number with F-style editing, which
854 is what all of the other floating point descriptors behave as. The
855 tricky part is that optional spaces are allowed after an E or D,
856 and the implicit decimal point if a decimal point is not present in
860 read_f (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
862 int w
, seen_dp
, exponent
;
867 int seen_int_digit
; /* Seen a digit before the decimal point? */
868 int seen_dec_digit
; /* Seen a digit after the decimal point? */
877 /* Read in the next block. */
878 p
= read_block_form (dtp
, &w
);
881 p
= eat_leading_spaces (&w
, (char*) p
);
885 /* In this buffer we're going to re-format the number cleanly to be parsed
886 by convert_real in the end; this assures we're using strtod from the
887 C library for parsing and thus probably get the best accuracy possible.
888 This process may add a '+0.0' in front of the number as well as change the
889 exponent because of an implicit decimal point or the like. Thus allocating
890 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
891 original buffer had should be enough. */
892 buffer
= gfc_alloca (w
+ 11);
896 if (*p
== '-' || *p
== '+')
904 p
= eat_leading_spaces (&w
, (char*) p
);
908 /* Check for Infinity or NaN. */
909 if (unlikely ((w
>= 3 && (*p
== 'i' || *p
== 'I' || *p
== 'n' || *p
== 'N'))))
914 /* Scan through the buffer keeping track of spaces and parenthesis. We
915 null terminate the string as soon as we see a left paren or if we are
916 BLANK_NULL mode. Leading spaces have already been skipped above,
917 trailing spaces are ignored by converting to '\0'. A space
918 between "NaN" and the optional perenthesis is not permitted. */
925 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
939 if (seen_paren
++ != 1)
953 if (seen_paren
!= 0 && seen_paren
!= 2)
956 if ((strcmp (save
, "inf") == 0) || (strcmp (save
, "infinity") == 0))
961 else if (strcmp (save
, "nan") != 0)
964 convert_infnan (dtp
, dest
, buffer
, length
);
968 /* Process the mantissa string. */
974 if (dtp
->u
.p
.current_unit
->decimal_status
!= DECIMAL_COMMA
)
987 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
992 else if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
995 /* TODO: Should we check instead that there are only trailing
996 blanks here, as is done below for exponents? */
1039 /* No exponent has been seen, so we use the current scale factor. */
1040 exponent
= - dtp
->u
.p
.scale_factor
;
1043 /* At this point the start of an exponent has been found. */
1045 p
= eat_leading_spaces (&w
, (char*) p
);
1046 if (*p
== '-' || *p
== '+')
1054 /* At this point a digit string is required. We calculate the value
1055 of the exponent in order to take account of the scale factor and
1056 the d parameter before explict conversion takes place. */
1061 if (dtp
->u
.p
.blank_status
== BLANK_UNSPECIFIED
)
1063 while (w
> 0 && isdigit (*p
))
1066 exponent
+= *p
- '0';
1071 /* Only allow trailing blanks. */
1080 else /* BZ or BN status is enabled. */
1086 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
1089 assert (dtp
->u
.p
.blank_status
== BLANK_NULL
);
1091 else if (!isdigit (*p
))
1096 exponent
+= *p
- '0';
1104 exponent
*= exponent_sign
;
1107 /* Use the precision specified in the format if no decimal point has been
1110 exponent
-= f
->u
.real
.d
;
1112 /* Output a trailing '0' after decimal point if not yet found. */
1113 if (seen_dp
&& !seen_dec_digit
)
1115 /* Handle input of style "E+NN" by inserting a 0 for the
1117 else if (!seen_int_digit
&& !seen_dec_digit
)
1119 notify_std (&dtp
->common
, GFC_STD_LEGACY
,
1120 "REAL input of style 'E+NN'");
1124 /* Print out the exponent to finish the reformatted number. Maximum 4
1125 digits for the exponent. */
1134 exponent
= - exponent
;
1137 assert (exponent
< 10000);
1138 for (dig
= 3; dig
>= 0; --dig
)
1140 out
[dig
] = (char) ('0' + exponent
% 10);
1147 /* Do the actual conversion. */
1148 convert_real (dtp
, dest
, buffer
, length
);
1152 /* The value read is zero. */
1157 *((GFC_REAL_4
*) dest
) = 0.0;
1161 *((GFC_REAL_8
*) dest
) = 0.0;
1164 #ifdef HAVE_GFC_REAL_10
1166 *((GFC_REAL_10
*) dest
) = 0.0;
1170 #ifdef HAVE_GFC_REAL_16
1172 *((GFC_REAL_16
*) dest
) = 0.0;
1177 internal_error (&dtp
->common
, "Unsupported real kind during IO");
1182 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
1183 "Bad value during floating point read");
1184 next_record (dtp
, 1);
1189 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1190 * and never look at it. */
1193 read_x (st_parameter_dt
*dtp
, int n
)
1197 if ((dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
|| is_internal_unit (dtp
))
1198 && dtp
->u
.p
.current_unit
->bytes_left
< n
)
1199 n
= dtp
->u
.p
.current_unit
->bytes_left
;
1206 if (is_internal_unit (dtp
))
1208 mem_alloc_r (dtp
->u
.p
.current_unit
->s
, &length
);
1209 if (unlikely (length
< n
))
1214 if (dtp
->u
.p
.sf_seen_eor
)
1220 q
= fbuf_getc (dtp
->u
.p
.current_unit
);
1223 else if (q
== '\n' || q
== '\r')
1225 /* Unexpected end of line. Set the position. */
1226 dtp
->u
.p
.sf_seen_eor
= 1;
1228 /* If we see an EOR during non-advancing I/O, we need to skip
1229 the rest of the I/O statement. Set the corresponding flag. */
1230 if (dtp
->u
.p
.advance_status
== ADVANCE_NO
|| dtp
->u
.p
.seen_dollar
)
1231 dtp
->u
.p
.eor_condition
= 1;
1233 /* If we encounter a CR, it might be a CRLF. */
1234 if (q
== '\r') /* Probably a CRLF */
1236 /* See if there is an LF. */
1237 q2
= fbuf_getc (dtp
->u
.p
.current_unit
);
1239 dtp
->u
.p
.sf_seen_eor
= 2;
1240 else if (q2
!= EOF
) /* Oops, seek back. */
1241 fbuf_seek (dtp
->u
.p
.current_unit
, -1, SEEK_CUR
);
1249 if ((dtp
->common
.flags
& IOPARM_DT_HAS_SIZE
) != 0)
1250 dtp
->u
.p
.size_used
+= (GFC_IO_INT
) n
;
1251 dtp
->u
.p
.current_unit
->bytes_left
-= n
;
1252 dtp
->u
.p
.current_unit
->strm_pos
+= (gfc_offset
) n
;