1 /* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
37 typedef unsigned char uchar
;
39 /* read.c -- Deal with formatted reads */
42 /* set_integer()-- All of the integer assignments come here to
43 actually place the value into memory. */
46 set_integer (void *dest
, GFC_INTEGER_LARGEST value
, int length
)
50 #ifdef HAVE_GFC_INTEGER_16
51 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
55 GFC_INTEGER_16 tmp
= value
;
56 memcpy (dest
, (void *) &tmp
, length
);
62 GFC_INTEGER_8 tmp
= value
;
63 memcpy (dest
, (void *) &tmp
, length
);
68 GFC_INTEGER_4 tmp
= value
;
69 memcpy (dest
, (void *) &tmp
, length
);
74 GFC_INTEGER_2 tmp
= value
;
75 memcpy (dest
, (void *) &tmp
, length
);
80 GFC_INTEGER_1 tmp
= value
;
81 memcpy (dest
, (void *) &tmp
, length
);
85 internal_error (NULL
, "Bad integer kind");
90 /* Max signed value of size give by length argument. */
95 GFC_UINTEGER_LARGEST value
;
99 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
103 for (int n
= 1; n
< 4 * length
; n
++)
104 value
= (value
<< 2) + 3;
108 return GFC_INTEGER_8_HUGE
;
110 return GFC_INTEGER_4_HUGE
;
112 return GFC_INTEGER_2_HUGE
;
114 return GFC_INTEGER_1_HUGE
;
116 internal_error (NULL
, "Bad integer kind");
121 /* convert_real()-- Convert a character representation of a floating
122 point number to the machine number. Returns nonzero if there is an
123 invalid input. Note: many architectures (e.g. IA-64, HP-PA)
124 require that the storage pointed to by the dest argument is
125 properly aligned for the type in question. */
128 convert_real (st_parameter_dt
*dtp
, void *dest
, const char *buffer
, int length
)
135 *((GFC_REAL_4
*) dest
) =
136 #if defined(HAVE_STRTOF)
137 gfc_strtof (buffer
, &endptr
);
139 (GFC_REAL_4
) gfc_strtod (buffer
, &endptr
);
144 *((GFC_REAL_8
*) dest
) = gfc_strtod (buffer
, &endptr
);
147 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
149 *((GFC_REAL_10
*) dest
) = gfc_strtold (buffer
, &endptr
);
153 #if defined(HAVE_GFC_REAL_16)
154 # if defined(GFC_REAL_16_IS_FLOAT128)
156 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, &endptr
);
158 # elif defined(HAVE_STRTOLD)
160 *((GFC_REAL_16
*) dest
) = gfc_strtold (buffer
, &endptr
);
166 internal_error (&dtp
->common
, "Unsupported real kind during IO");
169 if (buffer
== endptr
)
171 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
172 "Error during floating point read");
173 next_record (dtp
, 1);
180 /* convert_infnan()-- Convert character INF/NAN representation to the
181 machine number. Note: many architectures (e.g. IA-64, HP-PA) require
182 that the storage pointed to by the dest argument is properly aligned
183 for the type in question. */
186 convert_infnan (st_parameter_dt
*dtp
, void *dest
, const char *buffer
,
189 const char *s
= buffer
;
190 int is_inf
, plus
= 1;
206 *((GFC_REAL_4
*) dest
) = plus
? __builtin_inff () : -__builtin_inff ();
208 *((GFC_REAL_4
*) dest
) = plus
? __builtin_nanf ("") : -__builtin_nanf ("");
213 *((GFC_REAL_8
*) dest
) = plus
? __builtin_inf () : -__builtin_inf ();
215 *((GFC_REAL_8
*) dest
) = plus
? __builtin_nan ("") : -__builtin_nan ("");
218 #if defined(HAVE_GFC_REAL_10)
221 *((GFC_REAL_10
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
223 *((GFC_REAL_10
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
227 #if defined(HAVE_GFC_REAL_16)
228 # if defined(GFC_REAL_16_IS_FLOAT128)
230 *((GFC_REAL_16
*) dest
) = __qmath_(strtoflt128
) (buffer
, NULL
);
235 *((GFC_REAL_16
*) dest
) = plus
? __builtin_infl () : -__builtin_infl ();
237 *((GFC_REAL_16
*) dest
) = plus
? __builtin_nanl ("") : -__builtin_nanl ("");
243 internal_error (&dtp
->common
, "Unsupported real kind during IO");
250 /* read_l()-- Read a logical value */
253 read_l (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
260 p
= read_block_form (dtp
, &w
);
283 set_integer (dest
, (GFC_INTEGER_LARGEST
) 1, length
);
287 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
291 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
292 "Bad value on logical read");
293 next_record (dtp
, 1);
300 read_utf8 (st_parameter_dt
*dtp
, int *nbytes
)
302 static const uchar masks
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
303 static const uchar patns
[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
310 s
= read_block_form (dtp
, nbytes
);
314 /* If this is a short read, just return. */
322 /* The number of leading 1-bits in the first byte indicates how many
324 for (nb
= 2; nb
< 7; nb
++)
325 if ((c
& ~masks
[nb
-1]) == patns
[nb
-1])
330 c
= (c
& masks
[nb
-1]);
333 s
= read_block_form (dtp
, &nread
);
336 /* Decode the bytes read. */
337 for (i
= 1; i
< nb
; i
++)
339 gfc_char4_t n
= *s
++;
341 if ((n
& 0xC0) != 0x80)
344 c
= ((c
<< 6) + (n
& 0x3F));
347 /* Make sure the shortest possible encoding was used. */
348 if (c
<= 0x7F && nb
> 1) goto invalid
;
349 if (c
<= 0x7FF && nb
> 2) goto invalid
;
350 if (c
<= 0xFFFF && nb
> 3) goto invalid
;
351 if (c
<= 0x1FFFFF && nb
> 4) goto invalid
;
352 if (c
<= 0x3FFFFFF && nb
> 5) goto invalid
;
354 /* Make sure the character is valid. */
355 if (c
> 0x7FFFFFFF || (c
>= 0xD800 && c
<= 0xDFFF))
361 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
, "Invalid UTF-8 encoding");
362 return (gfc_char4_t
) '?';
367 read_utf8_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
374 len
= (width
< len
) ? len
: width
;
378 /* Proceed with decoding one character at a time. */
379 for (j
= 0; j
< len
; j
++, dest
++)
381 c
= read_utf8 (dtp
, &nbytes
);
383 /* Check for a short read and if so, break out. */
387 *dest
= c
> 255 ? '?' : (uchar
) c
;
390 /* If there was a short read, pad the remaining characters. */
391 for (i
= j
; i
< len
; i
++)
397 read_default_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
402 s
= read_block_form (dtp
, &width
);
409 m
= (width
> len
) ? len
: width
;
414 memset (p
+ m
, ' ', n
);
419 read_utf8_char4 (st_parameter_dt
*dtp
, void *p
, int len
, int width
)
425 len
= (width
< len
) ? len
: width
;
427 dest
= (gfc_char4_t
*) p
;
429 /* Proceed with decoding one character at a time. */
430 for (j
= 0; j
< len
; j
++, dest
++)
432 *dest
= read_utf8 (dtp
, &nbytes
);
434 /* Check for a short read and if so, break out. */
439 /* If there was a short read, pad the remaining characters. */
440 for (i
= j
; i
< len
; i
++)
441 *dest
++ = (gfc_char4_t
) ' ';
447 read_default_char4 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
452 if (is_char4_unit(dtp
))
456 s4
= (gfc_char4_t
*) read_block_form4 (dtp
, &width
);
463 m
= ((int) width
> len
) ? len
: (int) width
;
465 dest
= (gfc_char4_t
*) p
;
467 for (n
= 0; n
< m
; n
++)
470 for (n
= 0; n
< len
- (int) width
; n
++)
471 *dest
++ = (gfc_char4_t
) ' ';
477 s
= read_block_form (dtp
, &width
);
484 m
= ((int) width
> len
) ? len
: (int) width
;
486 dest
= (gfc_char4_t
*) p
;
488 for (n
= 0; n
< m
; n
++, dest
++, s
++)
489 *dest
= (unsigned char ) *s
;
491 for (n
= 0; n
< len
- (int) width
; n
++, dest
++)
492 *dest
= (unsigned char) ' ';
497 /* read_a()-- Read a character record into a KIND=1 character destination,
498 processing UTF-8 encoding if necessary. */
501 read_a (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
507 if (wi
== -1) /* '(A)' edit descriptor */
511 /* Read in w characters, treating comma as not a separator. */
512 dtp
->u
.p
.sf_read_comma
= 0;
514 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
515 read_utf8_char1 (dtp
, p
, length
, w
);
517 read_default_char1 (dtp
, p
, length
, w
);
519 dtp
->u
.p
.sf_read_comma
=
520 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
524 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
525 processing UTF-8 encoding if necessary. */
528 read_a_char4 (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
533 if (w
== -1) /* '(A)' edit descriptor */
536 /* Read in w characters, treating comma as not a separator. */
537 dtp
->u
.p
.sf_read_comma
= 0;
539 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
540 read_utf8_char4 (dtp
, p
, length
, w
);
542 read_default_char4 (dtp
, p
, length
, w
);
544 dtp
->u
.p
.sf_read_comma
=
545 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
548 /* eat_leading_spaces()-- Given a character pointer and a width,
549 * ignore the leading spaces. */
552 eat_leading_spaces (int *width
, char *p
)
556 if (*width
== 0 || *p
!= ' ')
568 next_char (st_parameter_dt
*dtp
, char **p
, int *w
)
583 if (dtp
->u
.p
.blank_status
!= BLANK_UNSPECIFIED
)
584 return ' '; /* return a blank to signal a null */
586 /* At this point, the rest of the field has to be trailing blanks */
600 /* read_decimal()-- Read a decimal integer value. The values here are
604 read_decimal (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
606 GFC_UINTEGER_LARGEST value
, maxv
, maxv_10
;
607 GFC_INTEGER_LARGEST v
;
613 p
= read_block_form (dtp
, &w
);
618 p
= eat_leading_spaces (&w
, p
);
621 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
643 maxv
= si_max (length
);
648 /* At this point we have a digit-string */
653 c
= next_char (dtp
, &p
, &w
);
659 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
660 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
663 if (c
< '0' || c
> '9')
672 if (value
> maxv
- c
)
682 set_integer (dest
, v
, length
);
686 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
687 "Bad value during integer read");
688 next_record (dtp
, 1);
692 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
693 "Value overflowed during integer read");
694 next_record (dtp
, 1);
699 /* read_radix()-- This function reads values for non-decimal radixes.
700 * The difference here is that we treat the values here as unsigned
701 * values for the purposes of overflow. If minus sign is present and
702 * the top bit is set, the value will be incorrect. */
705 read_radix (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
,
708 GFC_UINTEGER_LARGEST value
, maxv
, maxv_r
;
709 GFC_INTEGER_LARGEST v
;
715 p
= read_block_form (dtp
, &w
);
720 p
= eat_leading_spaces (&w
, p
);
723 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
727 /* Maximum unsigned value, assuming two's complement. */
728 maxv
= 2 * si_max (length
) + 1;
729 maxv_r
= maxv
/ radix
;
750 /* At this point we have a digit-string */
755 c
= next_char (dtp
, &p
, &w
);
760 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
761 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
767 if (c
< '0' || c
> '1')
772 if (c
< '0' || c
> '7')
797 c
= c
- 'a' + '9' + 1;
806 c
= c
- 'A' + '9' + 1;
820 value
= radix
* value
;
822 if (maxv
- c
< value
)
831 set_integer (dest
, v
, length
);
835 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
836 "Bad value during integer read");
837 next_record (dtp
, 1);
841 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
842 "Value overflowed during integer read");
843 next_record (dtp
, 1);
848 /* read_f()-- Read a floating point number with F-style editing, which
849 is what all of the other floating point descriptors behave as. The
850 tricky part is that optional spaces are allowed after an E or D,
851 and the implicit decimal point if a decimal point is not present in
855 read_f (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
857 int w
, seen_dp
, exponent
;
862 int seen_int_digit
; /* Seen a digit before the decimal point? */
863 int seen_dec_digit
; /* Seen a digit after the decimal point? */
872 /* Read in the next block. */
873 p
= read_block_form (dtp
, &w
);
876 p
= eat_leading_spaces (&w
, (char*) p
);
880 /* In this buffer we're going to re-format the number cleanly to be parsed
881 by convert_real in the end; this assures we're using strtod from the
882 C library for parsing and thus probably get the best accuracy possible.
883 This process may add a '+0.0' in front of the number as well as change the
884 exponent because of an implicit decimal point or the like. Thus allocating
885 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
886 original buffer had should be enough. */
887 buffer
= gfc_alloca (w
+ 11);
891 if (*p
== '-' || *p
== '+')
899 p
= eat_leading_spaces (&w
, (char*) p
);
903 /* Check for Infinity or NaN. */
904 if (unlikely ((w
>= 3 && (*p
== 'i' || *p
== 'I' || *p
== 'n' || *p
== 'N'))))
909 /* Scan through the buffer keeping track of spaces and parenthesis. We
910 null terminate the string as soon as we see a left paren or if we are
911 BLANK_NULL mode. Leading spaces have already been skipped above,
912 trailing spaces are ignored by converting to '\0'. A space
913 between "NaN" and the optional perenthesis is not permitted. */
920 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
934 if (seen_paren
++ != 1)
948 if (seen_paren
!= 0 && seen_paren
!= 2)
951 if ((strcmp (save
, "inf") == 0) || (strcmp (save
, "infinity") == 0))
956 else if (strcmp (save
, "nan") != 0)
959 convert_infnan (dtp
, dest
, buffer
, length
);
963 /* Process the mantissa string. */
969 if (dtp
->u
.p
.current_unit
->decimal_status
!= DECIMAL_COMMA
)
982 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
987 else if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
990 /* TODO: Should we check instead that there are only trailing
991 blanks here, as is done below for exponents? */
1034 /* No exponent has been seen, so we use the current scale factor. */
1035 exponent
= - dtp
->u
.p
.scale_factor
;
1038 /* At this point the start of an exponent has been found. */
1040 p
= eat_leading_spaces (&w
, (char*) p
);
1041 if (*p
== '-' || *p
== '+')
1049 /* At this point a digit string is required. We calculate the value
1050 of the exponent in order to take account of the scale factor and
1051 the d parameter before explict conversion takes place. */
1056 if (dtp
->u
.p
.blank_status
== BLANK_UNSPECIFIED
)
1058 while (w
> 0 && isdigit (*p
))
1061 exponent
+= *p
- '0';
1066 /* Only allow trailing blanks. */
1075 else /* BZ or BN status is enabled. */
1081 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
1084 assert (dtp
->u
.p
.blank_status
== BLANK_NULL
);
1086 else if (!isdigit (*p
))
1091 exponent
+= *p
- '0';
1099 exponent
*= exponent_sign
;
1102 /* Use the precision specified in the format if no decimal point has been
1105 exponent
-= f
->u
.real
.d
;
1107 /* Output a trailing '0' after decimal point if not yet found. */
1108 if (seen_dp
&& !seen_dec_digit
)
1110 /* Handle input of style "E+NN" by inserting a 0 for the
1112 else if (!seen_int_digit
&& !seen_dec_digit
)
1114 notify_std (&dtp
->common
, GFC_STD_LEGACY
,
1115 "REAL input of style 'E+NN'");
1119 /* Print out the exponent to finish the reformatted number. Maximum 4
1120 digits for the exponent. */
1129 exponent
= - exponent
;
1132 assert (exponent
< 10000);
1133 for (dig
= 3; dig
>= 0; --dig
)
1135 out
[dig
] = (char) ('0' + exponent
% 10);
1142 /* Do the actual conversion. */
1143 convert_real (dtp
, dest
, buffer
, length
);
1147 /* The value read is zero. */
1152 *((GFC_REAL_4
*) dest
) = 0.0;
1156 *((GFC_REAL_8
*) dest
) = 0.0;
1159 #ifdef HAVE_GFC_REAL_10
1161 *((GFC_REAL_10
*) dest
) = 0.0;
1165 #ifdef HAVE_GFC_REAL_16
1167 *((GFC_REAL_16
*) dest
) = 0.0;
1172 internal_error (&dtp
->common
, "Unsupported real kind during IO");
1177 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
1178 "Bad value during floating point read");
1179 next_record (dtp
, 1);
1184 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1185 * and never look at it. */
1188 read_x (st_parameter_dt
*dtp
, int n
)
1192 if ((dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
|| is_internal_unit (dtp
))
1193 && dtp
->u
.p
.current_unit
->bytes_left
< n
)
1194 n
= dtp
->u
.p
.current_unit
->bytes_left
;
1201 if (is_internal_unit (dtp
))
1203 mem_alloc_r (dtp
->u
.p
.current_unit
->s
, &length
);
1204 if (unlikely (length
< n
))
1209 if (dtp
->u
.p
.sf_seen_eor
)
1215 q
= fbuf_getc (dtp
->u
.p
.current_unit
);
1218 else if (q
== '\n' || q
== '\r')
1220 /* Unexpected end of line. Set the position. */
1221 dtp
->u
.p
.sf_seen_eor
= 1;
1223 /* If we see an EOR during non-advancing I/O, we need to skip
1224 the rest of the I/O statement. Set the corresponding flag. */
1225 if (dtp
->u
.p
.advance_status
== ADVANCE_NO
|| dtp
->u
.p
.seen_dollar
)
1226 dtp
->u
.p
.eor_condition
= 1;
1228 /* If we encounter a CR, it might be a CRLF. */
1229 if (q
== '\r') /* Probably a CRLF */
1231 /* See if there is an LF. */
1232 q2
= fbuf_getc (dtp
->u
.p
.current_unit
);
1234 dtp
->u
.p
.sf_seen_eor
= 2;
1235 else if (q2
!= EOF
) /* Oops, seek back. */
1236 fbuf_seek (dtp
->u
.p
.current_unit
, -1, SEEK_CUR
);
1244 if ((dtp
->common
.flags
& IOPARM_DT_HAS_SIZE
) != 0)
1245 dtp
->u
.p
.size_used
+= (GFC_IO_INT
) n
;
1246 dtp
->u
.p
.current_unit
->bytes_left
-= n
;
1247 dtp
->u
.p
.current_unit
->strm_pos
+= (gfc_offset
) n
;