1 /* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009, 2010
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
37 typedef unsigned char uchar
;
39 /* read.c -- Deal with formatted reads */
42 /* set_integer()-- All of the integer assignments come here to
43 * actually place the value into memory. */
46 set_integer (void *dest
, GFC_INTEGER_LARGEST value
, int length
)
50 #ifdef HAVE_GFC_INTEGER_16
51 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
55 GFC_INTEGER_16 tmp
= value
;
56 memcpy (dest
, (void *) &tmp
, length
);
62 GFC_INTEGER_8 tmp
= value
;
63 memcpy (dest
, (void *) &tmp
, length
);
68 GFC_INTEGER_4 tmp
= value
;
69 memcpy (dest
, (void *) &tmp
, length
);
74 GFC_INTEGER_2 tmp
= value
;
75 memcpy (dest
, (void *) &tmp
, length
);
80 GFC_INTEGER_1 tmp
= value
;
81 memcpy (dest
, (void *) &tmp
, length
);
85 internal_error (NULL
, "Bad integer kind");
90 /* max_value()-- Given a length (kind), return the maximum signed or
94 max_value (int length
, int signed_flag
)
96 GFC_UINTEGER_LARGEST value
;
97 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
103 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
107 for (n
= 1; n
< 4 * length
; n
++)
108 value
= (value
<< 2) + 3;
114 value
= signed_flag
? 0x7fffffffffffffff : 0xffffffffffffffff;
117 value
= signed_flag
? 0x7fffffff : 0xffffffff;
120 value
= signed_flag
? 0x7fff : 0xffff;
123 value
= signed_flag
? 0x7f : 0xff;
126 internal_error (NULL
, "Bad integer kind");
133 /* convert_real()-- Convert a character representation of a floating
134 * point number to the machine number. Returns nonzero if there is a
135 * range problem during conversion. Note: many architectures
136 * (e.g. IA-64, HP-PA) require that the storage pointed to by the dest
137 * argument is properly aligned for the type in question. TODO:
138 * handle not-a-numbers and infinities. */
141 convert_real (st_parameter_dt
*dtp
, void *dest
, const char *buffer
, int length
)
148 *((GFC_REAL_4
*) dest
) =
149 #if defined(HAVE_STRTOF)
150 gfc_strtof (buffer
, NULL
);
152 (GFC_REAL_4
) gfc_strtod (buffer
, NULL
);
157 *((GFC_REAL_8
*) dest
) = gfc_strtod (buffer
, NULL
);
160 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
162 *((GFC_REAL_10
*) dest
) = gfc_strtold (buffer
, NULL
);
166 #if defined(HAVE_GFC_REAL_16) && defined (HAVE_STRTOLD)
168 *((GFC_REAL_16
*) dest
) = gfc_strtold (buffer
, NULL
);
173 internal_error (&dtp
->common
, "Unsupported real kind during IO");
178 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
179 "Error during floating point read");
180 next_record (dtp
, 1);
188 /* read_l()-- Read a logical value */
191 read_l (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
198 p
= read_block_form (dtp
, &w
);
221 set_integer (dest
, (GFC_INTEGER_LARGEST
) 1, length
);
225 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
229 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
230 "Bad value on logical read");
231 next_record (dtp
, 1);
238 read_utf8 (st_parameter_dt
*dtp
, int *nbytes
)
240 static const uchar masks
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
241 static const uchar patns
[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
248 s
= read_block_form (dtp
, nbytes
);
252 /* If this is a short read, just return. */
260 /* The number of leading 1-bits in the first byte indicates how many
262 for (nb
= 2; nb
< 7; nb
++)
263 if ((c
& ~masks
[nb
-1]) == patns
[nb
-1])
268 c
= (c
& masks
[nb
-1]);
271 s
= read_block_form (dtp
, &nread
);
274 /* Decode the bytes read. */
275 for (i
= 1; i
< nb
; i
++)
277 gfc_char4_t n
= *s
++;
279 if ((n
& 0xC0) != 0x80)
282 c
= ((c
<< 6) + (n
& 0x3F));
285 /* Make sure the shortest possible encoding was used. */
286 if (c
<= 0x7F && nb
> 1) goto invalid
;
287 if (c
<= 0x7FF && nb
> 2) goto invalid
;
288 if (c
<= 0xFFFF && nb
> 3) goto invalid
;
289 if (c
<= 0x1FFFFF && nb
> 4) goto invalid
;
290 if (c
<= 0x3FFFFFF && nb
> 5) goto invalid
;
292 /* Make sure the character is valid. */
293 if (c
> 0x7FFFFFFF || (c
>= 0xD800 && c
<= 0xDFFF))
299 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
, "Invalid UTF-8 encoding");
300 return (gfc_char4_t
) '?';
305 read_utf8_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
312 len
= (width
< len
) ? len
: width
;
316 /* Proceed with decoding one character at a time. */
317 for (j
= 0; j
< len
; j
++, dest
++)
319 c
= read_utf8 (dtp
, &nbytes
);
321 /* Check for a short read and if so, break out. */
325 *dest
= c
> 255 ? '?' : (uchar
) c
;
328 /* If there was a short read, pad the remaining characters. */
329 for (i
= j
; i
< len
; i
++)
335 read_default_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
340 s
= read_block_form (dtp
, &width
);
347 m
= (width
> len
) ? len
: width
;
352 memset (p
+ m
, ' ', n
);
357 read_utf8_char4 (st_parameter_dt
*dtp
, void *p
, int len
, int width
)
363 len
= (width
< len
) ? len
: width
;
365 dest
= (gfc_char4_t
*) p
;
367 /* Proceed with decoding one character at a time. */
368 for (j
= 0; j
< len
; j
++, dest
++)
370 *dest
= read_utf8 (dtp
, &nbytes
);
372 /* Check for a short read and if so, break out. */
377 /* If there was a short read, pad the remaining characters. */
378 for (i
= j
; i
< len
; i
++)
379 *dest
++ = (gfc_char4_t
) ' ';
385 read_default_char4 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
391 s
= read_block_form (dtp
, &width
);
398 m
= ((int) width
> len
) ? len
: (int) width
;
400 dest
= (gfc_char4_t
*) p
;
402 for (n
= 0; n
< m
; n
++, dest
++, s
++)
403 *dest
= (unsigned char ) *s
;
405 for (n
= 0; n
< len
- (int) width
; n
++, dest
++)
406 *dest
= (unsigned char) ' ';
410 /* read_a()-- Read a character record into a KIND=1 character destination,
411 processing UTF-8 encoding if necessary. */
414 read_a (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
420 if (wi
== -1) /* '(A)' edit descriptor */
424 /* Read in w characters, treating comma as not a separator. */
425 dtp
->u
.p
.sf_read_comma
= 0;
427 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
428 read_utf8_char1 (dtp
, p
, length
, w
);
430 read_default_char1 (dtp
, p
, length
, w
);
432 dtp
->u
.p
.sf_read_comma
=
433 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
437 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
438 processing UTF-8 encoding if necessary. */
441 read_a_char4 (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
446 if (w
== -1) /* '(A)' edit descriptor */
449 /* Read in w characters, treating comma as not a separator. */
450 dtp
->u
.p
.sf_read_comma
= 0;
452 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
453 read_utf8_char4 (dtp
, p
, length
, w
);
455 read_default_char4 (dtp
, p
, length
, w
);
457 dtp
->u
.p
.sf_read_comma
=
458 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
461 /* eat_leading_spaces()-- Given a character pointer and a width,
462 * ignore the leading spaces. */
465 eat_leading_spaces (int *width
, char *p
)
469 if (*width
== 0 || *p
!= ' ')
481 next_char (st_parameter_dt
*dtp
, char **p
, int *w
)
496 if (dtp
->u
.p
.blank_status
!= BLANK_UNSPECIFIED
)
497 return ' '; /* return a blank to signal a null */
499 /* At this point, the rest of the field has to be trailing blanks */
513 /* read_decimal()-- Read a decimal integer value. The values here are
517 read_decimal (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
519 GFC_UINTEGER_LARGEST value
, maxv
, maxv_10
;
520 GFC_INTEGER_LARGEST v
;
526 p
= read_block_form (dtp
, &w
);
531 p
= eat_leading_spaces (&w
, p
);
534 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
538 maxv
= max_value (length
, 1);
560 /* At this point we have a digit-string */
565 c
= next_char (dtp
, &p
, &w
);
571 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
572 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
575 if (c
< '0' || c
> '9')
578 if (value
> maxv_10
&& compile_options
.range_check
== 1)
584 if (value
> maxv
- c
&& compile_options
.range_check
== 1)
593 set_integer (dest
, v
, length
);
597 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
598 "Bad value during integer read");
599 next_record (dtp
, 1);
603 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
604 "Value overflowed during integer read");
605 next_record (dtp
, 1);
610 /* read_radix()-- This function reads values for non-decimal radixes.
611 * The difference here is that we treat the values here as unsigned
612 * values for the purposes of overflow. If minus sign is present and
613 * the top bit is set, the value will be incorrect. */
616 read_radix (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
,
619 GFC_UINTEGER_LARGEST value
, maxv
, maxv_r
;
620 GFC_INTEGER_LARGEST v
;
626 p
= read_block_form (dtp
, &w
);
631 p
= eat_leading_spaces (&w
, p
);
634 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
638 maxv
= max_value (length
, 0);
639 maxv_r
= maxv
/ radix
;
660 /* At this point we have a digit-string */
665 c
= next_char (dtp
, &p
, &w
);
670 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
671 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
677 if (c
< '0' || c
> '1')
682 if (c
< '0' || c
> '7')
707 c
= c
- 'a' + '9' + 1;
716 c
= c
- 'A' + '9' + 1;
730 value
= radix
* value
;
732 if (maxv
- c
< value
)
741 set_integer (dest
, v
, length
);
745 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
746 "Bad value during integer read");
747 next_record (dtp
, 1);
751 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
752 "Value overflowed during integer read");
753 next_record (dtp
, 1);
758 /* read_f()-- Read a floating point number with F-style editing, which
759 is what all of the other floating point descriptors behave as. The
760 tricky part is that optional spaces are allowed after an E or D,
761 and the implicit decimal point if a decimal point is not present in
765 read_f (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
767 int w
, seen_dp
, exponent
;
772 int seen_int_digit
; /* Seen a digit before the decimal point? */
773 int seen_dec_digit
; /* Seen a digit after the decimal point? */
782 /* Read in the next block. */
783 p
= read_block_form (dtp
, &w
);
786 p
= eat_leading_spaces (&w
, (char*) p
);
790 /* In this buffer we're going to re-format the number cleanly to be parsed
791 by convert_real in the end; this assures we're using strtod from the
792 C library for parsing and thus probably get the best accuracy possible.
793 This process may add a '+0.0' in front of the number as well as change the
794 exponent because of an implicit decimal point or the like. Thus allocating
795 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
796 original buffer had should be enough. */
797 buffer
= gfc_alloca (w
+ 11);
801 if (*p
== '-' || *p
== '+')
809 p
= eat_leading_spaces (&w
, (char*) p
);
813 /* Check for Infinity or NaN. */
814 if (unlikely ((w
>= 3 && (*p
== 'i' || *p
== 'I' || *p
== 'n' || *p
== 'N'))))
819 /* Scan through the buffer keeping track of spaces and parenthesis. We
820 null terminate the string as soon as we see a left paren or if we are
821 BLANK_NULL mode. Leading spaces have already been skipped above,
822 trailing spaces are ignored by converting to '\0'. A space
823 between "NaN" and the optional perenthesis is not permitted. */
830 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
844 if (seen_paren
++ != 1)
858 if (seen_paren
!= 0 && seen_paren
!= 2)
861 if ((strcmp (save
, "inf") == 0) || (strcmp (save
, "infinity") == 0))
866 else if (strcmp (save
, "nan") != 0)
869 convert_real (dtp
, dest
, buffer
, length
);
873 /* Process the mantissa string. */
879 if (dtp
->u
.p
.current_unit
->decimal_status
!= DECIMAL_COMMA
)
892 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
897 else if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
900 /* TODO: Should we check instead that there are only trailing
901 blanks here, as is done below for exponents? */
942 /* No exponent has been seen, so we use the current scale factor. */
943 exponent
= - dtp
->u
.p
.scale_factor
;
946 /* At this point the start of an exponent has been found. */
948 p
= eat_leading_spaces (&w
, (char*) p
);
949 if (*p
== '-' || *p
== '+')
957 /* At this point a digit string is required. We calculate the value
958 of the exponent in order to take account of the scale factor and
959 the d parameter before explict conversion takes place. */
964 if (dtp
->u
.p
.blank_status
== BLANK_UNSPECIFIED
)
966 while (w
> 0 && isdigit (*p
))
969 exponent
+= *p
- '0';
974 /* Only allow trailing blanks. */
983 else /* BZ or BN status is enabled. */
989 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
992 assert (dtp
->u
.p
.blank_status
== BLANK_NULL
);
994 else if (!isdigit (*p
))
999 exponent
+= *p
- '0';
1007 exponent
*= exponent_sign
;
1010 /* Use the precision specified in the format if no decimal point has been
1013 exponent
-= f
->u
.real
.d
;
1015 /* Output a trailing '0' after decimal point if not yet found. */
1016 if (seen_dp
&& !seen_dec_digit
)
1019 /* Print out the exponent to finish the reformatted number. Maximum 4
1020 digits for the exponent. */
1029 exponent
= - exponent
;
1032 assert (exponent
< 10000);
1033 for (dig
= 3; dig
>= 0; --dig
)
1035 out
[dig
] = (char) ('0' + exponent
% 10);
1042 /* Do the actual conversion. */
1043 convert_real (dtp
, dest
, buffer
, length
);
1047 /* The value read is zero. */
1052 *((GFC_REAL_4
*) dest
) = 0.0;
1056 *((GFC_REAL_8
*) dest
) = 0.0;
1059 #ifdef HAVE_GFC_REAL_10
1061 *((GFC_REAL_10
*) dest
) = 0.0;
1065 #ifdef HAVE_GFC_REAL_16
1067 *((GFC_REAL_16
*) dest
) = 0.0;
1072 internal_error (&dtp
->common
, "Unsupported real kind during IO");
1077 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
1078 "Bad value during floating point read");
1079 next_record (dtp
, 1);
1084 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1085 * and never look at it. */
1088 read_x (st_parameter_dt
*dtp
, int n
)
1093 if ((dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
|| is_internal_unit (dtp
))
1094 && dtp
->u
.p
.current_unit
->bytes_left
< n
)
1095 n
= dtp
->u
.p
.current_unit
->bytes_left
;
1102 if (is_internal_unit (dtp
))
1104 p
= mem_alloc_r (dtp
->u
.p
.current_unit
->s
, &length
);
1105 if (unlikely (length
< n
))
1110 if (dtp
->u
.p
.sf_seen_eor
)
1113 p
= fbuf_read (dtp
->u
.p
.current_unit
, &length
);
1120 if (length
== 0 && dtp
->u
.p
.item_count
== 1)
1122 if (dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
)
1135 if (q
== '\n' || q
== '\r')
1137 /* Unexpected end of line. Set the position. */
1138 fbuf_seek (dtp
->u
.p
.current_unit
, n
+ 1 ,SEEK_CUR
);
1139 dtp
->u
.p
.sf_seen_eor
= 1;
1141 /* If we encounter a CR, it might be a CRLF. */
1142 if (q
== '\r') /* Probably a CRLF */
1144 /* See if there is an LF. Use fbuf_read rather then fbuf_getc so
1145 the position is not advanced unless it really is an LF. */
1147 p
= fbuf_read (dtp
->u
.p
.current_unit
, &readlen
);
1148 if (*p
== '\n' && readlen
== 1)
1150 dtp
->u
.p
.sf_seen_eor
= 2;
1151 fbuf_seek (dtp
->u
.p
.current_unit
, 1 ,SEEK_CUR
);
1160 fbuf_seek (dtp
->u
.p
.current_unit
, n
, SEEK_CUR
);
1163 if ((dtp
->common
.flags
& IOPARM_DT_HAS_SIZE
) != 0)
1164 dtp
->u
.p
.size_used
+= (GFC_IO_INT
) n
;
1165 dtp
->u
.p
.current_unit
->bytes_left
-= n
;
1166 dtp
->u
.p
.current_unit
->strm_pos
+= (gfc_offset
) n
;