1 /* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009, 2010
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
37 typedef unsigned char uchar
;
39 /* read.c -- Deal with formatted reads */
42 /* set_integer()-- All of the integer assignments come here to
43 * actually place the value into memory. */
46 set_integer (void *dest
, GFC_INTEGER_LARGEST value
, int length
)
50 #ifdef HAVE_GFC_INTEGER_16
51 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
55 GFC_INTEGER_16 tmp
= value
;
56 memcpy (dest
, (void *) &tmp
, length
);
62 GFC_INTEGER_8 tmp
= value
;
63 memcpy (dest
, (void *) &tmp
, length
);
68 GFC_INTEGER_4 tmp
= value
;
69 memcpy (dest
, (void *) &tmp
, length
);
74 GFC_INTEGER_2 tmp
= value
;
75 memcpy (dest
, (void *) &tmp
, length
);
80 GFC_INTEGER_1 tmp
= value
;
81 memcpy (dest
, (void *) &tmp
, length
);
85 internal_error (NULL
, "Bad integer kind");
90 /* max_value()-- Given a length (kind), return the maximum signed or
94 max_value (int length
, int signed_flag
)
96 GFC_UINTEGER_LARGEST value
;
97 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
103 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
107 for (n
= 1; n
< 4 * length
; n
++)
108 value
= (value
<< 2) + 3;
114 value
= signed_flag
? 0x7fffffffffffffff : 0xffffffffffffffff;
117 value
= signed_flag
? 0x7fffffff : 0xffffffff;
120 value
= signed_flag
? 0x7fff : 0xffff;
123 value
= signed_flag
? 0x7f : 0xff;
126 internal_error (NULL
, "Bad integer kind");
133 /* convert_real()-- Convert a character representation of a floating
134 * point number to the machine number. Returns nonzero if there is a
135 * range problem during conversion. Note: many architectures
136 * (e.g. IA-64, HP-PA) require that the storage pointed to by the dest
137 * argument is properly aligned for the type in question. TODO:
138 * handle not-a-numbers and infinities. */
141 convert_real (st_parameter_dt
*dtp
, void *dest
, const char *buffer
, int length
)
148 *((GFC_REAL_4
*) dest
) =
149 #if defined(HAVE_STRTOF)
150 gfc_strtof (buffer
, NULL
);
152 (GFC_REAL_4
) gfc_strtod (buffer
, NULL
);
157 *((GFC_REAL_8
*) dest
) = gfc_strtod (buffer
, NULL
);
160 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
162 *((GFC_REAL_10
*) dest
) = gfc_strtold (buffer
, NULL
);
166 #if defined(HAVE_GFC_REAL_16) && defined (HAVE_STRTOLD)
168 *((GFC_REAL_16
*) dest
) = gfc_strtold (buffer
, NULL
);
173 internal_error (&dtp
->common
, "Unsupported real kind during IO");
178 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
179 "Error during floating point read");
180 next_record (dtp
, 1);
188 /* read_l()-- Read a logical value */
191 read_l (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
198 p
= read_block_form (dtp
, &w
);
221 set_integer (dest
, (GFC_INTEGER_LARGEST
) 1, length
);
225 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
229 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
230 "Bad value on logical read");
231 next_record (dtp
, 1);
238 read_utf8 (st_parameter_dt
*dtp
, int *nbytes
)
240 static const uchar masks
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
241 static const uchar patns
[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
248 s
= read_block_form (dtp
, nbytes
);
252 /* If this is a short read, just return. */
260 /* The number of leading 1-bits in the first byte indicates how many
262 for (nb
= 2; nb
< 7; nb
++)
263 if ((c
& ~masks
[nb
-1]) == patns
[nb
-1])
268 c
= (c
& masks
[nb
-1]);
271 s
= read_block_form (dtp
, &nread
);
274 /* Decode the bytes read. */
275 for (i
= 1; i
< nb
; i
++)
277 gfc_char4_t n
= *s
++;
279 if ((n
& 0xC0) != 0x80)
282 c
= ((c
<< 6) + (n
& 0x3F));
285 /* Make sure the shortest possible encoding was used. */
286 if (c
<= 0x7F && nb
> 1) goto invalid
;
287 if (c
<= 0x7FF && nb
> 2) goto invalid
;
288 if (c
<= 0xFFFF && nb
> 3) goto invalid
;
289 if (c
<= 0x1FFFFF && nb
> 4) goto invalid
;
290 if (c
<= 0x3FFFFFF && nb
> 5) goto invalid
;
292 /* Make sure the character is valid. */
293 if (c
> 0x7FFFFFFF || (c
>= 0xD800 && c
<= 0xDFFF))
299 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
, "Invalid UTF-8 encoding");
300 return (gfc_char4_t
) '?';
305 read_utf8_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
312 len
= (width
< len
) ? len
: width
;
316 /* Proceed with decoding one character at a time. */
317 for (j
= 0; j
< len
; j
++, dest
++)
319 c
= read_utf8 (dtp
, &nbytes
);
321 /* Check for a short read and if so, break out. */
325 *dest
= c
> 255 ? '?' : (uchar
) c
;
328 /* If there was a short read, pad the remaining characters. */
329 for (i
= j
; i
< len
; i
++)
335 read_default_char1 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
340 s
= read_block_form (dtp
, &width
);
347 m
= (width
> len
) ? len
: width
;
352 memset (p
+ m
, ' ', n
);
357 read_utf8_char4 (st_parameter_dt
*dtp
, void *p
, int len
, int width
)
363 len
= (width
< len
) ? len
: width
;
365 dest
= (gfc_char4_t
*) p
;
367 /* Proceed with decoding one character at a time. */
368 for (j
= 0; j
< len
; j
++, dest
++)
370 *dest
= read_utf8 (dtp
, &nbytes
);
372 /* Check for a short read and if so, break out. */
377 /* If there was a short read, pad the remaining characters. */
378 for (i
= j
; i
< len
; i
++)
379 *dest
++ = (gfc_char4_t
) ' ';
385 read_default_char4 (st_parameter_dt
*dtp
, char *p
, int len
, int width
)
391 s
= read_block_form (dtp
, &width
);
398 m
= ((int) width
> len
) ? len
: (int) width
;
400 dest
= (gfc_char4_t
*) p
;
402 for (n
= 0; n
< m
; n
++, dest
++, s
++)
403 *dest
= (unsigned char ) *s
;
405 for (n
= 0; n
< len
- (int) width
; n
++, dest
++)
406 *dest
= (unsigned char) ' ';
410 /* read_a()-- Read a character record into a KIND=1 character destination,
411 processing UTF-8 encoding if necessary. */
414 read_a (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
420 if (wi
== -1) /* '(A)' edit descriptor */
424 /* Read in w characters, treating comma as not a separator. */
425 dtp
->u
.p
.sf_read_comma
= 0;
427 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
428 read_utf8_char1 (dtp
, p
, length
, w
);
430 read_default_char1 (dtp
, p
, length
, w
);
432 dtp
->u
.p
.sf_read_comma
=
433 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
437 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
438 processing UTF-8 encoding if necessary. */
441 read_a_char4 (st_parameter_dt
*dtp
, const fnode
*f
, char *p
, int length
)
446 if (w
== -1) /* '(A)' edit descriptor */
449 /* Read in w characters, treating comma as not a separator. */
450 dtp
->u
.p
.sf_read_comma
= 0;
452 if (dtp
->u
.p
.current_unit
->flags
.encoding
== ENCODING_UTF8
)
453 read_utf8_char4 (dtp
, p
, length
, w
);
455 read_default_char4 (dtp
, p
, length
, w
);
457 dtp
->u
.p
.sf_read_comma
=
458 dtp
->u
.p
.current_unit
->decimal_status
== DECIMAL_COMMA
? 0 : 1;
461 /* eat_leading_spaces()-- Given a character pointer and a width,
462 * ignore the leading spaces. */
465 eat_leading_spaces (int *width
, char *p
)
469 if (*width
== 0 || *p
!= ' ')
481 next_char (st_parameter_dt
*dtp
, char **p
, int *w
)
496 if (dtp
->u
.p
.blank_status
!= BLANK_UNSPECIFIED
)
497 return ' '; /* return a blank to signal a null */
499 /* At this point, the rest of the field has to be trailing blanks */
513 /* read_decimal()-- Read a decimal integer value. The values here are
517 read_decimal (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
519 GFC_UINTEGER_LARGEST value
, maxv
, maxv_10
;
520 GFC_INTEGER_LARGEST v
;
526 p
= read_block_form (dtp
, &w
);
531 p
= eat_leading_spaces (&w
, p
);
534 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
538 maxv
= max_value (length
, 1);
560 /* At this point we have a digit-string */
565 c
= next_char (dtp
, &p
, &w
);
571 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
572 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
575 if (c
< '0' || c
> '9')
578 if (value
> maxv_10
&& compile_options
.range_check
== 1)
584 if (value
> maxv
- c
&& compile_options
.range_check
== 1)
593 set_integer (dest
, v
, length
);
597 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
598 "Bad value during integer read");
599 next_record (dtp
, 1);
603 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
604 "Value overflowed during integer read");
605 next_record (dtp
, 1);
610 /* read_radix()-- This function reads values for non-decimal radixes.
611 * The difference here is that we treat the values here as unsigned
612 * values for the purposes of overflow. If minus sign is present and
613 * the top bit is set, the value will be incorrect. */
616 read_radix (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
,
619 GFC_UINTEGER_LARGEST value
, maxv
, maxv_r
;
620 GFC_INTEGER_LARGEST v
;
626 p
= read_block_form (dtp
, &w
);
631 p
= eat_leading_spaces (&w
, p
);
634 set_integer (dest
, (GFC_INTEGER_LARGEST
) 0, length
);
638 maxv
= max_value (length
, 0);
639 maxv_r
= maxv
/ radix
;
660 /* At this point we have a digit-string */
665 c
= next_char (dtp
, &p
, &w
);
670 if (dtp
->u
.p
.blank_status
== BLANK_NULL
) continue;
671 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
) c
= '0';
677 if (c
< '0' || c
> '1')
682 if (c
< '0' || c
> '7')
707 c
= c
- 'a' + '9' + 1;
716 c
= c
- 'A' + '9' + 1;
730 value
= radix
* value
;
732 if (maxv
- c
< value
)
741 set_integer (dest
, v
, length
);
745 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
746 "Bad value during integer read");
747 next_record (dtp
, 1);
751 generate_error (&dtp
->common
, LIBERROR_READ_OVERFLOW
,
752 "Value overflowed during integer read");
753 next_record (dtp
, 1);
758 /* read_f()-- Read a floating point number with F-style editing, which
759 is what all of the other floating point descriptors behave as. The
760 tricky part is that optional spaces are allowed after an E or D,
761 and the implicit decimal point if a decimal point is not present in
765 read_f (st_parameter_dt
*dtp
, const fnode
*f
, char *dest
, int length
)
767 int w
, seen_dp
, exponent
;
772 int seen_int_digit
; /* Seen a digit before the decimal point? */
773 int seen_dec_digit
; /* Seen a digit after the decimal point? */
782 /* Read in the next block. */
783 p
= read_block_form (dtp
, &w
);
786 p
= eat_leading_spaces (&w
, (char*) p
);
790 /* In this buffer we're going to re-format the number cleanly to be parsed
791 by convert_real in the end; this assures we're using strtod from the
792 C library for parsing and thus probably get the best accuracy possible.
793 This process may add a '+0.0' in front of the number as well as change the
794 exponent because of an implicit decimal point or the like. Thus allocating
795 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
796 original buffer had should be enough. */
797 buffer
= gfc_alloca (w
+ 11);
801 if (*p
== '-' || *p
== '+')
809 p
= eat_leading_spaces (&w
, (char*) p
);
813 /* Process the mantissa string. */
819 if (dtp
->u
.p
.current_unit
->decimal_status
!= DECIMAL_COMMA
)
832 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
837 else if (dtp
->u
.p
.blank_status
== BLANK_NULL
)
840 /* TODO: Should we check instead that there are only trailing
841 blanks here, as is done below for exponents? */
882 /* No exponent has been seen, so we use the current scale factor. */
883 exponent
= - dtp
->u
.p
.scale_factor
;
886 /* At this point the start of an exponent has been found. */
888 p
= eat_leading_spaces (&w
, (char*) p
);
889 if (*p
== '-' || *p
== '+')
897 /* At this point a digit string is required. We calculate the value
898 of the exponent in order to take account of the scale factor and
899 the d parameter before explict conversion takes place. */
904 if (dtp
->u
.p
.blank_status
== BLANK_UNSPECIFIED
)
906 while (w
> 0 && isdigit (*p
))
909 exponent
+= *p
- '0';
914 /* Only allow trailing blanks. */
923 else /* BZ or BN status is enabled. */
929 if (dtp
->u
.p
.blank_status
== BLANK_ZERO
)
932 assert (dtp
->u
.p
.blank_status
== BLANK_NULL
);
934 else if (!isdigit (*p
))
939 exponent
+= *p
- '0';
947 exponent
*= exponent_sign
;
950 /* Use the precision specified in the format if no decimal point has been
953 exponent
-= f
->u
.real
.d
;
955 /* Output a trailing '0' after decimal point if not yet found. */
956 if (seen_dp
&& !seen_dec_digit
)
959 /* Print out the exponent to finish the reformatted number. Maximum 4
960 digits for the exponent. */
969 exponent
= - exponent
;
972 assert (exponent
< 10000);
973 for (dig
= 3; dig
>= 0; --dig
)
975 out
[dig
] = (char) ('0' + exponent
% 10);
982 /* Do the actual conversion. */
983 convert_real (dtp
, dest
, buffer
, length
);
987 /* The value read is zero. */
992 *((GFC_REAL_4
*) dest
) = 0.0;
996 *((GFC_REAL_8
*) dest
) = 0.0;
999 #ifdef HAVE_GFC_REAL_10
1001 *((GFC_REAL_10
*) dest
) = 0.0;
1005 #ifdef HAVE_GFC_REAL_16
1007 *((GFC_REAL_16
*) dest
) = 0.0;
1012 internal_error (&dtp
->common
, "Unsupported real kind during IO");
1017 generate_error (&dtp
->common
, LIBERROR_READ_VALUE
,
1018 "Bad value during floating point read");
1019 next_record (dtp
, 1);
1024 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1025 * and never look at it. */
1028 read_x (st_parameter_dt
*dtp
, int n
)
1033 if ((dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
|| is_internal_unit (dtp
))
1034 && dtp
->u
.p
.current_unit
->bytes_left
< n
)
1035 n
= dtp
->u
.p
.current_unit
->bytes_left
;
1042 if (is_internal_unit (dtp
))
1044 p
= mem_alloc_r (dtp
->u
.p
.current_unit
->s
, &length
);
1045 if (unlikely (length
< n
))
1050 if (dtp
->u
.p
.sf_seen_eor
)
1053 p
= fbuf_read (dtp
->u
.p
.current_unit
, &length
);
1060 if (length
== 0 && dtp
->u
.p
.item_count
== 1)
1062 if (dtp
->u
.p
.current_unit
->pad_status
== PAD_NO
)
1075 if (q
== '\n' || q
== '\r')
1077 /* Unexpected end of line. Set the position. */
1078 fbuf_seek (dtp
->u
.p
.current_unit
, n
+ 1 ,SEEK_CUR
);
1079 dtp
->u
.p
.sf_seen_eor
= 1;
1081 /* If we encounter a CR, it might be a CRLF. */
1082 if (q
== '\r') /* Probably a CRLF */
1084 /* See if there is an LF. Use fbuf_read rather then fbuf_getc so
1085 the position is not advanced unless it really is an LF. */
1087 p
= fbuf_read (dtp
->u
.p
.current_unit
, &readlen
);
1088 if (*p
== '\n' && readlen
== 1)
1090 dtp
->u
.p
.sf_seen_eor
= 2;
1091 fbuf_seek (dtp
->u
.p
.current_unit
, 1 ,SEEK_CUR
);
1100 fbuf_seek (dtp
->u
.p
.current_unit
, n
, SEEK_CUR
);
1103 if ((dtp
->common
.flags
& IOPARM_DT_HAS_SIZE
) != 0)
1104 dtp
->u
.p
.size_used
+= (GFC_IO_INT
) n
;
1105 dtp
->u
.p
.current_unit
->bytes_left
-= n
;
1106 dtp
->u
.p
.current_unit
->strm_pos
+= (gfc_offset
) n
;