/*-------------------------------------------------------------------------
 *
 * copyfromparse.c
 *		Parse CSV/text/binary format for COPY FROM.
 *
 * This file contains routines to parse the text, CSV and binary input
 * formats.  The main entry point is NextCopyFrom(), which parses the
 * next input line and returns it as Datums.
 *
 * In text/CSV mode, the parsing happens in multiple stages:
 *
 * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
 *                1.          2.            3.           4.
 *
 * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
 *    places it into 'raw_buf'.
 *
 * 2. CopyConvertBuf() calls the encoding conversion function to convert
 *    the data in 'raw_buf' from client to server encoding, placing the
 *    converted result in 'input_buf'.
 *
 * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
 *    It is responsible for finding the next newline marker, taking quote and
 *    escape characters into account according to the COPY options.  The line
 *    is copied into 'line_buf', with quotes and escape characters still
 *    intact.
 *
 * 4. CopyReadAttributesText/CSV() function takes the input line from
 *    'line_buf', and splits it into fields, unescaping the data as required.
 *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
 *    pointers to each field.
 *
 * If encoding conversion is not required, a shortcut is taken in step 2 to
 * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
 * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
 * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
 * the data is valid in the current encoding.
 *
 * In binary mode, the pipeline is much simpler.  Input is loaded into
 * 'raw_buf', and encoding conversion is done in the datatype-specific
 * receive functions, if required.  'input_buf' and 'line_buf' are not used,
 * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
 * data when it's passed to the receive function.
 *
 * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
 * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
 * and 'attribute_buf' are expanded on demand, to hold the longest line
 * encountered so far.
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/copyfromparse.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "commands/copy.h"
#include "commands/copyfrom_internal.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "libpq/libpq.h"
#include "libpq/pqformat.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
#include "utils/memutils.h"
#include "utils/rel.h"

/* True if character 'c' is an octal digit ('0'..'7'). */
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
/* Numeric value of the octal (or decimal) digit character 'c'. */
#define OCTVALUE(c) ((c) - '0')

/*
 * These macros centralize code used to process line_buf and input_buf buffers.
 * They are macros because they often do continue/break control and to avoid
 * function call overhead in tight COPY loops.
 *
 * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
 * prevent the continue/break processing from working.  We end the "if (1)"
 * with "else ((void) 0)" to ensure the "if" does not unintentionally match
 * any "else" in the calling code, and to avoid any compiler warnings about
 * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
 *
 * All of these macros expand in the context of the line-reading loop and
 * rely on its local variables (input_buf_ptr, copy_buf_len, prev_raw_ptr,
 * hit_eof, need_data, result, cstate) being in scope.
 */

/*
 * This keeps the character read at the top of the loop in the buffer
 * even if there is more than one read-ahead.
 */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
	if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
	{ \
		input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
		need_data = true; \
		continue; \
	} \
} else ((void) 0)

/* This consumes the remainder of the buffer and breaks */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
	if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
	{ \
		if (extralen) \
			input_buf_ptr = copy_buf_len; /* consume the partial character */ \
		/* backslash just before EOF, treat as data char */ \
		result = true; \
		break; \
	} \
} else ((void) 0)

/*
 * Transfer any approved data to line_buf; must do this to be sure
 * there is some room in input_buf.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
	if (input_buf_ptr > cstate->input_buf_index) \
	{ \
		appendBinaryStringInfo(&cstate->line_buf, \
							   cstate->input_buf + cstate->input_buf_index, \
							   input_buf_ptr - cstate->input_buf_index); \
		cstate->input_buf_index = input_buf_ptr; \
	} \
} else ((void) 0)

/* Undo any read-ahead and jump out of the block. */
#define NO_END_OF_COPY_GOTO \
if (1) \
{ \
	input_buf_ptr = prev_raw_ptr + 1; \
	goto not_end_of_copy; \
} else ((void) 0)

/*
 * 11-byte signature that must open every binary-format COPY file.
 * NOTE: there's a copy of this in copyto.c
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";

150 /* non-export function prototypes */
151 static bool CopyReadLine(CopyFromState cstate
);
152 static bool CopyReadLineText(CopyFromState cstate
);
153 static int CopyReadAttributesText(CopyFromState cstate
);
154 static int CopyReadAttributesCSV(CopyFromState cstate
);
155 static Datum
CopyReadBinaryAttribute(CopyFromState cstate
, FmgrInfo
*flinfo
,
156 Oid typioparam
, int32 typmod
,
160 /* Low-level communications functions */
161 static int CopyGetData(CopyFromState cstate
, void *databuf
,
162 int minread
, int maxread
);
163 static inline bool CopyGetInt32(CopyFromState cstate
, int32
*val
);
164 static inline bool CopyGetInt16(CopyFromState cstate
, int16
*val
);
165 static void CopyLoadInputBuf(CopyFromState cstate
);
166 static int CopyReadBinaryData(CopyFromState cstate
, char *dest
, int nbytes
);
169 ReceiveCopyBegin(CopyFromState cstate
)
172 int natts
= list_length(cstate
->attnumlist
);
173 int16 format
= (cstate
->opts
.binary
? 1 : 0);
176 pq_beginmessage(&buf
, 'G');
177 pq_sendbyte(&buf
, format
); /* overall format */
178 pq_sendint16(&buf
, natts
);
179 for (i
= 0; i
< natts
; i
++)
180 pq_sendint16(&buf
, format
); /* per-column formats */
182 cstate
->copy_src
= COPY_FRONTEND
;
183 cstate
->fe_msgbuf
= makeStringInfo();
184 /* We *must* flush here to ensure FE knows it can send. */
189 ReceiveCopyBinaryHeader(CopyFromState cstate
)
195 if (CopyReadBinaryData(cstate
, readSig
, 11) != 11 ||
196 memcmp(readSig
, BinarySignature
, 11) != 0)
198 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
199 errmsg("COPY file signature not recognized")));
201 if (!CopyGetInt32(cstate
, &tmp
))
203 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
204 errmsg("invalid COPY file header (missing flags)")));
205 if ((tmp
& (1 << 16)) != 0)
207 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
208 errmsg("invalid COPY file header (WITH OIDS)")));
210 if ((tmp
>> 16) != 0)
212 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
213 errmsg("unrecognized critical flags in COPY file header")));
214 /* Header extension length */
215 if (!CopyGetInt32(cstate
, &tmp
) ||
218 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
219 errmsg("invalid COPY file header (missing length)")));
220 /* Skip extension header, if present */
223 if (CopyReadBinaryData(cstate
, readSig
, 1) != 1)
225 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
226 errmsg("invalid COPY file header (wrong length)")));
231 * CopyGetData reads data from the source (file or frontend)
233 * We attempt to read at least minread, and at most maxread, bytes from
234 * the source. The actual number of bytes read is returned; if this is
235 * less than minread, EOF was detected.
237 * Note: when copying from the frontend, we expect a proper EOF mark per
238 * protocol; if the frontend simply drops the connection, we raise error.
239 * It seems unwise to allow the COPY IN to complete normally in that case.
241 * NB: no data conversion is applied here.
244 CopyGetData(CopyFromState cstate
, void *databuf
, int minread
, int maxread
)
248 switch (cstate
->copy_src
)
251 bytesread
= fread(databuf
, 1, maxread
, cstate
->copy_file
);
252 if (ferror(cstate
->copy_file
))
254 (errcode_for_file_access(),
255 errmsg("could not read from COPY file: %m")));
257 cstate
->raw_reached_eof
= true;
260 while (maxread
> 0 && bytesread
< minread
&& !cstate
->raw_reached_eof
)
264 while (cstate
->fe_msgbuf
->cursor
>= cstate
->fe_msgbuf
->len
)
266 /* Try to receive another message */
271 HOLD_CANCEL_INTERRUPTS();
273 mtype
= pq_getbyte();
276 (errcode(ERRCODE_CONNECTION_FAILURE
),
277 errmsg("unexpected EOF on client connection with an open transaction")));
278 /* Validate message type and set packet size limit */
281 case 'd': /* CopyData */
282 maxmsglen
= PQ_LARGE_MESSAGE_LIMIT
;
284 case 'c': /* CopyDone */
285 case 'f': /* CopyFail */
286 case 'H': /* Flush */
288 maxmsglen
= PQ_SMALL_MESSAGE_LIMIT
;
292 (errcode(ERRCODE_PROTOCOL_VIOLATION
),
293 errmsg("unexpected message type 0x%02X during COPY from stdin",
295 maxmsglen
= 0; /* keep compiler quiet */
298 /* Now collect the message body */
299 if (pq_getmessage(cstate
->fe_msgbuf
, maxmsglen
))
301 (errcode(ERRCODE_CONNECTION_FAILURE
),
302 errmsg("unexpected EOF on client connection with an open transaction")));
303 RESUME_CANCEL_INTERRUPTS();
304 /* ... and process it */
307 case 'd': /* CopyData */
309 case 'c': /* CopyDone */
310 /* COPY IN correctly terminated by frontend */
311 cstate
->raw_reached_eof
= true;
313 case 'f': /* CopyFail */
315 (errcode(ERRCODE_QUERY_CANCELED
),
316 errmsg("COPY from stdin failed: %s",
317 pq_getmsgstring(cstate
->fe_msgbuf
))));
319 case 'H': /* Flush */
323 * Ignore Flush/Sync for the convenience of client
324 * libraries (such as libpq) that may send those
325 * without noticing that the command they just
330 Assert(false); /* NOT REACHED */
333 avail
= cstate
->fe_msgbuf
->len
- cstate
->fe_msgbuf
->cursor
;
336 pq_copymsgbytes(cstate
->fe_msgbuf
, databuf
, avail
);
337 databuf
= (void *) ((char *) databuf
+ avail
);
343 bytesread
= cstate
->data_source_cb(databuf
, minread
, maxread
);
352 * These functions do apply some data conversion
356 * CopyGetInt32 reads an int32 that appears in network byte order
358 * Returns true if OK, false if EOF
361 CopyGetInt32(CopyFromState cstate
, int32
*val
)
365 if (CopyReadBinaryData(cstate
, (char *) &buf
, sizeof(buf
)) != sizeof(buf
))
367 *val
= 0; /* suppress compiler warning */
370 *val
= (int32
) pg_ntoh32(buf
);
375 * CopyGetInt16 reads an int16 that appears in network byte order
378 CopyGetInt16(CopyFromState cstate
, int16
*val
)
382 if (CopyReadBinaryData(cstate
, (char *) &buf
, sizeof(buf
)) != sizeof(buf
))
384 *val
= 0; /* suppress compiler warning */
387 *val
= (int16
) pg_ntoh16(buf
);
393 * Perform encoding conversion on data in 'raw_buf', writing the converted
394 * data into 'input_buf'.
396 * On entry, there must be some data to convert in 'raw_buf'.
399 CopyConvertBuf(CopyFromState cstate
)
402 * If the file and server encoding are the same, no encoding conversion is
403 * required. However, we still need to verify that the input is valid for
406 if (!cstate
->need_transcoding
)
409 * When conversion is not required, input_buf and raw_buf are the
410 * same. raw_buf_len is the total number of bytes in the buffer, and
411 * input_buf_len tracks how many of those bytes have already been
414 int preverifiedlen
= cstate
->input_buf_len
;
415 int unverifiedlen
= cstate
->raw_buf_len
- cstate
->input_buf_len
;
418 if (unverifiedlen
== 0)
421 * If no more raw data is coming, report the EOF to the caller.
423 if (cstate
->raw_reached_eof
)
424 cstate
->input_reached_eof
= true;
429 * Verify the new data, including any residual unverified bytes from
432 nverified
= pg_encoding_verifymbstr(cstate
->file_encoding
,
433 cstate
->raw_buf
+ preverifiedlen
,
438 * Could not verify anything.
440 * If there is no more raw input data coming, it means that there
441 * was an incomplete multi-byte sequence at the end. Also, if
442 * there's "enough" input left, we should be able to verify at
443 * least one character, and a failure to do so means that we've
444 * hit an invalid byte sequence.
446 if (cstate
->raw_reached_eof
|| unverifiedlen
>= pg_database_encoding_max_length())
447 cstate
->input_reached_error
= true;
450 cstate
->input_buf_len
+= nverified
;
455 * Encoding conversion is needed.
464 if (RAW_BUF_BYTES(cstate
) == 0)
467 * If no more raw data is coming, report the EOF to the caller.
469 if (cstate
->raw_reached_eof
)
470 cstate
->input_reached_eof
= true;
475 * First, copy down any unprocessed data.
477 nbytes
= INPUT_BUF_BYTES(cstate
);
478 if (nbytes
> 0 && cstate
->input_buf_index
> 0)
479 memmove(cstate
->input_buf
, cstate
->input_buf
+ cstate
->input_buf_index
,
481 cstate
->input_buf_index
= 0;
482 cstate
->input_buf_len
= nbytes
;
483 cstate
->input_buf
[nbytes
] = '\0';
485 src
= (unsigned char *) cstate
->raw_buf
+ cstate
->raw_buf_index
;
486 srclen
= cstate
->raw_buf_len
- cstate
->raw_buf_index
;
487 dst
= (unsigned char *) cstate
->input_buf
+ cstate
->input_buf_len
;
488 dstlen
= INPUT_BUF_SIZE
- cstate
->input_buf_len
+ 1;
491 * Do the conversion. This might stop short, if there is an invalid
492 * byte sequence in the input. We'll convert as much as we can in
495 * Note: Even if we hit an invalid byte sequence, we don't report the
496 * error until all the valid bytes have been consumed. The input
497 * might contain an end-of-input marker (\.), and we don't want to
498 * report an error if the invalid byte sequence is after the
499 * end-of-input marker. We might unnecessarily convert some data
500 * after the end-of-input marker as long as it's valid for the
501 * encoding, but that's harmless.
503 convertedlen
= pg_do_encoding_conversion_buf(cstate
->conversion_proc
,
504 cstate
->file_encoding
,
505 GetDatabaseEncoding(),
509 if (convertedlen
== 0)
512 * Could not convert anything. If there is no more raw input data
513 * coming, it means that there was an incomplete multi-byte
514 * sequence at the end. Also, if there is plenty of input left,
515 * we should be able to convert at least one character, so a
516 * failure to do so must mean that we've hit a byte sequence
519 if (cstate
->raw_reached_eof
|| srclen
>= MAX_CONVERSION_INPUT_LENGTH
)
520 cstate
->input_reached_error
= true;
523 cstate
->raw_buf_index
+= convertedlen
;
524 cstate
->input_buf_len
+= strlen((char *) dst
);
529 * Report an encoding or conversion error.
532 CopyConversionError(CopyFromState cstate
)
534 Assert(cstate
->raw_buf_len
> 0);
535 Assert(cstate
->input_reached_error
);
537 if (!cstate
->need_transcoding
)
540 * Everything up to input_buf_len was successfully verified, and
541 * input_buf_len points to the invalid or incomplete character.
543 report_invalid_encoding(cstate
->file_encoding
,
544 cstate
->raw_buf
+ cstate
->input_buf_len
,
545 cstate
->raw_buf_len
- cstate
->input_buf_len
);
550 * raw_buf_index points to the invalid or untranslatable character. We
551 * let the conversion routine report the error, because it can provide
552 * a more specific error message than we could here. An earlier call
553 * to the conversion routine in CopyConvertBuf() detected that there
554 * is an error, now we call the conversion routine again with
555 * noError=false, to have it throw the error.
562 src
= (unsigned char *) cstate
->raw_buf
+ cstate
->raw_buf_index
;
563 srclen
= cstate
->raw_buf_len
- cstate
->raw_buf_index
;
564 dst
= (unsigned char *) cstate
->input_buf
+ cstate
->input_buf_len
;
565 dstlen
= INPUT_BUF_SIZE
- cstate
->input_buf_len
+ 1;
567 (void) pg_do_encoding_conversion_buf(cstate
->conversion_proc
,
568 cstate
->file_encoding
,
569 GetDatabaseEncoding(),
575 * The conversion routine should have reported an error, so this
576 * should not be reached.
578 elog(ERROR
, "encoding conversion failed without error");
583 * Load more data from data source to raw_buf.
585 * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
586 * beginning of the buffer, and we load new data after that.
589 CopyLoadRawBuf(CopyFromState cstate
)
595 * In text mode, if encoding conversion is not required, raw_buf and
596 * input_buf point to the same buffer. Their len/index better agree, too.
598 if (cstate
->raw_buf
== cstate
->input_buf
)
600 Assert(!cstate
->need_transcoding
);
601 Assert(cstate
->raw_buf_index
== cstate
->input_buf_index
);
602 Assert(cstate
->input_buf_len
<= cstate
->raw_buf_len
);
606 * Copy down the unprocessed data if any.
608 nbytes
= RAW_BUF_BYTES(cstate
);
609 if (nbytes
> 0 && cstate
->raw_buf_index
> 0)
610 memmove(cstate
->raw_buf
, cstate
->raw_buf
+ cstate
->raw_buf_index
,
612 cstate
->raw_buf_len
-= cstate
->raw_buf_index
;
613 cstate
->raw_buf_index
= 0;
616 * If raw_buf and input_buf are in fact the same buffer, adjust the
617 * input_buf variables, too.
619 if (cstate
->raw_buf
== cstate
->input_buf
)
621 cstate
->input_buf_len
-= cstate
->input_buf_index
;
622 cstate
->input_buf_index
= 0;
626 inbytes
= CopyGetData(cstate
, cstate
->raw_buf
+ cstate
->raw_buf_len
,
627 1, RAW_BUF_SIZE
- cstate
->raw_buf_len
);
629 cstate
->raw_buf
[nbytes
] = '\0';
630 cstate
->raw_buf_len
= nbytes
;
632 cstate
->bytes_processed
+= inbytes
;
633 pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED
, cstate
->bytes_processed
);
636 cstate
->raw_reached_eof
= true;
640 * CopyLoadInputBuf loads some more data into input_buf
642 * On return, at least one more input character is loaded into
643 * input_buf, or input_reached_eof is set.
645 * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
646 * of the buffer and then we load more data after that.
649 CopyLoadInputBuf(CopyFromState cstate
)
651 int nbytes
= INPUT_BUF_BYTES(cstate
);
654 * The caller has updated input_buf_index to indicate how much of the
655 * input has been consumed and isn't needed anymore. If input_buf is the
656 * same physical area as raw_buf, update raw_buf_index accordingly.
658 if (cstate
->raw_buf
== cstate
->input_buf
)
660 Assert(!cstate
->need_transcoding
);
661 Assert(cstate
->input_buf_index
>= cstate
->raw_buf_index
);
662 cstate
->raw_buf_index
= cstate
->input_buf_index
;
667 /* If we now have some unconverted data, try to convert it */
668 CopyConvertBuf(cstate
);
670 /* If we now have some more input bytes ready, return them */
671 if (INPUT_BUF_BYTES(cstate
) > nbytes
)
675 * If we reached an invalid byte sequence, or we're at an incomplete
676 * multi-byte character but there is no more raw input data, report
679 if (cstate
->input_reached_error
)
680 CopyConversionError(cstate
);
682 /* no more input, and everything has been converted */
683 if (cstate
->input_reached_eof
)
686 /* Try to load more raw data */
687 Assert(!cstate
->raw_reached_eof
);
688 CopyLoadRawBuf(cstate
);
695 * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
696 * and writes them to 'dest'. Returns the number of bytes read (which
697 * would be less than 'nbytes' only if we reach EOF).
700 CopyReadBinaryData(CopyFromState cstate
, char *dest
, int nbytes
)
702 int copied_bytes
= 0;
704 if (RAW_BUF_BYTES(cstate
) >= nbytes
)
706 /* Enough bytes are present in the buffer. */
707 memcpy(dest
, cstate
->raw_buf
+ cstate
->raw_buf_index
, nbytes
);
708 cstate
->raw_buf_index
+= nbytes
;
709 copied_bytes
= nbytes
;
714 * Not enough bytes in the buffer, so must read from the file. Need
715 * to loop since 'nbytes' could be larger than the buffer size.
721 /* Load more data if buffer is empty. */
722 if (RAW_BUF_BYTES(cstate
) == 0)
724 CopyLoadRawBuf(cstate
);
725 if (cstate
->raw_reached_eof
)
729 /* Transfer some bytes. */
730 copy_bytes
= Min(nbytes
- copied_bytes
, RAW_BUF_BYTES(cstate
));
731 memcpy(dest
, cstate
->raw_buf
+ cstate
->raw_buf_index
, copy_bytes
);
732 cstate
->raw_buf_index
+= copy_bytes
;
734 copied_bytes
+= copy_bytes
;
735 } while (copied_bytes
< nbytes
);
742 * Read raw fields in the next line for COPY FROM in text or csv mode.
743 * Return false if no more lines.
745 * An internal temporary buffer is returned via 'fields'. It is valid until
746 * the next call of the function. Since the function returns all raw fields
747 * in the input file, 'nfields' could be different from the number of columns
750 * NOTE: force_not_null option are not applied to the returned fields.
753 NextCopyFromRawFields(CopyFromState cstate
, char ***fields
, int *nfields
)
758 /* only available for text or csv input */
759 Assert(!cstate
->opts
.binary
);
761 /* on input just throw the header line away */
762 if (cstate
->cur_lineno
== 0 && cstate
->opts
.header_line
)
764 cstate
->cur_lineno
++;
765 if (CopyReadLine(cstate
))
766 return false; /* done */
769 cstate
->cur_lineno
++;
771 /* Actually read the line into memory here */
772 done
= CopyReadLine(cstate
);
775 * EOF at start of line means we're done. If we see EOF after some
776 * characters, we act as though it was newline followed by EOF, ie,
777 * process the line and then exit loop on next iteration.
779 if (done
&& cstate
->line_buf
.len
== 0)
782 /* Parse the line into de-escaped field values */
783 if (cstate
->opts
.csv_mode
)
784 fldct
= CopyReadAttributesCSV(cstate
);
786 fldct
= CopyReadAttributesText(cstate
);
788 *fields
= cstate
->raw_fields
;
794 * Read next tuple from file for COPY FROM. Return false if no more tuples.
796 * 'econtext' is used to evaluate default expression for each columns not
797 * read from the file. It can be NULL when no default values are used, i.e.
798 * when all columns are read from the file.
800 * 'values' and 'nulls' arrays must be the same length as columns of the
801 * relation passed to BeginCopyFrom. This function fills the arrays.
804 NextCopyFrom(CopyFromState cstate
, ExprContext
*econtext
,
805 Datum
*values
, bool *nulls
)
808 AttrNumber num_phys_attrs
,
810 num_defaults
= cstate
->num_defaults
;
811 FmgrInfo
*in_functions
= cstate
->in_functions
;
812 Oid
*typioparams
= cstate
->typioparams
;
814 int *defmap
= cstate
->defmap
;
815 ExprState
**defexprs
= cstate
->defexprs
;
817 tupDesc
= RelationGetDescr(cstate
->rel
);
818 num_phys_attrs
= tupDesc
->natts
;
819 attr_count
= list_length(cstate
->attnumlist
);
821 /* Initialize all values for row to NULL */
822 MemSet(values
, 0, num_phys_attrs
* sizeof(Datum
));
823 MemSet(nulls
, true, num_phys_attrs
* sizeof(bool));
825 if (!cstate
->opts
.binary
)
827 char **field_strings
;
833 /* read raw fields in the next line */
834 if (!NextCopyFromRawFields(cstate
, &field_strings
, &fldct
))
837 /* check for overflowing fields */
838 if (attr_count
> 0 && fldct
> attr_count
)
840 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
841 errmsg("extra data after last expected column")));
845 /* Loop to read the user attributes on the line. */
846 foreach(cur
, cstate
->attnumlist
)
848 int attnum
= lfirst_int(cur
);
850 Form_pg_attribute att
= TupleDescAttr(tupDesc
, m
);
852 if (fieldno
>= fldct
)
854 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
855 errmsg("missing data for column \"%s\"",
856 NameStr(att
->attname
))));
857 string
= field_strings
[fieldno
++];
859 if (cstate
->convert_select_flags
&&
860 !cstate
->convert_select_flags
[m
])
862 /* ignore input field, leaving column as NULL */
866 if (cstate
->opts
.csv_mode
)
868 if (string
== NULL
&&
869 cstate
->opts
.force_notnull_flags
[m
])
872 * FORCE_NOT_NULL option is set and column is NULL -
873 * convert it to the NULL string.
875 string
= cstate
->opts
.null_print
;
877 else if (string
!= NULL
&& cstate
->opts
.force_null_flags
[m
]
878 && strcmp(string
, cstate
->opts
.null_print
) == 0)
881 * FORCE_NULL option is set and column matches the NULL
882 * string. It must have been quoted, or otherwise the
883 * string would already have been set to NULL. Convert it
884 * to NULL as specified.
890 cstate
->cur_attname
= NameStr(att
->attname
);
891 cstate
->cur_attval
= string
;
892 values
[m
] = InputFunctionCall(&in_functions
[m
],
898 cstate
->cur_attname
= NULL
;
899 cstate
->cur_attval
= NULL
;
902 Assert(fieldno
== attr_count
);
910 cstate
->cur_lineno
++;
912 if (!CopyGetInt16(cstate
, &fld_count
))
914 /* EOF detected (end of file, or protocol-level EOF) */
921 * Received EOF marker. Wait for the protocol-level EOF, and
922 * complain if it doesn't come immediately. In COPY FROM STDIN,
923 * this ensures that we correctly handle CopyFail, if client
924 * chooses to send that now. When copying from file, we could
925 * ignore the rest of the file like in text mode, but we choose to
926 * be consistent with the COPY FROM STDIN case.
930 if (CopyReadBinaryData(cstate
, &dummy
, 1) > 0)
932 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
933 errmsg("received copy data after EOF marker")));
937 if (fld_count
!= attr_count
)
939 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
940 errmsg("row field count is %d, expected %d",
941 (int) fld_count
, attr_count
)));
943 foreach(cur
, cstate
->attnumlist
)
945 int attnum
= lfirst_int(cur
);
947 Form_pg_attribute att
= TupleDescAttr(tupDesc
, m
);
949 cstate
->cur_attname
= NameStr(att
->attname
);
950 values
[m
] = CopyReadBinaryAttribute(cstate
,
955 cstate
->cur_attname
= NULL
;
960 * Now compute and insert any defaults available for the columns not
961 * provided by the input data. Anything not processed here or above will
964 for (i
= 0; i
< num_defaults
; i
++)
967 * The caller must supply econtext and have switched into the
968 * per-tuple memory context in it.
970 Assert(econtext
!= NULL
);
971 Assert(CurrentMemoryContext
== econtext
->ecxt_per_tuple_memory
);
973 values
[defmap
[i
]] = ExecEvalExpr(defexprs
[i
], econtext
,
981 * Read the next input line and stash it in line_buf.
983 * Result is true if read was terminated by EOF, false if terminated
984 * by newline. The terminating newline or EOF marker is not included
985 * in the final value of line_buf.
988 CopyReadLine(CopyFromState cstate
)
992 resetStringInfo(&cstate
->line_buf
);
993 cstate
->line_buf_valid
= false;
995 /* Parse data and transfer into line_buf */
996 result
= CopyReadLineText(cstate
);
1001 * Reached EOF. In protocol version 3, we should ignore anything
1002 * after \. up to the protocol end of copy data. (XXX maybe better
1003 * not to treat \. as special?)
1005 if (cstate
->copy_src
== COPY_FRONTEND
)
1011 inbytes
= CopyGetData(cstate
, cstate
->input_buf
,
1013 } while (inbytes
> 0);
1014 cstate
->input_buf_index
= 0;
1015 cstate
->input_buf_len
= 0;
1016 cstate
->raw_buf_index
= 0;
1017 cstate
->raw_buf_len
= 0;
1023 * If we didn't hit EOF, then we must have transferred the EOL marker
1024 * to line_buf along with the data. Get rid of it.
1026 switch (cstate
->eol_type
)
1029 Assert(cstate
->line_buf
.len
>= 1);
1030 Assert(cstate
->line_buf
.data
[cstate
->line_buf
.len
- 1] == '\n');
1031 cstate
->line_buf
.len
--;
1032 cstate
->line_buf
.data
[cstate
->line_buf
.len
] = '\0';
1035 Assert(cstate
->line_buf
.len
>= 1);
1036 Assert(cstate
->line_buf
.data
[cstate
->line_buf
.len
- 1] == '\r');
1037 cstate
->line_buf
.len
--;
1038 cstate
->line_buf
.data
[cstate
->line_buf
.len
] = '\0';
1041 Assert(cstate
->line_buf
.len
>= 2);
1042 Assert(cstate
->line_buf
.data
[cstate
->line_buf
.len
- 2] == '\r');
1043 Assert(cstate
->line_buf
.data
[cstate
->line_buf
.len
- 1] == '\n');
1044 cstate
->line_buf
.len
-= 2;
1045 cstate
->line_buf
.data
[cstate
->line_buf
.len
] = '\0';
1048 /* shouldn't get here */
1054 /* Now it's safe to use the buffer in error messages */
1055 cstate
->line_buf_valid
= true;
1061 * CopyReadLineText - inner loop of CopyReadLine for text mode
1064 CopyReadLineText(CopyFromState cstate
)
1066 char *copy_input_buf
;
1069 bool need_data
= false;
1070 bool hit_eof
= false;
1071 bool result
= false;
1074 bool first_char_in_line
= true;
1075 bool in_quote
= false,
1076 last_was_esc
= false;
1078 char escapec
= '\0';
1080 if (cstate
->opts
.csv_mode
)
1082 quotec
= cstate
->opts
.quote
[0];
1083 escapec
= cstate
->opts
.escape
[0];
1084 /* ignore special escape processing if it's the same as quotec */
1085 if (quotec
== escapec
)
1090 * The objective of this loop is to transfer the entire next input line
1091 * into line_buf. Hence, we only care for detecting newlines (\r and/or
1092 * \n) and the end-of-copy marker (\.).
1094 * In CSV mode, \r and \n inside a quoted field are just part of the data
1095 * value and are put in line_buf. We keep just enough state to know if we
1096 * are currently in a quoted field or not.
1098 * These four characters, and the CSV escape and quote characters, are
1099 * assumed the same in frontend and backend encodings.
1101 * The input has already been converted to the database encoding. All
1102 * supported server encodings have the property that all bytes in a
1103 * multi-byte sequence have the high bit set, so a multibyte character
1104 * cannot contain any newline or escape characters embedded in the
1105 * multibyte sequence. Therefore, we can process the input byte-by-byte,
1106 * regardless of the encoding.
1108 * For speed, we try to move data from input_buf to line_buf in chunks
1109 * rather than one character at a time. input_buf_ptr points to the next
1110 * character to examine; any characters from input_buf_index to
1111 * input_buf_ptr have been determined to be part of the line, but not yet
1112 * transferred to line_buf.
1114 * For a little extra speed within the loop, we copy input_buf and
1115 * input_buf_len into local variables.
1117 copy_input_buf
= cstate
->input_buf
;
1118 input_buf_ptr
= cstate
->input_buf_index
;
1119 copy_buf_len
= cstate
->input_buf_len
;
1127 * Load more data if needed. Ideally we would just force four bytes
1128 * of read-ahead and avoid the many calls to
1129 * IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(), but the COPY_OLD_FE protocol
1130 * does not allow us to read too far ahead or we might read into the
1131 * next data, so we read-ahead only as far we know we can. One
1132 * optimization would be to read-ahead four byte here if
1133 * cstate->copy_src != COPY_OLD_FE, but it hardly seems worth it,
1134 * considering the size of the buffer.
1136 if (input_buf_ptr
>= copy_buf_len
|| need_data
)
1140 CopyLoadInputBuf(cstate
);
1141 /* update our local variables */
1142 hit_eof
= cstate
->input_reached_eof
;
1143 input_buf_ptr
= cstate
->input_buf_index
;
1144 copy_buf_len
= cstate
->input_buf_len
;
1147 * If we are completely out of data, break out of the loop,
1150 if (INPUT_BUF_BYTES(cstate
) <= 0)
1158 /* OK to fetch a character */
1159 prev_raw_ptr
= input_buf_ptr
;
1160 c
= copy_input_buf
[input_buf_ptr
++];
1162 if (cstate
->opts
.csv_mode
)
1165 * If character is '\\' or '\r', we may need to look ahead below.
1166 * Force fetch of the next character if we don't already have it.
1167 * We need to do this before changing CSV state, in case one of
1168 * these characters is also the quote or escape character.
1170 * Note: old-protocol does not like forced prefetch, but it's OK
1171 * here since we cannot validly be at EOF.
1173 if (c
== '\\' || c
== '\r')
1175 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1179 * Dealing with quotes and escapes here is mildly tricky. If the
1180 * quote char is also the escape char, there's no problem - we
1181 * just use the char as a toggle. If they are different, we need
1182 * to ensure that we only take account of an escape inside a
1183 * quoted field and immediately preceding a quote char, and not
1184 * the second in an escape-escape sequence.
1186 if (in_quote
&& c
== escapec
)
1187 last_was_esc
= !last_was_esc
;
1188 if (c
== quotec
&& !last_was_esc
)
1189 in_quote
= !in_quote
;
1191 last_was_esc
= false;
1194 * Updating the line count for embedded CR and/or LF chars is
1195 * necessarily a little fragile - this test is probably about the
1196 * best we can do. (XXX it's arguable whether we should do this
1197 * at all --- is cur_lineno a physical or logical count?)
1199 if (in_quote
&& c
== (cstate
->eol_type
== EOL_NL
? '\n' : '\r'))
1200 cstate
->cur_lineno
++;
1204 if (c
== '\r' && (!cstate
->opts
.csv_mode
|| !in_quote
))
1206 /* Check for \r\n on first line, _and_ handle \r\n. */
1207 if (cstate
->eol_type
== EOL_UNKNOWN
||
1208 cstate
->eol_type
== EOL_CRNL
)
1211 * If need more data, go back to loop top to load it.
1213 * Note that if we are at EOF, c will wind up as '\0' because
1214 * of the guaranteed pad of input_buf.
1216 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1219 c
= copy_input_buf
[input_buf_ptr
];
1223 input_buf_ptr
++; /* eat newline */
1224 cstate
->eol_type
= EOL_CRNL
; /* in case not set yet */
1228 /* found \r, but no \n */
1229 if (cstate
->eol_type
== EOL_CRNL
)
1231 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1232 !cstate
->opts
.csv_mode
?
1233 errmsg("literal carriage return found in data") :
1234 errmsg("unquoted carriage return found in data"),
1235 !cstate
->opts
.csv_mode
?
1236 errhint("Use \"\\r\" to represent carriage return.") :
1237 errhint("Use quoted CSV field to represent carriage return.")));
1240 * if we got here, it is the first line and we didn't find
1241 * \n, so don't consume the peeked character
1243 cstate
->eol_type
= EOL_CR
;
1246 else if (cstate
->eol_type
== EOL_NL
)
1248 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1249 !cstate
->opts
.csv_mode
?
1250 errmsg("literal carriage return found in data") :
1251 errmsg("unquoted carriage return found in data"),
1252 !cstate
->opts
.csv_mode
?
1253 errhint("Use \"\\r\" to represent carriage return.") :
1254 errhint("Use quoted CSV field to represent carriage return.")));
1255 /* If reach here, we have found the line terminator */
1260 if (c
== '\n' && (!cstate
->opts
.csv_mode
|| !in_quote
))
1262 if (cstate
->eol_type
== EOL_CR
|| cstate
->eol_type
== EOL_CRNL
)
1264 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1265 !cstate
->opts
.csv_mode
?
1266 errmsg("literal newline found in data") :
1267 errmsg("unquoted newline found in data"),
1268 !cstate
->opts
.csv_mode
?
1269 errhint("Use \"\\n\" to represent newline.") :
1270 errhint("Use quoted CSV field to represent newline.")));
1271 cstate
->eol_type
= EOL_NL
; /* in case not set yet */
1272 /* If reach here, we have found the line terminator */
1277 * In CSV mode, we only recognize \. alone on a line. This is because
1278 * \. is a valid CSV data value.
1280 if (c
== '\\' && (!cstate
->opts
.csv_mode
|| first_char_in_line
))
1284 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1285 IF_NEED_REFILL_AND_EOF_BREAK(0);
1288 * get next character
1289 * Note: we do not change c so if it isn't \., we can fall
1290 * through and continue processing.
1293 c2
= copy_input_buf
[input_buf_ptr
];
1297 input_buf_ptr
++; /* consume the '.' */
1300 * Note: if we loop back for more data here, it does not
1301 * matter that the CSV state change checks are re-executed; we
1302 * will come back here with no important state changed.
1304 if (cstate
->eol_type
== EOL_CRNL
)
1306 /* Get the next character */
1307 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1308 /* if hit_eof, c2 will become '\0' */
1309 c2
= copy_input_buf
[input_buf_ptr
++];
1313 if (!cstate
->opts
.csv_mode
)
1315 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1316 errmsg("end-of-copy marker does not match previous newline style")));
1318 NO_END_OF_COPY_GOTO
;
1320 else if (c2
!= '\r')
1322 if (!cstate
->opts
.csv_mode
)
1324 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1325 errmsg("end-of-copy marker corrupt")));
1327 NO_END_OF_COPY_GOTO
;
1331 /* Get the next character */
1332 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
1333 /* if hit_eof, c2 will become '\0' */
1334 c2
= copy_input_buf
[input_buf_ptr
++];
1336 if (c2
!= '\r' && c2
!= '\n')
1338 if (!cstate
->opts
.csv_mode
)
1340 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1341 errmsg("end-of-copy marker corrupt")));
1343 NO_END_OF_COPY_GOTO
;
1346 if ((cstate
->eol_type
== EOL_NL
&& c2
!= '\n') ||
1347 (cstate
->eol_type
== EOL_CRNL
&& c2
!= '\n') ||
1348 (cstate
->eol_type
== EOL_CR
&& c2
!= '\r'))
1351 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1352 errmsg("end-of-copy marker does not match previous newline style")));
1356 * Transfer only the data before the \. into line_buf, then
1357 * discard the data and the \. sequence.
1359 if (prev_raw_ptr
> cstate
->input_buf_index
)
1360 appendBinaryStringInfo(&cstate
->line_buf
,
1361 cstate
->input_buf
+ cstate
->input_buf_index
,
1362 prev_raw_ptr
- cstate
->input_buf_index
);
1363 cstate
->input_buf_index
= input_buf_ptr
;
1364 result
= true; /* report EOF */
1367 else if (!cstate
->opts
.csv_mode
)
1370 * If we are here, it means we found a backslash followed by
1371 * something other than a period. In non-CSV mode, anything
1372 * after a backslash is special, so we skip over that second
1373 * character too. If we didn't do that \\. would be
1374 * considered an eof-of copy, while in non-CSV mode it is a
1375 * literal backslash followed by a period. In CSV mode,
1376 * backslashes are not special, so we want to process the
1377 * character after the backslash just like a normal character,
1378 * so we don't increment in those cases.
1385 * This label is for CSV cases where \. appears at the start of a
1386 * line, but there is more text after it, meaning it was a data value.
1387 * We are more strict for \. in CSV mode because \. could be a data
1388 * value, while in non-CSV mode, \. cannot be a data value.
1391 first_char_in_line
= false;
1392 } /* end of outer loop */
1395 * Transfer any still-uncopied data to line_buf.
/*
 * GetDecimalFromHex
 *		Return decimal value for a hexadecimal digit
 *
 * A decimal digit ('0'..'9') yields 0..9.  Any other character is folded
 * to lower case and measured from 'a', so 'a'/'A' .. 'f'/'F' yield 10..15.
 *
 * No validation is performed here: the argument is expected to already be
 * a hexadecimal digit (e.g. vetted with isxdigit() by the caller).
 */
static int
GetDecimalFromHex(char hex)
{
	/* <ctype.h> functions need an unsigned char value, not a plain char */
	if (isdigit((unsigned char) hex))
		return hex - '0';
	else
		return tolower((unsigned char) hex) - 'a' + 10;
}
1415 * Parse the current line into separate attributes (fields),
1416 * performing de-escaping as needed.
1418 * The input is in line_buf. We use attribute_buf to hold the result
1419 * strings. cstate->raw_fields[k] is set to point to the k'th attribute
1420 * string, or NULL when the input matches the null marker string.
1421 * This array is expanded as necessary.
1423 * (Note that the caller cannot check for nulls since the returned
1424 * string would be the post-de-escaping equivalent, which may look
1425 * the same as some valid data string.)
1427 * delim is the column delimiter string (must be just one byte for now).
1428 * null_print is the null marker string. Note that this is compared to
1429 * the pre-de-escaped input string.
1431 * The return value is the number of fields actually read.
1434 CopyReadAttributesText(CopyFromState cstate
)
1436 char delimc
= cstate
->opts
.delim
[0];
1443 * We need a special case for zero-column tables: check that the input
1444 * line is empty, and return.
1446 if (cstate
->max_fields
<= 0)
1448 if (cstate
->line_buf
.len
!= 0)
1450 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1451 errmsg("extra data after last expected column")));
1455 resetStringInfo(&cstate
->attribute_buf
);
1458 * The de-escaped attributes will certainly not be longer than the input
1459 * data line, so we can just force attribute_buf to be large enough and
1460 * then transfer data without any checks for enough space. We need to do
1461 * it this way because enlarging attribute_buf mid-stream would invalidate
1462 * pointers already stored into cstate->raw_fields[].
1464 if (cstate
->attribute_buf
.maxlen
<= cstate
->line_buf
.len
)
1465 enlargeStringInfo(&cstate
->attribute_buf
, cstate
->line_buf
.len
);
1466 output_ptr
= cstate
->attribute_buf
.data
;
1468 /* set pointer variables for loop */
1469 cur_ptr
= cstate
->line_buf
.data
;
1470 line_end_ptr
= cstate
->line_buf
.data
+ cstate
->line_buf
.len
;
1472 /* Outer loop iterates over fields */
1476 bool found_delim
= false;
1480 bool saw_non_ascii
= false;
1482 /* Make sure there is enough space for the next value */
1483 if (fieldno
>= cstate
->max_fields
)
1485 cstate
->max_fields
*= 2;
1486 cstate
->raw_fields
=
1487 repalloc(cstate
->raw_fields
, cstate
->max_fields
* sizeof(char *));
1490 /* Remember start of field on both input and output sides */
1491 start_ptr
= cur_ptr
;
1492 cstate
->raw_fields
[fieldno
] = output_ptr
;
1495 * Scan data for field.
1497 * Note that in this loop, we are scanning to locate the end of field
1498 * and also speculatively performing de-escaping. Once we find the
1499 * end-of-field, we can match the raw field contents against the null
1500 * marker string. Only after that comparison fails do we know that
1501 * de-escaping is actually the right thing to do; therefore we *must
1502 * not* throw any syntax errors before we've done the null-marker
1510 if (cur_ptr
>= line_end_ptr
)
1520 if (cur_ptr
>= line_end_ptr
)
1538 if (cur_ptr
< line_end_ptr
)
1544 val
= (val
<< 3) + OCTVALUE(c
);
1545 if (cur_ptr
< line_end_ptr
)
1551 val
= (val
<< 3) + OCTVALUE(c
);
1557 if (c
== '\0' || IS_HIGHBIT_SET(c
))
1558 saw_non_ascii
= true;
1563 if (cur_ptr
< line_end_ptr
)
1565 char hexchar
= *cur_ptr
;
1567 if (isxdigit((unsigned char) hexchar
))
1569 int val
= GetDecimalFromHex(hexchar
);
1572 if (cur_ptr
< line_end_ptr
)
1575 if (isxdigit((unsigned char) hexchar
))
1578 val
= (val
<< 4) + GetDecimalFromHex(hexchar
);
1582 if (c
== '\0' || IS_HIGHBIT_SET(c
))
1583 saw_non_ascii
= true;
1607 * in all other cases, take the char after '\'
1613 /* Add c to output string */
1617 /* Check whether raw input matched null marker */
1618 input_len
= end_ptr
- start_ptr
;
1619 if (input_len
== cstate
->opts
.null_print_len
&&
1620 strncmp(start_ptr
, cstate
->opts
.null_print
, input_len
) == 0)
1621 cstate
->raw_fields
[fieldno
] = NULL
;
1625 * At this point we know the field is supposed to contain data.
1627 * If we de-escaped any non-7-bit-ASCII chars, make sure the
1628 * resulting string is valid data for the db encoding.
1632 char *fld
= cstate
->raw_fields
[fieldno
];
1634 pg_verifymbstr(fld
, output_ptr
- fld
, false);
1638 /* Terminate attribute value in output area */
1639 *output_ptr
++ = '\0';
1642 /* Done if we hit EOL instead of a delim */
1647 /* Clean up state of attribute_buf */
1649 Assert(*output_ptr
== '\0');
1650 cstate
->attribute_buf
.len
= (output_ptr
- cstate
->attribute_buf
.data
);
1656 * Parse the current line into separate attributes (fields),
1657 * performing de-escaping as needed. This has exactly the same API as
1658 * CopyReadAttributesText, except we parse the fields according to
1659 * "standard" (i.e. common) CSV usage.
1662 CopyReadAttributesCSV(CopyFromState cstate
)
1664 char delimc
= cstate
->opts
.delim
[0];
1665 char quotec
= cstate
->opts
.quote
[0];
1666 char escapec
= cstate
->opts
.escape
[0];
1673 * We need a special case for zero-column tables: check that the input
1674 * line is empty, and return.
1676 if (cstate
->max_fields
<= 0)
1678 if (cstate
->line_buf
.len
!= 0)
1680 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1681 errmsg("extra data after last expected column")));
1685 resetStringInfo(&cstate
->attribute_buf
);
1688 * The de-escaped attributes will certainly not be longer than the input
1689 * data line, so we can just force attribute_buf to be large enough and
1690 * then transfer data without any checks for enough space. We need to do
1691 * it this way because enlarging attribute_buf mid-stream would invalidate
1692 * pointers already stored into cstate->raw_fields[].
1694 if (cstate
->attribute_buf
.maxlen
<= cstate
->line_buf
.len
)
1695 enlargeStringInfo(&cstate
->attribute_buf
, cstate
->line_buf
.len
);
1696 output_ptr
= cstate
->attribute_buf
.data
;
1698 /* set pointer variables for loop */
1699 cur_ptr
= cstate
->line_buf
.data
;
1700 line_end_ptr
= cstate
->line_buf
.data
+ cstate
->line_buf
.len
;
1702 /* Outer loop iterates over fields */
1706 bool found_delim
= false;
1707 bool saw_quote
= false;
1712 /* Make sure there is enough space for the next value */
1713 if (fieldno
>= cstate
->max_fields
)
1715 cstate
->max_fields
*= 2;
1716 cstate
->raw_fields
=
1717 repalloc(cstate
->raw_fields
, cstate
->max_fields
* sizeof(char *));
1720 /* Remember start of field on both input and output sides */
1721 start_ptr
= cur_ptr
;
1722 cstate
->raw_fields
[fieldno
] = output_ptr
;
1725 * Scan data for field,
1727 * The loop starts in "not quote" mode and then toggles between that
1728 * and "in quote" mode. The loop exits normally if it is in "not
1729 * quote" mode and a delimiter or line end is seen.
1739 if (cur_ptr
>= line_end_ptr
)
1742 /* unquoted field delimiter */
1748 /* start of quoted field (or part of field) */
1754 /* Add c to output string */
1762 if (cur_ptr
>= line_end_ptr
)
1764 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1765 errmsg("unterminated CSV quoted field")));
1769 /* escape within a quoted field */
1773 * peek at the next char if available, and escape it if it
1774 * is an escape char or a quote char
1776 if (cur_ptr
< line_end_ptr
)
1778 char nextc
= *cur_ptr
;
1780 if (nextc
== escapec
|| nextc
== quotec
)
1782 *output_ptr
++ = nextc
;
1790 * end of quoted field. Must do this test after testing for
1791 * escape in case quote char and escape char are the same
1792 * (which is the common case).
1797 /* Add c to output string */
1803 /* Terminate attribute value in output area */
1804 *output_ptr
++ = '\0';
1806 /* Check whether raw input matched null marker */
1807 input_len
= end_ptr
- start_ptr
;
1808 if (!saw_quote
&& input_len
== cstate
->opts
.null_print_len
&&
1809 strncmp(start_ptr
, cstate
->opts
.null_print
, input_len
) == 0)
1810 cstate
->raw_fields
[fieldno
] = NULL
;
1813 /* Done if we hit EOL instead of a delim */
1818 /* Clean up state of attribute_buf */
1820 Assert(*output_ptr
== '\0');
1821 cstate
->attribute_buf
.len
= (output_ptr
- cstate
->attribute_buf
.data
);
1828 * Read a binary attribute
1831 CopyReadBinaryAttribute(CopyFromState cstate
, FmgrInfo
*flinfo
,
1832 Oid typioparam
, int32 typmod
,
1838 if (!CopyGetInt32(cstate
, &fld_size
))
1840 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1841 errmsg("unexpected EOF in COPY data")));
1845 return ReceiveFunctionCall(flinfo
, NULL
, typioparam
, typmod
);
1849 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1850 errmsg("invalid field size")));
1852 /* reset attribute_buf to empty, and load raw data in it */
1853 resetStringInfo(&cstate
->attribute_buf
);
1855 enlargeStringInfo(&cstate
->attribute_buf
, fld_size
);
1856 if (CopyReadBinaryData(cstate
, cstate
->attribute_buf
.data
,
1857 fld_size
) != fld_size
)
1859 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT
),
1860 errmsg("unexpected EOF in COPY data")));
1862 cstate
->attribute_buf
.len
= fld_size
;
1863 cstate
->attribute_buf
.data
[fld_size
] = '\0';
1865 /* Call the column type's binary input converter */
1866 result
= ReceiveFunctionCall(flinfo
, &cstate
->attribute_buf
,
1867 typioparam
, typmod
);
1869 /* Trouble if it didn't eat the whole buffer */
1870 if (cstate
->attribute_buf
.cursor
!= cstate
->attribute_buf
.len
)
1872 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION
),
1873 errmsg("incorrect binary data format")));