doc/src/sgml/ref/copy.sgml

   1 <!--
   2 $PostgreSQL$
   3 PostgreSQL documentation
   4 -->
   5
   6
   7 <refentry id="SQL-COPY">
   8  <refmeta>
   9   <refentrytitle id="sql-copy-title">COPY</refentrytitle>
  10   <refmiscinfo>SQL - Language Statements</refmiscinfo>
  11  </refmeta>
  12
  13  <refnamediv>
  14   <refname>COPY</refname>
  15   <refpurpose>copy data between a file and a table</refpurpose>
  16  </refnamediv>
  17
  18  <indexterm zone="sql-copy">
  19   <primary>COPY</primary>
  20  </indexterm>
  21
  22  <refsynopsisdiv>
  23 <synopsis>
  24 COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ]
  25     FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
  26     [ [ WITH ]
  27           [ BINARY ]
  28           [ OIDS ]
  29           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
  30           [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
  31           [ CSV [ HEADER ]
  32                 [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
  33                 [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
  34                 [ FORCE NOT NULL <replaceable class="parameter">column</replaceable> [, ...] ]
  35
  36 COPY { <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ] | ( <replaceable class="parameter">query</replaceable> ) }
  37     TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
  38     [ [ WITH ]
  39           [ BINARY ]
  40           [ HEADER ]
  41           [ OIDS ]
  42           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
  43           [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
  44           [ CSV [ HEADER ]
  45                 [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ]
  46                 [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
  47                 [ FORCE QUOTE <replaceable class="parameter">column</replaceable> [, ...] ]
  48 </synopsis>
  49  </refsynopsisdiv>
  50
  51  <refsect1>
  52   <title>Description</title>
  53
  54   <para>
  55    <command>COPY</command> moves data between
  56    <productname>PostgreSQL</productname> tables and standard file-system
  57    files. <command>COPY TO</command> copies the contents of a table
  58    <emphasis>to</> a file, while <command>COPY FROM</command> copies
  59    data <emphasis>from</> a file to a table (appending the data to
  60    whatever is in the table already).  <command>COPY TO</command>
  61    can also copy the results of a <command>SELECT</> query.
  62   </para>
  63
  64   <para>
  65    If a list of columns is specified, <command>COPY</command> will
  66    only copy the data in the specified columns to or from the file.
  67    If there are any columns in the table that are not in the column list,
  68    <command>COPY FROM</command> will insert the default values for
  69    those columns.
  70   </para>
  71
  72   <para>
  73    <command>COPY</command> with a file name instructs the
  74    <productname>PostgreSQL</productname> server to directly read from
  75    or write to a file. The file must be accessible to the server and
  76    the name must be specified from the viewpoint of the server. When
  77    <literal>STDIN</literal> or <literal>STDOUT</literal> is
  78    specified, data is transmitted via the connection between the
  79    client and the server.
  80   </para>
  81  </refsect1>
  82
  83  <refsect1>
  84   <title>Parameters</title>
  85
  86   <variablelist>
  87    <varlistentry>
  88     <term><replaceable class="parameter">tablename</replaceable></term>
  89     <listitem>
  90      <para>
  91       The name (optionally schema-qualified) of an existing table.
  92      </para>
  93     </listitem>
  94    </varlistentry>
  95
  96    <varlistentry>
  97     <term><replaceable class="parameter">column</replaceable></term>
  98      <listitem>
  99      <para>
 100       An optional list of columns to be copied.  If no column list is
 101       specified, all columns of the table will be copied.
 102      </para>
 103     </listitem>
 104    </varlistentry>
 105
 106    <varlistentry>
 107     <term><replaceable class="parameter">query</replaceable></term>
 108     <listitem>
 109      <para>
 110       A <xref linkend="sql-select" endterm="sql-select-title"> or
 111       <xref linkend="sql-values" endterm="sql-values-title"> command
 112       whose results are to be copied.
 113       Note that parentheses are required around the query.
 114      </para>
 115     </listitem>
 116    </varlistentry>
 117
 118    <varlistentry>
 119     <term><replaceable class="parameter">filename</replaceable></term>
 120     <listitem>
 121      <para>
 122       The absolute path name of the input or output file.  Windows users
 123       might need to use an <literal>E''</> string and double backslashes
 124       used as path separators.
 125      </para>
 126     </listitem>
 127    </varlistentry>
 128
 129    <varlistentry>
 130     <term><literal>STDIN</literal></term>
 131     <listitem>
 132      <para>
 133       Specifies that input comes from the client application.
 134      </para>
 135     </listitem>
 136    </varlistentry>
 137
 138    <varlistentry>
 139     <term><literal>STDOUT</literal></term>
 140     <listitem>
 141      <para>
 142       Specifies that output goes to the client application.
 143      </para>
 144     </listitem>
 145    </varlistentry>
 146
 147    <varlistentry>
 148     <term><literal>BINARY</literal></term>
 149     <listitem>
 150      <para>
 151       Causes all data to be stored or read in binary format rather
 152       than as text. You cannot specify the <option>DELIMITER</option>,
 153       <option>NULL</option>, or <option>CSV</> options in binary mode.
 154      </para>
 155     </listitem>
 156    </varlistentry>
 157
 158    <varlistentry>
 159     <term><literal>OIDS</literal></term>
 160     <listitem>
 161      <para>
 162       Specifies copying the OID for each row.  (An error is raised if
 163       <literal>OIDS</literal> is specified for a table that does not
 164       have OIDs, or in the case of copying a <replaceable
 165       class="parameter">query</replaceable>.)
 166      </para>
 167     </listitem>
 168    </varlistentry>
 169
 170    <varlistentry>
 171     <term><replaceable class="parameter">delimiter</replaceable></term>
 172     <listitem>
 173      <para>
 174       The single ASCII character that separates columns within each row
 175       (line) of the file.  The default is a tab character in text mode,
 176       a comma in <literal>CSV</> mode.
 177      </para>
 178     </listitem>
 179    </varlistentry>
 180
 181    <varlistentry>
 182     <term><replaceable class="parameter">null string</replaceable></term>
 183     <listitem>
 184      <para>
 185       The string that represents a null value. The default is
 186       <literal>\N</literal> (backslash-N) in text mode, and a empty
 187       value with no quotes in <literal>CSV</> mode. You might prefer an
 188       empty string even in text mode for cases where you don't want to
 189       distinguish nulls from empty strings.
 190      </para>
 191
 192      <note>
 193       <para>
 194        When using <command>COPY FROM</command>, any data item that matches
 195        this string will be stored as a null value, so you should make
 196        sure that you use the same string as you used with
 197        <command>COPY TO</command>.
 198       </para>
 199      </note>
 200
 201     </listitem>
 202    </varlistentry>
 203
 204    <varlistentry>
 205     <term><literal>CSV</literal></term>
 206     <listitem>
 207      <para>
 208       Selects Comma Separated Value (<literal>CSV</>) mode.
 209      </para>
 210     </listitem>
 211    </varlistentry>
 212
 213    <varlistentry>
 214     <term><literal>HEADER</literal></term>
 215     <listitem>
 216      <para>
 217       Specifies the file contains a header line with the names of each
 218       column in the file.  On output, the first line contains the column
 219       names from the table, and on input, the first line is ignored.
 220      </para>
 221     </listitem>
 222    </varlistentry>
 223
 224    <varlistentry>
 225     <term><replaceable class="parameter">quote</replaceable></term>
 226     <listitem>
 227      <para>
 228       Specifies the ASCII quotation character in <literal>CSV</> mode.
 229       The default is double-quote.
 230      </para>
 231     </listitem>
 232    </varlistentry>
 233
 234    <varlistentry>
 235     <term><replaceable class="parameter">escape</replaceable></term>
 236     <listitem>
 237      <para>
 238       Specifies the ASCII character that should appear before a
 239       <literal>QUOTE</> data character value in <literal>CSV</> mode.
 240       The default is the <literal>QUOTE</> value (usually double-quote).
 241      </para>
 242     </listitem>
 243    </varlistentry>
 244
 245    <varlistentry>
 246     <term><literal>FORCE QUOTE</></term>
 247     <listitem>
 248      <para>
 249       In <literal>CSV</> <command>COPY TO</> mode, forces quoting to be
 250       used for all non-<literal>NULL</> values in each specified column.
 251       <literal>NULL</> output is never quoted.
 252      </para>
 253     </listitem>
 254    </varlistentry>
 255
 256    <varlistentry>
 257     <term><literal>FORCE NOT NULL</></term>
 258     <listitem>
 259      <para>
 260       In <literal>CSV</> <command>COPY FROM</> mode, process each
 261       specified column as though it were quoted and hence not a
 262       <literal>NULL</> value. For the default null string in
 263       <literal>CSV</> mode (<literal>''</>), this causes missing
 264       values to be input as zero-length strings.
 265      </para>
 266     </listitem>
 267    </varlistentry>
 268
 269   </variablelist>
 270  </refsect1>
 271
 272  <refsect1>
 273   <title>Outputs</title>
 274
 275   <para>
 276    On successful completion, a <command>COPY</> command returns a command
 277    tag of the form
 278 <screen>
 279 COPY <replaceable class="parameter">count</replaceable>
 280 </screen>
 281    The <replaceable class="parameter">count</replaceable> is the number
 282    of rows copied.
 283   </para>
 284  </refsect1>
 285
 286  <refsect1>
 287   <title>Notes</title>
 288
 289    <para>
 290     <command>COPY</command> can only be used with plain tables, not
 291     with views.  However, you can write <literal>COPY (SELECT * FROM
 292     <replaceable class="parameter">viewname</replaceable>) TO ...</literal>.
 293    </para>
 294
 295    <para>
 296     The <literal>BINARY</literal> key word causes all data to be
 297     stored/read as binary format rather than as text.  It is
 298     somewhat faster than the normal text mode, but a binary-format
 299     file is less portable across machine architectures and
 300     <productname>PostgreSQL</productname> versions.
 301    </para>
 302
 303    <para>
 304     You must have select privilege on the table
 305     whose values are read by <command>COPY TO</command>, and
 306     insert privilege on the table into which values
 307     are inserted by <command>COPY FROM</command>.
 308    </para>
 309
 310    <para>
 311     Files named in a <command>COPY</command> command are read or written
 312     directly by the server, not by the client application. Therefore,
 313     they must reside on or be accessible to the database server machine,
 314     not the client. They must be accessible to and readable or writable
 315     by the <productname>PostgreSQL</productname> user (the user ID the
 316     server runs as), not the client. <command>COPY</command> naming a
 317     file is only allowed to database superusers, since it allows reading
 318     or writing any file that the server has privileges to access.
 319    </para>
 320
 321    <para>
 322     Do not confuse <command>COPY</command> with the
 323     <application>psql</application> instruction
 324     <command>\copy</command>. <command>\copy</command> invokes
 325     <command>COPY FROM STDIN</command> or <command>COPY TO
 326     STDOUT</command>, and then fetches/stores the data in a file
 327     accessible to the <application>psql</application> client. Thus,
 328     file accessibility and access rights depend on the client rather
 329     than the server when <command>\copy</command> is used.
 330    </para>
 331
 332    <para>
 333     It is recommended that the file name used in <command>COPY</command>
 334     always be specified as an absolute path. This is enforced by the
 335     server in the case of <command>COPY TO</command>, but for
 336     <command>COPY FROM</command> you do have the option of reading from
 337     a file specified by a relative path. The path will be interpreted
 338     relative to the working directory of the server process (normally
 339     the cluster's data directory), not the client's working directory.
 340    </para>
 341
 342    <para>
 343     <command>COPY FROM</command> will invoke any triggers and check
 344     constraints on the destination table. However, it will not invoke rules.
 345    </para>
 346
 347    <para>
 348     <command>COPY</command> input and output is affected by
 349     <varname>DateStyle</varname>. To ensure portability to other
 350     <productname>PostgreSQL</productname> installations that might use
 351     non-default <varname>DateStyle</varname> settings,
 352     <varname>DateStyle</varname> should be set to <literal>ISO</> before
 353     using <command>COPY TO</>.
 354    </para>
 355
 356    <para>
 357     Input data is interpreted according to the current client encoding,
 358     and output data is encoded in the the current client encoding, even
 359     if the data does not pass through the client but is read from or
 360     written to a file.
 361    </para>
 362
 363    <para>
 364     <command>COPY</command> stops operation at the first error. This
 365     should not lead to problems in the event of a <command>COPY
 366     TO</command>, but the target table will already have received
 367     earlier rows in a <command>COPY FROM</command>. These rows will not
 368     be visible or accessible, but they still occupy disk space. This might
 369     amount to a considerable amount of wasted disk space if the failure
 370     happened well into a large copy operation. You might wish to invoke
 371     <command>VACUUM</command> to recover the wasted space.
 372    </para>
 373
 374  </refsect1>
 375
 376  <refsect1>
 377   <title>File Formats</title>
 378
 379   <refsect2>
 380    <title>Text Format</title>
 381
 382    <para>
 383     When <command>COPY</command> is used without the <literal>BINARY</literal>
 384     or <literal>CSV</> options,
 385     the data read or written is a text file with one line per table row.
 386     Columns in a row are separated by the delimiter character.
 387     The column values themselves are strings generated by the
 388     output function, or acceptable to the input function, of each
 389     attribute's data type.  The specified null string is used in
 390     place of columns that are null.
 391     <command>COPY FROM</command> will raise an error if any line of the
 392     input file contains more or fewer columns than are expected.
 393     If <literal>OIDS</literal> is specified, the OID is read or written as the first column,
 394     preceding the user data columns.
 395    </para>
 396
 397    <para>
 398     End of data can be represented by a single line containing just
 399     backslash-period (<literal>\.</>).  An end-of-data marker is
 400     not necessary when reading from a file, since the end of file
 401     serves perfectly well; it is needed only when copying data to or from
 402     client applications using pre-3.0 client protocol.
 403    </para>
 404
 405    <para>
 406     Backslash characters (<literal>\</>) can be used in the
 407     <command>COPY</command> data to quote data characters that might
 408     otherwise be taken as row or column delimiters. In particular, the
 409     following characters <emphasis>must</> be preceded by a backslash if
 410     they appear as part of a column value: backslash itself,
 411     newline, carriage return, and the current delimiter character.
 412    </para>
 413
 414    <para>
 415     The specified null string is sent by <command>COPY TO</command> without
 416     adding any backslashes; conversely, <command>COPY FROM</command> matches
 417     the input against the null string before removing backslashes.  Therefore,
 418     a null string such as <literal>\N</literal> cannot be confused with
 419     the actual data value <literal>\N</literal> (which would be represented
 420     as <literal>\\N</literal>).
 421    </para>
 422
 423    <para>
 424     The following special backslash sequences are recognized by
 425     <command>COPY FROM</command>:
 426
 427    <informaltable>
 428     <tgroup cols="2">
 429      <thead>
 430       <row>
 431        <entry>Sequence</entry>
 432        <entry>Represents</entry>
 433       </row>
 434      </thead>
 435
 436      <tbody>
 437       <row>
 438        <entry><literal>\b</></entry>
 439        <entry>Backspace (ASCII 8)</entry>
 440       </row>
 441       <row>
 442        <entry><literal>\f</></entry>
 443        <entry>Form feed (ASCII 12)</entry>
 444       </row>
 445       <row>
 446        <entry><literal>\n</></entry>
 447        <entry>Newline (ASCII 10)</entry>
 448       </row>
 449       <row>
 450        <entry><literal>\r</></entry>
 451        <entry>Carriage return (ASCII 13)</entry>
 452       </row>
 453       <row>
 454        <entry><literal>\t</></entry>
 455        <entry>Tab (ASCII 9)</entry>
 456       </row>
 457       <row>
 458        <entry><literal>\v</></entry>
 459        <entry>Vertical tab (ASCII 11)</entry>
 460       </row>
 461       <row>
 462        <entry><literal>\</><replaceable>digits</></entry>
 463        <entry>Backslash followed by one to three octal digits specifies
 464        the character with that numeric code</entry>
 465       </row>
 466       <row>
 467        <entry><literal>\x</><replaceable>digits</></entry>
 468        <entry>Backslash <literal>x</> followed by one or two hex digits specifies
 469        the character with that numeric code</entry>
 470       </row>
 471      </tbody>
 472     </tgroup>
 473    </informaltable>
 474
 475     Presently, <command>COPY TO</command> will never emit an octal or
 476     hex-digits backslash sequence, but it does use the other sequences
 477     listed above for those control characters.
 478    </para>
 479
 480    <para>
 481     Any other backslashed character that is not mentioned in the above table
 482     will be taken to represent itself.  However, beware of adding backslashes
 483     unnecessarily, since that might accidentally produce a string matching the
 484     end-of-data marker (<literal>\.</>) or the null string (<literal>\N</> by
 485     default).  These strings will be recognized before any other backslash
 486     processing is done.
 487    </para>
 488
 489    <para>
 490     It is strongly recommended that applications generating <command>COPY</command> data convert
 491     data newlines and carriage returns to the <literal>\n</> and
 492     <literal>\r</> sequences respectively.  At present it is
 493     possible to represent a data carriage return by a backslash and carriage
 494     return, and to represent a data newline by a backslash and newline.
 495     However, these representations might not be accepted in future releases.
 496     They are also highly vulnerable to corruption if the <command>COPY</command> file is
 497     transferred across different machines (for example, from Unix to Windows
 498     or vice versa).
 499    </para>
 500
 501    <para>
 502     <command>COPY TO</command> will terminate each row with a Unix-style
 503     newline (<quote><literal>\n</></>).  Servers running on Microsoft Windows instead
 504     output carriage return/newline (<quote><literal>\r\n</></>), but only for
 505     <command>COPY</> to a server file; for consistency across platforms,
 506     <command>COPY TO STDOUT</> always sends <quote><literal>\n</></>
 507     regardless of server platform.
 508     <command>COPY FROM</command> can handle lines ending with newlines,
 509     carriage returns, or carriage return/newlines.  To reduce the risk of
 510     error due to un-backslashed newlines or carriage returns that were
 511     meant as data, <command>COPY FROM</command> will complain if the line
 512     endings in the input are not all alike.
 513    </para>
 514   </refsect2>
 515
 516   <refsect2>
 517    <title>CSV Format</title>
 518
 519    <para>
 520     This format is used for importing and exporting the Comma
 521     Separated Value (<literal>CSV</>) file format used by many other
 522     programs, such as spreadsheets. Instead of the escaping used by
 523     <productname>PostgreSQL</productname>'s standard text mode, it
 524     produces and recognizes the common CSV escaping mechanism.
 525    </para>
 526
 527    <para>
 528     The values in each record are separated by the <literal>DELIMITER</>
 529     character. If the value contains the delimiter character, the
 530     <literal>QUOTE</> character, the <literal>NULL</> string, a carriage
 531     return, or line feed character, then the whole value is prefixed and
 532     suffixed by the <literal>QUOTE</> character, and any occurrence
 533     within the value of a <literal>QUOTE</> character or the
 534     <literal>ESCAPE</> character is preceded by the escape character.
 535     You can also use <literal>FORCE QUOTE</> to force quotes when outputting
 536     non-<literal>NULL</> values in specific columns.
 537    </para>
 538
 539    <para>
 540     The <literal>CSV</> format has no standard way to distinguish a
 541     <literal>NULL</> value from an empty string.
 542     <productname>PostgreSQL</>'s <command>COPY</> handles this by
 543     quoting. A <literal>NULL</> is output as the <literal>NULL</>
 544     string and is not quoted, while a data value matching the
 545     <literal>NULL</> string is quoted. Therefore, using the default
 546     settings, a <literal>NULL</> is written as an unquoted empty
 547     string, while an empty string is written with double quotes
 548     (<literal>""</>). Reading values follows similar rules. You can
 549     use <literal>FORCE NOT NULL</> to prevent <literal>NULL</> input
 550     comparisons for specific columns.
 551    </para>
 552
 553    <para>
 554     Because backslash is not a special character in the <literal>CSV</>
 555     format, <literal>\.</>, the end-of-data marker, could also appear
 556     as a data value.  To avoid any misinterpretation, a <literal>\.</>
 557     data value appearing as a lone entry on a line is automatically
 558     quoted on output, and on input, if quoted, is not interpreted as the
 559     end-of-data marker.  If you are loading a file created by another
 560     application that has a single unquoted column and might have a
 561     value of <literal>\.</>, you might need to quote that value in the
 562     input file.
 563    </para>
 564
 565    <note>
 566     <para>
 567      In <literal>CSV</> mode, all characters are significant. A quoted value
 568      surrounded by white space, or any characters other than
 569      <literal>DELIMITER</>, will include those characters. This can cause
 570      errors if  you import data from a system that pads <literal>CSV</>
 571      lines with white space out to some fixed width. If such a situation
 572      arises you might need to preprocess the <literal>CSV</> file to remove
 573      the trailing white space, before importing the data into
 574      <productname>PostgreSQL</>.
 575     </para>
 576    </note>
 577
 578    <note>
 579     <para>
 580      CSV mode will both recognize and produce CSV files with quoted
 581      values containing embedded carriage returns and line feeds. Thus
 582      the files are not strictly one line per table row like text-mode
 583      files.
 584     </para>
 585    </note>
 586
 587    <note>
 588     <para>
 589      Many programs produce strange and occasionally perverse CSV files,
 590      so the file format is more a convention than a standard. Thus you
 591      might encounter some files that cannot be imported using this
 592      mechanism, and <command>COPY</> might produce files that other
 593      programs cannot process.
 594     </para>
 595    </note>
 596
 597   </refsect2>
 598
 599   <refsect2>
 600    <title>Binary Format</title>
 601
 602    <para>
 603     The file format used for <command>COPY BINARY</command> changed in
 604     <productname>PostgreSQL</productname> 7.4. The new format consists
 605     of a file header, zero or more tuples containing the row data, and
 606     a file trailer. Headers and data are now in network byte order.
 607    </para>
 608
 609    <refsect3>
 610     <title>File Header</title>
 611
 612     <para>
 613      The file header consists of 15 bytes of fixed fields, followed
 614      by a variable-length header extension area.  The fixed fields are:
 615
 616     <variablelist>
 617      <varlistentry>
 618       <term>Signature</term>
 619       <listitem>
 620        <para>
 621 11-byte sequence <literal>PGCOPY\n\377\r\n\0</> &mdash; note that the zero byte
 622 is a required part of the signature.  (The signature is designed to allow
 623 easy identification of files that have been munged by a non-8-bit-clean
 624 transfer.  This signature will be changed by end-of-line-translation
 625 filters, dropped zero bytes, dropped high bits, or parity changes.)
 626        </para>
 627       </listitem>
 628      </varlistentry>
 629
 630      <varlistentry>
 631       <term>Flags field</term>
 632       <listitem>
 633        <para>
 634 32-bit integer bit mask to denote important aspects of the file format. Bits
 635 are numbered from 0 (<acronym>LSB</>) to 31 (<acronym>MSB</>).  Note that
 636 this field is stored in network byte order (most significant byte first),
 637 as are all the integer fields used in the file format.  Bits
 638 16-31 are reserved to denote critical file format issues; a reader
 639 should abort if it finds an unexpected bit set in this range. Bits 0-15
 640 are reserved to signal backwards-compatible format issues; a reader
 641 should simply ignore any unexpected bits set in this range. Currently
 642 only one flag bit is defined, and the rest must be zero:
 643         <variablelist>
 644          <varlistentry>
 645           <term>Bit 16</term>
 646           <listitem>
 647            <para>
 648             if 1, OIDs are included in the data; if 0, not
 649            </para>
 650           </listitem>
 651          </varlistentry>
 652         </variablelist>
 653        </para>
 654       </listitem>
 655      </varlistentry>
 656
 657      <varlistentry>
 658       <term>Header extension area length</term>
 659       <listitem>
 660        <para>
 661 32-bit integer, length in bytes of remainder of header, not including self.
 662 Currently, this is zero, and the first tuple follows
 663 immediately.  Future changes to the format might allow additional data
 664 to be present in the header.  A reader should silently skip over any header
 665 extension data it does not know what to do with.
 666        </para>
 667       </listitem>
 668      </varlistentry>
 669     </variablelist>
 670     </para>
 671
 672     <para>
 673 The header extension area is envisioned to contain a sequence of
 674 self-identifying chunks.  The flags field is not intended to tell readers
 675 what is in the extension area.  Specific design of header extension contents
 676 is left for a later release.
 677     </para>
 678
 679     <para>
 680      This design allows for both backwards-compatible header additions (add
 681      header extension chunks, or set low-order flag bits) and
 682      non-backwards-compatible changes (set high-order flag bits to signal such
 683      changes, and add supporting data to the extension area if needed).
 684     </para>
 685    </refsect3>
 686
 687    <refsect3>
 688     <title>Tuples</title>
 689     <para>
 690 Each tuple begins with a 16-bit integer count of the number of fields in the
 691 tuple.  (Presently, all tuples in a table will have the same count, but that
 692 might not always be true.)  Then, repeated for each field in the tuple, there
 693 is a 32-bit length word followed by that many bytes of field data.  (The
 694 length word does not include itself, and can be zero.)  As a special case,
 695 -1 indicates a NULL field value.  No value bytes follow in the NULL case.
 696     </para>
 697
 698     <para>
 699 There is no alignment padding or any other extra data between fields.
 700     </para>
 701
 702     <para>
 703 Presently, all data values in a <command>COPY BINARY</command> file are
 704 assumed to be in binary format (format code one).  It is anticipated that a
 705 future extension might add a header field that allows per-column format codes
 706 to be specified.
 707     </para>
 708
 709     <para>
 710 To determine the appropriate binary format for the actual tuple data you
 711 should consult the <productname>PostgreSQL</productname> source, in
 712 particular the <function>*send</> and <function>*recv</> functions for
 713 each column's data type (typically these functions are found in the
 714 <filename>src/backend/utils/adt/</filename> directory of the source
 715 distribution).
 716     </para>
 717
 718     <para>
 719 If OIDs are included in the file, the OID field immediately follows the
 720 field-count word.  It is a normal field except that it's not included
 721 in the field-count.  In particular it has a length word &mdash; this will allow
 722 handling of 4-byte vs. 8-byte OIDs without too much pain, and will allow
 723 OIDs to be shown as null if that ever proves desirable.
 724     </para>
 725    </refsect3>
 726
 727    <refsect3>
 728     <title>File Trailer</title>
 729
 730     <para>
 731      The file trailer consists of a 16-bit integer word containing -1.  This
 732      is easily distinguished from a tuple's field-count word.
 733     </para>
 734
 735     <para>
 736      A reader should report an error if a field-count word is neither -1
 737      nor the expected number of columns.  This provides an extra
 738      check against somehow getting out of sync with the data.
 739     </para>
 740    </refsect3>
 741   </refsect2>
 742  </refsect1>
 743
 744  <refsect1>
 745   <title>Examples</title>
 746
 747   <para>
 748    The following example copies a table to the client
 749    using the vertical bar (<literal>|</literal>) as the field delimiter:
 750 <programlisting>
 751 COPY country TO STDOUT WITH DELIMITER '|';
 752 </programlisting>
 753   </para>
 754
 755   <para>
 756    To copy data from a file into the <literal>country</> table:
 757 <programlisting>
 758 COPY country FROM '/usr1/proj/bray/sql/country_data';
 759 </programlisting>
 760   </para>
 761
 762   <para>
 763    To copy into a file just the countries whose names start with 'A':
 764 <programlisting>
 765 COPY (SELECT * FROM country WHERE country_name LIKE 'A%') TO '/usr1/proj/bray/sql/a_list_countries.copy';
 766 </programlisting>
 767   </para>
 768
 769   <para>
 770    Here is a sample of data suitable for copying into a table from
 771    <literal>STDIN</literal>:
 772 <programlisting>
 773 AF      AFGHANISTAN
 774 AL      ALBANIA
 775 DZ      ALGERIA
 776 ZM      ZAMBIA
 777 ZW      ZIMBABWE
 778 </programlisting>
 779    Note that the white space on each line is actually a tab character.
 780   </para>
 781
 782   <para>
 783    The following is the same data, output in binary format.
 784    The data is shown after filtering through the
 785    Unix utility <command>od -c</command>. The table has three columns;
 786    the first has type <type>char(2)</type>, the second has type <type>text</type>,
 787    and the third has type <type>integer</type>. All the rows have a null value
 788    in the third column.
 789 <programlisting>
 790 0000000   P   G   C   O   P   Y  \n 377  \r  \n  \0  \0  \0  \0  \0  \0
 791 0000020  \0  \0  \0  \0 003  \0  \0  \0 002   A   F  \0  \0  \0 013   A
 792 0000040   F   G   H   A   N   I   S   T   A   N 377 377 377 377  \0 003
 793 0000060  \0  \0  \0 002   A   L  \0  \0  \0 007   A   L   B   A   N   I
 794 0000100   A 377 377 377 377  \0 003  \0  \0  \0 002   D   Z  \0  \0  \0
 795 0000120 007   A   L   G   E   R   I   A 377 377 377 377  \0 003  \0  \0
 796 0000140  \0 002   Z   M  \0  \0  \0 006   Z   A   M   B   I   A 377 377
 797 0000160 377 377  \0 003  \0  \0  \0 002   Z   W  \0  \0  \0  \b   Z   I
 798 0000200   M   B   A   B   W   E 377 377 377 377 377 377
 799 </programlisting>
 800   </para>
 801  </refsect1>
 802
 803  <refsect1>
 804   <title>Compatibility</title>
 805
 806   <para>
 807    There is no <command>COPY</command> statement in the SQL standard.
 808   </para>
 809
 810   <para>
 811    The following syntax was used before <productname>PostgreSQL</>
 812    version 7.3 and is still supported:
 813
 814 <synopsis>
 815 COPY [ BINARY ] <replaceable class="parameter">tablename</replaceable> [ WITH OIDS ]
 816     FROM { '<replaceable class="parameter">filename</replaceable>' | STDIN }
 817     [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
 818     [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
 819
 820 COPY [ BINARY ] <replaceable class="parameter">tablename</replaceable> [ WITH OIDS ]
 821     TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
 822     [ [USING] DELIMITERS '<replaceable class="parameter">delimiter</replaceable>' ]
 823     [ WITH NULL AS '<replaceable class="parameter">null string</replaceable>' ]
 824 </synopsis>
 825   </para>
 826  </refsect1>
 827 </refentry>