contrib/postgres_fdw/connection.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * connection.c
   4  *                Connection management functions for postgres_fdw
   5  *
   6  * Portions Copyright (c) 2012-2023, PostgreSQL Global Development Group
   7  *
   8  * IDENTIFICATION
   9  *                contrib/postgres_fdw/connection.c
  10  *
  11  *-------------------------------------------------------------------------
  12  */
  13 #include "postgres.h"
  14
  15 #include "access/htup_details.h"
  16 #include "access/xact.h"
  17 #include "catalog/pg_user_mapping.h"
  18 #include "commands/defrem.h"
  19 #include "funcapi.h"
  20 #include "libpq/libpq-be.h"
  21 #include "libpq/libpq-be-fe-helpers.h"
  22 #include "mb/pg_wchar.h"
  23 #include "miscadmin.h"
  24 #include "pgstat.h"
  25 #include "postgres_fdw.h"
  26 #include "storage/fd.h"
  27 #include "storage/latch.h"
  28 #include "utils/builtins.h"
  29 #include "utils/datetime.h"
  30 #include "utils/hsearch.h"
  31 #include "utils/inval.h"
  32 #include "utils/memutils.h"
  33 #include "utils/syscache.h"
  34
  35 /*
  36  * Connection cache hash table entry
  37  *
  38  * The lookup key in this hash table is the user mapping OID. We use just one
  39  * connection per user mapping ID, which ensures that all the scans use the
  40  * same snapshot during a query.  Using the user mapping OID rather than
  41  * the foreign server OID + user OID avoids creating multiple connections when
  42  * the public user mapping applies to all user OIDs.
  43  *
  44  * The "conn" pointer can be NULL if we don't currently have a live connection.
  45  * When we do have a connection, xact_depth tracks the current depth of
  46  * transactions and subtransactions open on the remote side.  We need to issue
  47  * commands at the same nesting depth on the remote as we're executing at
  48  * ourselves, so that rolling back a subtransaction will kill the right
  49  * queries and not the wrong ones.
  50  */
  51 typedef Oid ConnCacheKey;       /* user mapping OID (UserMapping.umid) */
  52
  53 typedef struct ConnCacheEntry
  54 {
  55         ConnCacheKey key;                       /* hash key (must be first): the user mapping OID */
  56         PGconn     *conn;                       /* connection to foreign server, or NULL */
  57         /* Remaining fields are invalid when conn is NULL: */
  58         int                     xact_depth;             /* 0 = no xact open, 1 = main xact open, 2 =
  59                                                                  * one level of subxact open, etc */
  60         bool            have_prep_stmt; /* have we prepared any stmts in this xact? */
  61         bool            have_error;             /* have any subxacts aborted in this xact? */
  62         bool            changing_xact_state;    /* xact state change in process */
  63         bool            parallel_commit;        /* do we commit (sub)xacts in parallel? (parallel_commit server option) */
  64         bool            parallel_abort; /* do we abort (sub)xacts in parallel? (parallel_abort server option) */
  65         bool            invalidated;    /* true if reconnect is pending; GetConnection closes the connection once xact_depth is 0 */
  66         bool            keep_connections;       /* setting value of keep_connections
  67                                                                          * server option */
  68         Oid                     serverid;               /* foreign server OID used to get server name */
  69         uint32          server_hashvalue;       /* syscache (FOREIGNSERVEROID) hash value of foreign server OID */
  70         uint32          mapping_hashvalue;      /* syscache (USERMAPPINGOID) hash value of user mapping OID */
  71         PgFdwConnState state;           /* extra per-connection state; exposed through GetConnection's state argument */
  72 } ConnCacheEntry;
  73
  74 /*
  75  * Connection cache: hash table of ConnCacheEntry keyed by ConnCacheKey, created by GetConnection on first use
  76  */
  77 static HTAB *ConnectionHash = NULL;
  78
  79 /* counters for assigning cursor numbers and prepared statement numbers */
  80 static unsigned int cursor_number = 0;
  81 static unsigned int prep_stmt_number = 0;
  82
  83 /* tracks whether any work is needed in callback functions; set to true by GetConnection */
  84 static bool xact_got_connection = false;
  85
  86 /*
  87  * Milliseconds to wait to cancel an in-progress query or execute a cleanup
  88  * query; if it takes longer than 30 seconds to do these, we assume the
  89  * connection is dead.
  90  */
  91 #define CONNECTION_CLEANUP_TIMEOUT      30000   /* 30 seconds, expressed in milliseconds */
  92
  93 /* Macro for constructing abort command to be sent */
  94 #define CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel) \
  95         do { \
  96                 if (toplevel) \
  97                         snprintf((sql), sizeof(sql), \
  98                                          "ABORT TRANSACTION"); \
  99                 else \
 100                         snprintf((sql), sizeof(sql), \
 101                                          "ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d", \
 102                                          (entry)->xact_depth, (entry)->xact_depth); \
 103         } while(0)
 104
 105 /*
 106  * SQL-callable functions: PG_FUNCTION_INFO_V1 declares each as a version-1 function
 107  */
 108 PG_FUNCTION_INFO_V1(postgres_fdw_get_connections);
 109 PG_FUNCTION_INFO_V1(postgres_fdw_disconnect);
 110 PG_FUNCTION_INFO_V1(postgres_fdw_disconnect_all);
 111
 112 /* prototypes of private (file-local, static) functions */
 113 static void make_new_connection(ConnCacheEntry *entry, UserMapping *user);
 114 static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
 115 static void disconnect_pg_server(ConnCacheEntry *entry);
 116 static void check_conn_params(const char **keywords, const char **values, UserMapping *user);
 117 static void configure_remote_session(PGconn *conn);
 118 static void do_sql_command_begin(PGconn *conn, const char *sql);
 119 static void do_sql_command_end(PGconn *conn, const char *sql,
 120                                                            bool consume_input);
 121 static void begin_remote_xact(ConnCacheEntry *entry);
 122 static void pgfdw_xact_callback(XactEvent event, void *arg);
 123 static void pgfdw_subxact_callback(SubXactEvent event,
 124                                                                    SubTransactionId mySubid,
 125                                                                    SubTransactionId parentSubid,
 126                                                                    void *arg);
 127 static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
 128 static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
 129 static void pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel);
 130 static bool pgfdw_cancel_query(PGconn *conn);
 131 static bool pgfdw_cancel_query_begin(PGconn *conn);
 132 static bool pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime,
 133                                                                    bool consume_input);
 134 static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
 135                                                                          bool ignore_errors);
 136 static bool pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query);
 137 static bool pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
 138                                                                                  TimestampTz endtime,
 139                                                                                  bool consume_input,
 140                                                                                  bool ignore_errors);
 141 static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
 142                                                                          PGresult **result, bool *timed_out);
 143 static void pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel);
 144 static bool pgfdw_abort_cleanup_begin(ConnCacheEntry *entry, bool toplevel,
 145                                                                           List **pending_entries,
 146                                                                           List **cancel_requested);
 147 static void pgfdw_finish_pre_commit_cleanup(List *pending_entries);
 148 static void pgfdw_finish_pre_subcommit_cleanup(List *pending_entries,
 149                                                                                            int curlevel);
 150 static void pgfdw_finish_abort_cleanup(List *pending_entries,
 151                                                                            List *cancel_requested,
 152                                                                            bool toplevel);
 153 static void pgfdw_security_check(const char **keywords, const char **values,
 154                                                                  UserMapping *user, PGconn *conn);
 155 static bool UserMappingPasswordRequired(UserMapping *user);
 156 static bool disconnect_cached_connections(Oid serverid);
 157
 158 /*
 159  * Get a PGconn which can be used to execute queries on the remote PostgreSQL
 160  * server with the user's authorization.  A new connection is established
 161  * if we don't already have a suitable one, and a transaction is opened at
 162  * the right subtransaction nesting depth if we didn't do that already.
 163  *
 164  * will_prep_stmt must be true if caller intends to create any prepared
 165  * statements.  Since those don't go away automatically at transaction end
 166  * (not even on error), we need this flag to cue manual cleanup.
 167  *
 168  * If state is not NULL, *state receives the per-connection state associated
 169  * with the PGconn.
 170  */
 171 PGconn *
 172 GetConnection(UserMapping *user, bool will_prep_stmt, PgFdwConnState **state)
 173 {
 174         bool            found;
 175         bool            retry = false;
 176         ConnCacheEntry *entry;
 177         ConnCacheKey key;
 178         MemoryContext ccxt = CurrentMemoryContext;
 179
 180         /* First time through, initialize connection cache hashtable */
 181         if (ConnectionHash == NULL)
 182         {
 183                 HASHCTL         ctl;
 184
 185                 ctl.keysize = sizeof(ConnCacheKey);
 186                 ctl.entrysize = sizeof(ConnCacheEntry);
 187                 ConnectionHash = hash_create("postgres_fdw connections", 8,
 188                                                                          &ctl,
 189                                                                          HASH_ELEM | HASH_BLOBS);
 190
 191                 /*
 192                  * Register some callback functions that manage connection cleanup.
 193                  * This should be done just once in each backend.
 194                  */
 195                 RegisterXactCallback(pgfdw_xact_callback, NULL);
 196                 RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
 197                 CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
 198                                                                           pgfdw_inval_callback, (Datum) 0);
 199                 CacheRegisterSyscacheCallback(USERMAPPINGOID,
 200                                                                           pgfdw_inval_callback, (Datum) 0);
 201         }
 202
 203         /* Set flag that we did GetConnection during the current transaction */
 204         xact_got_connection = true;
 205
 206         /* Create hash key for the entry.  Assume no pad bytes in key struct */
 207         key = user->umid;
 208
 209         /*
 210          * Find or create cached entry for requested connection.
 211          */
 212         entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
 213         if (!found)
 214         {
 215                 /*
 216                  * We need only clear "conn" here; remaining fields will be filled
 217                  * later when "conn" is set.
 218                  */
 219                 entry->conn = NULL;
 220         }
 221
 222         /* Reject further use of connections which failed abort cleanup. */
 223         pgfdw_reject_incomplete_xact_state_change(entry);
 224
 225         /*
 226          * If the connection needs to be remade due to invalidation, disconnect as
 227          * soon as we're out of all transactions.
 228          */
 229         if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
 230         {
 231                 elog(DEBUG3, "closing connection %p for option changes to take effect",
 232                          entry->conn);
 233                 disconnect_pg_server(entry);
 234         }
 235
 236         /*
 237          * If cache entry doesn't have a connection, we have to establish a new
 238          * connection.  (If connect_pg_server throws an error, the cache entry
 239          * will remain in a valid empty state, ie conn == NULL.)
 240          */
 241         if (entry->conn == NULL)
 242                 make_new_connection(entry, user);
 243
 244         /*
 245          * We check the health of the cached connection here when using it.  In
 246          * cases where we're out of all transactions, if a broken connection is
 247          * detected, we try to reestablish a new connection later.
 248          */
 249         PG_TRY();
 250         {
 251                 /* Process a pending asynchronous request if any. */
 252                 if (entry->state.pendingAreq)
 253                         process_pending_request(entry->state.pendingAreq);
 254                 /* Start a new transaction or subtransaction if needed. */
 255                 begin_remote_xact(entry);
 256         }
 257         PG_CATCH();
 258         {
 259                 MemoryContext ecxt = MemoryContextSwitchTo(ccxt);
 260                 ErrorData  *errdata = CopyErrorData();
 261
 262                 /*
 263                  * Determine whether to try to reestablish the connection.
 264                  *
 265                  * After a broken connection is detected in libpq, any error other
 266                  * than connection failure (e.g., out-of-memory) can be thrown
 267                  * somewhere between return from libpq and the expected ereport() call
 268                  * in pgfdw_report_error(). In this case, since PQstatus() indicates
 269                  * CONNECTION_BAD, checking only PQstatus() causes the false detection
 270                  * of connection failure. To avoid this, we also verify that the
 271                  * error's sqlstate is ERRCODE_CONNECTION_FAILURE. Note that also
 272                  * checking only the sqlstate can cause another false detection
 273                  * because pgfdw_report_error() may report ERRCODE_CONNECTION_FAILURE
 274                  * for any libpq-originated error condition.
 275                  */
 276                 if (errdata->sqlerrcode != ERRCODE_CONNECTION_FAILURE ||
 277                         PQstatus(entry->conn) != CONNECTION_BAD ||
 278                         entry->xact_depth > 0)
 279                 {
 280                         MemoryContextSwitchTo(ecxt);
 281                         PG_RE_THROW();
 282                 }
 283
 284                 /* Clean up the error state */
 285                 FlushErrorState();
 286                 FreeErrorData(errdata);
 287                 errdata = NULL;
 288
 289                 retry = true;
 290         }
 291         PG_END_TRY();
 292
 293         /*
 294          * If a broken connection is detected, disconnect it, reestablish a new
 295          * connection and retry a new remote transaction. If connection failure is
 296          * reported again, we give up getting a connection.
 297          */
 298         if (retry)
 299         {
 300                 Assert(entry->xact_depth == 0);
 301
 302                 ereport(DEBUG3,
 303                                 (errmsg_internal("could not start remote transaction on connection %p",
 304                                                                  entry->conn)),
 305                                 errdetail_internal("%s", pchomp(PQerrorMessage(entry->conn))));
 306
 307                 elog(DEBUG3, "closing connection %p to reestablish a new one",
 308                          entry->conn);
 309                 disconnect_pg_server(entry);
 310
 311                 make_new_connection(entry, user);
 312
 313                 begin_remote_xact(entry);
 314         }
 315
 316         /* Remember if caller will prepare statements */
 317         entry->have_prep_stmt |= will_prep_stmt;
 318
 319         /* If caller needs access to the per-connection state, return it. */
 320         if (state)
 321                 *state = &entry->state;
 322
 323         return entry->conn;
 324 }
 325
 326 /*
 327  * Reset all transient state fields in the cached connection entry and
 328  * establish new connection to the remote server.
 329  */
 330 static void
 331 make_new_connection(ConnCacheEntry *entry, UserMapping *user)
 332 {
 333         ForeignServer *server = GetForeignServer(user->serverid);
 334         ListCell   *lc;
 335
 336         Assert(entry->conn == NULL);
 337
 338         /* Reset all transient state fields, to be sure all are clean */
 339         entry->xact_depth = 0;
 340         entry->have_prep_stmt = false;
 341         entry->have_error = false;
 342         entry->changing_xact_state = false;
 343         entry->invalidated = false;
 344         entry->serverid = server->serverid;
 345         entry->server_hashvalue =
 346                 GetSysCacheHashValue1(FOREIGNSERVEROID,
 347                                                           ObjectIdGetDatum(server->serverid));
 348         entry->mapping_hashvalue =
 349                 GetSysCacheHashValue1(USERMAPPINGOID,
 350                                                           ObjectIdGetDatum(user->umid));
 351         memset(&entry->state, 0, sizeof(entry->state));
 352
 353         /*
 354          * Determine whether to keep the connection that we're about to make here
 355          * open even after the transaction using it ends, so that the subsequent
 356          * transactions can re-use it.
 357          *
 358          * By default, all the connections to any foreign servers are kept open.
 359          *
 360          * Also determine whether to commit/abort (sub)transactions opened on the
 361          * remote server in parallel at (sub)transaction end, which is disabled by
 362          * default.
 363          *
 364          * Note: it's enough to determine these only when making a new connection
 365          * because if these settings for it are changed, it will be closed and
 366          * re-made later.
 367          */
 368         entry->keep_connections = true;
 369         entry->parallel_commit = false;
 370         entry->parallel_abort = false;
 371         foreach(lc, server->options)
 372         {
 373                 DefElem    *def = (DefElem *) lfirst(lc);
 374
 375                 if (strcmp(def->defname, "keep_connections") == 0)
 376                         entry->keep_connections = defGetBoolean(def);
 377                 else if (strcmp(def->defname, "parallel_commit") == 0)
 378                         entry->parallel_commit = defGetBoolean(def);
 379                 else if (strcmp(def->defname, "parallel_abort") == 0)
 380                         entry->parallel_abort = defGetBoolean(def);
 381         }
 382
 383         /* Now try to make the connection */
 384         entry->conn = connect_pg_server(server, user);
 385
 386         elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
 387                  entry->conn, server->servername, user->umid, user->userid);
 388 }
 389
 390 /*
 391  * Check that non-superuser has used password or delegated credentials
 392  * to establish connection; otherwise, he's piggybacking on the
 393  * postgres server's user identity. See also dblink_security_check()
 394  * in contrib/dblink and check_conn_params.
 395  */
 396 static void
 397 pgfdw_security_check(const char **keywords, const char **values, UserMapping *user, PGconn *conn)
 398 {
 399         /* Superusers bypass the check */
 400         if (superuser_arg(user->userid))
 401                 return;
 402
 403 #ifdef ENABLE_GSS
 404         /* Connected via GSSAPI with delegated credentials- all good. */
 405         if (PQconnectionUsedGSSAPI(conn) && be_gssapi_get_delegation(MyProcPort))
 406                 return;
 407 #endif
 408
 409         /* Ok if superuser set PW required false. */
 410         if (!UserMappingPasswordRequired(user))
 411                 return;
 412
 413         /* Connected via PW, with PW required true, and provided non-empty PW. */
 414         if (PQconnectionUsedPassword(conn))
 415         {
 416                 /* ok if params contain a non-empty password */
 417                 for (int i = 0; keywords[i] != NULL; i++)
 418                 {
 419                         if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
 420                                 return;
 421                 }
 422         }
 423
 424         ereport(ERROR,
 425                         (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
 426                          errmsg("password or GSSAPI delegated credentials required"),
 427                          errdetail("Non-superuser cannot connect if the server does not request a password or use GSSAPI with delegated credentials."),
 428                          errhint("Target server's authentication method must be changed or password_required=false set in the user mapping attributes.")));
 429 }
 430
 431 /*
 432  * Connect to remote server using specified server and user mapping properties.
 433  */
 434 static PGconn *
 435 connect_pg_server(ForeignServer *server, UserMapping *user)
 436 {
 437         PGconn     *volatile conn = NULL;
 438
 439         /*
 440          * Use PG_TRY block to ensure closing connection on error.
 441          */
 442         PG_TRY();
 443         {
 444                 const char **keywords;
 445                 const char **values;
 446                 char       *appname = NULL;
 447                 int                     n;
 448
 449                 /*
 450                  * Construct connection params from generic options of ForeignServer
 451                  * and UserMapping.  (Some of them might not be libpq options, in
 452                  * which case we'll just waste a few array slots.)  Add 4 extra slots
 453                  * for application_name, fallback_application_name, client_encoding,
 454                  * end marker.
 455                  */
 456                 n = list_length(server->options) + list_length(user->options) + 4;
 457                 keywords = (const char **) palloc(n * sizeof(char *));
 458                 values = (const char **) palloc(n * sizeof(char *));
 459
 460                 n = 0;
 461                 n += ExtractConnectionOptions(server->options,
 462                                                                           keywords + n, values + n);
 463                 n += ExtractConnectionOptions(user->options,
 464                                                                           keywords + n, values + n);
 465
 466                 /*
 467                  * Use pgfdw_application_name as application_name if set.
 468                  *
 469                  * PQconnectdbParams() processes the parameter arrays from start to
 470                  * end. If any key word is repeated, the last value is used. Therefore
 471                  * note that pgfdw_application_name must be added to the arrays after
 472                  * options of ForeignServer are, so that it can override
 473                  * application_name set in ForeignServer.
 474                  */
 475                 if (pgfdw_application_name && *pgfdw_application_name != '\0')
 476                 {
 477                         keywords[n] = "application_name";
 478                         values[n] = pgfdw_application_name;
 479                         n++;
 480                 }
 481
 482                 /*
 483                  * Search the parameter arrays to find application_name setting, and
 484                  * replace escape sequences in it with status information if found.
 485                  * The arrays are searched backwards because the last value is used if
 486                  * application_name is repeatedly set.
 487                  */
 488                 for (int i = n - 1; i >= 0; i--)
 489                 {
 490                         if (strcmp(keywords[i], "application_name") == 0 &&
 491                                 *(values[i]) != '\0')
 492                         {
 493                                 /*
 494                                  * Use this application_name setting if it's not empty string
 495                                  * even after any escape sequences in it are replaced.
 496                                  */
 497                                 appname = process_pgfdw_appname(values[i]);
 498                                 if (appname[0] != '\0')
 499                                 {
 500                                         values[i] = appname;
 501                                         break;
 502                                 }
 503
 504                                 /*
 505                                  * This empty application_name is not used, so we set
 506                                  * values[i] to NULL and keep searching the array to find the
 507                                  * next one.
 508                                  */
 509                                 values[i] = NULL;
 510                                 pfree(appname);
 511                                 appname = NULL;
 512                         }
 513                 }
 514
 515                 /* Use "postgres_fdw" as fallback_application_name */
 516                 keywords[n] = "fallback_application_name";
 517                 values[n] = "postgres_fdw";
 518                 n++;
 519
 520                 /* Set client_encoding so that libpq can convert encoding properly. */
 521                 keywords[n] = "client_encoding";
 522                 values[n] = GetDatabaseEncodingName();
 523                 n++;
 524
 525                 keywords[n] = values[n] = NULL;
 526
 527                 /* verify the set of connection parameters */
 528                 check_conn_params(keywords, values, user);
 529
 530                 /* OK to make connection */
 531                 conn = libpqsrv_connect_params(keywords, values,
 532                                                                            false,       /* expand_dbname */
 533                                                                            PG_WAIT_EXTENSION);
 534
 535                 if (!conn || PQstatus(conn) != CONNECTION_OK)
 536                         ereport(ERROR,
 537                                         (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
 538                                          errmsg("could not connect to server \"%s\"",
 539                                                         server->servername),
 540                                          errdetail_internal("%s", pchomp(PQerrorMessage(conn)))));
 541
 542                 /* Perform post-connection security checks */
 543                 pgfdw_security_check(keywords, values, user, conn);
 544
 545                 /* Prepare new session for use */
 546                 configure_remote_session(conn);
 547
 548                 if (appname != NULL)
 549                         pfree(appname);
 550                 pfree(keywords);
 551                 pfree(values);
 552         }
 553         PG_CATCH();
 554         {
 555                 libpqsrv_disconnect(conn);
 556                 PG_RE_THROW();
 557         }
 558         PG_END_TRY();
 559
 560         return conn;
 561 }
 562
 563 /*
 564  * Disconnect any open connection for a connection cache entry.
 565  */
 566 static void
 567 disconnect_pg_server(ConnCacheEntry *entry)
 568 {
 569         if (entry->conn != NULL)
 570         {
 571                 libpqsrv_disconnect(entry->conn);
 572                 entry->conn = NULL;
 573         }
 574 }
 575
 576 /*
 577  * Return true if the password_required is defined and false for this user
 578  * mapping, otherwise false. The mapping has been pre-validated.
 579  */
 580 static bool
 581 UserMappingPasswordRequired(UserMapping *user)
 582 {
 583         ListCell   *cell;
 584
 585         foreach(cell, user->options)
 586         {
 587                 DefElem    *def = (DefElem *) lfirst(cell);
 588
 589                 if (strcmp(def->defname, "password_required") == 0)
 590                         return defGetBoolean(def);
 591         }
 592
 593         return true;
 594 }
 595
 596 /*
 597  * For non-superusers, insist that the connstr specify a password or that the
 598  * user provided their own GSSAPI delegated credentials.  This
 599  * prevents a password from being picked up from .pgpass, a service file, the
 600  * environment, etc.  We don't want the postgres user's passwords,
 601  * certificates, etc to be accessible to non-superusers.  (See also
 602  * dblink_connstr_check in contrib/dblink.)
 603  */
 604 static void
 605 check_conn_params(const char **keywords, const char **values, UserMapping *user)
 606 {
 607         int                     i;
 608
 609         /* no check required if superuser */
 610         if (superuser_arg(user->userid))
 611                 return;
 612
 613 #ifdef ENABLE_GSS
 614         /* ok if the user provided their own delegated credentials */
 615         if (be_gssapi_get_delegation(MyProcPort))
 616                 return;
 617 #endif
 618
 619         /* ok if params contain a non-empty password */
 620         for (i = 0; keywords[i] != NULL; i++)
 621         {
 622                 if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
 623                         return;
 624         }
 625
 626         /* ok if the superuser explicitly said so at user mapping creation time */
 627         if (!UserMappingPasswordRequired(user))
 628                 return;
 629
 630         ereport(ERROR,
 631                         (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
 632                          errmsg("password or GSSAPI delegated credentials required"),
 633                          errdetail("Non-superusers must delegate GSSAPI credentials or provide a password in the user mapping.")));
 634 }
 635
 636 /*
 637  * Issue SET commands to make sure remote session is configured properly.
 638  *
 639  * We do this just once at connection, assuming nothing will change the
 640  * values later.  Since we'll never send volatile function calls to the
 641  * remote, there shouldn't be any way to break this assumption from our end.
 642  * It's possible to think of ways to break it at the remote end, eg making
 643  * a foreign table point to a view that includes a set_config call ---
 644  * but once you admit the possibility of a malicious view definition,
 645  * there are any number of ways to break things.
 646  */
 647 static void
 648 configure_remote_session(PGconn *conn)
 649 {
 650         int                     remoteversion = PQserverVersion(conn);
 651
 652         /* Force the search path to contain only pg_catalog (see deparse.c) */
 653         do_sql_command(conn, "SET search_path = pg_catalog");
 654
 655         /*
 656          * Set remote timezone; this is basically just cosmetic, since all
 657          * transmitted and returned timestamptzs should specify a zone explicitly
 658          * anyway.  However it makes the regression test outputs more predictable.
 659          *
 660          * We don't risk setting remote zone equal to ours, since the remote
 661          * server might use a different timezone database.  Instead, use UTC
 662          * (quoted, because very old servers are picky about case).
 663          */
 664         do_sql_command(conn, "SET timezone = 'UTC'");
 665
 666         /*
 667          * Set values needed to ensure unambiguous data output from remote.  (This
 668          * logic should match what pg_dump does.  See also set_transmission_modes
 669          * in postgres_fdw.c.)
 670          */
 671         do_sql_command(conn, "SET datestyle = ISO");
 672         if (remoteversion >= 80400)
 673                 do_sql_command(conn, "SET intervalstyle = postgres");
 674         if (remoteversion >= 90000)
 675                 do_sql_command(conn, "SET extra_float_digits = 3");
 676         else
 677                 do_sql_command(conn, "SET extra_float_digits = 2");
 678 }
 679
 680 /*
 681  * Convenience subroutine to issue a non-data-returning SQL command to remote
 682  */
 683 void
 684 do_sql_command(PGconn *conn, const char *sql)
 685 {
 686         do_sql_command_begin(conn, sql);
 687         do_sql_command_end(conn, sql, false);
 688 }
 689
 690 static void
 691 do_sql_command_begin(PGconn *conn, const char *sql)
 692 {
 693         if (!PQsendQuery(conn, sql))
 694                 pgfdw_report_error(ERROR, NULL, conn, false, sql);
 695 }
 696
 697 static void
 698 do_sql_command_end(PGconn *conn, const char *sql, bool consume_input)
 699 {
 700         PGresult   *res;
 701
 702         /*
 703          * If requested, consume whatever data is available from the socket. (Note
 704          * that if all data is available, this allows pgfdw_get_result to call
 705          * PQgetResult without forcing the overhead of WaitLatchOrSocket, which
 706          * would be large compared to the overhead of PQconsumeInput.)
 707          */
 708         if (consume_input && !PQconsumeInput(conn))
 709                 pgfdw_report_error(ERROR, NULL, conn, false, sql);
 710         res = pgfdw_get_result(conn, sql);
 711         if (PQresultStatus(res) != PGRES_COMMAND_OK)
 712                 pgfdw_report_error(ERROR, res, conn, true, sql);
 713         PQclear(res);
 714 }
 715
 716 /*
 717  * Start remote transaction or subtransaction, if needed.
 718  *
 719  * Note that we always use at least REPEATABLE READ in the remote session.
 720  * This is so that, if a query initiates multiple scans of the same or
 721  * different foreign tables, we will get snapshot-consistent results from
 722  * those scans.  A disadvantage is that we can't provide sane emulation of
 723  * READ COMMITTED behavior --- it would be nice if we had some other way to
 724  * control which remote queries share a snapshot.
 725  */
 726 static void
 727 begin_remote_xact(ConnCacheEntry *entry)
 728 {
 729         int                     curlevel = GetCurrentTransactionNestLevel();
 730
 731         /* Start main transaction if we haven't yet */
 732         if (entry->xact_depth <= 0)
 733         {
 734                 const char *sql;
 735
 736                 elog(DEBUG3, "starting remote transaction on connection %p",
 737                          entry->conn);
 738
 739                 if (IsolationIsSerializable())
 740                         sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
 741                 else
 742                         sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
 743                 entry->changing_xact_state = true;
 744                 do_sql_command(entry->conn, sql);
 745                 entry->xact_depth = 1;
 746                 entry->changing_xact_state = false;
 747         }
 748
 749         /*
 750          * If we're in a subtransaction, stack up savepoints to match our level.
 751          * This ensures we can rollback just the desired effects when a
 752          * subtransaction aborts.
 753          */
 754         while (entry->xact_depth < curlevel)
 755         {
 756                 char            sql[64];
 757
 758                 snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
 759                 entry->changing_xact_state = true;
 760                 do_sql_command(entry->conn, sql);
 761                 entry->xact_depth++;
 762                 entry->changing_xact_state = false;
 763         }
 764 }
 765
 766 /*
 767  * Release connection reference count created by calling GetConnection.
 768  */
 769 void
 770 ReleaseConnection(PGconn *conn)
 771 {
 772         /*
 773          * Currently, we don't actually track connection references because all
 774          * cleanup is managed on a transaction or subtransaction basis instead. So
 775          * there's nothing to do here.
 776          */
 777 }
 778
 779 /*
 780  * Assign a "unique" number for a cursor.
 781  *
 782  * These really only need to be unique per connection within a transaction.
 783  * For the moment we ignore the per-connection point and assign them across
 784  * all connections in the transaction, but we ask for the connection to be
 785  * supplied in case we want to refine that.
 786  *
 787  * Note that even if wraparound happens in a very long transaction, actual
 788  * collisions are highly improbable; just be sure to use %u not %d to print.
 789  */
 790 unsigned int
 791 GetCursorNumber(PGconn *conn)
 792 {
 793         return ++cursor_number;
 794 }
 795
 796 /*
 797  * Assign a "unique" number for a prepared statement.
 798  *
 799  * This works much like GetCursorNumber, except that we never reset the counter
 800  * within a session.  That's because we can't be 100% sure we've gotten rid
 801  * of all prepared statements on all connections, and it's not really worth
 802  * increasing the risk of prepared-statement name collisions by resetting.
 803  */
 804 unsigned int
 805 GetPrepStmtNumber(PGconn *conn)
 806 {
 807         return ++prep_stmt_number;
 808 }
 809
 810 /*
 811  * Submit a query and wait for the result.
 812  *
 813  * This function is interruptible by signals.
 814  *
 815  * Caller is responsible for the error handling on the result.
 816  */
 817 PGresult *
 818 pgfdw_exec_query(PGconn *conn, const char *query, PgFdwConnState *state)
 819 {
 820         /* First, process a pending asynchronous request, if any. */
 821         if (state && state->pendingAreq)
 822                 process_pending_request(state->pendingAreq);
 823
 824         /*
 825          * Submit a query.  Since we don't use non-blocking mode, this also can
 826          * block.  But its risk is relatively small, so we ignore that for now.
 827          */
 828         if (!PQsendQuery(conn, query))
 829                 pgfdw_report_error(ERROR, NULL, conn, false, query);
 830
 831         /* Wait for the result. */
 832         return pgfdw_get_result(conn, query);
 833 }
 834
 835 /*
 836  * Wait for the result from a prior asynchronous execution function call.
 837  *
 838  * This function offers quick responsiveness by checking for any interruptions.
 839  *
 840  * This function emulates PQexec()'s behavior of returning the last result
 841  * when there are many.
 842  *
 843  * Caller is responsible for the error handling on the result.
 844  */
 845 PGresult *
 846 pgfdw_get_result(PGconn *conn, const char *query)
 847 {
 848         PGresult   *volatile last_res = NULL;
 849
 850         /* In what follows, do not leak any PGresults on an error. */
 851         PG_TRY();
 852         {
 853                 for (;;)
 854                 {
 855                         PGresult   *res;
 856
 857                         while (PQisBusy(conn))
 858                         {
 859                                 int                     wc;
 860
 861                                 /* Sleep until there's something to do */
 862                                 wc = WaitLatchOrSocket(MyLatch,
 863                                                                            WL_LATCH_SET | WL_SOCKET_READABLE |
 864                                                                            WL_EXIT_ON_PM_DEATH,
 865                                                                            PQsocket(conn),
 866                                                                            -1L, PG_WAIT_EXTENSION);
 867                                 ResetLatch(MyLatch);
 868
 869                                 CHECK_FOR_INTERRUPTS();
 870
 871                                 /* Data available in socket? */
 872                                 if (wc & WL_SOCKET_READABLE)
 873                                 {
 874                                         if (!PQconsumeInput(conn))
 875                                                 pgfdw_report_error(ERROR, NULL, conn, false, query);
 876                                 }
 877                         }
 878
 879                         res = PQgetResult(conn);
 880                         if (res == NULL)
 881                                 break;                  /* query is complete */
 882
 883                         PQclear(last_res);
 884                         last_res = res;
 885                 }
 886         }
 887         PG_CATCH();
 888         {
 889                 PQclear(last_res);
 890                 PG_RE_THROW();
 891         }
 892         PG_END_TRY();
 893
 894         return last_res;
 895 }
 896
 897 /*
 898  * Report an error we got from the remote server.
 899  *
 900  * elevel: error level to use (typically ERROR, but might be less)
 901  * res: PGresult containing the error
 902  * conn: connection we did the query on
 903  * clear: if true, PQclear the result (otherwise caller will handle it)
 904  * sql: NULL, or text of remote command we tried to execute
 905  *
 906  * Note: callers that choose not to throw ERROR for a remote error are
 907  * responsible for making sure that the associated ConnCacheEntry gets
 908  * marked with have_error = true.
 909  */
 910 void
 911 pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
 912                                    bool clear, const char *sql)
 913 {
 914         /* If requested, PGresult must be released before leaving this function. */
 915         PG_TRY();
 916         {
 917                 char       *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
 918                 char       *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
 919                 char       *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
 920                 char       *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
 921                 char       *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
 922                 int                     sqlstate;
 923
 924                 if (diag_sqlstate)
 925                         sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
 926                                                                          diag_sqlstate[1],
 927                                                                          diag_sqlstate[2],
 928                                                                          diag_sqlstate[3],
 929                                                                          diag_sqlstate[4]);
 930                 else
 931                         sqlstate = ERRCODE_CONNECTION_FAILURE;
 932
 933                 /*
 934                  * If we don't get a message from the PGresult, try the PGconn.  This
 935                  * is needed because for connection-level failures, PQexec may just
 936                  * return NULL, not a PGresult at all.
 937                  */
 938                 if (message_primary == NULL)
 939                         message_primary = pchomp(PQerrorMessage(conn));
 940
 941                 ereport(elevel,
 942                                 (errcode(sqlstate),
 943                                  (message_primary != NULL && message_primary[0] != '\0') ?
 944                                  errmsg_internal("%s", message_primary) :
 945                                  errmsg("could not obtain message string for remote error"),
 946                                  message_detail ? errdetail_internal("%s", message_detail) : 0,
 947                                  message_hint ? errhint("%s", message_hint) : 0,
 948                                  message_context ? errcontext("%s", message_context) : 0,
 949                                  sql ? errcontext("remote SQL command: %s", sql) : 0));
 950         }
 951         PG_FINALLY();
 952         {
 953                 if (clear)
 954                         PQclear(res);
 955         }
 956         PG_END_TRY();
 957 }
 958
 959 /*
 960  * pgfdw_xact_callback --- cleanup at main-transaction end.
 961  *
 962  * This runs just late enough that it must not enter user-defined code
 963  * locally.  (Entering such code on the remote side is fine.  Its remote
 964  * COMMIT TRANSACTION may run deferred triggers.)
 965  */
 966 static void
 967 pgfdw_xact_callback(XactEvent event, void *arg)
 968 {
 969         HASH_SEQ_STATUS scan;
 970         ConnCacheEntry *entry;
 971         List       *pending_entries = NIL;
 972         List       *cancel_requested = NIL;
 973
 974         /* Quick exit if no connections were touched in this transaction. */
 975         if (!xact_got_connection)
 976                 return;
 977
 978         /*
 979          * Scan all connection cache entries to find open remote transactions, and
 980          * close them.
 981          */
 982         hash_seq_init(&scan, ConnectionHash);
 983         while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
 984         {
 985                 PGresult   *res;
 986
 987                 /* Ignore cache entry if no open connection right now */
 988                 if (entry->conn == NULL)
 989                         continue;
 990
 991                 /* If it has an open remote transaction, try to close it */
 992                 if (entry->xact_depth > 0)
 993                 {
 994                         elog(DEBUG3, "closing remote transaction on connection %p",
 995                                  entry->conn);
 996
 997                         switch (event)
 998                         {
 999                                 case XACT_EVENT_PARALLEL_PRE_COMMIT:
1000                                 case XACT_EVENT_PRE_COMMIT:
1001
1002                                         /*
1003                                          * If abort cleanup previously failed for this connection,
1004                                          * we can't issue any more commands against it.
1005                                          */
1006                                         pgfdw_reject_incomplete_xact_state_change(entry);
1007
1008                                         /* Commit all remote transactions during pre-commit */
1009                                         entry->changing_xact_state = true;
1010                                         if (entry->parallel_commit)
1011                                         {
1012                                                 do_sql_command_begin(entry->conn, "COMMIT TRANSACTION");
1013                                                 pending_entries = lappend(pending_entries, entry);
1014                                                 continue;
1015                                         }
1016                                         do_sql_command(entry->conn, "COMMIT TRANSACTION");
1017                                         entry->changing_xact_state = false;
1018
1019                                         /*
1020                                          * If there were any errors in subtransactions, and we
1021                                          * made prepared statements, do a DEALLOCATE ALL to make
1022                                          * sure we get rid of all prepared statements. This is
1023                                          * annoying and not terribly bulletproof, but it's
1024                                          * probably not worth trying harder.
1025                                          *
1026                                          * DEALLOCATE ALL only exists in 8.3 and later, so this
1027                                          * constrains how old a server postgres_fdw can
1028                                          * communicate with.  We intentionally ignore errors in
1029                                          * the DEALLOCATE, so that we can hobble along to some
1030                                          * extent with older servers (leaking prepared statements
1031                                          * as we go; but we don't really support update operations
1032                                          * pre-8.3 anyway).
1033                                          */
1034                                         if (entry->have_prep_stmt && entry->have_error)
1035                                         {
1036                                                 res = PQexec(entry->conn, "DEALLOCATE ALL");
1037                                                 PQclear(res);
1038                                         }
1039                                         entry->have_prep_stmt = false;
1040                                         entry->have_error = false;
1041                                         break;
1042                                 case XACT_EVENT_PRE_PREPARE:
1043
1044                                         /*
1045                                          * We disallow any remote transactions, since it's not
1046                                          * very reasonable to hold them open until the prepared
1047                                          * transaction is committed.  For the moment, throw error
1048                                          * unconditionally; later we might allow read-only cases.
1049                                          * Note that the error will cause us to come right back
1050                                          * here with event == XACT_EVENT_ABORT, so we'll clean up
1051                                          * the connection state at that point.
1052                                          */
1053                                         ereport(ERROR,
1054                                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1055                                                          errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
1056                                         break;
1057                                 case XACT_EVENT_PARALLEL_COMMIT:
1058                                 case XACT_EVENT_COMMIT:
1059                                 case XACT_EVENT_PREPARE:
1060                                         /* Pre-commit should have closed the open transaction */
1061                                         elog(ERROR, "missed cleaning up connection during pre-commit");
1062                                         break;
1063                                 case XACT_EVENT_PARALLEL_ABORT:
1064                                 case XACT_EVENT_ABORT:
1065                                         /* Rollback all remote transactions during abort */
1066                                         if (entry->parallel_abort)
1067                                         {
1068                                                 if (pgfdw_abort_cleanup_begin(entry, true,
1069                                                                                                           &pending_entries,
1070                                                                                                           &cancel_requested))
1071                                                         continue;
1072                                         }
1073                                         else
1074                                                 pgfdw_abort_cleanup(entry, true);
1075                                         break;
1076                         }
1077                 }
1078
1079                 /* Reset state to show we're out of a transaction */
1080                 pgfdw_reset_xact_state(entry, true);
1081         }
1082
1083         /* If there are any pending connections, finish cleaning them up */
1084         if (pending_entries || cancel_requested)
1085         {
1086                 if (event == XACT_EVENT_PARALLEL_PRE_COMMIT ||
1087                         event == XACT_EVENT_PRE_COMMIT)
1088                 {
1089                         Assert(cancel_requested == NIL);
1090                         pgfdw_finish_pre_commit_cleanup(pending_entries);
1091                 }
1092                 else
1093                 {
1094                         Assert(event == XACT_EVENT_PARALLEL_ABORT ||
1095                                    event == XACT_EVENT_ABORT);
1096                         pgfdw_finish_abort_cleanup(pending_entries, cancel_requested,
1097                                                                            true);
1098                 }
1099         }
1100
1101         /*
1102          * Regardless of the event type, we can now mark ourselves as out of the
1103          * transaction.  (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
1104          * this saves a useless scan of the hashtable during COMMIT or PREPARE.)
1105          */
1106         xact_got_connection = false;
1107
1108         /* Also reset cursor numbering for next transaction */
1109         cursor_number = 0;
1110 }
1111
1112 /*
1113  * pgfdw_subxact_callback --- cleanup at subtransaction end.
1114  */
1115 static void
1116 pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
1117                                            SubTransactionId parentSubid, void *arg)
1118 {
1119         HASH_SEQ_STATUS scan;
1120         ConnCacheEntry *entry;
1121         int                     curlevel;
1122         List       *pending_entries = NIL;
1123         List       *cancel_requested = NIL;
1124
1125         /* Nothing to do at subxact start, nor after commit. */
1126         if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
1127                   event == SUBXACT_EVENT_ABORT_SUB))
1128                 return;
1129
1130         /* Quick exit if no connections were touched in this transaction. */
1131         if (!xact_got_connection)
1132                 return;
1133
1134         /*
1135          * Scan all connection cache entries to find open remote subtransactions
1136          * of the current level, and close them.
1137          */
1138         curlevel = GetCurrentTransactionNestLevel();
1139         hash_seq_init(&scan, ConnectionHash);
1140         while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
1141         {
1142                 char            sql[100];
1143
1144                 /*
1145                  * We only care about connections with open remote subtransactions of
1146                  * the current level.
1147                  */
1148                 if (entry->conn == NULL || entry->xact_depth < curlevel)
1149                         continue;
1150
1151                 if (entry->xact_depth > curlevel)
1152                         elog(ERROR, "missed cleaning up remote subtransaction at level %d",
1153                                  entry->xact_depth);
1154
1155                 if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
1156                 {
1157                         /*
1158                          * If abort cleanup previously failed for this connection, we
1159                          * can't issue any more commands against it.
1160                          */
1161                         pgfdw_reject_incomplete_xact_state_change(entry);
1162
1163                         /* Commit all remote subtransactions during pre-commit */
1164                         snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
1165                         entry->changing_xact_state = true;
1166                         if (entry->parallel_commit)
1167                         {
1168                                 do_sql_command_begin(entry->conn, sql);
1169                                 pending_entries = lappend(pending_entries, entry);
1170                                 continue;
1171                         }
1172                         do_sql_command(entry->conn, sql);
1173                         entry->changing_xact_state = false;
1174                 }
1175                 else
1176                 {
1177                         /* Rollback all remote subtransactions during abort */
1178                         if (entry->parallel_abort)
1179                         {
1180                                 if (pgfdw_abort_cleanup_begin(entry, false,
1181                                                                                           &pending_entries,
1182                                                                                           &cancel_requested))
1183                                         continue;
1184                         }
1185                         else
1186                                 pgfdw_abort_cleanup(entry, false);
1187                 }
1188
1189                 /* OK, we're outta that level of subtransaction */
1190                 pgfdw_reset_xact_state(entry, false);
1191         }
1192
1193         /* If there are any pending connections, finish cleaning them up */
1194         if (pending_entries || cancel_requested)
1195         {
1196                 if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
1197                 {
1198                         Assert(cancel_requested == NIL);
1199                         pgfdw_finish_pre_subcommit_cleanup(pending_entries, curlevel);
1200                 }
1201                 else
1202                 {
1203                         Assert(event == SUBXACT_EVENT_ABORT_SUB);
1204                         pgfdw_finish_abort_cleanup(pending_entries, cancel_requested,
1205                                                                            false);
1206                 }
1207         }
1208 }
1209
1210 /*
1211  * Connection invalidation callback function
1212  *
1213  * After a change to a pg_foreign_server or pg_user_mapping catalog entry,
1214  * close connections depending on that entry immediately if current transaction
1215  * has not used those connections yet. Otherwise, mark those connections as
1216  * invalid and then make pgfdw_xact_callback() close them at the end of current
1217  * transaction, since they cannot be closed in the midst of the transaction
1218  * using them. Closed connections will be remade at the next opportunity if
1219  * necessary.
1220  *
1221  * Although most cache invalidation callbacks blow away all the related stuff
1222  * regardless of the given hashvalue, connections are expensive enough that
1223  * it's worth trying to avoid that.
1224  *
1225  * NB: We could avoid unnecessary disconnection more strictly by examining
1226  * individual option values, but it seems too much effort for the gain.
1227  */
1228 static void
1229 pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
1230 {
1231         HASH_SEQ_STATUS scan;
1232         ConnCacheEntry *entry;
1233
1234         Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
1235
1236         /* ConnectionHash must exist already, if we're registered */
1237         hash_seq_init(&scan, ConnectionHash);
1238         while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
1239         {
1240                 /* Ignore invalid entries */
1241                 if (entry->conn == NULL)
1242                         continue;
1243
1244                 /* hashvalue == 0 means a cache reset, must clear all state */
1245                 if (hashvalue == 0 ||
1246                         (cacheid == FOREIGNSERVEROID &&
1247                          entry->server_hashvalue == hashvalue) ||
1248                         (cacheid == USERMAPPINGOID &&
1249                          entry->mapping_hashvalue == hashvalue))
1250                 {
1251                         /*
1252                          * Close the connection immediately if it's not used yet in this
1253                          * transaction. Otherwise mark it as invalid so that
1254                          * pgfdw_xact_callback() can close it at the end of this
1255                          * transaction.
1256                          */
1257                         if (entry->xact_depth == 0)
1258                         {
1259                                 elog(DEBUG3, "discarding connection %p", entry->conn);
1260                                 disconnect_pg_server(entry);
1261                         }
1262                         else
1263                                 entry->invalidated = true;
1264                 }
1265         }
1266 }
1267
1268 /*
1269  * Raise an error if the given connection cache entry is marked as being
1270  * in the middle of an xact state change.  This should be called at which no
1271  * such change is expected to be in progress; if one is found to be in
1272  * progress, it means that we aborted in the middle of a previous state change
1273  * and now don't know what the remote transaction state actually is.
1274  * Such connections can't safely be further used.  Re-establishing the
1275  * connection would change the snapshot and roll back any writes already
1276  * performed, so that's not an option, either. Thus, we must abort.
1277  */
1278 static void
1279 pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
1280 {
1281         ForeignServer *server;
1282
1283         /* nothing to do for inactive entries and entries of sane state */
1284         if (entry->conn == NULL || !entry->changing_xact_state)
1285                 return;
1286
1287         /* make sure this entry is inactive */
1288         disconnect_pg_server(entry);
1289
1290         /* find server name to be shown in the message below */
1291         server = GetForeignServer(entry->serverid);
1292
1293         ereport(ERROR,
1294                         (errcode(ERRCODE_CONNECTION_EXCEPTION),
1295                          errmsg("connection to server \"%s\" was lost",
1296                                         server->servername)));
1297 }
1298
1299 /*
1300  * Reset state to show we're out of a (sub)transaction.
1301  */
1302 static void
1303 pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel)
1304 {
1305         if (toplevel)
1306         {
1307                 /* Reset state to show we're out of a transaction */
1308                 entry->xact_depth = 0;
1309
1310                 /*
1311                  * If the connection isn't in a good idle state, it is marked as
1312                  * invalid or keep_connections option of its server is disabled, then
1313                  * discard it to recover. Next GetConnection will open a new
1314                  * connection.
1315                  */
1316                 if (PQstatus(entry->conn) != CONNECTION_OK ||
1317                         PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
1318                         entry->changing_xact_state ||
1319                         entry->invalidated ||
1320                         !entry->keep_connections)
1321                 {
1322                         elog(DEBUG3, "discarding connection %p", entry->conn);
1323                         disconnect_pg_server(entry);
1324                 }
1325         }
1326         else
1327         {
1328                 /* Reset state to show we're out of a subtransaction */
1329                 entry->xact_depth--;
1330         }
1331 }
1332
1333 /*
1334  * Cancel the currently-in-progress query (whose query text we do not have)
1335  * and ignore the result.  Returns true if we successfully cancel the query
1336  * and discard any pending result, and false if not.
1337  *
1338  * It's not a huge problem if we throw an ERROR here, but if we get into error
1339  * recursion trouble, we'll end up slamming the connection shut, which will
1340  * necessitate failing the entire toplevel transaction even if subtransactions
1341  * were used.  Try to use WARNING where we can.
1342  *
1343  * XXX: if the query was one sent by fetch_more_data_begin(), we could get the
1344  * query text from the pendingAreq saved in the per-connection state, then
1345  * report the query using it.
1346  */
1347 static bool
1348 pgfdw_cancel_query(PGconn *conn)
1349 {
1350         TimestampTz endtime;
1351
1352         /*
1353          * If it takes too long to cancel the query and discard the result, assume
1354          * the connection is dead.
1355          */
1356         endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1357                                                                                   CONNECTION_CLEANUP_TIMEOUT);
1358
1359         if (!pgfdw_cancel_query_begin(conn))
1360                 return false;
1361         return pgfdw_cancel_query_end(conn, endtime, false);
1362 }
1363
1364 static bool
1365 pgfdw_cancel_query_begin(PGconn *conn)
1366 {
1367         PGcancel   *cancel;
1368         char            errbuf[256];
1369
1370         /*
1371          * Issue cancel request.  Unfortunately, there's no good way to limit the
1372          * amount of time that we might block inside PQgetCancel().
1373          */
1374         if ((cancel = PQgetCancel(conn)))
1375         {
1376                 if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
1377                 {
1378                         ereport(WARNING,
1379                                         (errcode(ERRCODE_CONNECTION_FAILURE),
1380                                          errmsg("could not send cancel request: %s",
1381                                                         errbuf)));
1382                         PQfreeCancel(cancel);
1383                         return false;
1384                 }
1385                 PQfreeCancel(cancel);
1386         }
1387
1388         return true;
1389 }
1390
1391 static bool
1392 pgfdw_cancel_query_end(PGconn *conn, TimestampTz endtime, bool consume_input)
1393 {
1394         PGresult   *result = NULL;
1395         bool            timed_out;
1396
1397         /*
1398          * If requested, consume whatever data is available from the socket. (Note
1399          * that if all data is available, this allows pgfdw_get_cleanup_result to
1400          * call PQgetResult without forcing the overhead of WaitLatchOrSocket,
1401          * which would be large compared to the overhead of PQconsumeInput.)
1402          */
1403         if (consume_input && !PQconsumeInput(conn))
1404         {
1405                 ereport(WARNING,
1406                                 (errcode(ERRCODE_CONNECTION_FAILURE),
1407                                  errmsg("could not get result of cancel request: %s",
1408                                                 pchomp(PQerrorMessage(conn)))));
1409                 return false;
1410         }
1411
1412         /* Get and discard the result of the query. */
1413         if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
1414         {
1415                 if (timed_out)
1416                         ereport(WARNING,
1417                                         (errmsg("could not get result of cancel request due to timeout")));
1418                 else
1419                         ereport(WARNING,
1420                                         (errcode(ERRCODE_CONNECTION_FAILURE),
1421                                          errmsg("could not get result of cancel request: %s",
1422                                                         pchomp(PQerrorMessage(conn)))));
1423
1424                 return false;
1425         }
1426         PQclear(result);
1427
1428         return true;
1429 }
1430
1431 /*
1432  * Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
1433  * result.  If the query is executed without error, the return value is true.
1434  * If the query is executed successfully but returns an error, the return
1435  * value is true if and only if ignore_errors is set.  If the query can't be
1436  * sent or times out, the return value is false.
1437  *
1438  * It's not a huge problem if we throw an ERROR here, but if we get into error
1439  * recursion trouble, we'll end up slamming the connection shut, which will
1440  * necessitate failing the entire toplevel transaction even if subtransactions
1441  * were used.  Try to use WARNING where we can.
1442  */
1443 static bool
1444 pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
1445 {
1446         TimestampTz endtime;
1447
1448         /*
1449          * If it takes too long to execute a cleanup query, assume the connection
1450          * is dead.  It's fairly likely that this is why we aborted in the first
1451          * place (e.g. statement timeout, user cancel), so the timeout shouldn't
1452          * be too long.
1453          */
1454         endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1455                                                                                   CONNECTION_CLEANUP_TIMEOUT);
1456
1457         if (!pgfdw_exec_cleanup_query_begin(conn, query))
1458                 return false;
1459         return pgfdw_exec_cleanup_query_end(conn, query, endtime,
1460                                                                                 false, ignore_errors);
1461 }
1462
1463 static bool
1464 pgfdw_exec_cleanup_query_begin(PGconn *conn, const char *query)
1465 {
1466         /*
1467          * Submit a query.  Since we don't use non-blocking mode, this also can
1468          * block.  But its risk is relatively small, so we ignore that for now.
1469          */
1470         if (!PQsendQuery(conn, query))
1471         {
1472                 pgfdw_report_error(WARNING, NULL, conn, false, query);
1473                 return false;
1474         }
1475
1476         return true;
1477 }
1478
1479 static bool
1480 pgfdw_exec_cleanup_query_end(PGconn *conn, const char *query,
1481                                                          TimestampTz endtime, bool consume_input,
1482                                                          bool ignore_errors)
1483 {
1484         PGresult   *result = NULL;
1485         bool            timed_out;
1486
1487         /*
1488          * If requested, consume whatever data is available from the socket. (Note
1489          * that if all data is available, this allows pgfdw_get_cleanup_result to
1490          * call PQgetResult without forcing the overhead of WaitLatchOrSocket,
1491          * which would be large compared to the overhead of PQconsumeInput.)
1492          */
1493         if (consume_input && !PQconsumeInput(conn))
1494         {
1495                 pgfdw_report_error(WARNING, NULL, conn, false, query);
1496                 return false;
1497         }
1498
1499         /* Get the result of the query. */
1500         if (pgfdw_get_cleanup_result(conn, endtime, &result, &timed_out))
1501         {
1502                 if (timed_out)
1503                         ereport(WARNING,
1504                                         (errmsg("could not get query result due to timeout"),
1505                                          query ? errcontext("remote SQL command: %s", query) : 0));
1506                 else
1507                         pgfdw_report_error(WARNING, NULL, conn, false, query);
1508
1509                 return false;
1510         }
1511
1512         /* Issue a warning if not successful. */
1513         if (PQresultStatus(result) != PGRES_COMMAND_OK)
1514         {
1515                 pgfdw_report_error(WARNING, result, conn, true, query);
1516                 return ignore_errors;
1517         }
1518         PQclear(result);
1519
1520         return true;
1521 }
1522
1523 /*
1524  * Get, during abort cleanup, the result of a query that is in progress.  This
1525  * might be a query that is being interrupted by transaction abort, or it might
1526  * be a query that was initiated as part of transaction abort to get the remote
1527  * side back to the appropriate state.
1528  *
1529  * endtime is the time at which we should give up and assume the remote
1530  * side is dead.  Returns true if the timeout expired or connection trouble
1531  * occurred, false otherwise.  Sets *result except in case of a timeout.
1532  * Sets timed_out to true only when the timeout expired.
1533  */
1534 static bool
1535 pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result,
1536                                                  bool *timed_out)
1537 {
1538         volatile bool failed = false;
1539         PGresult   *volatile last_res = NULL;
1540
1541         *timed_out = false;
1542
1543         /* In what follows, do not leak any PGresults on an error. */
1544         PG_TRY();
1545         {
1546                 for (;;)
1547                 {
1548                         PGresult   *res;
1549
1550                         while (PQisBusy(conn))
1551                         {
1552                                 int                     wc;
1553                                 TimestampTz now = GetCurrentTimestamp();
1554                                 long            cur_timeout;
1555
1556                                 /* If timeout has expired, give up, else get sleep time. */
1557                                 cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
1558                                 if (cur_timeout <= 0)
1559                                 {
1560                                         *timed_out = true;
1561                                         failed = true;
1562                                         goto exit;
1563                                 }
1564
1565                                 /* Sleep until there's something to do */
1566                                 wc = WaitLatchOrSocket(MyLatch,
1567                                                                            WL_LATCH_SET | WL_SOCKET_READABLE |
1568                                                                            WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1569                                                                            PQsocket(conn),
1570                                                                            cur_timeout, PG_WAIT_EXTENSION);
1571                                 ResetLatch(MyLatch);
1572
1573                                 CHECK_FOR_INTERRUPTS();
1574
1575                                 /* Data available in socket? */
1576                                 if (wc & WL_SOCKET_READABLE)
1577                                 {
1578                                         if (!PQconsumeInput(conn))
1579                                         {
1580                                                 /* connection trouble */
1581                                                 failed = true;
1582                                                 goto exit;
1583                                         }
1584                                 }
1585                         }
1586
1587                         res = PQgetResult(conn);
1588                         if (res == NULL)
1589                                 break;                  /* query is complete */
1590
1591                         PQclear(last_res);
1592                         last_res = res;
1593                 }
1594 exit:   ;
1595         }
1596         PG_CATCH();
1597         {
1598                 PQclear(last_res);
1599                 PG_RE_THROW();
1600         }
1601         PG_END_TRY();
1602
1603         if (failed)
1604                 PQclear(last_res);
1605         else
1606                 *result = last_res;
1607         return failed;
1608 }
1609
1610 /*
1611  * Abort remote transaction or subtransaction.
1612  *
1613  * "toplevel" should be set to true if toplevel (main) transaction is
1614  * rollbacked, false otherwise.
1615  *
1616  * Set entry->changing_xact_state to false on success, true on failure.
1617  */
1618 static void
1619 pgfdw_abort_cleanup(ConnCacheEntry *entry, bool toplevel)
1620 {
1621         char            sql[100];
1622
1623         /*
1624          * Don't try to clean up the connection if we're already in error
1625          * recursion trouble.
1626          */
1627         if (in_error_recursion_trouble())
1628                 entry->changing_xact_state = true;
1629
1630         /*
1631          * If connection is already unsalvageable, don't touch it further.
1632          */
1633         if (entry->changing_xact_state)
1634                 return;
1635
1636         /*
1637          * Mark this connection as in the process of changing transaction state.
1638          */
1639         entry->changing_xact_state = true;
1640
1641         /* Assume we might have lost track of prepared statements */
1642         entry->have_error = true;
1643
1644         /*
1645          * If a command has been submitted to the remote server by using an
1646          * asynchronous execution function, the command might not have yet
1647          * completed.  Check to see if a command is still being processed by the
1648          * remote server, and if so, request cancellation of the command.
1649          */
1650         if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
1651                 !pgfdw_cancel_query(entry->conn))
1652                 return;                                 /* Unable to cancel running query */
1653
1654         CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
1655         if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
1656                 return;                                 /* Unable to abort remote (sub)transaction */
1657
1658         if (toplevel)
1659         {
1660                 if (entry->have_prep_stmt && entry->have_error &&
1661                         !pgfdw_exec_cleanup_query(entry->conn,
1662                                                                           "DEALLOCATE ALL",
1663                                                                           true))
1664                         return;                         /* Trouble clearing prepared statements */
1665
1666                 entry->have_prep_stmt = false;
1667                 entry->have_error = false;
1668         }
1669
1670         /*
1671          * If pendingAreq of the per-connection state is not NULL, it means that
1672          * an asynchronous fetch begun by fetch_more_data_begin() was not done
1673          * successfully and thus the per-connection state was not reset in
1674          * fetch_more_data(); in that case reset the per-connection state here.
1675          */
1676         if (entry->state.pendingAreq)
1677                 memset(&entry->state, 0, sizeof(entry->state));
1678
1679         /* Disarm changing_xact_state if it all worked */
1680         entry->changing_xact_state = false;
1681 }
1682
1683 /*
1684  * Like pgfdw_abort_cleanup, submit an abort command or cancel request, but
1685  * don't wait for the result.
1686  *
1687  * Returns true if the abort command or cancel request is successfully issued,
1688  * false otherwise.  If the abort command is successfully issued, the given
1689  * connection cache entry is appended to *pending_entries.  Otherwise, if the
1690  * cancel request is successfully issued, it is appended to *cancel_requested.
1691  */
1692 static bool
1693 pgfdw_abort_cleanup_begin(ConnCacheEntry *entry, bool toplevel,
1694                                                   List **pending_entries, List **cancel_requested)
1695 {
1696         /*
1697          * Don't try to clean up the connection if we're already in error
1698          * recursion trouble.
1699          */
1700         if (in_error_recursion_trouble())
1701                 entry->changing_xact_state = true;
1702
1703         /*
1704          * If connection is already unsalvageable, don't touch it further.
1705          */
1706         if (entry->changing_xact_state)
1707                 return false;
1708
1709         /*
1710          * Mark this connection as in the process of changing transaction state.
1711          */
1712         entry->changing_xact_state = true;
1713
1714         /* Assume we might have lost track of prepared statements */
1715         entry->have_error = true;
1716
1717         /*
1718          * If a command has been submitted to the remote server by using an
1719          * asynchronous execution function, the command might not have yet
1720          * completed.  Check to see if a command is still being processed by the
1721          * remote server, and if so, request cancellation of the command.
1722          */
1723         if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE)
1724         {
1725                 if (!pgfdw_cancel_query_begin(entry->conn))
1726                         return false;           /* Unable to cancel running query */
1727                 *cancel_requested = lappend(*cancel_requested, entry);
1728         }
1729         else
1730         {
1731                 char            sql[100];
1732
1733                 CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
1734                 if (!pgfdw_exec_cleanup_query_begin(entry->conn, sql))
1735                         return false;           /* Unable to abort remote transaction */
1736                 *pending_entries = lappend(*pending_entries, entry);
1737         }
1738
1739         return true;
1740 }
1741
1742 /*
1743  * Finish pre-commit cleanup of connections on each of which we've sent a
1744  * COMMIT command to the remote server.
1745  */
1746 static void
1747 pgfdw_finish_pre_commit_cleanup(List *pending_entries)
1748 {
1749         ConnCacheEntry *entry;
1750         List       *pending_deallocs = NIL;
1751         ListCell   *lc;
1752
1753         Assert(pending_entries);
1754
1755         /*
1756          * Get the result of the COMMIT command for each of the pending entries
1757          */
1758         foreach(lc, pending_entries)
1759         {
1760                 entry = (ConnCacheEntry *) lfirst(lc);
1761
1762                 Assert(entry->changing_xact_state);
1763
1764                 /*
1765                  * We might already have received the result on the socket, so pass
1766                  * consume_input=true to try to consume it first
1767                  */
1768                 do_sql_command_end(entry->conn, "COMMIT TRANSACTION", true);
1769                 entry->changing_xact_state = false;
1770
1771                 /* Do a DEALLOCATE ALL in parallel if needed */
1772                 if (entry->have_prep_stmt && entry->have_error)
1773                 {
1774                         /* Ignore errors (see notes in pgfdw_xact_callback) */
1775                         if (PQsendQuery(entry->conn, "DEALLOCATE ALL"))
1776                         {
1777                                 pending_deallocs = lappend(pending_deallocs, entry);
1778                                 continue;
1779                         }
1780                 }
1781                 entry->have_prep_stmt = false;
1782                 entry->have_error = false;
1783
1784                 pgfdw_reset_xact_state(entry, true);
1785         }
1786
1787         /* No further work if no pending entries */
1788         if (!pending_deallocs)
1789                 return;
1790
1791         /*
1792          * Get the result of the DEALLOCATE command for each of the pending
1793          * entries
1794          */
1795         foreach(lc, pending_deallocs)
1796         {
1797                 PGresult   *res;
1798
1799                 entry = (ConnCacheEntry *) lfirst(lc);
1800
1801                 /* Ignore errors (see notes in pgfdw_xact_callback) */
1802                 while ((res = PQgetResult(entry->conn)) != NULL)
1803                 {
1804                         PQclear(res);
1805                         /* Stop if the connection is lost (else we'll loop infinitely) */
1806                         if (PQstatus(entry->conn) == CONNECTION_BAD)
1807                                 break;
1808                 }
1809                 entry->have_prep_stmt = false;
1810                 entry->have_error = false;
1811
1812                 pgfdw_reset_xact_state(entry, true);
1813         }
1814 }
1815
1816 /*
1817  * Finish pre-subcommit cleanup of connections on each of which we've sent a
1818  * RELEASE command to the remote server.
1819  */
1820 static void
1821 pgfdw_finish_pre_subcommit_cleanup(List *pending_entries, int curlevel)
1822 {
1823         ConnCacheEntry *entry;
1824         char            sql[100];
1825         ListCell   *lc;
1826
1827         Assert(pending_entries);
1828
1829         /*
1830          * Get the result of the RELEASE command for each of the pending entries
1831          */
1832         snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
1833         foreach(lc, pending_entries)
1834         {
1835                 entry = (ConnCacheEntry *) lfirst(lc);
1836
1837                 Assert(entry->changing_xact_state);
1838
1839                 /*
1840                  * We might already have received the result on the socket, so pass
1841                  * consume_input=true to try to consume it first
1842                  */
1843                 do_sql_command_end(entry->conn, sql, true);
1844                 entry->changing_xact_state = false;
1845
1846                 pgfdw_reset_xact_state(entry, false);
1847         }
1848 }
1849
1850 /*
1851  * Finish abort cleanup of connections on each of which we've sent an abort
1852  * command or cancel request to the remote server.
1853  */
1854 static void
1855 pgfdw_finish_abort_cleanup(List *pending_entries, List *cancel_requested,
1856                                                    bool toplevel)
1857 {
1858         List       *pending_deallocs = NIL;
1859         ListCell   *lc;
1860
1861         /*
1862          * For each of the pending cancel requests (if any), get and discard the
1863          * result of the query, and submit an abort command to the remote server.
1864          */
1865         if (cancel_requested)
1866         {
1867                 foreach(lc, cancel_requested)
1868                 {
1869                         ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
1870                         TimestampTz endtime;
1871                         char            sql[100];
1872
1873                         Assert(entry->changing_xact_state);
1874
1875                         /*
1876                          * Set end time.  You might think we should do this before issuing
1877                          * cancel request like in normal mode, but that is problematic,
1878                          * because if, for example, it took longer than 30 seconds to
1879                          * process the first few entries in the cancel_requested list, it
1880                          * would cause a timeout error when processing each of the
1881                          * remaining entries in the list, leading to slamming that entry's
1882                          * connection shut.
1883                          */
1884                         endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1885                                                                                                   CONNECTION_CLEANUP_TIMEOUT);
1886
1887                         if (!pgfdw_cancel_query_end(entry->conn, endtime, true))
1888                         {
1889                                 /* Unable to cancel running query */
1890                                 pgfdw_reset_xact_state(entry, toplevel);
1891                                 continue;
1892                         }
1893
1894                         /* Send an abort command in parallel if needed */
1895                         CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
1896                         if (!pgfdw_exec_cleanup_query_begin(entry->conn, sql))
1897                         {
1898                                 /* Unable to abort remote (sub)transaction */
1899                                 pgfdw_reset_xact_state(entry, toplevel);
1900                         }
1901                         else
1902                                 pending_entries = lappend(pending_entries, entry);
1903                 }
1904         }
1905
1906         /* No further work if no pending entries */
1907         if (!pending_entries)
1908                 return;
1909
1910         /*
1911          * Get the result of the abort command for each of the pending entries
1912          */
1913         foreach(lc, pending_entries)
1914         {
1915                 ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
1916                 TimestampTz endtime;
1917                 char            sql[100];
1918
1919                 Assert(entry->changing_xact_state);
1920
1921                 /*
1922                  * Set end time.  We do this now, not before issuing the command like
1923                  * in normal mode, for the same reason as for the cancel_requested
1924                  * entries.
1925                  */
1926                 endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1927                                                                                           CONNECTION_CLEANUP_TIMEOUT);
1928
1929                 CONSTRUCT_ABORT_COMMAND(sql, entry, toplevel);
1930                 if (!pgfdw_exec_cleanup_query_end(entry->conn, sql, endtime,
1931                                                                                   true, false))
1932                 {
1933                         /* Unable to abort remote (sub)transaction */
1934                         pgfdw_reset_xact_state(entry, toplevel);
1935                         continue;
1936                 }
1937
1938                 if (toplevel)
1939                 {
1940                         /* Do a DEALLOCATE ALL in parallel if needed */
1941                         if (entry->have_prep_stmt && entry->have_error)
1942                         {
1943                                 if (!pgfdw_exec_cleanup_query_begin(entry->conn,
1944                                                                                                         "DEALLOCATE ALL"))
1945                                 {
1946                                         /* Trouble clearing prepared statements */
1947                                         pgfdw_reset_xact_state(entry, toplevel);
1948                                 }
1949                                 else
1950                                         pending_deallocs = lappend(pending_deallocs, entry);
1951                                 continue;
1952                         }
1953                         entry->have_prep_stmt = false;
1954                         entry->have_error = false;
1955                 }
1956
1957                 /* Reset the per-connection state if needed */
1958                 if (entry->state.pendingAreq)
1959                         memset(&entry->state, 0, sizeof(entry->state));
1960
1961                 /* We're done with this entry; unset the changing_xact_state flag */
1962                 entry->changing_xact_state = false;
1963                 pgfdw_reset_xact_state(entry, toplevel);
1964         }
1965
1966         /* No further work if no pending entries */
1967         if (!pending_deallocs)
1968                 return;
1969         Assert(toplevel);
1970
1971         /*
1972          * Get the result of the DEALLOCATE command for each of the pending
1973          * entries
1974          */
1975         foreach(lc, pending_deallocs)
1976         {
1977                 ConnCacheEntry *entry = (ConnCacheEntry *) lfirst(lc);
1978                 TimestampTz endtime;
1979
1980                 Assert(entry->changing_xact_state);
1981                 Assert(entry->have_prep_stmt);
1982                 Assert(entry->have_error);
1983
1984                 /*
1985                  * Set end time.  We do this now, not before issuing the command like
1986                  * in normal mode, for the same reason as for the cancel_requested
1987                  * entries.
1988                  */
1989                 endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
1990                                                                                           CONNECTION_CLEANUP_TIMEOUT);
1991
1992                 if (!pgfdw_exec_cleanup_query_end(entry->conn, "DEALLOCATE ALL",
1993                                                                                   endtime, true, true))
1994                 {
1995                         /* Trouble clearing prepared statements */
1996                         pgfdw_reset_xact_state(entry, toplevel);
1997                         continue;
1998                 }
1999                 entry->have_prep_stmt = false;
2000                 entry->have_error = false;
2001
2002                 /* Reset the per-connection state if needed */
2003                 if (entry->state.pendingAreq)
2004                         memset(&entry->state, 0, sizeof(entry->state));
2005
2006                 /* We're done with this entry; unset the changing_xact_state flag */
2007                 entry->changing_xact_state = false;
2008                 pgfdw_reset_xact_state(entry, toplevel);
2009         }
2010 }
2011
2012 /*
2013  * List active foreign server connections.
2014  *
2015  * This function takes no input parameter and returns setof record made of
2016  * following values:
2017  * - server_name - server name of active connection. In case the foreign server
2018  *   is dropped but still the connection is active, then the server name will
2019  *   be NULL in output.
2020  * - valid - true/false representing whether the connection is valid or not.
2021  *       Note that the connections can get invalidated in pgfdw_inval_callback.
2022  *
2023  * No records are returned when there are no cached connections at all.
2024  */
2025 Datum
2026 postgres_fdw_get_connections(PG_FUNCTION_ARGS)
2027 {
2028 #define POSTGRES_FDW_GET_CONNECTIONS_COLS       2
2029         ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
2030         HASH_SEQ_STATUS scan;
2031         ConnCacheEntry *entry;
2032
2033         InitMaterializedSRF(fcinfo, 0);
2034
2035         /* If cache doesn't exist, we return no records */
2036         if (!ConnectionHash)
2037                 PG_RETURN_VOID();
2038
2039         hash_seq_init(&scan, ConnectionHash);
2040         while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
2041         {
2042                 ForeignServer *server;
2043                 Datum           values[POSTGRES_FDW_GET_CONNECTIONS_COLS] = {0};
2044                 bool            nulls[POSTGRES_FDW_GET_CONNECTIONS_COLS] = {0};
2045
2046                 /* We only look for open remote connections */
2047                 if (!entry->conn)
2048                         continue;
2049
2050                 server = GetForeignServerExtended(entry->serverid, FSV_MISSING_OK);
2051
2052                 /*
2053                  * The foreign server may have been dropped in current explicit
2054                  * transaction. It is not possible to drop the server from another
2055                  * session when the connection associated with it is in use in the
2056                  * current transaction, if tried so, the drop query in another session
2057                  * blocks until the current transaction finishes.
2058                  *
2059                  * Even though the server is dropped in the current transaction, the
2060                  * cache can still have associated active connection entry, say we
2061                  * call such connections dangling. Since we can not fetch the server
2062                  * name from system catalogs for dangling connections, instead we show
2063                  * NULL value for server name in output.
2064                  *
2065                  * We could have done better by storing the server name in the cache
2066                  * entry instead of server oid so that it could be used in the output.
2067                  * But the server name in each cache entry requires 64 bytes of
2068                  * memory, which is huge, when there are many cached connections and
2069                  * the use case i.e. dropping the foreign server within the explicit
2070                  * current transaction seems rare. So, we chose to show NULL value for
2071                  * server name in output.
2072                  *
2073                  * Such dangling connections get closed either in next use or at the
2074                  * end of current explicit transaction in pgfdw_xact_callback.
2075                  */
2076                 if (!server)
2077                 {
2078                         /*
2079                          * If the server has been dropped in the current explicit
2080                          * transaction, then this entry would have been invalidated in
2081                          * pgfdw_inval_callback at the end of drop server command. Note
2082                          * that this connection would not have been closed in
2083                          * pgfdw_inval_callback because it is still being used in the
2084                          * current explicit transaction. So, assert that here.
2085                          */
2086                         Assert(entry->conn && entry->xact_depth > 0 && entry->invalidated);
2087
2088                         /* Show null, if no server name was found */
2089                         nulls[0] = true;
2090                 }
2091                 else
2092                         values[0] = CStringGetTextDatum(server->servername);
2093
2094                 values[1] = BoolGetDatum(!entry->invalidated);
2095
2096                 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2097         }
2098
2099         PG_RETURN_VOID();
2100 }
2101
2102 /*
2103  * Disconnect the specified cached connections.
2104  *
2105  * This function discards the open connections that are established by
2106  * postgres_fdw from the local session to the foreign server with
2107  * the given name. Note that there can be multiple connections to
2108  * the given server using different user mappings. If the connections
2109  * are used in the current local transaction, they are not disconnected
2110  * and warning messages are reported. This function returns true
2111  * if it disconnects at least one connection, otherwise false. If no
2112  * foreign server with the given name is found, an error is reported.
2113  */
2114 Datum
2115 postgres_fdw_disconnect(PG_FUNCTION_ARGS)
2116 {
2117         ForeignServer *server;
2118         char       *servername;
2119
2120         servername = text_to_cstring(PG_GETARG_TEXT_PP(0));
2121         server = GetForeignServerByName(servername, false);
2122
2123         PG_RETURN_BOOL(disconnect_cached_connections(server->serverid));
2124 }
2125
2126 /*
2127  * Disconnect all the cached connections.
2128  *
2129  * This function discards all the open connections that are established by
2130  * postgres_fdw from the local session to the foreign servers.
2131  * If the connections are used in the current local transaction, they are
2132  * not disconnected and warning messages are reported. This function
2133  * returns true if it disconnects at least one connection, otherwise false.
2134  */
2135 Datum
2136 postgres_fdw_disconnect_all(PG_FUNCTION_ARGS)
2137 {
2138         PG_RETURN_BOOL(disconnect_cached_connections(InvalidOid));
2139 }
2140
2141 /*
2142  * Workhorse to disconnect cached connections.
2143  *
2144  * This function scans all the connection cache entries and disconnects
2145  * the open connections whose foreign server OID matches with
2146  * the specified one. If InvalidOid is specified, it disconnects all
2147  * the cached connections.
2148  *
2149  * This function emits a warning for each connection that's used in
2150  * the current transaction and doesn't close it. It returns true if
2151  * it disconnects at least one connection, otherwise false.
2152  *
2153  * Note that this function disconnects even the connections that are
2154  * established by other users in the same local session using different
2155  * user mappings. This leads even non-superuser to be able to close
2156  * the connections established by superusers in the same local session.
2157  *
2158  * XXX As of now we don't see any security risk doing this. But we should
2159  * set some restrictions on that, for example, prevent non-superuser
2160  * from closing the connections established by superusers even
2161  * in the same session?
2162  */
2163 static bool
2164 disconnect_cached_connections(Oid serverid)
2165 {
2166         HASH_SEQ_STATUS scan;
2167         ConnCacheEntry *entry;
2168         bool            all = !OidIsValid(serverid);
2169         bool            result = false;
2170
2171         /*
2172          * Connection cache hashtable has not been initialized yet in this
2173          * session, so return false.
2174          */
2175         if (!ConnectionHash)
2176                 return false;
2177
2178         hash_seq_init(&scan, ConnectionHash);
2179         while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
2180         {
2181                 /* Ignore cache entry if no open connection right now. */
2182                 if (!entry->conn)
2183                         continue;
2184
2185                 if (all || entry->serverid == serverid)
2186                 {
2187                         /*
2188                          * Emit a warning because the connection to close is used in the
2189                          * current transaction and cannot be disconnected right now.
2190                          */
2191                         if (entry->xact_depth > 0)
2192                         {
2193                                 ForeignServer *server;
2194
2195                                 server = GetForeignServerExtended(entry->serverid,
2196                                                                                                   FSV_MISSING_OK);
2197
2198                                 if (!server)
2199                                 {
2200                                         /*
2201                                          * If the foreign server was dropped while its connection
2202                                          * was used in the current transaction, the connection
2203                                          * must have been marked as invalid by
2204                                          * pgfdw_inval_callback at the end of DROP SERVER command.
2205                                          */
2206                                         Assert(entry->invalidated);
2207
2208                                         ereport(WARNING,
2209                                                         (errmsg("cannot close dropped server connection because it is still in use")));
2210                                 }
2211                                 else
2212                                         ereport(WARNING,
2213                                                         (errmsg("cannot close connection for server \"%s\" because it is still in use",
2214                                                                         server->servername)));
2215                         }
2216                         else
2217                         {
2218                                 elog(DEBUG3, "discarding connection %p", entry->conn);
2219                                 disconnect_pg_server(entry);
2220                                 result = true;
2221                         }
2222                 }
2223         }
2224
2225         return result;
2226 }