src/backend/postmaster/postmaster.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * postmaster.c
   4  *        This program acts as a clearing house for requests to the
   5  *        POSTGRES system.  Frontend programs send a startup message
   6  *        to the Postmaster and the postmaster uses the info in the
   7  *        message to setup a backend process.
   8  *
   9  *        The postmaster also manages system-wide operations such as
  10  *        startup and shutdown. The postmaster itself doesn't do those
  11  *        operations, mind you --- it just forks off a subprocess to do them
  12  *        at the right times.  It also takes care of resetting the system
  13  *        if a backend crashes.
  14  *
  15  *        The postmaster process creates the shared memory and semaphore
  16  *        pools during startup, but as a rule does not touch them itself.
  17  *        In particular, it is not a member of the PGPROC array of backends
  18  *        and so it cannot participate in lock-manager operations.  Keeping
  19  *        the postmaster away from shared memory operations makes it simpler
  20  *        and more reliable.  The postmaster is almost always able to recover
  21  *        from crashes of individual backends by resetting shared memory;
  22  *        if it did much with shared memory then it would be prone to crashing
  23  *        along with the backends.
  24  *
  25  *        When a request message is received, we now fork() immediately.
  26  *        The child process performs authentication of the request, and
  27  *        then becomes a backend if successful.  This allows the auth code
  28  *        to be written in a simple single-threaded style (as opposed to the
  29  *        crufty "poor man's multitasking" code that used to be needed).
  30  *        More importantly, it ensures that blockages in non-multithreaded
  31  *        libraries like SSL or PAM cannot cause denial of service to other
  32  *        clients.
  33  *
  34  *
  35  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  36  * Portions Copyright (c) 1994, Regents of the University of California
  37  *
  38  *
  39  * IDENTIFICATION
  40  *        src/backend/postmaster/postmaster.c
  41  *
  42  * NOTES
  43  *
  44  * Initialization:
  45  *              The Postmaster sets up shared memory data structures
  46  *              for the backends.
  47  *
  48  * Synchronization:
  49  *              The Postmaster shares memory with the backends but should avoid
  50  *              touching shared memory, so as not to become stuck if a crashing
  51  *              backend screws up locks or shared memory.  Likewise, the Postmaster
  52  *              should never block on messages from frontend clients.
  53  *
  54  * Garbage Collection:
  55  *              The Postmaster cleans up after backends if they have an emergency
  56  *              exit and/or core dump.
  57  *
  58  * Error Reporting:
  59  *              Use write_stderr() only for reporting "interactive" errors
  60  *              (essentially, bogus arguments on the command line).  Once the
  61  *              postmaster is launched, use ereport().
  62  *
  63  *-------------------------------------------------------------------------
  64  */
  65
  66 #include "postgres.h"
  67
  68 #include <unistd.h>
  69 #include <signal.h>
  70 #include <time.h>
  71 #include <sys/wait.h>
  72 #include <ctype.h>
  73 #include <sys/stat.h>
  74 #include <sys/socket.h>
  75 #include <fcntl.h>
  76 #include <sys/param.h>
  77 #include <netdb.h>
  78 #include <limits.h>
  79
  80 #ifdef USE_BONJOUR
  81 #include <dns_sd.h>
  82 #endif
  83
  84 #ifdef USE_SYSTEMD
  85 #include <systemd/sd-daemon.h>
  86 #endif
  87
  88 #ifdef HAVE_PTHREAD_IS_THREADED_NP
  89 #include <pthread.h>
  90 #endif
  91
  92 #include "access/transam.h"
  93 #include "access/xlog.h"
  94 #include "access/xlogrecovery.h"
  95 #include "catalog/pg_control.h"
  96 #include "common/file_perm.h"
  97 #include "common/file_utils.h"
  98 #include "common/ip.h"
  99 #include "common/pg_prng.h"
 100 #include "common/string.h"
 101 #include "lib/ilist.h"
 102 #include "libpq/auth.h"
 103 #include "libpq/libpq.h"
 104 #include "libpq/pqformat.h"
 105 #include "libpq/pqsignal.h"
 106 #include "nodes/queryjumble.h"
 107 #include "pg_getopt.h"
 108 #include "pgstat.h"
 109 #include "port/pg_bswap.h"
 110 #include "postmaster/autovacuum.h"
 111 #include "postmaster/auxprocess.h"
 112 #include "postmaster/bgworker_internals.h"
 113 #include "postmaster/fork_process.h"
 114 #include "postmaster/interrupt.h"
 115 #include "postmaster/pgarch.h"
 116 #include "postmaster/postmaster.h"
 117 #include "postmaster/syslogger.h"
 118 #include "replication/logicallauncher.h"
 119 #include "replication/walsender.h"
 120 #include "storage/fd.h"
 121 #include "storage/ipc.h"
 122 #include "storage/pg_shmem.h"
 123 #include "storage/pmsignal.h"
 124 #include "storage/proc.h"
 125 #include "tcop/tcopprot.h"
 126 #include "utils/builtins.h"
 127 #include "utils/datetime.h"
 128 #include "utils/memutils.h"
 129 #include "utils/pidfile.h"
 130 #include "utils/ps_status.h"
 131 #include "utils/timeout.h"
 132 #include "utils/timestamp.h"
 133 #include "utils/varlena.h"
 134
 135 #ifdef EXEC_BACKEND
 136 #include "storage/spin.h"
 137 #endif
 138
 139
 140 /*
 141  * Possible types of a backend. Beyond being the possible bkend_type values in
 142  * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
 143  * and CountChildren().
      *
      * Each type occupies its own bit, and BACKEND_TYPE_ALL (0x000F) is exactly
      * the OR of the four type bits defined here; it has to be widened whenever
      * another type bit is added.
 144  */
 145 #define BACKEND_TYPE_NORMAL             0x0001  /* normal backend */
 146 #define BACKEND_TYPE_AUTOVAC    0x0002  /* autovacuum worker process */
 147 #define BACKEND_TYPE_WALSND             0x0004  /* walsender process */
 148 #define BACKEND_TYPE_BGWORKER   0x0008  /* bgworker process */
 149 #define BACKEND_TYPE_ALL                0x000F  /* OR of all the above */
 150
 151 /*
 152  * List of active backends (or child processes anyway; we don't actually
 153  * know whether a given child has become a backend or is still in the
 154  * authorization phase).  This is used mainly to keep track of how many
 155  * children we have and send them appropriate signals when necessary.
 156  *
 157  * As shown in the above set of backend types, this list includes not only
 158  * "normal" client sessions, but also autovacuum workers, walsenders, and
 159  * background workers.  (Note that at the time of launch, walsenders are
 160  * labeled BACKEND_TYPE_NORMAL; we relabel them to BACKEND_TYPE_WALSND
 161  * upon noticing they've changed their PMChildFlags entry.  Hence that check
 162  * must be done before any operation that needs to distinguish walsenders
 163  * from normal backends.)
 164  *
 165  * Also, "dead_end" children are in it: these are children launched just for
 166  * the purpose of sending a friendly rejection message to a would-be client.
 167  * We must track them because they are attached to shared memory, but we know
 168  * they will never become live backends.  dead_end children are not assigned a
 169  * PMChildSlot.  dead_end children have bkend_type NORMAL.
 170  *
 171  * "Special" children such as the startup, bgwriter and autovacuum launcher
 172  * tasks are not in this list.  They are tracked via StartupPID and other
 173  * pid_t variables below.  (Thus, there can't be more than one of any given
 174  * "special" child process type.  We use BackendList entries for any child
 175  * process there can be more than one of.)
 176  */
 177 typedef struct bkend
 178 {
         /* One entry per tracked child process; entries are chained into BackendList. */
 179         pid_t           pid;                    /* process id of backend */
 180         int32           cancel_key;             /* cancel key for cancels for this backend */
 181         int                     child_slot;             /* PMChildSlot for this backend, if any
                                                          * (dead_end children are not assigned
                                                          * one) */
 182         int                     bkend_type;             /* child process flavor: one of the
                                                          * BACKEND_TYPE_* values; dead_end
                                                          * children are labeled NORMAL */
 183         bool            dead_end;               /* is it going to send an error and quit? */
 184         bool            bgworker_notify;        /* gets bgworker start/stop notifications */
 185         dlist_node      elem;                   /* list link in BackendList */
 186 } Backend;
 187
 188 static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);     /* list of Backend entries, linked through Backend.elem */
 189
 190 #ifdef EXEC_BACKEND
 191 static Backend *ShmemBackendArray;      /* EXEC_BACKEND builds only; the pointer is also carried in BackendParameters */
 192 #endif
 193
 194 BackgroundWorker *MyBgworkerEntry = NULL;       /* NULL by default; NOTE(review): presumably set only inside a background worker process -- confirm */
 195
 196
 197
 198 /* The port number we are listening for connections on (defaults to DEF_PGPORT) */
 199 int                     PostPortNumber = DEF_PGPORT;
 200
 201 /* The directory names for Unix socket(s) */
 202 char       *Unix_socket_directories;
 203
 204 /* The TCP listen address(es) */
 205 char       *ListenAddresses;
 206
 207 /*
 208  * SuperuserReservedConnections is the number of backends reserved for
 209  * superuser use, and ReservedConnections is the number of backends reserved
 210  * for use by roles with privileges of the pg_use_reserved_connections
 211  * predefined role.  These are taken out of the pool of MaxConnections backend
 212  * slots, so the number of backend slots available for roles that are neither
 213  * superuser nor have privileges of pg_use_reserved_connections is
 214  * (MaxConnections - SuperuserReservedConnections - ReservedConnections).
 215  *
 216  * If the number of remaining slots is less than or equal to
 217  * SuperuserReservedConnections, only superusers can make new connections.  If
 218  * the number of remaining slots is greater than SuperuserReservedConnections
 219  * but less than or equal to
 220  * (SuperuserReservedConnections + ReservedConnections), only superusers and
 221  * roles with privileges of pg_use_reserved_connections can make new
 222  * connections.  Note that pre-existing superuser and
 223  * pg_use_reserved_connections connections don't count against the limits.
 224  */
 225 int                     SuperuserReservedConnections;   /* backend slots held back for superusers */
 226 int                     ReservedConnections;    /* backend slots held back for roles with pg_use_reserved_connections privileges */
 227
 228 /* The socket(s) we're listening to. */
 229 #define MAXLISTEN       64      /* NOTE(review): presumably the upper bound on listen sockets -- confirm at use sites */
 230 static int      NumListenSockets = 0;   /* starts at 0; NOTE(review): presumably the number of valid ListenSockets entries -- confirm */
 231 static pgsocket *ListenSockets = NULL;  /* initially NULL, i.e. no socket array yet */
 232
 233 /*
      * still more option variables
      *
      * Where an initializer is given below, it is the compiled-in starting
      * value.  NOTE(review): the units of PreAuthDelay and AuthenticationTimeout
      * are not visible in this file section -- presumably seconds, confirm.
      */
 234 bool            EnableSSL = false;
 235
 236 int                     PreAuthDelay = 0;
 237 int                     AuthenticationTimeout = 60;
 238
 239 bool            log_hostname;           /* for ps display and logging */
 240 bool            Log_connections = false;
 241
 242 bool            enable_bonjour = false;
 243 char       *bonjour_name;
 244 bool            restart_after_crash = true;
 245 bool            remove_temp_files_after_crash = true;
 246 bool            send_abort_for_crash = false;
 247 bool            send_abort_for_kill = false;
 248
 249 /*
      * PIDs of special child processes; 0 when not running
      *
      * There is exactly one variable per special child type, so at most one
      * process of each type can be tracked at a time.  Children that can exist
      * in multiples are tracked in BackendList instead.
      */
 250 static pid_t StartupPID = 0,
 251                         BgWriterPID = 0,
 252                         CheckpointerPID = 0,
 253                         WalWriterPID = 0,
 254                         WalReceiverPID = 0,
 255                         AutoVacPID = 0,
 256                         PgArchPID = 0,
 257                         SysLoggerPID = 0;
 258
 259 /* Startup process's status */
 260 typedef enum
 261 {
 262         STARTUP_NOT_RUNNING,    /* initial value of StartupStatus */
 263         STARTUP_RUNNING,        /* NOTE(review): presumably set once the startup process has been launched -- confirm */
 264         STARTUP_SIGNALED,                       /* we sent it a SIGQUIT or SIGKILL */
 265         STARTUP_CRASHED,        /* NOTE(review): presumably an exit we did not ask for, as opposed to STARTUP_SIGNALED -- confirm */
 266 } StartupStatusEnum;
 267
 268 static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
 269
 270 /*
      * Startup/shutdown state
      *
      * Shutdown holds one of the four values below, which are ascending
      * integers (0..3).  NOTE(review): code elsewhere presumably compares these
      * numerically so that a pending shutdown is never downgraded -- confirm
      * before renumbering.
      */
 271 #define                 NoShutdown              0
 272 #define                 SmartShutdown   1
 273 #define                 FastShutdown    2
 274 #define                 ImmediateShutdown       3
 275
 276 static int      Shutdown = NoShutdown;
 277
 278 static bool FatalError = false; /* T if recovering from backend crash */
 279
 280 /*
 281  * We use a simple state machine to control startup, shutdown, and
 282  * crash recovery (which is rather like shutdown followed by startup).
 283  *
 284  * After doing all the postmaster initialization work, we enter PM_STARTUP
 285  * state and the startup process is launched. The startup process begins by
 286  * reading the control file and other preliminary initialization steps.
 287  * In a normal startup, or after crash recovery, the startup process exits
 288  * with exit code 0 and we switch to PM_RUN state.  However, archive recovery
 289  * is handled specially since it takes much longer and we would like to support
 290  * hot standby during archive recovery.
 291  *
 292  * When the startup process is ready to start archive recovery, it signals the
 293  * postmaster, and we switch to PM_RECOVERY state. The background writer and
 294  * checkpointer are launched, while the startup process continues applying WAL.
 295  * If Hot Standby is enabled, then, after reaching a consistent point in WAL
 296  * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
 297  * state and begin accepting connections to perform read-only queries.  When
 298  * archive recovery is finished, the startup process exits with exit code 0
 299  * and we switch to PM_RUN state.
 300  *
 301  * Normal child backends can only be launched when we are in PM_RUN or
 302  * PM_HOT_STANDBY state.  (connsAllowed can also restrict launching.)
 303  * In other states we handle connection requests by launching "dead_end"
 304  * child processes, which will simply send the client an error message and
 305  * quit.  (We track these in the BackendList so that we can know when they
 306  * are all gone; this is important because they're still connected to shared
 307  * memory, and would interfere with an attempt to destroy the shmem segment,
 308  * possibly leading to SHMALL failure when we try to make a new one.)
 309  * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
 310  * to drain out of the system, and therefore stop accepting connection
 311  * requests at all until the last existing child has quit (which hopefully
 312  * will not be very long).
 313  *
 314  * Notice that this state variable does not distinguish *why* we entered
 315  * states later than PM_RUN --- Shutdown and FatalError must be consulted
 316  * to find that out.  FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
 317  * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
 318  * states when trying to recover from a crash).  It can be true in PM_STARTUP
 319  * state, because we don't clear it until we've successfully started WAL redo.
 320  */
 321 typedef enum
 322 {
         /*
          * States of the postmaster state machine described in the comment
          * above; the current state lives in pmState, which starts at PM_INIT.
          * NOTE(review): code elsewhere may depend on this declaration order
          * for range comparisons -- confirm before reordering or inserting.
          */
 323         PM_INIT,                                        /* postmaster starting */
 324         PM_STARTUP,                                     /* waiting for startup subprocess */
 325         PM_RECOVERY,                            /* in archive recovery mode */
 326         PM_HOT_STANDBY,                         /* in hot standby mode */
 327         PM_RUN,                                         /* normal "database is alive" state */
 328         PM_STOP_BACKENDS,                       /* need to stop remaining backends */
 329         PM_WAIT_BACKENDS,                       /* waiting for live backends to exit */
 330         PM_SHUTDOWN,                            /* waiting for checkpointer to do shutdown
 331                                                                  * ckpt */
 332         PM_SHUTDOWN_2,                          /* waiting for archiver and walsenders to
 333                                                                  * finish */
 334         PM_WAIT_DEAD_END,                       /* waiting for dead_end children to exit */
 335         PM_NO_CHILDREN,                         /* all important children have exited */
 336 } PMState;
 337
 338 static PMState pmState = PM_INIT;
 339
 340 /*
 341  * While performing a "smart shutdown", we restrict new connections but stay
 342  * in PM_RUN or PM_HOT_STANDBY state until all the client backends are gone.
 343  * connsAllowed is a sub-state indicator showing the active restriction.
 344  * It is of no interest unless pmState is PM_RUN or PM_HOT_STANDBY.
 345  */
 346 static bool connsAllowed = true;
 347
 348 /* Start time of SIGKILL timeout during immediate shutdown or child crash */
 349 /* Zero means timeout is not running */
 350 static time_t AbortStartTime = 0;
 351
 352 /* Length of said timeout, in seconds */
 353 #define SIGKILL_CHILDREN_AFTER_SECS             5
 354
 355 static bool ReachedNormalRunning = false;       /* T if we've reached PM_RUN */
 356
 357 bool            ClientAuthInProgress = false;   /* T during new-client
 358                                                                                          * authentication */
 359
 360 bool            redirection_done = false;       /* stderr redirected for syslogger? */
 361
 362 /* received START_AUTOVAC_LAUNCHER signal */
 363 static bool start_autovac_launcher = false;
 364
 365 /* the launcher needs to be signaled to communicate some condition */
 366 static bool avlauncher_needs_signal = false;
 367
 368 /* received START_WALRECEIVER signal */
 369 static bool WalReceiverRequested = false;
 370
 371 /* set when there's a worker that needs to be started up (initially true) */
 372 static bool StartWorkerNeeded = true;
 373 static bool HaveCrashedWorker = false;  /* NOTE(review): presumably set when a background worker has crashed and may need a restart -- confirm */
 374
 375 /*
      * set when signals arrive
      *
      * These are volatile sig_atomic_t, the type C guarantees can be written
      * safely from a signal handler.  They have no initializers, so as
      * file-scope statics they all start out zero.  NOTE(review): presumably
      * each flag is raised by the matching handle_pm_*_signal() handler and
      * consumed by the matching process_pm_*() routine -- confirm.
      */
 376 static volatile sig_atomic_t pending_pm_pmsignal;
 377 static volatile sig_atomic_t pending_pm_child_exit;
 378 static volatile sig_atomic_t pending_pm_reload_request;
 379 static volatile sig_atomic_t pending_pm_shutdown_request;
 380 static volatile sig_atomic_t pending_pm_fast_shutdown_request;
 381 static volatile sig_atomic_t pending_pm_immediate_shutdown_request;
 382
 383 /* event multiplexing object (no initializer, so it starts out NULL) */
 384 static WaitEventSet *pm_wait_set;
 385
 386 #ifdef USE_SSL
 387 /* Set when and if SSL has been initialized properly */
 388 static bool LoadedSSL = false;
 389 #endif
 390
 391 #ifdef USE_BONJOUR
 392 static DNSServiceRef bonjour_sdref = NULL;      /* NULL means no service reference is held; NOTE(review): presumably filled in when Bonjour registration succeeds -- confirm */
 393 #endif
 394
 395 /*
 396  * postmaster.c - function prototypes
 397  */
 398 static void CloseServerPorts(int status, Datum arg);
 399 static void unlink_external_pid_file(int status, Datum arg);
 400 static void getInstallationPaths(const char *argv0);
 401 static void checkControlFile(void);
 402 static Port *ConnCreate(int serverFd);
 403 static void ConnFree(Port *port);
 404 static void handle_pm_pmsignal_signal(SIGNAL_ARGS);
 405 static void handle_pm_child_exit_signal(SIGNAL_ARGS);
 406 static void handle_pm_reload_request_signal(SIGNAL_ARGS);
 407 static void handle_pm_shutdown_request_signal(SIGNAL_ARGS);
 408 static void process_pm_pmsignal(void);
 409 static void process_pm_child_exit(void);
 410 static void process_pm_reload_request(void);
 411 static void process_pm_shutdown_request(void);
 412 static void process_startup_packet_die(SIGNAL_ARGS);
 413 static void dummy_handler(SIGNAL_ARGS);
 414 static void StartupPacketTimeoutHandler(void);
 415 static void CleanupBackend(int pid, int exitstatus);
 416 static bool CleanupBackgroundWorker(int pid, int exitstatus);
 417 static void HandleChildCrash(int pid, int exitstatus, const char *procname);
 418 static void LogChildExit(int lev, const char *procname,
 419                                                  int pid, int exitstatus);
 420 static void PostmasterStateMachine(void);
 421 static void BackendInitialize(Port *port);
 422 static void BackendRun(Port *port) pg_attribute_noreturn();
 423 static void ExitPostmaster(int status) pg_attribute_noreturn();
 424 static int      ServerLoop(void);
 425 static int      BackendStartup(Port *port);
 426 static int      ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done);
 427 static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
 428 static void processCancelRequest(Port *port, void *pkt);
 429 static void report_fork_failure_to_client(Port *port, int errnum);
 430 static CAC_state canAcceptConnections(int backend_type);
 431 static bool RandomCancelKey(int32 *cancel_key);
 432 static void signal_child(pid_t pid, int signal);
 433 static void sigquit_child(pid_t pid);
 434 static bool SignalSomeChildren(int signal, int target);
 435 static void TerminateChildren(int signal);
 436
 437 #define SignalChildren(sig)                        SignalSomeChildren(sig, BACKEND_TYPE_ALL)       /* send sig to children of every backend type */
 438
 439 static int      CountChildren(int target);
 440 static bool assign_backendlist_entry(RegisteredBgWorker *rw);
 441 static void maybe_start_bgworkers(void);
 442 static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
 443 static pid_t StartChildProcess(AuxProcType type);
 444 static void StartAutovacuumWorker(void);
 445 static void MaybeStartWalReceiver(void);
 446 static void InitPostmasterDeathWatchHandle(void);
 447
 448 /*
 449  * Is the archiver allowed to start up in the current postmaster state?
 450  *
 451  * If WAL archiving is enabled always, we are allowed to start archiver
 452  * even during recovery.
      *
      * Concretely, the macro is true when PgArchCanRestart() holds and either
      *   - XLogArchivingActive() and pmState is PM_RUN, or
      *   - XLogArchivingAlways() and pmState is PM_RECOVERY or PM_HOT_STANDBY.
      * It reads the file-static pmState directly, so it is only meaningful
      * inside this file.
 453  */
 454 #define PgArchStartupAllowed()  \
 455         (((XLogArchivingActive() && pmState == PM_RUN) ||                       \
 456           (XLogArchivingAlways() &&                                                                       \
 457            (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
 458          PgArchCanRestart())
 459
 460 #ifdef EXEC_BACKEND
 461
 462 #ifdef WIN32
 463 #define WNOHANG 0                               /* ignored, so any integer value will do */
 464
 465 static pid_t waitpid(pid_t pid, int *exitstatus, int options);
 466 static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
 467
 468 static HANDLE win32ChildQueue;
 469
 470 typedef struct
 471 {
         /*
          * Windows-only (EXEC_BACKEND && WIN32) bookkeeping for one child.
          * NOTE(review): field roles are inferred from their names and from the
          * pgwin32_deadchild_callback() prototype above -- waitHandle is
          * presumably the registered wait, procHandle/procId the child process
          * being watched.  Confirm against the code that fills this in.
          */
 472         HANDLE          waitHandle;
 473         HANDLE          procHandle;
 474         DWORD           procId;
 475 } win32_deadchild_waitinfo;
 476 #endif                                                  /* WIN32 */
 477
 478 static pid_t backend_forkexec(Port *port);
 479 static pid_t internal_forkexec(int argc, char *argv[], Port *port);
 480
 481 /*
      * Type for a socket that can be inherited by a child process.
      *
      * On Windows this is a struct carrying the original SOCKET together with a
      * WSAPROTOCOL_INFO; on every other platform it is simply an int.
      */
 482 #ifdef WIN32
 483 typedef struct
 484 {
 485         SOCKET          origsocket;             /* Original socket value, or PGINVALID_SOCKET
 486                                                                  * if not a socket */
 487         WSAPROTOCOL_INFO wsainfo;       /* NOTE(review): presumably the protocol info used to re-create the socket in the child -- confirm */
 488 } InheritableSocket;
 489 #else
 490 typedef int InheritableSocket;
 491 #endif
 492
 493 /*
 494  * Structure contains all variables passed to exec:ed backends
      *
      * This type exists only in EXEC_BACKEND builds.  Its layout is
      * platform-dependent: UsedShmemSegID, the postmaster-liveness member
      * (PostmasterHandle vs. postmaster_alive_fds) and syslogPipe differ between
      * WIN32 and other platforms, ShmemProtectiveRegion and initial_signal_pipe
      * are WIN32-only, and SpinlockSemaArray is present only when HAVE_SPINLOCKS
      * is not defined.
      *
      * NOTE(review): presumably filled in by save_backend_variables() and
      * consumed by read_backend_variables()/restore_backend_variables(), whose
      * prototypes follow this struct -- confirm.  A new member would then need
      * matching code on both sides.
 495  */
 496 typedef struct
 497 {
 498         Port            port;
 499         InheritableSocket portsocket;
 500         char            DataDir[MAXPGPATH];
 501         int32           MyCancelKey;
 502         int                     MyPMChildSlot;
 503 #ifndef WIN32
 504         unsigned long UsedShmemSegID;
 505 #else
 506         void       *ShmemProtectiveRegion;
 507         HANDLE          UsedShmemSegID;
 508 #endif
 509         void       *UsedShmemSegAddr;
 510         slock_t    *ShmemLock;
 511         VariableCache ShmemVariableCache;
 512         Backend    *ShmemBackendArray;
 513 #ifndef HAVE_SPINLOCKS
 514         PGSemaphore *SpinlockSemaArray;
 515 #endif
 516         int                     NamedLWLockTrancheRequests;
 517         NamedLWLockTranche *NamedLWLockTrancheArray;
 518         LWLockPadded *MainLWLockArray;
 519         slock_t    *ProcStructLock;
 520         PROC_HDR   *ProcGlobal;
 521         PGPROC     *AuxiliaryProcs;
 522         PGPROC     *PreparedXactProcs;
 523         PMSignalData *PMSignalState;
 524         pid_t           PostmasterPid;
 525         TimestampTz PgStartTime;
 526         TimestampTz PgReloadTime;
 527         pg_time_t       first_syslogger_file_time;
 528         bool            redirection_done;
 529         bool            IsBinaryUpgrade;
 530         bool            query_id_enabled;
 531         int                     max_safe_fds;
 532         int                     MaxBackends;
 533 #ifdef WIN32
 534         HANDLE          PostmasterHandle;
 535         HANDLE          initial_signal_pipe;
 536         HANDLE          syslogPipe[2];
 537 #else
 538         int                     postmaster_alive_fds[2];
 539         int                     syslogPipe[2];
 540 #endif
 541         char            my_exec_path[MAXPGPATH];
 542         char            pkglib_path[MAXPGPATH];
 543 } BackendParameters;
 544
 545 static void read_backend_variables(char *id, Port *port);
 546 static void restore_backend_variables(BackendParameters *param, Port *port);
 547
 548 #ifndef WIN32
 549 static bool save_backend_variables(BackendParameters *param, Port *port);
 550 #else
 551 static bool save_backend_variables(BackendParameters *param, Port *port,
 552                                                                    HANDLE childProcess, pid_t childPid);
 553 #endif
 554
 555 static void ShmemBackendArrayAdd(Backend *bn);
 556 static void ShmemBackendArrayRemove(Backend *bn);
 557 #endif                                                  /* EXEC_BACKEND */
 558
 559 #define StartupDataBase()               StartChildProcess(StartupProcess)       /* these six wrappers each call StartChildProcess() with one AuxProcType */
 560 #define StartArchiver()                 StartChildProcess(ArchiverProcess)      /* and evaluate to the pid_t it returns */
 561 #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
 562 #define StartCheckpointer()             StartChildProcess(CheckpointerProcess)
 563 #define StartWalWriter()                StartChildProcess(WalWriterProcess)
 564 #define StartWalReceiver()              StartChildProcess(WalReceiverProcess)
 565
 566 /*
      * Macros to check exit status of a child process
      *
      * "st" is a raw wait status.  EXIT_STATUS_0 compares the whole status word
      * with zero, without going through WIFEXITED/WEXITSTATUS.  EXIT_STATUS_1
      * and EXIT_STATUS_3 require a normal exit (WIFEXITED) with exit code 1 or
      * 3 respectively.  The latter two evaluate their argument twice, so pass
      * an expression without side effects.
      */
 567 #define EXIT_STATUS_0(st)  ((st) == 0)
 568 #define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
 569 #define EXIT_STATUS_3(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 3)
 570
 571 #ifndef WIN32
 572 /*
 573  * File descriptors for pipe used to monitor if postmaster is alive.
 574  * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
      *
      * Both entries start out as -1, i.e. no descriptor.  NOTE(review):
      * presumably the pipe is created by InitPostmasterDeathWatchHandle() --
      * confirm.
 575  */
 576 int                     postmaster_alive_fds[2] = {-1, -1};
 577 #else
 578 /* Process handle of postmaster used for the same purpose on Windows */
 579 HANDLE          PostmasterHandle;
 580 #endif
 581
 582 /*
 583  * Postmaster main entry point
 584  */
 585 void
 586 PostmasterMain(int argc, char *argv[])
 587 {
 588         int                     opt;
 589         int                     status;
 590         char       *userDoption = NULL;
 591         bool            listen_addr_saved = false;
 592         char       *output_config_variable = NULL;
 593
 594         InitProcessGlobals();
 595
 596         PostmasterPid = MyProcPid;
 597
 598         IsPostmasterEnvironment = true;
 599
 600         /*
 601          * Start our win32 signal implementation
 602          */
 603 #ifdef WIN32
 604         pgwin32_signal_initialize();
 605 #endif
 606
 607         /*
 608          * We should not be creating any files or directories before we check the
 609          * data directory (see checkDataDir()), but just in case set the umask to
 610          * the most restrictive (owner-only) permissions.
 611          *
 612          * checkDataDir() will reset the umask based on the data directory
 613          * permissions.
 614          */
 615         umask(PG_MODE_MASK_OWNER);
 616
 617         /*
 618          * By default, palloc() requests in the postmaster will be allocated in
 619          * the PostmasterContext, which is space that can be recycled by backends.
 620          * Allocated data that needs to be available to backends should be
 621          * allocated in TopMemoryContext.
 622          */
 623         PostmasterContext = AllocSetContextCreate(TopMemoryContext,
 624                                                                                           "Postmaster",
 625                                                                                           ALLOCSET_DEFAULT_SIZES);
 626         MemoryContextSwitchTo(PostmasterContext);
 627
 628         /* Initialize paths to installation files */
 629         getInstallationPaths(argv[0]);
 630
 631         /*
 632          * Set up signal handlers for the postmaster process.
 633          *
 634          * CAUTION: when changing this list, check for side-effects on the signal
 635          * handling setup of child processes.  See tcop/postgres.c,
 636          * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
 637          * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/syslogger.c,
 638          * postmaster/bgworker.c and postmaster/checkpointer.c.
 639          */
 640         pqinitmask();
 641         sigprocmask(SIG_SETMASK, &BlockSig, NULL);
 642
 643         pqsignal(SIGHUP, handle_pm_reload_request_signal);
 644         pqsignal(SIGINT, handle_pm_shutdown_request_signal);
 645         pqsignal(SIGQUIT, handle_pm_shutdown_request_signal);
 646         pqsignal(SIGTERM, handle_pm_shutdown_request_signal);
 647         pqsignal(SIGALRM, SIG_IGN); /* ignored */
 648         pqsignal(SIGPIPE, SIG_IGN); /* ignored */
 649         pqsignal(SIGUSR1, handle_pm_pmsignal_signal);
 650         pqsignal(SIGUSR2, dummy_handler);       /* unused, reserve for children */
 651         pqsignal(SIGCHLD, handle_pm_child_exit_signal);
 652
 653         /* This may configure SIGURG, depending on platform. */
 654         InitializeLatchSupport();
 655         InitProcessLocalLatch();
 656
 657         /*
 658          * No other place in Postgres should touch SIGTTIN/SIGTTOU handling.  We
 659          * ignore those signals in a postmaster environment, so that there is no
 660          * risk of a child process freezing up due to writing to stderr.  But for
 661          * a standalone backend, their default handling is reasonable.  Hence, all
 662          * child processes should just allow the inherited settings to stand.
 663          */
 664 #ifdef SIGTTIN
 665         pqsignal(SIGTTIN, SIG_IGN); /* ignored */
 666 #endif
 667 #ifdef SIGTTOU
 668         pqsignal(SIGTTOU, SIG_IGN); /* ignored */
 669 #endif
 670
 671         /* ignore SIGXFSZ, so that ulimit violations work like disk full */
 672 #ifdef SIGXFSZ
 673         pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
 674 #endif
 675
 676         /* Begin accepting signals. */
 677         sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
 678
 679         /*
 680          * Options setup
 681          */
 682         InitializeGUCOptions();
 683
 684         opterr = 1;
 685
 686         /*
 687          * Parse command-line options.  CAUTION: keep this in sync with
 688          * tcop/postgres.c (the option sets should not conflict) and with the
 689          * common help() function in main/main.c.
 690          */
 691         while ((opt = getopt(argc, argv, "B:bC:c:D:d:EeFf:h:ijk:lN:OPp:r:S:sTt:W:-:")) != -1)
 692         {
 693                 switch (opt)
 694                 {
 695                         case 'B':
 696                                 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 697                                 break;
 698
 699                         case 'b':
 700                                 /* Undocumented flag used for binary upgrades */
 701                                 IsBinaryUpgrade = true;
 702                                 break;
 703
 704                         case 'C':
 705                                 output_config_variable = strdup(optarg);
 706                                 break;
 707
 708                         case 'c':
 709                         case '-':
 710                                 {
 711                                         char       *name,
 712                                                            *value;
 713
 714                                         ParseLongOption(optarg, &name, &value);
 715                                         if (!value)
 716                                         {
 717                                                 if (opt == '-')
 718                                                         ereport(ERROR,
 719                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 720                                                                          errmsg("--%s requires a value",
 721                                                                                         optarg)));
 722                                                 else
 723                                                         ereport(ERROR,
 724                                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 725                                                                          errmsg("-c %s requires a value",
 726                                                                                         optarg)));
 727                                         }
 728
 729                                         SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
 730                                         pfree(name);
 731                                         pfree(value);
 732                                         break;
 733                                 }
 734
 735                         case 'D':
 736                                 userDoption = strdup(optarg);
 737                                 break;
 738
 739                         case 'd':
 740                                 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
 741                                 break;
 742
 743                         case 'E':
 744                                 SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
 745                                 break;
 746
 747                         case 'e':
 748                                 SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
 749                                 break;
 750
 751                         case 'F':
 752                                 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
 753                                 break;
 754
 755                         case 'f':
 756                                 if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
 757                                 {
 758                                         write_stderr("%s: invalid argument for option -f: \"%s\"\n",
 759                                                                  progname, optarg);
 760                                         ExitPostmaster(1);
 761                                 }
 762                                 break;
 763
 764                         case 'h':
 765                                 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 766                                 break;
 767
 768                         case 'i':
 769                                 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
 770                                 break;
 771
 772                         case 'j':
 773                                 /* only used by interactive backend */
 774                                 break;
 775
 776                         case 'k':
 777                                 SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 778                                 break;
 779
 780                         case 'l':
 781                                 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
 782                                 break;
 783
 784                         case 'N':
 785                                 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 786                                 break;
 787
 788                         case 'O':
 789                                 SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
 790                                 break;
 791
 792                         case 'P':
 793                                 SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
 794                                 break;
 795
 796                         case 'p':
 797                                 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 798                                 break;
 799
 800                         case 'r':
 801                                 /* only used by single-user backend */
 802                                 break;
 803
 804                         case 'S':
 805                                 SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 806                                 break;
 807
 808                         case 's':
 809                                 SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
 810                                 break;
 811
 812                         case 'T':
 813
 814                                 /*
 815                                  * This option used to be defined as sending SIGSTOP after a
 816                                  * backend crash, but sending SIGABRT seems more useful.
 817                                  */
 818                                 SetConfigOption("send_abort_for_crash", "true", PGC_POSTMASTER, PGC_S_ARGV);
 819                                 break;
 820
 821                         case 't':
 822                                 {
 823                                         const char *tmp = get_stats_option_name(optarg);
 824
 825                                         if (tmp)
 826                                         {
 827                                                 SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
 828                                         }
 829                                         else
 830                                         {
 831                                                 write_stderr("%s: invalid argument for option -t: \"%s\"\n",
 832                                                                          progname, optarg);
 833                                                 ExitPostmaster(1);
 834                                         }
 835                                         break;
 836                                 }
 837
 838                         case 'W':
 839                                 SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 840                                 break;
 841
 842                         default:
 843                                 write_stderr("Try \"%s --help\" for more information.\n",
 844                                                          progname);
 845                                 ExitPostmaster(1);
 846                 }
 847         }
 848
 849         /*
 850          * Postmaster accepts no non-option switch arguments.
 851          */
 852         if (optind < argc)
 853         {
 854                 write_stderr("%s: invalid argument: \"%s\"\n",
 855                                          progname, argv[optind]);
 856                 write_stderr("Try \"%s --help\" for more information.\n",
 857                                          progname);
 858                 ExitPostmaster(1);
 859         }
 860
 861         /*
 862          * Locate the proper configuration files and data directory, and read
 863          * postgresql.conf for the first time.
 864          */
 865         if (!SelectConfigFiles(userDoption, progname))
 866                 ExitPostmaster(2);
 867
 868         if (output_config_variable != NULL)
 869         {
 870                 /*
 871                  * If this is a runtime-computed GUC, it hasn't yet been initialized,
 872                  * and the present value is not useful.  However, this is a convenient
 873                  * place to print the value for most GUCs because it is safe to run
 874                  * postmaster startup to this point even if the server is already
 875                  * running.  For the handful of runtime-computed GUCs that we cannot
 876                  * provide meaningful values for yet, we wait until later in
 877                  * postmaster startup to print the value.  We won't be able to use -C
 878                  * on running servers for those GUCs, but using this option now would
 879                  * lead to incorrect results for them.
 880                  */
 881                 int                     flags = GetConfigOptionFlags(output_config_variable, true);
 882
 883                 if ((flags & GUC_RUNTIME_COMPUTED) == 0)
 884                 {
 885                         /*
 886                          * "-C guc" was specified, so print GUC's value and exit.  No
 887                          * extra permission check is needed because the user is reading
 888                          * inside the data dir.
 889                          */
 890                         const char *config_val = GetConfigOption(output_config_variable,
 891                                                                                                          false, false);
 892
 893                         puts(config_val ? config_val : "");
 894                         ExitPostmaster(0);
 895                 }
 896
 897                 /*
 898                  * A runtime-computed GUC will be printed later on.  As we initialize
 899                  * a server startup sequence, silence any log messages that may show
 900                  * up in the output generated.  FATAL and more severe messages are
 901                  * useful to show, even if one would only expect at least PANIC.  LOG
 902                  * entries are hidden.
 903                  */
 904                 SetConfigOption("log_min_messages", "FATAL", PGC_SUSET,
 905                                                 PGC_S_OVERRIDE);
 906         }
 907
 908         /* Verify that DataDir looks reasonable */
 909         checkDataDir();
 910
 911         /* Check that pg_control exists */
 912         checkControlFile();
 913
 914         /* And switch working directory into it */
 915         ChangeToDataDir();
 916
 917         /*
 918          * Check for invalid combinations of GUC settings.
 919          */
 920         if (SuperuserReservedConnections + ReservedConnections >= MaxConnections)
 921         {
 922                 write_stderr("%s: superuser_reserved_connections (%d) plus reserved_connections (%d) must be less than max_connections (%d)\n",
 923                                          progname,
 924                                          SuperuserReservedConnections, ReservedConnections,
 925                                          MaxConnections);
 926                 ExitPostmaster(1);
 927         }
 928         if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
 929                 ereport(ERROR,
 930                                 (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
 931         if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
 932                 ereport(ERROR,
 933                                 (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
 934
 935         /*
 936          * Other one-time internal sanity checks can go here, if they are fast.
 937          * (Put any slow processing further down, after postmaster.pid creation.)
 938          */
 939         if (!CheckDateTokenTables())
 940         {
 941                 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
 942                 ExitPostmaster(1);
 943         }
 944
 945         /*
 946          * Now that we are done processing the postmaster arguments, reset
 947          * getopt(3) library so that it will work correctly in subprocesses.
 948          */
 949         optind = 1;
 950 #ifdef HAVE_INT_OPTRESET
 951         optreset = 1;                           /* some systems need this too */
 952 #endif
 953
 954         /* For debugging: display postmaster environment */
 955         {
 956                 extern char **environ;
 957                 char      **p;
 958
 959                 ereport(DEBUG3,
 960                                 (errmsg_internal("%s: PostmasterMain: initial environment dump:",
 961                                                                  progname)));
 962                 ereport(DEBUG3,
 963                                 (errmsg_internal("-----------------------------------------")));
 964                 for (p = environ; *p; ++p)
 965                         ereport(DEBUG3,
 966                                         (errmsg_internal("\t%s", *p)));
 967                 ereport(DEBUG3,
 968                                 (errmsg_internal("-----------------------------------------")));
 969         }
 970
 971         /*
 972          * Create lockfile for data directory.
 973          *
 974          * We want to do this before we try to grab the input sockets, because the
 975          * data directory interlock is more reliable than the socket-file
 976          * interlock (thanks to whoever decided to put socket files in /tmp :-().
 977          * For the same reason, it's best to grab the TCP socket(s) before the
 978          * Unix socket(s).
 979          *
 980          * Also note that this internally sets up the on_proc_exit function that
 981          * is responsible for removing both data directory and socket lockfiles;
 982          * so it must happen before opening sockets so that at exit, the socket
 983          * lockfiles go away after CloseServerPorts runs.
 984          */
 985         CreateDataDirLockFile(true);
 986
 987         /*
 988          * Read the control file (for error checking and config info).
 989          *
 990          * Since we verify the control file's CRC, this has a useful side effect
 991          * on machines where we need a run-time test for CRC support instructions.
 992          * The postmaster will do the test once at startup, and then its child
 993          * processes will inherit the correct function pointer and not need to
 994          * repeat the test.
 995          */
 996         LocalProcessControlFile(false);
 997
 998         /*
 999          * Register the apply launcher.  It's probably a good idea to call this
1000          * before any modules get a chance to take the background worker slots.
1001          */
1002         ApplyLauncherRegister();
1003
1004         /*
1005          * process any libraries that should be preloaded at postmaster start
1006          */
1007         process_shared_preload_libraries();
1008
1009         /*
1010          * Initialize SSL library, if specified.
1011          */
1012 #ifdef USE_SSL
1013         if (EnableSSL)
1014         {
1015                 (void) secure_initialize(true);
1016                 LoadedSSL = true;
1017         }
1018 #endif
1019
1020         /*
1021          * Now that loadable modules have had their chance to alter any GUCs,
1022          * calculate MaxBackends.
1023          */
1024         InitializeMaxBackends();
1025
1026         /*
1027          * Give preloaded libraries a chance to request additional shared memory.
1028          */
1029         process_shmem_requests();
1030
1031         /*
1032          * Now that loadable modules have had their chance to request additional
1033          * shared memory, determine the value of any runtime-computed GUCs that
1034          * depend on the amount of shared memory required.
1035          */
1036         InitializeShmemGUCs();
1037
1038         /*
1039          * Now that modules have been loaded, we can process any custom resource
1040          * managers specified in the wal_consistency_checking GUC.
1041          */
1042         InitializeWalConsistencyChecking();
1043
1044         /*
1045          * If -C was specified with a runtime-computed GUC, we held off printing
1046          * the value earlier, as the GUC was not yet initialized.  We handle -C
1047          * for most GUCs before we lock the data directory so that the option may
1048          * be used on a running server.  However, a handful of GUCs are runtime-
1049          * computed and do not have meaningful values until after locking the data
1050          * directory, and we cannot safely calculate their values earlier on a
1051          * running server.  At this point, such GUCs should be properly
1052          * initialized, and we haven't yet set up shared memory, so this is a good
1053          * time to handle the -C option for these special GUCs.
1054          */
1055         if (output_config_variable != NULL)
1056         {
1057                 const char *config_val = GetConfigOption(output_config_variable,
1058                                                                                                  false, false);
1059
1060                 puts(config_val ? config_val : "");
1061                 ExitPostmaster(0);
1062         }
1063
1064         /*
1065          * Set up shared memory and semaphores.
1066          *
1067          * Note: if using SysV shmem and/or semas, each postmaster startup will
1068          * normally choose the same IPC keys.  This helps ensure that we will
1069          * clean up dead IPC objects if the postmaster crashes and is restarted.
1070          */
1071         CreateSharedMemoryAndSemaphores();
1072
1073         /*
1074          * Estimate number of openable files.  This must happen after setting up
1075          * semaphores, because on some platforms semaphores count as open files.
1076          */
1077         set_max_safe_fds();
1078
1079         /*
1080          * Set reference point for stack-depth checking.
1081          */
1082         (void) set_stack_base();
1083
1084         /*
1085          * Initialize pipe (or process handle on Windows) that allows children to
1086          * wake up from sleep on postmaster death.
1087          */
1088         InitPostmasterDeathWatchHandle();
1089
1090 #ifdef WIN32
1091
1092         /*
1093          * Initialize I/O completion port used to deliver list of dead children.
1094          */
1095         win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1096         if (win32ChildQueue == NULL)
1097                 ereport(FATAL,
1098                                 (errmsg("could not create I/O completion port for child queue")));
1099 #endif
1100
1101 #ifdef EXEC_BACKEND
1102         /* Write out nondefault GUC settings for child processes to use */
1103         write_nondefault_variables(PGC_POSTMASTER);
1104
1105         /*
1106          * Clean out the temp directory used to transmit parameters to child
1107          * processes (see internal_forkexec, below).  We must do this before
1108          * launching any child processes, else we have a race condition: we could
1109          * remove a parameter file before the child can read it.  It should be
1110          * safe to do so now, because we verified earlier that there are no
1111          * conflicting Postgres processes in this data directory.
1112          */
1113         RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, true, false);
1114 #endif
1115
1116         /*
1117          * Forcibly remove the files signaling a standby promotion request.
1118          * Otherwise, the existence of those files triggers a promotion too early,
1119          * whether a user wants that or not.
1120          *
1121          * This removal of files is usually unnecessary because they can exist
1122          * only for a few moments during a standby promotion. However, there is
1123          * a race condition: if pg_ctl promote is executed and creates the files
1124          * during a promotion, the files can stay around even after the server is
1125          * brought up to be the primary.  Then, if a new standby starts by using
1126          * the backup taken from the new primary, the files can exist at server
1127          * startup and must be removed in order to avoid an unexpected promotion.
1128          *
1129          * Note that promotion signal files need to be removed before the startup
1130          * process is invoked, because after that they can be used by the
1131          * postmaster's SIGUSR1 signal handler.
1132          */
1133         RemovePromoteSignalFiles();
1134
1135         /* Do the same for logrotate signal file */
1136         RemoveLogrotateSignalFiles();
1137
1138         /* Remove any outdated file holding the current log filenames. */
1139         if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1140                 ereport(LOG,
1141                                 (errcode_for_file_access(),
1142                                  errmsg("could not remove file \"%s\": %m",
1143                                                 LOG_METAINFO_DATAFILE)));
1144
1145         /*
1146          * If enabled, start up syslogger collection subprocess
1147          */
1148         SysLoggerPID = SysLogger_Start();
1149
1150         /*
1151          * Reset whereToSendOutput from DestDebug (its starting state) to
1152          * DestNone. This stops ereport from sending log messages to stderr unless
1153          * Log_destination permits.  We don't do this until the postmaster is
1154          * fully launched, since startup failures may as well be reported to
1155          * stderr.
1156          *
1157          * If we are in fact disabling logging to stderr, first emit a log message
1158          * saying so, to provide a breadcrumb trail for users who may not remember
1159          * that their logging is configured to go somewhere else.
1160          */
1161         if (!(Log_destination & LOG_DESTINATION_STDERR))
1162                 ereport(LOG,
1163                                 (errmsg("ending log output to stderr"),
1164                                  errhint("Future log output will go to log destination \"%s\".",
1165                                                  Log_destination_string)));
1166
1167         whereToSendOutput = DestNone;
1168
1169         /*
1170          * Report server startup in log.  While we could emit this much earlier,
1171          * it seems best to do so after starting the log collector, if we intend
1172          * to use one.
1173          */
1174         ereport(LOG,
1175                         (errmsg("starting %s", PG_VERSION_STR)));
1176
1177         /*
1178          * Establish input sockets.
1179          *
1180          * First set up an on_proc_exit function that's charged with closing the
1181          * sockets again at postmaster shutdown.
1182          */
1183         ListenSockets = palloc(MAXLISTEN * sizeof(pgsocket));
1184         on_proc_exit(CloseServerPorts, 0);
1185
1186         if (ListenAddresses)
1187         {
1188                 char       *rawstring;
1189                 List       *elemlist;
1190                 ListCell   *l;
1191                 int                     success = 0;
1192
1193                 /* Need a modifiable copy of ListenAddresses */
1194                 rawstring = pstrdup(ListenAddresses);
1195
1196                 /* Parse string into list of hostnames */
1197                 if (!SplitGUCList(rawstring, ',', &elemlist))
1198                 {
1199                         /* syntax error in list */
1200                         ereport(FATAL,
1201                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1202                                          errmsg("invalid list syntax in parameter \"%s\"",
1203                                                         "listen_addresses")));
1204                 }
1205
1206                 foreach(l, elemlist)
1207                 {
1208                         char       *curhost = (char *) lfirst(l);
1209
1210                         if (strcmp(curhost, "*") == 0)
1211                                 status = StreamServerPort(AF_UNSPEC, NULL,
1212                                                                                   (unsigned short) PostPortNumber,
1213                                                                                   NULL,
1214                                                                                   ListenSockets,
1215                                                                                   &NumListenSockets,
1216                                                                                   MAXLISTEN);
1217                         else
1218                                 status = StreamServerPort(AF_UNSPEC, curhost,
1219                                                                                   (unsigned short) PostPortNumber,
1220                                                                                   NULL,
1221                                                                                   ListenSockets,
1222                                                                                   &NumListenSockets,
1223                                                                                   MAXLISTEN);
1224
1225                         if (status == STATUS_OK)
1226                         {
1227                                 success++;
1228                                 /* record the first successful host addr in lockfile */
1229                                 if (!listen_addr_saved)
1230                                 {
1231                                         AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1232                                         listen_addr_saved = true;
1233                                 }
1234                         }
1235                         else
1236                                 ereport(WARNING,
1237                                                 (errmsg("could not create listen socket for \"%s\"",
1238                                                                 curhost)));
1239                 }
1240
1241                 if (!success && elemlist != NIL)
1242                         ereport(FATAL,
1243                                         (errmsg("could not create any TCP/IP sockets")));
1244
1245                 list_free(elemlist);
1246                 pfree(rawstring);
1247         }
1248
1249 #ifdef USE_BONJOUR
1250         /* Register for Bonjour only if we opened TCP socket(s) */
1251         if (enable_bonjour && NumListenSockets > 0)
1252         {
1253                 DNSServiceErrorType err;
1254
1255                 /*
1256                  * We pass 0 for interface_index, which will result in registering on
1257                  * all "applicable" interfaces.  It's not entirely clear from the
1258                  * DNS-SD docs whether this would be appropriate if we have bound to
1259                  * just a subset of the available network interfaces.
1260                  */
1261                 err = DNSServiceRegister(&bonjour_sdref,
1262                                                                  0,
1263                                                                  0,
1264                                                                  bonjour_name,
1265                                                                  "_postgresql._tcp.",
1266                                                                  NULL,
1267                                                                  NULL,
1268                                                                  pg_hton16(PostPortNumber),
1269                                                                  0,
1270                                                                  NULL,
1271                                                                  NULL,
1272                                                                  NULL);
1273                 if (err != kDNSServiceErr_NoError)
1274                         ereport(LOG,
1275                                         (errmsg("DNSServiceRegister() failed: error code %ld",
1276                                                         (long) err)));
1277
1278                 /*
1279                  * We don't bother to read the mDNS daemon's reply, and we expect that
1280                  * it will automatically terminate our registration when the socket is
1281                  * closed at postmaster termination.  So there's nothing more to be
1282                  * done here.  However, the bonjour_sdref is kept around so that
1283                  * forked children can close their copies of the socket.
1284                  */
1285         }
1286 #endif
1287
1288         if (Unix_socket_directories)
1289         {
1290                 char       *rawstring;
1291                 List       *elemlist;
1292                 ListCell   *l;
1293                 int                     success = 0;
1294
1295                 /* Need a modifiable copy of Unix_socket_directories */
1296                 rawstring = pstrdup(Unix_socket_directories);
1297
1298                 /* Parse string into list of directories */
1299                 if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1300                 {
1301                         /* syntax error in list */
1302                         ereport(FATAL,
1303                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1304                                          errmsg("invalid list syntax in parameter \"%s\"",
1305                                                         "unix_socket_directories")));
1306                 }
1307
1308                 foreach(l, elemlist)
1309                 {
1310                         char       *socketdir = (char *) lfirst(l);
1311
1312                         status = StreamServerPort(AF_UNIX, NULL,
1313                                                                           (unsigned short) PostPortNumber,
1314                                                                           socketdir,
1315                                                                           ListenSockets,
1316                                                                           &NumListenSockets,
1317                                                                           MAXLISTEN);
1318
1319                         if (status == STATUS_OK)
1320                         {
1321                                 success++;
1322                                 /* record the first successful Unix socket in lockfile */
1323                                 if (success == 1)
1324                                         AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1325                         }
1326                         else
1327                                 ereport(WARNING,
1328                                                 (errmsg("could not create Unix-domain socket in directory \"%s\"",
1329                                                                 socketdir)));
1330                 }
1331
1332                 if (!success && elemlist != NIL)
1333                         ereport(FATAL,
1334                                         (errmsg("could not create any Unix-domain sockets")));
1335
1336                 list_free_deep(elemlist);
1337                 pfree(rawstring);
1338         }
1339
1340         /*
1341          * check that we have some socket to listen on
1342          */
1343         if (NumListenSockets == 0)
1344                 ereport(FATAL,
1345                                 (errmsg("no socket created for listening")));
1346
1347         /*
1348          * If no valid TCP ports, write an empty line for listen address,
1349          * indicating the Unix socket must be used.  Note that this line is not
1350          * added to the lock file until there is a socket backing it.
1351          */
1352         if (!listen_addr_saved)
1353                 AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1354
1355         /*
1356          * Record postmaster options.  We delay this till now to avoid recording
1357          * bogus options (eg, unusable port number).
1358          */
1359         if (!CreateOptsFile(argc, argv, my_exec_path))
1360                 ExitPostmaster(1);
1361
1362         /*
1363          * Write the external PID file if requested
1364          */
1365         if (external_pid_file)
1366         {
1367                 FILE       *fpidfile = fopen(external_pid_file, "w");
1368
1369                 if (fpidfile)
1370                 {
1371                         fprintf(fpidfile, "%d\n", MyProcPid);
1372                         fclose(fpidfile);
1373
1374                         /* Make PID file world readable */
1375                         if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1376                                 write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1377                                                          progname, external_pid_file, strerror(errno));
1378                 }
1379                 else
1380                         write_stderr("%s: could not write external PID file \"%s\": %s\n",
1381                                                  progname, external_pid_file, strerror(errno));
1382
1383                 on_proc_exit(unlink_external_pid_file, 0);
1384         }
1385
1386         /*
1387          * Remove old temporary files.  At this point there can be no other
1388          * Postgres processes running in this directory, so this should be safe.
1389          */
1390         RemovePgTempFiles();
1391
1392         /*
1393          * Initialize the autovacuum subsystem (again, no process start yet)
1394          */
1395         autovac_init();
1396
1397         /*
1398          * Load configuration files for client authentication.
1399          */
1400         if (!load_hba())
1401         {
1402                 /*
1403                  * It makes no sense to continue if we fail to load the HBA file,
1404                  * since there is no way to connect to the database in this case.
1405                  */
1406                 ereport(FATAL,
1407                 /* translator: %s is a configuration file */
1408                                 (errmsg("could not load %s", HbaFileName)));
1409         }
1410         if (!load_ident())
1411         {
1412                 /*
1413                  * We can start up without the IDENT file, although it means that you
1414                  * cannot log in using any of the authentication methods that need a
1415                  * user name mapping. load_ident() already logged the details of error
1416                  * to the log.
1417                  */
1418         }
1419
1420 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1421
1422         /*
1423          * On macOS, libintl replaces setlocale() with a version that calls
1424          * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1425          * environment variable is unset or empty.  CFLocaleCopyCurrent() makes
1426          * the process multithreaded.  The postmaster calls sigprocmask() and
1427          * calls fork() without an immediate exec(), both of which have undefined
1428          * behavior in a multithreaded program.  A multithreaded postmaster is the
1429          * normal case on Windows, which offers neither fork() nor sigprocmask().
1430          */
1431         if (pthread_is_threaded_np() != 0)
1432                 ereport(FATAL,
1433                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1434                                  errmsg("postmaster became multithreaded during startup"),
1435                                  errhint("Set the LC_ALL environment variable to a valid locale.")));
1436 #endif
1437
1438         /*
1439          * Remember postmaster startup time
1440          */
1441         PgStartTime = GetCurrentTimestamp();
1442
1443         /*
1444          * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1445          * see what's happening.
1446          */
1447         AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1448
1449         /* Start bgwriter and checkpointer so they can help with recovery */
1450         if (CheckpointerPID == 0)
1451                 CheckpointerPID = StartCheckpointer();
1452         if (BgWriterPID == 0)
1453                 BgWriterPID = StartBackgroundWriter();
1454
1455         /*
1456          * We're ready to rock and roll...
1457          */
1458         StartupPID = StartupDataBase();
1459         Assert(StartupPID != 0);
1460         StartupStatus = STARTUP_RUNNING;
1461         pmState = PM_STARTUP;
1462
1463         /* Some workers may be scheduled to start now */
1464         maybe_start_bgworkers();
1465
1466         status = ServerLoop();
1467
1468         /*
1469          * ServerLoop probably shouldn't ever return, but if it does, close down.
1470          */
1471         ExitPostmaster(status != STATUS_OK);
1472
1473         abort();                                        /* not reached */
1474 }
1475
1476
1477 /*
1478  * on_proc_exit callback to close server's listen sockets
1479  */
1480 static void
1481 CloseServerPorts(int status, Datum arg)
1482 {
1483         int                     i;
1484
1485         /*
1486          * First, explicitly close all the socket FDs.  We used to just let this
1487          * happen implicitly at postmaster exit, but it's better to close them
1488          * before we remove the postmaster.pid lockfile; otherwise there's a race
1489          * condition if a new postmaster wants to re-use the TCP port number.
1490          */
1491         for (i = 0; i < NumListenSockets; i++)
1492                 StreamClose(ListenSockets[i]);
1493         NumListenSockets = 0;
1494
1495         /*
1496          * Next, remove any filesystem entries for Unix sockets.  To avoid race
1497          * conditions against incoming postmasters, this must happen after closing
1498          * the sockets and before removing lock files.
1499          */
1500         RemoveSocketFiles();
1501
1502         /*
1503          * We don't do anything about socket lock files here; those will be
1504          * removed in a later on_proc_exit callback.
1505          */
1506 }
1507
1508 /*
1509  * on_proc_exit callback to delete external_pid_file
1510  */
1511 static void
1512 unlink_external_pid_file(int status, Datum arg)
1513 {
1514         if (external_pid_file)
1515                 unlink(external_pid_file);
1516 }
1517
1518
1519 /*
1520  * Compute and check the directory paths to files that are part of the
1521  * installation (as deduced from the postgres executable's own location)
1522  */
1523 static void
1524 getInstallationPaths(const char *argv0)
1525 {
1526         DIR                *pdir;
1527
1528         /* Locate the postgres executable itself */
1529         if (find_my_exec(argv0, my_exec_path) < 0)
1530                 ereport(FATAL,
1531                                 (errmsg("%s: could not locate my own executable path", argv0)));
1532
1533 #ifdef EXEC_BACKEND
1534         /* Locate executable backend before we change working directory */
1535         if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1536                                                 postgres_exec_path) < 0)
1537                 ereport(FATAL,
1538                                 (errmsg("%s: could not locate matching postgres executable",
1539                                                 argv0)));
1540 #endif
1541
1542         /*
1543          * Locate the pkglib directory --- this has to be set early in case we try
1544          * to load any modules from it in response to postgresql.conf entries.
1545          */
1546         get_pkglib_path(my_exec_path, pkglib_path);
1547
1548         /*
1549          * Verify that there's a readable directory there; otherwise the Postgres
1550          * installation is incomplete or corrupt.  (A typical cause of this
1551          * failure is that the postgres executable has been moved or hardlinked to
1552          * some directory that's not a sibling of the installation lib/
1553          * directory.)
1554          */
1555         pdir = AllocateDir(pkglib_path);
1556         if (pdir == NULL)
1557                 ereport(ERROR,
1558                                 (errcode_for_file_access(),
1559                                  errmsg("could not open directory \"%s\": %m",
1560                                                 pkglib_path),
1561                                  errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1562                                                  my_exec_path)));
1563         FreeDir(pdir);
1564
1565         /*
1566          * It's not worth checking the share/ directory.  If the lib/ directory is
1567          * there, then share/ probably is too.
1568          */
1569 }
1570
1571 /*
1572  * Check that pg_control exists in the correct location in the data directory.
1573  *
1574  * No attempt is made to validate the contents of pg_control here.  This is
1575  * just a sanity check to see if we are looking at a real data directory.
1576  */
1577 static void
1578 checkControlFile(void)
1579 {
1580         char            path[MAXPGPATH];
1581         FILE       *fp;
1582
1583         snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1584
1585         fp = AllocateFile(path, PG_BINARY_R);
1586         if (fp == NULL)
1587         {
1588                 write_stderr("%s: could not find the database system\n"
1589                                          "Expected to find it in the directory \"%s\",\n"
1590                                          "but could not open file \"%s\": %s\n",
1591                                          progname, DataDir, path, strerror(errno));
1592                 ExitPostmaster(2);
1593         }
1594         FreeFile(fp);
1595 }
1596
1597 /*
1598  * Determine how long should we let ServerLoop sleep, in milliseconds.
1599  *
1600  * In normal conditions we wait at most one minute, to ensure that the other
1601  * background tasks handled by ServerLoop get done even when no requests are
1602  * arriving.  However, if there are background workers waiting to be started,
1603  * we don't actually sleep so that they are quickly serviced.  Other exception
1604  * cases are as shown in the code.
1605  */
1606 static int
1607 DetermineSleepTime(void)
1608 {
1609         TimestampTz next_wakeup = 0;
1610
1611         /*
1612          * Normal case: either there are no background workers at all, or we're in
1613          * a shutdown sequence (during which we ignore bgworkers altogether).
1614          */
1615         if (Shutdown > NoShutdown ||
1616                 (!StartWorkerNeeded && !HaveCrashedWorker))
1617         {
1618                 if (AbortStartTime != 0)
1619                 {
1620                         int                     seconds;
1621
1622                         /* time left to abort; clamp to 0 in case it already expired */
1623                         seconds = SIGKILL_CHILDREN_AFTER_SECS -
1624                                 (time(NULL) - AbortStartTime);
1625
1626                         return Max(seconds * 1000, 0);
1627                 }
1628                 else
1629                         return 60 * 1000;
1630         }
1631
1632         if (StartWorkerNeeded)
1633                 return 0;
1634
1635         if (HaveCrashedWorker)
1636         {
1637                 slist_mutable_iter siter;
1638
1639                 /*
1640                  * When there are crashed bgworkers, we sleep just long enough that
1641                  * they are restarted when they request to be.  Scan the list to
1642                  * determine the minimum of all wakeup times according to most recent
1643                  * crash time and requested restart interval.
1644                  */
1645                 slist_foreach_modify(siter, &BackgroundWorkerList)
1646                 {
1647                         RegisteredBgWorker *rw;
1648                         TimestampTz this_wakeup;
1649
1650                         rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1651
1652                         if (rw->rw_crashed_at == 0)
1653                                 continue;
1654
1655                         if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1656                                 || rw->rw_terminate)
1657                         {
1658                                 ForgetBackgroundWorker(&siter);
1659                                 continue;
1660                         }
1661
1662                         this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1663                                                                                                           1000L * rw->rw_worker.bgw_restart_time);
1664                         if (next_wakeup == 0 || this_wakeup < next_wakeup)
1665                                 next_wakeup = this_wakeup;
1666                 }
1667         }
1668
1669         if (next_wakeup != 0)
1670         {
1671                 int                     ms;
1672
1673                 /* result of TimestampDifferenceMilliseconds is in [0, INT_MAX] */
1674                 ms = (int) TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
1675                                                                                                    next_wakeup);
1676                 return Min(60 * 1000, ms);
1677         }
1678
1679         return 60 * 1000;
1680 }
1681
1682 /*
1683  * Activate or deactivate notifications of server socket events.  Since we
1684  * don't currently have a way to remove events from an existing WaitEventSet,
1685  * we'll just destroy and recreate the whole thing.  This is called during
1686  * shutdown so we can wait for backends to exit without accepting new
1687  * connections, and during crash reinitialization when we need to start
1688  * listening for new connections again.  The WaitEventSet will be freed in fork
1689  * children by ClosePostmasterPorts().
1690  */
1691 static void
1692 ConfigurePostmasterWaitSet(bool accept_connections)
1693 {
1694         if (pm_wait_set)
1695                 FreeWaitEventSet(pm_wait_set);
1696         pm_wait_set = NULL;
1697
1698         pm_wait_set = CreateWaitEventSet(CurrentMemoryContext,
1699                                                                          accept_connections ? (1 + NumListenSockets) : 1);
1700         AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
1701                                           NULL);
1702
1703         if (accept_connections)
1704         {
1705                 for (int i = 0; i < NumListenSockets; i++)
1706                         AddWaitEventToSet(pm_wait_set, WL_SOCKET_ACCEPT, ListenSockets[i],
1707                                                           NULL, NULL);
1708         }
1709 }
1710
1711 /*
1712  * Main idle loop of postmaster
1713  */
1714 static int
1715 ServerLoop(void)
1716 {
1717         time_t          last_lockfile_recheck_time,
1718                                 last_touch_time;
1719         WaitEvent       events[MAXLISTEN];
1720         int                     nevents;
1721
1722         ConfigurePostmasterWaitSet(true);
1723         last_lockfile_recheck_time = last_touch_time = time(NULL);
1724
1725         for (;;)
1726         {
1727                 time_t          now;
1728
1729                 nevents = WaitEventSetWait(pm_wait_set,
1730                                                                    DetermineSleepTime(),
1731                                                                    events,
1732                                                                    lengthof(events),
1733                                                                    0 /* postmaster posts no wait_events */ );
1734
1735                 /*
1736                  * Latch set by signal handler, or new connection pending on any of
1737                  * our sockets? If the latter, fork a child process to deal with it.
1738                  */
1739                 for (int i = 0; i < nevents; i++)
1740                 {
1741                         if (events[i].events & WL_LATCH_SET)
1742                                 ResetLatch(MyLatch);
1743
1744                         /*
1745                          * The following requests are handled unconditionally, even if we
1746                          * didn't see WL_LATCH_SET.  This gives high priority to shutdown
1747                          * and reload requests where the latch happens to appear later in
1748                          * events[] or will be reported by a later call to
1749                          * WaitEventSetWait().
1750                          */
1751                         if (pending_pm_shutdown_request)
1752                                 process_pm_shutdown_request();
1753                         if (pending_pm_reload_request)
1754                                 process_pm_reload_request();
1755                         if (pending_pm_child_exit)
1756                                 process_pm_child_exit();
1757                         if (pending_pm_pmsignal)
1758                                 process_pm_pmsignal();
1759
1760                         if (events[i].events & WL_SOCKET_ACCEPT)
1761                         {
1762                                 Port       *port;
1763
1764                                 port = ConnCreate(events[i].fd);
1765                                 if (port)
1766                                 {
1767                                         BackendStartup(port);
1768
1769                                         /*
1770                                          * We no longer need the open socket or port structure in
1771                                          * this process
1772                                          */
1773                                         StreamClose(port->sock);
1774                                         ConnFree(port);
1775                                 }
1776                         }
1777                 }
1778
1779                 /* If we have lost the log collector, try to start a new one */
1780                 if (SysLoggerPID == 0 && Logging_collector)
1781                         SysLoggerPID = SysLogger_Start();
1782
1783                 /*
1784                  * If no background writer process is running, and we are not in a
1785                  * state that prevents it, start one.  It doesn't matter if this
1786                  * fails, we'll just try again later.  Likewise for the checkpointer.
1787                  */
1788                 if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1789                         pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
1790                 {
1791                         if (CheckpointerPID == 0)
1792                                 CheckpointerPID = StartCheckpointer();
1793                         if (BgWriterPID == 0)
1794                                 BgWriterPID = StartBackgroundWriter();
1795                 }
1796
1797                 /*
1798                  * Likewise, if we have lost the walwriter process, try to start a new
1799                  * one.  But this is needed only in normal operation (else we cannot
1800                  * be writing any new WAL).
1801                  */
1802                 if (WalWriterPID == 0 && pmState == PM_RUN)
1803                         WalWriterPID = StartWalWriter();
1804
1805                 /*
1806                  * If we have lost the autovacuum launcher, try to start a new one. We
1807                  * don't want autovacuum to run in binary upgrade mode because
1808                  * autovacuum might update relfrozenxid for empty tables before the
1809                  * physical files are put in place.
1810                  */
1811                 if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1812                         (AutoVacuumingActive() || start_autovac_launcher) &&
1813                         pmState == PM_RUN)
1814                 {
1815                         AutoVacPID = StartAutoVacLauncher();
1816                         if (AutoVacPID != 0)
1817                                 start_autovac_launcher = false; /* signal processed */
1818                 }
1819
1820                 /* If we have lost the archiver, try to start a new one. */
1821                 if (PgArchPID == 0 && PgArchStartupAllowed())
1822                         PgArchPID = StartArchiver();
1823
1824                 /* If we need to signal the autovacuum launcher, do so now */
1825                 if (avlauncher_needs_signal)
1826                 {
1827                         avlauncher_needs_signal = false;
1828                         if (AutoVacPID != 0)
1829                                 kill(AutoVacPID, SIGUSR2);
1830                 }
1831
1832                 /* If we need to start a WAL receiver, try to do that now */
1833                 if (WalReceiverRequested)
1834                         MaybeStartWalReceiver();
1835
1836                 /* Get other worker processes running, if needed */
1837                 if (StartWorkerNeeded || HaveCrashedWorker)
1838                         maybe_start_bgworkers();
1839
1840 #ifdef HAVE_PTHREAD_IS_THREADED_NP
1841
1842                 /*
1843                  * With assertions enabled, check regularly for appearance of
1844                  * additional threads.  All builds check at start and exit.
1845                  */
1846                 Assert(pthread_is_threaded_np() == 0);
1847 #endif
1848
1849                 /*
1850                  * Lastly, check to see if it's time to do some things that we don't
1851                  * want to do every single time through the loop, because they're a
1852                  * bit expensive.  Note that there's up to a minute of slop in when
1853                  * these tasks will be performed, since DetermineSleepTime() will let
1854                  * us sleep at most that long; except for SIGKILL timeout which has
1855                  * special-case logic there.
1856                  */
1857                 now = time(NULL);
1858
1859                 /*
1860                  * If we already sent SIGQUIT to children and they are slow to shut
1861                  * down, it's time to send them SIGKILL (or SIGABRT if requested).
1862                  * This doesn't happen normally, but under certain conditions backends
1863                  * can get stuck while shutting down.  This is a last measure to get
1864                  * them unwedged.
1865                  *
1866                  * Note we also do this during recovery from a process crash.
1867                  */
1868                 if ((Shutdown >= ImmediateShutdown || FatalError) &&
1869                         AbortStartTime != 0 &&
1870                         (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1871                 {
1872                         /* We were gentle with them before. Not anymore */
1873                         ereport(LOG,
1874                         /* translator: %s is SIGKILL or SIGABRT */
1875                                         (errmsg("issuing %s to recalcitrant children",
1876                                                         send_abort_for_kill ? "SIGABRT" : "SIGKILL")));
1877                         TerminateChildren(send_abort_for_kill ? SIGABRT : SIGKILL);
1878                         /* reset flag so we don't SIGKILL again */
1879                         AbortStartTime = 0;
1880                 }
1881
1882                 /*
1883                  * Once a minute, verify that postmaster.pid hasn't been removed or
1884                  * overwritten.  If it has, we force a shutdown.  This avoids having
1885                  * postmasters and child processes hanging around after their database
1886                  * is gone, and maybe causing problems if a new database cluster is
1887                  * created in the same place.  It also provides some protection
1888                  * against a DBA foolishly removing postmaster.pid and manually
1889                  * starting a new postmaster.  Data corruption is likely to ensue from
1890                  * that anyway, but we can minimize the damage by aborting ASAP.
1891                  */
1892                 if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1893                 {
1894                         if (!RecheckDataDirLockFile())
1895                         {
1896                                 ereport(LOG,
1897                                                 (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1898                                 kill(MyProcPid, SIGQUIT);
1899                         }
1900                         last_lockfile_recheck_time = now;
1901                 }
1902
1903                 /*
1904                  * Touch Unix socket and lock files every 58 minutes, to ensure that
1905                  * they are not removed by overzealous /tmp-cleaning tasks.  We assume
1906                  * no one runs cleaners with cutoff times of less than an hour ...
1907                  */
1908                 if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1909                 {
1910                         TouchSocketFiles();
1911                         TouchSocketLockFiles();
1912                         last_touch_time = now;
1913                 }
1914         }
1915 }
1916
1917 /*
1918  * Read a client's startup packet and do something according to it.
1919  *
1920  * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1921  * not return at all.
1922  *
1923  * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1924  * if that's what you want.  Return STATUS_ERROR if you don't want to
1925  * send anything to the client, which would typically be appropriate
1926  * if we detect a communications failure.)
1927  *
1928  * Set ssl_done and/or gss_done when negotiation of an encrypted layer
1929  * (currently, TLS or GSSAPI) is completed. A successful negotiation of either
1930  * encryption layer sets both flags, but a rejected negotiation sets only the
1931  * flag for that layer, since the client may wish to try the other one. We
1932  * should make no assumption here about the order in which the client may make
1933  * requests.
1934  */
1935 static int
1936 ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
1937 {
1938         int32           len;
1939         char       *buf;
1940         ProtocolVersion proto;
1941         MemoryContext oldcontext;
1942
1943         pq_startmsgread();
1944
1945         /*
1946          * Grab the first byte of the length word separately, so that we can tell
1947          * whether we have no data at all or an incomplete packet.  (This might
1948          * sound inefficient, but it's not really, because of buffering in
1949          * pqcomm.c.)
1950          */
1951         if (pq_getbytes((char *) &len, 1) == EOF)
1952         {
1953                 /*
1954                  * If we get no data at all, don't clutter the log with a complaint;
1955                  * such cases often occur for legitimate reasons.  An example is that
1956                  * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1957                  * client didn't like our response, it'll probably just drop the
1958                  * connection.  Service-monitoring software also often just opens and
1959                  * closes a connection without sending anything.  (So do port
1960                  * scanners, which may be less benign, but it's not really our job to
1961                  * notice those.)
1962                  */
1963                 return STATUS_ERROR;
1964         }
1965
1966         if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1967         {
1968                 /* Got a partial length word, so bleat about that */
1969                 if (!ssl_done && !gss_done)
1970                         ereport(COMMERROR,
1971                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
1972                                          errmsg("incomplete startup packet")));
1973                 return STATUS_ERROR;
1974         }
1975
1976         len = pg_ntoh32(len);
1977         len -= 4;
1978
1979         if (len < (int32) sizeof(ProtocolVersion) ||
1980                 len > MAX_STARTUP_PACKET_LENGTH)
1981         {
1982                 ereport(COMMERROR,
1983                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1984                                  errmsg("invalid length of startup packet")));
1985                 return STATUS_ERROR;
1986         }
1987
1988         /*
1989          * Allocate space to hold the startup packet, plus one extra byte that's
1990          * initialized to be zero.  This ensures we will have null termination of
1991          * all strings inside the packet.
1992          */
1993         buf = palloc(len + 1);
1994         buf[len] = '\0';
1995
1996         if (pq_getbytes(buf, len) == EOF)
1997         {
1998                 ereport(COMMERROR,
1999                                 (errcode(ERRCODE_PROTOCOL_VIOLATION),
2000                                  errmsg("incomplete startup packet")));
2001                 return STATUS_ERROR;
2002         }
2003         pq_endmsgread();
2004
2005         /*
2006          * The first field is either a protocol version number or a special
2007          * request code.
2008          */
2009         port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
2010
2011         if (proto == CANCEL_REQUEST_CODE)
2012         {
2013                 if (len != sizeof(CancelRequestPacket))
2014                 {
2015                         ereport(COMMERROR,
2016                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
2017                                          errmsg("invalid length of startup packet")));
2018                         return STATUS_ERROR;
2019                 }
2020                 processCancelRequest(port, buf);
2021                 /* Not really an error, but we don't want to proceed further */
2022                 return STATUS_ERROR;
2023         }
2024
2025         if (proto == NEGOTIATE_SSL_CODE && !ssl_done)
2026         {
2027                 char            SSLok;
2028
2029 #ifdef USE_SSL
2030                 /* No SSL when disabled or on Unix sockets */
2031                 if (!LoadedSSL || port->laddr.addr.ss_family == AF_UNIX)
2032                         SSLok = 'N';
2033                 else
2034                         SSLok = 'S';            /* Support for SSL */
2035 #else
2036                 SSLok = 'N';                    /* No support for SSL */
2037 #endif
2038
2039 retry1:
2040                 if (send(port->sock, &SSLok, 1, 0) != 1)
2041                 {
2042                         if (errno == EINTR)
2043                                 goto retry1;    /* if interrupted, just retry */
2044                         ereport(COMMERROR,
2045                                         (errcode_for_socket_access(),
2046                                          errmsg("failed to send SSL negotiation response: %m")));
2047                         return STATUS_ERROR;    /* close the connection */
2048                 }
2049
2050 #ifdef USE_SSL
2051                 if (SSLok == 'S' && secure_open_server(port) == -1)
2052                         return STATUS_ERROR;
2053 #endif
2054
2055                 /*
2056                  * At this point we should have no data already buffered.  If we do,
2057                  * it was received before we performed the SSL handshake, so it wasn't
2058                  * encrypted and indeed may have been injected by a man-in-the-middle.
2059                  * We report this case to the client.
2060                  */
2061                 if (pq_buffer_has_data())
2062                         ereport(FATAL,
2063                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
2064                                          errmsg("received unencrypted data after SSL request"),
2065                                          errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2066
2067                 /*
2068                  * regular startup packet, cancel, etc packet should follow, but not
2069                  * another SSL negotiation request, and a GSS request should only
2070                  * follow if SSL was rejected (client may negotiate in either order)
2071                  */
2072                 return ProcessStartupPacket(port, true, SSLok == 'S');
2073         }
2074         else if (proto == NEGOTIATE_GSS_CODE && !gss_done)
2075         {
2076                 char            GSSok = 'N';
2077
2078 #ifdef ENABLE_GSS
2079                 /* No GSSAPI encryption when on Unix socket */
2080                 if (port->laddr.addr.ss_family != AF_UNIX)
2081                         GSSok = 'G';
2082 #endif
2083
2084                 while (send(port->sock, &GSSok, 1, 0) != 1)
2085                 {
2086                         if (errno == EINTR)
2087                                 continue;
2088                         ereport(COMMERROR,
2089                                         (errcode_for_socket_access(),
2090                                          errmsg("failed to send GSSAPI negotiation response: %m")));
2091                         return STATUS_ERROR;    /* close the connection */
2092                 }
2093
2094 #ifdef ENABLE_GSS
2095                 if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2096                         return STATUS_ERROR;
2097 #endif
2098
2099                 /*
2100                  * At this point we should have no data already buffered.  If we do,
2101                  * it was received before we performed the GSS handshake, so it wasn't
2102                  * encrypted and indeed may have been injected by a man-in-the-middle.
2103                  * We report this case to the client.
2104                  */
2105                 if (pq_buffer_has_data())
2106                         ereport(FATAL,
2107                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
2108                                          errmsg("received unencrypted data after GSSAPI encryption request"),
2109                                          errdetail("This could be either a client-software bug or evidence of an attempted man-in-the-middle attack.")));
2110
2111                 /*
2112                  * regular startup packet, cancel, etc packet should follow, but not
2113                  * another GSS negotiation request, and an SSL request should only
2114                  * follow if GSS was rejected (client may negotiate in either order)
2115                  */
2116                 return ProcessStartupPacket(port, GSSok == 'G', true);
2117         }
2118
2119         /* Could add additional special packet types here */
2120
2121         /*
2122          * Set FrontendProtocol now so that ereport() knows what format to send if
2123          * we fail during startup.
2124          */
2125         FrontendProtocol = proto;
2126
2127         /* Check that the major protocol version is in range. */
2128         if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
2129                 PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST))
2130                 ereport(FATAL,
2131                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2132                                  errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2133                                                 PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2134                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
2135                                                 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
2136                                                 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
2137
2138         /*
2139          * Now fetch parameters out of startup packet and save them into the Port
2140          * structure.  All data structures attached to the Port struct must be
2141          * allocated in TopMemoryContext so that they will remain available in a
2142          * running backend (even after PostmasterContext is destroyed).  We need
2143          * not worry about leaking this storage on failure, since we aren't in the
2144          * postmaster process anymore.
2145          */
2146         oldcontext = MemoryContextSwitchTo(TopMemoryContext);
2147
2148         /* Handle protocol version 3 startup packet */
2149         {
2150                 int32           offset = sizeof(ProtocolVersion);
2151                 List       *unrecognized_protocol_options = NIL;
2152
2153                 /*
2154                  * Scan packet body for name/option pairs.  We can assume any string
2155                  * beginning within the packet body is null-terminated, thanks to
2156                  * zeroing extra byte above.
2157                  */
2158                 port->guc_options = NIL;
2159
2160                 while (offset < len)
2161                 {
2162                         char       *nameptr = buf + offset;
2163                         int32           valoffset;
2164                         char       *valptr;
2165
2166                         if (*nameptr == '\0')
2167                                 break;                  /* found packet terminator */
2168                         valoffset = offset + strlen(nameptr) + 1;
2169                         if (valoffset >= len)
2170                                 break;                  /* missing value, will complain below */
2171                         valptr = buf + valoffset;
2172
2173                         if (strcmp(nameptr, "database") == 0)
2174                                 port->database_name = pstrdup(valptr);
2175                         else if (strcmp(nameptr, "user") == 0)
2176                                 port->user_name = pstrdup(valptr);
2177                         else if (strcmp(nameptr, "options") == 0)
2178                                 port->cmdline_options = pstrdup(valptr);
2179                         else if (strcmp(nameptr, "replication") == 0)
2180                         {
2181                                 /*
2182                                  * Due to backward compatibility concerns the replication
2183                                  * parameter is a hybrid beast which allows the value to be
2184                                  * either boolean or the string 'database'. The latter
2185                                  * connects to a specific database which is e.g. required for
2186                                  * logical decoding.
2187                                  */
2188                                 if (strcmp(valptr, "database") == 0)
2189                                 {
2190                                         am_walsender = true;
2191                                         am_db_walsender = true;
2192                                 }
2193                                 else if (!parse_bool(valptr, &am_walsender))
2194                                         ereport(FATAL,
2195                                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2196                                                          errmsg("invalid value for parameter \"%s\": \"%s\"",
2197                                                                         "replication",
2198                                                                         valptr),
2199                                                          errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2200                         }
2201                         else if (strncmp(nameptr, "_pq_.", 5) == 0)
2202                         {
2203                                 /*
2204                                  * Any option beginning with _pq_. is reserved for use as a
2205                                  * protocol-level option, but at present no such options are
2206                                  * defined.
2207                                  */
2208                                 unrecognized_protocol_options =
2209                                         lappend(unrecognized_protocol_options, pstrdup(nameptr));
2210                         }
2211                         else
2212                         {
2213                                 /* Assume it's a generic GUC option */
2214                                 port->guc_options = lappend(port->guc_options,
2215                                                                                         pstrdup(nameptr));
2216                                 port->guc_options = lappend(port->guc_options,
2217                                                                                         pstrdup(valptr));
2218
2219                                 /*
2220                                  * Copy application_name to port if we come across it.  This
2221                                  * is done so we can log the application_name in the
2222                                  * connection authorization message.  Note that the GUC would
2223                                  * be used but we haven't gone through GUC setup yet.
2224                                  */
2225                                 if (strcmp(nameptr, "application_name") == 0)
2226                                 {
2227                                         port->application_name = pg_clean_ascii(valptr, 0);
2228                                 }
2229                         }
2230                         offset = valoffset + strlen(valptr) + 1;
2231                 }
2232
2233                 /*
2234                  * If we didn't find a packet terminator exactly at the end of the
2235                  * given packet length, complain.
2236                  */
2237                 if (offset != len - 1)
2238                         ereport(FATAL,
2239                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
2240                                          errmsg("invalid startup packet layout: expected terminator as last byte")));
2241
2242                 /*
2243                  * If the client requested a newer protocol version or if the client
2244                  * requested any protocol options we didn't recognize, let them know
2245                  * the newest minor protocol version we do support and the names of
2246                  * any unrecognized options.
2247                  */
2248                 if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) ||
2249                         unrecognized_protocol_options != NIL)
2250                         SendNegotiateProtocolVersion(unrecognized_protocol_options);
2251         }
2252
2253         /* Check a user name was given. */
2254         if (port->user_name == NULL || port->user_name[0] == '\0')
2255                 ereport(FATAL,
2256                                 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2257                                  errmsg("no PostgreSQL user name specified in startup packet")));
2258
2259         /* The database defaults to the user name. */
2260         if (port->database_name == NULL || port->database_name[0] == '\0')
2261                 port->database_name = pstrdup(port->user_name);
2262
2263         if (am_walsender)
2264                 MyBackendType = B_WAL_SENDER;
2265         else
2266                 MyBackendType = B_BACKEND;
2267
2268         /*
2269          * Normal walsender backends, e.g. for streaming replication, are not
2270          * connected to a particular database. But walsenders used for logical
2271          * replication need to connect to a specific database. We allow streaming
2272          * replication commands to be issued even if connected to a database as it
2273          * can make sense to first make a basebackup and then stream changes
2274          * starting from that.
2275          */
2276         if (am_walsender && !am_db_walsender)
2277                 port->database_name[0] = '\0';
2278
2279         /*
2280          * Done putting stuff in TopMemoryContext.
2281          */
2282         MemoryContextSwitchTo(oldcontext);
2283
2284         return STATUS_OK;
2285 }
2286
2287 /*
2288  * Send a NegotiateProtocolVersion to the client.  This lets the client know
2289  * that they have requested a newer minor protocol version than we are able
2290  * to speak.  We'll speak the highest version we know about; the client can,
2291  * of course, abandon the connection if that's a problem.
2292  *
2293  * We also include in the response a list of protocol options we didn't
2294  * understand.  This allows clients to include optional parameters that might
2295  * be present either in newer protocol versions or third-party protocol
2296  * extensions without fear of having to reconnect if those options are not
2297  * understood, while at the same time making certain that the client is aware
2298  * of which options were actually accepted.
2299  */
2300 static void
2301 SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2302 {
2303         StringInfoData buf;
2304         ListCell   *lc;
2305
2306         pq_beginmessage(&buf, PqMsg_NegotiateProtocolVersion);
2307         pq_sendint32(&buf, PG_PROTOCOL_LATEST);
2308         pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2309         foreach(lc, unrecognized_protocol_options)
2310                 pq_sendstring(&buf, lfirst(lc));
2311         pq_endmessage(&buf);
2312
2313         /* no need to flush, some other message will follow */
2314 }
2315
2316 /*
2317  * The client has sent a cancel request packet, not a normal
2318  * start-a-new-connection packet.  Perform the necessary processing.
2319  * Nothing is sent back to the client.
2320  */
2321 static void
2322 processCancelRequest(Port *port, void *pkt)
2323 {
2324         CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2325         int                     backendPID;
2326         int32           cancelAuthCode;
2327         Backend    *bp;
2328
2329 #ifndef EXEC_BACKEND
2330         dlist_iter      iter;
2331 #else
2332         int                     i;
2333 #endif
2334
2335         backendPID = (int) pg_ntoh32(canc->backendPID);
2336         cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2337
2338         /*
2339          * See if we have a matching backend.  In the EXEC_BACKEND case, we can no
2340          * longer access the postmaster's own backend list, and must rely on the
2341          * duplicate array in shared memory.
2342          */
2343 #ifndef EXEC_BACKEND
2344         dlist_foreach(iter, &BackendList)
2345         {
2346                 bp = dlist_container(Backend, elem, iter.cur);
2347 #else
2348         for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2349         {
2350                 bp = (Backend *) &ShmemBackendArray[i];
2351 #endif
2352                 if (bp->pid == backendPID)
2353                 {
2354                         if (bp->cancel_key == cancelAuthCode)
2355                         {
2356                                 /* Found a match; signal that backend to cancel current op */
2357                                 ereport(DEBUG2,
2358                                                 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2359                                                                                  backendPID)));
2360                                 signal_child(bp->pid, SIGINT);
2361                         }
2362                         else
2363                                 /* Right PID, wrong key: no way, Jose */
2364                                 ereport(LOG,
2365                                                 (errmsg("wrong key in cancel request for process %d",
2366                                                                 backendPID)));
2367                         return;
2368                 }
2369 #ifndef EXEC_BACKEND                    /* make GNU Emacs 26.1 see brace balance */
2370         }
2371 #else
2372         }
2373 #endif
2374
2375         /* No matching backend */
2376         ereport(LOG,
2377                         (errmsg("PID %d in cancel request did not match any process",
2378                                         backendPID)));
2379 }
2380
2381 /*
2382  * canAcceptConnections --- check to see if database state allows connections
2383  * of the specified type.  backend_type can be BACKEND_TYPE_NORMAL,
2384  * BACKEND_TYPE_AUTOVAC, or BACKEND_TYPE_BGWORKER.  (Note that we don't yet
2385  * know whether a NORMAL connection might turn into a walsender.)
2386  */
2387 static CAC_state
2388 canAcceptConnections(int backend_type)
2389 {
2390         CAC_state       result = CAC_OK;
2391
2392         /*
2393          * Can't start backends when in startup/shutdown/inconsistent recovery
2394          * state.  We treat autovac workers the same as user backends for this
2395          * purpose.  However, bgworkers are excluded from this test; we expect
2396          * bgworker_should_start_now() decided whether the DB state allows them.
2397          */
2398         if (pmState != PM_RUN && pmState != PM_HOT_STANDBY &&
2399                 backend_type != BACKEND_TYPE_BGWORKER)
2400         {
2401                 if (Shutdown > NoShutdown)
2402                         return CAC_SHUTDOWN;    /* shutdown is pending */
2403                 else if (!FatalError && pmState == PM_STARTUP)
2404                         return CAC_STARTUP; /* normal startup */
2405                 else if (!FatalError && pmState == PM_RECOVERY)
2406                         return CAC_NOTCONSISTENT;       /* not yet at consistent recovery
2407                                                                                  * state */
2408                 else
2409                         return CAC_RECOVERY;    /* else must be crash recovery */
2410         }
2411
2412         /*
2413          * "Smart shutdown" restrictions are applied only to normal connections,
2414          * not to autovac workers or bgworkers.
2415          */
2416         if (!connsAllowed && backend_type == BACKEND_TYPE_NORMAL)
2417                 return CAC_SHUTDOWN;    /* shutdown is pending */
2418
2419         /*
2420          * Don't start too many children.
2421          *
2422          * We allow more connections here than we can have backends because some
2423          * might still be authenticating; they might fail auth, or some existing
2424          * backend might exit before the auth cycle is completed.  The exact
2425          * MaxBackends limit is enforced when a new backend tries to join the
2426          * shared-inval backend array.
2427          *
2428          * The limit here must match the sizes of the per-child-process arrays;
2429          * see comments for MaxLivePostmasterChildren().
2430          */
2431         if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
2432                 result = CAC_TOOMANY;
2433
2434         return result;
2435 }
2436
2437
2438 /*
2439  * ConnCreate -- create a local connection data structure
2440  *
2441  * Returns NULL on failure, other than out-of-memory which is fatal.
2442  */
2443 static Port *
2444 ConnCreate(int serverFd)
2445 {
2446         Port       *port;
2447
2448         if (!(port = (Port *) calloc(1, sizeof(Port))))
2449         {
2450                 ereport(LOG,
2451                                 (errcode(ERRCODE_OUT_OF_MEMORY),
2452                                  errmsg("out of memory")));
2453                 ExitPostmaster(1);
2454         }
2455
2456         if (StreamConnection(serverFd, port) != STATUS_OK)
2457         {
2458                 if (port->sock != PGINVALID_SOCKET)
2459                         StreamClose(port->sock);
2460                 ConnFree(port);
2461                 return NULL;
2462         }
2463
2464         return port;
2465 }
2466
2467
2468 /*
2469  * ConnFree -- free a local connection data structure
2470  *
2471  * Caller has already closed the socket if any, so there's not much
2472  * to do here.
2473  */
2474 static void
2475 ConnFree(Port *port)
2476 {
2477         free(port);
2478 }
2479
2480
2481 /*
2482  * ClosePostmasterPorts -- close all the postmaster's open sockets
2483  *
2484  * This is called during child process startup to release file descriptors
2485  * that are not needed by that child process.  The postmaster still has
2486  * them open, of course.
2487  *
2488  * Note: we pass am_syslogger as a boolean because we don't want to set
2489  * the global variable yet when this is called.
2490  */
2491 void
2492 ClosePostmasterPorts(bool am_syslogger)
2493 {
2494         /* Release resources held by the postmaster's WaitEventSet. */
2495         if (pm_wait_set)
2496         {
2497                 FreeWaitEventSetAfterFork(pm_wait_set);
2498                 pm_wait_set = NULL;
2499         }
2500
2501 #ifndef WIN32
2502
2503         /*
2504          * Close the write end of postmaster death watch pipe. It's important to
2505          * do this as early as possible, so that if postmaster dies, others won't
2506          * think that it's still running because we're holding the pipe open.
2507          */
2508         if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]) != 0)
2509                 ereport(FATAL,
2510                                 (errcode_for_file_access(),
2511                                  errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2512         postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
2513         /* Notify fd.c that we released one pipe FD. */
2514         ReleaseExternalFD();
2515 #endif
2516
2517         /*
2518          * Close the postmaster's listen sockets.  These aren't tracked by fd.c,
2519          * so we don't call ReleaseExternalFD() here.
2520          *
2521          * The listen sockets are marked as FD_CLOEXEC, so this isn't needed in
2522          * EXEC_BACKEND mode.
2523          */
2524 #ifndef EXEC_BACKEND
2525         if (ListenSockets)
2526         {
2527                 for (int i = 0; i < NumListenSockets; i++)
2528                         StreamClose(ListenSockets[i]);
2529                 pfree(ListenSockets);
2530         }
2531         NumListenSockets = 0;
2532         ListenSockets = NULL;
2533 #endif
2534
2535         /*
2536          * If using syslogger, close the read side of the pipe.  We don't bother
2537          * tracking this in fd.c, either.
2538          */
2539         if (!am_syslogger)
2540         {
2541 #ifndef WIN32
2542                 if (syslogPipe[0] >= 0)
2543                         close(syslogPipe[0]);
2544                 syslogPipe[0] = -1;
2545 #else
2546                 if (syslogPipe[0])
2547                         CloseHandle(syslogPipe[0]);
2548                 syslogPipe[0] = 0;
2549 #endif
2550         }
2551
2552 #ifdef USE_BONJOUR
2553         /* If using Bonjour, close the connection to the mDNS daemon */
2554         if (bonjour_sdref)
2555                 close(DNSServiceRefSockFD(bonjour_sdref));
2556 #endif
2557 }
2558
2559
2560 /*
2561  * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2562  *
2563  * Called early in the postmaster and every backend.
2564  */
2565 void
2566 InitProcessGlobals(void)
2567 {
2568         MyProcPid = getpid();
2569         MyStartTimestamp = GetCurrentTimestamp();
2570         MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2571
2572         /*
2573          * Set a different global seed in every process.  We want something
2574          * unpredictable, so if possible, use high-quality random bits for the
2575          * seed.  Otherwise, fall back to a seed based on timestamp and PID.
2576          */
2577         if (unlikely(!pg_prng_strong_seed(&pg_global_prng_state)))
2578         {
2579                 uint64          rseed;
2580
2581                 /*
2582                  * Since PIDs and timestamps tend to change more frequently in their
2583                  * least significant bits, shift the timestamp left to allow a larger
2584                  * total number of seeds in a given time period.  Since that would
2585                  * leave only 20 bits of the timestamp that cycle every ~1 second,
2586                  * also mix in some higher bits.
2587                  */
2588                 rseed = ((uint64) MyProcPid) ^
2589                         ((uint64) MyStartTimestamp << 12) ^
2590                         ((uint64) MyStartTimestamp >> 20);
2591
2592                 pg_prng_seed(&pg_global_prng_state, rseed);
2593         }
2594
2595         /*
2596          * Also make sure that we've set a good seed for random(3).  Use of that
2597          * is deprecated in core Postgres, but extensions might use it.
2598          */
2599 #ifndef WIN32
2600         srandom(pg_prng_uint32(&pg_global_prng_state));
2601 #endif
2602 }
2603
2604 /*
2605  * Child processes use SIGUSR1 to notify us of 'pmsignals'.  pg_ctl uses
2606  * SIGUSR1 to ask postmaster to check for logrotate and promote files.
2607  */
2608 static void
2609 handle_pm_pmsignal_signal(SIGNAL_ARGS)
2610 {
2611         int                     save_errno = errno;
2612
2613         pending_pm_pmsignal = true;
2614         SetLatch(MyLatch);
2615
2616         errno = save_errno;
2617 }
2618
2619 /*
2620  * pg_ctl uses SIGHUP to request a reload of the configuration files.
2621  */
2622 static void
2623 handle_pm_reload_request_signal(SIGNAL_ARGS)
2624 {
2625         int                     save_errno = errno;
2626
2627         pending_pm_reload_request = true;
2628         SetLatch(MyLatch);
2629
2630         errno = save_errno;
2631 }
2632
2633 /*
2634  * Re-read config files, and tell children to do same.
2635  */
2636 static void
2637 process_pm_reload_request(void)
2638 {
2639         pending_pm_reload_request = false;
2640
2641         ereport(DEBUG2,
2642                         (errmsg_internal("postmaster received reload request signal")));
2643
2644         if (Shutdown <= SmartShutdown)
2645         {
2646                 ereport(LOG,
2647                                 (errmsg("received SIGHUP, reloading configuration files")));
2648                 ProcessConfigFile(PGC_SIGHUP);
2649                 SignalChildren(SIGHUP);
2650                 if (StartupPID != 0)
2651                         signal_child(StartupPID, SIGHUP);
2652                 if (BgWriterPID != 0)
2653                         signal_child(BgWriterPID, SIGHUP);
2654                 if (CheckpointerPID != 0)
2655                         signal_child(CheckpointerPID, SIGHUP);
2656                 if (WalWriterPID != 0)
2657                         signal_child(WalWriterPID, SIGHUP);
2658                 if (WalReceiverPID != 0)
2659                         signal_child(WalReceiverPID, SIGHUP);
2660                 if (AutoVacPID != 0)
2661                         signal_child(AutoVacPID, SIGHUP);
2662                 if (PgArchPID != 0)
2663                         signal_child(PgArchPID, SIGHUP);
2664                 if (SysLoggerPID != 0)
2665                         signal_child(SysLoggerPID, SIGHUP);
2666
2667                 /* Reload authentication config files too */
2668                 if (!load_hba())
2669                         ereport(LOG,
2670                         /* translator: %s is a configuration file */
2671                                         (errmsg("%s was not reloaded", HbaFileName)));
2672
2673                 if (!load_ident())
2674                         ereport(LOG,
2675                                         (errmsg("%s was not reloaded", IdentFileName)));
2676
2677 #ifdef USE_SSL
2678                 /* Reload SSL configuration as well */
2679                 if (EnableSSL)
2680                 {
2681                         if (secure_initialize(false) == 0)
2682                                 LoadedSSL = true;
2683                         else
2684                                 ereport(LOG,
2685                                                 (errmsg("SSL configuration was not reloaded")));
2686                 }
2687                 else
2688                 {
2689                         secure_destroy();
2690                         LoadedSSL = false;
2691                 }
2692 #endif
2693
2694 #ifdef EXEC_BACKEND
2695                 /* Update the starting-point file for future children */
2696                 write_nondefault_variables(PGC_SIGHUP);
2697 #endif
2698         }
2699 }
2700
2701 /*
2702  * pg_ctl uses SIGTERM, SIGINT and SIGQUIT to request different types of
2703  * shutdown.
2704  */
2705 static void
2706 handle_pm_shutdown_request_signal(SIGNAL_ARGS)
2707 {
2708         int                     save_errno = errno;
2709
2710         switch (postgres_signal_arg)
2711         {
2712                 case SIGTERM:
2713                         /* smart is implied if the other two flags aren't set */
2714                         pending_pm_shutdown_request = true;
2715                         break;
2716                 case SIGINT:
2717                         pending_pm_fast_shutdown_request = true;
2718                         pending_pm_shutdown_request = true;
2719                         break;
2720                 case SIGQUIT:
2721                         pending_pm_immediate_shutdown_request = true;
2722                         pending_pm_shutdown_request = true;
2723                         break;
2724         }
2725         SetLatch(MyLatch);
2726
2727         errno = save_errno;
2728 }
2729
2730 /*
2731  * Process shutdown request.
2732  */
2733 static void
2734 process_pm_shutdown_request(void)
2735 {
2736         int                     mode;
2737
2738         ereport(DEBUG2,
2739                         (errmsg_internal("postmaster received shutdown request signal")));
2740
2741         pending_pm_shutdown_request = false;
2742
2743         /*
2744          * If more than one shutdown request signal arrived since the last server
2745          * loop, take the one that is the most immediate.  That matches the
2746          * priority that would apply if we processed them one by one in any order.
2747          */
2748         if (pending_pm_immediate_shutdown_request)
2749         {
2750                 pending_pm_immediate_shutdown_request = false;
2751                 pending_pm_fast_shutdown_request = false;
2752                 mode = ImmediateShutdown;
2753         }
2754         else if (pending_pm_fast_shutdown_request)
2755         {
2756                 pending_pm_fast_shutdown_request = false;
2757                 mode = FastShutdown;
2758         }
2759         else
2760                 mode = SmartShutdown;
2761
2762         switch (mode)
2763         {
2764                 case SmartShutdown:
2765
2766                         /*
2767                          * Smart Shutdown:
2768                          *
2769                          * Wait for children to end their work, then shut down.
2770                          */
2771                         if (Shutdown >= SmartShutdown)
2772                                 break;
2773                         Shutdown = SmartShutdown;
2774                         ereport(LOG,
2775                                         (errmsg("received smart shutdown request")));
2776
2777                         /* Report status */
2778                         AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2779 #ifdef USE_SYSTEMD
2780                         sd_notify(0, "STOPPING=1");
2781 #endif
2782
2783                         /*
2784                          * If we reached normal running, we go straight to waiting for
2785                          * client backends to exit.  If already in PM_STOP_BACKENDS or a
2786                          * later state, do not change it.
2787                          */
2788                         if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
2789                                 connsAllowed = false;
2790                         else if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2791                         {
2792                                 /* There should be no clients, so proceed to stop children */
2793                                 pmState = PM_STOP_BACKENDS;
2794                         }
2795
2796                         /*
2797                          * Now wait for online backup mode to end and backends to exit. If
2798                          * that is already the case, PostmasterStateMachine will take the
2799                          * next step.
2800                          */
2801                         PostmasterStateMachine();
2802                         break;
2803
2804                 case FastShutdown:
2805
2806                         /*
2807                          * Fast Shutdown:
2808                          *
2809                          * Abort all children with SIGTERM (rollback active transactions
2810                          * and exit) and shut down when they are gone.
2811                          */
2812                         if (Shutdown >= FastShutdown)
2813                                 break;
2814                         Shutdown = FastShutdown;
2815                         ereport(LOG,
2816                                         (errmsg("received fast shutdown request")));
2817
2818                         /* Report status */
2819                         AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2820 #ifdef USE_SYSTEMD
2821                         sd_notify(0, "STOPPING=1");
2822 #endif
2823
2824                         if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2825                         {
2826                                 /* Just shut down background processes silently */
2827                                 pmState = PM_STOP_BACKENDS;
2828                         }
2829                         else if (pmState == PM_RUN ||
2830                                          pmState == PM_HOT_STANDBY)
2831                         {
2832                                 /* Report that we're about to zap live client sessions */
2833                                 ereport(LOG,
2834                                                 (errmsg("aborting any active transactions")));
2835                                 pmState = PM_STOP_BACKENDS;
2836                         }
2837
2838                         /*
2839                          * PostmasterStateMachine will issue any necessary signals, or
2840                          * take the next step if no child processes need to be killed.
2841                          */
2842                         PostmasterStateMachine();
2843                         break;
2844
2845                 case ImmediateShutdown:
2846
2847                         /*
2848                          * Immediate Shutdown:
2849                          *
2850                          * abort all children with SIGQUIT, wait for them to exit,
2851                          * terminate remaining ones with SIGKILL, then exit without
2852                          * attempt to properly shut down the data base system.
2853                          */
2854                         if (Shutdown >= ImmediateShutdown)
2855                                 break;
2856                         Shutdown = ImmediateShutdown;
2857                         ereport(LOG,
2858                                         (errmsg("received immediate shutdown request")));
2859
2860                         /* Report status */
2861                         AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2862 #ifdef USE_SYSTEMD
2863                         sd_notify(0, "STOPPING=1");
2864 #endif
2865
2866                         /* tell children to shut down ASAP */
2867                         /* (note we don't apply send_abort_for_crash here) */
2868                         SetQuitSignalReason(PMQUIT_FOR_STOP);
2869                         TerminateChildren(SIGQUIT);
2870                         pmState = PM_WAIT_BACKENDS;
2871
2872                         /* set stopwatch for them to die */
2873                         AbortStartTime = time(NULL);
2874
2875                         /*
2876                          * Now wait for backends to exit.  If there are none,
2877                          * PostmasterStateMachine will take the next step.
2878                          */
2879                         PostmasterStateMachine();
2880                         break;
2881         }
2882 }
2883
2884 static void
2885 handle_pm_child_exit_signal(SIGNAL_ARGS)
2886 {
2887         int                     save_errno = errno;
2888
2889         pending_pm_child_exit = true;
2890         SetLatch(MyLatch);
2891
2892         errno = save_errno;
2893 }
2894
2895 /*
2896  * Cleanup after a child process dies.
2897  */
2898 static void
2899 process_pm_child_exit(void)
2900 {
2901         int                     pid;                    /* process id of dead child process */
2902         int                     exitstatus;             /* its exit status */
2903
2904         pending_pm_child_exit = false;
2905
2906         ereport(DEBUG4,
2907                         (errmsg_internal("reaping dead processes")));
2908
2909         while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2910         {
2911                 /*
2912                  * Check if this child was a startup process.
2913                  */
2914                 if (pid == StartupPID)
2915                 {
2916                         StartupPID = 0;
2917
2918                         /*
2919                          * Startup process exited in response to a shutdown request (or it
2920                          * completed normally regardless of the shutdown request).
2921                          */
2922                         if (Shutdown > NoShutdown &&
2923                                 (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2924                         {
2925                                 StartupStatus = STARTUP_NOT_RUNNING;
2926                                 pmState = PM_WAIT_BACKENDS;
2927                                 /* PostmasterStateMachine logic does the rest */
2928                                 continue;
2929                         }
2930
2931                         if (EXIT_STATUS_3(exitstatus))
2932                         {
2933                                 ereport(LOG,
2934                                                 (errmsg("shutdown at recovery target")));
2935                                 StartupStatus = STARTUP_NOT_RUNNING;
2936                                 Shutdown = Max(Shutdown, SmartShutdown);
2937                                 TerminateChildren(SIGTERM);
2938                                 pmState = PM_WAIT_BACKENDS;
2939                                 /* PostmasterStateMachine logic does the rest */
2940                                 continue;
2941                         }
2942
2943                         /*
2944                          * Unexpected exit of startup process (including FATAL exit)
2945                          * during PM_STARTUP is treated as catastrophic. There are no
2946                          * other processes running yet, so we can just exit.
2947                          */
2948                         if (pmState == PM_STARTUP &&
2949                                 StartupStatus != STARTUP_SIGNALED &&
2950                                 !EXIT_STATUS_0(exitstatus))
2951                         {
2952                                 LogChildExit(LOG, _("startup process"),
2953                                                          pid, exitstatus);
2954                                 ereport(LOG,
2955                                                 (errmsg("aborting startup due to startup process failure")));
2956                                 ExitPostmaster(1);
2957                         }
2958
2959                         /*
2960                          * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2961                          * the startup process is catastrophic, so kill other children,
2962                          * and set StartupStatus so we don't try to reinitialize after
2963                          * they're gone.  Exception: if StartupStatus is STARTUP_SIGNALED,
2964                          * then we previously sent the startup process a SIGQUIT; so
2965                          * that's probably the reason it died, and we do want to try to
2966                          * restart in that case.
2967                          *
2968                          * This stanza also handles the case where we sent a SIGQUIT
2969                          * during PM_STARTUP due to some dead_end child crashing: in that
2970                          * situation, if the startup process dies on the SIGQUIT, we need
2971                          * to transition to PM_WAIT_BACKENDS state which will allow
2972                          * PostmasterStateMachine to restart the startup process.  (On the
2973                          * other hand, the startup process might complete normally, if we
2974                          * were too late with the SIGQUIT.  In that case we'll fall
2975                          * through and commence normal operations.)
2976                          */
2977                         if (!EXIT_STATUS_0(exitstatus))
2978                         {
2979                                 if (StartupStatus == STARTUP_SIGNALED)
2980                                 {
2981                                         StartupStatus = STARTUP_NOT_RUNNING;
2982                                         if (pmState == PM_STARTUP)
2983                                                 pmState = PM_WAIT_BACKENDS;
2984                                 }
2985                                 else
2986                                         StartupStatus = STARTUP_CRASHED;
2987                                 HandleChildCrash(pid, exitstatus,
2988                                                                  _("startup process"));
2989                                 continue;
2990                         }
2991
2992                         /*
2993                          * Startup succeeded, commence normal operations
2994                          */
2995                         StartupStatus = STARTUP_NOT_RUNNING;
2996                         FatalError = false;
2997                         AbortStartTime = 0;
2998                         ReachedNormalRunning = true;
2999                         pmState = PM_RUN;
3000                         connsAllowed = true;
3001
3002                         /*
3003                          * Crank up the background tasks, if we didn't do that already
3004                          * when we entered consistent recovery state.  It doesn't matter
3005                          * if this fails, we'll just try again later.
3006                          */
3007                         if (CheckpointerPID == 0)
3008                                 CheckpointerPID = StartCheckpointer();
3009                         if (BgWriterPID == 0)
3010                                 BgWriterPID = StartBackgroundWriter();
3011                         if (WalWriterPID == 0)
3012                                 WalWriterPID = StartWalWriter();
3013
3014                         /*
3015                          * Likewise, start other special children as needed.  In a restart
3016                          * situation, some of them may be alive already.
3017                          */
3018                         if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
3019                                 AutoVacPID = StartAutoVacLauncher();
3020                         if (PgArchStartupAllowed() && PgArchPID == 0)
3021                                 PgArchPID = StartArchiver();
3022
3023                         /* workers may be scheduled to start now */
3024                         maybe_start_bgworkers();
3025
3026                         /* at this point we are really open for business */
3027                         ereport(LOG,
3028                                         (errmsg("database system is ready to accept connections")));
3029
3030                         /* Report status */
3031                         AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
3032 #ifdef USE_SYSTEMD
3033                         sd_notify(0, "READY=1");
3034 #endif
3035
3036                         continue;
3037                 }
3038
3039                 /*
3040                  * Was it the bgwriter?  Normal exit can be ignored; we'll start a new
3041                  * one at the next iteration of the postmaster's main loop, if
3042                  * necessary.  Any other exit condition is treated as a crash.
3043                  */
3044                 if (pid == BgWriterPID)
3045                 {
3046                         BgWriterPID = 0;
3047                         if (!EXIT_STATUS_0(exitstatus))
3048                                 HandleChildCrash(pid, exitstatus,
3049                                                                  _("background writer process"));
3050                         continue;
3051                 }
3052
3053                 /*
3054                  * Was it the checkpointer?
3055                  */
3056                 if (pid == CheckpointerPID)
3057                 {
3058                         CheckpointerPID = 0;
3059                         if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3060                         {
3061                                 /*
3062                                  * OK, we saw normal exit of the checkpointer after it's been
3063                                  * told to shut down.  We expect that it wrote a shutdown
3064                                  * checkpoint.  (If for some reason it didn't, recovery will
3065                                  * occur on next postmaster start.)
3066                                  *
3067                                  * At this point we should have no normal backend children
3068                                  * left (else we'd not be in PM_SHUTDOWN state) but we might
3069                                  * have dead_end children to wait for.
3070                                  *
3071                                  * If we have an archiver subprocess, tell it to do a last
3072                                  * archive cycle and quit. Likewise, if we have walsender
3073                                  * processes, tell them to send any remaining WAL and quit.
3074                                  */
3075                                 Assert(Shutdown > NoShutdown);
3076
3077                                 /* Waken archiver for the last time */
3078                                 if (PgArchPID != 0)
3079                                         signal_child(PgArchPID, SIGUSR2);
3080
3081                                 /*
3082                                  * Waken walsenders for the last time. No regular backends
3083                                  * should be around anymore.
3084                                  */
3085                                 SignalChildren(SIGUSR2);
3086
3087                                 pmState = PM_SHUTDOWN_2;
3088                         }
3089                         else
3090                         {
3091                                 /*
3092                                  * Any unexpected exit of the checkpointer (including FATAL
3093                                  * exit) is treated as a crash.
3094                                  */
3095                                 HandleChildCrash(pid, exitstatus,
3096                                                                  _("checkpointer process"));
3097                         }
3098
3099                         continue;
3100                 }
3101
3102                 /*
3103                  * Was it the wal writer?  Normal exit can be ignored; we'll start a
3104                  * new one at the next iteration of the postmaster's main loop, if
3105                  * necessary.  Any other exit condition is treated as a crash.
3106                  */
3107                 if (pid == WalWriterPID)
3108                 {
3109                         WalWriterPID = 0;
3110                         if (!EXIT_STATUS_0(exitstatus))
3111                                 HandleChildCrash(pid, exitstatus,
3112                                                                  _("WAL writer process"));
3113                         continue;
3114                 }
3115
3116                 /*
3117                  * Was it the wal receiver?  If exit status is zero (normal) or one
3118                  * (FATAL exit), we assume everything is all right just like normal
3119                  * backends.  (If we need a new wal receiver, we'll start one at the
3120                  * next iteration of the postmaster's main loop.)
3121                  */
3122                 if (pid == WalReceiverPID)
3123                 {
3124                         WalReceiverPID = 0;
3125                         if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3126                                 HandleChildCrash(pid, exitstatus,
3127                                                                  _("WAL receiver process"));
3128                         continue;
3129                 }
3130
3131                 /*
3132                  * Was it the autovacuum launcher?      Normal exit can be ignored; we'll
3133                  * start a new one at the next iteration of the postmaster's main
3134                  * loop, if necessary.  Any other exit condition is treated as a
3135                  * crash.
3136                  */
3137                 if (pid == AutoVacPID)
3138                 {
3139                         AutoVacPID = 0;
3140                         if (!EXIT_STATUS_0(exitstatus))
3141                                 HandleChildCrash(pid, exitstatus,
3142                                                                  _("autovacuum launcher process"));
3143                         continue;
3144                 }
3145
3146                 /*
3147                  * Was it the archiver?  If exit status is zero (normal) or one (FATAL
3148                  * exit), we assume everything is all right just like normal backends
3149                  * and just try to restart a new one so that we immediately retry
3150                  * archiving remaining files. (If fail, we'll try again in future
3151                  * cycles of the postmaster's main loop.) Unless we were waiting for
3152                  * it to shut down; don't restart it in that case, and
3153                  * PostmasterStateMachine() will advance to the next shutdown step.
3154                  */
3155                 if (pid == PgArchPID)
3156                 {
3157                         PgArchPID = 0;
3158                         if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3159                                 HandleChildCrash(pid, exitstatus,
3160                                                                  _("archiver process"));
3161                         if (PgArchStartupAllowed())
3162                                 PgArchPID = StartArchiver();
3163                         continue;
3164                 }
3165
3166                 /* Was it the system logger?  If so, try to start a new one */
3167                 if (pid == SysLoggerPID)
3168                 {
3169                         SysLoggerPID = 0;
3170                         /* for safety's sake, launch new logger *first* */
3171                         SysLoggerPID = SysLogger_Start();
3172                         if (!EXIT_STATUS_0(exitstatus))
3173                                 LogChildExit(LOG, _("system logger process"),
3174                                                          pid, exitstatus);
3175                         continue;
3176                 }
3177
3178                 /* Was it one of our background workers? */
3179                 if (CleanupBackgroundWorker(pid, exitstatus))
3180                 {
3181                         /* have it be restarted */
3182                         HaveCrashedWorker = true;
3183                         continue;
3184                 }
3185
3186                 /*
3187                  * Else do standard backend child cleanup.
3188                  */
3189                 CleanupBackend(pid, exitstatus);
3190         }                                                       /* loop over pending child-death reports */
3191
3192         /*
3193          * After cleaning out the SIGCHLD queue, see if we have any state changes
3194          * or actions to make.
3195          */
3196         PostmasterStateMachine();
3197 }
3198
3199 /*
3200  * Scan the bgworkers list and see if the given PID (which has just stopped
3201  * or crashed) is in it.  Handle its shutdown if so, and return true.  If not a
3202  * bgworker, return false.
3203  *
3204  * This is heavily based on CleanupBackend.  One important difference is that
3205  * we don't know yet that the dying process is a bgworker, so we must be silent
3206  * until we're sure it is.
3207  */
3208 static bool
3209 CleanupBackgroundWorker(int pid,
3210                                                 int exitstatus) /* child's exit status */
3211 {
3212         char            namebuf[MAXPGPATH];
3213         slist_mutable_iter iter;
3214
3215         slist_foreach_modify(iter, &BackgroundWorkerList)
3216         {
3217                 RegisteredBgWorker *rw;
3218
3219                 rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3220
3221                 if (rw->rw_pid != pid)
3222                         continue;
3223
3224 #ifdef WIN32
3225                 /* see CleanupBackend */
3226                 if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3227                         exitstatus = 0;
3228 #endif
3229
3230                 snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3231                                  rw->rw_worker.bgw_type);
3232
3233
3234                 if (!EXIT_STATUS_0(exitstatus))
3235                 {
3236                         /* Record timestamp, so we know when to restart the worker. */
3237                         rw->rw_crashed_at = GetCurrentTimestamp();
3238                 }
3239                 else
3240                 {
3241                         /* Zero exit status means terminate */
3242                         rw->rw_crashed_at = 0;
3243                         rw->rw_terminate = true;
3244                 }
3245
3246                 /*
3247                  * Additionally, just like a backend, any exit status other than 0 or
3248                  * 1 is considered a crash and causes a system-wide restart.
3249                  */
3250                 if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3251                 {
3252                         HandleChildCrash(pid, exitstatus, namebuf);
3253                         return true;
3254                 }
3255
3256                 /*
3257                  * We must release the postmaster child slot. If the worker failed to
3258                  * do so, it did not clean up after itself, requiring a crash-restart
3259                  * cycle.
3260                  */
3261                 if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
3262                 {
3263                         HandleChildCrash(pid, exitstatus, namebuf);
3264                         return true;
3265                 }
3266
3267                 /* Get it out of the BackendList and clear out remaining data */
3268                 dlist_delete(&rw->rw_backend->elem);
3269 #ifdef EXEC_BACKEND
3270                 ShmemBackendArrayRemove(rw->rw_backend);
3271 #endif
3272
3273                 /*
3274                  * It's possible that this background worker started some OTHER
3275                  * background worker and asked to be notified when that worker started
3276                  * or stopped.  If so, cancel any notifications destined for the
3277                  * now-dead backend.
3278                  */
3279                 if (rw->rw_backend->bgworker_notify)
3280                         BackgroundWorkerStopNotifications(rw->rw_pid);
3281                 pfree(rw->rw_backend);
3282                 rw->rw_backend = NULL;
3283                 rw->rw_pid = 0;
3284                 rw->rw_child_slot = 0;
3285                 ReportBackgroundWorkerExit(&iter);      /* report child death */
3286
3287                 LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3288                                          namebuf, pid, exitstatus);
3289
3290                 return true;
3291         }
3292
3293         return false;
3294 }
3295
3296 /*
3297  * CleanupBackend -- cleanup after terminated backend.
3298  *
3299  * Remove all local state associated with backend.
3300  *
3301  * If you change this, see also CleanupBackgroundWorker.
3302  */
3303 static void
3304 CleanupBackend(int pid,
3305                            int exitstatus)      /* child's exit status. */
3306 {
3307         dlist_mutable_iter iter;
3308
3309         LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3310
3311         /*
3312          * If a backend dies in an ugly way then we must signal all other backends
3313          * to quickdie.  If exit status is zero (normal) or one (FATAL exit), we
3314          * assume everything is all right and proceed to remove the backend from
3315          * the active backend list.
3316          */
3317
3318 #ifdef WIN32
3319
3320         /*
3321          * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3322          * since that sometimes happens under load when the process fails to start
3323          * properly (long before it starts using shared memory). Microsoft reports
3324          * it is related to mutex failure:
3325          * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3326          */
3327         if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3328         {
3329                 LogChildExit(LOG, _("server process"), pid, exitstatus);
3330                 exitstatus = 0;
3331         }
3332 #endif
3333
3334         if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3335         {
3336                 HandleChildCrash(pid, exitstatus, _("server process"));
3337                 return;
3338         }
3339
3340         dlist_foreach_modify(iter, &BackendList)
3341         {
3342                 Backend    *bp = dlist_container(Backend, elem, iter.cur);
3343
3344                 if (bp->pid == pid)
3345                 {
3346                         if (!bp->dead_end)
3347                         {
3348                                 if (!ReleasePostmasterChildSlot(bp->child_slot))
3349                                 {
3350                                         /*
3351                                          * Uh-oh, the child failed to clean itself up.  Treat as a
3352                                          * crash after all.
3353                                          */
3354                                         HandleChildCrash(pid, exitstatus, _("server process"));
3355                                         return;
3356                                 }
3357 #ifdef EXEC_BACKEND
3358                                 ShmemBackendArrayRemove(bp);
3359 #endif
3360                         }
3361                         if (bp->bgworker_notify)
3362                         {
3363                                 /*
3364                                  * This backend may have been slated to receive SIGUSR1 when
3365                                  * some background worker started or stopped.  Cancel those
3366                                  * notifications, as we don't want to signal PIDs that are not
3367                                  * PostgreSQL backends.  This gets skipped in the (probably
3368                                  * very common) case where the backend has never requested any
3369                                  * such notifications.
3370                                  */
3371                                 BackgroundWorkerStopNotifications(bp->pid);
3372                         }
3373                         dlist_delete(iter.cur);
3374                         pfree(bp);
3375                         break;
3376                 }
3377         }
3378 }
3379
3380 /*
3381  * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3382  * walwriter, autovacuum, archiver or background worker.
3383  *
3384  * The objectives here are to clean up our local state about the child
3385  * process, and to signal all other remaining children to quickdie.
3386  */
3387 static void
3388 HandleChildCrash(int pid, int exitstatus, const char *procname)
3389 {
3390         dlist_mutable_iter iter;
3391         slist_iter      siter;
3392         Backend    *bp;
3393         bool            take_action;
3394
3395         /*
3396          * We only log messages and send signals if this is the first process
3397          * crash and we're not doing an immediate shutdown; otherwise, we're only
3398          * here to update postmaster's idea of live processes.  If we have already
3399          * signaled children, nonzero exit status is to be expected, so don't
3400          * clutter log.
3401          */
3402         take_action = !FatalError && Shutdown != ImmediateShutdown;
3403
3404         if (take_action)
3405         {
3406                 LogChildExit(LOG, procname, pid, exitstatus);
3407                 ereport(LOG,
3408                                 (errmsg("terminating any other active server processes")));
3409                 SetQuitSignalReason(PMQUIT_FOR_CRASH);
3410         }
3411
3412         /* Process background workers. */
3413         slist_foreach(siter, &BackgroundWorkerList)
3414         {
3415                 RegisteredBgWorker *rw;
3416
3417                 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3418                 if (rw->rw_pid == 0)
3419                         continue;                       /* not running */
3420                 if (rw->rw_pid == pid)
3421                 {
3422                         /*
3423                          * Found entry for freshly-dead worker, so remove it.
3424                          */
3425                         (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
3426                         dlist_delete(&rw->rw_backend->elem);
3427 #ifdef EXEC_BACKEND
3428                         ShmemBackendArrayRemove(rw->rw_backend);
3429 #endif
3430                         pfree(rw->rw_backend);
3431                         rw->rw_backend = NULL;
3432                         rw->rw_pid = 0;
3433                         rw->rw_child_slot = 0;
3434                         /* don't reset crashed_at */
3435                         /* don't report child stop, either */
3436                         /* Keep looping so we can signal remaining workers */
3437                 }
3438                 else
3439                 {
3440                         /*
3441                          * This worker is still alive.  Unless we did so already, tell it
3442                          * to commit hara-kiri.
3443                          */
3444                         if (take_action)
3445                                 sigquit_child(rw->rw_pid);
3446                 }
3447         }
3448
3449         /* Process regular backends */
3450         dlist_foreach_modify(iter, &BackendList)
3451         {
3452                 bp = dlist_container(Backend, elem, iter.cur);
3453
3454                 if (bp->pid == pid)
3455                 {
3456                         /*
3457                          * Found entry for freshly-dead backend, so remove it.
3458                          */
3459                         if (!bp->dead_end)
3460                         {
3461                                 (void) ReleasePostmasterChildSlot(bp->child_slot);
3462 #ifdef EXEC_BACKEND
3463                                 ShmemBackendArrayRemove(bp);
3464 #endif
3465                         }
3466                         dlist_delete(iter.cur);
3467                         pfree(bp);
3468                         /* Keep looping so we can signal remaining backends */
3469                 }
3470                 else
3471                 {
3472                         /*
3473                          * This backend is still alive.  Unless we did so already, tell it
3474                          * to commit hara-kiri.
3475                          *
3476                          * We could exclude dead_end children here, but at least when
3477                          * sending SIGABRT it seems better to include them.
3478                          *
3479                          * Background workers were already processed above; ignore them
3480                          * here.
3481                          */
3482                         if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3483                                 continue;
3484
3485                         if (take_action)
3486                                 sigquit_child(bp->pid);
3487                 }
3488         }
3489
3490         /* Take care of the startup process too */
3491         if (pid == StartupPID)
3492         {
3493                 StartupPID = 0;
3494                 /* Caller adjusts StartupStatus, so don't touch it here */
3495         }
3496         else if (StartupPID != 0 && take_action)
3497         {
3498                 sigquit_child(StartupPID);
3499                 StartupStatus = STARTUP_SIGNALED;
3500         }
3501
3502         /* Take care of the bgwriter too */
3503         if (pid == BgWriterPID)
3504                 BgWriterPID = 0;
3505         else if (BgWriterPID != 0 && take_action)
3506                 sigquit_child(BgWriterPID);
3507
3508         /* Take care of the checkpointer too */
3509         if (pid == CheckpointerPID)
3510                 CheckpointerPID = 0;
3511         else if (CheckpointerPID != 0 && take_action)
3512                 sigquit_child(CheckpointerPID);
3513
3514         /* Take care of the walwriter too */
3515         if (pid == WalWriterPID)
3516                 WalWriterPID = 0;
3517         else if (WalWriterPID != 0 && take_action)
3518                 sigquit_child(WalWriterPID);
3519
3520         /* Take care of the walreceiver too */
3521         if (pid == WalReceiverPID)
3522                 WalReceiverPID = 0;
3523         else if (WalReceiverPID != 0 && take_action)
3524                 sigquit_child(WalReceiverPID);
3525
3526         /* Take care of the autovacuum launcher too */
3527         if (pid == AutoVacPID)
3528                 AutoVacPID = 0;
3529         else if (AutoVacPID != 0 && take_action)
3530                 sigquit_child(AutoVacPID);
3531
3532         /* Take care of the archiver too */
3533         if (pid == PgArchPID)
3534                 PgArchPID = 0;
3535         else if (PgArchPID != 0 && take_action)
3536                 sigquit_child(PgArchPID);
3537
3538         /* We do NOT restart the syslogger */
3539
3540         if (Shutdown != ImmediateShutdown)
3541                 FatalError = true;
3542
3543         /* We now transit into a state of waiting for children to die */
3544         if (pmState == PM_RECOVERY ||
3545                 pmState == PM_HOT_STANDBY ||
3546                 pmState == PM_RUN ||
3547                 pmState == PM_STOP_BACKENDS ||
3548                 pmState == PM_SHUTDOWN)
3549                 pmState = PM_WAIT_BACKENDS;
3550
3551         /*
3552          * .. and if this doesn't happen quickly enough, now the clock is ticking
3553          * for us to kill them without mercy.
3554          */
3555         if (AbortStartTime == 0)
3556                 AbortStartTime = time(NULL);
3557 }
3558
/*
 * LogChildExit -- write a log entry describing how a child process ended.
 *
 *	lev			elevel to report at
 *	procname	noun phrase describing the child, e.g. "server process"
 *	pid			PID of the child
 *	exitstatus	the child's exit status
 *
 * For any exit status other than 0, the activity obtained from
 * pgstat_get_crashed_backend_activity() is attached as an errdetail when it
 * is non-NULL.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * The buffer size is arbitrary; it is set equal to the default
	 * track_activity_query_size.
	 */
	char		querybuf[1024];
	const char *running = NULL;

	if (!EXIT_STATUS_0(exitstatus))
	{
		running = pgstat_get_crashed_backend_activity(pid,
													  querybuf,
													  sizeof(querybuf));
	}

	/* The child ended by calling exit. */
	if (WIFEXITED(exitstatus))
	{
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 running ? errdetail("Failed process was running: %s", running) : 0));
		return;
	}

	/* The child was terminated by a signal (an exception on Windows). */
	if (WIFSIGNALED(exitstatus))
	{
#if defined(WIN32)
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by exception 0x%X",
						procname, pid, WTERMSIG(exitstatus)),
				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
				 running ? errdetail("Failed process was running: %s", running) : 0));
#else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d: %s",
						procname, pid, WTERMSIG(exitstatus),
						pg_strsignal(WTERMSIG(exitstatus))),
				 running ? errdetail("Failed process was running: %s", running) : 0));
#endif
		return;
	}

	/* Neither of the above: report the raw status value. */
	ereport(lev,

	/*------
	  translator: %s is a noun phrase describing a child process, such as
	  "server process" */
			(errmsg("%s (PID %d) exited with unrecognized status %d",
					procname, pid, exitstatus),
			 running ? errdetail("Failed process was running: %s", running) : 0));
}
3620
3621 /*
3622  * Advance the postmaster's state machine and take actions as appropriate
3623  *
3624  * This is common code for process_pm_shutdown_request(),
3625  * process_pm_child_exit() and process_pm_pmsignal(), which process the signals
3626  * that might mean we need to change state.
3627  */
3628 static void
3629 PostmasterStateMachine(void)
3630 {
3631         /* If we're doing a smart shutdown, try to advance that state. */
3632         if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3633         {
3634                 if (!connsAllowed)
3635                 {
3636                         /*
3637                          * This state ends when we have no normal client backends running.
3638                          * Then we're ready to stop other children.
3639                          */
3640                         if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
3641                                 pmState = PM_STOP_BACKENDS;
3642                 }
3643         }
3644
3645         /*
3646          * If we're ready to do so, signal child processes to shut down.  (This
3647          * isn't a persistent state, but treating it as a distinct pmState allows
3648          * us to share this code across multiple shutdown code paths.)
3649          */
3650         if (pmState == PM_STOP_BACKENDS)
3651         {
3652                 /*
3653                  * Forget any pending requests for background workers, since we're no
3654                  * longer willing to launch any new workers.  (If additional requests
3655                  * arrive, BackgroundWorkerStateChange will reject them.)
3656                  */
3657                 ForgetUnstartedBackgroundWorkers();
3658
3659                 /* Signal all backend children except walsenders */
3660                 SignalSomeChildren(SIGTERM,
3661                                                    BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
3662                 /* and the autovac launcher too */
3663                 if (AutoVacPID != 0)
3664                         signal_child(AutoVacPID, SIGTERM);
3665                 /* and the bgwriter too */
3666                 if (BgWriterPID != 0)
3667                         signal_child(BgWriterPID, SIGTERM);
3668                 /* and the walwriter too */
3669                 if (WalWriterPID != 0)
3670                         signal_child(WalWriterPID, SIGTERM);
3671                 /* If we're in recovery, also stop startup and walreceiver procs */
3672                 if (StartupPID != 0)
3673                         signal_child(StartupPID, SIGTERM);
3674                 if (WalReceiverPID != 0)
3675                         signal_child(WalReceiverPID, SIGTERM);
3676                 /* checkpointer, archiver, stats, and syslogger may continue for now */
3677
3678                 /* Now transition to PM_WAIT_BACKENDS state to wait for them to die */
3679                 pmState = PM_WAIT_BACKENDS;
3680         }
3681
3682         /*
3683          * If we are in a state-machine state that implies waiting for backends to
3684          * exit, see if they're all gone, and change state if so.
3685          */
3686         if (pmState == PM_WAIT_BACKENDS)
3687         {
3688                 /*
3689                  * PM_WAIT_BACKENDS state ends when we have no regular backends
3690                  * (including autovac workers), no bgworkers (including unconnected
3691                  * ones), and no walwriter, autovac launcher or bgwriter.  If we are
3692                  * doing crash recovery or an immediate shutdown then we expect the
3693                  * checkpointer to exit as well, otherwise not. The stats and
3694                  * syslogger processes are disregarded since they are not connected to
3695                  * shared memory; we also disregard dead_end children here. Walsenders
3696                  * and archiver are also disregarded, they will be terminated later
3697                  * after writing the checkpoint record.
3698                  */
3699                 if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
3700                         StartupPID == 0 &&
3701                         WalReceiverPID == 0 &&
3702                         BgWriterPID == 0 &&
3703                         (CheckpointerPID == 0 ||
3704                          (!FatalError && Shutdown < ImmediateShutdown)) &&
3705                         WalWriterPID == 0 &&
3706                         AutoVacPID == 0)
3707                 {
3708                         if (Shutdown >= ImmediateShutdown || FatalError)
3709                         {
3710                                 /*
3711                                  * Start waiting for dead_end children to die.  This state
3712                                  * change causes ServerLoop to stop creating new ones.
3713                                  */
3714                                 pmState = PM_WAIT_DEAD_END;
3715
3716                                 /*
3717                                  * We already SIGQUIT'd the archiver and stats processes, if
3718                                  * any, when we started immediate shutdown or entered
3719                                  * FatalError state.
3720                                  */
3721                         }
3722                         else
3723                         {
3724                                 /*
3725                                  * If we get here, we are proceeding with normal shutdown. All
3726                                  * the regular children are gone, and it's time to tell the
3727                                  * checkpointer to do a shutdown checkpoint.
3728                                  */
3729                                 Assert(Shutdown > NoShutdown);
3730                                 /* Start the checkpointer if not running */
3731                                 if (CheckpointerPID == 0)
3732                                         CheckpointerPID = StartCheckpointer();
3733                                 /* And tell it to shut down */
3734                                 if (CheckpointerPID != 0)
3735                                 {
3736                                         signal_child(CheckpointerPID, SIGUSR2);
3737                                         pmState = PM_SHUTDOWN;
3738                                 }
3739                                 else
3740                                 {
3741                                         /*
3742                                          * If we failed to fork a checkpointer, just shut down.
3743                                          * Any required cleanup will happen at next restart. We
3744                                          * set FatalError so that an "abnormal shutdown" message
3745                                          * gets logged when we exit.
3746                                          *
3747                                          * We don't consult send_abort_for_crash here, as it's
3748                                          * unlikely that dumping cores would illuminate the reason
3749                                          * for checkpointer fork failure.
3750                                          */
3751                                         FatalError = true;
3752                                         pmState = PM_WAIT_DEAD_END;
3753
3754                                         /* Kill the walsenders and archiver too */
3755                                         SignalChildren(SIGQUIT);
3756                                         if (PgArchPID != 0)
3757                                                 signal_child(PgArchPID, SIGQUIT);
3758                                 }
3759                         }
3760                 }
3761         }
3762
3763         if (pmState == PM_SHUTDOWN_2)
3764         {
3765                 /*
3766                  * PM_SHUTDOWN_2 state ends when there's no other children than
3767                  * dead_end children left. There shouldn't be any regular backends
3768                  * left by now anyway; what we're really waiting for is walsenders and
3769                  * archiver.
3770                  */
3771                 if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3772                 {
3773                         pmState = PM_WAIT_DEAD_END;
3774                 }
3775         }
3776
3777         if (pmState == PM_WAIT_DEAD_END)
3778         {
3779                 /* Don't allow any new socket connection events. */
3780                 ConfigurePostmasterWaitSet(false);
3781
3782                 /*
3783                  * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3784                  * (ie, no dead_end children remain), and the archiver is gone too.
3785                  *
3786                  * The reason we wait for those two is to protect them against a new
3787                  * postmaster starting conflicting subprocesses; this isn't an
3788                  * ironclad protection, but it at least helps in the
3789                  * shutdown-and-immediately-restart scenario.  Note that they have
3790                  * already been sent appropriate shutdown signals, either during a
3791                  * normal state transition leading up to PM_WAIT_DEAD_END, or during
3792                  * FatalError processing.
3793                  */
3794                 if (dlist_is_empty(&BackendList) && PgArchPID == 0)
3795                 {
3796                         /* These other guys should be dead already */
3797                         Assert(StartupPID == 0);
3798                         Assert(WalReceiverPID == 0);
3799                         Assert(BgWriterPID == 0);
3800                         Assert(CheckpointerPID == 0);
3801                         Assert(WalWriterPID == 0);
3802                         Assert(AutoVacPID == 0);
3803                         /* syslogger is not considered here */
3804                         pmState = PM_NO_CHILDREN;
3805                 }
3806         }
3807
3808         /*
3809          * If we've been told to shut down, we exit as soon as there are no
3810          * remaining children.  If there was a crash, cleanup will occur at the
3811          * next startup.  (Before PostgreSQL 8.3, we tried to recover from the
3812          * crash before exiting, but that seems unwise if we are quitting because
3813          * we got SIGTERM from init --- there may well not be time for recovery
3814          * before init decides to SIGKILL us.)
3815          *
3816          * Note that the syslogger continues to run.  It will exit when it sees
3817          * EOF on its input pipe, which happens when there are no more upstream
3818          * processes.
3819          */
3820         if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3821         {
3822                 if (FatalError)
3823                 {
3824                         ereport(LOG, (errmsg("abnormal database system shutdown")));
3825                         ExitPostmaster(1);
3826                 }
3827                 else
3828                 {
3829                         /*
3830                          * Normal exit from the postmaster is here.  We don't need to log
3831                          * anything here, since the UnlinkLockFiles proc_exit callback
3832                          * will do so, and that should be the last user-visible action.
3833                          */
3834                         ExitPostmaster(0);
3835                 }
3836         }
3837
3838         /*
3839          * If the startup process failed, or the user does not want an automatic
3840          * restart after backend crashes, wait for all non-syslogger children to
3841          * exit, and then exit postmaster.  We don't try to reinitialize when the
3842          * startup process fails, because more than likely it will just fail again
3843          * and we will keep trying forever.
3844          */
3845         if (pmState == PM_NO_CHILDREN)
3846         {
3847                 if (StartupStatus == STARTUP_CRASHED)
3848                 {
3849                         ereport(LOG,
3850                                         (errmsg("shutting down due to startup process failure")));
3851                         ExitPostmaster(1);
3852                 }
3853                 if (!restart_after_crash)
3854                 {
3855                         ereport(LOG,
3856                                         (errmsg("shutting down because restart_after_crash is off")));
3857                         ExitPostmaster(1);
3858                 }
3859         }
3860
3861         /*
3862          * If we need to recover from a crash, wait for all non-syslogger children
3863          * to exit, then reset shmem and StartupDataBase.
3864          */
3865         if (FatalError && pmState == PM_NO_CHILDREN)
3866         {
3867                 ereport(LOG,
3868                                 (errmsg("all server processes terminated; reinitializing")));
3869
3870                 /* remove leftover temporary files after a crash */
3871                 if (remove_temp_files_after_crash)
3872                         RemovePgTempFiles();
3873
3874                 /* allow background workers to immediately restart */
3875                 ResetBackgroundWorkerCrashTimes();
3876
3877                 shmem_exit(1);
3878
3879                 /* re-read control file into local memory */
3880                 LocalProcessControlFile(true);
3881
3882                 /* re-create shared memory and semaphores */
3883                 CreateSharedMemoryAndSemaphores();
3884
3885                 StartupPID = StartupDataBase();
3886                 Assert(StartupPID != 0);
3887                 StartupStatus = STARTUP_RUNNING;
3888                 pmState = PM_STARTUP;
3889                 /* crash recovery started, reset SIGKILL flag */
3890                 AbortStartTime = 0;
3891
3892                 /* start accepting server socket connection events again */
3893                 ConfigurePostmasterWaitSet(true);
3894         }
3895 }
3896
3897
3898 /*
3899  * Send a signal to a postmaster child process
3900  *
3901  * On systems that have setsid(), each child process sets itself up as a
3902  * process group leader.  For signals that are generally interpreted in the
3903  * appropriate fashion, we signal the entire process group not just the
3904  * direct child process.  This allows us to, for example, SIGQUIT a blocked
3905  * archive_recovery script, or SIGINT a script being run by a backend via
3906  * system().
3907  *
3908  * There is a race condition for recently-forked children: they might not
3909  * have executed setsid() yet.  So we signal the child directly as well as
3910  * the group.  We assume such a child will handle the signal before trying
3911  * to spawn any grandchild processes.  We also assume that signaling the
3912  * child twice will not cause any problems.
3913  */
3914 static void
3915 signal_child(pid_t pid, int signal)
3916 {
3917         if (kill(pid, signal) < 0)
3918                 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3919 #ifdef HAVE_SETSID
3920         switch (signal)
3921         {
3922                 case SIGINT:
3923                 case SIGTERM:
3924                 case SIGQUIT:
3925                 case SIGKILL:
3926                 case SIGABRT:
3927                         if (kill(-pid, signal) < 0)
3928                                 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3929                         break;
3930                 default:
3931                         break;
3932         }
3933 #endif
3934 }
3935
3936 /*
3937  * Convenience function for killing a child process after a crash of some
3938  * other child process.  We log the action at a higher level than we would
3939  * otherwise do, and we apply send_abort_for_crash to decide which signal
3940  * to send.  Normally it's SIGQUIT -- and most other comments in this file
3941  * are written on the assumption that it is -- but developers might prefer
3942  * to use SIGABRT to collect per-child core dumps.
3943  */
3944 static void
3945 sigquit_child(pid_t pid)
3946 {
3947         ereport(DEBUG2,
3948                         (errmsg_internal("sending %s to process %d",
3949                                                          (send_abort_for_crash ? "SIGABRT" : "SIGQUIT"),
3950                                                          (int) pid)));
3951         signal_child(pid, (send_abort_for_crash ? SIGABRT : SIGQUIT));
3952 }
3953
3954 /*
3955  * Send a signal to the targeted children (but NOT special children;
3956  * dead_end children are never signaled, either).
3957  */
3958 static bool
3959 SignalSomeChildren(int signal, int target)
3960 {
3961         dlist_iter      iter;
3962         bool            signaled = false;
3963
3964         dlist_foreach(iter, &BackendList)
3965         {
3966                 Backend    *bp = dlist_container(Backend, elem, iter.cur);
3967
3968                 if (bp->dead_end)
3969                         continue;
3970
3971                 /*
3972                  * Since target == BACKEND_TYPE_ALL is the most common case, we test
3973                  * it first and avoid touching shared memory for every child.
3974                  */
3975                 if (target != BACKEND_TYPE_ALL)
3976                 {
3977                         /*
3978                          * Assign bkend_type for any recently announced WAL Sender
3979                          * processes.
3980                          */
3981                         if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
3982                                 IsPostmasterChildWalSender(bp->child_slot))
3983                                 bp->bkend_type = BACKEND_TYPE_WALSND;
3984
3985                         if (!(target & bp->bkend_type))
3986                                 continue;
3987                 }
3988
3989                 ereport(DEBUG4,
3990                                 (errmsg_internal("sending signal %d to process %d",
3991                                                                  signal, (int) bp->pid)));
3992                 signal_child(bp->pid, signal);
3993                 signaled = true;
3994         }
3995         return signaled;
3996 }
3997
3998 /*
3999  * Send a termination signal to children.  This considers all of our children
4000  * processes, except syslogger and dead_end backends.
4001  */
4002 static void
4003 TerminateChildren(int signal)
4004 {
4005         SignalChildren(signal);
4006         if (StartupPID != 0)
4007         {
4008                 signal_child(StartupPID, signal);
4009                 if (signal == SIGQUIT || signal == SIGKILL || signal == SIGABRT)
4010                         StartupStatus = STARTUP_SIGNALED;
4011         }
4012         if (BgWriterPID != 0)
4013                 signal_child(BgWriterPID, signal);
4014         if (CheckpointerPID != 0)
4015                 signal_child(CheckpointerPID, signal);
4016         if (WalWriterPID != 0)
4017                 signal_child(WalWriterPID, signal);
4018         if (WalReceiverPID != 0)
4019                 signal_child(WalReceiverPID, signal);
4020         if (AutoVacPID != 0)
4021                 signal_child(AutoVacPID, signal);
4022         if (PgArchPID != 0)
4023                 signal_child(PgArchPID, signal);
4024 }
4025
4026 /*
4027  * BackendStartup -- start backend process
4028  *
4029  * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4030  *
4031  * Note: if you change this code, also consider StartAutovacuumWorker.
4032  */
4033 static int
4034 BackendStartup(Port *port)
4035 {
4036         Backend    *bn;                         /* for backend cleanup */
4037         pid_t           pid;
4038
4039         /*
4040          * Create backend data structure.  Better before the fork() so we can
4041          * handle failure cleanly.
4042          */
4043         bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
4044         if (!bn)
4045         {
4046                 ereport(LOG,
4047                                 (errcode(ERRCODE_OUT_OF_MEMORY),
4048                                  errmsg("out of memory")));
4049                 return STATUS_ERROR;
4050         }
4051
4052         /*
4053          * Compute the cancel key that will be assigned to this backend. The
4054          * backend will have its own copy in the forked-off process' value of
4055          * MyCancelKey, so that it can transmit the key to the frontend.
4056          */
4057         if (!RandomCancelKey(&MyCancelKey))
4058         {
4059                 pfree(bn);
4060                 ereport(LOG,
4061                                 (errcode(ERRCODE_INTERNAL_ERROR),
4062                                  errmsg("could not generate random cancel key")));
4063                 return STATUS_ERROR;
4064         }
4065
4066         bn->cancel_key = MyCancelKey;
4067
4068         /* Pass down canAcceptConnections state */
4069         port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);
4070         bn->dead_end = (port->canAcceptConnections != CAC_OK);
4071
4072         /*
4073          * Unless it's a dead_end child, assign it a child slot number
4074          */
4075         if (!bn->dead_end)
4076                 bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4077         else
4078                 bn->child_slot = 0;
4079
4080         /* Hasn't asked to be notified about any bgworkers yet */
4081         bn->bgworker_notify = false;
4082
4083 #ifdef EXEC_BACKEND
4084         pid = backend_forkexec(port);
4085 #else                                                   /* !EXEC_BACKEND */
4086         pid = fork_process();
4087         if (pid == 0)                           /* child */
4088         {
4089                 /* Detangle from postmaster */
4090                 InitPostmasterChild();
4091
4092                 /* Close the postmaster's sockets */
4093                 ClosePostmasterPorts(false);
4094
4095                 /* Perform additional initialization and collect startup packet */
4096                 BackendInitialize(port);
4097
4098                 /*
4099                  * Create a per-backend PGPROC struct in shared memory. We must do
4100                  * this before we can use LWLocks. In the !EXEC_BACKEND case (here)
4101                  * this could be delayed a bit further, but EXEC_BACKEND needs to do
4102                  * stuff with LWLocks before PostgresMain(), so we do it here as well
4103                  * for symmetry.
4104                  */
4105                 InitProcess();
4106
4107                 /* And run the backend */
4108                 BackendRun(port);
4109         }
4110 #endif                                                  /* EXEC_BACKEND */
4111
4112         if (pid < 0)
4113         {
4114                 /* in parent, fork failed */
4115                 int                     save_errno = errno;
4116
4117                 if (!bn->dead_end)
4118                         (void) ReleasePostmasterChildSlot(bn->child_slot);
4119                 pfree(bn);
4120                 errno = save_errno;
4121                 ereport(LOG,
4122                                 (errmsg("could not fork new process for connection: %m")));
4123                 report_fork_failure_to_client(port, save_errno);
4124                 return STATUS_ERROR;
4125         }
4126
4127         /* in parent, successful fork */
4128         ereport(DEBUG2,
4129                         (errmsg_internal("forked new backend, pid=%d socket=%d",
4130                                                          (int) pid, (int) port->sock)));
4131
4132         /*
4133          * Everything's been successful, it's safe to add this backend to our list
4134          * of backends.
4135          */
4136         bn->pid = pid;
4137         bn->bkend_type = BACKEND_TYPE_NORMAL;   /* Can change later to WALSND */
4138         dlist_push_head(&BackendList, &bn->elem);
4139
4140 #ifdef EXEC_BACKEND
4141         if (!bn->dead_end)
4142                 ShmemBackendArrayAdd(bn);
4143 #endif
4144
4145         return STATUS_OK;
4146 }
4147
4148 /*
4149  * Try to report backend fork() failure to client before we close the
4150  * connection.  Since we do not care to risk blocking the postmaster on
4151  * this connection, we set the connection to non-blocking and try only once.
4152  *
4153  * This is grungy special-purpose code; we cannot use backend libpq since
4154  * it's not up and running.
4155  */
4156 static void
4157 report_fork_failure_to_client(Port *port, int errnum)
4158 {
4159         char            buffer[1000];
4160         int                     rc;
4161
4162         /* Format the error message packet (always V2 protocol) */
4163         snprintf(buffer, sizeof(buffer), "E%s%s\n",
4164                          _("could not fork new process for connection: "),
4165                          strerror(errnum));
4166
4167         /* Set port to non-blocking.  Don't do send() if this fails */
4168         if (!pg_set_noblock(port->sock))
4169                 return;
4170
4171         /* We'll retry after EINTR, but ignore all other failures */
4172         do
4173         {
4174                 rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4175         } while (rc < 0 && errno == EINTR);
4176 }
4177
4178
4179 /*
4180  * BackendInitialize -- initialize an interactive (postmaster-child)
4181  *                              backend process, and collect the client's startup packet.
4182  *
4183  * returns: nothing.  Will not return at all if there's any failure.
4184  *
4185  * Note: this code does not depend on having any access to shared memory.
4186  * Indeed, our approach to SIGTERM/timeout handling *requires* that
4187  * shared memory not have been touched yet; see comments within.
4188  * In the EXEC_BACKEND case, we are physically attached to shared memory
4189  * but have not yet set up most of our local pointers to shmem structures.
4190  */
4191 static void
4192 BackendInitialize(Port *port)
4193 {
4194         int                     status;
4195         int                     ret;
4196         char            remote_host[NI_MAXHOST];
4197         char            remote_port[NI_MAXSERV];
4198         StringInfoData ps_data;
4199
4200         /* Save port etc. for ps status */
4201         MyProcPort = port;
4202
4203         /* Tell fd.c about the long-lived FD associated with the port */
4204         ReserveExternalFD();
4205
4206         /*
4207          * PreAuthDelay is a debugging aid for investigating problems in the
4208          * authentication cycle: it can be set in postgresql.conf to allow time to
4209          * attach to the newly-forked backend with a debugger.  (See also
4210          * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4211          * is not honored until after authentication.)
4212          */
4213         if (PreAuthDelay > 0)
4214                 pg_usleep(PreAuthDelay * 1000000L);
4215
4216         /* This flag will remain set until InitPostgres finishes authentication */
4217         ClientAuthInProgress = true;    /* limit visibility of log messages */
4218
4219         /* set these to empty in case they are needed before we set them up */
4220         port->remote_host = "";
4221         port->remote_port = "";
4222
4223         /*
4224          * Initialize libpq and enable reporting of ereport errors to the client.
4225          * Must do this now because authentication uses libpq to send messages.
4226          */
4227         pq_init();                                      /* initialize libpq to talk to client */
4228         whereToSendOutput = DestRemote; /* now safe to ereport to client */
4229
4230         /*
4231          * We arrange to do _exit(1) if we receive SIGTERM or timeout while trying
4232          * to collect the startup packet; while SIGQUIT results in _exit(2).
4233          * Otherwise the postmaster cannot shutdown the database FAST or IMMED
4234          * cleanly if a buggy client fails to send the packet promptly.
4235          *
4236          * Exiting with _exit(1) is only possible because we have not yet touched
4237          * shared memory; therefore no outside-the-process state needs to get
4238          * cleaned up.
4239          */
4240         pqsignal(SIGTERM, process_startup_packet_die);
4241         /* SIGQUIT handler was already set up by InitPostmasterChild */
4242         InitializeTimeouts();           /* establishes SIGALRM handler */
4243         sigprocmask(SIG_SETMASK, &StartupBlockSig, NULL);
4244
4245         /*
4246          * Get the remote host name and port for logging and status display.
4247          */
4248         remote_host[0] = '\0';
4249         remote_port[0] = '\0';
4250         if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4251                                                                   remote_host, sizeof(remote_host),
4252                                                                   remote_port, sizeof(remote_port),
4253                                                                   (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4254                 ereport(WARNING,
4255                                 (errmsg_internal("pg_getnameinfo_all() failed: %s",
4256                                                                  gai_strerror(ret))));
4257
4258         /*
4259          * Save remote_host and remote_port in port structure (after this, they
4260          * will appear in log_line_prefix data for log messages).
4261          */
4262         port->remote_host = strdup(remote_host);
4263         port->remote_port = strdup(remote_port);
4264
4265         /* And now we can issue the Log_connections message, if wanted */
4266         if (Log_connections)
4267         {
4268                 if (remote_port[0])
4269                         ereport(LOG,
4270                                         (errmsg("connection received: host=%s port=%s",
4271                                                         remote_host,
4272                                                         remote_port)));
4273                 else
4274                         ereport(LOG,
4275                                         (errmsg("connection received: host=%s",
4276                                                         remote_host)));
4277         }
4278
4279         /*
4280          * If we did a reverse lookup to name, we might as well save the results
4281          * rather than possibly repeating the lookup during authentication.
4282          *
4283          * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4284          * get nothing useful for a client without an rDNS entry.  Therefore, we
4285          * must check whether we got a numeric IPv4 or IPv6 address, and not save
4286          * it into remote_hostname if so.  (This test is conservative and might
4287          * sometimes classify a hostname as numeric, but an error in that
4288          * direction is safe; it only results in a possible extra lookup.)
4289          */
4290         if (log_hostname &&
4291                 ret == 0 &&
4292                 strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4293                 strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4294                 port->remote_hostname = strdup(remote_host);
4295
4296         /*
4297          * Ready to begin client interaction.  We will give up and _exit(1) after
4298          * a time delay, so that a broken client can't hog a connection
4299          * indefinitely.  PreAuthDelay and any DNS interactions above don't count
4300          * against the time limit.
4301          *
4302          * Note: AuthenticationTimeout is applied here while waiting for the
4303          * startup packet, and then again in InitPostgres for the duration of any
4304          * authentication operations.  So a hostile client could tie up the
4305          * process for nearly twice AuthenticationTimeout before we kick him off.
4306          *
4307          * Note: because PostgresMain will call InitializeTimeouts again, the
4308          * registration of STARTUP_PACKET_TIMEOUT will be lost.  This is okay
4309          * since we never use it again after this function.
4310          */
4311         RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler);
4312         enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * 1000);
4313
4314         /*
4315          * Receive the startup packet (which might turn out to be a cancel request
4316          * packet).
4317          */
4318         status = ProcessStartupPacket(port, false, false);
4319
4320         /*
4321          * If we're going to reject the connection due to database state, say so
4322          * now instead of wasting cycles on an authentication exchange. (This also
4323          * allows a pg_ping utility to be written.)
4324          */
4325         switch (port->canAcceptConnections)
4326         {
4327                 case CAC_STARTUP:
4328                         ereport(FATAL,
4329                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4330                                          errmsg("the database system is starting up")));
4331                         break;
4332                 case CAC_NOTCONSISTENT:
4333                         if (EnableHotStandby)
4334                                 ereport(FATAL,
4335                                                 (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4336                                                  errmsg("the database system is not yet accepting connections"),
4337                                                  errdetail("Consistent recovery state has not been yet reached.")));
4338                         else
4339                                 ereport(FATAL,
4340                                                 (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4341                                                  errmsg("the database system is not accepting connections"),
4342                                                  errdetail("Hot standby mode is disabled.")));
4343                         break;
4344                 case CAC_SHUTDOWN:
4345                         ereport(FATAL,
4346                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4347                                          errmsg("the database system is shutting down")));
4348                         break;
4349                 case CAC_RECOVERY:
4350                         ereport(FATAL,
4351                                         (errcode(ERRCODE_CANNOT_CONNECT_NOW),
4352                                          errmsg("the database system is in recovery mode")));
4353                         break;
4354                 case CAC_TOOMANY:
4355                         ereport(FATAL,
4356                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
4357                                          errmsg("sorry, too many clients already")));
4358                         break;
4359                 case CAC_OK:
4360                         break;
4361         }
4362
4363         /*
4364          * Disable the timeout, and prevent SIGTERM again.
4365          */
4366         disable_timeout(STARTUP_PACKET_TIMEOUT, false);
4367         sigprocmask(SIG_SETMASK, &BlockSig, NULL);
4368
4369         /*
4370          * As a safety check that nothing in startup has yet performed
4371          * shared-memory modifications that would need to be undone if we had
4372          * exited through SIGTERM or timeout above, check that no on_shmem_exit
4373          * handlers have been registered yet.  (This isn't terribly bulletproof,
4374          * since someone might misuse an on_proc_exit handler for shmem cleanup,
4375          * but it's a cheap and helpful check.  We cannot disallow on_proc_exit
4376          * handlers unfortunately, since pq_init() already registered one.)
4377          */
4378         check_on_shmem_exit_lists_are_empty();
4379
4380         /*
4381          * Stop here if it was bad or a cancel packet.  ProcessStartupPacket
4382          * already did any appropriate error reporting.
4383          */
4384         if (status != STATUS_OK)
4385                 proc_exit(0);
4386
4387         /*
4388          * Now that we have the user and database name, we can set the process
4389          * title for ps.  It's good to do this as early as possible in startup.
4390          */
4391         initStringInfo(&ps_data);
4392         if (am_walsender)
4393                 appendStringInfo(&ps_data, "%s ", GetBackendTypeDesc(B_WAL_SENDER));
4394         appendStringInfo(&ps_data, "%s ", port->user_name);
4395         if (port->database_name[0] != '\0')
4396                 appendStringInfo(&ps_data, "%s ", port->database_name);
4397         appendStringInfoString(&ps_data, port->remote_host);
4398         if (port->remote_port[0] != '\0')
4399                 appendStringInfo(&ps_data, "(%s)", port->remote_port);
4400
4401         init_ps_display(ps_data.data);
4402         pfree(ps_data.data);
4403
4404         set_ps_display("initializing");
4405 }
4406
4407
4408 /*
4409  * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4410  *
4411  * returns:
4412  *              Doesn't return at all.
4413  */
4414 static void
4415 BackendRun(Port *port)
4416 {
4417         /*
4418          * Make sure we aren't in PostmasterContext anymore.  (We can't delete it
4419          * just yet, though, because InitPostgres will need the HBA data.)
4420          */
4421         MemoryContextSwitchTo(TopMemoryContext);
4422
4423         PostgresMain(port->database_name, port->user_name);
4424 }
4425
4426
4427 #ifdef EXEC_BACKEND
4428
4429 /*
4430  * postmaster_forkexec -- fork and exec a postmaster subprocess
4431  *
4432  * The caller must have set up the argv array already, except for argv[2]
4433  * which will be filled with the name of the temp variable file.
4434  *
4435  * Returns the child process PID, or -1 on fork failure (a suitable error
4436  * message has been logged on failure).
4437  *
4438  * All uses of this routine will dispatch to SubPostmasterMain in the
4439  * child process.
4440  */
4441 pid_t
4442 postmaster_forkexec(int argc, char *argv[])
4443 {
4444         Port            port;
4445
4446         /* This entry point passes dummy values for the Port variables */
4447         memset(&port, 0, sizeof(port));
4448         return internal_forkexec(argc, argv, &port);
4449 }
4450
4451 /*
4452  * backend_forkexec -- fork/exec off a backend process
4453  *
4454  * Some operating systems (WIN32) don't have fork() so we have to simulate
4455  * it by storing parameters that need to be passed to the child and
4456  * then create a new child process.
4457  *
4458  * returns the pid of the fork/exec'd process, or -1 on failure
4459  */
4460 static pid_t
4461 backend_forkexec(Port *port)
4462 {
4463         char       *av[4];
4464         int                     ac = 0;
4465
4466         av[ac++] = "postgres";
4467         av[ac++] = "--forkbackend";
4468         av[ac++] = NULL;                        /* filled in by internal_forkexec */
4469
4470         av[ac] = NULL;
4471         Assert(ac < lengthof(av));
4472
4473         return internal_forkexec(ac, av, port);
4474 }
4475
4476 #ifndef WIN32
4477
4478 /*
4479  * internal_forkexec non-win32 implementation
4480  *
4481  * - writes out backend variables to the parameter file
4482  * - fork():s, and then exec():s the child process
4483  */
4484 static pid_t
4485 internal_forkexec(int argc, char *argv[], Port *port)
4486 {
4487         static unsigned long tmpBackendFileNum = 0;
4488         pid_t           pid;
4489         char            tmpfilename[MAXPGPATH];
4490         BackendParameters param;
4491         FILE       *fp;
4492
4493         if (!save_backend_variables(&param, port))
4494                 return -1;                              /* log made by save_backend_variables */
4495
4496         /* Calculate name for temp file */
4497         snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4498                          PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
4499                          MyProcPid, ++tmpBackendFileNum);
4500
4501         /* Open file */
4502         fp = AllocateFile(tmpfilename, PG_BINARY_W);
4503         if (!fp)
4504         {
4505                 /*
4506                  * As in OpenTemporaryFileInTablespace, try to make the temp-file
4507                  * directory, ignoring errors.
4508                  */
4509                 (void) MakePGDirectory(PG_TEMP_FILES_DIR);
4510
4511                 fp = AllocateFile(tmpfilename, PG_BINARY_W);
4512                 if (!fp)
4513                 {
4514                         ereport(LOG,
4515                                         (errcode_for_file_access(),
4516                                          errmsg("could not create file \"%s\": %m",
4517                                                         tmpfilename)));
4518                         return -1;
4519                 }
4520         }
4521
4522         if (fwrite(&param, sizeof(param), 1, fp) != 1)
4523         {
4524                 ereport(LOG,
4525                                 (errcode_for_file_access(),
4526                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4527                 FreeFile(fp);
4528                 return -1;
4529         }
4530
4531         /* Release file */
4532         if (FreeFile(fp))
4533         {
4534                 ereport(LOG,
4535                                 (errcode_for_file_access(),
4536                                  errmsg("could not write to file \"%s\": %m", tmpfilename)));
4537                 return -1;
4538         }
4539
4540         /* Make sure caller set up argv properly */
4541         Assert(argc >= 3);
4542         Assert(argv[argc] == NULL);
4543         Assert(strncmp(argv[1], "--fork", 6) == 0);
4544         Assert(argv[2] == NULL);
4545
4546         /* Insert temp file name after --fork argument */
4547         argv[2] = tmpfilename;
4548
4549         /* Fire off execv in child */
4550         if ((pid = fork_process()) == 0)
4551         {
4552                 if (execv(postgres_exec_path, argv) < 0)
4553                 {
4554                         ereport(LOG,
4555                                         (errmsg("could not execute server process \"%s\": %m",
4556                                                         postgres_exec_path)));
4557                         /* We're already in the child process here, can't return */
4558                         exit(1);
4559                 }
4560         }
4561
4562         return pid;                                     /* Parent returns pid, or -1 on fork failure */
4563 }
4564 #else                                                   /* WIN32 */
4565
4566 /*
4567  * internal_forkexec win32 implementation
4568  *
4569  * - starts backend using CreateProcess(), in suspended state
4570  * - writes out backend variables to the parameter file
4571  *      - during this, duplicates handles and sockets required for
4572  *        inheritance into the new process
4573  * - resumes execution of the new process once the backend parameter
4574  *       file is complete.
4575  */
4576 static pid_t
4577 internal_forkexec(int argc, char *argv[], Port *port)
4578 {
4579         int                     retry_count = 0;
4580         STARTUPINFO si;
4581         PROCESS_INFORMATION pi;
4582         int                     i;
4583         int                     j;
4584         char            cmdLine[MAXPGPATH * 2];
4585         HANDLE          paramHandle;
4586         BackendParameters *param;
4587         SECURITY_ATTRIBUTES sa;
4588         char            paramHandleStr[32];
4589         win32_deadchild_waitinfo *childinfo;
4590
4591         /* Make sure caller set up argv properly */
4592         Assert(argc >= 3);
4593         Assert(argv[argc] == NULL);
4594         Assert(strncmp(argv[1], "--fork", 6) == 0);
4595         Assert(argv[2] == NULL);
4596
4597         /* Resume here if we need to retry */
4598 retry:
4599
4600         /* Set up shared memory for parameter passing */
4601         ZeroMemory(&sa, sizeof(sa));
4602         sa.nLength = sizeof(sa);
4603         sa.bInheritHandle = TRUE;
4604         paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4605                                                                         &sa,
4606                                                                         PAGE_READWRITE,
4607                                                                         0,
4608                                                                         sizeof(BackendParameters),
4609                                                                         NULL);
4610         if (paramHandle == INVALID_HANDLE_VALUE)
4611         {
4612                 ereport(LOG,
4613                                 (errmsg("could not create backend parameter file mapping: error code %lu",
4614                                                 GetLastError())));
4615                 return -1;
4616         }
4617
4618         param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4619         if (!param)
4620         {
4621                 ereport(LOG,
4622                                 (errmsg("could not map backend parameter memory: error code %lu",
4623                                                 GetLastError())));
4624                 CloseHandle(paramHandle);
4625                 return -1;
4626         }
4627
4628         /* Insert temp file name after --fork argument */
4629 #ifdef _WIN64
4630         sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4631 #else
4632         sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4633 #endif
4634         argv[2] = paramHandleStr;
4635
4636         /* Format the cmd line */
4637         cmdLine[sizeof(cmdLine) - 1] = '\0';
4638         cmdLine[sizeof(cmdLine) - 2] = '\0';
4639         snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4640         i = 0;
4641         while (argv[++i] != NULL)
4642         {
4643                 j = strlen(cmdLine);
4644                 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4645         }
4646         if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4647         {
4648                 ereport(LOG,
4649                                 (errmsg("subprocess command line too long")));
4650                 UnmapViewOfFile(param);
4651                 CloseHandle(paramHandle);
4652                 return -1;
4653         }
4654
4655         memset(&pi, 0, sizeof(pi));
4656         memset(&si, 0, sizeof(si));
4657         si.cb = sizeof(si);
4658
4659         /*
4660          * Create the subprocess in a suspended state. This will be resumed later,
4661          * once we have written out the parameter file.
4662          */
4663         if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4664                                            NULL, NULL, &si, &pi))
4665         {
4666                 ereport(LOG,
4667                                 (errmsg("CreateProcess() call failed: %m (error code %lu)",
4668                                                 GetLastError())));
4669                 UnmapViewOfFile(param);
4670                 CloseHandle(paramHandle);
4671                 return -1;
4672         }
4673
4674         if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4675         {
4676                 /*
4677                  * log made by save_backend_variables, but we have to clean up the
4678                  * mess with the half-started process
4679                  */
4680                 if (!TerminateProcess(pi.hProcess, 255))
4681                         ereport(LOG,
4682                                         (errmsg_internal("could not terminate unstarted process: error code %lu",
4683                                                                          GetLastError())));
4684                 CloseHandle(pi.hProcess);
4685                 CloseHandle(pi.hThread);
4686                 UnmapViewOfFile(param);
4687                 CloseHandle(paramHandle);
4688                 return -1;                              /* log made by save_backend_variables */
4689         }
4690
4691         /* Drop the parameter shared memory that is now inherited to the backend */
4692         if (!UnmapViewOfFile(param))
4693                 ereport(LOG,
4694                                 (errmsg("could not unmap view of backend parameter file: error code %lu",
4695                                                 GetLastError())));
4696         if (!CloseHandle(paramHandle))
4697                 ereport(LOG,
4698                                 (errmsg("could not close handle to backend parameter file: error code %lu",
4699                                                 GetLastError())));
4700
4701         /*
4702          * Reserve the memory region used by our main shared memory segment before
4703          * we resume the child process.  Normally this should succeed, but if ASLR
4704          * is active then it might sometimes fail due to the stack or heap having
4705          * gotten mapped into that range.  In that case, just terminate the
4706          * process and retry.
4707          */
4708         if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4709         {
4710                 /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4711                 if (!TerminateProcess(pi.hProcess, 255))
4712                         ereport(LOG,
4713                                         (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4714                                                                          GetLastError())));
4715                 CloseHandle(pi.hProcess);
4716                 CloseHandle(pi.hThread);
4717                 if (++retry_count < 100)
4718                         goto retry;
4719                 ereport(LOG,
4720                                 (errmsg("giving up after too many tries to reserve shared memory"),
4721                                  errhint("This might be caused by ASLR or antivirus software.")));
4722                 return -1;
4723         }
4724
4725         /*
4726          * Now that the backend variables are written out, we start the child
4727          * thread so it can start initializing while we set up the rest of the
4728          * parent state.
4729          */
4730         if (ResumeThread(pi.hThread) == -1)
4731         {
4732                 if (!TerminateProcess(pi.hProcess, 255))
4733                 {
4734                         ereport(LOG,
4735                                         (errmsg_internal("could not terminate unstartable process: error code %lu",
4736                                                                          GetLastError())));
4737                         CloseHandle(pi.hProcess);
4738                         CloseHandle(pi.hThread);
4739                         return -1;
4740                 }
4741                 CloseHandle(pi.hProcess);
4742                 CloseHandle(pi.hThread);
4743                 ereport(LOG,
4744                                 (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4745                                                                  GetLastError())));
4746                 return -1;
4747         }
4748
4749         /*
4750          * Queue a waiter to signal when this child dies. The wait will be handled
4751          * automatically by an operating system thread pool.  The memory will be
4752          * freed by a later call to waitpid().
4753          */
4754         childinfo = palloc(sizeof(win32_deadchild_waitinfo));
4755         childinfo->procHandle = pi.hProcess;
4756         childinfo->procId = pi.dwProcessId;
4757
4758         if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4759                                                                          pi.hProcess,
4760                                                                          pgwin32_deadchild_callback,
4761                                                                          childinfo,
4762                                                                          INFINITE,
4763                                                                          WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4764                 ereport(FATAL,
4765                                 (errmsg_internal("could not register process for wait: error code %lu",
4766                                                                  GetLastError())));
4767
4768         /* Don't close pi.hProcess here - waitpid() needs access to it */
4769
4770         CloseHandle(pi.hThread);
4771
4772         return pi.dwProcessId;
4773 }
4774 #endif                                                  /* WIN32 */
4775
4776
4777 /*
4778  * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4779  *                      to what it would be if we'd simply forked on Unix, and then
4780  *                      dispatch to the appropriate place.
4781  *
4782  * The first two command line arguments are expected to be "--forkFOO"
4783  * (where FOO indicates which postmaster child we are to become), and
4784  * the name of a variables file that we can read to load data that would
4785  * have been inherited by fork() on Unix.  Remaining arguments go to the
4786  * subprocess FooMain() routine.
4787  */
4788 void
4789 SubPostmasterMain(int argc, char *argv[])
4790 {
4791         Port            port;
4792
4793         /* In EXEC_BACKEND case we will not have inherited these settings */
4794         IsPostmasterEnvironment = true;
4795         whereToSendOutput = DestNone;
4796
4797         /* Setup essential subsystems (to ensure elog() behaves sanely) */
4798         InitializeGUCOptions();
4799
4800         /* Check we got appropriate args */
4801         if (argc < 3)
4802                 elog(FATAL, "invalid subpostmaster invocation");
4803
4804         /* Read in the variables file */
4805         memset(&port, 0, sizeof(Port));
4806         read_backend_variables(argv[2], &port);
4807
4808         /* Close the postmaster's sockets (as soon as we know them) */
4809         ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4810
4811         /* Setup as postmaster child */
4812         InitPostmasterChild();
4813
4814         /*
4815          * If appropriate, physically re-attach to shared memory segment. We want
4816          * to do this before going any further to ensure that we can attach at the
4817          * same address the postmaster used.  On the other hand, if we choose not
4818          * to re-attach, we may have other cleanup to do.
4819          *
4820          * If testing EXEC_BACKEND on Linux, you should run this as root before
4821          * starting the postmaster:
4822          *
4823          * sysctl -w kernel.randomize_va_space=0
4824          *
4825          * This prevents using randomized stack and code addresses that cause the
4826          * child process's memory map to be different from the parent's, making it
4827          * sometimes impossible to attach to shared memory at the desired address.
4828          * Return the setting to its old value (usually '1' or '2') when finished.
4829          */
4830         if (strcmp(argv[1], "--forkbackend") == 0 ||
4831                 strcmp(argv[1], "--forkavlauncher") == 0 ||
4832                 strcmp(argv[1], "--forkavworker") == 0 ||
4833                 strcmp(argv[1], "--forkaux") == 0 ||
4834                 strncmp(argv[1], "--forkbgworker=", 15) == 0)
4835                 PGSharedMemoryReAttach();
4836         else
4837                 PGSharedMemoryNoReAttach();
4838
4839         /* autovacuum needs this set before calling InitProcess */
4840         if (strcmp(argv[1], "--forkavlauncher") == 0)
4841                 AutovacuumLauncherIAm();
4842         if (strcmp(argv[1], "--forkavworker") == 0)
4843                 AutovacuumWorkerIAm();
4844
4845         /* Read in remaining GUC variables */
4846         read_nondefault_variables();
4847
4848         /*
4849          * Check that the data directory looks valid, which will also check the
4850          * privileges on the data directory and update our umask and file/group
4851          * variables for creating files later.  Note: this should really be done
4852          * before we create any files or directories.
4853          */
4854         checkDataDir();
4855
4856         /*
4857          * (re-)read control file, as it contains config. The postmaster will
4858          * already have read this, but this process doesn't know about that.
4859          */
4860         LocalProcessControlFile(false);
4861
4862         /*
4863          * Reload any libraries that were preloaded by the postmaster.  Since we
4864          * exec'd this process, those libraries didn't come along with us; but we
4865          * should load them into all child processes to be consistent with the
4866          * non-EXEC_BACKEND behavior.
4867          */
4868         process_shared_preload_libraries();
4869
4870         /* Run backend or appropriate child */
4871         if (strcmp(argv[1], "--forkbackend") == 0)
4872         {
4873                 Assert(argc == 3);              /* shouldn't be any more args */
4874
4875                 /*
4876                  * Need to reinitialize the SSL library in the backend, since the
4877                  * context structures contain function pointers and cannot be passed
4878                  * through the parameter file.
4879                  *
4880                  * If for some reason reload fails (maybe the user installed broken
4881                  * key files), soldier on without SSL; that's better than all
4882                  * connections becoming impossible.
4883                  *
4884                  * XXX should we do this in all child processes?  For the moment it's
4885                  * enough to do it in backend children.
4886                  */
4887 #ifdef USE_SSL
4888                 if (EnableSSL)
4889                 {
4890                         if (secure_initialize(false) == 0)
4891                                 LoadedSSL = true;
4892                         else
4893                                 ereport(LOG,
4894                                                 (errmsg("SSL configuration could not be loaded in child process")));
4895                 }
4896 #endif
4897
4898                 /*
4899                  * Perform additional initialization and collect startup packet.
4900                  *
4901                  * We want to do this before InitProcess() for a couple of reasons: 1.
4902                  * so that we aren't eating up a PGPROC slot while waiting on the
4903                  * client. 2. so that if InitProcess() fails due to being out of
4904                  * PGPROC slots, we have already initialized libpq and are able to
4905                  * report the error to the client.
4906                  */
4907                 BackendInitialize(&port);
4908
4909                 /* Restore basic shared memory pointers */
4910                 InitShmemAccess(UsedShmemSegAddr);
4911
4912                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4913                 InitProcess();
4914
4915                 /* Attach process to shared data structures */
4916                 CreateSharedMemoryAndSemaphores();
4917
4918                 /* And run the backend */
4919                 BackendRun(&port);              /* does not return */
4920         }
4921         if (strcmp(argv[1], "--forkaux") == 0)
4922         {
4923                 AuxProcType auxtype;
4924
4925                 Assert(argc == 4);
4926
4927                 /* Restore basic shared memory pointers */
4928                 InitShmemAccess(UsedShmemSegAddr);
4929
4930                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4931                 InitAuxiliaryProcess();
4932
4933                 /* Attach process to shared data structures */
4934                 CreateSharedMemoryAndSemaphores();
4935
4936                 auxtype = atoi(argv[3]);
4937                 AuxiliaryProcessMain(auxtype);  /* does not return */
4938         }
4939         if (strcmp(argv[1], "--forkavlauncher") == 0)
4940         {
4941                 /* Restore basic shared memory pointers */
4942                 InitShmemAccess(UsedShmemSegAddr);
4943
4944                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4945                 InitProcess();
4946
4947                 /* Attach process to shared data structures */
4948                 CreateSharedMemoryAndSemaphores();
4949
4950                 AutoVacLauncherMain(argc - 2, argv + 2);        /* does not return */
4951         }
4952         if (strcmp(argv[1], "--forkavworker") == 0)
4953         {
4954                 /* Restore basic shared memory pointers */
4955                 InitShmemAccess(UsedShmemSegAddr);
4956
4957                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4958                 InitProcess();
4959
4960                 /* Attach process to shared data structures */
4961                 CreateSharedMemoryAndSemaphores();
4962
4963                 AutoVacWorkerMain(argc - 2, argv + 2);  /* does not return */
4964         }
4965         if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4966         {
4967                 int                     shmem_slot;
4968
4969                 /* do this as early as possible; in particular, before InitProcess() */
4970                 IsBackgroundWorker = true;
4971
4972                 /* Restore basic shared memory pointers */
4973                 InitShmemAccess(UsedShmemSegAddr);
4974
4975                 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4976                 InitProcess();
4977
4978                 /* Attach process to shared data structures */
4979                 CreateSharedMemoryAndSemaphores();
4980
4981                 /* Fetch MyBgworkerEntry from shared memory */
4982                 shmem_slot = atoi(argv[1] + 15);
4983                 MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
4984
4985                 BackgroundWorkerMain();
4986         }
4987         if (strcmp(argv[1], "--forklog") == 0)
4988         {
4989                 /* Do not want to attach to shared memory */
4990
4991                 SysLoggerMain(argc, argv);      /* does not return */
4992         }
4993
4994         abort();                                        /* shouldn't get here */
4995 }
4996 #endif                                                  /* EXEC_BACKEND */
4997
4998
/*
 * ExitPostmaster -- leave the postmaster process
 *
 * Every postmaster exit must be routed through this function; never call
 * exit() directly.  "status" is passed on unchanged to proc_exit().
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * No cause is known for a postmaster turning multithreaded once it has
	 * started, but recheck here in case an unknown one exists.  LOG level is
	 * used because an unclean shutdown at this point would usually look much
	 * the same as a clean one.
	 */
	if (pthread_is_threaded_np())
	{
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <%s>.", PACKAGE_BUGREPORT)));
	}
#endif

	/*
	 * This function itself neither cleans up shared memory nor kills any
	 * backends (a long-standing note here says it should); it simply hands
	 * over to proc_exit().
	 */
	proc_exit(status);
}
5033
5034 /*
5035  * Handle pmsignal conditions representing requests from backends,
5036  * and check for promote and logrotate requests from pg_ctl.
5037  */
5038 static void
5039 process_pm_pmsignal(void)
5040 {
5041         pending_pm_pmsignal = false;
5042
5043         ereport(DEBUG2,
5044                         (errmsg_internal("postmaster received pmsignal signal")));
5045
5046         /*
5047          * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5048          * unexpected states. If the startup process quickly starts up, completes
5049          * recovery, exits, we might process the death of the startup process
5050          * first. We don't want to go back to recovery in that case.
5051          */
5052         if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
5053                 pmState == PM_STARTUP && Shutdown == NoShutdown)
5054         {
5055                 /* WAL redo has started. We're out of reinitialization. */
5056                 FatalError = false;
5057                 AbortStartTime = 0;
5058
5059                 /*
5060                  * Start the archiver if we're responsible for (re-)archiving received
5061                  * files.
5062                  */
5063                 Assert(PgArchPID == 0);
5064                 if (XLogArchivingAlways())
5065                         PgArchPID = StartArchiver();
5066
5067                 /*
5068                  * If we aren't planning to enter hot standby mode later, treat
5069                  * RECOVERY_STARTED as meaning we're out of startup, and report status
5070                  * accordingly.
5071                  */
5072                 if (!EnableHotStandby)
5073                 {
5074                         AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
5075 #ifdef USE_SYSTEMD
5076                         sd_notify(0, "READY=1");
5077 #endif
5078                 }
5079
5080                 pmState = PM_RECOVERY;
5081         }
5082
5083         if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
5084                 pmState == PM_RECOVERY && Shutdown == NoShutdown)
5085         {
5086                 ereport(LOG,
5087                                 (errmsg("database system is ready to accept read-only connections")));
5088
5089                 /* Report status */
5090                 AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
5091 #ifdef USE_SYSTEMD
5092                 sd_notify(0, "READY=1");
5093 #endif
5094
5095                 pmState = PM_HOT_STANDBY;
5096                 connsAllowed = true;
5097
5098                 /* Some workers may be scheduled to start now */
5099                 StartWorkerNeeded = true;
5100         }
5101
5102         /* Process background worker state changes. */
5103         if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
5104         {
5105                 /* Accept new worker requests only if not stopping. */
5106                 BackgroundWorkerStateChange(pmState < PM_STOP_BACKENDS);
5107                 StartWorkerNeeded = true;
5108         }
5109
5110         if (StartWorkerNeeded || HaveCrashedWorker)
5111                 maybe_start_bgworkers();
5112
5113         /* Tell syslogger to rotate logfile if requested */
5114         if (SysLoggerPID != 0)
5115         {
5116                 if (CheckLogrotateSignal())
5117                 {
5118                         signal_child(SysLoggerPID, SIGUSR1);
5119                         RemoveLogrotateSignalFiles();
5120                 }
5121                 else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
5122                 {
5123                         signal_child(SysLoggerPID, SIGUSR1);
5124                 }
5125         }
5126
5127         if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
5128                 Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
5129         {
5130                 /*
5131                  * Start one iteration of the autovacuum daemon, even if autovacuuming
5132                  * is nominally not enabled.  This is so we can have an active defense
5133                  * against transaction ID wraparound.  We set a flag for the main loop
5134                  * to do it rather than trying to do it here --- this is because the
5135                  * autovac process itself may send the signal, and we want to handle
5136                  * that by launching another iteration as soon as the current one
5137                  * completes.
5138                  */
5139                 start_autovac_launcher = true;
5140         }
5141
5142         if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
5143                 Shutdown <= SmartShutdown && pmState < PM_STOP_BACKENDS)
5144         {
5145                 /* The autovacuum launcher wants us to start a worker process. */
5146                 StartAutovacuumWorker();
5147         }
5148
5149         if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
5150         {
5151                 /* Startup Process wants us to start the walreceiver process. */
5152                 /* Start immediately if possible, else remember request for later. */
5153                 WalReceiverRequested = true;
5154                 MaybeStartWalReceiver();
5155         }
5156
5157         /*
5158          * Try to advance postmaster's state machine, if a child requests it.
5159          *
5160          * Be careful about the order of this action relative to this function's
5161          * other actions.  Generally, this should be after other actions, in case
5162          * they have effects PostmasterStateMachine would need to know about.
5163          * However, we should do it before the CheckPromoteSignal step, which
5164          * cannot have any (immediate) effect on the state machine, but does
5165          * depend on what state we're in now.
5166          */
5167         if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
5168         {
5169                 PostmasterStateMachine();
5170         }
5171
5172         if (StartupPID != 0 &&
5173                 (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5174                  pmState == PM_HOT_STANDBY) &&
5175                 CheckPromoteSignal())
5176         {
5177                 /*
5178                  * Tell startup process to finish recovery.
5179                  *
5180                  * Leave the promote signal file in place and let the Startup process
5181                  * do the unlink.
5182                  */
5183                 signal_child(StartupPID, SIGUSR2);
5184         }
5185 }
5186
/*
 * process_startup_packet_die -- SIGTERM handler used while the startup
 * packet is being processed.
 *
 * proc_exit() would be quite unsafe to run from a signal handler.  Shared
 * memory has not been touched at this stage, though, so it is acceptable to
 * leave through _exit() and skip all atexit handlers.
 *
 * No attempt is made to send or log a reason for the disconnect: doing so
 * from here would be unsafe too, and a client that has not yet completed
 * authentication (or even sent a startup packet) should not be given hints
 * about the state of the database.
 */
static void
process_startup_packet_die(SIGNAL_ARGS)
{
	/* immediate exit, status 1 */
	_exit(1);
}
5205
/*
 * dummy_handler -- signal handler that does nothing
 *
 * Installed for signals the postmaster has no use for but backends do.
 * Setting such signals to SIG_IGN in the postmaster could make a freshly
 * started backend lose a signal delivered before it has reconfigured its own
 * signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
5219
/*
 * StartupPacketTimeoutHandler -- timeout callback used while the startup
 * packet is being processed.
 *
 * Leaves through _exit(1), the same way process_startup_packet_die() does.
 */
static void
StartupPacketTimeoutHandler(void)
{
	/* immediate exit, status 1 */
	_exit(1);
}
5229
5230
5231 /*
5232  * Generate a random cancel key.
5233  */
5234 static bool
5235 RandomCancelKey(int32 *cancel_key)
5236 {
5237         return pg_strong_random(cancel_key, sizeof(int32));
5238 }
5239
5240 /*
5241  * Count up number of child processes of specified types (dead_end children
5242  * are always excluded).
5243  */
5244 static int
5245 CountChildren(int target)
5246 {
5247         dlist_iter      iter;
5248         int                     cnt = 0;
5249
5250         dlist_foreach(iter, &BackendList)
5251         {
5252                 Backend    *bp = dlist_container(Backend, elem, iter.cur);
5253
5254                 if (bp->dead_end)
5255                         continue;
5256
5257                 /*
5258                  * Since target == BACKEND_TYPE_ALL is the most common case, we test
5259                  * it first and avoid touching shared memory for every child.
5260                  */
5261                 if (target != BACKEND_TYPE_ALL)
5262                 {
5263                         /*
5264                          * Assign bkend_type for any recently announced WAL Sender
5265                          * processes.
5266                          */
5267                         if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5268                                 IsPostmasterChildWalSender(bp->child_slot))
5269                                 bp->bkend_type = BACKEND_TYPE_WALSND;
5270
5271                         if (!(target & bp->bkend_type))
5272                                 continue;
5273                 }
5274
5275                 cnt++;
5276         }
5277         return cnt;
5278 }
5279
5280
5281 /*
5282  * StartChildProcess -- start an auxiliary process for the postmaster
5283  *
5284  * "type" determines what kind of child will be started.  All child types
5285  * initially go to AuxiliaryProcessMain, which will handle common setup.
5286  *
5287  * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5288  * to start subprocess.
5289  */
5290 static pid_t
5291 StartChildProcess(AuxProcType type)
5292 {
5293         pid_t           pid;
5294
5295 #ifdef EXEC_BACKEND
5296         {
5297                 char       *av[10];
5298                 int                     ac = 0;
5299                 char            typebuf[32];
5300
5301                 /*
5302                  * Set up command-line arguments for subprocess
5303                  */
5304                 av[ac++] = "postgres";
5305                 av[ac++] = "--forkaux";
5306                 av[ac++] = NULL;                /* filled in by postmaster_forkexec */
5307
5308                 snprintf(typebuf, sizeof(typebuf), "%d", type);
5309                 av[ac++] = typebuf;
5310
5311                 av[ac] = NULL;
5312                 Assert(ac < lengthof(av));
5313
5314                 pid = postmaster_forkexec(ac, av);
5315         }
5316 #else                                                   /* !EXEC_BACKEND */
5317         pid = fork_process();
5318
5319         if (pid == 0)                           /* child */
5320         {
5321                 InitPostmasterChild();
5322
5323                 /* Close the postmaster's sockets */
5324                 ClosePostmasterPorts(false);
5325
5326                 /* Release postmaster's working memory context */
5327                 MemoryContextSwitchTo(TopMemoryContext);
5328                 MemoryContextDelete(PostmasterContext);
5329                 PostmasterContext = NULL;
5330
5331                 AuxiliaryProcessMain(type); /* does not return */
5332         }
5333 #endif                                                  /* EXEC_BACKEND */
5334
5335         if (pid < 0)
5336         {
5337                 /* in parent, fork failed */
5338                 int                     save_errno = errno;
5339
5340                 errno = save_errno;
5341                 switch (type)
5342                 {
5343                         case StartupProcess:
5344                                 ereport(LOG,
5345                                                 (errmsg("could not fork startup process: %m")));
5346                                 break;
5347                         case ArchiverProcess:
5348                                 ereport(LOG,
5349                                                 (errmsg("could not fork archiver process: %m")));
5350                                 break;
5351                         case BgWriterProcess:
5352                                 ereport(LOG,
5353                                                 (errmsg("could not fork background writer process: %m")));
5354                                 break;
5355                         case CheckpointerProcess:
5356                                 ereport(LOG,
5357                                                 (errmsg("could not fork checkpointer process: %m")));
5358                                 break;
5359                         case WalWriterProcess:
5360                                 ereport(LOG,
5361                                                 (errmsg("could not fork WAL writer process: %m")));
5362                                 break;
5363                         case WalReceiverProcess:
5364                                 ereport(LOG,
5365                                                 (errmsg("could not fork WAL receiver process: %m")));
5366                                 break;
5367                         default:
5368                                 ereport(LOG,
5369                                                 (errmsg("could not fork process: %m")));
5370                                 break;
5371                 }
5372
5373                 /*
5374                  * fork failure is fatal during startup, but there's no need to choke
5375                  * immediately if starting other child types fails.
5376                  */
5377                 if (type == StartupProcess)
5378                         ExitPostmaster(1);
5379                 return 0;
5380         }
5381
5382         /*
5383          * in parent, successful fork
5384          */
5385         return pid;
5386 }
5387
5388 /*
5389  * StartAutovacuumWorker
5390  *              Start an autovac worker process.
5391  *
5392  * This function is here because it enters the resulting PID into the
5393  * postmaster's private backends list.
5394  *
5395  * NB -- this code very roughly matches BackendStartup.
5396  */
5397 static void
5398 StartAutovacuumWorker(void)
5399 {
5400         Backend    *bn;
5401
5402         /*
5403          * If not in condition to run a process, don't try, but handle it like a
5404          * fork failure.  This does not normally happen, since the signal is only
5405          * supposed to be sent by autovacuum launcher when it's OK to do it, but
5406          * we have to check to avoid race-condition problems during DB state
5407          * changes.
5408          */
5409         if (canAcceptConnections(BACKEND_TYPE_AUTOVAC) == CAC_OK)
5410         {
5411                 /*
5412                  * Compute the cancel key that will be assigned to this session. We
5413                  * probably don't need cancel keys for autovac workers, but we'd
5414                  * better have something random in the field to prevent unfriendly
5415                  * people from sending cancels to them.
5416                  */
5417                 if (!RandomCancelKey(&MyCancelKey))
5418                 {
5419                         ereport(LOG,
5420                                         (errcode(ERRCODE_INTERNAL_ERROR),
5421                                          errmsg("could not generate random cancel key")));
5422                         return;
5423                 }
5424
5425                 bn = (Backend *) palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
5426                 if (bn)
5427                 {
5428                         bn->cancel_key = MyCancelKey;
5429
5430                         /* Autovac workers are not dead_end and need a child slot */
5431                         bn->dead_end = false;
5432                         bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5433                         bn->bgworker_notify = false;
5434
5435                         bn->pid = StartAutoVacWorker();
5436                         if (bn->pid > 0)
5437                         {
5438                                 bn->bkend_type = BACKEND_TYPE_AUTOVAC;
5439                                 dlist_push_head(&BackendList, &bn->elem);
5440 #ifdef EXEC_BACKEND
5441                                 ShmemBackendArrayAdd(bn);
5442 #endif
5443                                 /* all OK */
5444                                 return;
5445                         }
5446
5447                         /*
5448                          * fork failed, fall through to report -- actual error message was
5449                          * logged by StartAutoVacWorker
5450                          */
5451                         (void) ReleasePostmasterChildSlot(bn->child_slot);
5452                         pfree(bn);
5453                 }
5454                 else
5455                         ereport(LOG,
5456                                         (errcode(ERRCODE_OUT_OF_MEMORY),
5457                                          errmsg("out of memory")));
5458         }
5459
5460         /*
5461          * Report the failure to the launcher, if it's running.  (If it's not, we
5462          * might not even be connected to shared memory, so don't try to call
5463          * AutoVacWorkerFailed.)  Note that we also need to signal it so that it
5464          * responds to the condition, but we don't do that here, instead waiting
5465          * for ServerLoop to do it.  This way we avoid a ping-pong signaling in
5466          * quick succession between the autovac launcher and postmaster in case
5467          * things get ugly.
5468          */
5469         if (AutoVacPID != 0)
5470         {
5471                 AutoVacWorkerFailed();
5472                 avlauncher_needs_signal = true;
5473         }
5474 }
5475
5476 /*
5477  * MaybeStartWalReceiver
5478  *              Start the WAL receiver process, if not running and our state allows.
5479  *
5480  * Note: if WalReceiverPID is already nonzero, it might seem that we should
5481  * clear WalReceiverRequested.  However, there's a race condition if the
5482  * walreceiver terminates and the startup process immediately requests a new
5483  * one: it's quite possible to get the signal for the request before reaping
5484  * the dead walreceiver process.  Better to risk launching an extra
5485  * walreceiver than to miss launching one we need.  (The walreceiver code
5486  * has logic to recognize that it should go away if not needed.)
5487  */
5488 static void
5489 MaybeStartWalReceiver(void)
5490 {
5491         if (WalReceiverPID == 0 &&
5492                 (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5493                  pmState == PM_HOT_STANDBY) &&
5494                 Shutdown <= SmartShutdown)
5495         {
5496                 WalReceiverPID = StartWalReceiver();
5497                 if (WalReceiverPID != 0)
5498                         WalReceiverRequested = false;
5499                 /* else leave the flag set, so we'll try again later */
5500         }
5501 }
5502
5503
5504 /*
5505  * Create the opts file
5506  */
5507 static bool
5508 CreateOptsFile(int argc, char *argv[], char *fullprogname)
5509 {
5510         FILE       *fp;
5511         int                     i;
5512
5513 #define OPTS_FILE       "postmaster.opts"
5514
5515         if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5516         {
5517                 ereport(LOG,
5518                                 (errcode_for_file_access(),
5519                                  errmsg("could not create file \"%s\": %m", OPTS_FILE)));
5520                 return false;
5521         }
5522
5523         fprintf(fp, "%s", fullprogname);
5524         for (i = 1; i < argc; i++)
5525                 fprintf(fp, " \"%s\"", argv[i]);
5526         fputs("\n", fp);
5527
5528         if (fclose(fp))
5529         {
5530                 ereport(LOG,
5531                                 (errcode_for_file_access(),
5532                                  errmsg("could not write file \"%s\": %m", OPTS_FILE)));
5533                 return false;
5534         }
5535
5536         return true;
5537 }
5538
5539
5540 /*
5541  * MaxLivePostmasterChildren
5542  *
5543  * This reports the number of entries needed in per-child-process arrays
5544  * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5545  * These arrays include regular backends, autovac workers, walsenders
5546  * and background workers, but not special children nor dead_end children.
5547  * This allows the arrays to have a fixed maximum size, to wit the same
5548  * too-many-children limit enforced by canAcceptConnections().  The exact value
5549  * isn't too critical as long as it's more than MaxBackends.
5550  */
5551 int
5552 MaxLivePostmasterChildren(void)
5553 {
5554         return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5555                                 max_wal_senders + max_worker_processes);
5556 }
5557
5558 /*
5559  * Connect background worker to a database.
5560  */
5561 void
5562 BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
5563 {
5564         BackgroundWorker *worker = MyBgworkerEntry;
5565         bits32          init_flags = 0; /* never honor session_preload_libraries */
5566
5567         /* ignore datallowconn? */
5568         if (flags & BGWORKER_BYPASS_ALLOWCONN)
5569                 init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
5570         /* ignore rolcanlogin? */
5571         if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
5572                 init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
5573
5574         /* XXX is this the right errcode? */
5575         if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5576                 ereport(FATAL,
5577                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5578                                  errmsg("database connection requirement not indicated during registration")));
5579
5580         InitPostgres(dbname, InvalidOid,        /* database to connect to */
5581                                  username, InvalidOid,  /* role to connect as */
5582                                  init_flags,
5583                                  NULL);                 /* no out_dbname */
5584
5585         /* it had better not gotten out of "init" mode yet */
5586         if (!IsInitProcessingMode())
5587                 ereport(ERROR,
5588                                 (errmsg("invalid processing mode in background worker")));
5589         SetProcessingMode(NormalProcessing);
5590 }
5591
5592 /*
5593  * Connect background worker to a database using OIDs.
5594  */
5595 void
5596 BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
5597 {
5598         BackgroundWorker *worker = MyBgworkerEntry;
5599         bits32          init_flags = 0; /* never honor session_preload_libraries */
5600
5601         /* ignore datallowconn? */
5602         if (flags & BGWORKER_BYPASS_ALLOWCONN)
5603                 init_flags |= INIT_PG_OVERRIDE_ALLOW_CONNS;
5604         /* ignore rolcanlogin? */
5605         if (flags & BGWORKER_BYPASS_ROLELOGINCHECK)
5606                 init_flags |= INIT_PG_OVERRIDE_ROLE_LOGIN;
5607
5608         /* XXX is this the right errcode? */
5609         if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5610                 ereport(FATAL,
5611                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5612                                  errmsg("database connection requirement not indicated during registration")));
5613
5614         InitPostgres(NULL, dboid,       /* database to connect to */
5615                                  NULL, useroid, /* role to connect as */
5616                                  init_flags,
5617                                  NULL);                 /* no out_dbname */
5618
5619         /* it had better not gotten out of "init" mode yet */
5620         if (!IsInitProcessingMode())
5621                 ereport(ERROR,
5622                                 (errmsg("invalid processing mode in background worker")));
5623         SetProcessingMode(NormalProcessing);
5624 }
5625
5626 /*
5627  * Block/unblock signals in a background worker
5628  */
5629 void
5630 BackgroundWorkerBlockSignals(void)
5631 {
5632         sigprocmask(SIG_SETMASK, &BlockSig, NULL);
5633 }
5634
5635 void
5636 BackgroundWorkerUnblockSignals(void)
5637 {
5638         sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
5639 }
5640
#ifdef EXEC_BACKEND
/*
 * Launch a background worker child through postmaster_forkexec().
 *
 * The child's command line is "postgres --forkbgworker=<shmem_slot>" followed
 * by one extra argument slot that postmaster_forkexec fills in.  Returns
 * whatever postmaster_forkexec() returns.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
        char            slot_arg[MAXPGPATH];
        char       *child_argv[10];
        int                     child_argc;

        snprintf(slot_arg, sizeof(slot_arg), "--forkbgworker=%d", shmem_slot);

        child_argv[0] = "postgres";
        child_argv[1] = slot_arg;
        child_argv[2] = NULL;           /* filled in by postmaster_forkexec */
        child_argc = 3;

        /* terminate the vector */
        child_argv[child_argc] = NULL;

        Assert(child_argc < lengthof(child_argv));

        return postmaster_forkexec(child_argc, child_argv);
}
#endif
5661
/*
 * Start a new bgworker.
 * Starting time conditions must have been checked already.
 *
 * Returns true on success, false on failure.
 * In either case, update the RegisteredBgWorker's state appropriately.
 *
 * On success, rw->rw_pid and rw->rw_backend->pid hold the child's PID, the
 * PID has been reported via ReportBackgroundWorkerPID(), and the worker's
 * Backend entry has been pushed onto BackendList (and, under EXEC_BACKEND,
 * added to the ShmemBackendArray).
 *
 * On failure, rw->rw_crashed_at is set to the current time.  If the failure
 * happened in the fork itself, the child slot and Backend struct obtained by
 * assign_backendlist_entry() are released again.
 *
 * This code is heavily based on autovacuum.c, q.v.
 */
static bool
do_start_bgworker(RegisteredBgWorker *rw)
{
        pid_t           worker_pid;

        /* the caller must not ask us to start a worker that already has a PID */
        Assert(rw->rw_pid == 0);

        /*
         * Allocate and assign the Backend element.  Note we must do this before
         * forking, so that we can handle failures (out of memory or child-process
         * slots) cleanly.
         *
         * Treat failure as though the worker had crashed.  That way, the
         * postmaster will wait a bit before attempting to start it again; if we
         * tried again right away, most likely we'd find ourselves hitting the
         * same resource-exhaustion condition.
         */
        if (!assign_backendlist_entry(rw))
        {
                rw->rw_crashed_at = GetCurrentTimestamp();
                return false;
        }

        ereport(DEBUG1,
                        (errmsg_internal("starting background worker process \"%s\"",
                                                         rw->rw_worker.bgw_name)));

        /*
         * Create the child.  With EXEC_BACKEND the child is launched through
         * bgworker_forkexec(), so only the parent-side cases of the switch are
         * compiled; otherwise fork_process() is used and "case 0" below runs in
         * the child.
         */
#ifdef EXEC_BACKEND
        switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
#else
        switch ((worker_pid = fork_process()))
#endif
        {
                case -1:
                        /* in postmaster, fork failed ... */
                        ereport(LOG,
                                        (errmsg("could not fork worker process: %m")));
                        /* undo what assign_backendlist_entry did */
                        ReleasePostmasterChildSlot(rw->rw_child_slot);
                        rw->rw_child_slot = 0;
                        pfree(rw->rw_backend);
                        rw->rw_backend = NULL;
                        /* mark entry as crashed, so we'll try again later */
                        rw->rw_crashed_at = GetCurrentTimestamp();
                        break;

#ifndef EXEC_BACKEND
                case 0:
                        /* in postmaster child ... */
                        InitPostmasterChild();

                        /* Close the postmaster's sockets */
                        ClosePostmasterPorts(false);

                        /*
                         * Before blowing away PostmasterContext, save this bgworker's
                         * data where it can find it.
                         */
                        MyBgworkerEntry = (BackgroundWorker *)
                                MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
                        memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));

                        /* Release postmaster's working memory context */
                        MemoryContextSwitchTo(TopMemoryContext);
                        MemoryContextDelete(PostmasterContext);
                        PostmasterContext = NULL;

                        /* run the worker; this call is not expected to return */
                        BackgroundWorkerMain();

                        exit(1);                        /* should not get here */
                        break;
#endif
                default:
                        /* in postmaster, fork successful ... */
                        rw->rw_pid = worker_pid;
                        rw->rw_backend->pid = rw->rw_pid;
                        ReportBackgroundWorkerPID(rw);
                        /* add new worker to lists of backends */
                        dlist_push_head(&BackendList, &rw->rw_backend->elem);
#ifdef EXEC_BACKEND
                        ShmemBackendArrayAdd(rw->rw_backend);
#endif
                        return true;
        }

        /* reached from the fork-failure case, which breaks out of the switch */
        return false;
}
5758
5759 /*
5760  * Does the current postmaster state require starting a worker with the
5761  * specified start_time?
5762  */
5763 static bool
5764 bgworker_should_start_now(BgWorkerStartTime start_time)
5765 {
5766         switch (pmState)
5767         {
5768                 case PM_NO_CHILDREN:
5769                 case PM_WAIT_DEAD_END:
5770                 case PM_SHUTDOWN_2:
5771                 case PM_SHUTDOWN:
5772                 case PM_WAIT_BACKENDS:
5773                 case PM_STOP_BACKENDS:
5774                         break;
5775
5776                 case PM_RUN:
5777                         if (start_time == BgWorkerStart_RecoveryFinished)
5778                                 return true;
5779                         /* fall through */
5780
5781                 case PM_HOT_STANDBY:
5782                         if (start_time == BgWorkerStart_ConsistentState)
5783                                 return true;
5784                         /* fall through */
5785
5786                 case PM_RECOVERY:
5787                 case PM_STARTUP:
5788                 case PM_INIT:
5789                         if (start_time == BgWorkerStart_PostmasterStart)
5790                                 return true;
5791                         /* fall through */
5792         }
5793
5794         return false;
5795 }
5796
5797 /*
5798  * Allocate the Backend struct for a connected background worker, but don't
5799  * add it to the list of backends just yet.
5800  *
5801  * On failure, return false without changing any worker state.
5802  *
5803  * Some info from the Backend is copied into the passed rw.
5804  */
5805 static bool
5806 assign_backendlist_entry(RegisteredBgWorker *rw)
5807 {
5808         Backend    *bn;
5809
5810         /*
5811          * Check that database state allows another connection.  Currently the
5812          * only possible failure is CAC_TOOMANY, so we just log an error message
5813          * based on that rather than checking the error code precisely.
5814          */
5815         if (canAcceptConnections(BACKEND_TYPE_BGWORKER) != CAC_OK)
5816         {
5817                 ereport(LOG,
5818                                 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
5819                                  errmsg("no slot available for new worker process")));
5820                 return false;
5821         }
5822
5823         /*
5824          * Compute the cancel key that will be assigned to this session. We
5825          * probably don't need cancel keys for background workers, but we'd better
5826          * have something random in the field to prevent unfriendly people from
5827          * sending cancels to them.
5828          */
5829         if (!RandomCancelKey(&MyCancelKey))
5830         {
5831                 ereport(LOG,
5832                                 (errcode(ERRCODE_INTERNAL_ERROR),
5833                                  errmsg("could not generate random cancel key")));
5834                 return false;
5835         }
5836
5837         bn = palloc_extended(sizeof(Backend), MCXT_ALLOC_NO_OOM);
5838         if (bn == NULL)
5839         {
5840                 ereport(LOG,
5841                                 (errcode(ERRCODE_OUT_OF_MEMORY),
5842                                  errmsg("out of memory")));
5843                 return false;
5844         }
5845
5846         bn->cancel_key = MyCancelKey;
5847         bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5848         bn->bkend_type = BACKEND_TYPE_BGWORKER;
5849         bn->dead_end = false;
5850         bn->bgworker_notify = false;
5851
5852         rw->rw_backend = bn;
5853         rw->rw_child_slot = bn->child_slot;
5854
5855         return true;
5856 }
5857
/*
 * If the time is right, start background worker(s).
 *
 * As a side effect, the bgworker control variables are set or reset
 * depending on whether more workers may need to be started.
 *
 * We limit the number of workers started per call, to avoid consuming the
 * postmaster's attention for too long when many such requests are pending.
 * As long as StartWorkerNeeded is true, ServerLoop will not block and will
 * call this function again after dealing with any other issues.
 *
 * Summary of the flags on return:
 *   StartWorkerNeeded - true if we stopped early, either because a start
 *                       attempt failed or because MAX_BGWORKERS_TO_LAUNCH
 *                       workers were launched in this call.
 *   HaveCrashedWorker - true if some crashed worker was skipped because its
 *                       restart interval has not elapsed yet.
 */
static void
maybe_start_bgworkers(void)
{
#define MAX_BGWORKERS_TO_LAUNCH 100
        int                     num_launched = 0;
        TimestampTz now = 0;            /* 0 means "not fetched yet" */
        slist_mutable_iter iter;

        /*
         * During crash recovery, we have no need to be called until the state
         * transition out of recovery.
         */
        if (FatalError)
        {
                StartWorkerNeeded = false;
                HaveCrashedWorker = false;
                return;
        }

        /* Don't need to be called again unless we find a reason for it below */
        StartWorkerNeeded = false;
        HaveCrashedWorker = false;

        /*
         * Use the "modify" form of the iterator because ForgetBackgroundWorker()
         * is handed the iterator itself for entries that are being dropped.
         */
        slist_foreach_modify(iter, &BackgroundWorkerList)
        {
                RegisteredBgWorker *rw;

                rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);

                /* ignore if already running */
                if (rw->rw_pid != 0)
                        continue;

                /* if marked for death, clean up and remove from list */
                if (rw->rw_terminate)
                {
                        ForgetBackgroundWorker(&iter);
                        continue;
                }

                /*
                 * If this worker has crashed previously, maybe it needs to be
                 * restarted (unless on registration it specified it doesn't want to
                 * be restarted at all).  Check how long ago the last crash happened.
                 * If the last crash is too recent, don't start it right away; let it
                 * be restarted once enough time has passed.
                 */
                if (rw->rw_crashed_at != 0)
                {
                        if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
                        {
                                int                     notify_pid;

                                /* fetch the PID to notify before the entry is forgotten */
                                notify_pid = rw->rw_worker.bgw_notify_pid;

                                ForgetBackgroundWorker(&iter);

                                /* Report worker is gone now. */
                                if (notify_pid != 0)
                                        kill(notify_pid, SIGUSR1);

                                continue;
                        }

                        /* read system time only when needed */
                        if (now == 0)
                                now = GetCurrentTimestamp();

                        /*
                         * bgw_restart_time is scaled by 1000 for the comparison
                         * (presumably seconds to milliseconds -- confirm against
                         * TimestampDifferenceExceeds).
                         *
                         * NOTE(review): the product is computed in bgw_restart_time's
                         * own type; verify that the limit enforced at registration
                         * keeps it from overflowing.
                         */
                        if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
                                                                                        rw->rw_worker.bgw_restart_time * 1000))
                        {
                                /* Set flag to remember that we have workers to start later */
                                HaveCrashedWorker = true;
                                continue;
                        }
                }

                if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
                {
                        /* reset crash time before trying to start worker */
                        rw->rw_crashed_at = 0;

                        /*
                         * Try to start the worker.
                         *
                         * On failure, give up processing workers for now, but set
                         * StartWorkerNeeded so we'll come back here on the next iteration
                         * of ServerLoop to try again.  (We don't want to wait, because
                         * there might be additional ready-to-run workers.)  We could set
                         * HaveCrashedWorker as well, since this worker is now marked
                         * crashed, but there's no need because the next run of this
                         * function will do that.
                         */
                        if (!do_start_bgworker(rw))
                        {
                                StartWorkerNeeded = true;
                                return;
                        }

                        /*
                         * If we've launched as many workers as allowed, quit, but have
                         * ServerLoop call us again to look for additional ready-to-run
                         * workers.  There might not be any, but we'll find out the next
                         * time we run.
                         */
                        if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
                        {
                                StartWorkerNeeded = true;
                                return;
                        }
                }
        }
}
5982
5983 /*
5984  * When a backend asks to be notified about worker state changes, we
5985  * set a flag in its backend entry.  The background worker machinery needs
5986  * to know when such backends exit.
5987  */
5988 bool
5989 PostmasterMarkPIDForWorkerNotify(int pid)
5990 {
5991         dlist_iter      iter;
5992         Backend    *bp;
5993
5994         dlist_foreach(iter, &BackendList)
5995         {
5996                 bp = dlist_container(Backend, elem, iter.cur);
5997                 if (bp->pid == pid)
5998                 {
5999                         bp->bgworker_notify = true;
6000                         return true;
6001                 }
6002         }
6003         return false;
6004 }
6005
6006 #ifdef EXEC_BACKEND
6007
/*
 * The following need to be available to the save/restore_backend_variables
 * functions.  They are marked NON_EXEC_STATIC in their home modules.
 */
extern slock_t *ShmemLock;
extern slock_t *ProcStructLock;
extern PGPROC *AuxiliaryProcs;
extern PMSignalData *PMSignalState;
extern pg_time_t first_syslogger_file_time;

/*
 * Helpers for passing a socket to a child process.
 *
 * Outside Windows these are macros: write_inheritable_socket() just stores
 * src into *dest and evaluates to true (its childpid argument is discarded
 * by the expansion), and read_inheritable_socket() copies *src into *dest.
 *
 * On Windows they are real functions, declared here and defined further
 * down, together with write_duplicated_handle().
 */
#ifndef WIN32
#define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
#define read_inheritable_socket(dest, src) (*(dest) = *(src))
#else
static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
                                                                         pid_t childPid);
static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
#endif
6027
6028
/*
 * Save critical backend variables into the BackendParameters struct
 *
 * param: struct to fill in.
 * port: connection info; the whole Port is copied and port->sock is recorded
 *       through write_inheritable_socket().
 * childProcess, childPid (Windows only): the child's process handle and PID,
 *       needed to duplicate handles and sockets into the child.
 *
 * Returns true on success, false if recording the socket or (on Windows)
 * duplicating the signal pipe handle failed.
 */
#ifndef WIN32
static bool
save_backend_variables(BackendParameters *param, Port *port)
#else
static bool
save_backend_variables(BackendParameters *param, Port *port,
                                           HANDLE childProcess, pid_t childPid)
#endif
{
        memcpy(&param->port, port, sizeof(Port));

        /*
         * Outside Windows there is no childPid parameter; this still compiles
         * because write_inheritable_socket is then a macro whose expansion does
         * not use its third argument.
         */
        if (!write_inheritable_socket(&param->portsocket, port->sock, childPid))
                return false;

        strlcpy(param->DataDir, DataDir, MAXPGPATH);

        param->MyCancelKey = MyCancelKey;
        param->MyPMChildSlot = MyPMChildSlot;

        /* shared memory segment identification */
#ifdef WIN32
        param->ShmemProtectiveRegion = ShmemProtectiveRegion;
#endif
        param->UsedShmemSegID = UsedShmemSegID;
        param->UsedShmemSegAddr = UsedShmemSegAddr;

        /* pointers and values describing structures held in shared memory */
        param->ShmemLock = ShmemLock;
        param->ShmemVariableCache = ShmemVariableCache;
        param->ShmemBackendArray = ShmemBackendArray;

#ifndef HAVE_SPINLOCKS
        param->SpinlockSemaArray = SpinlockSemaArray;
#endif
        param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
        param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
        param->MainLWLockArray = MainLWLockArray;
        param->ProcStructLock = ProcStructLock;
        param->ProcGlobal = ProcGlobal;
        param->AuxiliaryProcs = AuxiliaryProcs;
        param->PreparedXactProcs = PreparedXactProcs;
        param->PMSignalState = PMSignalState;

        /* postmaster identity and timestamps */
        param->PostmasterPid = PostmasterPid;
        param->PgStartTime = PgStartTime;
        param->PgReloadTime = PgReloadTime;
        param->first_syslogger_file_time = first_syslogger_file_time;

        /* miscellaneous state */
        param->redirection_done = redirection_done;
        param->IsBinaryUpgrade = IsBinaryUpgrade;
        param->query_id_enabled = query_id_enabled;
        param->max_safe_fds = max_safe_fds;

        param->MaxBackends = MaxBackends;

#ifdef WIN32
        param->PostmasterHandle = PostmasterHandle;
        /* create the child's signal listener and hand its handle to the child */
        if (!write_duplicated_handle(&param->initial_signal_pipe,
                                                                 pgwin32_create_signal_listener(childPid),
                                                                 childProcess))
                return false;
#else
        memcpy(&param->postmaster_alive_fds, &postmaster_alive_fds,
                   sizeof(postmaster_alive_fds));
#endif

        memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));

        strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);

        strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);

        return true;
}
6101
6102
6103 #ifdef WIN32
6104 /*
6105  * Duplicate a handle for usage in a child process, and write the child
6106  * process instance of the handle to the parameter file.
6107  */
6108 static bool
6109 write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
6110 {
6111         HANDLE          hChild = INVALID_HANDLE_VALUE;
6112
6113         if (!DuplicateHandle(GetCurrentProcess(),
6114                                                  src,
6115                                                  childProcess,
6116                                                  &hChild,
6117                                                  0,
6118                                                  TRUE,
6119                                                  DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
6120         {
6121                 ereport(LOG,
6122                                 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu",
6123                                                                  GetLastError())));
6124                 return false;
6125         }
6126
6127         *dest = hChild;
6128         return true;
6129 }
6130
6131 /*
6132  * Duplicate a socket for usage in a child process, and write the resulting
6133  * structure to the parameter file.
6134  * This is required because a number of LSPs (Layered Service Providers) very
6135  * common on Windows (antivirus, firewalls, download managers etc) break
6136  * straight socket inheritance.
6137  */
6138 static bool
6139 write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
6140 {
6141         dest->origsocket = src;
6142         if (src != 0 && src != PGINVALID_SOCKET)
6143         {
6144                 /* Actual socket */
6145                 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
6146                 {
6147                         ereport(LOG,
6148                                         (errmsg("could not duplicate socket %d for use in backend: error code %d",
6149                                                         (int) src, WSAGetLastError())));
6150                         return false;
6151                 }
6152         }
6153         return true;
6154 }
6155
6156 /*
6157  * Read a duplicate socket structure back, and get the socket descriptor.
6158  */
6159 static void
6160 read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
6161 {
6162         SOCKET          s;
6163
6164         if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0)
6165         {
6166                 /* Not a real socket! */
6167                 *dest = src->origsocket;
6168         }
6169         else
6170         {
6171                 /* Actual socket, so create from structure */
6172                 s = WSASocket(FROM_PROTOCOL_INFO,
6173                                           FROM_PROTOCOL_INFO,
6174                                           FROM_PROTOCOL_INFO,
6175                                           &src->wsainfo,
6176                                           0,
6177                                           0);
6178                 if (s == INVALID_SOCKET)
6179                 {
6180                         write_stderr("could not create inherited socket: error code %d\n",
6181                                                  WSAGetLastError());
6182                         exit(1);
6183                 }
6184                 *dest = s;
6185
6186                 /*
6187                  * To make sure we don't get two references to the same socket, close
6188                  * the original one. (This would happen when inheritance actually
6189                  * works..
6190                  */
6191                 closesocket(src->origsocket);
6192         }
6193 }
6194 #endif
6195
6196 static void
6197 read_backend_variables(char *id, Port *port)
6198 {
6199         BackendParameters param;
6200
6201 #ifndef WIN32
6202         /* Non-win32 implementation reads from file */
6203         FILE       *fp;
6204
6205         /* Open file */
6206         fp = AllocateFile(id, PG_BINARY_R);
6207         if (!fp)
6208         {
6209                 write_stderr("could not open backend variables file \"%s\": %s\n",
6210                                          id, strerror(errno));
6211                 exit(1);
6212         }
6213
6214         if (fread(&param, sizeof(param), 1, fp) != 1)
6215         {
6216                 write_stderr("could not read from backend variables file \"%s\": %s\n",
6217                                          id, strerror(errno));
6218                 exit(1);
6219         }
6220
6221         /* Release file */
6222         FreeFile(fp);
6223         if (unlink(id) != 0)
6224         {
6225                 write_stderr("could not remove file \"%s\": %s\n",
6226                                          id, strerror(errno));
6227                 exit(1);
6228         }
6229 #else
6230         /* Win32 version uses mapped file */
6231         HANDLE          paramHandle;
6232         BackendParameters *paramp;
6233
6234 #ifdef _WIN64
6235         paramHandle = (HANDLE) _atoi64(id);
6236 #else
6237         paramHandle = (HANDLE) atol(id);
6238 #endif
6239         paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
6240         if (!paramp)
6241         {
6242                 write_stderr("could not map view of backend variables: error code %lu\n",
6243                                          GetLastError());
6244                 exit(1);
6245         }
6246
6247         memcpy(&param, paramp, sizeof(BackendParameters));
6248
6249         if (!UnmapViewOfFile(paramp))
6250         {
6251                 write_stderr("could not unmap view of backend variables: error code %lu\n",
6252                                          GetLastError());
6253                 exit(1);
6254         }
6255
6256         if (!CloseHandle(paramHandle))
6257         {
6258                 write_stderr("could not close handle to backend parameter variables: error code %lu\n",
6259                                          GetLastError());
6260                 exit(1);
6261         }
6262 #endif
6263
6264         restore_backend_variables(&param, port);
6265 }
6266
/*
 * Restore critical backend variables from the BackendParameters struct
 *
 * param: the saved values to install into this process's variables.
 * port: receives a copy of the saved Port; port->sock is then set through
 *       read_inheritable_socket().
 *
 * The data directory is installed with SetDataDir(); everything else is
 * assigned or copied directly.
 */
static void
restore_backend_variables(BackendParameters *param, Port *port)
{
        memcpy(port, &param->port, sizeof(Port));
        read_inheritable_socket(&port->sock, &param->portsocket);

        SetDataDir(param->DataDir);

        MyCancelKey = param->MyCancelKey;
        MyPMChildSlot = param->MyPMChildSlot;

        /* shared memory segment identification */
#ifdef WIN32
        ShmemProtectiveRegion = param->ShmemProtectiveRegion;
#endif
        UsedShmemSegID = param->UsedShmemSegID;
        UsedShmemSegAddr = param->UsedShmemSegAddr;

        /* pointers and values describing structures held in shared memory */
        ShmemLock = param->ShmemLock;
        ShmemVariableCache = param->ShmemVariableCache;
        ShmemBackendArray = param->ShmemBackendArray;

#ifndef HAVE_SPINLOCKS
        SpinlockSemaArray = param->SpinlockSemaArray;
#endif
        NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
        NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
        MainLWLockArray = param->MainLWLockArray;
        ProcStructLock = param->ProcStructLock;
        ProcGlobal = param->ProcGlobal;
        AuxiliaryProcs = param->AuxiliaryProcs;
        PreparedXactProcs = param->PreparedXactProcs;
        PMSignalState = param->PMSignalState;

        /* postmaster identity and timestamps */
        PostmasterPid = param->PostmasterPid;
        PgStartTime = param->PgStartTime;
        PgReloadTime = param->PgReloadTime;
        first_syslogger_file_time = param->first_syslogger_file_time;

        /* miscellaneous state */
        redirection_done = param->redirection_done;
        IsBinaryUpgrade = param->IsBinaryUpgrade;
        query_id_enabled = param->query_id_enabled;
        max_safe_fds = param->max_safe_fds;

        MaxBackends = param->MaxBackends;

#ifdef WIN32
        PostmasterHandle = param->PostmasterHandle;
        pgwin32_initial_signal_pipe = param->initial_signal_pipe;
#else
        memcpy(&postmaster_alive_fds, &param->postmaster_alive_fds,
                   sizeof(postmaster_alive_fds));
#endif

        memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));

        strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);

        strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);

        /*
         * We need to restore fd.c's counts of externally-opened FDs; to avoid
         * confusion, be sure to do this after restoring max_safe_fds.  (Note:
         * BackendInitialize will handle this for port->sock.)
         */
#ifndef WIN32
        /* count each end of postmaster_alive_fds that holds a valid descriptor */
        if (postmaster_alive_fds[0] >= 0)
                ReserveExternalFD();
        if (postmaster_alive_fds[1] >= 0)
                ReserveExternalFD();
#endif
}
6339
6340
6341 Size
6342 ShmemBackendArraySize(void)
6343 {
6344         return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
6345 }
6346
6347 void
6348 ShmemBackendArrayAllocation(void)
6349 {
6350         Size            size = ShmemBackendArraySize();
6351
6352         ShmemBackendArray = (Backend *) ShmemAlloc(size);
6353         /* Mark all slots as empty */
6354         memset(ShmemBackendArray, 0, size);
6355 }
6356
6357 static void
6358 ShmemBackendArrayAdd(Backend *bn)
6359 {
6360         /* The array slot corresponding to my PMChildSlot should be free */
6361         int                     i = bn->child_slot - 1;
6362
6363         Assert(ShmemBackendArray[i].pid == 0);
6364         ShmemBackendArray[i] = *bn;
6365 }
6366
6367 static void
6368 ShmemBackendArrayRemove(Backend *bn)
6369 {
6370         int                     i = bn->child_slot - 1;
6371
6372         Assert(ShmemBackendArray[i].pid == bn->pid);
6373         /* Mark the slot as empty */
6374         ShmemBackendArray[i].pid = 0;
6375 }
6376 #endif                                                  /* EXEC_BACKEND */
6377
6378
6379 #ifdef WIN32
6380
6381 /*
6382  * Subset implementation of waitpid() for Windows.  We assume pid is -1
6383  * (that is, check all child processes) and options is WNOHANG (don't wait).
6384  */
6385 static pid_t
6386 waitpid(pid_t pid, int *exitstatus, int options)
6387 {
6388         win32_deadchild_waitinfo *childinfo;
6389         DWORD           exitcode;
6390         DWORD           dwd;
6391         ULONG_PTR       key;
6392         OVERLAPPED *ovl;
6393
6394         /* Try to consume one win32_deadchild_waitinfo from the queue. */
6395         if (!GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
6396         {
6397                 errno = EAGAIN;
6398                 return -1;
6399         }
6400
6401         childinfo = (win32_deadchild_waitinfo *) key;
6402         pid = childinfo->procId;
6403
6404         /*
6405          * Remove handle from wait - required even though it's set to wait only
6406          * once
6407          */
6408         UnregisterWaitEx(childinfo->waitHandle, NULL);
6409
6410         if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
6411         {
6412                 /*
6413                  * Should never happen. Inform user and set a fixed exitcode.
6414                  */
6415                 write_stderr("could not read exit code for process\n");
6416                 exitcode = 255;
6417         }
6418         *exitstatus = exitcode;
6419
6420         /*
6421          * Close the process handle.  Only after this point can the PID can be
6422          * recycled by the kernel.
6423          */
6424         CloseHandle(childinfo->procHandle);
6425
6426         /*
6427          * Free struct that was allocated before the call to
6428          * RegisterWaitForSingleObject()
6429          */
6430         pfree(childinfo);
6431
6432         return pid;
6433 }
6434
6435 /*
6436  * Note! Code below executes on a thread pool! All operations must
6437  * be thread safe! Note that elog() and friends must *not* be used.
6438  */
6439 static void WINAPI
6440 pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
6441 {
6442         /* Should never happen, since we use INFINITE as timeout value. */
6443         if (TimerOrWaitFired)
6444                 return;
6445
6446         /*
6447          * Post the win32_deadchild_waitinfo object for waitpid() to deal with. If
6448          * that fails, we leak the object, but we also leak a whole process and
6449          * get into an unrecoverable state, so there's not much point in worrying
6450          * about that.  We'd like to panic, but we can't use that infrastructure
6451          * from this thread.
6452          */
6453         if (!PostQueuedCompletionStatus(win32ChildQueue,
6454                                                                         0,
6455                                                                         (ULONG_PTR) lpParameter,
6456                                                                         NULL))
6457                 write_stderr("could not post child completion status\n");
6458
6459         /* Queue SIGCHLD signal. */
6460         pg_queue_signal(SIGCHLD);
6461 }
6462 #endif                                                  /* WIN32 */
6463
6464 /*
6465  * Initialize one and only handle for monitoring postmaster death.
6466  *
6467  * Called once in the postmaster, so that child processes can subsequently
6468  * monitor if their parent is dead.
6469  */
6470 static void
6471 InitPostmasterDeathWatchHandle(void)
6472 {
6473 #ifndef WIN32
6474
6475         /*
6476          * Create a pipe. Postmaster holds the write end of the pipe open
6477          * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
6478          * the read file descriptor to select() to wake up in case postmaster
6479          * dies, or check for postmaster death with a (read() == 0). Children must
6480          * close the write end as soon as possible after forking, because EOF
6481          * won't be signaled in the read end until all processes have closed the
6482          * write fd. That is taken care of in ClosePostmasterPorts().
6483          */
6484         Assert(MyProcPid == PostmasterPid);
6485         if (pipe(postmaster_alive_fds) < 0)
6486                 ereport(FATAL,
6487                                 (errcode_for_file_access(),
6488                                  errmsg_internal("could not create pipe to monitor postmaster death: %m")));
6489
6490         /* Notify fd.c that we've eaten two FDs for the pipe. */
6491         ReserveExternalFD();
6492         ReserveExternalFD();
6493
6494         /*
6495          * Set O_NONBLOCK to allow testing for the fd's presence with a read()
6496          * call.
6497          */
6498         if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
6499                 ereport(FATAL,
6500                                 (errcode_for_socket_access(),
6501                                  errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
6502 #else
6503
6504         /*
6505          * On Windows, we use a process handle for the same purpose.
6506          */
6507         if (DuplicateHandle(GetCurrentProcess(),
6508                                                 GetCurrentProcess(),
6509                                                 GetCurrentProcess(),
6510                                                 &PostmasterHandle,
6511                                                 0,
6512                                                 TRUE,
6513                                                 DUPLICATE_SAME_ACCESS) == 0)
6514                 ereport(FATAL,
6515                                 (errmsg_internal("could not duplicate postmaster handle: error code %lu",
6516                                                                  GetLastError())));
6517 #endif                                                  /* WIN32 */
6518 }