From 3bcce67f10925bf23036567e31213a46cdd70beb Mon Sep 17 00:00:00 2001 From: "Kyle J. McKay" Date: Tue, 29 Nov 2016 23:33:46 -0800 Subject: [PATCH] taskd.pl: avoid missed connects during graceful restart During a graceful restart in non-inetd mode, the old Server (and hence the accept socket) was being closed by the exec and then re-created by the new process. While small, there is a window of opportunity for an incoming connection to be lost because the taskd socket is not open and listening for new connections during this brief restart period. Avoid this by leaving the old Server socket open (by disabling close-on-exec for its file descriptor) and telling the new process to adopt it as the Server (like inetd mode does for file descriptor 0) by passing the file descriptor in a special new environment variable. Almost all the work to support this was already done when the inetd support was added. As a bonus this same special environment variable can be used to tell inetd mode to use a file descriptor other than 0 (although that would technically then not be "inetd" mode anymore). A graceful restart can now be performed in non-inetd mode which is guaranteed to not lose any incoming connections because of the restart (this already worked in inetd mode because the inetd server keeps the socket open while the process is restarting). Signed-off-by: Kyle J. McKay --- taskd/taskd.pl | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/taskd/taskd.pl b/taskd/taskd.pl index 406c5cf..271d70a 100755 --- a/taskd/taskd.pl +++ b/taskd/taskd.pl @@ -48,6 +48,8 @@ use Girocco::Util qw(noFatalsToBrowser get_git); BEGIN {noFatalsToBrowser} use Girocco::ExecUtil; +use constant SOCKFDENV => "GIROCCO_TASKD_SOCKET_FD"; + # Throttle Classes Defaults # Note that any same-named classes in @Girocco::Config::throttle_classes # will override (completely replacing the entire hash) these ones. @@ -143,6 +145,24 @@ sub duration { return $secs . 
'd' . $ans; } +sub isfdopen { + my $fd = shift; + return undef unless defined($fd) && $fd >= 0; + my $result = POSIX::dup($fd); + POSIX::close($result) if defined($result); + defined($result); +} + +sub setnoncloexec { + my $fd = shift; + fcntl($fd, F_SETFD, 0) or die "fcntl failed: $!"; +} + +sub setcloexec { + my $fd = shift; + fcntl($fd, F_SETFD, FD_CLOEXEC) or die "fcntl failed: $!"; +} + sub setnonblock { my $fd = shift; my $flags = fcntl($fd, F_GETFL, 0); @@ -1187,8 +1207,16 @@ my $NAME; my $restart_file = $Girocco::Config::chroot.'/etc/taskd.restart'; my $restart_active = 1; -if ($inetd) { - open Server, '<&=0' or die "open: $!"; +my $resumefd = $ENV{(SOCKFDENV)}; +delete $ENV{(SOCKFDENV)}; +if (defined($resumefd) && !isfdopen($resumefd)) { + warn "ignoring invalid ".SOCKFDENV." environment value\n"; + $resumefd = undef; +} +if ($inetd || defined($resumefd)) { + my $fdopen = defined($resumefd) ? $resumefd : 0; + open Server, "<&=$fdopen" or die "open: $!"; + setcloexec(\*Server) if $fdopen > $^F; my $sockname = getsockname Server; die "getsockname: $!" unless $sockname; die "socket already connected! must be 'wait' socket" if getpeername Server; @@ -1335,7 +1363,10 @@ while (1) { } else { statmsg "RESTART: restart requested; now restarting"; statmsg "RESTART: restart requested; jobd restart scheduled in 5 seconds" if $restarter; + setnoncloexec(\*Server); + $reexec->setenv(SOCKFDENV, fileno(Server)); $reexec->reexec($same_pid); + setcloexec(\*Server) if fileno(Server) > $^F; statmsg "RESTART: continuing after failed restart: $!"; chdir "/"; cancel_jobd_restart($restarter) if $restarter; -- 2.11.4.GIT