From 1369a44693d601a7dc917f2148bde1aa9daa1182 Mon Sep 17 00:00:00 2001 From: warner Date: Sat, 25 Nov 2006 13:55:52 +0100 Subject: [PATCH] SF#1517975: enhance 'start' and 'restart' commands to emit the useful parts of twistd.log, up until the process has started properly --- ChangeLog | 17 +++++ buildbot/scripts/logwatcher.py | 95 +++++++++++++++++++++++++ buildbot/scripts/reconfig.py | 156 +++++++++++++++++------------------------ buildbot/scripts/runner.py | 63 ++++++----------- buildbot/scripts/startup.py | 113 +++++++++++++++++++++++++++++ buildbot/slave/bot.py | 1 + 6 files changed, 310 insertions(+), 135 deletions(-) create mode 100644 buildbot/scripts/logwatcher.py rewrite buildbot/scripts/reconfig.py (77%) create mode 100644 buildbot/scripts/startup.py diff --git a/ChangeLog b/ChangeLog index 5a3d0d8..98f21cc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2006-11-25 Brian Warner + + * buildbot/scripts/runner.py: enhance 'start' and 'restart' to + follow twistd.log and print lines until the process has started + started properly. For the buildmaster, this means until the config + file has been parsed and accepted. For the buildslave, this means + until we've connected to the master. We give up after 5 seconds in + any case. Helpful error messages and troubleshooting suggestions + are printed when we don't see a successful startup. This closes the + remainder of SF#1517975. + * buildbot/scripts/startup.py: moved app startup code to here + * buildbot/scripts/logwatcher.py: utility class to follow log + * buildbot/scripts/reconfig.py: rewrite to use LogWatcher + * buildbot/slave/bot.py: announce our BuildSlaveness to the log + so the LogWatcher can tell the difference between a buildmaster + and a buildslave + 2006-11-24 Brian Warner * docs/examples/twisted_master.cfg: update to match the version diff --git a/buildbot/scripts/logwatcher.py b/buildbot/scripts/logwatcher.py new file mode 100644 index 0000000..2370825 --- /dev/null +++ b/buildbot/scripts/logwatcher.py @@ -0,0 +1,95 @@ + +import os +from twisted.python.failure import Failure +from twisted.internet import task, defer, reactor +from twisted.protocols.basic import LineOnlyReceiver + +class FakeTransport: + disconnecting = False + +class BuildmasterTimeoutError(Exception): + pass +class BuildslaveTimeoutError(Exception): + pass +class ReconfigError(Exception): + pass +class BuildSlaveDetectedError(Exception): + pass + +class LogWatcher(LineOnlyReceiver): + POLL_INTERVAL = 0.1 + TIMEOUT_DELAY = 5.0 + delimiter = os.linesep + + def __init__(self, logfile): + self.logfile = logfile + self.in_reconfig = False + self.transport = FakeTransport() + self.f = None + self.processtype = "buildmaster" + + def start(self): + # return a Deferred that fires when the reconfig process has + # finished. It errbacks with TimeoutError if the finish line has not + # been seen within 5 seconds, and with ReconfigError if the error + # line was seen. If the logfile could not be opened, it errbacks with + # an IOError. + self.running = True + d = defer.maybeDeferred(self._start) + return d + + def _start(self): + self.d = defer.Deferred() + try: + self.f = open(self.logfile, "rb") + self.f.seek(0, 2) # start watching from the end + except IOError: + pass + reactor.callLater(self.TIMEOUT_DELAY, self.timeout) + self.poller = task.LoopingCall(self.poll) + self.poller.start(self.POLL_INTERVAL) + return self.d + + def timeout(self): + if self.processtype == "buildmaster": + self.d.errback(BuildmasterTimeoutError()) + else: + self.d.errback(BuildslaveTimeoutError()) + + def finished(self, results): + self.running = False + self.in_reconfig = False + self.d.callback(results) + + def lineReceived(self, line): + if not self.running: + return + if "Log opened." in line: + self.in_reconfig = True + if "loading configuration from" in line: + self.in_reconfig = True + if "Creating BuildSlave" in line: + self.processtype = "buildslave" + + if self.in_reconfig: + print line + + if "message from master: attached" in line: + return self.finished("buildslave") + if "I will keep using the previous config file" in line: + return self.finished(Failure(ReconfigError())) + if "configuration update complete" in line: + return self.finished("buildmaster") + + def poll(self): + if not self.f: + try: + self.f = open(self.logfile, "rb") + except IOError: + return + while True: + data = self.f.read(1000) + if not data: + return + self.dataReceived(data) + diff --git a/buildbot/scripts/reconfig.py b/buildbot/scripts/reconfig.py dissimilarity index 77% index a1a57be..38ee984 100644 --- a/buildbot/scripts/reconfig.py +++ b/buildbot/scripts/reconfig.py @@ -1,93 +1,63 @@ - -import os, signal -from twisted.internet import reactor, task -from twisted.protocols.basic import LineOnlyReceiver - -class FakeTransport: - disconnecting = False - -class LogWatcher(LineOnlyReceiver): - POLL_INTERVAL = 0.1 - delimiter = "\n" - - def __init__(self, finished): - self.poller = task.LoopingCall(self.poll) - self.in_reconfig = False - self.finished_cb = finished - self.transport = FakeTransport() - - - def start(self, logfile): - try: - self.f = open(logfile, "rb") - self.f.seek(0, 2) - self.poller.start(self.POLL_INTERVAL) - except IOError: - print "Unable to follow %s" % logfile - return False - return True - - def finished(self, success): - self.in_reconfig = False - self.finished_cb(success) - - def lineReceived(self, line): - if "loading configuration from" in line: - self.in_reconfig = True - if self.in_reconfig: - print line - if "I will keep using the previous config file" in line: - self.finished(False) - if "configuration update complete" in line: - self.finished(True) - - def poll(self): - while True: - data = self.f.read(1000) - if not data: - return - self.dataReceived(data) - -class Reconfigurator: - def run(self, config): - - basedir = config['basedir'] - quiet = config['quiet'] - os.chdir(basedir) - f = open("twistd.pid", "rt") - pid = int(f.read().strip()) - if quiet: - os.kill(pid, signal.SIGHUP) - return - # keep reading twistd.log. Display all messages between "loading - # configuration from ..." and "configuration update complete" or - # "I will keep using the previous config file instead.", or until - # 5 seconds have elapsed. - reactor.callLater(5, self.timeout) - self.lw = lw = LogWatcher(self.finished) - if lw.start("twistd.log"): - # we're watching - # give the LogWatcher a chance to start reading - print "sending SIGHUP to process %d" % pid - reactor.callLater(0.2, os.kill, pid, signal.SIGHUP) - reactor.run() - else: - # we couldn't watch the file.. just SIGHUP it - os.kill(pid, signal.SIGHUP) - print "sent SIGHUP to process %d" % pid - - def finished(self, success): - if success: - print "Reconfiguration is complete." - else: - print "Reconfiguration failed." - reactor.stop() - - def timeout(self): - print "Never saw reconfiguration finish." - reactor.stop() - -def reconfig(config): - r = Reconfigurator() - r.run(config) - + +import os, signal +from twisted.internet import reactor + +from buildbot.scripts.logwatcher import LogWatcher, BuildmasterTimeoutError, \ + ReconfigError + +class Reconfigurator: + def run(self, config): + + basedir = config['basedir'] + quiet = config['quiet'] + os.chdir(basedir) + f = open("twistd.pid", "rt") + self.pid = int(f.read().strip()) + if quiet: + os.kill(self.pid, signal.SIGHUP) + return + + # keep reading twistd.log. Display all messages between "loading + # configuration from ..." and "configuration update complete" or + # "I will keep using the previous config file instead.", or until + # 5 seconds have elapsed. + + self.sent_signal = False + lw = LogWatcher("twistd.log") + d = lw.start() + d.addCallbacks(self.success, self.failure) + reactor.callLater(0.2, self.sighup) + reactor.run() + + def sighup(self): + if self.sent_signal: + return + print "sending SIGHUP to process %d" % self.pid + self.sent_signal = True + os.kill(self.pid, signal.SIGHUP) + + def success(self, res): + print """ +Reconfiguration appears to have completed successfully. +""" + reactor.stop() + + def failure(self, why): + if why.check(BuildmasterTimeoutError): + print "Never saw reconfiguration finish." + elif why.check(ReconfigError): + print """ +Reconfiguration failed. Please inspect the master.cfg file for errors, +correct them, then try 'buildbot reconfig' again. +""" + elif why.check(IOError): + # we were probably unable to open the file in the first place + self.sighup() + else: + print "Error while following twistd.log: %s" % why + reactor.stop() + +def reconfig(config): + r = Reconfigurator() + r.run(config) + diff --git a/buildbot/scripts/runner.py b/buildbot/scripts/runner.py index 825c6d1..150c940 100644 --- a/buildbot/scripts/runner.py +++ b/buildbot/scripts/runner.py @@ -290,41 +290,6 @@ def createSlave(config): if not m.quiet: print "buildslave configured in %s" % m.basedir -def start(config): - basedir = config['basedir'] - quiet = config['quiet'] - os.chdir(basedir) - sys.path.insert(0, os.path.abspath(os.getcwd())) - if os.path.exists("/usr/bin/make") and os.path.exists("Makefile.buildbot"): - # Preferring the Makefile lets slave admins do useful things like set - # up environment variables for the buildslave. - cmd = "make -f Makefile.buildbot start" - if not quiet: print cmd - os.system(cmd) - else: - # see if we can launch the application without actually having to - # spawn twistd, since spawning processes correctly is a real hassle - # on windows. - from twisted.python.runtime import platformType - argv = ["twistd", - "--no_save", - "--logfile=twistd.log", # windows doesn't use the same default - "--python=buildbot.tac"] - if platformType == "win32": - argv.append("--reactor=win32") - sys.argv = argv - - # this is copied from bin/twistd. twisted-1.3.0 uses twistw, while - # twisted-2.0.0 uses _twistw. - if platformType == "win32": - try: - from twisted.scripts._twistw import run - except ImportError: - from twisted.scripts.twistw import run - else: - from twisted.scripts.twistd import run - run() - def stop(config, signame="TERM", wait=False): import signal @@ -337,7 +302,8 @@ def stop(config, signame="TERM", wait=False): timer = 0 os.kill(pid, signum) if not wait: - print "sent SIG%s to process" % signame + if not quiet: + print "sent SIG%s to process" % signame return time.sleep(0.1) while timer < 5: @@ -345,19 +311,25 @@ def stop(config, signame="TERM", wait=False): try: os.kill(pid, 0) except OSError: - print "buildbot process %d is dead" % pid + if not quiet: + print "buildbot process %d is dead" % pid return timer += 1 time.sleep(1) - print "never saw process go away" + if not quiet: + print "never saw process go away" def restart(config): + quiet = config['quiet'] + from buildbot.scripts.startup import start stop(config, wait=True) - print "now restarting buildbot process.." + if not quiet: + print "now restarting buildbot process.." start(config) - # this next line might not be printed, if start() ended up running twistd - # inline - print "buildbot process has been restarted" + if not quiet: + # this next line might not be printed, if start() ended up running + # twistd inline + print "buildbot process has been restarted" def loadOptions(filename="options", here=None, home=None): @@ -420,6 +392,9 @@ def loadOptions(filename="options", here=None, home=None): return localDict class StartOptions(MakerBase): + optFlags = [ + ['quiet', 'q', "Don't display startup log messages"], + ] def getSynopsis(self): return "Usage: buildbot start " @@ -437,6 +412,9 @@ class ReconfigOptions(MakerBase): class RestartOptions(MakerBase): + optFlags = [ + ['quiet', 'q', "Don't display startup log messages"], + ] def getSynopsis(self): return "Usage: buildbot restart " @@ -736,6 +714,7 @@ def run(): elif command == "create-slave": createSlave(so) elif command == "start": + from buildbot.scripts.startup import start start(so) elif command == "stop": stop(so, wait=True) diff --git a/buildbot/scripts/startup.py b/buildbot/scripts/startup.py new file mode 100644 index 0000000..546b624 --- /dev/null +++ b/buildbot/scripts/startup.py @@ -0,0 +1,113 @@ + +import os, sys, time + +class Follower: + def follow(self): + from twisted.internet import reactor + from buildbot.scripts.reconfig import LogWatcher + self.rc = 0 + print "Following twistd.log until startup finished.." + lw = LogWatcher("twistd.log") + d = lw.start() + d.addCallbacks(self._success, self._failure) + reactor.run() + return self.rc + + def _success(self, processtype): + from twisted.internet import reactor + print "The %s appears to have (re)started correctly." % processtype + self.rc = 0 + reactor.stop() + + def _failure(self, why): + from twisted.internet import reactor + from buildbot.scripts.logwatcher import BuildmasterTimeoutError, \ + ReconfigError, BuildslaveTimeoutError + if why.check(BuildmasterTimeoutError): + print """ +The buildmaster took more than 5 seconds to start, so we were unable to +confirm that it started correctly. Please 'tail twistd.log' and look for a +line that says 'configuration update complete' to verify correct startup. +""" + elif why.check(BuildslaveTimeoutError): + print """ +The buildslave took more than 5 seconds to start and/or connect to the +buildmaster, so we were unable to confirm that it started and connected +correctly. Please 'tail twistd.log' and look for a line that says 'message +from master: attached' to verify correct startup. If you see a bunch of +messages like 'will retry in 6 seconds', your buildslave might not have the +correct hostname or portnumber for the buildmaster, or the buildmaster might +not be running. If you see messages like + 'Failure: twisted.cred.error.UnauthorizedLogin' +then your buildslave might be using the wrong botname or password. Please +correct these problems and then restart the buildslave. +""" + elif why.check(ReconfigError): + print """ +The buildmaster appears to have encountered an error in the master.cfg config +file during startup. It is probably running with an empty configuration right +now. Please inspect and fix master.cfg, then restart the buildmaster. +""" + elif why.check(BuildSlaveDetectedError): + print """ +Buildslave is starting up, not following logfile. +""" + else: + print """ +Unable to confirm that the buildmaster started correctly. You may need to +stop it, fix the config file, and restart. +""" + print why + self.rc = 1 + reactor.stop() + + +def start(config): + os.chdir(config['basedir']) + if config['quiet']: + return launch(config) + + # fork a child to launch the daemon, while the parent process tails the + # logfile + if os.fork(): + # this is the parent + rc = Follower().follow() + sys.exit(rc) + # this is the child: give the logfile-watching parent a chance to start + # watching it before we start the daemon + time.sleep(0.2) + launch(config) + +def launch(config): + sys.path.insert(0, os.path.abspath(os.getcwd())) + if os.path.exists("/usr/bin/make") and os.path.exists("Makefile.buildbot"): + # Preferring the Makefile lets slave admins do useful things like set + # up environment variables for the buildslave. + cmd = "make -f Makefile.buildbot start" + if not config['quiet']: + print cmd + os.system(cmd) + else: + # see if we can launch the application without actually having to + # spawn twistd, since spawning processes correctly is a real hassle + # on windows. + from twisted.python.runtime import platformType + argv = ["twistd", + "--no_save", + "--logfile=twistd.log", # windows doesn't use the same default + "--python=buildbot.tac"] + if platformType == "win32": + argv.append("--reactor=win32") + sys.argv = argv + + # this is copied from bin/twistd. twisted-1.3.0 uses twistw, while + # twisted-2.0.0 uses _twistw. + if platformType == "win32": + try: + from twisted.scripts._twistw import run + except ImportError: + from twisted.scripts.twistw import run + else: + from twisted.scripts.twistd import run + run() + diff --git a/buildbot/slave/bot.py b/buildbot/slave/bot.py index 6cb29f6..f2485fb 100644 --- a/buildbot/slave/bot.py +++ b/buildbot/slave/bot.py @@ -462,6 +462,7 @@ class BuildSlave(service.MultiService): def __init__(self, host, port, name, passwd, basedir, keepalive, usePTY, keepaliveTimeout=30, umask=None, debugOpts={}): + log.msg("Creating BuildSlave") service.MultiService.__init__(self) self.debugOpts = debugOpts.copy() bot = self.botClass(basedir, usePTY) -- 2.11.4.GIT