1 /* Copyright (c) 2003-2006 MySQL AB
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
16 #include <ndb_global.h>
17 #include <my_pthread.h>
19 #include <ndb_version.h>
20 #include "Configuration.hpp"
21 #include <ConfigRetriever.hpp>
22 #include <TransporterRegistry.hpp>
24 #include "vm/SimBlockList.hpp"
25 #include "ThreadConfig.hpp"
26 #include <SignalLoggerManager.hpp>
29 #include <NdbDaemon.h>
31 #include <NdbConfig.h>
32 #include <WatchDog.hpp>
34 #include <LogLevel.hpp>
35 #include <EventLogger.hpp>
37 #include <NdbAutoPtr.hpp>
39 #include <Properties.hpp>
41 #include <mgmapi_debug.h>
43 #if defined NDB_SOLARIS // ok
44 #include <sys/processor.h> // For system information
// Objects declared extern here and only referenced in this file:
// g_eventLogger is used for all logging below, and theShutdownMutex is the
// mutex handler_error() attempts to take with NdbMutex_Trylock.
47 extern EventLogger g_eventLogger
;
48 extern NdbMutex
* theShutdownMutex
;
// Forward declaration; catchsigs() is defined further down in this file.
50 void catchsigs(bool ignore
); // for process signal handling
// Limit that main() compares failed_startups against (together with
// !stopOnError()) before logging "Not restarting".
52 #define MAX_FAILED_STARTUPS 3
53 // Flag set by child through SIGUSR1 to signal a failed startup
54 static bool failed_startup_flag
= false;
55 // Counter for consecutive failed startups
56 static Uint32 failed_startups
= 0;
// Signal handlers with C linkage. main() installs handler_sigusr1 for SIGUSR1;
// catchsigs() registers handler_shutdown and handler_error.
57 extern "C" void handler_shutdown(int signum
); // for process signal handling
58 extern "C" void handler_error(int signum
); // for process signal handling
59 extern "C" void handler_sigusr1(int signum
); // child signalling failed restart
61 // Shows system information
62 void systemInfo(const Configuration
& conf
,
65 // These are used already before fork if fetch_configuration() fails
66 // (e.g. Unable to alloc node id). Set them to something reasonable.
// main() later reassigns both streams with fdopen(): filedes[1] opened "w"
// becomes child_info_file_w and filedes[0] opened "r" becomes
// child_info_file_r.
67 static FILE *child_info_file_r
= stdin
;
68 static FILE *child_info_file_w
= stdout
;
// Writes one "token=val" line to child_info_file_w and flushes the stream
// right away, so the line is not left waiting in the stdio buffer.
//   token - key written before the '='
//   val   - integer value written after the '=' (formatted with %d)
70 static void writeChildInfo(const char *token
, int val
)
72 fprintf(child_info_file_w
, "%s=%d\n", token
, val
);
73 fflush(child_info_file_w
);
// Records a signal number in the child info stream under the key "signal".
// Called from handler_shutdown(), handler_error() and from main() when the
// child was terminated by a signal.
76 void childReportSignal(int signum
)
78 writeChildInfo("signal", signum
);
// Records an error code in the child info stream under the key "error".
81 void childReportError(int error
)
83 writeChildInfo("error", error
);
// Final report before the process ends: writes the current start phase under
// "sphase" and the exit code under "exit", appends an empty line, and closes
// both child info streams.
//   code              - value reported under "exit"
//   currentStartPhase - value reported under "sphase"
86 void childExit(int code
, Uint32 currentStartPhase
)
88 writeChildInfo("sphase", currentStartPhase
);
89 writeChildInfo("exit", code
);
// A bare newline follows the key=value lines.
90 fprintf(child_info_file_w
, "\n");
91 fclose(child_info_file_r
);
92 fclose(child_info_file_w
);
// Abort-path counterpart of childExit(). The reporting statements are the
// same: "sphase" and "exit" are written, an empty line is appended, and both
// child info streams are closed.
//   code              - value reported under "exit"
//   currentStartPhase - value reported under "sphase"
96 void childAbort(int code
, Uint32 currentStartPhase
)
98 writeChildInfo("sphase", currentStartPhase
);
99 writeChildInfo("exit", code
);
100 fprintf(child_info_file_w
, "\n");
101 fclose(child_info_file_r
);
102 fclose(child_info_file_w
);
// Parses one textual key/value pair and stores it in `p`.
//   pair - text to split, using the separator set ":=" and a limit of 2
//          (presumably "at most two pieces" - confirm against
//          BaseString::split)
//   p    - Properties object that receives the trimmed key and value
107 static int insert(const char * pair
, Properties
& p
)
109 BaseString
tmp(pair
);
112 Vector
<BaseString
> split
;
113 tmp
.split(split
, ":=", 2);
// Anything that did not yield exactly a key and a value takes this branch.
114 if(split
.size() != 2)
// Both pieces are trimmed before being stored.
116 p
.put(split
[0].trim().c_str(), split
[1].trim().c_str());
// Reader side of the child info stream. Closes this process's write stream
// first, then reads child_info_file_r line by line with fgets() until it
// returns NULL, and finally closes the read stream.
// NOTE(review): presumably each line read is handed to insert() to fill
// `info` - confirm against the loop body.
120 static int readChildInfo(Properties
&info
)
122 fclose(child_info_file_w
);
124 while (fgets(buf
,sizeof(buf
),child_info_file_r
))
126 fclose(child_info_file_r
);
// Looks up `token` in `info` as a string and parses it as a base-10 integer.
//   info    - properties to search
//   token   - key to look up
//   int_val - output location for the parsed value
130 static bool get_int_property(Properties
&info
,
131 const char *token
, Uint32
*int_val
)
133 const char *str_val
= 0;
// Key not present in `info`.
134 if (!info
.get(token
, &str_val
))
137 long int tmp
= strtol(str_val
, &endptr
, 10);
// strtol leaves endptr equal to the input pointer when no characters were
// converted, i.e. the value did not start with a number.
138 if (str_val
== endptr
)
// Builds an EventReport describing why the node stopped, logs it through
// g_eventLogger, and then tries to deliver it to every management server
// listed in config->m_mgmds.
//   config     - configuration; supplies getInitialStart() and the m_mgmds list
//   error_exit - passed by callers as the error-exit indicator
//   restart    - callers pass 1 when the node is about to be restarted, else 0
144 int reportShutdown(class Configuration
*config
, int error_exit
, int restart
)
// These keep their initial values (0, 0, 256) unless get_int_property()
// overwrites them from the child info.
146 Uint32 error
= 0, signum
= 0, sphase
= 256;
150 get_int_property(info
, "signal", &signum
);
151 get_int_property(info
, "error", &error
);
152 get_int_property(info
, "sphase", &sphase
);
// The event is assembled in a raw 25-word buffer that is viewed as an
// EventReport.
154 Uint32 length
, theData
[25];
155 EventReport
*rep
= (EventReport
*)theData
;
157 rep
->setNodeId(globalData
.ownId
);
// Bit flags: 2 is OR-ed in when theRestartFlag is initial_state, 4 when an
// initial start is configured.
160 (globalData
.theRestartFlag
== initial_state
? 2 : 0) |
161 (config
->getInitialStart() ? 4 : 0);
// Two event types are used: NDBStopCompleted and NDBStopForced.
167 rep
->setEventType(NDB_LE_NDBStopCompleted
);
173 rep
->setEventType(NDB_LE_NDBStopForced
);
177 theData
[5] = 0; // extra
// Log the event locally before trying to reach any management server.
182 const EventReport
* const eventReport
= (EventReport
*)&theData
[0];
183 g_eventLogger
.log(eventReport
->getEventType(), theData
,
184 eventReport
->getNodeId(), 0);
// One attempt per management server: a fresh handle is created, pointed at
// that server's connect string, used for ndb_mgm_report_event(), and then
// disconnected and destroyed.
187 for (unsigned n
= 0; n
< config
->m_mgmds
.size(); n
++)
189 NdbMgmHandle h
= ndb_mgm_create_handle();
191 ndb_mgm_set_connectstring(h
, config
->m_mgmds
[n
].c_str()) ||
194 0, //retry_delay_in_seconds
// A non-zero return from ndb_mgm_report_event() takes the failure path.
200 if (ndb_mgm_report_event(h
, theData
, length
))
// Failure with a usable handle: log the handle's latest error message and
// description as a warning.
208 BaseString
tmp(ndb_mgm_get_latest_error_msg(h
));
210 tmp
.append(ndb_mgm_get_latest_error_desc(h
));
211 g_eventLogger
.warning("Unable to report shutdown reason to %s: %s",
212 config
->m_mgmds
[n
].c_str(), tmp
.c_str());
216 g_eventLogger
.error("Unable to report shutdown reason to %s",
217 config
->m_mgmds
[n
].c_str());
222 ndb_mgm_disconnect(h
);
223 ndb_mgm_destroy_handle(&h
);
// Entry point of ndbd. Flow as written below:
//   1. set up console logging and create the global emulator data;
//   2. parse command line options, fetch the configuration, and become a
//      daemon if daemon mode is requested;
//   3. unless running in foreground mode, loop as the "angel" process: create
//      the child info streams, fork, wait for the child, and decide from its
//      exit status whether to stop or restart it;
//   4. in the child (or in foreground mode), set up the configuration, load
//      the blocks, open the signal log, start the transporters, watchdog and
//      socket server, and enter the control loop.
229 int main(int argc
, char** argv
)
232 // Print to stdout/console
233 g_eventLogger
.createConsoleHandler();
234 g_eventLogger
.setCategory("ndbd");
235 g_eventLogger
.enable(Logger::LL_ON
, Logger::LL_INFO
);
236 g_eventLogger
.enable(Logger::LL_ON
, Logger::LL_CRITICAL
);
237 g_eventLogger
.enable(Logger::LL_ON
, Logger::LL_ERROR
);
238 g_eventLogger
.enable(Logger::LL_ON
, Logger::LL_WARNING
);
// Start-up log level is set to 15 for the event logger.
240 g_eventLogger
.m_logLevel
.setLogLevel(LogLevel::llStartUp
, 15);
242 globalEmulatorData
.create();
244 // Parse command line options
245 Configuration
* theConfig
= globalEmulatorData
.theConfiguration
;
246 if(!theConfig
->init(argc
, argv
)){
250 { // Do configuration
// SIGPIPE is ignored before the configuration is fetched.
252 signal(SIGPIPE
, SIG_IGN
);
254 theConfig
->fetch_configuration();
257 my_setwd(NdbConfig_get_path(0), MYF(0));
259 if (theConfig
->getDaemonMode()) {
// Pid file and stdout file names are derived from this node's id.
261 char *lockfile
= NdbConfig_PidFileName(globalData
.ownId
);
262 char *logfile
= NdbConfig_StdoutFileName(globalData
.ownId
);
263 NdbAutoPtr
<char> tmp_aptr1(lockfile
), tmp_aptr2(logfile
);
265 if (NdbDaemon_Make(lockfile
, logfile
, 0) == -1) {
266 ndbout
<< "Cannot become daemon: " << NdbDaemon_ErrorText
<< endl
;
// SIGUSR1 is how a child reports a failed startup (see handler_sigusr1).
272 signal(SIGUSR1
, handler_sigusr1
);
// Angel loop: skipped entirely in foreground mode.
275 while (! theConfig
->getForegroundMode()) // the cond is const
277 // setup reporting between child and parent
281 g_eventLogger
.error("pipe() failed with errno=%d (%s)",
282 errno
, strerror(errno
));
// Wrap the two descriptors in stdio streams: filedes[1] for writing,
// filedes[0] for reading.
287 if (!(child_info_file_w
= fdopen(filedes
[1],"w")))
289 g_eventLogger
.error("fdopen() failed with errno=%d (%s)",
290 errno
, strerror(errno
));
292 if (!(child_info_file_r
= fdopen(filedes
[0],"r")))
294 g_eventLogger
.error("fdopen() failed with errno=%d (%s)",
295 errno
, strerror(errno
));
// fork() returns 0 in the child and a negative value on failure; both leave
// the loop, so only the parent (angel) continues with the code below.
299 if ((child
= fork()) <= 0)
300 break; // child or error
309 * We no longer need the mgm connection in this process
310 * (as we are the angel, not ndb)
312 * We don't want to purge any allocated resources (nodeid), so
313 * we set that option to false
315 theConfig
->closeConfiguration(false);
317 int status
= 0, error_exit
= 0, signum
= 0;
// Keep calling waitpid() until it returns this child's pid.
318 while(waitpid(child
, &status
, 0) != child
);
// Normal exit: the exit status selects how (or whether) to restart.
319 if(WIFEXITED(status
)){
320 switch(WEXITSTATUS(status
)){
322 g_eventLogger
.info("Angel shutting down");
323 reportShutdown(theConfig
, 0, 0);
// Each restart case below sets two things: whether the next start is an
// initial start, and whether the node comes up in initial_state or
// perform_start.
326 case NRT_NoStart_Restart
:
327 theConfig
->setInitialStart(false);
328 globalData
.theRestartFlag
= initial_state
;
330 case NRT_NoStart_InitialStart
:
331 theConfig
->setInitialStart(true);
332 globalData
.theRestartFlag
= initial_state
;
334 case NRT_DoStart_InitialStart
:
335 theConfig
->setInitialStart(true);
336 globalData
.theRestartFlag
= perform_start
;
340 if(theConfig
->stopOnError()){
342 * Error shutdown && stopOnError()
344 reportShutdown(theConfig
, error_exit
, 0);
348 case NRT_DoStart_Restart
:
349 theConfig
->setInitialStart(false);
350 globalData
.theRestartFlag
= perform_start
;
// Child was killed by a signal: the angel itself records the signal number
// in the child info stream.
355 if (WIFSIGNALED(status
))
357 signum
= WTERMSIG(status
);
358 childReportSignal(signum
);
363 g_eventLogger
.info("Unknown exit reason. Stopped.");
365 if(theConfig
->stopOnError()){
367 * Error shutdown && stopOnError()
369 reportShutdown(theConfig
, error_exit
, 0);
// failed_startup_flag is set by handler_sigusr1() when the child signals a
// failed startup.
374 if (!failed_startup_flag
)
376 // Reset the counter for consecutive failed startups
// NOTE(review): the note inside the branch below reads "&& stopOnError()",
// but the condition tests !stopOnError() together with the failed-startup
// limit - the note looks copied from the earlier branches.
379 else if (failed_startups
>= MAX_FAILED_STARTUPS
&& !theConfig
->stopOnError())
382 * Error shutdown && stopOnError()
384 g_eventLogger
.alert("Ndbd has failed %u consecutive startups. "
385 "Not restarting", failed_startups
);
386 reportShutdown(theConfig
, error_exit
, 0);
// Restart path: clear the flag, report the shutdown with restart = 1, and
// fetch the configuration again.
389 failed_startup_flag
= false;
390 reportShutdown(theConfig
, error_exit
, 1);
391 g_eventLogger
.info("Ndb has terminated (pid %d) restarting", child
);
392 theConfig
->fetch_configuration();
// From here on the code is reached after leaving (or skipping) the angel loop.
396 g_eventLogger
.info("Angel pid: %d ndb pid: %d", getppid(), getpid());
398 g_eventLogger
.info("Ndb pid: %d", getpid());
400 g_eventLogger
.info("Ndb started in foreground");
402 g_eventLogger
.info("Ndb started");
404 theConfig
->setupConfiguration();
405 systemInfo(* theConfig
, * theConfig
->m_logLevel
);
408 globalEmulatorData
.theSimBlockList
->load(globalEmulatorData
);
410 // Set thread concurrency for Solaris' light weight processes
412 status
= NdbThread_SetConcurrencyLevel(30);
416 // Create a signal logger
// The signal log file is opened in append mode.
417 char *buf
= NdbConfig_SignalLogFileName(globalData
.ownId
);
418 NdbAutoPtr
<char> tmp_aptr(buf
);
419 FILE * signalLog
= fopen(buf
, "a");
420 globalSignalLoggers
.setOwnNodeId(globalData
.ownId
);
421 globalSignalLoggers
.setOutputStream(signalLog
);
422 #if 0 // to log startup
423 globalSignalLoggers
.log(SignalLoggerManager::LogInOut
, "BLOCK=DBDICT,DBDIH");
424 globalData
.testOn
= 1;
434 ErrorReporter::setErrorHandlerShutdownType(NST_ErrorHandlerStartup
);
// The restart flag selects the start sequence: one path calls doStart() with
// SL_CMVMI only, the other with SL_CMVMI followed by SL_STARTING.
436 switch(globalData
.theRestartFlag
){
438 globalEmulatorData
.theThreadConfig
->doStart(NodeState::SL_CMVMI
);
441 globalEmulatorData
.theThreadConfig
->doStart(NodeState::SL_CMVMI
);
442 globalEmulatorData
.theThreadConfig
->doStart(NodeState::SL_STARTING
);
// Comparing a string literal's address with 0 is always false, so this
// assert always fires when reached (in builds with asserts enabled).
445 assert("Illegal state globalData.theRestartFlag" == 0);
448 globalTransporterRegistry
.startSending();
449 globalTransporterRegistry
.startReceiving();
450 if (!globalTransporterRegistry
.start_service(*globalEmulatorData
.m_socket_server
)){
451 ndbout_c("globalTransporterRegistry.start_service() failed");
455 // Re-use the mgm handle as a transporter
456 if(!globalTransporterRegistry
.connect_client(
457 theConfig
->get_config_retriever()->get_mgmHandlePtr()))
458 ERROR_SET(fatal
, NDBD_EXIT_INVALID_CONFIG
,
459 "Connection to mgmd terminated before setup was complete",
460 "StopOnError missing");
462 if (!globalTransporterRegistry
.start_clients()){
463 ndbout_c("globalTransporterRegistry.start_clients() failed");
467 globalEmulatorData
.theWatchDog
->doStart();
469 globalEmulatorData
.m_socket_server
->startServer();
471 // theConfig->closeConfiguration();
473 globalEmulatorData
.theThreadConfig
->ipControlLoop();
// Reached once ipControlLoop() returns.
475 NdbShutdown(NST_Normal
);
// Logs start-up information about this node and the machine it runs on.
//   config   - source of the mgmd host/port and the watchdog interval
//   logLevel - the llStartUp level gates the output: > 0 for the general
//              banner, > 3 for the watchdog timer line
// Processor count and clock speed are collected per platform: a Windows
// API/registry path and a Solaris processor_info() path are present.
482 systemInfo(const Configuration
& config
, const LogLevel
& logLevel
){
// Windows: processor count comes from GetSystemInfo().
487 GetSystemInfo(&sinfo
);
488 processors
= sinfo
.dwNumberOfProcessors
;
// Windows: the "~MHz" value is read from the registry key of processor 0.
490 if(ERROR_SUCCESS
==RegOpenKeyEx
492 TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
493 0, KEY_READ
, &hKey
)) {
495 DWORD cbData
= sizeof(dwMHz
);
496 if(ERROR_SUCCESS
==RegQueryValueEx(hKey
,
497 "~MHz", 0, 0, (LPBYTE
)&dwMHz
, &cbData
)) {
502 #elif defined NDB_SOLARIS // ok
503 // Search for at max 16 processors among the first 256 processor ids
504 processor_info_t pinfo
; memset(&pinfo
, 0, sizeof(pinfo
));
506 while(processors
< 16 && pid
< 256){
// processor_info() returning 0 means the id refers to an existing processor.
507 if(!processor_info(pid
++, &pinfo
))
510 speed
= pinfo
.pi_clock
;
// General banner: node id, version string, and where the configuration was
// fetched from (only when a mgmd host is known).
513 if(logLevel
.getLogLevel(LogLevel::llStartUp
) > 0){
514 g_eventLogger
.info("NDB Cluster -- DB node %d", globalData
.ownId
);
515 g_eventLogger
.info("%s --", NDB_VERSION_STRING
);
516 if (config
.get_mgmd_host())
517 g_eventLogger
.info("Configuration fetched at %s port %d",
518 config
.get_mgmd_host(), config
.get_mgmd_port());
519 #ifdef NDB_SOLARIS // ok
520 g_eventLogger
.info("NDB is running on a machine with %d processor(s) at %d MHz",
// More verbose start-up level: also log the watchdog check interval.
524 if(logLevel
.getLogLevel(LogLevel::llStartUp
) > 3){
525 Uint32 t
= config
.timeBetweenWatchDogCheck();
526 g_eventLogger
.info("WatchDog timer is set to %d ms", t
);
// Helper macro used by catchsigs() to set the disposition of one signal.
// It either installs `handler` for `signum` or sets the signal to SIG_IGN;
// SIGCHLD is never set to SIG_IGN by the ignore path.
// NOTE(review): presumably the `ignore` argument selects between the two
// paths - confirm against the full macro body.
531 #define handler_register(signum, handler, ignore)\
534 if(signum != SIGCHLD)\
535 signal(signum, SIG_IGN);\
537 signal(signum, handler);\
// Registers the process signal dispositions (non-Windows builds only).
// Signals are grouped into three static tables - shutdown, error and ignore -
// and every entry goes through handler_register with the matching handler.
//   ignore - forwarded unchanged to every handler_register invocation
541 catchsigs(bool ignore
){
542 #if !defined NDB_WIN32
544 static const int signals_shutdown
[] = {
552 #elif defined SIGINFO
564 static const int signals_error
[] = {
582 static const int signals_ignore
[] = {
// Shutdown signals -> handler_shutdown.
587 for(i
= 0; i
< sizeof(signals_shutdown
)/sizeof(signals_shutdown
[0]); i
++)
588 handler_register(signals_shutdown
[i
], handler_shutdown
, ignore
);
// Error signals -> handler_error.
589 for(i
= 0; i
< sizeof(signals_error
)/sizeof(signals_error
[0]); i
++)
590 handler_register(signals_error
[i
], handler_error
, ignore
);
// Remaining listed signals -> SIG_IGN.
591 for(i
= 0; i
< sizeof(signals_ignore
)/sizeof(signals_ignore
[0]); i
++)
592 handler_register(signals_ignore
[i
], SIG_IGN
, ignore
);
// SIGTRAP is routed to handler_error only when not running in foreground mode.
594 Configuration
* theConfig
= globalEmulatorData
.theConfiguration
;
595 if (! theConfig
->getForegroundMode())
596 handler_register(SIGTRAP
, handler_error
, ignore
);
// Signal handler for the shutdown signal group: logs the signal, records it
// in the child info stream, and requests a stop by setting
// globalData.theRestartFlag to perform_stop.
603 handler_shutdown(int signum
){
604 g_eventLogger
.info("Received signal %d. Performing stop.", signum
);
606 childReportSignal(signum
);
607 globalData
.theRestartFlag
= perform_stop
;
// Signal handler for the error signal group. It logs and records the signal,
// builds a "Signal N received; <text>" message, and raises a fatal
// NDBD_EXIT_OS_SIGNAL_RECEIVED error through ERROR_SET_SIGNAL.
612 handler_error(int signum
){
613 // only let one thread run shutdown
// thread_id holds the id of the thread that entered the shutdown path
// (assigned from my_thread_id() below); 0 means none yet.
614 static long thread_id
= 0;
616 if (thread_id
!= 0 && thread_id
== my_thread_id())
618 // Shutdown thread received signal
// Restore the default disposition and send the same signal to this process
// again.
620 signal(signum
, SIG_DFL
);
621 kill(getpid(), signum
);
624 NdbSleep_MilliSleep(10);
// A non-zero NdbMutex_Trylock result means the shutdown mutex was not
// acquired by this thread.
626 if(theShutdownMutex
&& NdbMutex_Trylock(theShutdownMutex
) != 0)
628 NdbSleep_MilliSleep(10);
629 thread_id
= my_thread_id();
630 g_eventLogger
.info("Received signal %d. Running error handler.", signum
);
631 childReportSignal(signum
);
632 // restart the system
633 char errorData
[64], *info
= 0;
// strsignal() supplies a textual description when the platform provides it;
// otherwise `info` stays null and the fallback text below is used.
634 #ifdef HAVE_STRSIGNAL
635 info
= strsignal(signum
);
637 BaseString::snprintf(errorData
, sizeof(errorData
), "Signal %d received; %s", signum
,
638 info
? info
: "No text for signal available");
639 ERROR_SET_SIGNAL(fatal
, NDBD_EXIT_OS_SIGNAL_RECEIVED
, errorData
, __FILE__
);
// SIGUSR1 handler installed by main() in the angel process. Sets
// failed_startup_flag (main() reads and clears it after the child has
// terminated) and logs the current failed_startups count.
644 handler_sigusr1(int signum
)
// Guarded so that the work in this branch happens only on the first signal
// after the flag was last cleared.
646 if (!failed_startup_flag
)
649 failed_startup_flag
= true;
651 g_eventLogger
.info("Angel received ndbd startup failure count %u.", failed_startups
);