/* Copyright (c) 2004-2007 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */

#if defined(__GNUC__) && defined(USE_PRAGMA_IMPLEMENTATION)
#pragma implementation
#endif

#include "guardian.h"

#include <signal.h>
#include <sys/types.h>
#include <time.h>

#include "instance.h"
#include "instance_map.h"
#include "log.h"
#include "mysql_manager_error.h"
#include "options.h"

/*************************************************************************
  {{{ Constructor & destructor.
*************************************************************************/

46 Nominal contructor intended for assigning references and initialize
47 trivial objects. Real initialization is made by init() method.
50 Guardian::Guardian(Thread_registry
*thread_registry_arg
,
51 Instance_map
*instance_map_arg
)
52 :shutdown_requested(FALSE
),
54 thread_registry(thread_registry_arg
),
55 instance_map(instance_map_arg
)
57 pthread_mutex_init(&LOCK_guardian
, 0);
58 pthread_cond_init(&COND_guardian
, 0);
65 NOTE: it's necessary to synchronize here, because Guiardian thread can be
66 still alive an hold the mutex (because it is detached and we have no
73 pthread_mutex_destroy(&LOCK_guardian
);
74 pthread_cond_destroy(&COND_guardian
);
/*************************************************************************
  }}}
*************************************************************************/

83 Send request to stop Guardian.
89 void Guardian::request_shutdown()
94 shutdown_requested
= TRUE
;
106 instance a pointer to the instance for processing
109 - the given instance must be locked before calling this operation;
110 - Guardian must be locked before calling this operation.
113 void Guardian::process_instance(Instance
*instance
)
115 int restart_retry
= 100;
116 time_t current_time
= time(NULL
);
118 if (instance
->get_state() == Instance::STOPPING
)
120 /* This brach is executed during shutdown. */
122 /* This returns TRUE if and only if an instance was stopped for sure. */
123 if (instance
->is_crashed())
125 log_info("Guardian: '%s' stopped.",
126 (const char *) instance
->get_name()->str
);
128 instance
->set_state(Instance::STOPPED
);
130 else if ((uint
) (current_time
- instance
->last_checked
) >=
131 instance
->options
.get_shutdown_delay())
133 log_info("Guardian: '%s' hasn't stopped within %d secs.",
134 (const char *) instance
->get_name()->str
,
135 (int) instance
->options
.get_shutdown_delay());
137 instance
->kill_mysqld(SIGKILL
);
139 log_info("Guardian: pretend that '%s' is killed.",
140 (const char *) instance
->get_name()->str
);
142 instance
->set_state(Instance::STOPPED
);
146 log_info("Guardian: waiting for '%s' to stop (%d secs left).",
147 (const char *) instance
->get_name()->str
,
148 (int) (instance
->options
.get_shutdown_delay() -
149 current_time
+ instance
->last_checked
));
155 if (instance
->is_mysqld_running())
157 /* The instance can be contacted on it's port */
159 /* If STARTING also check that pidfile has been created */
160 if (instance
->get_state() == Instance::STARTING
&&
161 instance
->options
.load_pid() == 0)
163 /* Pid file not created yet, don't go to STARTED state yet */
165 else if (instance
->get_state() != Instance::STARTED
)
167 /* clear status fields */
168 log_info("Guardian: '%s' is running, set state to STARTED.",
169 (const char *) instance
->options
.instance_name
.str
);
170 instance
->reset_stat();
171 instance
->set_state(Instance::STARTED
);
176 switch (instance
->get_state()) {
177 case Instance::NOT_STARTED
:
178 log_info("Guardian: starting '%s'...",
179 (const char *) instance
->options
.instance_name
.str
);
181 /* NOTE: set state to STARTING _before_ start() is called. */
182 instance
->set_state(Instance::STARTING
);
183 instance
->last_checked
= current_time
;
185 instance
->start_mysqld();
189 case Instance::STARTED
: /* fallthrough */
190 case Instance::STARTING
: /* let the instance start or crash */
191 if (!instance
->is_crashed())
194 instance
->crash_moment
= current_time
;
195 instance
->last_checked
= current_time
;
196 instance
->set_state(Instance::JUST_CRASHED
);
197 /* fallthrough -- restart an instance immediately */
199 case Instance::JUST_CRASHED
:
200 if (current_time
- instance
->crash_moment
<= 2)
202 if (instance
->is_crashed())
204 instance
->start_mysqld();
205 log_info("Guardian: starting '%s'...",
206 (const char *) instance
->options
.instance_name
.str
);
210 instance
->set_state(Instance::CRASHED
);
214 case Instance::CRASHED
: /* just regular restarts */
215 if ((ulong
) (current_time
- instance
->last_checked
) <=
216 (ulong
) Options::Main::monitoring_interval
)
219 if (instance
->restart_counter
< restart_retry
)
221 if (instance
->is_crashed())
223 instance
->start_mysqld();
224 instance
->last_checked
= current_time
;
226 log_info("Guardian: restarting '%s'...",
227 (const char *) instance
->options
.instance_name
.str
);
232 log_info("Guardian: can not start '%s'. "
233 "Abandoning attempts to (re)start it",
234 (const char *) instance
->options
.instance_name
.str
);
236 instance
->set_state(Instance::CRASHED_AND_ABANDONED
);
241 case Instance::CRASHED_AND_ABANDONED
:
242 return; /* do nothing */
252 Main function of Guardian thread.
258 Check for all guarded instances and restart them if needed.
263 struct timespec timeout
;
265 log_info("Guardian: started.");
267 thread_registry
->register_thread(&thread_info
);
269 /* Loop, until all instances were shut down at the end. */
273 Instance_map::Iterator
instances_it(instance_map
);
275 bool all_instances_stopped
= TRUE
;
277 instance_map
->lock();
279 while ((instance
= instances_it
.next()))
283 if (!instance
->is_guarded() ||
284 instance
->get_state() == Instance::STOPPED
)
290 process_instance(instance
);
292 if (instance
->get_state() != Instance::STOPPED
)
293 all_instances_stopped
= FALSE
;
298 instance_map
->unlock();
302 if (shutdown_requested
&& all_instances_stopped
)
304 log_info("Guardian: all guarded mysqlds stopped.");
311 set_timespec(timeout
, Options::Main::monitoring_interval
);
313 thread_registry
->cond_timedwait(&thread_info
, &COND_guardian
,
314 &LOCK_guardian
, &timeout
);
318 log_info("Guardian: stopped.");
320 /* Now, when the Guardian is stopped we can stop the IM. */
322 thread_registry
->unregister_thread(&thread_info
);
323 thread_registry
->request_shutdown();
325 log_info("Guardian: finished.");
330 Return the value of stopped flag.
333 bool Guardian::is_stopped()
346 Wake up Guardian thread.
348 MT-NOTE: though usually the mutex associated with condition variable should
349 be acquired before signalling the variable, here this is not needed.
350 Signalling under locked mutex is used to avoid lost signals. In the current
351 logic however locking mutex does not guarantee that the signal will not be
355 void Guardian::ping()
357 pthread_cond_signal(&COND_guardian
);
362 Prepare list of instances.
367 MT-NOTE: Instance Map must be locked before calling the operation.
370 void Guardian::init()
373 Instance_map::Iterator
iterator(instance_map
);
375 while ((instance
= iterator
.next()))
379 instance
->reset_stat();
380 instance
->set_state(Instance::NOT_STARTED
);
388 An internal method which is called at shutdown to unregister instances and
389 attempt to stop them if requested.
395 Loops through the guarded_instances list and prepares them for shutdown.
396 For each instance we issue a stop command and change the state
400 Guardian object should be locked by the caller.
404 void Guardian::stop_instances()
406 static const int NUM_STOP_ATTEMPTS
= 100;
408 Instance_map::Iterator
instances_it(instance_map
);
411 instance_map
->lock();
413 while ((instance
= instances_it
.next()))
417 if (!instance
->is_guarded() ||
418 instance
->get_state() == Instance::STOPPED
)
425 If instance is running or was running (and now probably hanging),
429 if (instance
->is_mysqld_running() ||
430 instance
->get_state() == Instance::STARTED
)
432 instance
->set_state(Instance::STOPPING
);
433 instance
->last_checked
= time(NULL
);
437 /* Otherwise mark it as STOPPED. */
438 instance
->set_state(Instance::STOPPED
);
441 /* Request mysqld to stop. */
443 bool instance_stopped
= FALSE
;
445 for (int cur_attempt
= 0; cur_attempt
< NUM_STOP_ATTEMPTS
; ++cur_attempt
)
447 if (!instance
->kill_mysqld(SIGTERM
))
449 instance_stopped
= TRUE
;
453 if (!instance
->is_active())
455 instance_stopped
= TRUE
;
459 /* Sleep for 0.3 sec and check again. */
465 Abort if we failed to stop mysqld instance. That should not happen,
466 but if it happened, we don't know what to do and prefer to have clear
467 failure with coredump.
470 DBUG_ASSERT(instance_stopped
);
475 instance_map
->unlock();
483 void Guardian::lock()
485 pthread_mutex_lock(&LOCK_guardian
);
493 void Guardian::unlock()
495 pthread_mutex_unlock(&LOCK_guardian
);